De:PHP2Crawler

Aus YaCyWiki
Wechseln zu: Navigation, Suche

Crawler mittels PHP ansteuern

Es ist möglich, den Crawler über PHP mit Links zu füllen, die YaCy indizieren soll. Hier ein einfaches Beispielskript.

<?php

// Minimal example: ask a YaCy peer to start a crawl for one URL by calling its
// IndexCreate_p.html servlet over HTTP and checking the returned page.

// URL the crawl should start from.
$url="http://www.example.com/";

// Access data of the YaCy admin interface.
$yacy_host='127.0.0.1';
$yacy_port='8090';
$yacy_user='admin';
$yacy_pass='mypassword';

// The crawl filter below is built from the host of the start URL.
// parse_url() can return false or an array without 'host', so guard the lookup.
$url_parts = parse_url($url);
$host = (is_array($url_parts) && isset($url_parts['host'])) ? $url_parts['host'] : '';

// REMOTE_ADDR is not guaranteed to be set (e.g. when the script is run from the command line).
$requester = isset($_SERVER["REMOTE_ADDR"]) ? $_SERVER["REMOTE_ADDR"] : '';

// User name and password are percent-encoded so that characters such as '@', ':' or '/'
// inside them cannot break the structure of the URL.
$call=
   'http://'.rawurlencode($yacy_user).':'.rawurlencode($yacy_pass).'@'.$yacy_host.':'.$yacy_port.'/IndexCreate_p.html'.
   '?crawlingDepth=5'.
   '&crawlingFilter=.*'.urlencode($host).'.*'.
   '&crawlingQ=on'.
   '&localIndexing=on'.
   '&crawlOrder=on'.
   '&xsstopw=on'.
   '&intention='.urlencode('requested by '.$requester).
   '&crawlingFile='.
   '&crawlingMode=url'.
   '&crawlingURL='.urlencode($url).
   '&crawlingstart=Start+New+Crawl';

// file() returns false when the request fails; implode() must only ever see an array.
$lines = @file($call);
$response = is_array($lines) ? implode('',$lines) : '';

// The pattern matches the confirmation text of the English YaCy interface.
if (preg_match('/crawling of "(.*?)" started/is',$response)) {
   echo "Hat geklappt - URL hinzugefügt.";
} else {
   echo "Mist - Irgendetwas ging schief.";
}

?>

Hinweis: Der reguläre Ausdruck in der IF-Abfrage muss evtl. angepasst werden. Hier wurde von einer englischen YaCy-Version ausgegangen. Auch sollten die Parameter in der Variable $call entsprechend den eigenen Bedürfnissen angepasst werden.

Komplexe Lösung

Mit dieser Klasse yacy ist es möglich, YaCy sehr umfangreich fernzusteuern. Da die Klasse sich momentan noch in Entwicklung befindet, gibt es erst einmal keine Dokumentation. Die Klasse benötigt die Klasse browser (und deren "Subklasse" site), welche unten anhängt. Verwendung auf eigene Gefahr. Zuletzt getestet mit SVN 2841 unter PHP 5.1.6.

<?php

class yacy {

	var $env;
	var $cache;

	function __construct() {
		// Sets up the configuration ($this->env) and the empty result cache ($this->cache).
		// Declared as __construct() because a method named after the class is no longer
		// treated as the constructor by PHP 8; yacy() below is kept for old callers.

		// initialize
		$this->env = array();
		$this->cache = array();

		// access settings of the YaCy peer
		$this->env['yacy']['host']='localhost';
		$this->env['yacy']['port']='8090';
		$this->env['yacy']['user']='admin';
		$this->env['yacy']['pass']='mypassword';

		// browser settings, each entry is one request header line
		$this->env['browser']['User-Agent']='YaCy-Remote/0.1';
		$this->env['browser']['Accept']='text/xml,application/xml,application/xhtml+xml,text/html,text/plain';
		$this->env['browser']['Accept-Charset']='ISO-8859-1';
		$this->env['browser']['Accept-Language']='de,en';

		// default values for new crawls; new_crawl() merges caller options over these
		$this->env['new_crawl_defaults'] = array();
		$this->env['new_crawl_defaults']['crawlingDepth']=3;
		$this->env['new_crawl_defaults']['crawlingFilter']='.*';
		$this->env['new_crawl_defaults']['crawlingIfOlderCheck']='on';
		$this->env['new_crawl_defaults']['crawlingIfOlderNumber']=180;
		$this->env['new_crawl_defaults']['crawlingIfOlderUnit']='day';
		$this->env['new_crawl_defaults']['crawlingDomFilterCheck']='on';
		$this->env['new_crawl_defaults']['crawlingDomFilterDepth']=3;
		$this->env['new_crawl_defaults']['crawlingDomMaxCheck']='on';
		$this->env['new_crawl_defaults']['crawlingDomMaxPages']=5000;
		$this->env['new_crawl_defaults']['crawlingQ']='on';
		$this->env['new_crawl_defaults']['localIndexing']='on';
		$this->env['new_crawl_defaults']['crawlOrder']='on';
		$this->env['new_crawl_defaults']['xsstopw']='on';
		$this->env['new_crawl_defaults']['intention']='';
		$this->env['new_crawl_defaults']['crawlingMode']='url';
		$this->env['new_crawl_defaults']['crawlingURL']='';
		// NOTE(review): new_crawl() passes every value through urlencode(), so the '+' signs
		// here are sent as literal plus characters (%2B), not as spaces - confirm this is intended.
		$this->env['new_crawl_defaults']['crawlingstart']='Start+New+Crawl';

		// the file which is requested for the online check
		$this->env['online_testfile']='/env/grafics/empty.gif';

	}

	function yacy() {
		// PHP 4 style constructor name, kept so that code calling yacy() explicitly keeps working.
		$this->__construct();
	}

	// return array of entries in indexingqueue
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_indexingqueue_entries($cache=true) {
		// Returns the <entry> records found inside <indexingqueue> of /xml/queues_p.xml as a
		// list of arrays with the keys initiator, depth, modified, anchor, url, size, hash and
		// inprocess. Returns false when _get_network_file() reports failure.
		$filename = '/xml/queues_p.xml';
		$cache_name = 'indexingqueue_entries';

		// Serve the stored list unless the caller asked for a fresh download.
		if (($cache != false) && isset($this->cache[$cache_name])) {
			return $this->cache[$cache_name];
		}
		if (!$this->_get_network_file($filename,$cache)) {
			return false;
		}

		// Cut out the queue section. If it is missing the queue is treated as empty instead
		// of reading an undefined match offset.
		// NOTE(review): assumes _get_network_file() stored the document in $this->cache[$filename].
		$entries_line = '';
		if (preg_match('/<indexingqueue>(.*?)<\/indexingqueue>/si',$this->cache[$filename],$section)) {
			$entries_line = $section[1];
		}

		// Even-numbered groups hold the tag contents, odd-numbered groups the text in between.
		preg_match_all('/<entry>(.*?)<initiator>(.*?)<\/initiator>(.*?)<depth>(.*?)<\/depth>(.*?)<modified>(.*?)<\/modified>(.*?)<anchor>(.*?)<\/anchor>(.*?)<url>(.*?)<\/url>(.*?)<size>(.*?)<\/size>(.*?)<hash>(.*?)<\/hash>(.*?)<inProcess>(.*?)<\/inProcess>(.*?)<\/entry>/si',$entries_line,$matches);

		$entries = array();
		if (isset($matches[0]) && is_array($matches[0])) {
			foreach ($matches[0] as $key=>$value) {
				$entries[] = array(
					'initiator'=>$matches[2][$key],
					'depth'=>intval($matches[4][$key]),
					'modified'=>$matches[6][$key],
					'anchor'=>$matches[8][$key],
					'url'=>$matches[10][$key],
					'size'=>intval($matches[12][$key]),
					'hash'=>$matches[14][$key],
					// everything except the literal text 'false' counts as "in process"
					'inprocess'=>(($matches[16][$key] == 'false') ? false : true)
				);
			}
		}

		$this->cache[$cache_name] = $entries;
		return $this->cache[$cache_name];
	}

	// return array of entries in localcrawlerqueue
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_localcrawlerqueue_entries($cache=true) {
		// Returns the <entry> records found inside <localcrawlerqueue> of /xml/queues_p.xml as
		// a list of arrays with the keys initiator, depth, modified, anchor, url, hash and
		// inprocess. Returns false when _get_network_file() reports failure.
		$filename = '/xml/queues_p.xml';
		$cache_name = 'localcrawlerqueue_entries';

		// Serve the stored list unless the caller asked for a fresh download.
		if (($cache != false) && isset($this->cache[$cache_name])) {
			return $this->cache[$cache_name];
		}
		if (!$this->_get_network_file($filename,$cache)) {
			return false;
		}

		// Cut out the queue section. If it is missing the queue is treated as empty instead
		// of reading an undefined match offset.
		// NOTE(review): assumes _get_network_file() stored the document in $this->cache[$filename].
		$entries_line = '';
		if (preg_match('/<localcrawlerqueue>(.*?)<\/localcrawlerqueue>/si',$this->cache[$filename],$section)) {
			$entries_line = $section[1];
		}

		// Even-numbered groups hold the tag contents, odd-numbered groups the text in between.
		preg_match_all('/<entry>(.*?)<initiator>(.*?)<\/initiator>(.*?)<depth>(.*?)<\/depth>(.*?)<modified>(.*?)<\/modified>(.*?)<anchor>(.*?)<\/anchor>(.*?)<url>(.*?)<\/url>(.*?)<hash>(.*?)<\/hash>(.*?)<inProcess>(.*?)<\/inProcess>(.*?)<\/entry>/si',$entries_line,$matches);

		$entries = array();
		if (isset($matches[0]) && is_array($matches[0])) {
			foreach ($matches[0] as $key=>$value) {
				$entries[] = array(
					'initiator'=>$matches[2][$key],
					'depth'=>intval($matches[4][$key]),
					'modified'=>$matches[6][$key],
					'anchor'=>$matches[8][$key],
					'url'=>$matches[10][$key],
					'hash'=>$matches[12][$key],
					// everything except the literal text 'false' counts as "in process"
					'inprocess'=>(($matches[14][$key] == 'false') ? false : true)
				);
			}
		}

		$this->cache[$cache_name] = $entries;
		return $this->cache[$cache_name];
	}

	// return size of indexingqueue in integer
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_indexingqueue_size($cache=true) {
		// Reads <size> inside <indexingqueue> of /xml/queues_p.xml, stores it as an integer
		// in the result cache and returns it; false when the file could not be fetched.
		$xml_path = '/xml/queues_p.xml';
		$slot = 'indexingqueue_size';

		// A stored value is only reused when caching is requested.
		if (($cache != false) && isset($this->cache[$slot])) {
			return $this->cache[$slot];
		}

		if (!$this->_get_network_file($xml_path,$cache)) {
			return false;
		}

		// Group 2 is the text between <size> and </size>.
		preg_match('/<indexingqueue>(.*?)<size>(.*?)<\/size>(.*?)<\/indexingqueue>/si',$this->cache[$xml_path],$found);
		$this->cache[$slot] = intval($found[2]);
		return $this->cache[$slot];
	}

	// return max size of indexingqueue in integer
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_indexingqueue_max_size($cache=true) {
		// Reads <max> inside <indexingqueue> of /xml/queues_p.xml, stores it as an integer
		// in the result cache and returns it; false when the file could not be fetched.
		$xml_path = '/xml/queues_p.xml';
		$slot = 'indexingqueue_max_size';

		// A stored value is only reused when caching is requested.
		if (($cache != false) && isset($this->cache[$slot])) {
			return $this->cache[$slot];
		}

		if (!$this->_get_network_file($xml_path,$cache)) {
			return false;
		}

		// Group 2 is the text between <max> and </max>.
		preg_match('/<indexingqueue>(.*?)<max>(.*?)<\/max>(.*?)<\/indexingqueue>/si',$this->cache[$xml_path],$found);
		$this->cache[$slot] = intval($found[2]);
		return $this->cache[$slot];
	}

	// return size of loaderqueue in integer
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_loaderqueue_size($cache=true) {
		// Reads <size> inside <loaderqueue> of /xml/queues_p.xml and returns it as an integer,
		// or false when the file could not be fetched.
		// $cache was used in the body but missing from the signature, so it was always
		// undefined and the cached value was never used. It is now a parameter with the
		// same default as the other queue getters.
		$filename = '/xml/queues_p.xml';
		$cache_name = 'loaderqueue_size';

		if (($cache != false) && isset($this->cache[$cache_name])) {
			return $this->cache[$cache_name];
		}
		if (!$this->_get_network_file($filename,$cache)) {
			return false;
		}

		// Group 2 is the text between <size> and </size>.
		preg_match('/<loaderqueue>(.*?)<size>(.*?)<\/size>(.*?)<\/loaderqueue>/si',$this->cache[$filename],$matches);
		$this->cache[$cache_name] = intval($matches[2]);
		return $this->cache[$cache_name];
	}

	// return max size of loaderqueue in integer
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_loaderqueue_max_size($cache=true) {
		// Reads <max> inside <loaderqueue> of /xml/queues_p.xml and returns it as an integer,
		// or false when the file could not be fetched.
		// $cache was used in the body but missing from the signature, so it was always
		// undefined and the cached value was never used. It is now a parameter with the
		// same default as the other queue getters.
		$filename = '/xml/queues_p.xml';
		$cache_name = 'loaderqueue_max_size';

		if (($cache != false) && isset($this->cache[$cache_name])) {
			return $this->cache[$cache_name];
		}
		if (!$this->_get_network_file($filename,$cache)) {
			return false;
		}

		// Group 2 is the text between <max> and </max>.
		preg_match('/<loaderqueue>(.*?)<max>(.*?)<\/max>(.*?)<\/loaderqueue>/si',$this->cache[$filename],$matches);
		$this->cache[$cache_name] = intval($matches[2]);
		return $this->cache[$cache_name];
	}

	// return size of localcrawlerqueue in integer
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_localcrawlerqueue_size($cache=true) {
		// Reads <size> inside <localcrawlerqueue> of /xml/queues_p.xml and returns it as an
		// integer, or false when the file could not be fetched.
		// $cache was used in the body but missing from the signature, so it was always
		// undefined and the cached value was never used. It is now a parameter with the
		// same default as the other queue getters.
		$filename = '/xml/queues_p.xml';
		$cache_name = 'localcrawlerqueue_size';

		if (($cache != false) && isset($this->cache[$cache_name])) {
			return $this->cache[$cache_name];
		}
		if (!$this->_get_network_file($filename,$cache)) {
			return false;
		}

		// Group 2 is the text between <size> and </size>.
		preg_match('/<localcrawlerqueue>(.*?)<size>(.*?)<\/size>(.*?)<\/localcrawlerqueue>/si',$this->cache[$filename],$matches);
		$this->cache[$cache_name] = intval($matches[2]);
		return $this->cache[$cache_name];
	}

	// return size of remotecrawlerqueue in integer
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_remotecrawlerqueue_size($cache=true) {
		// Reads <size> inside <remotecrawlerqueue> of /xml/queues_p.xml and returns it as an
		// integer, or false when the file could not be fetched.
		// $cache was used in the body but missing from the signature, so it was always
		// undefined and the cached value was never used. It is now a parameter with the
		// same default as the other queue getters.
		$filename = '/xml/queues_p.xml';
		$cache_name = 'remotecrawlerqueue_size';

		if (($cache != false) && isset($this->cache[$cache_name])) {
			return $this->cache[$cache_name];
		}
		if (!$this->_get_network_file($filename,$cache)) {
			return false;
		}

		// Group 2 is the text between <size> and </size>.
		preg_match('/<remotecrawlerqueue>(.*?)<size>(.*?)<\/size>(.*?)<\/remotecrawlerqueue>/si',$this->cache[$filename],$matches);
		$this->cache[$cache_name] = intval($matches[2]);
		return $this->cache[$cache_name];
	}

	// remove an item from indexingqueue (given by hash)
	// $hash (string) - hash of item
	// return true, false on error
	// return true means the request was successfull, say nothing about removing
	function indexingqueue_remove_entry($hash) {
		// Requests IndexCreateIndexingQueue_p.html?deleteEntry=<hash> and reports whether the
		// peer answered with status 302. A true result only says the request went through.
		$browser = $this->_create_browser();
		// The hash is URL-encoded so that reserved characters cannot alter the query string.
		$site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/IndexCreateIndexingQueue_p.html?deleteEntry='.urlencode($hash));
		$site->get();
		if ($site->get_status()==302) {
			// a valid answer also proves that the peer is reachable
			$this->cache['is_online']=true;
			return true;
		}
		return false;
	}

	// remove all items from indexingqueue
	// takes no parameters
	// return true, false on error
	// return true means the request was successfull, say nothing about removing
	// keep in mind: this can take an long time, so the request failed, but work
	function indexingqueue_remove_all() {
		// Posts the "clear indexing queue" form to IndexCreateIndexingQueue_p.html.
		// Returns true when the answer has status 302, otherwise false.
		$yacy = $this->env['yacy'];
		$target = 'http://'.$yacy['user'].':'.$yacy['pass'].'@'.$yacy['host'].':'.$yacy['port'].'/IndexCreateIndexingQueue_p.html';

		$browser = $this->_create_browser();
		$site = $browser->site($target);
		$site->post(array('clearIndexingQueue'=>'clear indexing queue'));

		if ($site->get_status()!=302) {
			return false;
		}
		// a valid answer also proves that the peer is reachable
		$this->cache['is_online']=true;
		return true;
	}

	// remove an item from localcrawlerqueue (given by hash)
	// $hash (string) - hash of item
	// return true means the request was successfull, say nothing about removing
	function localcrawlerqueue_remove_entry($hash) {
		// Requests IndexCreateWWWLocalQueue_p.html?deleteEntry=<hash> and reports whether the
		// peer answered with status 302. A true result only says the request went through.
		$browser = $this->_create_browser();
		// The hash is URL-encoded so that reserved characters cannot alter the query string.
		$site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/IndexCreateWWWLocalQueue_p.html?deleteEntry='.urlencode($hash));
		$site->get();
		if ($site->get_status()==302) {
			// a valid answer also proves that the peer is reachable
			$this->cache['is_online']=true;
			return true;
		}
		return false;
	}

	// remove all items from localcrawlerqueue
	// takes no parameters
	// return true means the request was successfull, say nothing about removing
	// keep in mind: this can take an long time, so the request failed, but work
	function localcrawlerqueue_remove_all() {
		// Posts a delete form with the URL pattern '.*' to IndexCreateWWWLocalQueue_p.html.
		// Returns true when the answer has status 302, otherwise false.
		$yacy = $this->env['yacy'];
		$target = 'http://'.$yacy['user'].':'.$yacy['pass'].'@'.$yacy['host'].':'.$yacy['port'].'/IndexCreateWWWLocalQueue_p.html';

		$form = array('pattern'=>'.*','option'=>'URL','deleteEntries'=>'Delete');

		$browser = $this->_create_browser();
		$site = $browser->site($target);
		$site->post($form);

		if ($site->get_status()!=302) {
			return false;
		}
		// a valid answer also proves that the peer is reachable
		$this->cache['is_online']=true;
		return true;
	}

	// set an option in config-file, some need to reboot to effect
	// $option (string) - name of the option
	// $value (string or integer) -  new value of the option
	// return true or false on error
	function set_config_option($option,$value) {
		// Posts key/value to Config_p.html. Returns true when the answer has status 200 and
		// a non-empty content, otherwise false.
		$cache_name = 'config_options';
		$browser = $this->_create_browser();
		$site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/Config_p.html');
		$site->post(array('key'=>$option,'value'=>$value,'submit'=>'Save'));
		if (($site->get_status()==200) && ($site->get_content())) {
			$this->cache['is_online']=true;
			// Only patch an option cache that has already been loaded. Creating it here with
			// a single key would make get_config_option() regard the cache as complete and
			// answer false for every other option.
			if (isset($this->cache[$cache_name])) {
				$this->cache[$cache_name][$option] = $value;
			}
			return true;
		}
		return false;
	}

	// reads the config (over xml), parse them and return the value of an given option, return string or array, false on error
	// $option (string or boolean(false)) - the name of the option from config-file, case-sensitive
	// if $option == false then all options as array are returned
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	// return the value as string, all options as array or false on error
	function get_config_option($option=false,$cache=true) {
		// Returns one option value (or all options when $option is false) parsed from
		// /xml/config_p.xml. Returns false when the file cannot be fetched or the option
		// is unknown.
		$xml_path = '/xml/config_p.xml';
		$slot = 'config_options';

		// (Re)build the option table when caching is off or nothing is stored yet.
		if (($cache == false) || (!isset($this->cache[$slot]))) {
			if (!$this->_get_network_file($xml_path,$cache)) {
				return false;
			}

			// Group 2 holds the option names, group 4 the matching values.
			preg_match_all('/<option>(.*?)<key>(.*?)<\/key>(.*?)<value>(.*?)<\/value>(.*?)<\/option>/is',$this->cache[$xml_path],$found);
			$names = $found[2];
			$values = $found[4];

			$table = array();
			foreach ($names as $idx=>$name) {
				$table[$name] = $values[$idx];
			}
			$this->cache[$slot] = $table;
		}

		// From here on the answer always comes from the stored table.
		if ($option===false) {
			return $this->cache[$slot];
		}
		if (isset($this->cache[$slot][$option])) {
			return $this->cache[$slot][$option];
		}
		return false;
	}

	// pause or resume global grawl triggers
	// $bool (boolean) if true resume global grawl triggers, if false pause global grawl triggers
	function set_global_grawl_triggers($bool=true) {
		// Calls Status.html with jobType=globalCrawlTrigger. Only a strict false pauses the
		// job, every other value continues it. Returns true on status 200 with non-empty content.
		$action = ($bool===false) ? 'pauseCrawlJob' : 'continueCrawlJob';
		$yacy = $this->env['yacy'];
		$target = 'http://'.$yacy['user'].':'.$yacy['pass'].'@'.$yacy['host'].':'.$yacy['port'].'/Status.html?'.$action.'=&jobType=globalCrawlTrigger';

		$browser = $this->_create_browser();
		$site = $browser->site($target);
		$site->get();

		$ok = (($site->get_status()==200) && ($site->get_content()));
		if ($ok) {
			// a valid answer also proves that the peer is reachable
			$this->cache['is_online']=true;
		}
		return $ok;
	}

	// pause or resume remote triggered crawls
	// $bool (boolean) if true resume remote triggered crawls, if false pause remote triggered crawls
	function set_remote_triggered_crawling($bool=true) {
		// Calls Status.html with jobType=remoteTriggeredCrawl. Only a strict false pauses the
		// job, every other value continues it. Returns true on status 200 with non-empty content.
		$action = ($bool===false) ? 'pauseCrawlJob' : 'continueCrawlJob';
		$yacy = $this->env['yacy'];
		$target = 'http://'.$yacy['user'].':'.$yacy['pass'].'@'.$yacy['host'].':'.$yacy['port'].'/Status.html?'.$action.'=&jobType=remoteTriggeredCrawl';

		$browser = $this->_create_browser();
		$site = $browser->site($target);
		$site->get();

		$ok = (($site->get_status()==200) && ($site->get_content()));
		if ($ok) {
			// a valid answer also proves that the peer is reachable
			$this->cache['is_online']=true;
		}
		return $ok;
	}

	// pause or resume local crawls
	// $bool (boolean) if true resume local crawls, if false pause local crawls
	function set_local_crawling($bool=true) {
		// Calls Status.html with jobType=localCrawl. Only a strict false pauses the job,
		// every other value continues it. Returns true on status 200 with non-empty content.
		$action = ($bool===false) ? 'pauseCrawlJob' : 'continueCrawlJob';
		$yacy = $this->env['yacy'];
		$target = 'http://'.$yacy['user'].':'.$yacy['pass'].'@'.$yacy['host'].':'.$yacy['port'].'/Status.html?'.$action.'=&jobType=localCrawl';

		$browser = $this->_create_browser();
		$site = $browser->site($target);
		$site->get();

		$ok = (($site->get_status()==200) && ($site->get_content()));
		if ($ok) {
			// a valid answer also proves that the peer is reachable
			$this->cache['is_online']=true;
		}
		return $ok;
	}

	// setup the Distributed Indexing
	// $limit (boolean or intener)
	//   if $limit is true Distributed Indexing goes on without an limit
	//   if $limit is an integer value >0 Distributed Indexing goes on with this as new Pages Per Minute Limit
	//   if $limit false or =<0 Distributed Indexing goes off
	// return true if set, otherwise false
	function set_distributed_indexing($limit=true) {
		// Chooses one of three IndexCreate_p.html requests depending on $limit and returns
		// true when the answer has status 200 and non-empty content.
		$browser = $this->_create_browser();

		if ((!is_bool($limit)) && ($limit>=1)) {
			// non-boolean value of at least 1: accept crawls with that limit
			$query = 'distributedcrawling=set&dcr=acceptCrawlLimited&acceptCrawlLimit='.urlencode($limit);
		} elseif ($limit===true) {
			// strict true: accept crawls at the maximum setting
			$query = 'distributedcrawling=set&dcr=acceptCrawlMax&acceptCrawlLimit=60';
		} else {
			// false, zero, negative or anything else: deny crawls
			$query = 'distributedcrawling=set&dcr=acceptCrawlDenied&acceptCrawlLimit=1';
		}

		$yacy = $this->env['yacy'];
		$site = $browser->site('http://'.$yacy['user'].':'.$yacy['pass'].'@'.$yacy['host'].':'.$yacy['port'].'/IndexCreate_p.html?'.$query);
		$site->get();

		$ok = (($site->get_status()==200) && ($site->get_content()));
		if ($ok) {
			// a valid answer also proves that the peer is reachable
			$this->cache['is_online']=true;
		}
		return $ok;
	}

	// starts (simple) an new crawl
	// $url (string) - URL
	// $comment (string or false) - intension, set to false if want not want to use it
	// $crawl_host_only (boolean) - should ony the host crawled
	// $crawl_global (boolean) - start as global crawl
	// $max_pages_per_dom (integer or false) - maximum pages per domain, set to false if want not want to use it
	// $crawl_deep (integer) - maximal crawl deep
	// $dom_deep (integer or false) - the maximal deep over external domains, set to false if want not want to use it
	// $min_age_days (integer or  false) - if you want recrawl files older then this in days, set to false if want not want to use it
	// $accept_dynamic (boolean) - set if you want crawl dynamic content
	// return true if the crawl was succesfull requestet, false on error
	function new_crawl_simple($url=false, $comment=false, $crawl_host_only=false, $crawl_global=true, $max_pages_per_dom=5000, $crawl_deep=3, $dom_deep=3, $min_age_days=50, $accept_dynamic=true) {
		// Translates the convenience arguments into the option array expected by new_crawl()
		// and returns the result of new_crawl().

		// parse_url() can return false or an array without 'host' (e.g. for the default
		// $url=false), so the lookup is guarded.
		$url_parts = @parse_url($url);
		$host = (is_array($url_parts) && isset($url_parts['host'])) ? $url_parts['host'] : '';

		$options = array();

		$options['crawlingURL'] = $url;
		$options['intention'] = $comment ? $comment : '';
		$options['crawlOrder'] = $crawl_global ? 'on' : 'off';

		if ($crawl_host_only) {
			// Quote the host so that its dots are matched literally by the filter expression.
			$options['crawlingFilter']='.*'.preg_quote($host).'.*';
			// restricting to one host implies no hops over other domains
			$dom_deep = 0;
		} else {
			$options['crawlingFilter']='.*';
		}

		$options['crawlingDepth']=$crawl_deep;

		if ($dom_deep===false) {
			$options['crawlingDomFilterCheck']='off';
			$options['crawlingDomFilterDepth']=0;
		} else {
			$options['crawlingDomFilterCheck']='on';
			$options['crawlingDomFilterDepth']=$dom_deep;
		}

		// NOTE(review): unlike the other "false disables it" switches, the check stays 'on'
		// here and a very large page limit is sent instead - confirm this is intended.
		$options['crawlingDomMaxCheck']='on';
		if ($max_pages_per_dom===false) {
			$options['crawlingDomMaxPages']=999999;
		} else {
			$options['crawlingDomMaxPages']=$max_pages_per_dom;
		}

		$options['crawlingIfOlderUnit']='day';
		if ($min_age_days===false) {
			$options['crawlingIfOlderCheck']='off';
			$options['crawlingIfOlderNumber']=0;
		} else {
			$options['crawlingIfOlderCheck']='on';
			$options['crawlingIfOlderNumber']=$min_age_days;
		}

		$options['crawlingQ'] = $accept_dynamic ? 'on' : 'off';

		return $this->new_crawl($options);

	}

	// start an new crawl
	// $options (array) - all get-params to create the new crawl, $key as name and $value as value
	// see $this->env['new_crawl_defaults']
	// i.E.: $options =  array("crawlingURL" => "http://example.com", ...)
	// all params not given are taken from $this->env['new_crawl_defaults']
	// return true if the crawl was succesfull requestet, false on error
	function new_crawl($options) {
		// Sends the merged crawl parameters as a GET request to IndexCreate_p.html.
		// Returns false without a request when no crawlingURL is set, otherwise true when
		// the answer has status 200 and non-empty content.

		// Parameters the caller left out come from the configured defaults.
		$params = array_merge($this->env['new_crawl_defaults'],$options);

		// Without a start URL there is nothing to request.
		if (!$params['crawlingURL']) {
			return false;
		}

		// Assemble "name=value" pairs (values URL-encoded), separated by '&'.
		$query = '';
		foreach ($params as $name=>$val) {
			if ($query !== '') {
				$query .= '&';
			}
			$query .= $name.'='.urlencode($val);
		}

		$yacy = $this->env['yacy'];
		$browser = $this->_create_browser();
		$site = $browser->site('http://'.$yacy['user'].':'.$yacy['pass'].'@'.$yacy['host'].':'.$yacy['port'].'/IndexCreate_p.html?'.$query);
		$site->get();

		$ok = (($site->get_status()==200) && ($site->get_content()));
		if ($ok) {
			// a valid answer also proves that the peer is reachable
			$this->cache['is_online']=true;
		}
		return $ok;
	}


	// return active_count as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_active_count($cache=true) {
		// Asks _get_network_detail() for <peers><active><count> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<active>(.*?)<count>(.*?)<\/count>(.*?)<\/active>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return active_links as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_active_links($cache=true) {
		// Asks _get_network_detail() for <peers><active><links> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<active>(.*?)<links>(.*?)<\/links>(.*?)<\/active>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return active_words as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_active_words($cache=true) {
		// Asks _get_network_detail() for <peers><active><words> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<active>(.*?)<words>(.*?)<\/words>(.*?)<\/active>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return passive_count as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_passive_count($cache=true) {
		// Asks _get_network_detail() for <peers><passive><count> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<passive>(.*?)<count>(.*?)<\/count>(.*?)<\/passive>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return passive_links as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_passive_links($cache=true) {
		// Asks _get_network_detail() for <peers><passive><links> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<passive>(.*?)<links>(.*?)<\/links>(.*?)<\/passive>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return passive_words as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_passive_words($cache=true) {
		// Asks _get_network_detail() for <peers><passive><words> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<passive>(.*?)<words>(.*?)<\/words>(.*?)<\/passive>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return potential_count as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_potential_count($cache=true) {
		// Asks _get_network_detail() for <peers><potential><count> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<potential>(.*?)<count>(.*?)<\/count>(.*?)<\/potential>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return potential_links as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_potential_links($cache=true) {
		// Asks _get_network_detail() for <peers><potential><links> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<potential>(.*?)<links>(.*?)<\/links>(.*?)<\/potential>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return potential_words as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_potential_words($cache=true) {
		// Asks _get_network_detail() for <peers><potential><words> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<potential>(.*?)<words>(.*?)<\/words>(.*?)<\/potential>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return all_count as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_all_count($cache=true) {
		// Asks _get_network_detail() for <peers><all><count> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<all>(.*?)<count>(.*?)<\/count>(.*?)<\/all>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return all_links as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_all_links($cache=true) {
		// Asks _get_network_detail() for <peers><all><links> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<all>(.*?)<links>(.*?)<\/links>(.*?)<\/all>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return all_words as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_all_words($cache=true) {
		// Asks _get_network_detail() for <peers><all><words> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<all>(.*?)<words>(.*?)<\/words>(.*?)<\/all>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return peer_name as string or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_peer_name($cache=true) {
		// Returns whatever _get_network_detail() yields for <peers><your><name>
		// (position argument 3), unmodified.
		return $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<name>(.*?)<\/name>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);
	}

	// return peer_version as string or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_peer_version($cache=true) {
		// Returns whatever _get_network_detail() yields for <peers><your><version>
		// (position argument 3), unmodified.
		return $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<version>(.*?)<\/version>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);
	}

	// return peer_utc as is as string or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_peer_utc($cache=true) {
		// Returns whatever _get_network_detail() yields for <peers><your><utc>
		// (position argument 3), unmodified.
		return $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<utc>(.*?)<\/utc>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);
	}

	// return peer_uptime as integer in minutes or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_peer_uptime($cache=true) {
		// Converts the <peers><your><uptime> text into minutes. Returns false when
		// _get_network_detail() returns false.
		$pattern = '/<peers>(.*?)<your>(.*?)<uptime>(.*?)<\/uptime>(.*?)<\/your>(.*?)<\/peers>/is';
		$uptime = $this->_get_network_detail($pattern,3,$cache);
		if ($uptime === false) {
			return false;
		}

		// Split on whitespace and ':'. Piece 0 is counted as days, piece 2 as hours and
		// piece 3 as minutes; piece 1 is skipped.
		// NOTE(review): presumably the text looks like "3 days 12:34" - confirm against the peer output.
		$parts = preg_split('/(\s|:)/',$uptime);

		// Missing pieces count as 0 instead of reading undefined offsets.
		$days = isset($parts[0]) ? intval($parts[0]) : 0;
		$hours = isset($parts[2]) ? intval($parts[2]) : 0;
		$minutes = isset($parts[3]) ? intval($parts[3]) : 0;

		return ($days*1440)+($hours*60)+$minutes;
	}

	// return peer_links as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_peer_links($cache=true) {
		// Asks _get_network_detail() for <peers><your><links> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<links>(.*?)<\/links>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return peer_words as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_peer_words($cache=true) {
		// Asks _get_network_detail() for <peers><your><words> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<words>(.*?)<\/words>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return peer_acceptcrawl as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_peer_acceptcrawl($cache=true) {
		// Asks _get_network_detail() for <peers><your><acceptcrawl> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<acceptcrawl>(.*?)<\/acceptcrawl>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return peer_acceptindex as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_peer_acceptindex($cache=true) {
		// Asks _get_network_detail() for <peers><your><acceptindex> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<acceptindex>(.*?)<\/acceptindex>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return peer_sentwords as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_peer_sentwords($cache=true) {
		// Asks _get_network_detail() for <peers><your><sentwords> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<sentwords>(.*?)<\/sentwords>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// return peer_senturls as integer or false
	// $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result
	function get_peer_senturls($cache=true) {
		// Asks _get_network_detail() for <peers><your><senturls> (position argument 3) and
		// passes the result through _remove_dots().
		$raw_value = $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<senturls>(.*?)<\/senturls>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);
		return $this->_remove_dots($raw_value);
	}

	// returns peer_receivedwords as integer, or false on error
	// $cache (boolean) - if false, every call downloads the file again; otherwise the cached result is used
	function get_peer_receivedwords($cache=true) {
		// the value sits in <receivedwords> inside the <your> section of <peers>;
		// the third capture group of the expression holds it
		$raw = $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<receivedwords>(.*?)<\/receivedwords>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);

		// drop the dots and convert to an integer; false is handed through
		return $this->_remove_dots($raw);
	}

	// returns peer_recievedlinks as integer, or false on error
	// $cache (boolean) - if false, every call downloads the file again; otherwise the cached result is used
	function get_peer_recievedlinks($cache=true) {
		// the value sits in <recievedlinks> (spelled exactly like that in the pattern)
		// inside the <your> section of <peers>; the third capture group holds it
		$raw = $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<recievedlinks>(.*?)<\/recievedlinks>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);

		// drop the dots and convert to an integer; false is handed through
		return $this->_remove_dots($raw);
	}

	// returns peer_ppm as integer, or false on error
	// $cache (boolean) - if false, every call downloads the file again; otherwise the cached result is used
	function get_peer_ppm($cache=true) {
		// the value sits in <ppm> inside the <your> section of <peers>;
		// the third capture group of the expression holds it
		$raw = $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<ppm>(.*?)<\/ppm>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);

		// drop the dots and convert to an integer; false is handed through
		return $this->_remove_dots($raw);
	}

	// returns peer_seeds as integer, or false on error
	// $cache (boolean) - if false, every call downloads the file again; otherwise the cached result is used
	function get_peer_seeds($cache=true) {
		// the value sits in <seeds> inside the <your> section of <peers>;
		// the third capture group of the expression holds it
		$raw = $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<seeds>(.*?)<\/seeds>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);

		// drop the dots and convert to an integer; false is handed through
		return $this->_remove_dots($raw);
	}

	// returns peer_connects as integer, or false on error
	// $cache (boolean) - if false, every call downloads the file again; otherwise the cached result is used
	function get_peer_connects($cache=true) {
		// the value sits in <connects> inside the <your> section of <peers>;
		// the third capture group of the expression holds it
		$raw = $this->_get_network_detail(
			'/<peers>(.*?)<your>(.*?)<connects>(.*?)<\/connects>(.*?)<\/your>(.*?)<\/peers>/is',
			3,
			$cache
		);

		// drop the dots and convert to an integer; false is handed through
		return $this->_remove_dots($raw);
	}

	// returns peer_status as integer, or false on error
	// $cache (boolean) - if false, every call downloads the file again; otherwise the cached result is used
	function get_peer_status($cache=true) {
		// unlike the other getters this pattern does not require a <your> section:
		// the first <status> element inside <peers> is used, capture group 2 holds it
		$raw = $this->_get_network_detail(
			'/<peers>(.*?)<status>(.*?)<\/status>(.*?)<\/peers>/is',
			2,
			$cache
		);

		// drop the dots and convert to an integer; false is handed through
		// NOTE(review): a non-numeric status text would become 0 here - confirm what Network.xml delivers
		return $this->_remove_dots($raw);
	}
	
	// ------------------------------------------------------------------------------
	// private functions - use internal only
	// ------------------------------------------------------------------------------

	// removes unwanted dots and returns an integer; false is passed through unchanged
	// $value (mixed) - the value to convert, or false
	function _remove_dots($value) {
		// "no value" is reported as false. null is treated like false because a
		// lookup that found nothing can hand in null; converting that to 0 would
		// make a missing value indistinguishable from a real zero.
		if (($value === false) || ($value === null)) {
			return false;
		}

		// remove the dots (e.g. "1.234.567" -> "1234567") and convert to integer
		return intval(str_replace('.','',$value));
	}

	// fetches Network.xml, applies the pattern to it and returns the extracted value (mixed), false on error
	// $pattern (string) - regular expression used to extract the value
	// $pattern_position (integer) - index of the capture group that holds the value
	// $cache (boolean) - if false, every call downloads the file again; otherwise the cached result is used
	// returns the value, or false on error
	function _get_network_detail($pattern,$pattern_position,$cache=true) {
		// Extracts one value from /Network.xml with a regular expression.
		// $pattern (string) - regular expression applied to the file content
		// $pattern_position (integer) - index of the capture group holding the value
		// $cache (boolean) - if false, the file is downloaded again and the value re-extracted
		// returns the captured string, or false if the file could not be fetched
		// or the pattern (or the requested group) did not match
		$filename = '/Network.xml';
		$cache_name = md5($pattern.$pattern_position);

		// serve the value from the cache when that is allowed and possible
		if (($cache != false) && isset($this->cache[$cache_name])) {
			return $this->cache[$cache_name];
		}

		// make sure the file content is available in $this->cache[$filename]
		if (!$this->_get_network_file($filename,$cache)) {
			return false;
		}

		// preg_match() returns 0 (no match) or false (error); in both cases there
		// is no usable capture group, so report an error instead of reading an
		// undefined index and caching the resulting null
		$matches = array();
		if ((!preg_match($pattern,$this->cache[$filename],$matches)) || (!isset($matches[$pattern_position]))) {
			// do not keep a value from an older download
			unset($this->cache[$cache_name]);
			return false;
		}

		$this->cache[$cache_name] = $matches[$pattern_position];
		return $this->cache[$cache_name];
	}

	// downloads a file from the YaCy peer and stores its content in the cache
	// $filename (string) - relative path (based on server root) to the file
	// $cache (boolean) - if false, every call downloads the file again; otherwise the cached result is used
	// returns true if the file was retrieved successfully (or is already cached), otherwise false
	function _get_network_file($filename,$cache=true) {
		// nothing to do when caching is allowed and the file is already stored
		if (($cache != false) && isset($this->cache[$filename])) {
			return true;
		}

		// the request URL carries the credentials: http://user:pass@host:port/file
		$peer = $this->env['yacy'];
		$address = 'http://'.$peer['user'].':'.$peer['pass'].'@'.$peer['host'].':'.$peer['port'].$filename;

		$browser = $this->_create_browser();
		$site = $browser->site($address);
		$site->get();

		// only a 200 response with a non-empty body counts as success
		if (($site->get_status()!=200) || (!$site->get_content())) {
			return false;
		}

		// keep the body and remember that the peer answered
		$this->cache[$filename] = $site->get_content();
		$this->cache['is_online']=true;
		return true;
	}

	// check if the host is online
	// $cache (boolean) - if false then every call will check the network, otherwise use the cached result
	// return true if online, return false if offline
	function is_online($cache=true) {
		// Checks whether the peer answers a GET for the configured test file with status 200.
		// $cache (boolean) - if false, the network is always checked; otherwise a cached
		//                    positive result is returned without a new request
		// returns true if online, false if offline

		// use the cached result when that is allowed and one exists
		if (($cache != false) && isset($this->cache['is_online'])) {
			return $this->cache['is_online'];
		}

		$browser = $this->_create_browser();
		$site = $browser->site('http://'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].$this->env['online_testfile']);
		$site->get();

		if ($site->get_status()==200) {
			$this->cache['is_online']=true;
			return true;
		}

		// The check failed: drop an earlier positive result, otherwise later cached
		// calls would keep reporting a stale "online". The negative result itself is
		// not stored, so the next call checks the network again (as before).
		unset($this->cache['is_online']);
		return false;
	}

	// creates a browser object configured with the request headers from $this->env['browser']
	// returns the browser object
	function _create_browser() {
		// turn the configured key/value pairs into "Key: value" header lines
		$header_lines = array();
		foreach ($this->env['browser'] as $name=>$content){
			$header_lines[] = $name.': '.$content;
		}

		$client = new browser();

		// false switches the built-in User-Agent, Accept, Accept-Language and
		// Accept-Charset headers off; only the configured lines are sent
		$client->set_user_agent(false);
		$client->set_accept(false);
		$client->set_language(false);
		$client->set_charset(false);
		$client->set_extra_headers($header_lines);

		return $client;
	}

}

/////////////////////////////////////////////////////////////////////////
// Class browser
/////////////////////////////////////////////////////////////////////////

/*
Examples:

get a site, via proxy
	$browser = new browser();
	$browser->set_proxy("http://localhost:8090/");
	$site = $browser->site("http://example.com");
	$site->get();
	echo $site->get_content();

head a site with authentication on port 8090
	$browser = new browser();
	$site = $browser->site("http://user:pass@www.example.com:8090/");
	$site->head();
	echo $site->get_header();

post a form to site
	$browser = new browser();
	$site = $browser->site("http://www.example.com");
	$site->post(array("foo"=>"bar","foo2"=>"bar2"));
	echo $site->get_content();

post a file & form to site
	$browser = new browser();
	$site = $browser->site("http://www.example.com");
	$site->post(array("foo"=>"bar","foo2"=>"bar2"),array(array("file"=>"/tmp/upload.txt","name"=>"file","type"=>"text/html")));
	echo $site->get_content();

post only a file to site
	$browser = new browser();
	$site = $browser->site("http://www.example.com");
	$site->post(false,array(array("file"=>"/tmp/upload.txt","name"=>"file","type"=>"text/html")));
	echo $site->get_content();

simple link-checker
	$browser = new browser();
	$site = $browser->site("http://www.example.com/link.htm");
	$site->get();
	echo $site->get_status();

print the created request, don't send it
	$browser = new browser();
	$browser->debug = true;
	$site = $browser->site("http://www.example.com/link.htm");
	$site->get();
*/

// Holds the request settings (headers, cookies, proxy, debug flag) and creates
// "site" objects that perform the actual HTTP requests with these settings.
class browser
{
	var $user_agent;
	var $accept;
	var $language;
	var $charset;
	var $referer;
	var $cookies;
	var $extra_headers;
	var $proxy;

	var $debug;

	// constructor: start with the default settings
	// (a method named like the class is no longer called as constructor by
	// PHP 8, which would leave every setting null instead of its default)
	function __construct()
	{
		$this->_set_defaults();
	}

	// old constructor name, kept as a normal method for existing callers
	function browser()
	{
		$this->_set_defaults();
	}

	// intern: assign the default value to every setting
	function _set_defaults()
	{
		// defaults, can override
		$this->user_agent		= 'PHP-Browser/1.0';
		$this->accept			= '*/*';
		$this->language			= 'de';
		$this->charset			= 'ISO-8859-1';
		$this->referer			= false;
		$this->cookies			= false;
		$this->extra_headers	= false;
		$this->proxy			= false;
		$this->debug			= false;
	}

	// set user-agent
	// $user_agent as string, false sends no User-Agent header
	function set_user_agent($user_agent = false)
	{
		$this->user_agent = $user_agent;
	}

	// set accepted content-type
	// $accept as string, false sends no Accept header
	function set_accept($accept = false)
	{
		$this->accept = $accept;
	}

	// set language
	// $language as string, false sends no Accept-Language header
	function set_language($language = false)
	{
		$this->language = $language;
	}

	// set charset
	// $charset as string, false sends no Accept-Charset header
	function set_charset($charset = false)
	{
		$this->charset = $charset;
	}

	// set referer
	// $referer as string, false sends no Referer header
	function set_referer($referer = false)
	{
		$this->referer = $referer;
	}

	// set cookies
	// $cookies as array
	// format: array("key1"=>"value1","key2"=>"value2")
	function set_cookies($cookies = false)
	{
		$this->cookies = $cookies;
	}

	// set extra_header
	// $extra_headers as array
	// format: array("key1: value","key2: value","key3: value")
	function set_extra_headers ($extra_headers = false)
	{
		$this->extra_headers = $extra_headers;
	}

	// set proxy, if set the proxy will be used
	// $proxy as string
	// format: http://user:pass@server:port
	function set_proxy($proxy = false)
	{
		$this->proxy = $proxy;
	}

	// set it to true and the request will only be printed, not sent; no socket is opened
	// $debug as boolean
	function set_debug($debug = false)
	{
		$this->debug = $debug;
	}

	// reset all settings to the defaults
	function reset()
	{
		$this->_set_defaults();
	}

	// create a site object for $url that uses the current settings
	// $url as string
	function site($url)
	{
		return new site($url,$this->user_agent,$this->accept,$this->language,$this->charset,$this->referer,$this->cookies,$this->extra_headers,$this->debug,$this->proxy);
	}
}

// One HTTP/1.0 request to one URL: builds the request, sends it over a socket
// (directly or through a proxy) and gives access to the parsed response.
// NOTE: the connection is always opened with a plain fsockopen(host, port); the
// URL scheme is only used to build the absolute URL sent to a proxy.
class site
{
	var $url;
	var $user_agent;
	var $accept;
	var $language;
	var $charset;
	var $referer;
	var $cookies;
	var $extra_headers;
	var $proxy;

	var $debug;

	var $scheme;
	var $host;
	var $port;
	var $path;

	var $user;
	var $pass;

	var $proxy_host;
	var $proxy_port;

	var $proxy_user;
	var $proxy_pass;

	// private
	var $socket;
	var $data;

	// constructor
	// (a method named like the class is no longer called as constructor by
	// PHP 8, so the real work lives in __construct)
	// $url (string) - the URL to request, may contain user:pass@ and a port
	// $user_agent, $accept, $language, $charset, $referer (string|false) - header values, false = header is not sent
	// $cookies (array|false) - array("key1"=>"value1",...)
	// $extra_headers (array|false) - array("key1: value",...)
	// $debug (boolean) - true prints the request instead of sending it
	// $proxy (string|false) - http://user:pass@server:port
	function __construct($url,$user_agent,$accept,$language,$charset,$referer,$cookies,$extra_headers,$debug,$proxy)
	{
		$this->url				= $url;
		$this->user_agent		= $user_agent;
		$this->accept			= $accept;
		$this->language			= $language;
		$this->charset			= $charset;
		$this->referer			= $referer;
		$this->cookies			= $cookies;
		$this->extra_headers	= $extra_headers;
		$this->proxy			= $proxy;

		$this->debug			= $debug;

		// defaults used when the URL does not contain the part
		$this->scheme			= 'http';
		$this->host				= false;
		$this->port				= 80;
		$this->path				= '/';
		$this->user				= false;
		$this->pass				= false;

		$this->proxy_host		= false;
		$this->proxy_port		= false;
		$this->proxy_user		= false;
		$this->proxy_pass		= false;

		$this->socket			= false;
		$this->data				= '';

		// parse URL; parse_url() returns false for seriously malformed URLs
		$url_parts = parse_url($this->url);
		if (!is_array($url_parts)) {
			$url_parts = array();
		}

		if (isset($url_parts['host'])) {
			$this->host = $url_parts['host'];
		}
		if (!empty($url_parts['scheme'])) {
			$this->scheme = $url_parts['scheme'];
		}
		if (!empty($url_parts['port'])) {
			$this->port = $url_parts['port'];
		}
		if (!empty($url_parts['path'])) {
			$this->path = $url_parts['path'];
		}

		// adding query to path
		if (!empty($url_parts['query'])) {
			$this->path = $this->path.'?'.$url_parts['query'];
		}

		// authentication; isset() keeps a password like "0" intact
		if (isset($url_parts['user']) && ($url_parts['user'] !== '')) {
			$this->user = $url_parts['user'];
			$this->pass = isset($url_parts['pass']) ? $url_parts['pass'] : "";
		}

		// parse proxy url
		if ($this->proxy) {
			$proxy_parts = parse_url($this->proxy);
			if (!is_array($proxy_parts)) {
				$proxy_parts = array();
			}

			if (isset($proxy_parts['host'])) {
				$this->proxy_host = $proxy_parts['host'];
			}

			// 8090 is the default proxy port of this class
			$this->proxy_port = (!empty($proxy_parts['port'])) ? $proxy_parts['port'] : 8090;

			// authentication for the proxy
			if (isset($proxy_parts['user']) && ($proxy_parts['user'] !== '')) {
				$this->proxy_user = $proxy_parts['user'];
				$this->proxy_pass = isset($proxy_parts['pass']) ? $proxy_parts['pass'] : "";
			}
		}
	}

	// old constructor name, kept as a normal method for existing callers
	function site($url,$user_agent,$accept,$language,$charset,$referer,$cookies,$extra_headers,$debug,$proxy)
	{
		$this->__construct($url,$user_agent,$accept,$language,$charset,$referer,$cookies,$extra_headers,$debug,$proxy);
	}

	// GET a site
	// return true if the request was sent and the response read, false if the connection failed
	function get()
	{
		return $this->_request('GET');
	}

	// HEAD a site
	// return true if the request was sent and the response read, false if the connection failed
	function head()
	{
		return $this->_request('HEAD');
	}

	// POST to a site, always as multipart/form-data
	// $data_to_send data to send as array()
	// format: array("key1"=>"value1","key2"=>"value2")
	// $files_to_send files to send as array()
	// format: array(array("name"=>"file1","file"=>"/filename","type"=>"text/html","rename"=>"(optional) newfilename"),array(...))
	// before post check that files exist and are readable; an unreadable file is sent with empty content
	// return true if the request was sent and the response read, false if the connection failed
	function post($data_to_send = false,$files_to_send = false)
	{
		if (!$this->_connect()) {
			return false;
		}

		if (!is_array($data_to_send)) {
			$data_to_send = array();
		}
		if (!is_array($files_to_send)) {
			$files_to_send = array();
		}

		$header = $this->_build_header('POST');

		// The body is assembled completely before anything is sent, so that
		// Content-Length is exactly the number of bytes that follow the headers.
		$body = '';

		if ((count($data_to_send)) || (count($files_to_send))) {

			$boundary = "---------------------------".substr(md5(uniqid(mt_rand(),true)),0,10);
			$header .= 'Content-Type: multipart/form-data; boundary='.$boundary."\r\n";

			foreach ($data_to_send as $key=>$val) {
				$body .= '--'.$boundary."\r\n";
				$body .= 'Content-Disposition: form-data; name="'.$key.'"'."\r\n\r\n".$val."\r\n";
			}

			foreach ($files_to_send as $file) {
				// entries without a file path cannot be sent
				if ((!is_array($file)) || (!isset($file['file']))) {
					continue;
				}

				$field = isset($file['name']) ? $file['name'] : '';
				$filename = (!empty($file['rename'])) ? $file['rename'] : basename($file['file']);
				$type = (!empty($file['type'])) ? $file['type'] : 'application/octet-stream';

				// binary-safe read of the whole file
				$content = file_get_contents($file['file']);
				if ($content === false) {
					$content = '';
				}

				$body .= '--'.$boundary."\r\n";
				$body .= 'Content-Disposition: form-data; name="'.$field.'"; filename="'.$filename.'"'."\r\n";
				$body .= 'Content-Type: '.$type."\r\n";
				$body .= 'Content-Transfer-Encoding: binary'."\r\n\r\n";
				$body .= $content."\r\n";
			}

			// closing delimiter
			$body .= '--'.$boundary."--\r\n";
		}

		// also sent for an empty body, so that the header block is always
		// terminated by an empty line
		$header .= 'Content-Length: '.strlen($body)."\r\n\r\n";

		$this->_fputs($header);
		if ($body !== '') {
			$this->_fputs($body);
		}

		$this->data = $this->_fgets();
		$this->_fclose();
		return true;
	}

	// return http-status as string (e.g. "200"), false if no status line was found
	function get_status()
	{
		foreach ($this->get_headers() as $headline) {
			if (preg_match('/HTTP\/(.*) ([0-9][0-9][0-9])(.*)/i',$headline,$tmp)) {
				return $tmp[2];
			}
		}
		return false;
	}

	// return content-type as string, false if the header is missing
	// if $full == true return full content-type incl. parameters, otherwise only the type (*/*)
	function get_type($full = false)
	{
		$type = $this->_header_value('Content-Type');
		if (($type === false) || ($full)) {
			return $type;
		}
		$parts = explode(";",$type);
		return $parts[0];
	}

	// return content-length as it appears in the header (numeric string), false if the header is missing
	function get_length()
	{
		return $this->_header_value('Content-Length');
	}

	// return new location if set, otherwise false
	function get_location()
	{
		return $this->_header_value('Location');
	}

	// return cookies as multi-array or false if the response sets no cookie
	// format: array(array("name"=>"foo","value"=>"bar","path"=>"string","time"=>timestamp,"domain"=>"string","secure"=>boolean),array(...))
	// "time", "path", "domain" and "secure" are only present if the cookie carries the attribute
	function get_cookies()
	{
		$cookies = false;
		foreach ($this->get_headers() as $headline) {
			if (!preg_match('/^Set-Cookie:[ \t]*/i',$headline)) {
				continue;
			}

			if (!is_array($cookies)) {
				$cookies = array();
			}

			$headline = preg_replace('/^Set-Cookie:[ \t]*/i','',trim($headline));
			$cookiesplit = explode(";",$headline);

			$cookieinfo = array();

			// name and value: the first part, split at the first "="
			$pair = explode("=",array_shift($cookiesplit),2);
			$cookieinfo['name'] = $pair[0];
			$cookieinfo['value'] = isset($pair[1]) ? $pair[1] : '';

			// the attributes may come in any order, so they are recognised by name
			foreach ($cookiesplit as $attribute) {
				$attribute = trim($attribute);
				if (preg_match('/^expires=/i',$attribute)) {
					// time as timestamp
					$cookieinfo['time'] = strtotime(preg_replace('/^expires=/i','',$attribute));
				} elseif (preg_match('/^path=/i',$attribute)) {
					$cookieinfo['path'] = preg_replace('/^path=/i','',$attribute);
				} elseif (preg_match('/^domain=/i',$attribute)) {
					$cookieinfo['domain'] = preg_replace('/^domain=/i','',$attribute);
				} elseif (strtolower($attribute)=="secure") {
					$cookieinfo['secure'] = true;
				}
			}

			$cookies[] = $cookieinfo;
		}
		return $cookies;
	}

	// return all headers as array
	// format: array("HTTP/1.1 200 OK","Date: Thu, 27 May 2004 20:33:10 GMT")
	function get_headers()
	{
		return explode("\r\n",$this->get_header());
	}

	// return the value of the Date header (e.g. "Thu, 27 May 2004 20:33:10 GMT"), false if the header is missing
	function get_date()
	{
		return $this->_header_value('Date');
	}

	// return complete header as string
	function get_header()
	{
		// $this->data is false in debug mode, hence the cast
		$tmp = explode("\r\n\r\n",(string)$this->data,2);
		return $tmp[0];
	}

	// return content as string
	function get_content()
	{
		$tmp = explode("\r\n\r\n",(string)$this->data,2);
		if (isset($tmp[1])) {
			return $tmp[1];
		} else {
			return "";
		}
	}

	// intern: send a request without body and read the response
	// $method (string) - "GET" or "HEAD"
	function _request($method)
	{
		if (!$this->_connect()) {
			return false;
		}

		// the empty line terminates the header block
		$this->_fputs($this->_build_header($method)."\r\n");
		$this->data = $this->_fgets();
		$this->_fclose();
		return true;
	}

	// intern: open the connection to the proxy if one is set, otherwise to the host of the URL
	function _connect()
	{
		if ($this->proxy) {
			return $this->_fsockopen($this->proxy_host, $this->proxy_port);
		}
		return $this->_fsockopen($this->host, $this->port);
	}

	// intern: host of the URL, followed by ":port" if the port is not 80
	function _host_with_port()
	{
		if ($this->port != 80) {
			return $this->host.':'.$this->port;
		}
		return $this->host;
	}

	// intern: build the request line and all header lines shared by GET, HEAD and POST
	// $method (string) - the request method
	// return the lines as one string, each ending with CRLF; the terminating empty line is NOT included
	function _build_header($method)
	{
		if ($this->proxy) {
			// a proxy gets the absolute URL in the request line
			$header = $method.' '.$this->scheme.'://'.$this->_host_with_port().$this->path.' HTTP/1.0'."\r\n";
		} else {
			$header = $method.' '.$this->path.' HTTP/1.0'."\r\n";
		}

		// the Host header names the requested server, also when a proxy is used
		$header .= 'Host: '.$this->_host_with_port()."\r\n";

		if ($this->user !== false) {
			$header .= 'Authorization: Basic '.base64_encode($this->user.':'.$this->pass)."\r\n";
		}

		if (($this->proxy) && ($this->proxy_user !== false)) {
			$header .= 'Proxy-Authorization: Basic '.base64_encode($this->proxy_user.':'.$this->proxy_pass)."\r\n";
		}

		if ($this->language !== false) {
			$header .= 'Accept-Language: '.$this->language."\r\n";
		}

		if ($this->charset !== false) {
			$header .= 'Accept-Charset: '.$this->charset."\r\n";
		}

		if ($this->user_agent !== false) {
			$header .= 'User-Agent: '.$this->user_agent."\r\n";
		}

		if ($this->accept !== false) {
			$header .= 'Accept: '.$this->accept."\r\n";
		}

		if ($this->referer !== false) {
			$header .= 'Referer: '.$this->referer."\r\n";
		}

		if (is_array($this->extra_headers)) {
			foreach ($this->extra_headers as $extra_header) {
				if ($extra_header) {
					$header .= $extra_header."\r\n";
				}
			}
		}

		if (is_array($this->cookies)) {
			// cookies without a name or with an empty value are skipped
			$pairs = array();
			foreach ($this->cookies as $var => $value) {
				if (($var) && ($value != "")) {
					$pairs[] = $var.'='.$value;
				}
			}
			if (count($pairs)) {
				$header .= 'Cookie: '.implode('; ',$pairs)."\r\n";
			}
		}

		$header .= "Connection: close\r\n";
		return $header;
	}

	// intern: value of the first response header with the given name
	// $name (string) - header name, compared case-insensitively
	// return the value as string or false if the header is missing
	function _header_value($name)
	{
		$pattern = '/^'.preg_quote($name,'/').':[ \t]*/i';
		foreach ($this->get_headers() as $headline) {
			if (preg_match($pattern,$headline)) {
				return preg_replace($pattern,'',$headline);
			}
		}
		return false;
	}

	// intern, for debugging: open the socket, in debug mode nothing is opened
	function _fsockopen($host, $port)
	{
		if ($this->debug) {
			return true;
		}

		$this->socket = @fsockopen($host, $port, $errno, $errstr, 100);
		if ($this->socket) {
			return true;
		}
		return false;
	}

	// intern, for debugging: write to the socket, in debug mode print the data instead
	function _fputs($data)
	{
		if ( ! $this->debug) {
			@fputs($this->socket, $data);
		} else {
			echo $data;
		}
	}

	// intern, for debugging: read the complete response, in debug mode return false
	function _fgets()
	{
		if ($this->debug) {
			return false;
		}

		$return = '';
		while (!@feof($this->socket)) {
			$chunk = @fgets($this->socket, 512);
			// a failed read without end-of-file (e.g. a timeout) must not loop forever
			if ($chunk === false) {
				break;
			}
			$return .= $chunk;
		}
		return $return;
	}

	// intern, for debugging: close the socket, in debug mode there is none
	function _fclose()
	{
		if (( ! $this->debug) && (is_resource($this->socket))) {
			fclose($this->socket);
		}
	}

}

?>