De:PHP2Crawler
Aus YaCyWiki
Crawler mittels PHP ansteuern
Es ist möglich, den Crawler über PHP mit Links zu füllen, die YaCy indizieren soll. Hier ein einfaches Beispielskript.
<?php
// Minimal example: ask a YaCy peer to start a crawl for a single URL by
// requesting IndexCreate_p.html with the crawl parameters in the query string.

// the URL to crawl and the access data of the YaCy peer
$url = "http://www.example.com/";
$yacy_host = '127.0.0.1';
$yacy_port = '8090';
$yacy_user = 'admin';
$yacy_pass = 'mypassword';

// the host part of the URL is used to restrict the crawl filter;
// parse_url() returns false or an array without 'host' for unusable URLs
$url_parts = parse_url($url);
$host = (is_array($url_parts) && isset($url_parts['host'])) ? $url_parts['host'] : '';

// REMOTE_ADDR is only set when the script is served through a web server
$requester = isset($_SERVER["REMOTE_ADDR"]) ? $_SERVER["REMOTE_ADDR"] : 'cli';

$call = 'http://'.$yacy_user.':'.$yacy_pass.'@'.$yacy_host.':'.$yacy_port.'/IndexCreate_p.html'.
        '?crawlingDepth=5'.
        '&crawlingFilter=.*'.urlencode($host).'.*'.
        '&crawlingQ=on'.
        '&localIndexing=on'.
        '&crawlOrder=on'.
        '&xsstopw=on'.
        '&intention='.urlencode('requested by '.$requester).
        '&crawlingFile='.
        '&crawlingMode=url'.
        '&crawlingURL='.urlencode($url).
        '&crawlingstart=Start+New+Crawl';

// without a host there is nothing sensible to crawl, so the request is skipped;
// the warning of a failed request is suppressed on purpose, the failure is
// reported through the message below instead
$response = ($host !== '') ? @file_get_contents($call) : false;

// the pattern matches the confirmation text of an english YaCy version
if (($response !== false) && preg_match('/crawling of "(.*?)" started/is', $response)) {
    echo "Hat geklappt - URL hinzugefügt.";
} else {
    echo "Mist - Irgendetwas ging schief.";
}
?>
Hinweis: Der reguläre Ausdruck in der IF-Abfrage muss evtl. angepasst werden. Hier wurde von einer englischen YaCy-Version ausgegangen. Auch sollten die Parameter in der Variable $call entsprechend den eigenen Bedürfnissen angepasst werden.
Komplexe Lösung
Mit dieser Klasse yacy ist es möglich, YaCy sehr umfangreich fernzusteuern. Da sich die Klasse momentan noch in Entwicklung befindet, gibt es erst einmal keine Dokumentation. Die Klasse benötigt die Klasse browser (und deren "Subklasse" site), welche unten angehängt ist. Verwendung auf eigene Gefahr. Zuletzt getestet mit SVN 2841 unter PHP 5.1.6.
<?php class yacy { var $env; var $cache; function yacy() { // initialize $this->env = array(); $this->cache = array(); // access-settings $this->env['yacy']['host']='localhost'; $this->env['yacy']['port']='8090'; $this->env['yacy']['user']='admin'; $this->env['yacy']['pass']='mypassword'; // browser-settings, each line is an line request header $this->env['browser']['User-Agent']='YaCy-Remote/0.1'; $this->env['browser']['Accept']='text/xml,application/xml,application/xhtml+xml,text/html,text/plain'; $this->env['browser']['Accept-Charset']='ISO-8859-1'; $this->env['browser']['Accept-Language']='de,en'; // defaults values for new crawls $this->env['new_crawl_defaults'] = array(); $this->env['new_crawl_defaults']['crawlingDepth']=3; $this->env['new_crawl_defaults']['crawlingFilter']='.*'; $this->env['new_crawl_defaults']['crawlingIfOlderCheck']='on'; $this->env['new_crawl_defaults']['crawlingIfOlderNumber']=180; $this->env['new_crawl_defaults']['crawlingIfOlderUnit']='day'; $this->env['new_crawl_defaults']['crawlingDomFilterCheck']='on'; $this->env['new_crawl_defaults']['crawlingDomFilterDepth']=3; $this->env['new_crawl_defaults']['crawlingDomMaxCheck']='on'; $this->env['new_crawl_defaults']['crawlingDomMaxPages']=5000; $this->env['new_crawl_defaults']['crawlingQ']='on'; $this->env['new_crawl_defaults']['localIndexing']='on'; $this->env['new_crawl_defaults']['crawlOrder']='on'; $this->env['new_crawl_defaults']['xsstopw']='on'; $this->env['new_crawl_defaults']['intention']=''; $this->env['new_crawl_defaults']['crawlingMode']='url'; $this->env['new_crawl_defaults']['crawlingURL']=''; $this->env['new_crawl_defaults']['crawlingstart']='Start+New+Crawl'; // the file which is requested for online-check $this->env['online_testfile']='/env/grafics/empty.gif'; } // return array of entries in indexingqueue // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_indexingqueue_entries($cache=true) { $filename = 
'/xml/queues_p.xml'; $cache_name = 'indexingqueue_entries'; if (($cache == false) || (!isset($this->cache[$cache_name]))) { if ($this->_get_network_file($filename,$cache)) { preg_match('/<indexingqueue>(.*?)<\/indexingqueue>/si',$this->cache[$filename],$matches); $entries_line=$matches[1]; preg_match_all('/<entry>(.*?)<initiator>(.*?)<\/initiator>(.*?)<depth>(.*?)<\/depth>(.*?)<modified>(.*?)<\/modified>(.*?)<anchor>(.*?)<\/anchor>(.*?)<url>(.*?)<\/url>(.*?)<size>(.*?)<\/size>(.*?)<hash>(.*?)<\/hash>(.*?)<inProcess>(.*?)<\/inProcess>(.*?)<\/entry>/si',$entries_line,$matches); $entries = array(); if (!is_array($matches[0])) { $matches[0] = array(); } foreach($matches[0] as $key=>$value) { $entries[]=array( 'initiator'=>$matches[2][$key], 'depth'=>intval($matches[4][$key]), 'modified'=>$matches[6][$key], 'anchor'=>$matches[8][$key], 'url'=>$matches[10][$key], 'size'=>intval($matches[12][$key]), 'hash'=>$matches[14][$key], 'inprocess'=>(($matches[16][$key] == 'false') ? false : true) ); } $this->cache[$cache_name] = $entries; return $this->cache[$cache_name]; } else { return false; } } else { return $this->cache[$cache_name]; } } // return array of entries in localcrawlerqueue // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_localcrawlerqueue_entries($cache=true) { $filename = '/xml/queues_p.xml'; $cache_name = 'localcrawlerqueue_entries'; if (($cache == false) || (!isset($this->cache[$cache_name]))) { if ($this->_get_network_file($filename,$cache)) { preg_match('/<localcrawlerqueue>(.*?)<\/localcrawlerqueue>/si',$this->cache[$filename],$matches); $entries_line=$matches[1]; preg_match_all('/<entry>(.*?)<initiator>(.*?)<\/initiator>(.*?)<depth>(.*?)<\/depth>(.*?)<modified>(.*?)<\/modified>(.*?)<anchor>(.*?)<\/anchor>(.*?)<url>(.*?)<\/url>(.*?)<hash>(.*?)<\/hash>(.*?)<inProcess>(.*?)<\/inProcess>(.*?)<\/entry>/si',$entries_line,$matches); $entries = array(); if (!is_array($matches[0])) { 
$matches[0] = array(); } foreach($matches[0] as $key=>$value) { $entries[]=array( 'initiator'=>$matches[2][$key], 'depth'=>intval($matches[4][$key]), 'modified'=>$matches[6][$key], 'anchor'=>$matches[8][$key], 'url'=>$matches[10][$key], 'hash'=>$matches[12][$key], 'inprocess'=>(($matches[14][$key] == 'false') ? false : true) ); } $this->cache[$cache_name] = $entries; return $this->cache[$cache_name]; } else { return false; } } else { return $this->cache[$cache_name]; } } // return size of indexingqueue in integer // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_indexingqueue_size($cache=true) { $filename = '/xml/queues_p.xml'; $cache_name = 'indexingqueue_size'; if (($cache == false) || (!isset($this->cache[$cache_name]))) { if ($this->_get_network_file($filename,$cache)) { preg_match('/<indexingqueue>(.*?)<size>(.*?)<\/size>(.*?)<\/indexingqueue>/si',$this->cache[$filename],$matches); $this->cache[$cache_name] = intval($matches[2]); return $this->cache[$cache_name]; } else { return false; } } else { return $this->cache[$cache_name]; } } // return max size of indexingqueue in integer // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_indexingqueue_max_size($cache=true) { $filename = '/xml/queues_p.xml'; $cache_name = 'indexingqueue_max_size'; if (($cache == false) || (!isset($this->cache[$cache_name]))) { if ($this->_get_network_file($filename,$cache)) { preg_match('/<indexingqueue>(.*?)<max>(.*?)<\/max>(.*?)<\/indexingqueue>/si',$this->cache[$filename],$matches); $this->cache[$cache_name] = intval($matches[2]); return $this->cache[$cache_name]; } else { return false; } } else { return $this->cache[$cache_name]; } } // return size of loaderqueue in integer // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_loaderqueue_size() { $filename = 
'/xml/queues_p.xml'; $cache_name = 'loaderqueue_size'; if (($cache == false) || (!isset($this->cache[$cache_name]))) { if ($this->_get_network_file($filename,$cache)) { preg_match('/<loaderqueue>(.*?)<size>(.*?)<\/size>(.*?)<\/loaderqueue>/si',$this->cache[$filename],$matches); $this->cache[$cache_name] = intval($matches[2]); return $this->cache[$cache_name]; } else { return false; } } else { return $this->cache[$cache_name]; } } // return max size of loaderqueue in integer // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_loaderqueue_max_size() { $filename = '/xml/queues_p.xml'; $cache_name = 'loaderqueue_max_size'; if (($cache == false) || (!isset($this->cache[$cache_name]))) { if ($this->_get_network_file($filename,$cache)) { preg_match('/<loaderqueue>(.*?)<max>(.*?)<\/max>(.*?)<\/loaderqueue>/si',$this->cache[$filename],$matches); $this->cache[$cache_name] = intval($matches[2]); return $this->cache[$cache_name]; } else { return false; } } else { return $this->cache[$cache_name]; } } // return size of localcrawlerqueue in integer // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_localcrawlerqueue_size() { $filename = '/xml/queues_p.xml'; $cache_name = 'localcrawlerqueue_size'; if (($cache == false) || (!isset($this->cache[$cache_name]))) { if ($this->_get_network_file($filename,$cache)) { preg_match('/<localcrawlerqueue>(.*?)<size>(.*?)<\/size>(.*?)<\/localcrawlerqueue>/si',$this->cache[$filename],$matches); $this->cache[$cache_name] = intval($matches[2]); return $this->cache[$cache_name]; } else { return false; } } else { return $this->cache[$cache_name]; } } // return size of remotecrawlerqueue in integer // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_remotecrawlerqueue_size() { $filename = '/xml/queues_p.xml'; $cache_name = 
'remotecrawlerqueue_size'; if (($cache == false) || (!isset($this->cache[$cache_name]))) { if ($this->_get_network_file($filename,$cache)) { preg_match('/<remotecrawlerqueue>(.*?)<size>(.*?)<\/size>(.*?)<\/remotecrawlerqueue>/si',$this->cache[$filename],$matches); $this->cache[$cache_name] = intval($matches[2]); return $this->cache[$cache_name]; } else { return false; } } else { return $this->cache[$cache_name]; } } // remove an item from indexingqueue (given by hash) // $hash (string) - hash of item // return true, false on error // return true means the request was successfull, say nothing about removing function indexingqueue_remove_entry($hash) { $browser = $this->_create_browser(); $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/IndexCreateIndexingQueue_p.html?deleteEntry='.$hash); $site->get(); if ($site->get_status()==302) { $this->cache['is_online']=true; return true; } else { return false; } } // remove all items from indexingqueue // $hash (string) - hash of item // return true, false on error // return true means the request was successfull, say nothing about removing // keep in mind: this can take an long time, so the request failed, but work function indexingqueue_remove_all() { $browser = $this->_create_browser(); $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/IndexCreateIndexingQueue_p.html'); $site->post(array('clearIndexingQueue'=>'clear indexing queue')); if ($site->get_status()==302) { $this->cache['is_online']=true; return true; } else { return false; } } // remove an item from localcrawlerqueue (given by hash) // $hash (string) - hash of item // return true means the request was successfull, say nothing about removing function localcrawlerqueue_remove_entry($hash) { $browser = $this->_create_browser(); $site = 
$browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/IndexCreateWWWLocalQueue_p.html?deleteEntry='.$hash); $site->get(); if ($site->get_status()==302) { $this->cache['is_online']=true; return true; } else { return false; } } // remove all items from localcrawlerqueue // $hash (string) - hash of item // return true means the request was successfull, say nothing about removing // keep in mind: this can take an long time, so the request failed, but work function localcrawlerqueue_remove_all() { $browser = $this->_create_browser(); $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/IndexCreateWWWLocalQueue_p.html'); $site->post(array('pattern'=>'.*','option'=>'URL','deleteEntries'=>'Delete')); if ($site->get_status()==302) { $this->cache['is_online']=true; return true; } else { return false; } } // set an option in config-file, some need to reboot to effect // $option (string) - name of the option // $value (string or integer) - new value of the option // return true or false on error function set_config_option($option,$value) { $cache_name = 'config_options'; $browser = $this->_create_browser(); $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/Config_p.html'); $site->post(array('key'=>$option,'value'=>$value,'submit'=>'Save')); if (($site->get_status()==200) && ($site->get_content())) { $this->cache['is_online']=true; $this->cache[$cache_name][$option] = $value; return true; } else { return false; } } // reads the config (over xml), parse them and return the value of an given option, return string or array, false on error // $option (string or boolean(false)) - the name of the option from config-file, case-sensitive // if $option == false then all options as array are 
returned // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result // return the value as string, all options as array or false on error function get_config_option($option=false,$cache=true) { $filename = '/xml/config_p.xml'; $cache_name = 'config_options'; if (($cache == false) || (!isset($this->cache[$cache_name]))) { if ($this->_get_network_file($filename,$cache)) { preg_match_all('/<option>(.*?)<key>(.*?)<\/key>(.*?)<value>(.*?)<\/value>(.*?)<\/option>/is',$this->cache[$filename],$matches); $option_vars=$matches[2]; $option_keys=$matches[4]; $options = array(); foreach ($option_vars as $key=>$var) { $options[$option_vars[$key]] = $option_keys[$key]; } $this->cache[$cache_name] = $options; if ($option!==false) { if (isset($options[$option])) { return $options[$option]; } else { return false; } } else { return $options; } } else { return false; } } else { if ($option!==false) { if (isset($this->cache[$cache_name][$option])) { return $this->cache[$cache_name][$option]; } else { return false; } } else { return $this->cache[$cache_name]; } } } // pause or resume global grawl triggers // $bool (boolean) if true resume global grawl triggers, if false pause global grawl triggers function set_global_grawl_triggers($bool=true) { $browser = $this->_create_browser(); if ($bool===false) { $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/Status.html?pauseCrawlJob=&jobType=globalCrawlTrigger'); } else { $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/Status.html?continueCrawlJob=&jobType=globalCrawlTrigger'); } $site->get(); if (($site->get_status()==200) && ($site->get_content())) { $this->cache['is_online']=true; return true; } else { return false; } } // pause or resume remote triggered crawls // $bool (boolean) 
if true resume remote triggered crawls, if false pause remote triggered crawls function set_remote_triggered_crawling($bool=true) { $browser = $this->_create_browser(); if ($bool===false) { $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/Status.html?pauseCrawlJob=&jobType=remoteTriggeredCrawl'); } else { $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/Status.html?continueCrawlJob=&jobType=remoteTriggeredCrawl'); } $site->get(); if (($site->get_status()==200) && ($site->get_content())) { $this->cache['is_online']=true; return true; } else { return false; } } // pause or resume local crawls // $bool (boolean) if true resume local crawls, if false pause local crawls function set_local_crawling($bool=true) { $browser = $this->_create_browser(); if ($bool===false) { $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/Status.html?pauseCrawlJob=&jobType=localCrawl'); } else { $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/Status.html?continueCrawlJob=&jobType=localCrawl'); } $site->get(); if (($site->get_status()==200) && ($site->get_content())) { $this->cache['is_online']=true; return true; } else { return false; } } // setup the Distributed Indexing // $limit (boolean or intener) // if $limit is true Distributed Indexing goes on without an limit // if $limit is an integer value >0 Distributed Indexing goes on with this as new Pages Per Minute Limit // if $limit false or =<0 Distributed Indexing goes off // return true if set, otherwise false function set_distributed_indexing($limit=true) { $browser = $this->_create_browser(); if ((!is_bool($limit)) && 
($limit>=1)) { $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/IndexCreate_p.html?distributedcrawling=set&dcr=acceptCrawlLimited&acceptCrawlLimit='.urlencode($limit)); } elseif ($limit===true) { $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/IndexCreate_p.html?distributedcrawling=set&dcr=acceptCrawlMax&acceptCrawlLimit=60'); } else { $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/IndexCreate_p.html?distributedcrawling=set&dcr=acceptCrawlDenied&acceptCrawlLimit=1'); } $site->get(); if (($site->get_status()==200) && ($site->get_content())) { $this->cache['is_online']=true; return true; } else { return false; } } // starts (simple) an new crawl // $url (string) - URL // $comment (string or false) - intension, set to false if want not want to use it // $crawl_host_only (boolean) - should ony the host crawled // $crawl_global (boolean) - start as global crawl // $max_pages_per_dom (integer or false) - maximum pages per domain, set to false if want not want to use it // $crawl_deep (integer) - maximal crawl deep // $dom_deep (integer or false) - the maximal deep over external domains, set to false if want not want to use it // $min_age_days (integer or false) - if you want recrawl files older then this in days, set to false if want not want to use it // $accept_dynamic (boolean) - set if you want crawl dynamic content // return true if the crawl was succesfull requestet, false on error function new_crawl_simple($url=false, $comment=false, $crawl_host_only=false, $crawl_global=true, $max_pages_per_dom=5000, $crawl_deep=3, $dom_deep=3, $min_age_days=50, $accept_dynamic=true) { $url_parts = @parse_url($url); $host = $url_parts['host']; $options = array(); 
$options['crawlingURL'] = $url; if ($comment) { $options['intention'] = $comment; } else { $options['intention'] = ''; } if ($crawl_global) { $options['crawlOrder'] = 'on'; } else { $options['crawlOrder'] = 'off'; } if ($crawl_host_only) { $options['crawlingFilter']='.*'.$host.'.*'; $dom_deep = 0; } else { $options['crawlingFilter']='.*'; } $options['crawlingDepth']=$crawl_deep; if ($dom_deep===false) { $options['crawlingDomFilterCheck']='off'; $options['crawlingDomFilterDepth']=0; } else { $options['crawlingDomFilterCheck']='on'; $options['crawlingDomFilterDepth']=$dom_deep; } if ($max_pages_per_dom===false) { $options['crawlingDomMaxCheck']='on'; $options['crawlingDomMaxPages']=999999; } else { $options['crawlingDomMaxCheck']='on'; $options['crawlingDomMaxPages']=$max_pages_per_dom; } if ($min_age_days===false) { $options['crawlingIfOlderCheck']='off'; $options['crawlingIfOlderNumber']=0; $options['crawlingIfOlderUnit']='day'; } else { $options['crawlingIfOlderCheck']='on'; $options['crawlingIfOlderNumber']=$min_age_days; $options['crawlingIfOlderUnit']='day'; ; } if ($accept_dynamic) { $options['crawlingQ']='on'; } else { $options['crawlingQ']='off'; } return $this->new_crawl($options); } // start an new crawl // $options (array) - all get-params to create the new crawl, $key as name and $value as value // see $this->env['new_crawl_defaults'] // i.E.: $options = array("crawlingURL" => "http://example.com", ...) 
// all params not given are taken from $this->env['new_crawl_defaults'] // return true if the crawl was succesfull requestet, false on error function new_crawl($options) { $options = array_merge($this->env['new_crawl_defaults'],$options); if (!$options['crawlingURL']) { return false; } else { $request = array(); foreach ($options as $key=>$value){ $request[] = $key.'='.urlencode($value); } $browser = $this->_create_browser(); $site = $browser->site('http://'.$this->env['yacy']['user'].':'.$this->env['yacy']['pass'].'@'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].'/IndexCreate_p.html?'.implode('&',$request)); $site->get(); if (($site->get_status()==200) && ($site->get_content())) { $this->cache['is_online']=true; return true; } else { return false; } } } // return active_count as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_active_count($cache=true) { $pattern = '/<peers>(.*?)<active>(.*?)<count>(.*?)<\/count>(.*?)<\/active>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return active_links as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_active_links($cache=true) { $pattern = '/<peers>(.*?)<active>(.*?)<links>(.*?)<\/links>(.*?)<\/active>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return active_words as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_active_words($cache=true) { $pattern = '/<peers>(.*?)<active>(.*?)<words>(.*?)<\/words>(.*?)<\/active>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return passive_count as integer or false // $cache 
(boolean) - if false then every call will download allways the file, otherwise use the cached result function get_passive_count($cache=true) { $pattern = '/<peers>(.*?)<passive>(.*?)<count>(.*?)<\/count>(.*?)<\/passive>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return passive_links as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_passive_links($cache=true) { $pattern = '/<peers>(.*?)<passive>(.*?)<links>(.*?)<\/links>(.*?)<\/passive>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return passive_words as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_passive_words($cache=true) { $pattern = '/<peers>(.*?)<passive>(.*?)<words>(.*?)<\/words>(.*?)<\/passive>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return potential_count as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_potential_count($cache=true) { $pattern = '/<peers>(.*?)<potential>(.*?)<count>(.*?)<\/count>(.*?)<\/potential>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return potential_links as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_potential_links($cache=true) { $pattern = '/<peers>(.*?)<potential>(.*?)<links>(.*?)<\/links>(.*?)<\/potential>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return potential_words as integer or false // $cache (boolean) - if 
false then every call will download allways the file, otherwise use the cached result function get_potential_words($cache=true) { $pattern = '/<peers>(.*?)<potential>(.*?)<words>(.*?)<\/words>(.*?)<\/potential>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return all_count as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_all_count($cache=true) { $pattern = '/<peers>(.*?)<all>(.*?)<count>(.*?)<\/count>(.*?)<\/all>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return all_links as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_all_links($cache=true) { $pattern = '/<peers>(.*?)<all>(.*?)<links>(.*?)<\/links>(.*?)<\/all>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return all_words as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_all_words($cache=true) { $pattern = '/<peers>(.*?)<all>(.*?)<words>(.*?)<\/words>(.*?)<\/all>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return peer_name as string or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_peer_name($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<name>(.*?)<\/name>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_get_network_detail($pattern,$position,$cache); } // return peer_version as string or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function 
get_peer_version($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<version>(.*?)<\/version>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_get_network_detail($pattern,$position,$cache); } // return peer_utc as is as string or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_peer_utc($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<utc>(.*?)<\/utc>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_get_network_detail($pattern,$position,$cache); } // return peer_uptime as integer in minutes or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_peer_uptime($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<uptime>(.*?)<\/uptime>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; $uptime = $this->_get_network_detail($pattern,$position,$cache); if ($uptime !== false) { $uptime_array = preg_split('/(\s|:)/',$uptime); return (intval($uptime_array[0])*1440)+(intval($uptime_array[2])*60)+intval($uptime_array[3]); } else { return false; } } // return peer_links as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_peer_links($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<links>(.*?)<\/links>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return peer_words as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_peer_words($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<words>(.*?)<\/words>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return peer_acceptcrawl as integer or false // $cache (boolean) - if false then every call will 
download allways the file, otherwise use the cached result function get_peer_acceptcrawl($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<acceptcrawl>(.*?)<\/acceptcrawl>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return peer_acceptindex as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_peer_acceptindex($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<acceptindex>(.*?)<\/acceptindex>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return peer_sentwords as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_peer_sentwords($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<sentwords>(.*?)<\/sentwords>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return peer_senturls as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_peer_senturls($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<senturls>(.*?)<\/senturls>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return peer_receivedwords as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_peer_receivedwords($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<receivedwords>(.*?)<\/receivedwords>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return peer_recievedlinks as integer or false // $cache (boolean) - if false then 
every call will download allways the file, otherwise use the cached result function get_peer_recievedlinks($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<recievedlinks>(.*?)<\/recievedlinks>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return peer_ppm as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_peer_ppm($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<ppm>(.*?)<\/ppm>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return peer_seeds as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_peer_seeds($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<seeds>(.*?)<\/seeds>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return peer_connects as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_peer_connects($cache=true) { $pattern = '/<peers>(.*?)<your>(.*?)<connects>(.*?)<\/connects>(.*?)<\/your>(.*?)<\/peers>/is'; $position = 3; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // return peer_status as integer or false // $cache (boolean) - if false then every call will download allways the file, otherwise use the cached result function get_peer_status($cache=true) { $pattern = '/<peers>(.*?)<status>(.*?)<\/status>(.*?)<\/peers>/is'; $position = 2; return $this->_remove_dots($this->_get_network_detail($pattern,$position,$cache)); } // ------------------------------------------------------------------------------ // private functions - use internal only // 
// ------------------------------------------------------------------------------

    // remove unwanted dots from a value and return it as integer
    // $value (mixed) - raw value as delivered by _get_network_detail(), or false
    // return the integer value, or false if $value is false
    function _remove_dots($value) {
        if ($value === false) {
            return false;
        }
        return intval(str_replace('.','',$value));
    }

    // get /Network.xml, apply a pattern to it and return one capture group
    // $pattern (string)           - the pattern to extract the value
    // $pattern_position (integer) - index of the capture group holding the value
    // $cache (boolean)            - if false the file is downloaded again on every call,
    //                               otherwise the cached result is used
    // return the value (string), or false if the file could not be retrieved
    // or the pattern did not match
    function _get_network_detail($pattern,$pattern_position,$cache=true) {
        $filename = '/Network.xml';
        $cache_name = md5($pattern.$pattern_position);

        // serve from cache when allowed and available
        if (($cache != false) && isset($this->cache[$cache_name])) {
            return $this->cache[$cache_name];
        }

        if (!$this->_get_network_file($filename,$cache)) {
            return false;
        }

        // a document without the wanted element is an error: report it as false
        // instead of reading a non-existing index and caching null
        $matches = array();
        if (!preg_match($pattern,$this->cache[$filename],$matches) || !isset($matches[$pattern_position])) {
            return false;
        }

        $this->cache[$cache_name] = $matches[$pattern_position];
        return $this->cache[$cache_name];
    }

    // receive a file from the YaCy peer and store its content in the cache
    // $filename (string) - path of the file relative to the server root
    // $cache (boolean)   - if false the file is downloaded again on every call,
    //                      otherwise an already cached copy is used
    // return true if the content is available in $this->cache[$filename], otherwise false
    function _get_network_file($filename,$cache=true) {
        if (($cache != false) && isset($this->cache[$filename])) {
            return true;
        }

        $yacy = $this->env['yacy'];
        $browser = $this->_create_browser();
        $site = $browser->site('http://'.$yacy['user'].':'.$yacy['pass'].'@'.$yacy['host'].':'.$yacy['port'].$filename);
        $site->get();

        // only a 200 response with a non-empty body counts as success
        if (($site->get_status()==200) && ($site->get_content())) {
            $this->cache[$filename] = $site->get_content();
            // a successful download also proves that the host is reachable
            $this->cache['is_online']=true;
            return true;
        }
        return false;
    }

    // check if the host is online
    // $cache (boolean) - if false every call checks the network, otherwise the cached result is used
    // return true if online, false if offline
    // note: only a positive result is cached, a failed check is repeated on the next call
    function is_online($cache=true) {
        if (($cache != false) && isset($this->cache['is_online'])) {
            return $this->cache['is_online'];
        }

        $browser = $this->_create_browser();
        $site = $browser->site('http://'.$this->env['yacy']['host'].':'.$this->env['yacy']['port'].$this->env['online_testfile']);
        $site->get();
        if ($site->get_status()==200) {
            $this->cache['is_online']=true;
            return true;
        }
        return false;
    }

    // create the browser
    // the default request headers of the browser are switched off and replaced
    // by the headers configured in $this->env['browser']
    // return a browser object
    function _create_browser() {
        $browser = new browser();
        $browser->set_user_agent(false);
        $browser->set_accept(false);
        $browser->set_language(false);
        $browser->set_charset(false);

        $extra_headers = array();
        foreach ($this->env['browser'] as $key=>$value) {
            $extra_headers[] = $key.': '.$value;
        }
        $browser->set_extra_headers($extra_headers);
        return $browser;
    }
}

/////////////////////////////////////////////////////////////////////////
// Class browser
/////////////////////////////////////////////////////////////////////////
//
// Examples:
//
// get a site, via proxy
//   $browser = new browser();
//   $browser->set_proxy("http://localhost:8090/");
//   $site = $browser->site("http://example.com");
//   $site->get();
//   echo $site->get_content();
//
// head a site with authentification on Port 8090
//   $browser = new browser();
//   $site = $browser->site("http://user:pass@www.example.com:8090/");
//   $site->head();
//   echo $site->get_header();
//
// post a form to site
//   $browser = new browser();
//   $site = $browser->site("http://www.example.com");
//   $site->post(array("foo"=>"bar","foo2"=>"bar2"));
//   echo $site->get_content();
//
// post a file & form to site
//   $browser = new browser();
//   $site = $browser->site("http://www.example.com");
//   $site->post(array("foo"=>"bar","foo2"=>"bar2"),array(array("file"=>"/tmp/upload.txt","name"=>"file","type"=>"text/html")));
//   echo
//   $site->get_content();
//
// post only a file to site
//   $browser = new browser();
//   $site = $browser->site("http://www.example.com");
//   $site->post(false,array(array("file"=>"/tmp/upload.txt","name"=>"file","type"=>"text/html")));
//   echo $site->get_content();
//
// simple link-checker
//   $browser = new browser();
//   $site = $browser->site("http://www.example.com/link.htm");
//   $site->get();
//   echo $site->get_status();
//
// print the created request, dont send
//   $browser = new browser();
//   $browser->debug = true;
//   $site = $browser->site("http://www.example.com/link.htm");
//   $site->get();

// Holds the request settings (headers, cookies, proxy, debug flag) and
// creates site objects which perform the actual HTTP requests.
class browser {

    var $user_agent;
    var $accept;
    var $language;
    var $charset;
    var $referer;
    var $cookies;
    var $extra_headers;
    var $proxy;
    var $debug;

    // constructor
    // PHP 8 no longer treats a method named like the class as constructor,
    // so the initialisation is triggered from here
    function __construct() {
        $this->browser();
    }

    // set all settings to their defaults
    // (PHP 4 style constructor, kept because reset() and old code call it)
    function browser() {
        // defaults, can override
        $this->user_agent = 'PHP-Browser/1.0';
        $this->accept = '*/*';
        $this->language = 'de';
        $this->charset = 'ISO-8859-1';
        $this->referer = false;
        $this->cookies = false;
        $this->extra_headers = false;
        $this->proxy = false;
        $this->debug = false;
    }

    // set user-agent, false switches the header off
    // $user_agent as string
    function set_user_agent($user_agent = false) {
        $this->user_agent = $user_agent;
    }

    // set the Accept header, false switches the header off
    // $accept as string
    function set_accept($accept = false) {
        $this->accept = $accept;
    }

    // set language, false switches the header off
    // $language as string
    function set_language($language = false) {
        $this->language = $language;
    }

    // set charset, false switches the header off
    // $charset as string
    function set_charset($charset = false) {
        $this->charset = $charset;
    }

    // set referer, false switches the header off
    // $referer as string
    function set_referer($referer = false) {
        $this->referer = $referer;
    }

    // set cookies
    // $cookies as array
    // format: array("key1"=>"value1","key2"=>"value2")
    function set_cookies($cookies = false) {
        $this->cookies = $cookies;
    }

    // set extra headers
    // $extra_headers as array
    // format: array("key1: value","key2: value","key3: value")
    function set_extra_headers($extra_headers = false) {
        $this->extra_headers = $extra_headers;
    }

    // set proxy, if set the proxy will be used
    // $proxy as string
    // format: http://user:pass@server:port
    function set_proxy($proxy = false) {
        $this->proxy = $proxy;
    }

    // set it to true and the request will only be printed, not sent, no socket is opened
    // $debug as boolean
    function set_debug($debug = false) {
        $this->debug = $debug;
    }

    // reset all to defaults
    function reset() {
        $this->browser();
    }

    // create a site object for $url which carries the current settings
    // $url as string
    function site($url) {
        return new site($url,$this->user_agent,$this->accept,$this->language,$this->charset,$this->referer,$this->cookies,$this->extra_headers,$this->debug,$this->proxy);
    }
}

// One URL together with the request settings; performs GET/HEAD/POST over a
// plain socket with HTTP/1.0 and gives access to the parsed response.
class site {

    var $url;
    var $user_agent;
    var $accept;
    var $language;
    var $charset;
    var $referer;
    var $cookies;
    var $extra_headers;
    var $proxy;
    var $debug;

    var $scheme;
    var $host;
    var $port;
    var $path;
    var $user;
    var $pass;

    var $proxy_host;
    var $proxy_port;
    var $proxy_user;
    var $proxy_pass;

    // private
    var $socket;
    var $data;

    // constructor
    // PHP 8 no longer treats a method named like the class as constructor,
    // so the initialisation is triggered from here
    function __construct($url,$user_agent,$accept,$language,$charset,$referer,$cookies,$extra_headers,$debug,$proxy) {
        $this->site($url,$user_agent,$accept,$language,$charset,$referer,$cookies,$extra_headers,$debug,$proxy);
    }

    // store the settings and split the url (and the proxy url) into its parts
    // (PHP 4 style constructor, kept for backward compatibility)
    function site($url,$user_agent,$accept,$language,$charset,$referer,$cookies,$extra_headers,$debug,$proxy) {
        $this->url = $url;
        $this->user_agent = $user_agent;
        $this->accept = $accept;
        $this->language = $language;
        $this->charset = $charset;
        $this->referer = $referer;
        $this->cookies = $cookies;
        $this->extra_headers = $extra_headers;
        $this->proxy = $proxy;
        $this->debug = $debug;

        $this->user = false;
        $this->pass = false;
        $this->proxy_host = false;
        $this->proxy_port = false;
        $this->proxy_user = false;
        $this->proxy_pass = false;

        // parse URL, every part is optional
        $url_parts = parse_url($this->url);
        $this->host = $this->_url_part($url_parts,'host',false);
        $this->scheme = $this->_url_part($url_parts,'scheme','http');
        $this->port = $this->_url_part($url_parts,'port',80);
        $this->path = $this->_url_part($url_parts,'path','/');

        // adding query to path
        $query = $this->_url_part($url_parts,'query',false);
        if ($query !== false) {
            $this->path .= '?'.$query;
        }

        // authentification, the password is only used together with a user
        $user = $this->_url_part($url_parts,'user',false);
        if ($user !== false) {
            $this->user = $user;
            $this->pass = $this->_url_part($url_parts,'pass','');
        }

        // parse proxy url
        if ($this->proxy) {
            $proxy_parts = parse_url($this->proxy);
            $this->proxy_host = $this->_url_part($proxy_parts,'host',false);
            $this->proxy_port = $this->_url_part($proxy_parts,'port',8090);

            // authentification for proxy
            $proxy_user = $this->_url_part($proxy_parts,'user',false);
            if ($proxy_user !== false) {
                $this->proxy_user = $proxy_user;
                $this->proxy_pass = $this->_url_part($proxy_parts,'pass','');
            }
        }
    }

    // GET a site
    // return true if the request was sent, false if no connection was possible
    function get() {
        return $this->_simple_request('GET');
    }

    // HEAD a site
    // return true if the request was sent, false if no connection was possible
    function head() {
        return $this->_simple_request('HEAD');
    }

    // POST a site as multipart/form-data
    // $data_to_send data to send as array()
    // format: array("key1"=>"value1","key2"=>"value2")
    // $files_to_send files to send as array()
    // format: array(array("name"=>"file1","file"=>"/filename","type"=>"text/html","rename"=>"(optional) newfilename"),array(...))
    // return true if the request was sent, false if a file is not readable
    // or no connection was possible
    function post($data_to_send = false,$files_to_send = false) {
        if (!is_array($data_to_send)) {
            $data_to_send = array();
        }
        if (!is_array($files_to_send)) {
            $files_to_send = array();
        }

        // the body is built completely before anything is sent, so that the
        // Content-Length is exactly the number of bytes which follow the header
        $boundary = '---------------------------'.substr(md5(uniqid(mt_rand(),true)),0,10);
        $body = '';
        $has_parts = false;

        foreach ($data_to_send as $key=>$val) {
            $body .= '--'.$boundary."\r\n";
            $body .= 'Content-Disposition: form-data; name="'.$key.'"'."\r\n\r\n".$val."\r\n";
            $has_parts = true;
        }

        foreach ($files_to_send as $file) {
            if (!is_array($file) || !isset($file['file'])) {
                continue;
            }
            // give up before a connection is opened if a file can not be read
            if (!is_file($file['file']) || !is_readable($file['file'])) {
                return false;
            }
            $content = file_get_contents($file['file']);
            if ($content === false) {
                return false;
            }
            $field = isset($file['name']) ? $file['name'] : '';
            $type = isset($file['type']) ? $file['type'] : 'application/octet-stream';
            $filename = !empty($file['rename']) ? $file['rename'] : basename($file['file']);

            $body .= '--'.$boundary."\r\n";
            $body .= 'Content-Disposition: form-data; name="'.$field.'"; filename="'.$filename.'"'."\r\n";
            $body .= 'Content-Type: '.$type."\r\n";
            $body .= 'Content-Transfer-Encoding: binary'."\r\n\r\n";
            $body .= $content."\r\n";
            $has_parts = true;
        }

        if (!$this->_connect()) {
            return false;
        }

        $header = $this->_build_request_head('POST')."Connection: close\r\n";
        if ($has_parts) {
            $body .= '--'.$boundary.'--'."\r\n";
            $header .= 'Content-Type: multipart/form-data; boundary='.$boundary."\r\n";
        }
        // always sent, also for an empty body, so the header block is terminated
        $header .= 'Content-Length: '.strlen($body)."\r\n\r\n";

        $this->_fputs($header.$body);
        $this->data = $this->_fgets();
        $this->_fclose();
        return true;
    }

    // return http-status as string (e.g. "200"), false if no status line was found
    function get_status() {
        foreach ($this->get_headers() as $headline) {
            if (preg_match('/HTTP\/(.*) ([0-9][0-9][0-9])(.*)/i',$headline,$tmp)) {
                return $tmp[2];
            }
        }
        return false;
    }

    // return content-type as string, false if the header is missing
    // if $full == true return full content-type incl. parameters, otherwise only type (*/*)
    function get_type($full = false) {
        $type = $this->_get_header_value('Content-Type');
        if (($type === false) || $full) {
            return $type;
        }
        $parts = explode(';',$type);
        return $parts[0];
    }

    // return content-length as it was sent by the server (numeric string),
    // false if the header is missing
    function get_length() {
        return $this->_get_header_value('Content-Length');
    }

    // return new location if set, otherwise false
    function get_location() {
        return $this->_get_header_value('Location');
    }

    // return cookies as multi-array or false if no cookie was set
    // format: array(array("name"=>"foo","value"=>"bar","path"=>"string","time"=>timestamp,"domain"=>"string","secure"=>boolean),array(...))
    // only the attributes which were sent are present
    function get_cookies() {
        $cookies = false;
        foreach ($this->get_headers() as $headline) {
            if (!preg_match('/^Set-Cookie: /i',$headline)) {
                continue;
            }
            if (!is_array($cookies)) {
                $cookies = array();
            }
            $attributes = explode(';',preg_replace('/^Set-Cookie: /i','',trim($headline)));

            // name and value
            $pair = array_pad(explode('=',array_shift($attributes),2),2,'');
            $cookieinfo = array('name'=>$pair[0],'value'=>$pair[1]);

            // the attributes may be sent in any order, so they are found by name
            foreach ($attributes as $attribute) {
                $attribute = trim($attribute);
                if (preg_match('/^expires=/i',$attribute)) {
                    // time as timestamp
                    $cookieinfo['time'] = strtotime(preg_replace('/^expires=/i','',$attribute));
                } elseif (preg_match('/^path=/i',$attribute)) {
                    $cookieinfo['path'] = preg_replace('/^path=/i','',$attribute);
                } elseif (preg_match('/^domain=/i',$attribute)) {
                    $cookieinfo['domain'] = preg_replace('/^domain=/i','',$attribute);
                } elseif (strtolower($attribute) == 'secure') {
                    $cookieinfo['secure'] = true;
                }
            }
            $cookies[] = $cookieinfo;
        }
        return $cookies;
    }

    // return all headers as array
    // format: array("HTTP/1.1 200 OK","Date: Thu, 27 May 2004 20:33:10 GMT")
    function get_headers() {
        return explode("\r\n",$this->get_header());
    }

    // return the value of the Date header
    // return string like "Thu, 27 May 2004 20:33:10 GMT", false if the header is missing
    function get_date() {
        return $this->_get_header_value('Date');
    }

    // return complete header as string
    function get_header() {
        $tmp = explode("\r\n\r\n",(string)$this->data,2);
        return $tmp[0];
    }

    // return content as string
    function get_content() {
        $tmp = explode("\r\n\r\n",(string)$this->data,2);
        if (isset($tmp[1])) {
            return $tmp[1];
        }
        return "";
    }

    // intern, return one part of a parse_url() result or $default if the part
    // is missing or empty ($parts is false for a malformed url)
    function _url_part($parts,$key,$default) {
        if (is_array($parts) && isset($parts[$key]) && ($parts[$key] !== '')) {
            return $parts[$key];
        }
        return $default;
    }

    // intern, open the connection to the proxy if one is set, otherwise to the host
    function _connect() {
        if ($this->proxy) {
            return $this->_fsockopen($this->proxy_host,$this->proxy_port);
        }
        return $this->_fsockopen($this->host,$this->port);
    }

    // intern, send a request without body ($method is 'GET' or 'HEAD') and read the response
    function _simple_request($method) {
        if (!$this->_connect()) {
            return false;
        }
        $this->_fputs($this->_build_request_head($method)."Connection: close\r\n\r\n");
        $this->data = $this->_fgets();
        $this->_fclose();
        return true;
    }

    // intern, build the request line and all configured header lines
    // the "Connection" header and the empty line which ends the header
    // block are added by the caller
    function _build_request_head($method) {
        // 80 is the default port of this class, every other port is written out
        $port_suffix = ($this->port != 80) ? ':'.$this->port : '';

        if ($this->proxy) {
            // a proxy needs the absolute url in the request line
            $target = $this->scheme.'://'.$this->host.$port_suffix.$this->path;
        } else {
            $target = $this->path;
        }
        $header = $method.' '.$target.' HTTP/1.0'."\r\n";
        // the Host header names the requested server, never the proxy
        $header .= 'Host: '.$this->host.$port_suffix."\r\n";

        if ($this->user !== false) {
            $header .= 'Authorization: Basic '.base64_encode($this->user.':'.$this->pass)."\r\n";
        }
        if ($this->proxy && ($this->proxy_user !== false)) {
            $header .= 'Proxy-Authorization: Basic '.base64_encode($this->proxy_user.':'.$this->proxy_pass)."\r\n";
        }

        // every default header can be switched off by setting it to false
        $optional = array(
            'Accept-Language' => $this->language,
            'Accept-Charset'  => $this->charset,
            'User-Agent'      => $this->user_agent,
            'Accept'          => $this->accept,
            'Referer'         => $this->referer
        );
        foreach ($optional as $name=>$value) {
            if ($value !== false) {
                $header .= $name.': '.$value."\r\n";
            }
        }

        if (is_array($this->extra_headers)) {
            foreach ($this->extra_headers as $extra_header) {
                if ($extra_header) {
                    $header .= $extra_header."\r\n";
                }
            }
        }

        if (is_array($this->cookies)) {
            $pairs = array();
            foreach ($this->cookies as $var=>$value) {
                // cookies without name or with empty value are not sent
                if (($var) && ($value != "")) {
                    $pairs[] = $var.'='.$value;
                }
            }
            if (count($pairs)) {
                $header .= 'Cookie: '.implode('; ',$pairs)."\r\n";
            }
        }
        return $header;
    }

    // intern, return the value of the first header line "$name: ...", false if there is none
    function _get_header_value($name) {
        $pattern = '/^'.preg_quote($name,'/').': /i';
        foreach ($this->get_headers() as $headline) {
            if (preg_match($pattern,$headline)) {
                return preg_replace($pattern,'',$headline);
            }
        }
        return false;
    }

    // intern, for debugging
    // open the socket, in debug mode nothing is opened and true is returned
    function _fsockopen($host, $port) {
        if ($this->debug) {
            return true;
        }
        // a failed connect is reported by the return value, not by a warning
        $this->socket = @fsockopen($host,$port,$errno,$errstr,100);
        return ($this->socket) ? true : false;
    }

    // intern, for debugging
    // write to the socket, in debug mode the data is printed instead
    function _fputs($data) {
        if (!$this->debug) {
            @fputs($this->socket,$data);
        } else {
            echo $data;
        }
    }

    // intern, for debugging
    // read the complete response, in debug mode false is returned
    function _fgets() {
        if ($this->debug) {
            return false;
        }
        $return = '';
        while (!@feof($this->socket)) {
            $chunk = @fgets($this->socket,512);
            // stop on a read error or timeout, feof() would never become true then
            if ($chunk === false) {
                break;
            }
            $return .= $chunk;
        }
        return $return;
    }

    // intern, for debugging
    // close the socket if one was opened
    function _fclose() {
        if ((!$this->debug) && is_resource($this->socket)) {
            fclose($this->socket);
        }
    }
}
?>