<?php

/*************************************************

Snoopy - the PHP net client
Author: Monte Ohrt <monte@ispi.net>
Copyright (c): 1999-2000 ispi, all rights reserved
Version: 1.0

 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

You may contact the author of Snoopy by e-mail at:
monte@ispi.net

Or, write to:
Monte Ohrt
CTO, ispi
237 S. 70th suite 220
Lincoln, NE 68510

The latest version of Snoopy can be obtained from:
http://snoopy.sourceforge.com

*************************************************/

class Snoopy
{
    /**** Public variables ****/

    /* user definable vars */

    var $host           = "www.php.net";    // host name we are connecting to
    var $port           = 80;               // port we are connecting to
    var $proxy_host     = "";               // proxy host to use
    var $proxy_port     = "";               // proxy port to use
    var $agent          = "Snoopy v1.0";    // agent we masquerade as
    var $referer        = "";               // referer info to pass
    var $cookies        = array();          // array of cookies to pass
                                            // $cookies["username"]="joe";
    var $rawheaders     = array();          // array of raw headers to send
                                            // $rawheaders["Content-type"]="text/html";

    var $maxredirs      = 5;                // http redirection depth maximum. 0 = disallow
    var $lastredirectaddr = "";             // contains address of last redirected address
    var $offsiteok      = true;             // allows redirection off-site
    var $maxframes      = 0;                // frame content depth maximum. 0 = disallow
    var $expandlinks    = true;             // expand links to fully qualified URLs.
                                            // this only applies to fetchlinks()
                                            // or submitlinks()
    var $passcookies    = true;             // pass set cookies back through redirects
                                            // NOTE: this currently does not respect
                                            // dates, domains or paths.

    var $user           = "";               // user for http authentication
    var $pass           = "";               // password for http authentication

    // http accept types
    var $accept         = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

    var $results        = "";               // where the content is put

    var $error          = "";               // error messages sent here
    var $response_code  = "";               // response code returned from server
    var $headers        = array();          // headers returned from server sent here
    var $maxlength      = 500000;           // max return data length (body)
    var $read_timeout   = 0;                // timeout on read operations, in seconds
                                            // supported only since PHP 4 Beta 4
                                            // set to 0 to disallow timeouts
    var $timed_out      = false;            // if a read operation timed out
    var $status         = 0;                // http request status

    var $curl_path      = "/usr/bin/curl";
                                            // Snoopy will use cURL for fetching
                                            // SSL content if a full system path to
                                            // the cURL binary is supplied here.
                                            // set to false if you do not have
                                            // cURL installed. See http://curl.haxx.se
                                            // for details on installing cURL.
                                            // Snoopy does *not* use the cURL
                                            // library functions built into php,
                                            // as these functions are not stable
                                            // as of this Snoopy release.

    // send Accept-encoding: gzip?
    var $use_gzip       = true;

    /**** Private variables ****/

    var $_maxlinelen    = 4096;             // max line length (headers)

    var $_httpmethod    = "GET";            // default http request method
    var $_httpversion   = "HTTP/1.0";       // default http request version
    var $_submit_method = "POST";           // default submit method
    var $_submit_type   = "application/x-www-form-urlencoded";  // default submit type
    var $_mime_boundary = "";               // MIME boundary for multipart/form-data submit type
    var $_redirectaddr  = false;            // will be set if page fetched is a redirect
    var $_redirectdepth = 0;                // increments on an http redirect
    var $_frameurls     = array();          // frame src urls
    var $_framedepth    = 0;                // increments on frame depth

    var $_isproxy       = false;            // set if using a proxy server
    var $_fp_timeout    = 30;               // timeout for socket connection

/*======================================================================*\
    Function:   fetch
    Purpose:    fetch the contents of a web page
                (and possibly other protocols in the
                future like ftp, nntp, gopher, etc.)
    Input:      $URI    the location of the page to fetch
    Output:     $this->results  the output text from the fetch
                returns false when the URI is unusable, the socket
                cannot be opened or cURL fails; true otherwise
                (including when a followed redirect or frame fails,
                in which case $this->error / $this->status tell)
\*======================================================================*/

    function fetch($URI)
    {
        // parse_url() returns false on badly malformed input
        $URI_PARTS = parse_url($URI);
        if (!is_array($URI_PARTS))
            $URI_PARTS = array();

        if (!empty($URI_PARTS["user"]))
            $this->user = $URI_PARTS["user"];
        if (!empty($URI_PARTS["pass"]))
            $this->pass = $URI_PARTS["pass"];

        // URI schemes are case-insensitive
        $scheme = isset($URI_PARTS["scheme"]) ? strtolower($URI_PARTS["scheme"]) : "";

        if ($scheme != "http" && $scheme != "https")
        {
            // not a valid protocol
            $given = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
            $this->error = 'Invalid protocol "'.$given.'"'."\n";
            return false;
        }

        if (empty($URI_PARTS["host"]))
        {
            $this->error = "Invalid URI, no host name given\n";
            return false;
        }

        if ($scheme == "https" && (!$this->curl_path || !is_executable($this->curl_path)))
        {
            $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
            return false;
        }

        $this->host = $URI_PARTS["host"];
        if (!empty($URI_PARTS["port"]))
            $this->port = $URI_PARTS["port"];

        // request target used when no proxy is involved
        $path = (isset($URI_PARTS["path"]) && $URI_PARTS["path"] !== "") ? $URI_PARTS["path"] : "/";
        if (isset($URI_PARTS["query"]))
            $path .= "?".$URI_PARTS["query"];

        if ($scheme == "http")
        {
            $fp = null;
            if (!$this->_connect($fp))
                return false;

            // using proxy: send entire URI, otherwise only the path
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);
        }
        else
        {
            // a failed cURL run leaves nothing to follow; $this->error is set
            if (!$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod))
                return false;
        }

        // url was redirected: follow it unless we've hit the max depth,
        // and only if it's on this site or offsiteok is true
        if ($this->_redirectaddr
            && $this->maxredirs > $this->_redirectdepth
            && ($this->offsiteok || $this->_is_onsite($this->_redirectaddr)))
        {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }

        if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
        {
            // take a private copy: the nested fetches refill _frameurls
            $frameurls = $this->_frameurls;
            $this->_frameurls = array();

            foreach ($frameurls as $frameurl)
            {
                if ($this->_framedepth >= $this->maxframes)
                    break;
                $this->fetch($frameurl);
                $this->_framedepth++;
            }
        }

        return true;
    }

/*======================================================================*\
    Private functions
\*======================================================================*/

/*======================================================================*\
    Function:   _is_onsite
    Purpose:    tell whether an absolute http(s) address points at
                the host currently stored in $this->host
    Input:      $address    absolute URL to test
    Output:     true if the authority is exactly $this->host
\*======================================================================*/

    function _is_onsite($address)
    {
        // the trailing group stops "host.evil.tld" from passing as "host"
        $pattern = '~^https?://'.preg_quote($this->host, '~').'(?:[:/?#]|$)~i';
        return (bool) preg_match($pattern, $address);
    }

/*======================================================================*\
    Function:   _striplinks
    Purpose:    strip the hyperlinks from an html document
    Input:      $document   document to strip.
    Output:     $match      an array of the links: quoted href values
                            first, then unquoted ones; empty array
                            when the document has no links
\*======================================================================*/

    function _striplinks($document)
    {
        // the attribute skip is lazy and confined to the tag, so every
        // anchor yields its own match
        preg_match_all("'<\s*a\s+[^>]*?href\s*=\s*       # find <a href=
                        ([\"\'])?                       # find single or double quote
                        (?(1) (.*?)\\1 | ([^\s>]+))     # if quote found, match up to next matching
                                                        # quote, otherwise match up to next space
                        'isx", $document, $links);

        // catenate the non-empty matches from the conditional subpattern
        $match = array();

        foreach ($links[2] as $val)
        {
            if (!empty($val))
                $match[] = $val;
        }

        foreach ($links[3] as $val)
        {
            if (!empty($val))
                $match[] = $val;
        }

        // return the links
        return $match;
    }

/*======================================================================*\
    Function:   _stripform
    Purpose:    strip the form elements from an html document
    Input:      $document   document to strip.
    Output:     $match      the matched form markup, one element per
                            line (joined with CRLF)
\*======================================================================*/

    function _stripform($document)
    {
        preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi", $document, $elements);

        // catenate the matches
        return implode("\r\n", $elements[0]);
    }

/*======================================================================*\
    Function:   _striptext
    Purpose:    strip the text from an html document
    Input:      $document   document to strip.
    Output:     $text       the resulting text
\*======================================================================*/

    function _striptext($document)
    {
        // I didn't use preg eval (//e) since that is only available in PHP 4.0.
        // so, list your entities one by one here. I included some of the
        // more common ones.

        $search = array("'<script[^>]*?>.*?</script>'si",  // strip out javascript
                        "'<[\/\!]*?[^<>]*?>'si",           // strip out html tags
                        "'([\r\n])[\s]+'",                 // strip out white space
                        "'&(quote?|#34);'i",               // replace html entities
                        "'&(lt|#60);'i",
                        "'&(gt|#62);'i",
                        "'&(nbsp|#160);'i",
                        "'&(iexcl|#161);'i",
                        "'&(cent|#162);'i",
                        "'&(pound|#163);'i",
                        "'&(copy|#169);'i",
                        "'&(amp|#38);'i"                   // last, so "&amp;lt;" is not decoded twice
                        );
        // chr(161)..chr(169) are single bytes (the Latin-1 code points)
        $replace = array("",
                         "",
                         "\\1",
                         "\"",
                         "<",
                         ">",
                         " ",
                         chr(161),
                         chr(162),
                         chr(163),
                         chr(169),
                         "&");

        return preg_replace($search, $replace, $document);
    }

/*======================================================================*\
    Function:   _expandlinks
    Purpose:    expand each link into a fully qualified URL
    Input:      $links          the links to qualify (string or array)
                $URI            the full URI to get the base from
    Output:     $expandedLinks  the expanded links, same shape and
                                keys as $links; returned untouched
                                when $URI has no scheme and host
\*======================================================================*/

    function _expandlinks($links,$URI)
    {
        // the base never includes query string or fragment
        $base = preg_replace('/[?#].*$/s', '', (string) $URI);
        $parts = parse_url($base);
        if (!is_array($parts) || empty($parts["scheme"]) || empty($parts["host"]))
            return $links;

        $root = $parts["scheme"]."://".$parts["host"];
        if (!empty($parts["port"]))
            $root .= ":".$parts["port"];

        // heuristic kept from the original: a last path segment of the
        // form name.ext is a file and is dropped, anything else is
        // treated as a directory
        $dir = isset($parts["path"]) ? $parts["path"] : "";
        $dir = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $dir);
        $dir = preg_replace("|/$|", "", $dir);
        $dir = $root.$dir;

        if (!is_array($links))
            return $this->_expandlink((string) $links, $parts["scheme"], $root, $dir);

        $expandedLinks = array();
        foreach ($links as $key => $link)
            $expandedLinks[$key] = $this->_expandlink((string) $link, $parts["scheme"], $root, $dir);

        return $expandedLinks;
    }

/*======================================================================*\
    Function:   _expandlink
    Purpose:    qualify one link against an already dissected base
    Input:      $link   the link to qualify
                $scheme scheme of the base URI
                $root   scheme://host[:port] of the base URI
                $dir    $root plus the base directory, no trailing slash
    Output:     the fully qualified link
\*======================================================================*/

    function _expandlink($link, $scheme, $root, $dir)
    {
        if (preg_match('|^[a-z][a-z0-9+.\-]*:|i', $link))
        {
            // carries its own scheme (http:, https:, mailto:, ...)
        }
        elseif (substr($link, 0, 2) == "//")
            $link = $scheme.":".$link;      // protocol-relative
        elseif (substr($link, 0, 1) == "/")
            $link = $root.$link;            // relative to the site root
        else
            $link = $dir."/".$link;         // relative to the base directory

        return $this->_collapse_dot_segments($link);
    }

/*======================================================================*\
    Function:   _collapse_dot_segments
    Purpose:    remove "/./" and "/segment/../" from the path of a
                scheme://authority/path URL; query, fragment and
                anything that is not of that form are left alone
    Input:      $url    the URL to clean
    Output:     the cleaned URL
\*======================================================================*/

    function _collapse_dot_segments($url)
    {
        if (!preg_match('|^([a-z][a-z0-9+.\-]*://[^/?#]*)([^?#]*)(.*)$|is', $url, $m))
            return $url;

        $path = $m[2];
        do
        {
            $before = $path;
            $path = preg_replace('|/\./|', "/", $path);
            // one pair per pass so nested "../.." unwinds from the inside
            $path = preg_replace('|/(?!\.\./)[^/]+/\.\./|', "/", $path, 1);
        }
        while ($path !== $before);

        return $m[1].$path.$m[3];
    }

/*======================================================================*\
    Function:   _request_headers
    Purpose:    build the request header lines shared by the socket
                and the cURL transport
    Input:      $content_type   content type of the body, if any
                $body           body contents to send, if any
                $accept_gzip    whether gzip may be offered
    Output:     array of "Name: value" strings without line endings
\*======================================================================*/

    function _request_headers($content_type, $body, $accept_gzip)
    {
        $headers = array();

        if (!empty($this->agent))
            $headers[] = "User-Agent: ".$this->agent;
        // a raw Host header supplied by the caller wins
        if (!empty($this->host) && !(is_array($this->rawheaders) && isset($this->rawheaders['Host'])))
            $headers[] = "Host: ".$this->host;
        if (!empty($this->accept))
            $headers[] = "Accept: ".$this->accept;

        if ($accept_gzip && $this->use_gzip)
        {
            // make sure PHP was built with --with-zlib
            // and we can handle gzipp'ed data
            if (function_exists('gzinflate'))
                $headers[] = "Accept-encoding: gzip";
            else
                trigger_error(
                    "use_gzip is on, but PHP was built without zlib support.".
                    "  Requesting file(s) without gzip encoding.",
                    E_USER_NOTICE);
        }

        if (!empty($this->referer))
            $headers[] = "Referer: ".$this->referer;

        if (!empty($this->cookies))
        {
            if (!is_array($this->cookies))
                $this->cookies = (array) $this->cookies;

            $pairs = array();
            foreach ($this->cookies as $cookieKey => $cookieVal)
                $pairs[] = $cookieKey."=".urlencode($cookieVal);
            $headers[] = "Cookie: ".implode("; ", $pairs);
        }

        if (!empty($this->rawheaders))
        {
            if (!is_array($this->rawheaders))
                $this->rawheaders = (array) $this->rawheaders;
            // foreach always starts at the first element, so the raw
            // headers are also sent on redirected requests
            foreach ($this->rawheaders as $headerKey => $headerVal)
                $headers[] = $headerKey.": ".$headerVal;
        }

        if (!empty($content_type))
        {
            if ($content_type == "multipart/form-data")
                $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
            else
                $headers[] = "Content-type: $content_type";
        }
        if (!empty($body))
            $headers[] = "Content-length: ".strlen($body);
        if (!empty($this->user) || !empty($this->pass))
            $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

        return $headers;
    }

/*======================================================================*\
    Function:   _redirect_from_header
    Purpose:    extract the redirect target from a Location: or URI:
                response header line
    Input:      $header     one raw response header line
                $scheme     scheme to prepend to host-less targets
                $port       port to prepend to host-less targets,
                            empty for none
    Output:     absolute redirect address, or false if the line is
                not a redirect header or carries no target
\*======================================================================*/

    function _redirect_from_header($header, $scheme, $port)
    {
        if (!preg_match('/^(?:Location|URI):\s*(.*)$/i', rtrim($header), $m) || $m[1] === "")
            return false;

        $target = $m[1];

        // look for :// in the header to see if hostname is included
        if (preg_match('|://|', $target))
            return $target;

        // no host in the path, so prepend
        $address = $scheme."://".$this->host;
        if (!empty($port))
            $address .= ":".$port;

        // eliminate double slash
        if (substr($target, 0, 1) != "/")
            $address .= "/";

        return $address.$target;
    }

/*======================================================================*\
    Function:   _store_results
    Purpose:    look for a meta refresh and frame sources in a
                fetched body, then file the body in $this->results
    Input:      $results    the body just fetched
                $URI        the full URI it came from
                $URI_PARTS  parse_url() of $URI
\*======================================================================*/

    function _store_results($results, $URI, $URI_PARTS)
    {
        // check if there is a a redirect meta tag
        if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
        {
            $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
        }

        // have we hit our frame depth and is there frame src to fetch?
        if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\">]+)'i", $results, $match))
        {
            // appending with [] to a string is a fatal error on PHP >= 7.1
            if (!is_array($this->results))
                $this->results = array();
            $this->results[] = $results;

            $frame_base = (isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "http")."://".$this->host;
            foreach ($match[1] as $frame_src)
                $this->_frameurls[] = $this->_expandlinks($frame_src, $frame_base);
        }
        // have we already fetched framed content?
        elseif (is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;
    }

/*======================================================================*\
    Function:   _httprequest
    Purpose:    go get the http data from the server
    Input:      $url            the url to fetch
                $fp             the current open file pointer
                $URI            the full URI
                $http_method    request method (GET, POST, ...)
                $content_type   content type of the body, if any
                $body           body contents to send if any (POST)
    Output:     true on success, false (with $this->status = -100)
                when a read timed out; fills $this->headers,
                $this->status, $this->response_code, $this->results
\*======================================================================*/

    function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
    {
        if ($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $URI_PARTS = parse_url($URI);
        if (!is_array($URI_PARTS))
            $URI_PARTS = array();
        if (empty($url))
            $url = "/";

        $request = $http_method." ".$url." ".$this->_httpversion."\r\n";
        foreach ($this->_request_headers($content_type, $body, true) as $line)
            $request .= $line."\r\n";
        $request .= "\r\n";

        // set the read timeout if needed
        if ($this->read_timeout > 0)
            stream_set_timeout($fp, $this->read_timeout);
        $this->timed_out = false;

        fwrite($fp, $request.$body, strlen($request.$body));

        $this->_redirectaddr = false;
        $this->headers = array();

        // content was returned gzip encoded?
        $is_gzipped = false;
        $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "http";

        while (($currentHeader = fgets($fp, $this->_maxlinelen)) !== false)
        {
            if ($this->read_timeout > 0 && $this->_check_timeout($fp))
            {
                $this->status = -100;
                return false;
            }

            // a blank line ends the header section
            if (preg_match("/^\r?\n$/", $currentHeader))
                break;

            // if a header begins with Location: or URI:, set the redirect
            $redirect = $this->_redirect_from_header($currentHeader, $scheme, $this->port);
            if ($redirect !== false)
                $this->_redirectaddr = $redirect;

            if (preg_match("|^HTTP/|", $currentHeader))
            {
                if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status))
                    $this->status = $status[1];
                $this->response_code = $currentHeader;
            }

            if (preg_match('/^Content-Encoding:\s*gzip/i', $currentHeader))
                $is_gzipped = true;

            $this->headers[] = $currentHeader;
        }

        // compare against false/"" explicitly: a chunk consisting of "0"
        // is data, not end of stream
        $results = "";
        while (($data = fread($fp, $this->maxlength)) !== false && $data !== "")
        {
            $results .= $data;
            if (strlen($results) > $this->maxlength)
                break;
        }

        // gunzip
        if ($is_gzipped && $results !== "")
        {
            if (function_exists('gzdecode'))
                $inflated = gzdecode($results);     // parses the gzip header itself
            elseif (function_exists('gzinflate'))
                // per http://www.php.net/manual/en/function.gzencode.php
                // (assumes the minimal 10 byte gzip header)
                $inflated = gzinflate(substr($results, 10));
            else
                $inflated = false;

            if ($inflated === false)
            {
                $this->error = "could not decode gzip encoded response body";
                $inflated = "";
            }
            $results = $inflated;
        }

        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status = -100;
            return false;
        }

        $this->_store_results($results, $URI, $URI_PARTS);

        return true;
    }

/*======================================================================*\
    Function:   _httpsrequest
    Purpose:    go get the https data from the server using curl
    Input:      $url            the url to fetch (unused: cURL is
                                always handed the full $URI)
                $URI            the full URI
                $http_method    request method (unused: cURL picks
                                GET, or POST when a body is given)
                $content_type   content type of the body, if any
                $body           body contents to send if any (POST)
    Output:     true on success, false with $this->error set when
                cURL fails or no temporary file can be created
\*======================================================================*/

    function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
    {
        if ($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        // reset now so a failed run cannot leave a stale redirect behind
        $this->_redirectaddr = false;

        $URI_PARTS = parse_url($URI);
        if (!is_array($URI_PARTS))
            $URI_PARTS = array();

        // GET ... header not needed for curl
        $headers = $this->_request_headers($content_type, $body, false);

        // unpredictable, exclusively created file for the response headers
        $tmpdir = function_exists('sys_get_temp_dir') ? sys_get_temp_dir() : "/tmp";
        $headerfile = tempnam($tmpdir, "snoopy");
        if ($headerfile === false)
        {
            $this->error = "Error: could not create a temporary file for the cURL headers.";
            return false;
        }

        // every value is quoted on its own with escapeshellarg(), so
        // nothing taken from the URI, headers or body reaches the shell
        // -k: accept self-signed certs
        $command = escapeshellarg($this->curl_path)." -k -D ".escapeshellarg($headerfile);

        foreach ($headers as $header)
            $command .= " -H ".escapeshellarg($header);

        // NOTE(review): curl treats a -d value starting with "@" as a
        // file name to read from; callers must not pass such bodies
        if (!empty($body))
            $command .= " -d ".escapeshellarg($body);

        if ($this->read_timeout > 0)
            $command .= " -m ".(int) $this->read_timeout;

        $command .= " ".escapeshellarg($URI);

        $output = array();
        $return = 0;
        exec($command, $output, $return);

        if ($return)
        {
            if (file_exists($headerfile))
                unlink($headerfile);
            $this->error = "Error: cURL could not retrieve the document, error $return.";
            return false;
        }

        $results = implode("\r\n", $output);

        $result_headers = file($headerfile);
        unlink($headerfile);
        if (!is_array($result_headers))
            $result_headers = array();

        $this->headers = array();

        $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "https";
        // only an explicit port belongs in a rebuilt address; $this->port
        // may still hold the plain http default
        $port = !empty($URI_PARTS["port"]) ? $URI_PARTS["port"] : "";

        foreach ($result_headers as $currentHeader)
        {
            // if a header begins with Location: or URI:, set the redirect
            $redirect = $this->_redirect_from_header($currentHeader, $scheme, $port);
            if ($redirect !== false)
                $this->_redirectaddr = $redirect;

            if (preg_match("|^HTTP/|", $currentHeader))
            {
                $this->response_code = $currentHeader;
                if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $this->response_code, $match))
                    $this->status = $match[1];
            }

            $this->headers[] = $currentHeader;
        }

        $this->_store_results($results, $URI, $URI_PARTS);

        return true;
    }

/*======================================================================*\
    Function:   setcookies()
    Purpose:    set cookies for a redirection: copy name/value of
                every Set-Cookie header in $this->headers into
                $this->cookies
\*======================================================================*/

    function setcookies()
    {
        if (!is_array($this->headers))
            return;
        if (!is_array($this->cookies))
            $this->cookies = (array) $this->cookies;

        foreach ($this->headers as $header)
        {
            // the value stops at ";" or the line ending; it is decoded
            // here because it is urlencode()d again when sent back
            if (preg_match("/^set-cookie:\s*([^=]+)=([^;\r\n]+)/i", $header, $match))
                $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }

/*======================================================================*\
    Function:   _check_timeout
    Purpose:    checks whether timeout has occurred
    Input:      $fp file pointer
    Output:     true (and $this->timed_out set) if the stream
                reports a timeout, false otherwise
\*======================================================================*/

    function _check_timeout($fp)
    {
        if ($this->read_timeout > 0)
        {
            $fp_status = stream_get_meta_data($fp);
            if (!empty($fp_status["timed_out"]))
            {
                $this->timed_out = true;
                return true;
            }
        }
        return false;
    }

/*======================================================================*\
    Function:   _connect
    Purpose:    make a socket connection
    Input:      $fp file pointer (set by reference)
    Output:     true on success; false with $this->status and
                $this->error set on failure
\*======================================================================*/

    function _connect(&$fp)
    {
        if (!empty($this->proxy_host) && !empty($this->proxy_port))
        {
            $this->_isproxy = true;
            $host = $this->proxy_host;
            $port = $this->proxy_port;
        }
        else
        {
            // clear the flag in case a proxy was used on an earlier call
            $this->_isproxy = false;
            $host = $this->host;
            $port = $this->port;
        }

        $this->status = 0;

        $errno = 0;
        $errstr = "";
        $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);

        if ($fp)
        {
            // socket connection succeeded
            return true;
        }

        // socket connection failed
        $this->status = $errno;
        switch ($errno)
        {
            case -3:
                $this->error = "socket creation failed (-3)";
                break;
            case -4:
                $this->error = "dns lookup failure (-4)";
                break;
            case -5:
                $this->error = "connection refused or timed out (-5)";
                break;
            default:
                $this->error = "connection failed (".$errno.")";
        }
        return false;
    }

/*======================================================================*\
    Function:   _disconnect
    Purpose:    disconnect a socket connection
    Input:      $fp file pointer
    Output:     the result of fclose()
\*======================================================================*/

    function _disconnect($fp)
    {
        return fclose($fp);
    }

/*======================================================================*\
    Function:   _prepare_post_body
    Purpose:    Prepare post body according to encoding type
    Input:      $formvars  - form variables
                $formfiles - form upload files
    Output:     post body; empty string when there is nothing to
                send or $this->_submit_type is not supported
\*======================================================================*/

    function _prepare_post_body($formvars, $formfiles)
    {
        settype($formvars, "array");
        settype($formfiles, "array");

        $postdata = "";

        if (count($formvars) == 0 && count($formfiles) == 0)
            return $postdata;

        switch ($this->_submit_type)
        {
            case "application/x-www-form-urlencoded":
                foreach ($formvars as $key => $val)
                {
                    if (is_array($val) || is_object($val))
                    {
                        foreach ($val as $cur_val)
                            $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                    }
                    else
                        $postdata .= urlencode($key)."=".urlencode($val)."&";
                }
                break;

            case "multipart/form-data":
                $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

                foreach ($formvars as $key => $val)
                {
                    if (is_array($val) || is_object($val))
                    {
                        foreach ($val as $cur_val)
                        {
                            $postdata .= "--".$this->_mime_boundary."\r\n";
                            $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                            $postdata .= "$cur_val\r\n";
                        }
                    }
                    else
                    {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                        $postdata .= "$val\r\n";
                    }
                }

                foreach ($formfiles as $field_name => $file_names)
                {
                    settype($file_names, "array");
                    foreach ($file_names as $file_name)
                    {
                        if (!is_readable($file_name))
                            continue;

                        // binary safe, and fine with empty files
                        $file_content = file_get_contents($file_name);
                        if ($file_content === false)
                            continue;
                        $base_name = basename($file_name);

                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                        $postdata .= "$file_content\r\n";
                    }
                }
                $postdata .= "--".$this->_mime_boundary."--\r\n";
                break;
        }

        return $postdata;
    }
}

// For more help developing with SiT! see http://sitracker.org/wiki/DevelopmentHowTo