I've been working on Hans Anderson's Browser class and I've adapted it to do
what you're looking for. It's not complete but there should be a lot here
for you to go on. Again, I haven't had time to fine tune things at all so
it's a very rough hack right now. Hope it helps.
The quick and dirty on how to use it:
<?php
// Usage example. Assumes class_browser.php (the Browser class listed below)
// has already been included and that $file holds the full URL to fetch.
$browser = new Browser;

// Fetch the page; robots.txt checking is switched off for this request.
$file_array = $browser->get_url(array(
    "url"         => "$file",
    "req_mthd"    => 'GET',
    "protocol"    => 'HTTP/1.1',
    "robot_rules" => FALSE
    )
);

// errcode 1 is what get_url() returns together with errmsg "Success".
if ($file_array["errcode"] == 1) {
    $file_text = $file_array["content"];
    // convert relative links to absolute
    $file_text = $browser->translate($file_text, $file);
}
?>
class_browser.php:
class Browser {
/*
Class Browser by Hans Anderson.
This code is released under the GPL license.
Modifications by Aral Balkan
06.22.01 - Added two new methods:
(1) $string = translate_links($string, $url);
Translates all relative links in argument $string to
absolute links using the full URL to the page being
accessed in the $url argument. Returns the translated
string.
(2) $string = translate_images($string, $url);
Translates all relative image links in argument $string to
absolute links using the full URL to the page being
accessed in the $url argument. Returns the translated
string.
Aral Balkan ([EMAIL PROTECTED])
*/
function get_url($array) {
    /*
     Fetch a URL over a plain socket connection and return the response.

     $array is an associative array of options:
       url         - URL to fetch (required, no default)
       content     - request body, sent only when req_mthd is POST
       req_mthd    - request method, upper-cased before use (default 'GET')
       protocol    - protocol token for the request line (default 'HTTP/1.0')
       robot_rules - ask $this->robot_rules() for permission first (default TRUE)
       user_agent  - User-Agent header value (default 'PHP3 Browser')
       time_out    - seconds allowed for connecting and for the download (default 10)
       cookies     - array of name => value pairs, sent as Cookie headers
       referer     - Referer header value
     Any other key is ignored.

     Returns array("content"=>..., "headers"=>..., "errcode"=>..., "errmsg"=>...)
     where errcode is 1 on success, 0 on a non-fatal error and -1 on a fatal
     error. On a download timeout "content" and "headers" hold whatever was
     received before the deadline.

     Note: the socket is always opened unencrypted, on port 80 unless the URL
     names a port; the URL scheme is only used to build the robots.txt URL.
    */

    /* defaults (there is no default for 'url' or 'content') */
    $robot_rules = TRUE; /* follow the robots.txt standard */
    $req_mthd = 'GET';
    $protocol = 'HTTP/1.0';
    $user_agent = 'PHP3 Browser';
    $time_out = 10;
    $url = null;
    $content = '';
    $cookies = null;
    $referer = '';

    /* for each recognised argument set in the array, overwrite the default.
       Only known option names are accepted so a caller cannot clobber
       arbitrary local variables. */
    $allowed = array('url', 'content', 'req_mthd', 'protocol', 'robot_rules',
                     'user_agent', 'time_out', 'cookies', 'referer');
    foreach ($array as $k => $v) {
        if (in_array($k, $allowed, true)) {
            $$k = $v;
        }
    }

    /* set up the cookies: one Cookie header line per name/value pair */
    $cookies2send = '';
    if (is_array($cookies)) {
        foreach ($cookies as $k => $v) {
            $cookies2send .= "Cookie: $k=" . urlencode($v) . "\r\n";
        }
    }

    if (!isset($url) || $url === '') {
        return array("content"=>' ',"headers"=>' ',"errcode"=>-1,"errmsg"=>'Fatal Error: No URL defined');
    }

    /* parse_url() returns false for seriously malformed URLs */
    $parsed_url = parse_url("$url");
    if (!is_array($parsed_url)) {
        $parsed_url = array();
    }

    /* the host is needed for both the robots.txt URL and the connection,
       so validate it before either is attempted */
    $host = isset($parsed_url["host"]) ? $parsed_url["host"] : '';
    if ($host == '') {
        return array("content"=>' ',"headers"=>' ',"errcode"=>-1,"errmsg"=>'Fatal Error: No URL defined');
    }

    if ($robot_rules) {
        $scheme = isset($parsed_url["scheme"]) ? $parsed_url["scheme"] : 'http';
        $robots_url = $scheme . "://" . $host;
        if (!empty($parsed_url["port"])) $robots_url .= ":" . $parsed_url["port"];
        $robots_url .= "/robots.txt";
        if (!$this->robot_rules($url, $robots_url, $user_agent)) {
            return array("content"=>' ',"headers"=>' ',"errcode"=>0,"errmsg"=>"Non-fatal Error: Robot Rules do not permit this browser to access $url");
        }
    }

    $req_mthd = strtoupper($req_mthd); // 2068 rfc says it's case sensitive.

    $path = isset($parsed_url["path"]) ? $parsed_url["path"] : '';
    if ($path == '') {
        $path = "/";
    }
    $query = isset($parsed_url["query"]) ? $parsed_url["query"] : '';
    if ($query != '') {
        $path = "$path?$query";
    }
    $port = isset($parsed_url["port"]) ? $parsed_url["port"] : 80;

    /* absolute deadline covering the connect and the whole download */
    $deadline = time() + $time_out;

    $fp = fsockopen("$host", $port, $errno, $errstring, $time_out);
    if (!$fp) {
        return array("content"=>' ',"headers"=>' ',"errcode"=>0,"errmsg"=>"Non-Fatal Error: Could not make connection to url $url (not found in DNS or you are not connected to the Internet)");
    }

    stream_set_blocking($fp, true); // aral: blocking mode for it to work on Windows & Unix

    /* build the request; HTTP header lines are terminated by CRLF */
    $eol = "\r\n";
    $REQUEST = "$req_mthd $path $protocol" . $eol;
    if (preg_match('/^HTTP\/1\.[1-9]/i', $protocol)) {
        /* a non-default port is part of the Host header value */
        $REQUEST .= "Host: $host" . ($port != 80 ? ":$port" : '') . $eol;
    }
    $REQUEST .= "User-Agent: $user_agent" . $eol;
    if ($referer) {
        $REQUEST .= "Referer: $referer" . $eol;
    }
    $REQUEST .= "Connection: close" . $eol;
    $REQUEST .= $cookies2send;
    if ($req_mthd == "POST") {
        $REQUEST .= "Content-length: " . strlen($content) . $eol;
        $REQUEST .= "Content-type: application/x-www-form-urlencoded" . $eol;
    }
    $REQUEST .= $eol; // blank line completes the header section
    if ($req_mthd == "POST") {
        /* exactly Content-length bytes: no trailing newline is appended */
        $REQUEST .= $content;
    }
    fputs($fp, $REQUEST);

    /* read the response line by line until EOF or the deadline. Everything
       up to and including the first blank line becomes $header; the rest
       accumulates in $view_output. */
    $header = '';
    $view_output = '';
    $header_done = false;
    $timed_out = false;
    while (!feof($fp)) {
        $remaining = $deadline - time();
        if ($remaining <= 0) {
            $timed_out = true;
            break;
        }
        /* cap each read at the time left so a stalled server cannot block
           past the deadline */
        stream_set_timeout($fp, $remaining);
        $output = fgets($fp);
        if ($output === false) {
            /* EOF, read timeout or error: only a timeout is an incomplete download */
            $meta = stream_get_meta_data($fp);
            $timed_out = !empty($meta["timed_out"]);
            break;
        }
        $view_output .= $output;
        if (!$header_done && ($output == "\n" || $output == "\r\n")) {
            $header = $view_output;
            $view_output = '';
            $header_done = true;
        }
    }
    fclose($fp);

    if ($timed_out) {
        return array("content"=>"$view_output","headers"=>"$header","errcode"=>0,"errmsg"=>"Non-Fatal Error: Timed out while downloading page");
    }
    return array("content"=>"$view_output","headers"=>"$header","errcode"=>1,"errmsg"=>"Success");
} // end function get_url
/* * */
function get_headers($h) {
$array = exp