Re: [PHP-CVS] cvs: php4 /ext/standard file.c file.h
elixer Sat Feb 10 18:38:40 2001 EDT Modified files: /php4/ext/standard file.c file.h Log: Fix for bug #4556 # This is pretty much a total rewrite of get_meta_tags using a simple # handwritten tokenizer. It might be overkill, but it works. I'd say this is news worthy... Can you add an entry into the NEWS file. -Sterling -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED] To contact the list administrators, e-mail: [EMAIL PROTECTED]
Re: [PHP-CVS] cvs: php4 /ext/standard file.c file.h
[Sun, 11 Feb 2001] Sterling Hughes said: elixer Sat Feb 10 18:38:40 2001 EDT Modified files: /php4/ext/standard file.c file.h Log: Fix for bug #4556 # This is pretty much a total rewrite of get_meta_tags using a simple # handwritten tokenizer. It might be overkill, but it works. I'd say this is news worthy... Can you add an entry into the NEWS file. I agree. However, on first glance, it only seems to grab the meta-tags that have the NAME/CONTENT attributes, not the HTTP-EQUIV/CONTENT attributes. This was a major drawback of the original code (IMHO). I wrote my own get_metatags function in PHP. Find the code below. If someone likes this and wants to convert it into C ... <?php function get_metatags($url) { if (substr($url,0,7)=='http://') { $url = substr($url,7); } if( !($fp = fopen('http://'.$url, 'r')) ) { return false; } else { $file = ''; while (!feof($fp) && !stristr($file,'</head>') ) { $file.= fgets($fp, 80); } fclose($fp); $file = str_replace("\r", '', $file); $file = str_replace("\n", '', $file); $result = array(); preg_match_all('/<meta(.+?)>/i', $file, $temp); if (is_array($temp[1])) { foreach($temp[1] as $key=>$match) { $t = $n = $c = ''; if (preg_match('/name=("|\')(.*?)\\1/i', $match, $b)) { $t = 'NAME'; $n = $b[2]; } else if (preg_match('/http-equiv=("|\')(.*?)\\1/i', $match, $b)) { $t = 'HTTP-EQUIV'; $n = $b[2]; } if (preg_match('/content=("|\')(.*?)\\1/i', $match, $b)) { $c = $b[2]; } if ($t && $n && $c) { $result[] = array( 'type' => $t, 'meta_name' => $n, 'meta_content' => $c ); } } } return $result; } } ?> - Colin -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED] To contact the list administrators, e-mail: [EMAIL PROTECTED]
RE: [PHP-CVS] cvs: php4 /ext/standard file.c file.h
Well, I was trying to fix one bug, not introduce others. If you read the documentation for get_meta_tags you will see that it returns an associative array that is keyed by the value of the NAME attribute while the value is the data within the CONTENT attribute. If other members of the developers list would like to suggest a solution for this problem, I would be more than happy to implement it. Right now I don't know how to add what you are asking for without breaking existing PHP code. Sean -Original Message- From: Colin Viebrock [mailto:[EMAIL PROTECTED]] Sent: Sunday, February 11, 2001 10:52 PM To: Sterling Hughes Cc: Sean Bright; [EMAIL PROTECTED] Subject: Re: [PHP-CVS] cvs: php4 /ext/standard file.c file.h [Sun, 11 Feb 2001] Sterling Hughes said: elixer Sat Feb 10 18:38:40 2001 EDT Modified files: /php4/ext/standard file.c file.h Log: Fix for bug #4556 # This is pretty much a total rewrite of get_meta_tags using a simple # handwritten tokenizer. It might be overkill, but it works. I'd say this is news worthy... Can you add an entry into the NEWS file. I agree. However, on first glance, it only seems to grab the meta-tags that have the NAME/CONTENT attributes, not the HTTP-EQUIV/CONTENT attributes. This was a major drawback of the original code (IMHO). I wrote my own get_metatags function in PHP. Find the code below. If someone likes this and wants to convert it into C ... 
<?php function get_metatags($url) { if (substr($url,0,7)=='http://') { $url = substr($url,7); } if( !($fp = fopen('http://'.$url, 'r')) ) { return false; } else { $file = ''; while (!feof($fp) && !stristr($file,'</head>') ) { $file.= fgets($fp, 80); } fclose($fp); $file = str_replace("\r", '', $file); $file = str_replace("\n", '', $file); $result = array(); preg_match_all('/<meta(.+?)>/i', $file, $temp); if (is_array($temp[1])) { foreach($temp[1] as $key=>$match) { $t = $n = $c = ''; if (preg_match('/name=("|\')(.*?)\\1/i', $match, $b)) { $t = 'NAME'; $n = $b[2]; } else if (preg_match('/http-equiv=("|\')(.*?)\\1/i', $match, $b)) { $t = 'HTTP-EQUIV'; $n = $b[2]; } if (preg_match('/content=("|\')(.*?)\\1/i', $match, $b)) { $c = $b[2]; } if ($t && $n && $c) { $result[] = array( 'type' => $t, 'meta_name' => $n, 'meta_content' => $c ); } } } return $result; } } ?> - Colin -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED] To contact the list administrators, e-mail: [EMAIL PROTECTED] -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED] To contact the list administrators, e-mail: [EMAIL PROTECTED]
[PHP-CVS] cvs: php4 /ext/standard file.c file.h
elixer Sat Feb 10 18:38:40 2001 EDT Modified files: /php4/ext/standard file.c file.h Log: Fix for bug #4556 # This is pretty much a total rewrite of get_meta_tags using a simple # handwritten tokenizer. It might be overkill, but it works. Index: php4/ext/standard/file.c diff -u php4/ext/standard/file.c:1.138 php4/ext/standard/file.c:1.139 --- php4/ext/standard/file.c:1.138 Fri Feb 9 07:28:23 2001 +++ php4/ext/standard/file.c Sat Feb 10 18:38:40 2001 @@ -20,7 +20,7 @@ +--+ */ -/* $Id: file.c,v 1.138 2001/02/09 15:28:23 rasmus Exp $ */ +/* $Id: file.c,v 1.139 2001/02/11 02:38:40 elixer Exp $ */ /* Synced with php 3.0 revision 1.218 1999-06-16 [ssb] */ @@ -260,12 +260,13 @@ { pval **filename, **arg2; FILE *fp; - char buf[8192]; - char buf_lcase[8192]; int use_include_path = 0; int issock=0, socketd=0; - int len, var_namelen; - char var_name[50],*val=NULL,*tmp,*end,*slashed; + int in_tag=0, in_meta_tag=0, looking_for_val=0, done=0, ulc=0; + int num_parts=0, lc=0; + int token_len=0; + char *token_data=NULL, *name=NULL, *value=NULL, *temp=NULL; + php_meta_tags_token tok, tok_last; PLS_FETCH(); /* check args */ @@ -306,83 +307,78 @@ } RETURN_FALSE; } - /* Now loop through the file and do the magic quotes thing if needed */ - memset(buf, 0, 8191); - while((FP_FGETS(buf,8191,socketd,fp,issock) != NULL)) { - memcpy(buf_lcase, buf, 8191); - php_strtolower(buf_lcase, 8191); - if (php_memnstr(buf_lcase, "</head>", sizeof("</head>")-1, buf_lcase + 8191)) - break; - if(php_memnstr(buf_lcase, "<meta", sizeof("<meta")-1, buf_lcase + 8191)) { + tok_last = TOK_EOF; - memset(var_name,0,50); - /* get the variable name from the name attribute of the meta tag */ - tmp = php_memnstr(buf_lcase, "name=\"", sizeof("name=\"")-1, buf_lcase + 8191); - if(tmp) { - tmp = &buf[tmp - buf_lcase]; - tmp+=6; - end=strstr(tmp,"\""); - if(end) { - unsigned char *c; - *end='\0'; - snprintf(var_name,50,"%s",tmp); - *end='"'; - - c = (unsigned char*)var_name; - while (*c) { - switch(*c) { - case '.': - case '\\': 
- case '+': - case '*': - case '?': - case '[': - case '^': - case ']': - case '$': - case '(': - case ')': - case ' ': - *c++ ='_'; - break; - default: - *c++ = tolower((unsigned char)*c); - } - } - var_namelen=strlen(var_name); + while (!done && (tok = php_next_meta_token(fp,socketd,issock,&ulc,&lc,&token_data,&token_len)) != TOK_EOF) { + if (tok == TOK_ID) { + if (tok_last == TOK_OPENTAG) { + in_meta_tag = !strcasecmp("meta",token_data); + } else if (tok_last == TOK_SLASH && in_tag) { + if (strcasecmp("head",token_data) == 0) { + /* We are done here! */ + done = 1; } - - /* get the variable value from the content attribute of the meta tag */ - tmp = php_memnstr(buf_lcase, "content=\"", sizeof("content=\"")-1, buf_lcase + 8191); - val = NULL; -