elixer Sat Feb 10 18:38:40 2001 EDT
Modified files:
/php4/ext/standard file.c file.h
Log:
Fix for bug #4556
# This is pretty much a total rewrite of get_meta_tags using a simple
# handwritten tokenizer. It might be overkill, but it works.
Index: php4/ext/standard/file.c
diff -u php4/ext/standard/file.c:1.138 php4/ext/standard/file.c:1.139
--- php4/ext/standard/file.c:1.138 Fri Feb 9 07:28:23 2001
+++ php4/ext/standard/file.c Sat Feb 10 18:38:40 2001
@@ -20,7 +20,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: file.c,v 1.138 2001/02/09 15:28:23 rasmus Exp $ */
+/* $Id: file.c,v 1.139 2001/02/11 02:38:40 elixer Exp $ */
/* Synced with php 3.0 revision 1.218 1999-06-16 [ssb] */
@@ -260,12 +260,13 @@
{
pval **filename, **arg2;
FILE *fp;
- char buf[8192];
- char buf_lcase[8192];
int use_include_path = 0;
int issock=0, socketd=0;
- int len, var_namelen;
- char var_name[50],*val=NULL,*tmp,*end,*slashed;
+ int in_tag=0, in_meta_tag=0, looking_for_val=0, done=0, ulc=0;
+ int num_parts=0, lc=0;
+ int token_len=0;
+ char *token_data=NULL, *name=NULL, *value=NULL, *temp=NULL;
+ php_meta_tags_token tok, tok_last;
PLS_FETCH();
/* check args */
@@ -306,83 +307,78 @@
}
RETURN_FALSE;
}
- /* Now loop through the file and do the magic quotes thing if needed */
- memset(buf, 0, 8191);
- while((FP_FGETS(buf,8191,socketd,fp,issock) != NULL)) {
- memcpy(buf_lcase, buf, 8191);
- php_strtolower(buf_lcase, 8191);
- if (php_memnstr(buf_lcase, "</head>", sizeof("</head>")-1, buf_lcase +
8191))
- break;
- if(php_memnstr(buf_lcase, "<meta", sizeof("<meta")-1, buf_lcase +
8191)) {
+ tok_last = TOK_EOF;
- memset(var_name,0,50);
- /* get the variable name from the name attribute of the meta
tag */
- tmp = php_memnstr(buf_lcase, "name=\"", sizeof("name=\"")-1,
buf_lcase + 8191);
- if(tmp) {
- tmp = &buf[tmp - buf_lcase];
- tmp+=6;
- end=strstr(tmp,"\"");
- if(end) {
- unsigned char *c;
- *end='\0';
- snprintf(var_name,50,"%s",tmp);
- *end='"';
-
- c = (unsigned char*)var_name;
- while (*c) {
- switch(*c) {
- case '.':
- case '\\':
- case '+':
- case '*':
- case '?':
- case '[':
- case '^':
- case ']':
- case '$':
- case '(':
- case ')':
- case ' ':
- *c++ ='_';
- break;
- default:
- *c++ =
tolower((unsigned char)*c);
- }
- }
- var_namelen=strlen(var_name);
+ while (!done && (tok =
+php_next_meta_token(fp,socketd,issock,&ulc,&lc,&token_data,&token_len)) != TOK_EOF) {
+ if (tok == TOK_ID) {
+ if (tok_last == TOK_OPENTAG) {
+ in_meta_tag = !strcasecmp("meta",token_data);
+ } else if (tok_last == TOK_SLASH && in_tag) {
+ if (strcasecmp("head",token_data) == 0) {
+ /* We are done here! */
+ done = 1;
}
-
- /* get the variable value from the content attribute
of the meta tag */
- tmp = php_memnstr(buf_lcase, "content=\"",
sizeof("content=\"")-1, buf_lcase + 8191);
- val = NULL;
- if(tmp) {
- tmp = &buf[tmp - buf_lcase];
- tmp+=9;
- end=strstr(tmp,"\"");
- if(end) {
- *end='\0';
- val=estrdup(tmp);
- *end='"';
+ } else {
+ if (in_meta_tag) {
+ if (strcasecmp("name",token_data) == 0 ||
+strcasecmp("content",token_data) == 0) {
+ looking_for_val = 1;
+ } else {
+ looking_for_val = 0;
}
}
}
- if(*var_name && val) {
+ } else if (tok == TOK_STRING && tok_last == TOK_EQUAL &&
+looking_for_val) {
+ if (!num_parts) {
+ /* First, get the name value and store it */
+ temp = name = estrndup(token_data,token_len);
+ while (temp && *temp) {
+ if (strchr(".\\+*?[^]$() ",*temp)) {
+ *temp = '_';
+ }
+ temp++;
+ }
+ num_parts++;
+ } else {
+ /* Then get the value value and store it, quoting if
+necessary */
if (PG(magic_quotes_runtime)) {
- slashed = php_addslashes(val,0,&len,0);
+ value =
+php_addslashes(token_data,0,&token_len,0);
} else {
- slashed = estrndup(val,strlen(val));
+ value = estrndup(token_data,token_len);
}
- add_assoc_string(return_value, var_name, slashed, 0);
- efree(val);
+
+ /* Insert the value into the array */
+ add_assoc_string(return_value, name, value, 0);
+ num_parts = 0;
+ }
+ looking_for_val = 0;
+ } else if (tok == TOK_OPENTAG) {
+ if (looking_for_val) {
+ looking_for_val = 0;
}
+ in_tag = 1;
+ } else if (tok == TOK_CLOSETAG) {
+ /* We never made it to the value, free the name */
+ if (num_parts) {
+ efree(name);
+ }
+ /* Reset all of our flags */
+ in_tag = in_meta_tag = looking_for_val = num_parts = 0;
}
- }
- if (issock) {
- SOCK_FCLOSE(socketd);
- } else {
- fclose(fp);
+
+ tok_last = tok;
+
+ if (token_data)
+ efree(token_data);
+
+ token_data = NULL;
}
+
+ if (issock) {
+ SOCK_FCLOSE(socketd);
+ } else {
+ fclose(fp);
+ }
}
/* }}} */
@@ -2100,6 +2096,88 @@
}
return len;
+}
+
+/* Tokenizes an HTML file for get_meta_tags */
+php_meta_tags_token php_next_meta_token(FILE *fp, int socketd, int issock, int
+*use_last_char, int *last_char, char **data, int *datalen) {
+ int ch;
+ char buff[META_DEF_BUFSIZE + 1];
+
+ memset((void *)buff,0,META_DEF_BUFSIZE + 1);
+
+ while (*use_last_char || (!FP_FEOF(socketd,fp,issock) && (ch =
+FP_FGETC(socketd,fp,issock)))) {
+
+ if(FP_FEOF(socketd,fp,issock))
+ break;
+
+ if (*use_last_char) {
+ ch = *last_char;
+ *use_last_char = 0;
+ }
+
+ switch (ch) {
+ case '<':
+ return TOK_OPENTAG;
+ break;
+ case '>':
+ return TOK_CLOSETAG;
+ break;
+ case '=':
+ return TOK_EQUAL;
+ break;
+ case '/':
+ return TOK_SLASH;
+ break;
+ case '"':
+ *datalen = 0;
+ while (!FP_FEOF(socketd,fp,issock) && (ch = FP_FGETC(socketd,fp,issock))
+&& ch != '"') {
+ buff[(*datalen)++] = ch;
+
+ if (*datalen == META_DEF_BUFSIZE)
+ break;
+ }
+
+ *data = (char *) emalloc( *datalen + 1 );
+ memcpy(*data,buff,*datalen+1);
+
+ return TOK_STRING;
+ break;
+ case '\n':
+ case '\r':
+ case '\t':
+ break;
+ case ' ':
+ return TOK_SPACE;
+ break;
+ default:
+ if (isalpha(ch)) {
+ *datalen = 0;
+ buff[(*datalen)++] = ch;
+ while (!FP_FEOF(socketd,fp,issock) && (ch =
+FP_FGETC(socketd,fp,issock)) && (isalpha(ch) || ch == '-')) {
+ buff[(*datalen)++] = ch;
+
+ if (*datalen == META_DEF_BUFSIZE)
+ break;
+ }
+
+ /* This is ugly, but we have to replace ungetc */
+ if (!isalpha(ch) && ch != '-') {
+ *use_last_char = 1;
+ *last_char = ch;
+ }
+
+ *data = (char *) emalloc( *datalen + 1 );
+ memcpy(*data,buff,*datalen+1);
+
+ return TOK_ID;
+ } else {
+ return TOK_OTHER;
+ }
+ break;
+ }
+ }
+
+ return TOK_EOF;
}
/*
Index: php4/ext/standard/file.h
diff -u php4/ext/standard/file.h:1.32 php4/ext/standard/file.h:1.33
--- php4/ext/standard/file.h:1.32 Sat Jan 13 15:49:44 2001
+++ php4/ext/standard/file.h Sat Feb 10 18:38:40 2001
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: file.h,v 1.32 2001/01/13 23:49:44 zeev Exp $ */
+/* $Id: file.h,v 1.33 2001/02/11 02:38:40 elixer Exp $ */
/* Synced with php 3.0 revision 1.30 1999-06-16 [ssb] */
@@ -72,6 +72,22 @@
PHPAPI int php_file_le_popen(void);
PHPAPI int php_file_le_socket(void);
PHPAPI int php_copy_file(char *src, char *dest);
+
+#define META_DEF_BUFSIZE 8192
+
+typedef enum _php_meta_tags_token {
+ TOK_EOF = 0,
+ TOK_OPENTAG,
+ TOK_CLOSETAG,
+ TOK_SLASH,
+ TOK_EQUAL,
+ TOK_SPACE,
+ TOK_ID,
+ TOK_STRING,
+ TOK_OTHER
+} php_meta_tags_token;
+
+php_meta_tags_token php_next_meta_token(FILE *, int, int, int *, int *, char **, int
+*);
typedef struct {
int fgetss_state;
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]
To contact the list administrators, e-mail: [EMAIL PROTECTED]