dmitry Fri May 18 13:12:47 2007 UTC Modified files: /ZendEngine2 zend_compile.c zend_compile.h zend_language_parser.y zend_language_scanner.l zend_vm_def.h zend_vm_execute.h /php-src/ext/tokenizer tokenizer.c /php-src/ext/tokenizer/tests 001.phpt bug26463.phpt Log: Improved compilation of heredocs and interpolated strings. (Matt)
http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_compile.c?r1=1.748&r2=1.749&diff_format=u Index: ZendEngine2/zend_compile.c diff -u ZendEngine2/zend_compile.c:1.748 ZendEngine2/zend_compile.c:1.749 --- ZendEngine2/zend_compile.c:1.748 Fri May 11 09:38:52 2007 +++ ZendEngine2/zend_compile.c Fri May 18 13:12:47 2007 @@ -17,7 +17,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: zend_compile.c,v 1.748 2007/05/11 09:38:52 tony2001 Exp $ */ +/* $Id: zend_compile.c,v 1.749 2007/05/18 13:12:47 dmitry Exp $ */ #include <zend_language_parser.h> #include "zend.h" @@ -989,25 +989,25 @@ } -void zend_do_add_char(znode *result, znode *op1, znode *op2 TSRMLS_DC) -{ - zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC); - - opline->opcode = ZEND_ADD_CHAR; - opline->op1 = *op1; - opline->op2 = *op2; - opline->op2.op_type = IS_CONST; - opline->extended_value = CG(literal_type); - opline->result = opline->op1; - *result = opline->result; -} - - void zend_do_add_string(znode *result, znode *op1, znode *op2 TSRMLS_DC) { - zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC); + zend_op *opline; - opline->opcode = ZEND_ADD_STRING; + if (Z_UNILEN(op2->u.constant) > 1) { + opline = get_next_op(CG(active_op_array) TSRMLS_CC); + opline->opcode = ZEND_ADD_STRING; + } else if (Z_UNILEN(op2->u.constant) == 1) { + int ch = (Z_TYPE(op2->u.constant) == IS_UNICODE) ? 
*Z_USTRVAL(op2->u.constant) : *Z_STRVAL(op2->u.constant); + + /* Free memory and use ZEND_ADD_CHAR in case of 1 character strings */ + efree(Z_UNIVAL(op2->u.constant).v); + ZVAL_LONG(&op2->u.constant, ch); + opline = get_next_op(CG(active_op_array) TSRMLS_CC); + opline->opcode = ZEND_ADD_CHAR; + } else { /* String can be empty after a variable at the end of a heredoc */ + efree(Z_UNIVAL(op2->u.constant).v); + return; + } opline->op1 = *op1; opline->op2 = *op2; opline->op2.op_type = IS_CONST; @@ -4154,33 +4154,6 @@ } -void zend_do_end_heredoc(TSRMLS_D) -{ - int opline_num = get_next_op_number(CG(active_op_array))-1; - zend_op *opline = &CG(active_op_array)->opcodes[opline_num]; - - if (opline->opcode != ZEND_ADD_STRING) { - return; - } - - if (Z_TYPE(opline->op2.u.constant) == IS_UNICODE) { - Z_USTRVAL(opline->op2.u.constant)[(Z_USTRLEN(opline->op2.u.constant)--)-1] = 0; - if (Z_USTRLEN(opline->op2.u.constant)>0) { - if (Z_USTRVAL(opline->op2.u.constant)[Z_USTRLEN(opline->op2.u.constant)-1]=='\r') { - Z_USTRVAL(opline->op2.u.constant)[(Z_USTRLEN(opline->op2.u.constant)--)-1] = 0; - } - } - } else { - Z_STRVAL(opline->op2.u.constant)[(Z_STRLEN(opline->op2.u.constant)--)-1] = 0; - if (Z_STRLEN(opline->op2.u.constant)>0) { - if (Z_STRVAL(opline->op2.u.constant)[Z_STRLEN(opline->op2.u.constant)-1]=='\r') { - Z_STRVAL(opline->op2.u.constant)[(Z_STRLEN(opline->op2.u.constant)--)-1] = 0; - } - } - } -} - - void zend_do_exit(znode *result, znode *message TSRMLS_DC) { zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC); @@ -4425,12 +4398,12 @@ { int retval; -again: if (CG(increment_lineno)) { CG(zend_lineno)++; CG(increment_lineno) = 0; } +again: Z_TYPE(zendlval->u.constant) = IS_LONG; retval = lex_scan(&zendlval->u.constant TSRMLS_CC); switch (retval) { @@ -4441,8 +4414,7 @@ goto again; case T_CLOSE_TAG: - if (LANG_SCNG(yy_text)[LANG_SCNG(yy_leng)-1]=='\n' - || (LANG_SCNG(yy_text)[LANG_SCNG(yy_leng)-2]=='\r' && LANG_SCNG(yy_text)[LANG_SCNG(yy_leng)-1])) { + if 
(LANG_SCNG(yy_text)[LANG_SCNG(yy_leng)-1] != '>') { CG(increment_lineno) = 1; } retval = ';'; /* implicit ; */ http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_compile.h?r1=1.356&r2=1.357&diff_format=u Index: ZendEngine2/zend_compile.h diff -u ZendEngine2/zend_compile.h:1.356 ZendEngine2/zend_compile.h:1.357 --- ZendEngine2/zend_compile.h:1.356 Thu Mar 8 17:30:28 2007 +++ ZendEngine2/zend_compile.h Fri May 18 13:12:47 2007 @@ -17,7 +17,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: zend_compile.h,v 1.356 2007/03/08 17:30:28 helly Exp $ */ +/* $Id: zend_compile.h,v 1.357 2007/05/18 13:12:47 dmitry Exp $ */ #ifndef ZEND_COMPILE_H #define ZEND_COMPILE_H @@ -404,7 +404,6 @@ void zend_do_free(znode *op1 TSRMLS_DC); void zend_do_init_string(znode *result TSRMLS_DC); -void zend_do_add_char(znode *result, znode *op1, znode *op2 TSRMLS_DC); void zend_do_add_string(znode *result, znode *op1, znode *op2 TSRMLS_DC); void zend_do_add_variable(znode *result, znode *op1, znode *op2 TSRMLS_DC); @@ -499,8 +498,6 @@ void zend_do_declare_stmt(znode *var, znode *val TSRMLS_DC); void zend_do_declare_end(znode *declare_token TSRMLS_DC); -void zend_do_end_heredoc(TSRMLS_D); - void zend_do_exit(znode *result, znode *message TSRMLS_DC); void zend_do_begin_silence(znode *strudel_token TSRMLS_DC); http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_language_parser.y?r1=1.181&r2=1.182&diff_format=u Index: ZendEngine2/zend_language_parser.y diff -u ZendEngine2/zend_language_parser.y:1.181 ZendEngine2/zend_language_parser.y:1.182 --- ZendEngine2/zend_language_parser.y:1.181 Thu Apr 26 15:50:12 2007 +++ ZendEngine2/zend_language_parser.y Fri May 18 13:12:47 2007 @@ -18,14 +18,12 @@ +----------------------------------------------------------------------+ */ -/* $Id: zend_language_parser.y,v 1.181 2007/04/26 15:50:12 andrei Exp $ */ +/* $Id: zend_language_parser.y,v 1.182 2007/05/18 13:12:47 dmitry Exp $ */ /* * LALR shift/reduce conflicts and how they are 
resolved: * * - 2 shift/reduce conflicts due to the dangeling elseif/else ambiguity. Solved by shift. - * - 1 shift/reduce conflict due to arrays within encapsulated strings. Solved by shift. - * - 1 shift/reduce conflict due to objects within encapsulated strings. Solved by shift. * */ @@ -49,7 +47,7 @@ %} %pure_parser -%expect 4 +%expect 2 %left T_INCLUDE T_INCLUDE_ONCE T_EVAL T_REQUIRE T_REQUIRE_ONCE %left ',' @@ -718,9 +716,9 @@ | class_constant { $$ = $1; } | common_scalar { $$ = $1; } | '"' { CG(literal_type) = UG(unicode)?IS_UNICODE:IS_STRING; } encaps_list '"' { $$ = $3; } - | T_START_HEREDOC { CG(literal_type) = UG(unicode)?IS_UNICODE:IS_STRING; } encaps_list T_END_HEREDOC { $$ = $3; zend_do_end_heredoc(TSRMLS_C); } + | T_START_HEREDOC { CG(literal_type) = UG(unicode)?IS_UNICODE:IS_STRING; } encaps_list T_END_HEREDOC { $$ = $3; } | T_BINARY_DOUBLE { CG(literal_type) = IS_STRING; } encaps_list '"' { $$ = $3; } - | T_BINARY_HEREDOC { CG(literal_type) = IS_STRING; } encaps_list T_END_HEREDOC { $$ = $3; zend_do_end_heredoc(TSRMLS_C); } + | T_BINARY_HEREDOC { CG(literal_type) = IS_STRING; } encaps_list T_END_HEREDOC { $$ = $3; } ; @@ -879,16 +877,7 @@ encaps_list: encaps_list encaps_var { zend_do_end_variable_parse(BP_VAR_R, 0 TSRMLS_CC); zend_do_add_variable(&$$, &$1, &$2 TSRMLS_CC); } - | encaps_list T_STRING { zend_do_add_string(&$$, &$1, &$2 TSRMLS_CC); } - | encaps_list T_NUM_STRING { zend_do_add_string(&$$, &$1, &$2 TSRMLS_CC); } | encaps_list T_ENCAPSED_AND_WHITESPACE { zend_do_add_string(&$$, &$1, &$2 TSRMLS_CC); } - | encaps_list T_CHARACTER { zend_do_add_char(&$$, &$1, &$2 TSRMLS_CC); } - | encaps_list T_BAD_CHARACTER { zend_do_add_string(&$$, &$1, &$2 TSRMLS_CC); } - | encaps_list '[' { Z_LVAL($2.u.constant) = (long) '['; zend_do_add_char(&$$, &$1, &$2 TSRMLS_CC); } - | encaps_list ']' { Z_LVAL($2.u.constant) = (long) ']'; zend_do_add_char(&$$, &$1, &$2 TSRMLS_CC); } - | encaps_list '{' { Z_LVAL($2.u.constant) = (long) '{'; zend_do_add_char(&$$, &$1, 
&$2 TSRMLS_CC); } - | encaps_list '}' { Z_LVAL($2.u.constant) = (long) '}'; zend_do_add_char(&$$, &$1, &$2 TSRMLS_CC); } - | encaps_list T_OBJECT_OPERATOR { znode tmp; Z_LVAL($2.u.constant) = (long) '-'; zend_do_add_char(&tmp, &$1, &$2 TSRMLS_CC); Z_LVAL($2.u.constant) = (long) '>'; zend_do_add_char(&$$, &tmp, &$2 TSRMLS_CC); } | /* empty */ { zend_do_init_string(&$$ TSRMLS_CC); } ; http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_language_scanner.l?r1=1.164&r2=1.165&diff_format=u Index: ZendEngine2/zend_language_scanner.l diff -u ZendEngine2/zend_language_scanner.l:1.164 ZendEngine2/zend_language_scanner.l:1.165 --- ZendEngine2/zend_language_scanner.l:1.164 Thu Apr 19 11:24:48 2007 +++ ZendEngine2/zend_language_scanner.l Fri May 18 13:12:47 2007 @@ -19,7 +19,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: zend_language_scanner.l,v 1.164 2007/04/19 11:24:48 tony2001 Exp $ */ +/* $Id: zend_language_scanner.l,v 1.165 2007/05/18 13:12:47 dmitry Exp $ */ #define yyleng SCNG(yy_leng) #define yytext SCNG(yy_text) @@ -37,8 +37,11 @@ %x ST_DOUBLE_QUOTES %x ST_BACKQUOTE %x ST_HEREDOC +%x ST_START_HEREDOC +%x ST_END_HEREDOC %x ST_LOOKING_FOR_PROPERTY %x ST_LOOKING_FOR_VARNAME +%x ST_VAR_OFFSET %x ST_COMMENT %x ST_DOC_COMMENT %x ST_ONE_LINE_COMMENT @@ -99,9 +102,7 @@ char *p = (s), *boundary = p+(l); \ \ while (p<boundary) { \ - if (*p == '\n') { \ - CG(zend_lineno)++; \ - } else if ((*p == '\r') && (p+1 < boundary) && (*(p+1) != '\n')) { \ + if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) { \ CG(zend_lineno)++; \ } \ p++; \ @@ -313,30 +314,6 @@ return -1; } -static inline zend_bool zend_digits_to_codepoint(char *s, char *end, UChar32 *c, int8_t digits) -{ - int8_t n = 0; - int8_t digit = 0; - UChar32 codepoint = 0; - - while (s < end && n < digits) { - digit = zend_get_hex_digit((UChar)*s); - if (digit < 0) { - break; - } - codepoint = (codepoint << 4) | digit; - ++s; - ++n; - } - - if (n < digits) { - return 0; - } - - *c = 
codepoint; - return 1; -} - static inline zend_bool zend_udigits_to_codepoint(UChar *s, UChar *end, UChar32 *c, int8_t digits) { int8_t n = 0; @@ -361,20 +338,6 @@ return 1; } -static inline int zend_uchar_from_name(char *name, UChar32 *c) -{ - UChar32 codepoint = 0; - UErrorCode status = U_ZERO_ERROR; - - codepoint = u_charFromName(U_UNICODE_CHAR_NAME, name, &status); - if (U_SUCCESS(status)) { - *c = codepoint; - return 1; - } else { - return 0; - } -} - static inline int zend_uchar_from_uname(UChar *name, int name_len, UChar32 *c TSRMLS_DC) { UChar32 codepoint = 0; @@ -1029,7 +992,7 @@ Z_STRVAL_P(zendlval) = (char *)estrndup(yytext, yyleng); \ Z_STRLEN_P(zendlval) = yyleng; -int zend_scan_unicode_double_string(zval *zendlval TSRMLS_DC) +static int zend_scan_unicode_escape_string(zval *zendlval, char *str, int len, UChar quote_type, int type TSRMLS_DC) { register UChar *s, *t, c; UChar *end; @@ -1039,9 +1002,7 @@ int8_t bits; int8_t n; - HANDLE_NEWLINES(yytext, yyleng); - - if (!zend_copy_scanner_string(zendlval, yytext+1, yyleng-2, IS_UNICODE, SCNG(output_conv) TSRMLS_CC)) { + if (!zend_copy_scanner_string(zendlval, str, len, IS_UNICODE, SCNG(output_conv) TSRMLS_CC)) { return 0; } @@ -1074,9 +1035,15 @@ *t++ = (UChar) 0x09; /*'\t'*/ Z_USTRLEN_P(zendlval)--; break; + case 0x22: /*'"'*/ + case 0x60: /*'`'*/ + if (c != quote_type) { + *t++ = 0x5C; /*'\\'*/ + *t++ = *s; + break; + } case 0x5C: /*'\\'*/ case 0x24: /*'$'*/ - case 0x22: /*'"'*/ *t++ = *s; Z_USTRLEN_P(zendlval)--; break; @@ -1110,6 +1077,20 @@ max_digits = 6; Z_USTRLEN_P(zendlval)--; break; + case 0x78: /*'x'*/ + case 0x58: /*'X'*/ + if ((digit = zend_get_hex_digit(*(s+1))) >= 0) { + min_digits = 1; + max_digits = 2; + Z_USTRLEN_P(zendlval)--; + s++; + n = 1; /* already have one digit */ + codepoint = digit; + } else { + *t++ = 0x5C; /*'\\'*/ + *t++ = *s; + } + break; default: digit = zend_get_octal_digit(*s); if (digit >= 0) { @@ -1118,14 +1099,6 @@ bits = 3; n = 1; /* already have one digit */ 
codepoint = digit; - } else if (c == 0x78 /*'x'*/ - && (s+1) < end && (digit = zend_get_hex_digit(*(s+1))) >= 0) { - min_digits = 1; - max_digits = 2; - Z_USTRLEN_P(zendlval)--; - s++; - n = 1; /* already have one digit */ - codepoint = digit; } else { *t++ = 0x5C; /*'\\'*/ *t++ = *s; @@ -1163,26 +1136,30 @@ efree(Z_USTRVAL_P(zendlval)); return 0; } - } else { - s++; + + /* s is already incremented and not past a newline */ + continue; } } else { - *t++ = *s++; + *t++ = *s; } + + if (*s == 0x0A /*'\n'*/ || (*s == 0x0D /*'\r'*/ && (*(s+1) != 0x0A /*'\n'*/))) { + CG(zend_lineno)++; + } + s++; } *t = 0; - return T_CONSTANT_ENCAPSED_STRING; + return type; } -int zend_scan_unicode_single_string(zval *zendlval TSRMLS_DC) +static int zend_scan_unicode_single_string(zval *zendlval TSRMLS_DC) { register UChar *s, *t; UChar *end; UChar32 codepoint = 0; - HANDLE_NEWLINES(yytext, yyleng); - if (!zend_copy_scanner_string(zendlval, yytext+1, yyleng-2, IS_UNICODE, SCNG(output_conv) TSRMLS_CC)) { return 0; } @@ -1265,25 +1242,26 @@ *t++ = *s; break; } - s++; } else { - *t++ = *s++; + *t++ = *s; } + + if (*s == 0x0A /*'\n'*/ || (*s == 0x0D /*'\r'*/ && (*(s+1) != 0x0A /*'\n'*/))) { + CG(zend_lineno)++; + } + s++; } *t = 0; return T_CONSTANT_ENCAPSED_STRING; } -int zend_scan_binary_double_string(zval *zendlval, int bprefix TSRMLS_DC) +static void zend_scan_binary_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC) { register char *s, *t; char *end; - Z_STRVAL_P(zendlval) = estrndup(yytext+bprefix+1, yyleng-bprefix-2); - Z_STRLEN_P(zendlval) = yyleng-bprefix-2; - Z_TYPE_P(zendlval) = IS_STRING; - HANDLE_NEWLINES(yytext, yyleng); + ZVAL_STRINGL(zendlval, str, len, 1); /* convert escape sequences */ s = t = Z_STRVAL_P(zendlval); @@ -1307,12 +1285,37 @@ *t++ = '\t'; Z_STRLEN_P(zendlval)--; break; + case '"': + case '`': + if (*s != quote_type) { + *t++ = '\\'; + *t++ = *s; + break; + } case '\\': case '$': - case '"': *t++ = *s; Z_STRLEN_P(zendlval)--; break; + 
case 'x': + case 'X': + if (ZEND_IS_HEX(*(s+1))) { + char hex_buf[3] = { 0, 0, 0 }; + + Z_STRLEN_P(zendlval)--; /* for the 'x' */ + + hex_buf[0] = *(++s); + Z_STRLEN_P(zendlval)--; + if (ZEND_IS_HEX(*(s+1))) { + hex_buf[1] = *(++s); + Z_STRLEN_P(zendlval)--; + } + *t++ = (char) strtol(hex_buf, NULL, 16); + } else { + *t++ = '\\'; + *t++ = *s; + } + break; default: /* check for an octal */ if (ZEND_IS_OCT(*s)) { @@ -1320,52 +1323,39 @@ octal_buf[0] = *s; Z_STRLEN_P(zendlval)--; - if ((s+1)<end && ZEND_IS_OCT(*(s+1))) { + if (ZEND_IS_OCT(*(s+1))) { octal_buf[1] = *(++s); Z_STRLEN_P(zendlval)--; - if ((s+1)<end && ZEND_IS_OCT(*(s+1))) { + if (ZEND_IS_OCT(*(s+1))) { octal_buf[2] = *(++s); Z_STRLEN_P(zendlval)--; } } *t++ = (char) strtol(octal_buf, NULL, 8); - } else if (*s=='x' && (s+1)<end && ZEND_IS_HEX(*(s+1))) { - char hex_buf[3] = { 0, 0, 0}; - - Z_STRLEN_P(zendlval)--; /* for the 'x' */ - - hex_buf[0] = *(++s); - Z_STRLEN_P(zendlval)--; - if ((s+1)<end && ZEND_IS_HEX(*(s+1))) { - hex_buf[1] = *(++s); - Z_STRLEN_P(zendlval)--; - } - *t++ = (char) strtol(hex_buf, NULL, 16); } else { *t++ = '\\'; *t++ = *s; } break; } - s++; } else { - *t++ = *s++; + *t++ = *s; + } + + if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) { + CG(zend_lineno)++; } + s++; } *t = 0; - - return T_CONSTANT_ENCAPSED_STRING; } -int zend_scan_binary_single_string(zval *zendlval, int bprefix TSRMLS_DC) +static void zend_scan_binary_single_string(zval *zendlval, char *str, int len TSRMLS_DC) { register char *s, *t; char *end; - Z_STRVAL_P(zendlval) = estrndup(yytext+bprefix+1, yyleng-bprefix-2); - Z_STRLEN_P(zendlval) = yyleng-bprefix-2; - Z_TYPE_P(zendlval) = IS_STRING; - HANDLE_NEWLINES(yytext, yyleng); + ZVAL_STRINGL(zendlval, str, len, 1); /* convert escape sequences */ s = t = Z_STRVAL_P(zendlval); @@ -1387,14 +1377,16 @@ *t++ = *s; break; } - s++; } else { - *t++ = *s++; + *t++ = *s; + } + + if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) { + CG(zend_lineno)++; } + s++; } *t = 0; - - 
return T_CONSTANT_ENCAPSED_STRING; } %} @@ -1407,11 +1399,61 @@ WHITESPACE [ \n\r\t]+ TABS_AND_SPACES [ \t]* TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@] -ENCAPSED_TOKENS [\[\]{}$] -ESCAPED_AND_WHITESPACE [\n\t\r #'.:;,()|^&+-/*=%!~<>?@]+ ANY_CHAR (.|[\n]) NEWLINE ("\r"|"\n"|"\r\n") +/* + * LITERAL_DOLLAR matches unescaped $ that aren't followed by a label character + * or a { and therefore will be taken literally. The case of literal $ before + * a variable or "${" is handled in a rule for each string type + */ +DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$"\\{]|("\\"{ANY_CHAR}))) +BACKQUOTE_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$`\\{]|("\\"{ANY_CHAR}))) +HEREDOC_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$\n\r\\{]|("\\"[^\n\r]))) + +/* + * Usually, HEREDOC_NEWLINE will just function like a simple NEWLINE, but some + * special cases need to be handled. HEREDOC_CHARS doesn't allow a line to + * match when { or $, and/or \ is at the end. (("{"*|"$"*)"\\"?) handles that, + * along with cases where { or $, and/or \ is the ONLY thing on a line + * + * The other case is when a line contains a label, followed by ONLY + * { or $, and/or \ Handled by ({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\")) + */ +HEREDOC_NEWLINE ((({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))|(("{"*|"$"*)"\\"?)){NEWLINE}) + +/* + * This pattern is just used in the next 2 for matching { or literal $, and/or + * \ escape sequence immediately at the beginning of a line or after a label + */ +HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR (("{"+[^$\n\r\\{])|("{"*"\\"[^\n\r])|{HEREDOC_LITERAL_DOLLAR}) + +/* + * These 2 label-related patterns allow HEREDOC_CHARS to continue "regular" + * matching after a newline that starts with either a non-label character or a + * label that isn't followed by a newline. 
Like HEREDOC_CHARS, they won't match + * a variable or "{$" Matching a newline, and possibly label, up TO a variable + * or "{$", is handled in the heredoc rules + * + * The HEREDOC_LABEL_NO_NEWLINE pattern (";"[^$\n\r\\{]) handles cases where ; + * follows a label. [^a-zA-Z0-9_\x7f-\xff;$\n\r\\{] is needed to prevent a label + * character or ; from matching on a possible (real) ending label + */ +HEREDOC_NON_LABEL ([^a-zA-Z_\x7f-\xff$\n\r\\{]|{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR}) +HEREDOC_LABEL_NO_NEWLINE ({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{]|(";"[^$\n\r\\{])|(";"?{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR}))) + +/* + * CHARS matches everything up to a variable or "{$" + * {'s are matched as long as they aren't followed by a $ + * The case of { before "{$" is handled in a rule for each string type + * + * For heredocs, matching continues across/after newlines if/when it's known + * that the next line doesn't contain a possible ending label + */ +DOUBLE_QUOTES_CHARS ("{"*([^$"\\{]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR}) +BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR}) +HEREDOC_CHARS ("{"*([^$\n\r\\{]|("\\"[^\n\r]))|{HEREDOC_LITERAL_DOLLAR}|({HEREDOC_NEWLINE}+({HEREDOC_NON_LABEL}|{HEREDOC_LABEL_NO_NEWLINE}))) + %option noyylineno %option noyywrap %% @@ -1560,11 +1602,15 @@ return T_IMPLEMENTS; } -<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"->" { +<ST_IN_SCRIPTING>"->" { yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC); return T_OBJECT_OPERATOR; } +<ST_LOOKING_FOR_PROPERTY>"->" { + return T_OBJECT_OPERATOR; +} + <ST_LOOKING_FOR_PROPERTY>{LABEL} { yy_pop_state(TSRMLS_C); if (!zend_copy_scanner_string(zendlval, yytext, yyleng, UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { @@ -1906,7 +1952,19 @@ } } -<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{LNUM}|{HNUM} { /* treat numbers (almost) as strings inside encapsulated strings */ +<ST_VAR_OFFSET>0|([1-9][0-9]*) { /* Offset could be treated as a long */ + if 
(yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) { + Z_LVAL_P(zendlval) = strtol(yytext, NULL, 10); + Z_TYPE_P(zendlval) = IS_LONG; + } else { + if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { + return 0; + } + } + return T_NUM_STRING; +} + +<ST_VAR_OFFSET>{LNUM}|{HNUM} { /* Offset must be treated as a string */ if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { return 0; } @@ -2080,7 +2138,40 @@ return T_OPEN_TAG; } -<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL} { +<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} { + if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-1), UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { + return 0; + } + if (UG(unicode) && !zend_check_and_normalize_identifier(zendlval)) { + return 0; + } + return T_VARIABLE; +} + +%{ +/* Make sure a label character follows "->", otherwise there is no property + * and "->" will be taken literally + */ %} +<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] { + yyless(yyleng - 3); + yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC); + + if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-1), UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { + return 0; + } + if (UG(unicode) && !zend_check_and_normalize_identifier(zendlval)) { + return 0; + } + return T_VARIABLE; +} + +%{ +/* A [ always designates a variable offset, regardless of what follows + */ %} +<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" { + yyless(yyleng - 1); + yy_push_state(ST_VAR_OFFSET TSRMLS_CC); + if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-1), UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { return 0; } @@ -2090,6 +2181,21 @@ return T_VARIABLE; } +<ST_VAR_OFFSET>"]" { + yy_pop_state(TSRMLS_C); 
+ return ']'; +} + +<ST_VAR_OFFSET>{TOKENS}|[{}] { + /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */ + return yytext[0]; +} + +<ST_VAR_OFFSET>[ \n\r\t'"`\\#] { + yyless(0); + yy_pop_state(TSRMLS_C); +} + <ST_IN_SCRIPTING>{LABEL} { if (!zend_copy_scanner_string(zendlval, yytext, yyleng, UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { return 0; @@ -2100,7 +2206,7 @@ return T_STRING; } -<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{LABEL} { +<ST_VAR_OFFSET>{LABEL} { if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { return 0; } @@ -2230,37 +2336,44 @@ } -<ST_IN_SCRIPTING>(["]([^$"\\]|("\\".))*["]) { +%{ +/* ("{"*|"$"*) handles { or $ at the end of a string (or the entire contents) + */ %} +<ST_IN_SCRIPTING>(["]{DOUBLE_QUOTES_CHARS}*("{"*|"$"*)["]) { if (UG(unicode)) { - return zend_scan_unicode_double_string(zendlval TSRMLS_CC); + return zend_scan_unicode_escape_string(zendlval, yytext+1, yyleng-2, 0x22 /*'"'*/, T_CONSTANT_ENCAPSED_STRING TSRMLS_CC); } else { - return zend_scan_binary_double_string(zendlval, 0 TSRMLS_CC); + zend_scan_binary_escape_string(zendlval, yytext+1, yyleng-2, '"' TSRMLS_CC); + return T_CONSTANT_ENCAPSED_STRING; } } -<ST_IN_SCRIPTING>(b["]([^$"\\]|("\\".))*["]) { - return zend_scan_binary_double_string(zendlval, 1 TSRMLS_CC); +<ST_IN_SCRIPTING>(b["]{DOUBLE_QUOTES_CHARS}*("{"*|"$"*)["]) { + zend_scan_binary_escape_string(zendlval, yytext+2, yyleng-3, '"' TSRMLS_CC); + return T_CONSTANT_ENCAPSED_STRING; } -<ST_IN_SCRIPTING>([']([^'\\]|("\\".))*[']) { +<ST_IN_SCRIPTING>([']([^'\\]|("\\"{ANY_CHAR}))*[']) { if (UG(unicode)) { return zend_scan_unicode_single_string(zendlval TSRMLS_CC); } else { - return zend_scan_binary_single_string(zendlval, 0 TSRMLS_CC); + zend_scan_binary_single_string(zendlval, yytext+1, yyleng-2 TSRMLS_CC); + return T_CONSTANT_ENCAPSED_STRING; } } -<ST_IN_SCRIPTING>("b'"([^'\\]|("\\".))*[']) { - return 
zend_scan_binary_single_string(zendlval, 1 TSRMLS_CC); +<ST_IN_SCRIPTING>("b'"([^'\\]|("\\"{ANY_CHAR}))*[']) { + zend_scan_binary_single_string(zendlval, yytext+2, yyleng-3 TSRMLS_CC); + return T_CONSTANT_ENCAPSED_STRING; } <ST_IN_SCRIPTING>["] { BEGIN(ST_DOUBLE_QUOTES); - return '\"'; + return '"'; } <ST_IN_SCRIPTING>b["] { @@ -2278,7 +2391,7 @@ CG(heredoc_len)--; } CG(heredoc) = estrndup(s, CG(heredoc_len)); - BEGIN(ST_HEREDOC); + BEGIN(ST_START_HEREDOC); return T_BINARY_HEREDOC; } @@ -2293,7 +2406,7 @@ CG(heredoc_len)--; } CG(heredoc) = estrndup(s, CG(heredoc_len)); - BEGIN(ST_HEREDOC); + BEGIN(ST_START_HEREDOC); return T_START_HEREDOC; } @@ -2304,204 +2417,180 @@ } -<ST_HEREDOC>^{LABEL}(";")?{NEWLINE} { - int label_len; +<ST_START_HEREDOC>{ANY_CHAR} { + yyless(0); + BEGIN(ST_HEREDOC); +} - if (yytext[yyleng-2]=='\r') { - label_len = yyleng-2; - } else { - label_len = yyleng-1; - } +<ST_START_HEREDOC>{LABEL}";"?[\n\r] { + int label_len = yyleng - 1; if (yytext[label_len-1]==';') { label_len--; } + yyless(label_len); + if (label_len==CG(heredoc_len) && !memcmp(yytext, CG(heredoc), label_len)) { - Z_STRVAL_P(zendlval) = estrndup(yytext, label_len); /* unput destroys yytext */ + Z_STRVAL_P(zendlval) = CG(heredoc); Z_STRLEN_P(zendlval) = label_len; - yyless(yyleng - (yyleng - label_len)); - efree(CG(heredoc)); CG(heredoc)=NULL; CG(heredoc_len)=0; BEGIN(ST_IN_SCRIPTING); return T_END_HEREDOC; } else { - CG(zend_lineno)++; - if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { - return 0; - } - return T_STRING; + yymore(); + BEGIN(ST_HEREDOC); } } +%{ +/* Match everything up to and including a possible ending label, so if the label + * doesn't match, it's kept with the rest of the string + * + * {HEREDOC_NEWLINE}+ handles the case of more than one newline sequence that + * couldn't be matched with HEREDOC_CHARS, because of the following label + */ %} +<ST_HEREDOC>{HEREDOC_CHARS}*{HEREDOC_NEWLINE}+{LABEL}";"?[\n\r] { 
+ char *end = yytext + yyleng - 1; -<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{ESCAPED_AND_WHITESPACE} { - HANDLE_NEWLINES(yytext, yyleng); - if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { - return 0; + if (end[-1] == ';') { + end--; + yyleng--; } - return T_ENCAPSED_AND_WHITESPACE; -} -<ST_DOUBLE_QUOTES>[`]+ { - if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { - return 0; - } - return T_ENCAPSED_AND_WHITESPACE; -} + if (yyleng > CG(heredoc_len) && !memcmp(end - CG(heredoc_len), CG(heredoc), CG(heredoc_len))) { + int len = yyleng - CG(heredoc_len) - 2; /* 2 for newline before and after label */ + if (len > 0 && yytext[len - 1] == '\r' && yytext[len] == '\n') { + len--; + } -<ST_BACKQUOTE>["]+ { - if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { - return 0; - } - return T_ENCAPSED_AND_WHITESPACE; -} + /* Go back before last label char, to match in ST_END_HEREDOC state */ + yyless(yyleng - 2); + /* Subtract the remaining label length. yyleng must include newline + * before label, for zend_highlight/strip, tokenizer, etc. 
*/ + yyleng -= CG(heredoc_len) - 1; -<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"$"[^a-zA-Z_\x7f-\xff{] { - Z_LVAL_P(zendlval) = (long) yytext[0]; - if (yyleng == 2) { - yyless(1); + CG(increment_lineno) = 1; /* For newline before label */ + BEGIN(ST_END_HEREDOC); + + if (CG(literal_type) == IS_UNICODE) { + return zend_scan_unicode_escape_string(zendlval, yytext, len, 0, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } else { + zend_scan_binary_escape_string(zendlval, yytext, len, 0 TSRMLS_CC); + return T_ENCAPSED_AND_WHITESPACE; + } + } else { + /* Go back to end of label, so there's something to match again in case + * there's a variable at the beginning of the next line */ + yyless(yyleng - 1); + yymore(); } - return T_CHARACTER; } - -<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{ENCAPSED_TOKENS} { - Z_LVAL_P(zendlval) = (long) yytext[0]; - return yytext[0]; +<ST_END_HEREDOC>{ANY_CHAR} { + Z_STRVAL_P(zendlval) = CG(heredoc); + Z_STRLEN_P(zendlval) = CG(heredoc_len); + yytext = Z_STRVAL_P(zendlval); + yyleng = Z_STRLEN_P(zendlval); + CG(heredoc) = NULL; + CG(heredoc_len) = 0; + BEGIN(ST_IN_SCRIPTING); + return T_END_HEREDOC; } + <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" { - Z_LVAL_P(zendlval) = (long) yytext[0]; + Z_LVAL_P(zendlval) = (long) '{'; yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); yyless(1); return T_CURLY_OPEN; } -<ST_DOUBLE_QUOTES>"\\\"" { - Z_LVAL_P(zendlval) = (long) '"'; - return T_CHARACTER; -} - -<ST_BACKQUOTE>"\\`" { - Z_LVAL_P(zendlval) = (long) '`'; - return T_CHARACTER; +<ST_DOUBLE_QUOTES>{DOUBLE_QUOTES_CHARS}+ { + if (CG(literal_type) == IS_UNICODE) { + return zend_scan_unicode_escape_string(zendlval, yytext, yyleng, 0x22 /*'"'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } else { + zend_scan_binary_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC); + return T_ENCAPSED_AND_WHITESPACE; + } } -<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\"[0-7]{1,3} { - Z_LVAL_P(zendlval) = strtol(yytext+1, NULL, 8); - return T_CHARACTER; -} +%{ +/* "{"{2,}|"$"{2,} 
handles { before "{$" or literal $ before a variable or "${" + * (("{"+|"$"+)["]) handles { or $ at the end of a string + * + * Same for backquotes and heredocs, except the second case doesn't apply to + * heredocs. yyless(yyleng - 1) is used to correct taking one character too many + */ %} +<ST_DOUBLE_QUOTES>{DOUBLE_QUOTES_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)["])) { + yyless(yyleng - 1); -<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\x"[0-9A-Fa-f]{1,2} { - Z_LVAL_P(zendlval) = strtol (yytext+2, NULL, 16); - return T_CHARACTER; + if (CG(literal_type) == IS_UNICODE) { + return zend_scan_unicode_escape_string(zendlval, yytext, yyleng, 0x22 /*'"'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } else { + zend_scan_binary_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC); + return T_ENCAPSED_AND_WHITESPACE; + } } -<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\u"[0-9A-Fa-f]{0,6} { - UChar32 codepoint; - int req_digits = (yytext[1] == 'U') ? 6 : 4; - +<ST_BACKQUOTE>{BACKQUOTE_CHARS}+ { if (CG(literal_type) == IS_UNICODE) { - if (zend_digits_to_codepoint(yytext+2, yytext+yyleng, &codepoint, req_digits)) { - if (codepoint <= 0x10FFFF) { - Z_LVAL_P(zendlval) = (long) codepoint; - /* give back if we grabbed more than needed for \u case */ - if (yyleng > req_digits + 2) { - yyless(req_digits + 2); - } - return T_CHARACTER; - } else { - zend_error(E_COMPILE_WARNING,"\\U%06x is above the highest valid codepoint 0x10FFFF", codepoint); - return 0; - } - } else { - zend_error(E_COMPILE_WARNING,"\\%c escape sequence requires exactly %d hexadecimal digits", yytext[1], req_digits); - return 0; - } + return zend_scan_unicode_escape_string(zendlval, yytext, yyleng, 0x60 /*'`'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); } else { - zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC); - return T_STRING; + zend_scan_binary_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC); + return T_ENCAPSED_AND_WHITESPACE; } } 
+<ST_BACKQUOTE>{BACKQUOTE_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)[`])) { + yyless(yyleng - 1); -<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\C"("{"[A-Z0-9 -]+"}")? { - UChar32 codepoint; - - if (CG(literal_type) == IS_UNICODE && (yytext[1] == 'C')) { - /* minimum valid string is \C{.} */ - if (yyleng >= 5) { - /* safe, since we have } at the end */ - yytext[yyleng-1] = 0; - if (zend_uchar_from_name(yytext+3, &codepoint)) { - Z_LVAL_P(zendlval) = (long) codepoint; - return T_CHARACTER; - } else { - zend_error(E_COMPILE_WARNING, "Invalid Unicode character name: '%s'", yytext+3); - return 0; - } - } else { - zend_error(E_COMPILE_WARNING, "Invalid \\C{..} sequence"); - return 0; - } + if (CG(literal_type) == IS_UNICODE) { + return zend_scan_unicode_escape_string(zendlval, yytext, yyleng, 0x60 /*'`'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); } else { - zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC); - return T_STRING; + zend_scan_binary_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC); + return T_ENCAPSED_AND_WHITESPACE; } } -<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\{" { - if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { - return 0; - } - return T_STRING; -} -<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\"{ANY_CHAR} { - switch (yytext[1]) { - case 'n': - Z_LVAL_P(zendlval) = (long) '\n'; - break; - case 't': - Z_LVAL_P(zendlval) = (long) '\t'; - break; - case 'r': - Z_LVAL_P(zendlval) = (long) '\r'; - break; - case '\\': - Z_LVAL_P(zendlval) = (long) '\\'; - break; - case '$': - Z_LVAL_P(zendlval) = (long) yytext[1]; - break; - default: - if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { - return 0; - } - return T_BAD_CHARACTER; - break; +%{ +/* ({HEREDOC_NEWLINE}+({LABEL}";"?)?)? 
handles the possible case of newline + * sequences, possibly followed by a label, that couldn't be matched with + * HEREDOC_CHARS because of a following variable or "{$" + * + * This doesn't affect real ending labels, as they are followed by a newline, + * which will result in a longer match for the correct rule if present + */ %} +<ST_HEREDOC>{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)? { + if (CG(literal_type) == IS_UNICODE) { + return zend_scan_unicode_escape_string(zendlval, yytext, yyleng, 0, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } else { + zend_scan_binary_escape_string(zendlval, yytext, yyleng, 0 TSRMLS_CC); + return T_ENCAPSED_AND_WHITESPACE; } - return T_CHARACTER; } +<ST_HEREDOC>{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)?("{"{2,}|"$"{2,}) { + yyless(yyleng - 1); -<ST_HEREDOC>["'`]+ { - if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { - return 0; + if (CG(literal_type) == IS_UNICODE) { + return zend_scan_unicode_escape_string(zendlval, yytext, yyleng, 0, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } else { + zend_scan_binary_escape_string(zendlval, yytext, yyleng, 0 TSRMLS_CC); + return T_ENCAPSED_AND_WHITESPACE; } - return T_ENCAPSED_AND_WHITESPACE; } <ST_DOUBLE_QUOTES>["] { BEGIN(ST_IN_SCRIPTING); - return '\"'; + return '"'; } @@ -2511,10 +2600,6 @@ } -<ST_DOUBLE_QUOTES,ST_BACKQUOTE,INITIAL,ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY><<EOF>> { - return 0; -} - <ST_COMMENT,ST_DOC_COMMENT><<EOF>> { zend_error(E_COMPILE_WARNING,"Unterminated comment starting line %d", CG(comment_start_line)); return 0; @@ -2522,6 +2607,6 @@ -<ST_IN_SCRIPTING,INITIAL,ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{ANY_CHAR} { +<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} { zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); } http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_vm_def.h?r1=1.167&r2=1.168&diff_format=u Index: ZendEngine2/zend_vm_def.h diff 
-u ZendEngine2/zend_vm_def.h:1.167 ZendEngine2/zend_vm_def.h:1.168 --- ZendEngine2/zend_vm_def.h:1.167 Thu May 17 17:28:12 2007 +++ ZendEngine2/zend_vm_def.h Fri May 18 13:12:47 2007 @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: zend_vm_def.h,v 1.167 2007/05/17 17:28:12 tony2001 Exp $ */ +/* $Id: zend_vm_def.h,v 1.168 2007/05/18 13:12:47 dmitry Exp $ */ /* If you change this file, please regenerate the zend_vm_execute.h and * zend_vm_opcodes.h files by running: @@ -1629,7 +1629,7 @@ Z_STRVAL_P(tmp) = emalloc(1); Z_STRVAL_P(tmp)[0] = 0; Z_STRLEN_P(tmp) = 0; - Z_TYPE_P(tmp) = EX(opline)->extended_value; + Z_TYPE_P(tmp) = IS_STRING; } tmp->refcount = 1; tmp->is_ref = 0; @@ -1666,15 +1666,18 @@ zend_free_op free_op1, free_op2; zval *var = GET_OP2_ZVAL_PTR(BP_VAR_R); zval var_copy; - int use_copy; + int use_copy = 0; - if (opline->extended_value == IS_UNICODE) { - zend_make_unicode_zval(var, &var_copy, &use_copy); - } else { - zend_make_string_zval(var, &var_copy, &use_copy); - } - if (use_copy) { - var = &var_copy; + if (Z_TYPE_P(var) != opline->extended_value) { + if (opline->extended_value == IS_UNICODE) { + zend_make_unicode_zval(var, &var_copy, &use_copy); + } else { + zend_make_string_zval(var, &var_copy, &use_copy); + } + + if (use_copy) { + var = &var_copy; + } } add_string_to_string(&EX_T(opline->result.u.var).tmp_var, GET_OP1_ZVAL_PTR(BP_VAR_NA), var); http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_vm_execute.h?r1=1.170&r2=1.171&diff_format=u Index: ZendEngine2/zend_vm_execute.h diff -u ZendEngine2/zend_vm_execute.h:1.170 ZendEngine2/zend_vm_execute.h:1.171 --- ZendEngine2/zend_vm_execute.h:1.170 Thu May 17 17:28:12 2007 +++ ZendEngine2/zend_vm_execute.h Fri May 18 13:12:47 2007 @@ -122,7 +122,7 @@ Z_STRVAL_P(tmp) = emalloc(1); Z_STRVAL_P(tmp)[0] = 0; Z_STRLEN_P(tmp) = 0; - Z_TYPE_P(tmp) = EX(opline)->extended_value; + Z_TYPE_P(tmp) = IS_STRING; } tmp->refcount = 1; tmp->is_ref = 0; @@ -5832,15 +5832,18 
@@ zend_free_op free_op1, free_op2; zval *var = _get_zval_ptr_tmp(&opline->op2, EX(Ts), &free_op2 TSRMLS_CC); zval var_copy; - int use_copy; + int use_copy = 0; - if (opline->extended_value == IS_UNICODE) { - zend_make_unicode_zval(var, &var_copy, &use_copy); - } else { - zend_make_string_zval(var, &var_copy, &use_copy); - } - if (use_copy) { - var = &var_copy; + if (Z_TYPE_P(var) != opline->extended_value) { + if (opline->extended_value == IS_UNICODE) { + zend_make_unicode_zval(var, &var_copy, &use_copy); + } else { + zend_make_string_zval(var, &var_copy, &use_copy); + } + + if (use_copy) { + var = &var_copy; + } } add_string_to_string(&EX_T(opline->result.u.var).tmp_var, _get_zval_ptr_tmp(&opline->op1, EX(Ts), &free_op1 TSRMLS_CC), var); @@ -6280,15 +6283,18 @@ zend_free_op free_op1, free_op2; zval *var = _get_zval_ptr_var(&opline->op2, EX(Ts), &free_op2 TSRMLS_CC); zval var_copy; - int use_copy; + int use_copy = 0; - if (opline->extended_value == IS_UNICODE) { - zend_make_unicode_zval(var, &var_copy, &use_copy); - } else { - zend_make_string_zval(var, &var_copy, &use_copy); - } - if (use_copy) { - var = &var_copy; + if (Z_TYPE_P(var) != opline->extended_value) { + if (opline->extended_value == IS_UNICODE) { + zend_make_unicode_zval(var, &var_copy, &use_copy); + } else { + zend_make_string_zval(var, &var_copy, &use_copy); + } + + if (use_copy) { + var = &var_copy; + } } add_string_to_string(&EX_T(opline->result.u.var).tmp_var, _get_zval_ptr_tmp(&opline->op1, EX(Ts), &free_op1 TSRMLS_CC), var); @@ -6822,15 +6828,18 @@ zend_free_op free_op1; zval *var = _get_zval_ptr_cv(&opline->op2, EX(Ts), BP_VAR_R TSRMLS_CC); zval var_copy; - int use_copy; + int use_copy = 0; - if (opline->extended_value == IS_UNICODE) { - zend_make_unicode_zval(var, &var_copy, &use_copy); - } else { - zend_make_string_zval(var, &var_copy, &use_copy); - } - if (use_copy) { - var = &var_copy; + if (Z_TYPE_P(var) != opline->extended_value) { + if (opline->extended_value == IS_UNICODE) { + 
zend_make_unicode_zval(var, &var_copy, &use_copy); + } else { + zend_make_string_zval(var, &var_copy, &use_copy); + } + + if (use_copy) { + var = &var_copy; + } } add_string_to_string(&EX_T(opline->result.u.var).tmp_var, _get_zval_ptr_tmp(&opline->op1, EX(Ts), &free_op1 TSRMLS_CC), var); http://cvs.php.net/viewvc.cgi/php-src/ext/tokenizer/tokenizer.c?r1=1.41&r2=1.42&diff_format=u Index: php-src/ext/tokenizer/tokenizer.c diff -u php-src/ext/tokenizer/tokenizer.c:1.41 php-src/ext/tokenizer/tokenizer.c:1.42 --- php-src/ext/tokenizer/tokenizer.c:1.41 Sun Apr 8 00:17:40 2007 +++ php-src/ext/tokenizer/tokenizer.c Fri May 18 13:12:47 2007 @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: tokenizer.c,v 1.41 2007/04/08 00:17:40 johannes Exp $ */ +/* $Id: tokenizer.c,v 1.42 2007/05/18 13:12:47 dmitry Exp $ */ #ifdef HAVE_CONFIG_H #include "config.h" @@ -282,12 +282,15 @@ while ((token_type = lex_scan(&token TSRMLS_CC))) { destroy = 1; switch (token_type) { + case T_CLOSE_TAG: + if (zendtext[zendleng - 1] != '>') { + CG(zend_lineno)++; + } case T_OPEN_TAG: case T_OPEN_TAG_WITH_ECHO: case T_WHITESPACE: case T_COMMENT: case T_DOC_COMMENT: - case T_CLOSE_TAG: destroy = 0; break; } @@ -297,6 +300,10 @@ array_init(keyword); add_next_index_long(keyword, token_type); if (token_type == T_END_HEREDOC) { + if (CG(increment_lineno)) { + token_line = ++CG(zend_lineno); + CG(increment_lineno) = 0; + } add_next_index_stringl(keyword, Z_STRVAL(token), Z_STRLEN(token), 1); efree(Z_STRVAL(token)); } else { @@ -372,8 +379,6 @@ case T_VARIABLE: return "T_VARIABLE"; case T_NUM_STRING: return "T_NUM_STRING"; case T_INLINE_HTML: return "T_INLINE_HTML"; - case T_CHARACTER: return "T_CHARACTER"; - case T_BAD_CHARACTER: return "T_BAD_CHARACTER"; case T_ENCAPSED_AND_WHITESPACE: return "T_ENCAPSED_AND_WHITESPACE"; case T_CONSTANT_ENCAPSED_STRING: return "T_CONSTANT_ENCAPSED_STRING"; case T_ECHO: return "T_ECHO"; 
http://cvs.php.net/viewvc.cgi/php-src/ext/tokenizer/tests/001.phpt?r1=1.5&r2=1.6&diff_format=u Index: php-src/ext/tokenizer/tests/001.phpt diff -u php-src/ext/tokenizer/tests/001.phpt:1.5 php-src/ext/tokenizer/tests/001.phpt:1.6 --- php-src/ext/tokenizer/tests/001.phpt:1.5 Tue Jun 20 18:14:12 2006 +++ php-src/ext/tokenizer/tests/001.phpt Fri May 18 13:12:47 2007 @@ -57,8 +57,6 @@ echo token_name(T_VARIABLE), "\n"; echo token_name(T_NUM_STRING), "\n"; echo token_name(T_INLINE_HTML), "\n"; -echo token_name(T_CHARACTER), "\n"; -echo token_name(T_BAD_CHARACTER), "\n"; echo token_name(T_ENCAPSED_AND_WHITESPACE), "\n"; echo token_name(T_CONSTANT_ENCAPSED_STRING), "\n"; echo token_name(T_ECHO), "\n"; @@ -185,8 +183,6 @@ T_VARIABLE T_NUM_STRING T_INLINE_HTML -T_CHARACTER -T_BAD_CHARACTER T_ENCAPSED_AND_WHITESPACE T_CONSTANT_ENCAPSED_STRING T_ECHO @@ -314,8 +310,6 @@ T_VARIABLE T_NUM_STRING T_INLINE_HTML -T_CHARACTER -T_BAD_CHARACTER T_ENCAPSED_AND_WHITESPACE T_CONSTANT_ENCAPSED_STRING T_ECHO http://cvs.php.net/viewvc.cgi/php-src/ext/tokenizer/tests/bug26463.phpt?r1=1.9&r2=1.10&diff_format=u Index: php-src/ext/tokenizer/tests/bug26463.phpt diff -u php-src/ext/tokenizer/tests/bug26463.phpt:1.9 php-src/ext/tokenizer/tests/bug26463.phpt:1.10 --- php-src/ext/tokenizer/tests/bug26463.phpt:1.9 Sun Apr 8 00:17:40 2007 +++ php-src/ext/tokenizer/tests/bug26463.phpt Fri May 18 13:12:47 2007 @@ -15,12 +15,12 @@ ?>'; var_dump(token_get_all($str)); ?> ---EXPECT-- +--EXPECTF-- array(19) { [0]=> array(3) { [0]=> - int(370) + int(%d) [1]=> string(6) "<?php " @@ -30,7 +30,7 @@ [1]=> array(3) { [0]=> - int(311) + int(%d) [1]=> string(2) "$x" [2]=> @@ -41,7 +41,7 @@ [3]=> array(3) { [0]=> - int(374) + int(%d) [1]=> string(6) "<<<DD " @@ -51,7 +51,7 @@ [4]=> array(3) { [0]=> - int(309) + int(%d) [1]=> string(13) "jhdsjkfhjdsh " @@ -61,7 +61,7 @@ [5]=> array(3) { [0]=> - int(375) + int(%d) [1]=> string(2) "DD" [2]=> @@ -70,7 +70,7 @@ [6]=> array(3) { [0]=> - int(373) + int(%d) [1]=> string(1) " 
" @@ -82,7 +82,7 @@ [8]=> array(3) { [0]=> - int(317) + int(%d) [1]=> string(2) """" [2]=> @@ -93,7 +93,7 @@ [10]=> array(3) { [0]=> - int(373) + int(%d) [1]=> string(1) " " @@ -103,7 +103,7 @@ [11]=> array(3) { [0]=> - int(311) + int(%d) [1]=> string(2) "$a" [2]=> @@ -114,7 +114,7 @@ [13]=> array(3) { [0]=> - int(374) + int(%d) [1]=> string(8) "<<<DDDD " @@ -124,7 +124,7 @@ [14]=> array(3) { [0]=> - int(309) + int(%d) [1]=> string(13) "jhdsjkfhjdsh " @@ -134,7 +134,7 @@ [15]=> array(3) { [0]=> - int(375) + int(%d) [1]=> string(4) "DDDD" [2]=> @@ -145,7 +145,7 @@ [17]=> array(3) { [0]=> - int(373) + int(%d) [1]=> string(1) " " @@ -155,7 +155,7 @@ [18]=> array(3) { [0]=> - int(372) + int(%d) [1]=> string(2) "?>" [2]=> @@ -167,7 +167,7 @@ [0]=> array(3) { [0]=> - int(370) + int(%d) [1]=> string(6) "<?php " @@ -177,7 +177,7 @@ [1]=> array(3) { [0]=> - int(311) + int(%d) [1]=> string(2) "$x" [2]=> @@ -188,7 +188,7 @@ [3]=> array(3) { [0]=> - int(374) + int(%d) [1]=> string(6) "<<<DD " @@ -198,7 +198,7 @@ [4]=> array(3) { [0]=> - int(309) + int(%d) [1]=> string(13) "jhdsjkfhjdsh " @@ -208,7 +208,7 @@ [5]=> array(3) { [0]=> - int(375) + int(%d) [1]=> string(2) "DD" [2]=> @@ -217,7 +217,7 @@ [6]=> array(3) { [0]=> - int(373) + int(%d) [1]=> string(1) " " @@ -229,7 +229,7 @@ [8]=> array(3) { [0]=> - int(317) + int(%d) [1]=> string(2) """" [2]=> @@ -240,7 +240,7 @@ [10]=> array(3) { [0]=> - int(373) + int(%d) [1]=> string(1) " " @@ -250,7 +250,7 @@ [11]=> array(3) { [0]=> - int(311) + int(%d) [1]=> string(2) "$a" [2]=> @@ -261,7 +261,7 @@ [13]=> array(3) { [0]=> - int(374) + int(%d) [1]=> string(8) "<<<DDDD " @@ -271,7 +271,7 @@ [14]=> array(3) { [0]=> - int(309) + int(%d) [1]=> string(13) "jhdsjkfhjdsh " @@ -281,7 +281,7 @@ [15]=> array(3) { [0]=> - int(375) + int(%d) [1]=> string(4) "DDDD" [2]=> @@ -292,7 +292,7 @@ [17]=> array(3) { [0]=> - int(373) + int(%d) [1]=> string(1) " " @@ -302,7 +302,7 @@ [18]=> array(3) { [0]=> - int(372) + int(%d) [1]=> string(2) "?>" [2]=>
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php