It seems the patch tester is confused by the addition of the demonstration diff file. I'm reattaching just the patchset to see if it turns green.
-John Naylor
From 107e3c8a0b65b0196ea4370a724c8b2a1b0fdf79 Mon Sep 17 00:00:00 2001 From: John Naylor <jcnay...@gmail.com> Date: Sun, 30 Sep 2018 12:51:41 +0700 Subject: [PATCH v1 1/4] First pass at syncing ECPG scanner with the core scanner. Adjust whitespace and formatting, clean up some comments, and move the block of whitespace rules. --- src/backend/parser/scan.l | 2 +- src/fe_utils/psqlscan.l | 2 +- src/interfaces/ecpg/preproc/pgc.l | 773 ++++++++++++++++-------------- 3 files changed, 408 insertions(+), 369 deletions(-) diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 950b8b8591..a2454732a1 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -192,7 +192,7 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner); * XXX perhaps \f (formfeed) should be treated as a newline as well? * * XXX if you change the set of whitespace characters, fix scanner_isspace() - * to agree, and see also the plpgsql lexer. + * to agree. */ space [ \t\n\r\f] diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l index fdf49875a7..25253b54ea 100644 --- a/src/fe_utils/psqlscan.l +++ b/src/fe_utils/psqlscan.l @@ -151,7 +151,7 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner); * XXX perhaps \f (formfeed) should be treated as a newline as well? * * XXX if you change the set of whitespace characters, fix scanner_isspace() - * to agree, and see also the plpgsql lexer. + * to agree. */ space [ \t\n\r\f] diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index 0792118cfe..b96f17ca20 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -108,16 +108,19 @@ static struct _if_value * We use exclusive states for quoted strings, extended comments, * and to eliminate parsing troubles for numeric strings. 
* Exclusive states: - * <xb> bit string literal - * <xcc> extended C-style comments in C - * <xcsql> extended C-style comments in SQL - * <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27 - * <xh> hexadecimal numeric string - thomas 1997-11-16 - * <xq> standard quoted strings - thomas 1997-07-30 - * <xqc> standard quoted strings in C - michael - * <xe> extended quoted strings (support backslash escape sequences) - * <xn> national character quoted strings + * <xb> bit string literal + * <xcc> extended C-style comments in C + * <xcsql> extended C-style comments in SQL + * <xd> delimited identifiers (double-quoted identifiers) + * <xdc> + * <xh> hexadecimal numeric string + * <xq> standard quoted strings + * <xe> extended quoted strings (support backslash escape sequences) + * <xn> national character quoted strings + * <xqc> standard quoted strings in C * <xdolq> $foo$ quoted strings + * <xcond> + * <xskip> * <xui> quoted identifier with Unicode escapes * <xus> quoted string with Unicode escapes */ @@ -138,6 +141,48 @@ static struct _if_value %x xui %x xus +/* + * In order to make the world safe for Windows and Mac clients as well as + * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n + * sequence will be seen as two successive newlines, but that doesn't cause + * any problems. SQL-style comments, which start with -- and extend to the + * next newline, are treated as equivalent to a single whitespace character. + * + * NOTE a fine point: if there is no newline following --, we will absorb + * everything to the end of the input as a comment. This is correct. Older + * versions of Postgres failed to recognize -- as a comment if the input + * did not end with a newline. + * + * XXX perhaps \f (formfeed) should be treated as a newline as well? + * + * XXX if you change the set of whitespace characters, fix ecpg_isspace() + * to agree. 
+ */ + +space [ \t\n\r\f] +horiz_space [ \t\f] +newline [\n\r] +non_newline [^\n\r] + +comment ("--"{non_newline}*) + +whitespace ({space}+|{comment}) + +/* + * SQL requires at least one newline in the whitespace separating + * string literals that are to be concatenated. Silly, but who are we + * to argue? Note that {whitespace_with_newline} should not have * after + * it, whereas {whitespace} should generally have a * after it... + */ + +horiz_whitespace ({horiz_space}|{comment}) +whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*) + +quote ' +quotestop {quote}{whitespace}* +quotecontinue {quote}{whitespace_with_newline}{quote} +quotefail {quote}{whitespace}*"-" + /* Bit string */ xbstart [bB]{quote} @@ -216,17 +261,17 @@ xdcinside ({xdcqq}|{xdcqdq}|{xdcother}) * The "extended comment" syntax closely resembles allowable operator syntax. * The tricky part here is to get lex to recognize a string starting with * slash-star as a comment, when interpreting it as an operator would produce - * a longer match --- remember lex will prefer a longer match! Also, if we + * a longer match --- remember lex will prefer a longer match! Also, if we * have something like plus-slash-star, lex will think this is a 3-character * operator whereas we want to see it as a + operator and a comment start. * The solution is two-fold: * 1. append {op_chars}* to xcstart so that it matches as much text as - * {operator} would. Then the tie-breaker (first matching rule of same - * length) ensures xcstart wins. We put back the extra stuff with yyless() - * in case it contains a star-slash that should terminate the comment. + * {operator} would. Then the tie-breaker (first matching rule of same + * length) ensures xcstart wins. We put back the extra stuff with yyless() + * in case it contains a star-slash that should terminate the comment. * 2. In the operator rule, check for slash-star within the operator, and - * if found throw it back with yyless(). 
This handles the plus-slash-star - * problem. + * if found throw it back with yyless(). This handles the plus-slash-star + * problem. * Dash-dash comments have similar interactions with the operator rule. */ xcstart \/\*{op_chars}* @@ -262,7 +307,7 @@ not_equals "!=" /* * "self" is the set of chars that should be returned as single-character - * tokens. "op_chars" is the set of chars that can make up "Op" tokens, + * tokens. "op_chars" is the set of chars that can make up "Op" tokens, * which can be one or more characters long (but if a single-char token * appears in the "self" set, it is not to be returned as an Op). Note * that the sets overlap, but each has some chars that are not in the other. @@ -290,50 +335,6 @@ realfail2 ({integer}|{decimal})[Ee][-+] param \${integer} -/* - * In order to make the world safe for Windows and Mac clients as well as - * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n - * sequence will be seen as two successive newlines, but that doesn't cause - * any problems. SQL-style comments, which start with -- and extend to the - * next newline, are treated as equivalent to a single whitespace character. - * - * NOTE a fine point: if there is no newline following --, we will absorb - * everything to the end of the input as a comment. This is correct. Older - * versions of Postgres failed to recognize -- as a comment if the input - * did not end with a newline. - * - * XXX perhaps \f (formfeed) should be treated as a newline as well? - * - * XXX if you change the set of whitespace characters, fix ecpg_isspace() - * to agree. - */ - -ccomment "//".*\n - -space [ \t\n\r\f] -horiz_space [ \t\f] -newline [\n\r] -non_newline [^\n\r] - -comment ("--"{non_newline}*) - -whitespace ({space}+|{comment}) - -/* - * SQL requires at least one newline in the whitespace separating - * string literals that are to be concatenated. Silly, but who are we - * to argue? 
Note that {whitespace_with_newline} should not have * after - * it, whereas {whitespace} should generally have a * after it... - */ - -horiz_whitespace ({horiz_space}|{comment}) -whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*) - -quote ' -quotestop {quote}{whitespace}* -quotecontinue {quote}{whitespace_with_newline}{quote} -quotefail {quote}{whitespace}*"-" - /* special characters for other dbms */ /* we have to react differently in compat mode */ informix_special [\$] @@ -349,6 +350,8 @@ include_next [iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT] import [iI][mM][pP][oO][rR][tT] undef [uU][nN][dD][eE][fF] +ccomment "//".*\n + if [iI][fF] ifdef [iI][fF][dD][eE][fF] ifndef [iI][fF][nN][dD][eE][fF] @@ -375,14 +378,14 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ /* * Dollar quoted strings are totally opaque, and no escaping is done on them. * Other quoted strings must allow some special characters such as single-quote - * and newline. + * and newline. * Embedded single-quotes are implemented both in the SQL standard - * style of two adjacent single quotes "''" and in the Postgres/Java style - * of escaped-quote "\'". + * style of two adjacent single quotes "''" and in the Postgres/Java style + * of escaped-quote "\'". * Other embedded escaped characters are matched explicitly and the leading - * backslash is dropped from the string. - thomas 1997-09-24 + * backslash is dropped from the string. * Note that xcstart must appear before operator, as explained above! - * Also whitespace (comment) must appear before operator. + * Also whitespace (comment) must appear before operator. 
*/ %% @@ -392,7 +395,9 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ token_start = NULL; %} -<SQL>{whitespace} { /* ignore */ } +<SQL>{whitespace} { + /* ignore */ + } <C>{xcstart} { token_start = yytext; @@ -437,9 +442,16 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ BEGIN(state_before); token_start = NULL; } -<xcc,xcsql>{xcinside} { ECHO; } -<xcc,xcsql>{op_chars} { ECHO; } -<xcc,xcsql>\*+ { ECHO; } + +<xcc,xcsql>{xcinside} { + ECHO; + } +<xcc,xcsql>{op_chars} { + ECHO; + } +<xcc,xcsql>\*+ { + ECHO; + } <xcc,xcsql><<EOF>> { mmfatal(PARSE_ERROR, "unterminated /* comment"); } @@ -449,8 +461,8 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ startlit(); addlitchar('b'); } -<xb>{quotestop} | -<xb>{quotefail} { +<xb>{quotestop} | +<xb>{quotefail} { yyless(1); BEGIN(SQL); if (literalbuf[strspn(literalbuf, "01") + 1] != '\0') @@ -460,9 +472,13 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ } <xh>{xhinside} | -<xb>{xbinside} { addlit(yytext, yyleng); } +<xb>{xbinside} { + addlit(yytext, yyleng); + } <xh>{quotecontinue} | -<xb>{quotecontinue} { /* ignore */ } +<xb>{quotecontinue} { + /* ignore */ + } <xb><<EOF>> { mmfatal(PARSE_ERROR, "unterminated bit string literal"); } <SQL>{xhstart} { @@ -472,176 +488,194 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ addlitchar('x'); } <xh>{quotestop} | -<xh>{quotefail} { - yyless(1); - BEGIN(SQL); - base_yylval.str = mm_strdup(literalbuf); - return XCONST; - } +<xh>{quotefail} { + yyless(1); + BEGIN(SQL); + base_yylval.str = mm_strdup(literalbuf); + return XCONST; + } <xh><<EOF>> { mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal"); } <SQL>{xnstart} { - /* National character. - * Transfer it as-is to the backend. - */ - token_start = yytext; - state_before = YYSTATE; - BEGIN(xn); - startlit(); - } + /* National character. 
+ * Transfer it as-is to the backend. + */ + token_start = yytext; + state_before = YYSTATE; + BEGIN(xn); + startlit(); + } <C>{xqstart} { - token_start = yytext; - state_before = YYSTATE; - BEGIN(xqc); - startlit(); - } + token_start = yytext; + state_before = YYSTATE; + BEGIN(xqc); + startlit(); + } <SQL>{xqstart} { - token_start = yytext; - state_before = YYSTATE; - BEGIN(xq); - startlit(); - } + token_start = yytext; + state_before = YYSTATE; + BEGIN(xq); + startlit(); + } <SQL>{xestart} { - token_start = yytext; - state_before = YYSTATE; - BEGIN(xe); - startlit(); - } + token_start = yytext; + state_before = YYSTATE; + BEGIN(xe); + startlit(); + } <SQL>{xusstart} { - token_start = yytext; - state_before = YYSTATE; - BEGIN(xus); - startlit(); - addlit(yytext, yyleng); - } + token_start = yytext; + state_before = YYSTATE; + BEGIN(xus); + startlit(); + addlit(yytext, yyleng); + } <xq,xqc>{quotestop} | <xq,xqc>{quotefail} { - yyless(1); - BEGIN(state_before); - base_yylval.str = mm_strdup(literalbuf); - return SCONST; - } + yyless(1); + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return SCONST; + } <xe>{quotestop} | <xe>{quotefail} { - yyless(1); - BEGIN(state_before); - base_yylval.str = mm_strdup(literalbuf); - return ECONST; - } + yyless(1); + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return ECONST; + } <xn>{quotestop} | <xn>{quotefail} { - yyless(1); - BEGIN(state_before); - base_yylval.str = mm_strdup(literalbuf); - return NCONST; - } + yyless(1); + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return NCONST; + } <xus>{xusstop} { - addlit(yytext, yyleng); - BEGIN(state_before); - base_yylval.str = mm_strdup(literalbuf); - return UCONST; - } + addlit(yytext, yyleng); + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return UCONST; + } <xq,xe,xn,xus>{xqdouble} { addlitchar('\''); } -<xqc>{xqcquote} { - addlitchar('\\'); - addlitchar('\''); - } +<xqc>{xqcquote} { + 
addlitchar('\\'); + addlitchar('\''); + } <xq,xqc,xn,xus>{xqinside} { addlit(yytext, yyleng); } -<xe>{xeinside} { addlit(yytext, yyleng); } -<xe>{xeunicode} { addlit(yytext, yyleng); } -<xe>{xeescape} { addlit(yytext, yyleng); } -<xe>{xeoctesc} { addlit(yytext, yyleng); } -<xe>{xehexesc} { addlit(yytext, yyleng); } +<xe>{xeinside} { + addlit(yytext, yyleng); + } +<xe>{xeunicode} { + addlit(yytext, yyleng); + } +<xe>{xeescape} { + addlit(yytext, yyleng); + } +<xe>{xeoctesc} { + addlit(yytext, yyleng); + } +<xe>{xehexesc} { + addlit(yytext, yyleng); + } <xq,xqc,xe,xn,xus>{quotecontinue} { /* ignore */ } -<xe>. { - /* This is only needed for \ just before EOF */ - addlitchar(yytext[0]); - } +<xe>. { + /* This is only needed for \ just before EOF */ + addlitchar(yytext[0]); + } <xq,xqc,xe,xn,xus><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted string"); } <SQL>{dolqfailed} { - /* throw back all but the initial "$" */ - yyless(1); - /* and treat it as {other} */ - return yytext[0]; - } + /* throw back all but the initial "$" */ + yyless(1); + /* and treat it as {other} */ + return yytext[0]; + } <SQL>{dolqdelim} { - token_start = yytext; - if (dolqstart) - free(dolqstart); - dolqstart = mm_strdup(yytext); - BEGIN(xdolq); - startlit(); - addlit(yytext, yyleng); - } -<xdolq>{dolqdelim} { - if (strcmp(yytext, dolqstart) == 0) - { + token_start = yytext; + if (dolqstart) + free(dolqstart); + dolqstart = mm_strdup(yytext); + BEGIN(xdolq); + startlit(); addlit(yytext, yyleng); - free(dolqstart); - dolqstart = NULL; - BEGIN(SQL); - base_yylval.str = mm_strdup(literalbuf); - return DOLCONST; - } - else - { - /* - * When we fail to match $...$ to dolqstart, transfer - * the $... part to the output, but put back the final - * $ for rescanning. 
Consider $delim$...$junk$delim$ - */ - addlit(yytext, yyleng-1); - yyless(yyleng-1); } - } -<xdolq>{dolqinside} { addlit(yytext, yyleng); } -<xdolq>{dolqfailed} { addlit(yytext, yyleng); } -<xdolq>{other} { - /* single quote or dollar sign */ - addlitchar(yytext[0]); - } -<xdolq><<EOF>> { base_yyerror("unterminated dollar-quoted string"); } -<SQL>{xdstart} { - state_before = YYSTATE; - BEGIN(xd); - startlit(); - } -<SQL>{xuistart} { - state_before = YYSTATE; - BEGIN(xui); - startlit(); +<xdolq>{dolqdelim} { + if (strcmp(yytext, dolqstart) == 0) + { addlit(yytext, yyleng); - } -<xd>{xdstop} { - BEGIN(state_before); - if (literallen == 0) - mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier"); - /* The backend will truncate the identifier here. We do not as it does not change the result. */ - base_yylval.str = mm_strdup(literalbuf); - return CSTRING; - } -<xdc>{xdstop} { - BEGIN(state_before); + free(dolqstart); + dolqstart = NULL; + BEGIN(SQL); base_yylval.str = mm_strdup(literalbuf); - return CSTRING; + return DOLCONST; } -<xui>{xuistop} { - BEGIN(state_before); - if (literallen == 2) /* "U&" */ - mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier"); - /* The backend will truncate the identifier here. We do not as it does not change the result. */ - addlit(yytext, yyleng); - base_yylval.str = mm_strdup(literalbuf); - return UIDENT; + else + { + /* + * When we fail to match $...$ to dolqstart, transfer + * the $... part to the output, but put back the final + * $ for rescanning. Consider $delim$...$junk$delim$ + */ + addlit(yytext, yyleng-1); + yyless(yyleng-1); } -<xd,xui>{xddouble} { addlitchar('"'); } -<xd,xui>{xdinside} { addlit(yytext, yyleng); } -<xd,xdc,xui><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted identifier"); } + } +<xdolq>{dolqinside} { + addlit(yytext, yyleng); + } +<xdolq>{dolqfailed} { + addlit(yytext, yyleng); + } +<xdolq>. 
{ + /* single quote or dollar sign */ + addlitchar(yytext[0]); + } +<xdolq><<EOF>> { base_yyerror("unterminated dollar-quoted string"); } +<SQL>{xdstart} { + state_before = YYSTATE; + BEGIN(xd); + startlit(); + } +<SQL>{xuistart} { + state_before = YYSTATE; + BEGIN(xui); + startlit(); + addlit(yytext, yyleng); + } +<xd>{xdstop} { + BEGIN(state_before); + if (literallen == 0) + mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier"); + /* The backend will truncate the identifier here. We do not as it does not change the result. */ + base_yylval.str = mm_strdup(literalbuf); + return CSTRING; + } +<xdc>{xdstop} { + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return CSTRING; + } +<xui>{xuistop} { + BEGIN(state_before); + if (literallen == 2) /* "U&" */ + mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier"); + /* The backend will truncate the identifier here. We do not as it does not change the result. */ + addlit(yytext, yyleng); + base_yylval.str = mm_strdup(literalbuf); + return UIDENT; + } +<xd,xui>{xddouble} { + addlitchar('"'); + } +<xd,xui>{xdinside} { + addlit(yytext, yyleng); + } +<xd,xdc,xui><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted identifier"); } <C,SQL>{xdstart} { - state_before = YYSTATE; - BEGIN(xdc); - startlit(); - } + state_before = YYSTATE; + BEGIN(xdc); + startlit(); + } <xdc>{xdcinside} { addlit(yytext, yyleng); } <SQL>{typecast} { return TYPECAST; } <SQL>{dot_dot} { return DOT_DOT; } @@ -660,184 +694,189 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ else return yytext[0]; } -<SQL>{self} { /* - * We may find a ';' inside a structure - * definition in a TYPE or VAR statement. - * This is not an EOL marker. - */ - if (yytext[0] == ';' && struct_level == 0) - BEGIN(C); - return yytext[0]; - } -<SQL>{operator} { - /* - * Check for embedded slash-star or dash-dash; those - * are comment starts, so operator must stop there. 
- * Note that slash-star or dash-dash at the first - * character will match a prior rule, not this one. - */ - int nchars = yyleng; - char *slashstar = strstr(yytext, "/*"); - char *dashdash = strstr(yytext, "--"); +<SQL>{self} { + /* + * We may find a ';' inside a structure + * definition in a TYPE or VAR statement. + * This is not an EOL marker. + */ + if (yytext[0] == ';' && struct_level == 0) + BEGIN(C); + return yytext[0]; + } +<SQL>{operator} { + /* + * Check for embedded slash-star or dash-dash; those + * are comment starts, so operator must stop there. + * Note that slash-star or dash-dash at the first + * character will match a prior rule, not this one. + */ + int nchars = yyleng; + char *slashstar = strstr(yytext, "/*"); + char *dashdash = strstr(yytext, "--"); - if (slashstar && dashdash) - { - /* if both appear, take the first one */ - if (slashstar > dashdash) - slashstar = dashdash; - } - else if (!slashstar) + if (slashstar && dashdash) + { + /* if both appear, take the first one */ + if (slashstar > dashdash) slashstar = dashdash; - if (slashstar) - nchars = slashstar - yytext; + } + else if (!slashstar) + slashstar = dashdash; + if (slashstar) + nchars = slashstar - yytext; - /* - * For SQL compatibility, '+' and '-' cannot be the - * last char of a multi-char operator unless the operator - * contains chars that are not in SQL operators. - * The idea is to lex '=-' as two operators, but not - * to forbid operator names like '?-' that could not be - * sequences of SQL operators. - */ - if (nchars > 1 && - (yytext[nchars - 1] == '+' || - yytext[nchars - 1] == '-')) - { - int ic; + /* + * For SQL compatibility, '+' and '-' cannot be the + * last char of a multi-char operator unless the operator + * contains chars that are not in SQL operators. + * The idea is to lex '=-' as two operators, but not + * to forbid operator names like '?-' that could not be + * sequences of SQL operators. 
+ */ + if (nchars > 1 && + (yytext[nchars - 1] == '+' || + yytext[nchars - 1] == '-')) + { + int ic; - for (ic = nchars - 2; ic >= 0; ic--) - { - char c = yytext[ic]; - if (c == '~' || c == '!' || c == '@' || - c == '#' || c == '^' || c == '&' || - c == '|' || c == '`' || c == '?' || - c == '%') - break; - } - if (ic < 0) - { - /* - * didn't find a qualifying character, so remove - * all trailing [+-] - */ - do { - nchars--; - } while (nchars > 1 && - (yytext[nchars - 1] == '+' || - yytext[nchars - 1] == '-')); - } + for (ic = nchars - 2; ic >= 0; ic--) + { + char c = yytext[ic]; + if (c == '~' || c == '!' || c == '@' || + c == '#' || c == '^' || c == '&' || + c == '|' || c == '`' || c == '?' || + c == '%') + break; } - - if (nchars < yyleng) + if (ic < 0) { - /* Strip the unwanted chars from the token */ - yyless(nchars); - /* - * If what we have left is only one char, and it's - * one of the characters matching "self", then - * return it as a character token the same way - * that the "self" rule would have. - */ - if (nchars == 1 && - strchr(",()[].;:+-*/%^<>=", yytext[0])) - return yytext[0]; /* - * Likewise, if what we have left is two chars, and - * those match the tokens ">=", "<=", "=>", "<>" or - * "!=", then we must return the appropriate token - * rather than the generic Op. + * didn't find a qualifying character, so remove + * all trailing [+-] */ - if (nchars == 2) - { - if (yytext[0] == '=' && yytext[1] == '>') - return EQUALS_GREATER; - if (yytext[0] == '>' && yytext[1] == '=') - return GREATER_EQUALS; - if (yytext[0] == '<' && yytext[1] == '=') - return LESS_EQUALS; - if (yytext[0] == '<' && yytext[1] == '>') - return NOT_EQUALS; - if (yytext[0] == '!' 
&& yytext[1] == '=') - return NOT_EQUALS; - } + do { + nchars--; + } while (nchars > 1 && + (yytext[nchars - 1] == '+' || + yytext[nchars - 1] == '-')); } - - base_yylval.str = mm_strdup(yytext); - return Op; } -<SQL>{param} { - base_yylval.ival = atol(yytext+1); - return PARAM; - } -<C,SQL>{integer} { - int val; - char* endptr; - errno = 0; - val = strtoint(yytext, &endptr, 10); - if (*endptr != '\0' || errno == ERANGE) + if (nchars < yyleng) + { + /* Strip the unwanted chars from the token */ + yyless(nchars); + /* + * If what we have left is only one char, and it's + * one of the characters matching "self", then + * return it as a character token the same way + * that the "self" rule would have. + */ + if (nchars == 1 && + strchr(",()[].;:+-*/%^<>=", yytext[0])) + return yytext[0]; + /* + * Likewise, if what we have left is two chars, and + * those match the tokens ">=", "<=", "=>", "<>" or + * "!=", then we must return the appropriate token + * rather than the generic Op. + */ + if (nchars == 2) { - errno = 0; - base_yylval.str = mm_strdup(yytext); - return FCONST; + if (yytext[0] == '=' && yytext[1] == '>') + return EQUALS_GREATER; + if (yytext[0] == '>' && yytext[1] == '=') + return GREATER_EQUALS; + if (yytext[0] == '<' && yytext[1] == '=') + return LESS_EQUALS; + if (yytext[0] == '<' && yytext[1] == '>') + return NOT_EQUALS; + if (yytext[0] == '!' 
&& yytext[1] == '=') + return NOT_EQUALS; } - base_yylval.ival = val; - return ICONST; } -<SQL>{ip} { - base_yylval.str = mm_strdup(yytext); - return IP; + + base_yylval.str = mm_strdup(yytext); + return Op; } -<C,SQL>{decimal} { +<SQL>{param} { + base_yylval.ival = atol(yytext+1); + return PARAM; + } +<C,SQL>{integer} { + int val; + char* endptr; + + errno = 0; + val = strtoint(yytext, &endptr, 10); + if (*endptr != '\0' || errno == ERANGE) + { + errno = 0; base_yylval.str = mm_strdup(yytext); return FCONST; - } -<C,SQL>{real} { + } + base_yylval.ival = val; + return ICONST; + } +<SQL>{ip} { + base_yylval.str = mm_strdup(yytext); + return IP; + } +<C,SQL>{decimal} { + base_yylval.str = mm_strdup(yytext); + return FCONST; + } +<C,SQL>{real} { base_yylval.str = mm_strdup(yytext); return FCONST; - } + } <SQL>{realfail1} { - yyless(yyleng-1); - base_yylval.str = mm_strdup(yytext); - return FCONST; - } + yyless(yyleng-1); + base_yylval.str = mm_strdup(yytext); + return FCONST; + } <SQL>{realfail2} { - yyless(yyleng-2); - base_yylval.str = mm_strdup(yytext); - return FCONST; - } + yyless(yyleng-2); + base_yylval.str = mm_strdup(yytext); + return FCONST; + } <SQL>:{identifier}((("->"|\.){identifier})|(\[{array}\]))* { - base_yylval.str = mm_strdup(yytext+1); - return CVARIABLE; - } + base_yylval.str = mm_strdup(yytext+1); + return CVARIABLE; + } <SQL>{identifier} { - const ScanKeyword *keyword; + const ScanKeyword *keyword; - if (!isdefine()) - { - /* Is it an SQL/ECPG keyword? */ - keyword = ScanECPGKeywordLookup(yytext); - if (keyword != NULL) - return keyword->value; + if (!isdefine()) + { + /* Is it an SQL/ECPG keyword? */ + keyword = ScanECPGKeywordLookup(yytext); + if (keyword != NULL) + return keyword->value; - /* Is it a C keyword? */ - keyword = ScanCKeywordLookup(yytext); - if (keyword != NULL) - return keyword->value; + /* Is it a C keyword? */ + keyword = ScanCKeywordLookup(yytext); + if (keyword != NULL) + return keyword->value; - /* - * None of the above. 
Return it as an identifier. - * - * The backend will attempt to truncate and case-fold - * the identifier, but I see no good reason for ecpg - * to do so; that's just another way that ecpg could get - * out of step with the backend. - */ - base_yylval.str = mm_strdup(yytext); - return IDENT; - } + /* + * None of the above. Return it as an identifier. + * + * The backend will attempt to truncate and case-fold + * the identifier, but I see no good reason for ecpg + * to do so; that's just another way that ecpg could get + * out of step with the backend. + */ + base_yylval.str = mm_strdup(yytext); + return IDENT; } -<SQL>{other} { return yytext[0]; } + } + +<SQL>{other} { + return yytext[0]; + } + <C>{exec_sql} { BEGIN(SQL); return SQL_START; } <C>{informix_special} { /* are we simulating Informix? */ -- 2.17.1
From d41041d21ee7cdb26cd59565e42cc29412daa157 Mon Sep 17 00:00:00 2001 From: John Naylor <jcnay...@gmail.com> Date: Sun, 30 Sep 2018 13:04:32 +0700 Subject: [PATCH v1 2/4] Preparatory refactoring for next commit. The next patch needs to do some rearrangements for it to work, but makes the patch hard to read, so put them in a separate patch. --- src/interfaces/ecpg/preproc/pgc.l | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index b96f17ca20..421c62339b 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -399,20 +399,20 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ /* ignore */ } -<C>{xcstart} { +<SQL>{xcstart} { token_start = yytext; state_before = YYSTATE; xcdepth = 0; - BEGIN(xcc); + BEGIN(xcsql); /* Put back any characters past slash-star; see above */ yyless(2); fputs("/*", yyout); } -<SQL>{xcstart} { +<C>{xcstart} { token_start = yytext; state_before = YYSTATE; xcdepth = 0; - BEGIN(xcsql); + BEGIN(xcc); /* Put back any characters past slash-star; see above */ yyless(2); fputs("/*", yyout); @@ -496,6 +496,12 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ } <xh><<EOF>> { mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal"); } +<C>{xqstart} { + token_start = yytext; + state_before = YYSTATE; + BEGIN(xqc); + startlit(); + } <SQL>{xnstart} { /* National character. * Transfer it as-is to the backend. 
@@ -505,12 +511,6 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ BEGIN(xn); startlit(); } -<C>{xqstart} { - token_start = yytext; - state_before = YYSTATE; - BEGIN(xqc); - startlit(); - } <SQL>{xqstart} { token_start = yytext; state_before = YYSTATE; @@ -804,6 +804,10 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ base_yylval.ival = atol(yytext+1); return PARAM; } +<SQL>{ip} { + base_yylval.str = mm_strdup(yytext); + return IP; + } <C,SQL>{integer} { int val; char* endptr; @@ -819,10 +823,6 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ base_yylval.ival = val; return ICONST; } -<SQL>{ip} { - base_yylval.str = mm_strdup(yytext); - return IP; - } <C,SQL>{decimal} { base_yylval.str = mm_strdup(yytext); return FCONST; -- 2.17.1
From 9211fb6e2de425ff9b4669c6c6f6fabea7c3f639 Mon Sep 17 00:00:00 2001 From: John Naylor <jcnay...@gmail.com> Date: Sun, 30 Sep 2018 13:08:25 +0700 Subject: [PATCH v1 3/4] Use start condition scopes in the core, psql, and ECPG scanners. This makes it easier to diff the core scanner with the ECPG scanner. --- src/backend/parser/scan.l | 12 ++- src/fe_utils/psqlscan.l | 8 +- src/interfaces/ecpg/preproc/pgc.l | 133 +++++++++++++++++++++--------- 3 files changed, 107 insertions(+), 46 deletions(-) diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index a2454732a1..f20a6e58fe 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -430,19 +430,23 @@ other . (yyextra->xcdepth)--; } -<xc>{xcinside} { +<xc>{ +{xcinside} { /* ignore */ } -<xc>{op_chars} { +{op_chars} { /* ignore */ } -<xc>\*+ { +\*+ { /* ignore */ } -<xc><<EOF>> { yyerror("unterminated /* comment"); } +<<EOF>> { + yyerror("unterminated /* comment"); + } +} /* <xc> */ {xbstart} { /* Binary bit type. diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l index 25253b54ea..eb239685d5 100644 --- a/src/fe_utils/psqlscan.l +++ b/src/fe_utils/psqlscan.l @@ -417,17 +417,19 @@ other . 
ECHO; } -<xc>{xcinside} { +<xc>{ +{xcinside} { ECHO; } -<xc>{op_chars} { +{op_chars} { ECHO; } -<xc>\*+ { +\*+ { ECHO; } +} /* <xc> */ {xbstart} { BEGIN(xb); diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index 421c62339b..16b1c1bb08 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -395,11 +395,11 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ token_start = NULL; %} -<SQL>{whitespace} { +<SQL>{ +{whitespace} { /* ignore */ } - -<SQL>{xcstart} { +{xcstart} { token_start = yytext; state_before = YYSTATE; xcdepth = 0; @@ -408,7 +408,9 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ yyless(2); fputs("/*", yyout); } -<C>{xcstart} { +} /* <SQL> */ + +<C>{xcstart} { token_start = yytext; state_before = YYSTATE; xcdepth = 0; @@ -443,24 +445,30 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ token_start = NULL; } -<xcc,xcsql>{xcinside} { +<xcc,xcsql>{ +{xcinside} { ECHO; } -<xcc,xcsql>{op_chars} { +{op_chars} { ECHO; } -<xcc,xcsql>\*+ { +\*+ { ECHO; } +<<EOF>> { + mmfatal(PARSE_ERROR, "unterminated /* comment"); + } +} /* <xcc,xcsql> */ -<xcc,xcsql><<EOF>> { mmfatal(PARSE_ERROR, "unterminated /* comment"); } - -<SQL>{xbstart} { +<SQL>{ +{xbstart} { token_start = yytext; BEGIN(xb); startlit(); addlitchar('b'); } +} /* <SQL> */ + <xb>{quotestop} | <xb>{quotefail} { yyless(1); @@ -502,7 +510,9 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ BEGIN(xqc); startlit(); } -<SQL>{xnstart} { + +<SQL>{ +{xnstart} { /* National character. * Transfer it as-is to the backend. 
*/ @@ -511,25 +521,27 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ BEGIN(xn); startlit(); } -<SQL>{xqstart} { +{xqstart} { token_start = yytext; state_before = YYSTATE; BEGIN(xq); startlit(); } -<SQL>{xestart} { +{xestart} { token_start = yytext; state_before = YYSTATE; BEGIN(xe); startlit(); } -<SQL>{xusstart} { +{xusstart} { token_start = yytext; state_before = YYSTATE; BEGIN(xus); startlit(); addlit(yytext, yyleng); } +} /* <SQL> */ + <xq,xqc>{quotestop} | <xq,xqc>{quotefail} { yyless(1); @@ -584,13 +596,15 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ addlitchar(yytext[0]); } <xq,xqc,xe,xn,xus><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted string"); } -<SQL>{dolqfailed} { + +<SQL>{ +{dolqfailed} { /* throw back all but the initial "$" */ yyless(1); /* and treat it as {other} */ return yytext[0]; } -<SQL>{dolqdelim} { +{dolqdelim} { token_start = yytext; if (dolqstart) free(dolqstart); @@ -599,6 +613,8 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ startlit(); addlit(yytext, yyleng); } +} /* <SQL> */ + <xdolq>{dolqdelim} { if (strcmp(yytext, dolqstart) == 0) { @@ -631,17 +647,21 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ addlitchar(yytext[0]); } <xdolq><<EOF>> { base_yyerror("unterminated dollar-quoted string"); } -<SQL>{xdstart} { + +<SQL>{ +{xdstart} { state_before = YYSTATE; BEGIN(xd); startlit(); } -<SQL>{xuistart} { +{xuistart} { state_before = YYSTATE; BEGIN(xui); startlit(); addlit(yytext, yyleng); } +} /* <SQL> */ + <xd>{xdstop} { BEGIN(state_before); if (literallen == 0) @@ -677,15 +697,43 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ startlit(); } <xdc>{xdcinside} { addlit(yytext, yyleng); } -<SQL>{typecast} { return TYPECAST; } -<SQL>{dot_dot} { return DOT_DOT; } -<SQL>{colon_equals} { return COLON_EQUALS; } -<SQL>{equals_greater} { return EQUALS_GREATER; } 
-<SQL>{less_equals} { return LESS_EQUALS; } -<SQL>{greater_equals} { return GREATER_EQUALS; } -<SQL>{less_greater} { return NOT_EQUALS; } -<SQL>{not_equals} { return NOT_EQUALS; } -<SQL>{informix_special} { + +<SQL>{ +{typecast} { + return TYPECAST; + } + +{dot_dot} { + return DOT_DOT; + } + +{colon_equals} { + return COLON_EQUALS; + } + +{equals_greater} { + return EQUALS_GREATER; + } + +{less_equals} { + return LESS_EQUALS; + } + +{greater_equals} { + return GREATER_EQUALS; + } + +{less_greater} { + /* We accept both "<>" and "!=" as meaning NOT_EQUALS */ + return NOT_EQUALS; + } + +{not_equals} { + /* We accept both "<>" and "!=" as meaning NOT_EQUALS */ + return NOT_EQUALS; + } + +{informix_special} { /* are we simulating Informix? */ if (INFORMIX_MODE) { @@ -694,7 +742,7 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ else return yytext[0]; } -<SQL>{self} { +{self} { /* * We may find a ';' inside a structure * definition in a TYPE or VAR statement. @@ -704,7 +752,7 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ BEGIN(C); return yytext[0]; } -<SQL>{operator} { +{operator} { /* * Check for embedded slash-star or dash-dash; those * are comment starts, so operator must stop there. 
@@ -800,15 +848,18 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ base_yylval.str = mm_strdup(yytext); return Op; } -<SQL>{param} { +{param} { base_yylval.ival = atol(yytext+1); return PARAM; } -<SQL>{ip} { +{ip} { base_yylval.str = mm_strdup(yytext); return IP; } -<C,SQL>{integer} { +} /* <SQL> */ + +<C,SQL>{ +{integer} { int val; char* endptr; @@ -823,29 +874,32 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ base_yylval.ival = val; return ICONST; } -<C,SQL>{decimal} { +{decimal} { base_yylval.str = mm_strdup(yytext); return FCONST; } -<C,SQL>{real} { +{real} { base_yylval.str = mm_strdup(yytext); return FCONST; } -<SQL>{realfail1} { +} /* <C,SQL> */ + +<SQL>{ +{realfail1} { yyless(yyleng-1); base_yylval.str = mm_strdup(yytext); return FCONST; } -<SQL>{realfail2} { +{realfail2} { yyless(yyleng-2); base_yylval.str = mm_strdup(yytext); return FCONST; } -<SQL>:{identifier}((("->"|\.){identifier})|(\[{array}\]))* { +:{identifier}((("->"|\.){identifier})|(\[{array}\]))* { base_yylval.str = mm_strdup(yytext+1); return CVARIABLE; } -<SQL>{identifier} { +{identifier} { const ScanKeyword *keyword; if (!isdefine()) @@ -873,9 +927,10 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ } } -<SQL>{other} { +{other} { return yytext[0]; } +} /* <SQL> */ <C>{exec_sql} { BEGIN(SQL); return SQL_START; } <C>{informix_special} { -- 2.17.1
From a72d66f25f57fb8c1e5a3e419eea7ec286a7b325 Mon Sep 17 00:00:00 2001 From: John Naylor <jcnay...@gmail.com> Date: Sun, 30 Sep 2018 14:01:08 +0700 Subject: [PATCH v1 4/4] Remove unused functions and declarations from ECPG scanner. --- src/interfaces/ecpg/preproc/extern.h | 4 +-- src/interfaces/ecpg/preproc/pgc.l | 45 +--------------------------- 2 files changed, 2 insertions(+), 47 deletions(-) diff --git a/src/interfaces/ecpg/preproc/extern.h b/src/interfaces/ecpg/preproc/extern.h index d0c4e47016..aef65657da 100644 --- a/src/interfaces/ecpg/preproc/extern.h +++ b/src/interfaces/ecpg/preproc/extern.h @@ -76,7 +76,7 @@ extern char *hashline_number(void); extern int base_yyparse(void); extern int base_yylex(void); extern void base_yyerror(const char *); -extern void *mm_alloc(size_t), *mm_realloc(void *, size_t); +extern void *mm_alloc(size_t); extern char *mm_strdup(const char *); extern void mmerror(int errorcode, enum errortype type, const char *error,...) pg_attribute_printf(3, 4); extern void mmfatal(int errorcode, const char *error,...) 
pg_attribute_printf(2, 3) pg_attribute_noreturn(); @@ -105,9 +105,7 @@ extern void remove_variables(int); extern struct variable *new_variable(const char *, struct ECPGtype *, int); extern const ScanKeyword *ScanCKeywordLookup(const char *); extern const ScanKeyword *ScanECPGKeywordLookup(const char *text); -extern void scanner_init(const char *); extern void parser_init(void); -extern void scanner_finish(void); extern int filtered_base_yylex(void); /* return codes */ diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index 16b1c1bb08..f311e2b875 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -32,8 +32,6 @@ extern YYSTYPE base_yylval; static int xcdepth = 0; /* depth of nesting in slash-star comments */ static char *dolqstart = NULL; /* current $foo$ quote start string */ -static YY_BUFFER_STATE scanbufhandle; -static char *scanbuf; /* * literalbuf is used to accumulate literal values when multiple rules @@ -646,7 +644,7 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ /* single quote or dollar sign */ addlitchar(yytext[0]); } -<xdolq><<EOF>> { base_yyerror("unterminated dollar-quoted string"); } +<xdolq><<EOF>> { mmerror(PARSE_ERROR, ET_ERROR, "unterminated dollar-quoted string"); } <SQL>{ {xdstart} { @@ -1615,44 +1613,3 @@ static bool isinformixdefine(void) return false; } - -/* - * Called before any actual parsing is done - */ -void -scanner_init(const char *str) -{ - Size slen = strlen(str); - - /* - * Might be left over after ereport() - */ - if (YY_CURRENT_BUFFER) - yy_delete_buffer(YY_CURRENT_BUFFER); - - /* - * Make a scan buffer with special termination needed by flex. 
- */ - scanbuf = mm_alloc(slen + 2); - memcpy(scanbuf, str, slen); - scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR; - scanbufhandle = yy_scan_buffer(scanbuf, slen + 2); - - /* initialize literal buffer to a reasonable but expansible size */ - literalalloc = 128; - literalbuf = (char *) mm_alloc(literalalloc); - startlit(); - - BEGIN(INITIAL); -} - - -/* - * Called after parsing is done to clean up after scanner_init() - */ -void -scanner_finish(void) -{ - yy_delete_buffer(scanbufhandle); - free(scanbuf); -} -- 2.17.1