Nitpicking: in the line "+# the special characters are not longer escaped, and it produces", "not longer" should be "no longer".
On 18/01/2020, 16:40, "bison-patches on behalf of Akim Demaille" <[email protected] on behalf of [email protected]> wrote: * src/output.c (escape_trigraphs, xescape_trigraphs): New. (prepare_symbol_names): Use it. * tests/regression.at: Check the handling of trigraphs with parse.error = detailed. --- src/output.c | 52 ++++++++++++++++++++++++++++++++++++++++++++- tests/regression.at | 21 +++++++++++++----- 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/src/output.c b/src/output.c index 9d597e2d..8de00ed2 100644 --- a/src/output.c +++ b/src/output.c @@ -132,6 +132,56 @@ string_output (FILE *out, char const *string) } +/* Store in BUFFER a copy of SRC where trigraphs are escaped, return + the size of the result (including the final NUL). If called with + BUFFERSIZE = 0, returns the needed size for BUFFER. */ +static ptrdiff_t +escape_trigraphs (char *buffer, ptrdiff_t buffersize, const char *src) +{ +#define STORE(c) \ + do \ + { \ + if (res < buffersize) \ + buffer[res] = (c); \ + ++res; \ + } \ + while (0) + ptrdiff_t res = 0; + for (ptrdiff_t i = 0, len = strlen (src); i < len; ++i) + { + if (i + 2 < len + && src[i] == '?' && src[i+1] == '?') + { + switch (src[i+2]) + { + case '!': case '\'': + case '(': case ')': case '-': case '/': + case '<': case '=': case '>': + i += 1; + STORE ('?'); + STORE ('"'); + STORE ('"'); + STORE ('?'); + continue; + } + } + STORE (src[i]); + } + STORE ('\0'); +#undef STORE + return res; +} + +/* Same as xstrdup, except that trigraphs are escaped. */ +static char * +xescape_trigraphs (const char *src) +{ + ptrdiff_t bufsize = escape_trigraphs (NULL, 0, src); + char *buf = xcharalloc (bufsize); + escape_trigraphs (buf, bufsize, src); + return buf; +} + /* Generate the b4_<MUSCLE_NAME> (e.g., b4_tname) table with the symbol names (aka tags). */ @@ -148,7 +198,7 @@ prepare_symbol_names (char const *muscle_name) { char *cp = symbols[i]->tag[0] == '"' && !quote - ? xstrdup (symbols[i]->tag) + ? 
xescape_trigraphs (symbols[i]->tag) : quotearg_alloc (symbols[i]->tag, -1, qo); /* Width of the next token, including the two quotes, the comma and the space. */ diff --git a/tests/regression.at b/tests/regression.at index 7d304614..b293fb35 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -366,17 +366,17 @@ AT_CLEANUP ## Token definitions. ## ## ------------------- ## +m4_pushdef([AT_TEST], +[AT_SETUP([Token definitions: $1]) -AT_SETUP([Token definitions]) - -AT_BISON_OPTION_PUSHDEFS +AT_BISON_OPTION_PUSHDEFS([$1]) AT_DATA_GRAMMAR([input.y], [%{ ]AT_YYERROR_DECLARE[ ]AT_YYLEX_DECLARE[ %} -[%define parse.error verbose +[$1 %token MYEOF 0 "end of file" %token 'a' "a" // Bison managed, when fed with '%token 'f' "f"' to #define 'f'! %token B_TOKEN "b" @@ -391,7 +391,6 @@ exp: "a" "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!"; ]AT_YYLEX_DEFINE([{ SPECIAL }])[ ]AT_MAIN_DEFINE[ ]]) -AT_BISON_OPTION_POPDEFS # Checking the warning message guarantees that the trigraph "??!" isn't # unnecessarily escaped here even though it would need to be if encoded in a @@ -411,6 +410,7 @@ input.y:22.16-63: warning: symbol "\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!" used AT_COMPILE([input]) +AT_ERROR_VERBOSE_IF([ # Checking the error message here guarantees that yytname, which does contain # C-string literals, does have the trigraph escaped correctly. Thus, the # symbol name reported by the parser is exactly the same as that reported by @@ -419,9 +419,20 @@ AT_DATA([experr], [[syntax error, unexpected "\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!", expecting a ]]) AT_PARSER_CHECK([input], 1, [], [experr]) +]) + +# We don't check the error message in "detailed" parse.error, since +# the special characters are not longer escaped, and it produces +# invalid UTF-8. + +AT_BISON_OPTION_POPDEFS AT_CLEANUP +]) +AT_TEST([%define parse.error detailed]) +AT_TEST([%define parse.error verbose]) +m4_popdef([AT_TEST]) ## -------------------- ## ## Characters Escapes. ## -- 2.24.1
