Re: [PATCH 08/13] yacc.c: escape trigraphs in detailed parse.error

Adrian Vogelsgesang Sun, 19 Jan 2020 08:41:06 -0800

Nitpick:
+# the special characters are not longer escaped, and it produces
"not longer" -> "no longer"


On 18/01/2020, 16:40, "bison-patches on behalf of Akim Demaille" 
<[email protected] on behalf of 
[email protected]> wrote:

    * src/output.c (escape_trigraphs, xescape_trigraphs): New.
    (prepare_symbol_names): Use it.
    * tests/regression.at: Check the handling of trigraphs with
    parse.error = detailed.
    ---
     src/output.c        | 52 ++++++++++++++++++++++++++++++++++++++++++++-
     tests/regression.at | 21 +++++++++++++-----
     2 files changed, 67 insertions(+), 6 deletions(-)
    
    diff --git a/src/output.c b/src/output.c
    index 9d597e2d..8de00ed2 100644
    --- a/src/output.c
    +++ b/src/output.c
    @@ -132,6 +132,56 @@ string_output (FILE *out, char const *string)
     }
     
     
    +/* Store in BUFFER a copy of SRC where trigraphs are escaped, return
    +   the size of the result (including the final NUL).  If called with
    +   BUFFERSIZE = 0, returns the needed size for BUFFER.  */
    +static ptrdiff_t
    +escape_trigraphs (char *buffer, ptrdiff_t buffersize, const char *src)
    +{
    +#define STORE(c)                                \
    +  do                                            \
    +    {                                           \
    +      if (res < buffersize)                     \
    +        buffer[res] = (c);                      \
    +      ++res;                                    \
    +    }                                           \
    +  while (0)
    +  ptrdiff_t res = 0;
    +  for (ptrdiff_t i = 0, len = strlen (src); i < len; ++i)
    +    {
    +      if (i + 2 < len
    +          && src[i] == '?' && src[i+1] == '?')
    +        {
    +          switch (src[i+2])
    +            {
    +            case '!': case '\'':
    +            case '(': case ')': case '-': case '/':
    +            case '<': case '=': case '>':
    +              i += 1;
    +              STORE ('?');
    +              STORE ('"');
    +              STORE ('"');
    +              STORE ('?');
    +              continue;
    +            }
    +        }
    +      STORE (src[i]);
    +    }
    +  STORE ('\0');
    +#undef STORE
    +  return res;
    +}
    +
    +/* Same as xstrdup, except that trigraphs are escaped.  */
    +static char *
    +xescape_trigraphs (const char *src)
    +{
    +  ptrdiff_t bufsize = escape_trigraphs (NULL, 0, src);
    +  char *buf = xcharalloc (bufsize);
    +  escape_trigraphs (buf, bufsize, src);
    +  return buf;
    +}
    +
     /* Generate the b4_<MUSCLE_NAME> (e.g., b4_tname) table with the
        symbol names (aka tags). */
     
    @@ -148,7 +198,7 @@ prepare_symbol_names (char const *muscle_name)
         {
           char *cp =
             symbols[i]->tag[0] == '"' && !quote
    -        ? xstrdup (symbols[i]->tag)
    +        ? xescape_trigraphs (symbols[i]->tag)
             : quotearg_alloc (symbols[i]->tag, -1, qo);
           /* Width of the next token, including the two quotes, the
              comma and the space.  */
    diff --git a/tests/regression.at b/tests/regression.at
    index 7d304614..b293fb35 100644
    --- a/tests/regression.at
    +++ b/tests/regression.at
    @@ -366,17 +366,17 @@ AT_CLEANUP
     ## Token definitions.  ##
     ## ------------------- ##
     
    +m4_pushdef([AT_TEST],
    +[AT_SETUP([Token definitions: $1])
     
    -AT_SETUP([Token definitions])
    -
    -AT_BISON_OPTION_PUSHDEFS
    +AT_BISON_OPTION_PUSHDEFS([$1])
     
     AT_DATA_GRAMMAR([input.y],
     [%{
     ]AT_YYERROR_DECLARE[
     ]AT_YYLEX_DECLARE[
     %}
    -[%define parse.error verbose
    +[$1
     %token MYEOF 0 "end of file"
     %token 'a' "a"  // Bison managed, when fed with '%token 'f' "f"' to #define 'f'!
     %token B_TOKEN "b"
    @@ -391,7 +391,6 @@ exp: "a" "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!";
     ]AT_YYLEX_DEFINE([{ SPECIAL }])[
     ]AT_MAIN_DEFINE[
     ]])
    -AT_BISON_OPTION_POPDEFS
     
     # Checking the warning message guarantees that the trigraph "??!" isn't
     # unnecessarily escaped here even though it would need to be if encoded in a
    @@ -411,6 +410,7 @@ input.y:22.16-63: warning: symbol "\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!" used
     
     AT_COMPILE([input])
     
    +AT_ERROR_VERBOSE_IF([
     # Checking the error message here guarantees that yytname, which does contain
     # C-string literals, does have the trigraph escaped correctly.  Thus, the
     # symbol name reported by the parser is exactly the same as that reported by
    @@ -419,9 +419,20 @@ AT_DATA([experr],
     [[syntax error, unexpected "\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!", expecting a
     ]])
     AT_PARSER_CHECK([input], 1, [], [experr])
    +])
    +
    +# We don't check the error message in "detailed" parse.error, since
    +# the special characters are not longer escaped, and it produces
    +# invalid UTF-8.
    +
    +AT_BISON_OPTION_POPDEFS
     AT_CLEANUP
    +])
     
    +AT_TEST([%define parse.error detailed])
    +AT_TEST([%define parse.error verbose])
     
    +m4_popdef([AT_TEST])
     
     ## -------------------- ##
     ## Characters Escapes.  ##
    -- 
    2.24.1

Re: [PATCH 08/13] yacc.c: escape trigraphs in detailed parse.error

Reply via email to