Re: [hackers] [PATCH v3][sbase] libutil/unescape.c: simplify and add \E

Mattias Andrée Mon, 06 Feb 2017 16:04:23 -0800

On Mon, 06 Feb 2017 15:50:04 -0800
evan.ga...@gmail.com (Evan Gates) wrote:


> Mattias Andrée <maand...@kth.se> wrote:
> 
> > On Mon, 06 Feb 2017 15:05:32 -0800
> > evan.ga...@gmail.com (Evan Gates) wrote:
> >   
> > > Mattias Andrée <maand...@kth.se> wrote:
> > >   
> > > > +               } else if (escapes[*r & 255]) {
> > > > +                       *w++ = escapes[*r++ &
> > > > 255];    
> > > 
> > > Why do you & 255 here? I think a cast to unsigned char
> > > would accomplish what you are trying to do and be more
> > > correct (as char can default to either signed or
> > > unsigned). Although I may misunderstand what is going
> > > on here.  
> > 
> > Yes. I used &255 because it's does clutter as much.  
> 
> OK. I'm going to change to a cast to be on the safe side
> due to the "implementation-defined and undefined aspects"
> mentioned in C99 6.5p4:
> 
> Some operators (the unary operator ~, and the binary
> operators <<, >>, &, ^, and |, collectively described as
> bitwise operators) are required to have operands that
> have integer type. These operators yield values that
> depend on the internal representations of integers, and
> have implementation-defined and undefined aspects for
> signed types.

Sure.

> 
> > > I think this is clearer, even though it adds a
> > > conditional:
> > > 
> > > q = q * 16 + isdigit(*r) ? *r - '0' : tolower(*r) -
> > > 'a' + 10;  
> > 
> > I think
> > 
> >     if (isdigit(*r))
> >             q = q * 16 + (*r - '0');
> >     else
> >             q = q * 16 + (tolower(*r) - 'a') + 10;
> > 
> > is clearer, or at least brackets around the ternary.  
> 
> You're right, the line was getting a bit long and
> convoluted with the ternary, bad habit of mine.
> 
> Another thought just came to mind, isoctal is a reserved
> name. 7.26p1:
> 
> The following names are grouped under individual headers
> for convenience. All external names described below are
> reserved no matter what headers are included by the
> program.
> 
> 7.26.2p1:
> 
> Function names that begin with either is or to, and a
> lowercase letter may be added to the declarations in the
> <ctype.h> header.
> 
> I don't know if it's worth being anal about that,
> especially as we have already limited ourselves to C99 do
> we need to worry about future directions? For now I've
> changed isoctal() to is_odigit() but I'm open to
> discussion.

Sure, I didn't think about that, however, I think we can
be pretty confident that if isoctal is added, it will do
that same thing as that macro, is will not make a difference.
However, I think isodigit probably what such a function
will be called since there already is isxdigit, so why not
change it to is_odigit or ODIGIT.

> 
> Updated patch for consideration:
> 
> ----- 8< ----- 8< ----- 8< -----
> From 30fd43d7f3b8716054eb9867c835aadc423f652c Mon Sep 17
> 00:00:00 2001 From: =?UTF-8?q?Mattias=20Andr=C3=A9e?=
> <maand...@kth.se> Date: Sun, 5 Feb 2017 00:44:35 +0100
> Subject: [PATCH] libutil/unescape.c: simplify and add \E
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Signed-off-by: Mattias Andrée <maand...@kth.se>
> ---
>  libutil/unescape.c | 101
> ++++++++++++++++++++++------------------------------- 1
> file changed, 42 insertions(+), 59 deletions(-)
> 
> diff --git a/libutil/unescape.c b/libutil/unescape.c
> index d1503e6..7523db3 100644
> --- a/libutil/unescape.c
> +++ b/libutil/unescape.c
> @@ -1,74 +1,57 @@
>  /* See LICENSE file for copyright and license details. */
> +#include <ctype.h>
>  #include <string.h>
>  
>  #include "../util.h"
>  
> +#define is_odigit(c)  ('0' <= c && c <= '7')
> +
>  size_t
>  unescape(char *s)
>  {
> -     size_t len, i, off, m, factor, q;
> -
> -     len = strlen(s);
> +     static const char escapes[256] = {
> +             ['"'] = '"',
> +             ['\''] = '\'',
> +             ['\\'] = '\\',
> +             ['a'] = '\a',
> +             ['b'] = '\b',
> +             ['E'] = 033,
> +             ['e'] = 033,
> +             ['f'] = '\f',
> +             ['n'] = '\n',
> +             ['r'] = '\r',
> +             ['t'] = '\t',
> +             ['v'] = '\v'
> +     };
> +     size_t m, q;
> +     char *r, *w;
>  
> -     for (i = 0; i < len; i++) {
> -             if (s[i] != '\\')
> +     for (r = w = s; *r;) {
> +             if (*r != '\\') {
> +                     *w++ = *r++;
>                       continue;
> -             off = 0;
> -
> -             switch (s[i + 1]) {
> -             case '\\': s[i] = '\\'; off++; break;
> -             case '\'': s[i] = '\'', off++; break;
> -             case '"':  s[i] =  '"', off++; break;
> -             case 'a':  s[i] = '\a'; off++; break;
> -             case 'b':  s[i] = '\b'; off++; break;
> -             case 'e':  s[i] =  033; off++; break;
> -             case 'f':  s[i] = '\f'; off++; break;
> -             case 'n':  s[i] = '\n'; off++; break;
> -             case 'r':  s[i] = '\r'; off++; break;
> -             case 't':  s[i] = '\t'; off++; break;
> -             case 'v':  s[i] = '\v'; off++; break;
> -             case 'x':
> -                     /* "\xH[H]" hexadecimal escape */
> -                     for (m = i + 2; m < i + 1 + 3 &&
> m < len; m++)
> -                             if ((s[m] < '0' && s[m]
> > '9') &&
> -                                 (s[m] < 'A' && s[m]
> > 'F') &&
> -                                 (s[m] < 'a' && s[m]
> > 'f'))
> -                                     break;
> -                     if (m == i + 2)
> -                             eprintf("invalid escape
> sequence '\\%c'\n", s[i + 1]);
> -                     off += m - i - 1;
> -                     for (--m, q = 0, factor = 1; m >
> i + 1; m--) {
> -                             if (s[m] >= '0' && s[m]
> <= '9')
> -                                     q += (s[m] -
> '0') * factor;
> -                             else if (s[m] >= 'A' &&
> s[m] <= 'F')
> -                                     q += ((s[m] -
> 'A') + 10) * factor;
> -                             else if (s[m] >= 'a' &&
> s[m] <= 'f')
> -                                     q += ((s[m] -
> 'a') + 10) * factor;
> -                             factor *= 16;
> -                     }
> -                     s[i] = q;
> -                     break;
> -             case '\0':
> +             }
> +             r++;
> +             if (!*r) {
>                       eprintf("null escape
> sequence\n");
> -             default:
> -                     /* "\O[OOO]" octal escape */
> -                     for (m = i + 1; m < i + 1 + 4 &&
> m < len; m++)
> -                             if (s[m] < '0' || s[m] >
> '7')
> -                                     break;
> -                     if (m == i + 1)
> -                             eprintf("invalid escape
> sequence '\\%c'\n", s[i + 1]);
> -                     off += m - i - 1;
> -                     for (--m, q = 0, factor = 1; m >
> i; m--) {
> -                             q += (s[m] - '0') *
> factor;
> -                             factor *= 8;
> -                     }
> -                     s[i] = (q > 255) ? 255 : q;
> +             } else if (escapes[(unsigned char)*r]) {
> +                     *w++ = escapes[(unsigned
> char)*r++];
> +             } else if (is_odigit(*r)) {
> +                     for (q = 0, m = 4; m &&
> is_odigit(*r); m--, r++)
> +                             q = q * 8 + (*r - '0');
> +                     *w++ = MIN(q, 255);
> +             } else if (*r == 'x' && isxdigit(r[1])) {
> +                     r++;
> +                     for (q = 0, m = 2; m &&
> isxdigit(*r); m--, r++)
> +                             if (isdigit(*r))
> +                                     q = q * 16 + (*r
> - '0');
> +                             else
> +                                     q = q * 16 +
> (tolower(*r) - 'a' + 10);
> +                     *w++ = q;
> +             } else {
> +                     eprintf("invalid escape sequence
> '\\%c'\n", *r); }
> -
> -             for (m = i + 1; m <= len - off; m++)
> -                     s[m] = s[m + off];
> -             len -= off;
>       }
>  
> -     return len;
> +     return w - s;
>  }

pgpoJIH3pwjEz.pgp
Description: OpenPGP digital signature

Re: [hackers] [PATCH v3][sbase] libutil/unescape.c: simplify and add \E

Reply via email to