On 12/20/23 11:46 PM, minf...@apache.org wrote:
> Author: minfrin
> Date: Wed Dec 20 22:46:22 2023
> New Revision: 1914814
> 
> URL: http://svn.apache.org/viewvc?rev=1914814&view=rev
> Log:
> apr_escape: Add apr_escape_json() and apr_pescape_json().
> 
> Modified:
>     apr/apr/trunk/CHANGES
>     apr/apr/trunk/encoding/apr_escape.c
>     apr/apr/trunk/include/apr_escape.h
>     apr/apr/trunk/test/testescape.c
>     apr/apr/trunk/tools/gen_test_char.c
> 

> Modified: apr/apr/trunk/encoding/apr_escape.c
> URL: 
> http://svn.apache.org/viewvc/apr/apr/trunk/encoding/apr_escape.c?rev=1914814&r1=1914813&r2=1914814&view=diff
> ==============================================================================
> --- apr/apr/trunk/encoding/apr_escape.c (original)
> +++ apr/apr/trunk/encoding/apr_escape.c Wed Dec 20 22:46:22 2023
> @@ -36,7 +36,7 @@
>   * char in here and get it to work, because if char is signed then it
>   * will first be sign extended.
>   */
> -#define TEST_CHAR(c, f)        (test_char_table[(unsigned)(c)] & (f))
> +#define TEST_CHAR(c, f)        (test_char_table[(apr_uint16_t)(c)] & (f))
>  
>  APR_DECLARE(apr_status_t) apr_escape_shell(char *escaped, const char *str,
>          apr_ssize_t slen, apr_size_t *len)
> @@ -1209,6 +1209,271 @@ APR_DECLARE(const char *) apr_pescape_ld
>      }
>      }
>  
> +    return src;
> +}
> +
> +APR_DECLARE(apr_status_t) apr_escape_json(char *escaped, const char *str,
> +        apr_ssize_t slen, int quote, apr_size_t *len)

Shouldn't we provide a length for the escaped buffer to be able to do some 
sanity checking against overflows?
We could ignore this parameter in case that escaped is NULL.
Otherwise I think we should have a big fat warning in the doxygen documentation 
of this function that it
is extremely dangerous to call this function without providing a buffer of 
sufficient length and that
apr_escape_json should be called with escaped equal NULL before to determine 
this length, just like as
we do in apr_pescape_json.

> +{
> +    apr_size_t size = 1;
> +    int found = quote;
> +    int error = 0;
> +    const unsigned char *s = (const unsigned char *) str;
> +    unsigned char *d = (unsigned char *) escaped;
> +    unsigned c;
> +    const char invalid[3] = { 0xEF, 0xBF, 0xBD };
> +
> +    if (s) {
> +        if (d) {
> +            if (quote) {
> +                *d++ = '"';
> +                size += 1;
> +            }
> +            while ((c = *s) && slen) {
> +                if (TEST_CHAR(c, T_ESCAPE_JSON)) {
> +                    switch (c) {
> +                    case '\b':
> +                        *d++ = '\\';
> +                        *d++ = 'b';
> +                        size += 2;
> +                        found = 1;
> +                        break;
> +                    case '\f':
> +                        *d++ = '\\';
> +                        *d++ = 'f';
> +                        size += 2;
> +                        found = 1;
> +                        break;
> +                    case '\n':
> +                        *d++ = '\\';
> +                        *d++ = 'n';
> +                        size += 2;
> +                        found = 1;
> +                        break;
> +                    case '\r':
> +                        *d++ = '\\';
> +                        *d++ = 'r';
> +                        size += 2;
> +                        found = 1;
> +                        break;
> +                    case '\t':
> +                        *d++ = '\\';
> +                        *d++ = 't';
> +                        size += 2;
> +                        found = 1;
> +                        break;
> +                    case '\\':
> +                        *d++ = '\\';
> +                        *d++ = '\\';
> +                        size += 2;
> +                        found = 1;
> +                        break;
> +                    case '"':
> +                        *d++ = '\\';
> +                        *d++ = '"';
> +                        size += 2; 
> +                        found = 1;
> +                        break;
> +                    default:
> +                        if (c < 0x20) {
> +                            size += apr_snprintf((char *)d, 6, "\\u%04x", c);
> +                            d += 5; 
> +                            found = 1;
> +                        }
> +                        else if (((c >> 7) == 0x00)) {

How can this case happen? c's like this should fail the TEST_CHAR(c, 
T_ESCAPE_JSON) test.

> +                            /* 1 byte */
> +                            memcpy(d, s, 1);
> +                            d += 1;
> +                            size += 1;
> +                        }
> +                        else if ((slen < 0 || slen > 1) && ((c >> 5) == 
> 0x06) && s[1]) {
> +                            /* 2 bytes */
> +                            if ((slen > 0 && slen < 2) || (s[1] >> 6) != 
> 0x02) {
> +                                memcpy(d, invalid, sizeof(invalid));
> +                                d += sizeof(invalid);
> +                                size += sizeof(invalid);
> +                                found = error = 1;
> +                            }
> +                            else {
> +                                memcpy(d, s, 2);
> +                                d += 2;
> +                                size += 2;
> +                                s += 1;
> +                                slen -= 1;
> +                            }
> +                        }
> +                        else if (((c >> 4) == 0x0E)) {
> +                            /* 3 bytes */
> +                            if ((slen > 0 && slen < 3) || (s[1] >> 6) != 
> 0x02 || (s[2] >> 6) != 0x02) {
> +                                memcpy(d, invalid, sizeof(invalid));
> +                                size += sizeof(invalid);
> +                                d += sizeof(invalid);
> +                                found = error = 1;
> +                            }
> +                            else {
> +                                memcpy(d, s, 3);
> +                                d += 3;
> +                                size += 3;
> +                                s += 2;
> +                                slen -= 2;
> +                            }
> +                        }
> +                        else if ((c >> 3) == 0x1E) {
> +                            /* 4 bytes */
> +                            if ((slen > 0 && slen < 4) || (s[1] >> 6) != 
> 0x02 || (s[2] >> 6) != 0x02 || (s[3] >> 6) != 0x02) {
> +                                memcpy(d, invalid, sizeof(invalid));
> +                                d += sizeof(invalid);
> +                                size += sizeof(invalid);
> +                                found = error = 1;
> +                            }
> +                            else {
> +                                memcpy(d, s, 4);
> +                                d += 4;
> +                                size += 4;
> +                                s += 3;
> +                                slen -= 3;
> +                            }
> +                        }
> +                        else {
> +                            memcpy(d, invalid, sizeof(invalid));
> +                            d += sizeof(invalid);
> +                            size += sizeof(invalid);
> +                            found = error = 1;
> +                        }
> +                        break;
> +                    }
> +                }
> +                else {
> +                    *d++ = c;
> +                    size++;
> +                }
> +                ++s;
> +                slen--;
> +            }
> +            if (quote) {
> +                *d++ = '"';
> +                size += 1;
> +            }
> +            *d = '\0';
> +        }
> +        else {
> +            if (quote) {
> +                size += 1;
> +            }
> +            while ((c = *s) && slen) {
> +                if (TEST_CHAR(c, T_ESCAPE_JSON)) {
> +                    switch (c) {
> +                    case '\b':
> +                    case '\f':
> +                    case '\n':
> +                    case '\r':
> +                    case '\t':
> +                    case '\\':
> +                    case '"':
> +                        size += 2;
> +                        found = 1;
> +                        break;
> +                    default:
> +                        if (c < 0x20) {
> +                            size += 5;
> +                            found = 1;
> +                        }
> +                        else if (((c >> 7) == 0x00)) {

How can this case happen? c's like this should fail the TEST_CHAR(c, 
T_ESCAPE_JSON) test.

> +                            /* 1 byte */
> +                            size += 1;
> +                        }
> +                        else if ((slen < 0 || slen > 1) && ((c >> 5) == 
> 0x06) && s[1]) {
> +                            /* 2 bytes */
> +                            if ((slen > 0 && slen < 2) || (s[1] >> 6) != 
> 0x02) {
> +                                size += sizeof(invalid);
> +                                found = error = 1;
> +                            }
> +                            else {
> +                                size += 2;
> +                                s += 1;
> +                                slen -= 1;
> +                            }
> +                        }
> +                        else if (((c >> 4) == 0x0E)) {
> +                            /* 3 bytes */
> +                            if ((slen > 0 && slen < 3) || (s[1] >> 6) != 
> 0x02 || (s[2] >> 6) != 0x02) {
> +                                size += sizeof(invalid);
> +                                found = error = 1;
> +                            }
> +                            else {
> +                                size += 3;
> +                                s += 2;
> +                                slen -= 2;
> +                            }
> +                        }
> +                        else if ((c >> 3) == 0x1E) {
> +                            /* 4 bytes */
> +                            if ((slen > 0 && slen < 4) || (s[1] >> 6) != 
> 0x02 || (s[2] >> 6) != 0x02 || (s[3] >> 6) != 0x02) {
> +                                size += sizeof(invalid);
> +                                found = error = 1;
> +                            }
> +                            else {
> +                                size += 4;
> +                                s += 3;
> +                                slen -= 3;
> +                            }
> +                        }
> +                        else {
> +                            size += sizeof(invalid);
> +                            found = error = 1;
> +                        }
> +                    }
> +                }
> +                else {
> +                    size++;
> +                }
> +                ++s;
> +                slen--;
> +            }
> +            if (quote) {
> +                size += 1;
> +            }
> +        }
> +    }
> +
> +    else if (quote) {
> +        if (d) {
> +            memcpy(d, "NULL", 5);
> +        }
> +        else {
> +            size += 4;
> +        }
> +    }
> +
> +    if (len) {
> +        *len = size;
> +    }
> +    if (error) {
> +        return APR_EINVAL;
> +    }
> +    if (!found) {
> +        return APR_NOTFOUND;
> +    }
> +
> +    return APR_SUCCESS;
> +}
> +
> +APR_DECLARE(const char *) apr_pescape_json(apr_pool_t *p, const char *src,
> +        apr_ssize_t srclen, int quote)

The caller cannot easily determine if the src contained invalid data or was 
already
properly escaped.
Can't we do a

APR_DECLARE(apr_status_t) pr_pescape_json(apr_pool_t *p, char **dest, const 
char *src,
        apr_ssize_t srclen, int quote)

instead which returns the status of apr_escape_json and the escaped JSON in 
dest?


> +{
> +    apr_size_t len;
> +
> +    switch (apr_escape_json(NULL, src, srclen, quote, &len)) {
> +    case APR_NOTFOUND: {
> +        break;
> +    }
> +    default: {
> +        char *encoded = apr_palloc(p, len);
> +        apr_escape_json(encoded, src, srclen, quote, NULL);
> +        return encoded;
> +    }
> +    }
> +
>      return src;
>  }
>  

Reply via email to