This change is crashing in the OSS-Fuzz run of httpd (I guess it uses apr-trunk) on the following input:

ssipuuO://[  ]:             %25                                   ˇˇ

On Thu, Aug 21, 2025 at 4:59 AM <rpl...@apache.org> wrote:
>
> Author: rpluem
> Date: Thu Aug 21 08:59:47 2025
> New Revision: 1927937
>
> Log:
> Implement RFC6874 zone identifier encoding / decoding
>
> Update apr_uri_parse() and apr_uri_unparse() to decode / encode
> a % separating a zone identifier in an IPv6 literal if detected following
> RFC6874.
>
> * test/testuri.c: Add tests
>
> * uri/apr_uri.c:
>   - Add helper functions detect_scope_zone_id and percent_decode_scope_zone_id.
>   - apr_uri_unparse: Encode a % separating a zone identifier following RFC6874.
>   - apr_uri_parse: Decode a % separating a zone identifier following RFC6874.
>
> Modified:
>    apr/apr/trunk/CHANGES
>    apr/apr/trunk/test/testuri.c
>    apr/apr/trunk/uri/apr_uri.c
>
> Modified: apr/apr/trunk/CHANGES
> ==============================================================================
> --- apr/apr/trunk/CHANGES       Thu Aug 21 08:30:16 2025        (r1927936)
> +++ apr/apr/trunk/CHANGES       Thu Aug 21 08:59:47 2025        (r1927937)
> @@ -1,6 +1,10 @@
>                                                       -*- coding: utf-8 -*-
>  Changes for APR 2.0.0
>
> +  *) apr_uri: Update apr_uri_parse() and apr_uri_unparse() to decode / encode
> +     a % separating a zone identifier in an IPv6 literal if detected following
> +     RFC6874. [Jens Finkhaeuser <j...@finkhaeuser.de>, Ruediger Pluem]
> +
>    *) apr_crypto_openssl: Add provider support on OpenSSL3+.
>       [Graham Leggett]
>
>
> Modified: apr/apr/trunk/test/testuri.c
> ==============================================================================
> --- apr/apr/trunk/test/testuri.c        Thu Aug 21 08:30:16 2025        (r1927936)
> +++ apr/apr/trunk/test/testuri.c        Thu Aug 21 08:59:47 2025        (r1927937)
> @@ -92,6 +92,11 @@ struct aup_test aup_tests[] =
>          0, "http", "sonyamt@[fe80::1]", "sonyamt", NULL, "fe80::1", NULL, "/filespace/", "arg1=store", NULL, 0
>      },
>      {
> +        /* https://datatracker.ietf.org/doc/html/rfc6874 */
> +        "http://[fe80::1%25iface]/",
> +        0, "http", "[fe80::1%25iface]", NULL, NULL, "fe80::1%iface", NULL, "/", NULL, NULL, 0
> +    },
> +    {
>          "http://localhost",
>          0, "http", "localhost", NULL, NULL, "localhost", NULL, NULL, NULL, NULL, 0
>      },
> @@ -204,6 +209,11 @@ struct uph_test uph_tests[] =
>          0, "fe80::1", "443", 443
>      },
>      {
> +        /* https://datatracker.ietf.org/doc/html/rfc6874 */
> +        "[fe80::1%25iface]:443",
> +        0, "fe80::1%iface", "443", 443
> +    },
> +    {
>          "127.0.0.1:443",
>          0, "127.0.0.1", "443", 443
>      },
>
> Modified: apr/apr/trunk/uri/apr_uri.c
> ==============================================================================
> --- apr/apr/trunk/uri/apr_uri.c Thu Aug 21 08:30:16 2025        (r1927936)
> +++ apr/apr/trunk/uri/apr_uri.c Thu Aug 21 08:59:47 2025        (r1927937)
> @@ -70,6 +70,85 @@ static schemes_t schemes[] =
>      { NULL, 0xFFFF }     /* unknown port */
>  };
>
> +/*
> + * *only* for IPv6 addresses with a zone identifier according to RFC6874
> + */
> +static apr_status_t detect_scope_zone_id(int * have_zone_id, char const * ipv6addr, size_t len)
> +{
> +    *have_zone_id = 0;
> +    char *s;
> +
> +    if (len < 3) {
> +        /* Need *at least* the three characters for a percent-encoded percent
> +         * sign.
> +         */
> +        return APR_SUCCESS;
> +    }
> +
> +    s = memchr(ipv6addr, '%', len);
> +    if (s != NULL && s < ipv6addr + len - 2) {
> +        /* RFC3986 is pretty specific about how to percent encode, but
> +         * decoding is to be performed per component, which is what we
> +         * already have here. On the other hand, RFC6874 is clear that
> +         * the delimiter for a zone identifier must be a percent encoded
> +         * percent, i.e. "%25". Any other percent-encoded character is
> +         * invalid here.
> +         */
> +        if (s[1] != '2' || s[2] != '5') {
> +            return APR_EINVAL;
> +        }
> +        *have_zone_id = 1;
> +    }
> +    return APR_SUCCESS;
> +}
> +
> +static void percent_decode_scope_zone_id(char *hostname)
> +{
> +    /* RFC6874 is a little hand-wavy in terms of what to decode. Technically,
> +     * all percent-encoded characters should be decoded, but also, the RFC states
> +     * that they SHOULD not occur, basically.
> +     *
> +     * So let's assume they don't, to keep things simple. Because otherwise we'd
> +     * have to deal with full RFC3986 rules and perform UTF-8 decoding as well
> +     * and all that.
> +     */
> +    size_t len = strlen(hostname);
> +
> +    /* We know from the caller already that this *is* a percent encoded
> +     * percent sign, so we just want to skip it. Trust the caller here.
> +     */
> +    char *s = memchr(hostname, '%', len);
> +    size_t offset = s - hostname;
> +    memmove(hostname + offset + 1, hostname + offset + 3, len - offset - 2);
> +}
> +
> +static char * percent_encode_scope_zone_id(apr_pool_t *p, apr_uri_t const *uptr)
> +{
> +    /* Inverse to the logic in the decode function, we need to encode the first
> +     * percent sign we encounter (if any).
> +     */
> +    size_t len = strlen(uptr->hostname);
> +    char * s = memchr(uptr->hostname, '%', len);
> +    size_t offset;
> +    char *hostcopy;
> +
> +    if (s == NULL) {
> +        return uptr->hostname;
> +    }
> +
> +    offset = s - uptr->hostname;
> +
> +    hostcopy = apr_palloc(p, len + 2);
> +    memcpy(hostcopy, uptr->hostname, offset + 1);
> +    hostcopy[offset + 1] = '2';
> +    hostcopy[offset + 2] = '5';
> +    memcpy(hostcopy + offset + 3, uptr->hostname + offset + 1,
> +           len - offset - 1);
> +    hostcopy[len + 2] = '\0';
> +
> +    return hostcopy;
> +}
> +
>  APR_DECLARE(apr_port_t) apr_uri_port_of_scheme(const char *scheme_str)
>  {
>      schemes_t *scheme;
> @@ -118,10 +197,13 @@ APR_DECLARE(char *) apr_uri_unparse(apr_
>          if (uptr->hostname) {
>              int is_default_port;
>              const char *lbrk = "", *rbrk = "";
> +            char *host = uptr->hostname;
>
> -            if (strchr(uptr->hostname, ':')) { /* v6 literal */
> +            if (strchr(host, ':')) { /* v6 literal */
>                  lbrk = "[";
>                  rbrk = "]";
> +
> +                host = percent_encode_scope_zone_id(p, uptr);
>              }
>
>              is_default_port =
> @@ -129,7 +211,7 @@ APR_DECLARE(char *) apr_uri_unparse(apr_
>                   uptr->port == 0 ||
>                   uptr->port == apr_uri_port_of_scheme(uptr->scheme));
>
> -            ret = apr_pstrcat(p, "//", ret, lbrk, uptr->hostname, rbrk,
> +            ret = apr_pstrcat(p, "//", ret, lbrk, host, rbrk,
>                          is_default_port ? "" : ":",
>                          is_default_port ? "" : uptr->port_str,
>                          NULL);
> @@ -728,6 +810,7 @@ APR_DECLARE(apr_status_t) apr_uri_parse(
>      char *endstr;
>      int port;
>      int v6_offset1 = 0, v6_offset2 = 0;
> +    int have_zone_id = 0;
>
>      /* Initialize the structure. parse_uri() and parse_uri_components()
>       * can be called more than once per request.
> @@ -854,8 +937,23 @@ deal_with_host:
>          /* We expect hostinfo to point to the first character of
>           * the hostname.  If there's a port it is the first colon,
>           * except with IPv6.
> +         *
> +         * IPv6 also has the interesting property (RFC6874) that it may contain
> +         * a percent-encoded percent delimiting the zone identifier. We need to
> +         * unescape that.
>           */
>          if (*hostinfo == '[') {
> +            /* zone identifier */
> +            apr_status_t err = detect_scope_zone_id(&have_zone_id, hostinfo,
> +                                                    uri - hostinfo);
> +            /* FIXME: Ignore APR_EINVAL (invalid escaped character) for now as
> +             * old code may rely on it silently getting ignored?
> +             */
> +            if ((err != APR_SUCCESS) && (err != APR_EINVAL)) {
> +                return err;
> +            }
> +
> +            /* Port */
>              v6_offset1 = 1;
>              v6_offset2 = 2;
>              s = memchr(hostinfo, ']', uri - hostinfo);
> @@ -874,11 +972,17 @@ deal_with_host:
>              uptr->hostname = apr_pstrmemdup(p,
>                                              hostinfo + v6_offset1,
>                                              uri - hostinfo - v6_offset2);
> +            if (have_zone_id) {
> +                percent_decode_scope_zone_id(uptr->hostname);
> +            }
>              goto deal_with_path;
>          }
>          uptr->hostname = apr_pstrmemdup(p,
>                                          hostinfo + v6_offset1,
>                                          s - hostinfo - v6_offset2);
> +        if (have_zone_id) {
> +            percent_decode_scope_zone_id(uptr->hostname);
> +        }
>          ++s;
>          uptr->port_str = apr_pstrmemdup(p, s, uri - s);
>          if (uri != s) {
> @@ -949,6 +1053,21 @@ APR_DECLARE(apr_status_t) apr_uri_parse_
>          return APR_EGENERAL;
>      }
>      uptr->hostname = apr_pstrndup(p, hostinfo, s - hostinfo - v6_offset1);
> +
> +    /* Again, ensure zone IDs are decoded. */
> +    int have_zone_id = 0;
> +    apr_status_t err = detect_scope_zone_id(&have_zone_id, uptr->hostname,
> +                                            strlen(uptr->hostname));
> +    /* FIXME: Ignore APR_EINVAL (invalid escaped character) for now as old code
> +     * may rely on it silently getting ignored?
> +     */
> +    if ((err != APR_SUCCESS) && (err != APR_EINVAL)) {
> +        return err;
> +    }
> +    if (have_zone_id) {
> +        percent_decode_scope_zone_id(uptr->hostname);
> +    }
> +
>      ++s;
>      uptr->port_str = apr_pstrdup(p, s);
>      if (*s != '\0') {
>


-- 
Eric Covener
cove...@gmail.com

Reply via email to