Hi Ángel, > * src/iri.c: Remove _utf8_is_valid() > This is probably the shortest-lived function in wget :)
I guess so :-) Very good idea. I didn't even know that gnulib carries a copy of libunistring. Could you add/change the URLs if you know of better-suited ones? And please re-send your patch as an attachment (I had some issues with line wrapping). Thanks, Tim On Monday 06 July 2015 02:05:57 Ángel González wrote: > * bootstrap.conf: Enable u8-check module > * src/iri.c: Remove _utf8_is_valid() > --- > > This is probably the shortest-lived function in wget :) > > I didn't change the urls, but there are probably more suited ones. > > bootstrap.conf | 1 + > src/iri.c | 62 > ++++++++++++++-------------------------------------------- > 2 files changed, 16 insertions(+), 47 deletions(-) > > diff --git a/bootstrap.conf b/bootstrap.conf > index 4fff711..376a549 100644 > --- a/bootstrap.conf > +++ b/bootstrap.conf > @@ -85,6 +85,7 @@ strtoll > timegm > tmpdir > unlocked-io > +unistr/u8-check > update-copyright > vasprintf > vsnprintf > diff --git a/src/iri.c b/src/iri.c > index a6b1c6e..7d66e9d 100644 > --- a/src/iri.c > +++ b/src/iri.c > @@ -43,6 +43,7 @@ as that of the covered work. */ > #include "url.h" > #include "c-strcase.h" > #include "c-strcasestr.h" > +#include "unistr.h" > #include "xstrndup.h" > > /* RFC3987 section 3.1 mandates STD3 ASCII RULES */ > @@ -220,50 +221,6 @@ locale_to_utf8 (const char *str) > return str; > } > > -/* > - * Work around a libidn <= 1.30 vulnerability. > - * > - * The function checks for a valid UTF-8 character sequence before > - * passing it to idna_to_ascii_8z(). 
> - * > - * [1] http://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html > - * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html > - * [3] http://curl.haxx.se/mail/lib-2015-06/0143.html > - */ > -static bool > -_utf8_is_valid(const char *utf8) > -{ > - const unsigned char *s = (const unsigned char *) utf8; > - > - while (*s) > - { > - if ((*s & 0x80) == 0) /* 0xxxxxxx ASCII char */ > - s++; > - else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */ > - { > - if ((s[1] & 0xC0) != 0x80) > - return false; > - s+=2; > - } > - else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */ > - { > - if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80) > - return false; > - s+=3; > - } > - else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx > 10xxxxxx */ > - { > - if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & > 0xC0) != 0x80) > - return false; > - s+=4; > - } > - else > - return false; > - } > - > - return true; > -} > - > /* Try to "ASCII encode" UTF-8 host. Return the new domain on success > or NULL > on error. */ > char * > @@ -272,6 +229,7 @@ idn_encode (const struct iri *i, const char *host) > int ret; > char *ascii_encoded; > char *utf8_encoded = NULL; > + const char *utf8_host; > > /* Encode to UTF-8 if not done */ > if (!i->utf8_encode) > @@ -280,16 +238,26 @@ idn_encode (const struct iri *i, const char *host) > return NULL; /* Nothing to encode or an error occured */ > } > > - if (!_utf8_is_valid(utf8_encoded ? utf8_encoded : host)) > + utf8_host = utf8_encoded ? utf8_encoded : host; > + > + /* > + * Verify that utf8_host is a valid UTF-8 character sequence before > + * passing it to idna_to_ascii_8z(). 
> + * > + * [1] > https://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html > + * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html > + * [3] http://curl.haxx.se/mail/lib-2015-06/0143.html > + */ > + if (u8_check (utf8_host, strlen(utf8_host))) > { > logprintf (LOG_VERBOSE, _("Invalid UTF-8 sequence: %s\n"), > - quote(utf8_encoded ? utf8_encoded : host)); > + quote (utf8_host)); > xfree (utf8_encoded); > return NULL; > } > > /* Store in ascii_encoded the ASCII UTF-8 NULL terminated string */ > - ret = idna_to_ascii_8z (utf8_encoded ? utf8_encoded : host, > &ascii_encoded, IDNA_FLAGS); > + ret = idna_to_ascii_8z (utf8_host, &ascii_encoded, IDNA_FLAGS); > xfree (utf8_encoded); > > if (ret != IDNA_SUCCESS)
signature.asc
Description: This is a digitally signed message part.
