Hi Ángel, I was waiting for your response... but I just realized that I forgot to put you on CC. My apologies.
Regards, Tim On Monday 06 July 2015 15:20:36 Tim Ruehsen wrote: > Hi Ángel, > > > * src/iri.c: Remove _utf8_is_valid() > > > > This is probably the shortest-lived function in wget :) > > I guess so :-) > > Very good idea. I didn't even know that gnulib carries a copy of > libunistring. > > Could you add/change the URLs if you know of better suited ones ? > And re-send your patch as an attachment (had some issues with line > wrapping). > > Thanks, Tim > > On Monday 06 July 2015 02:05:57 Ángel González wrote: > > * bootstrap.conf: Enable u8-check module > > * src/iri.c: Remove _utf8_is_valid() > > > > --- > > > > This is probably the shortest-lived function in wget :) > > > > I didn't change the urls, but there are probably more suited ones. > > > > bootstrap.conf | 1 + > > src/iri.c | 62 > > > > ++++++++++++++-------------------------------------------- > > > > 2 files changed, 16 insertions(+), 47 deletions(-) > > > > diff --git a/bootstrap.conf b/bootstrap.conf > > index 4fff711..376a549 100644 > > --- a/bootstrap.conf > > +++ b/bootstrap.conf > > @@ -85,6 +85,7 @@ strtoll > > > > timegm > > tmpdir > > unlocked-io > > > > +unistr/u8-check > > > > update-copyright > > vasprintf > > vsnprintf > > > > diff --git a/src/iri.c b/src/iri.c > > index a6b1c6e..7d66e9d 100644 > > --- a/src/iri.c > > +++ b/src/iri.c > > @@ -43,6 +43,7 @@ as that of the covered work. */ > > > > #include "url.h" > > #include "c-strcase.h" > > #include "c-strcasestr.h" > > > > +#include "unistr.h" > > > > #include "xstrndup.h" > > > > /* RFC3987 section 3.1 mandates STD3 ASCII RULES */ > > > > @@ -220,50 +221,6 @@ locale_to_utf8 (const char *str) > > > > return str; > > > > } > > > > -/* > > - * Work around a libidn <= 1.30 vulnerability. > > - * > > - * The function checks for a valid UTF-8 character sequence before > > - * passing it to idna_to_ascii_8z(). 
> > - * > > - * [1] > > http://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html > > - * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html > > - * [3] http://curl.haxx.se/mail/lib-2015-06/0143.html > > - */ > > -static bool > > -_utf8_is_valid(const char *utf8) > > -{ > > - const unsigned char *s = (const unsigned char *) utf8; > > - > > - while (*s) > > - { > > - if ((*s & 0x80) == 0) /* 0xxxxxxx ASCII char */ > > - s++; > > - else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */ > > - { > > - if ((s[1] & 0xC0) != 0x80) > > - return false; > > - s+=2; > > - } > > - else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */ > > - { > > - if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80) > > - return false; > > - s+=3; > > - } > > - else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx > > 10xxxxxx */ > > - { > > - if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & > > 0xC0) != 0x80) > > - return false; > > - s+=4; > > - } > > - else > > - return false; > > - } > > - > > - return true; > > -} > > - > > > > /* Try to "ASCII encode" UTF-8 host. Return the new domain on success > > > > or NULL > > > > on error. */ > > > > char * > > > > @@ -272,6 +229,7 @@ idn_encode (const struct iri *i, const char *host) > > > > int ret; > > char *ascii_encoded; > > char *utf8_encoded = NULL; > > > > + const char *utf8_host; > > > > /* Encode to UTF-8 if not done */ > > if (!i->utf8_encode) > > > > @@ -280,16 +238,26 @@ idn_encode (const struct iri *i, const char *host) > > > > return NULL; /* Nothing to encode or an error occured */ > > > > } > > > > - if (!_utf8_is_valid(utf8_encoded ? utf8_encoded : host)) > > + utf8_host = utf8_encoded ? utf8_encoded : host; > > + > > + /* > > + * Verify that utf8_host is a valid UTF-8 character sequence before > > + * passing it to idna_to_ascii_8z(). 
> > + * > > + * [1] > > https://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html > > + * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html > > + * [3] http://curl.haxx.se/mail/lib-2015-06/0143.html > > + */ > > + if (u8_check (utf8_host, strlen(utf8_host))) > > > > { > > > > logprintf (LOG_VERBOSE, _("Invalid UTF-8 sequence: %s\n"), > > > > - quote(utf8_encoded ? utf8_encoded : host)); > > + quote (utf8_host)); > > > > xfree (utf8_encoded); > > return NULL; > > > > } > > > > /* Store in ascii_encoded the ASCII UTF-8 NULL terminated string */ > > > > - ret = idna_to_ascii_8z (utf8_encoded ? utf8_encoded : host, > > &ascii_encoded, IDNA_FLAGS); > > + ret = idna_to_ascii_8z (utf8_host, &ascii_encoded, IDNA_FLAGS); > > > > xfree (utf8_encoded); > > > > if (ret != IDNA_SUCCESS)
signature.asc
Description: This is a digitally signed message part.
