On Tuesday 30 June 2015 11:09:06 Hubert Tarasiuk wrote:
> +  while (*s)
> +    {
> +      if ((*s & 0x80) == 0) /* 0xxxxxxx ASCII char */
> +        s++;
> +      else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */
> +        {
> +          if ((s[1] & 0xC0) != 0x80)
> +            return false;
> +          s+=2;
> +        }
> +      else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */
> +        {
> +          if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
> +            return false;
> +          s+=3;
> +        }
> +      else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx
> 10xxxxxx */
> +        {
> +          if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] &
> 0xC0) != 0x80)
> +            return false;
> +          s+=4;
> +        }
> I think here should go another else like this: else { return false; }.
> Otherwise it can loop forever if none of the conditions matches (eg.
> '\xba').

Thanks Hubert, good catch.

I add the amended patch for completeness.

Tim
From d79495af84cf37d0925de4e2b3176582befb3094 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim Rühsen?= <[email protected]>
Date: Tue, 30 Jun 2015 09:55:14 +0200
Subject: [PATCH] Work around a libidn <= 1.30 vulnerability

* src/iri.c: Add _utf8_is_valid() to check UTF-8 sequences before
  passing them to idna_to_ascii_8z().
---
 src/iri.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/src/iri.c b/src/iri.c
index 10ae994..3bebb5d 100644
--- a/src/iri.c
+++ b/src/iri.c
@@ -219,6 +219,50 @@ locale_to_utf8 (const char *str)
   return str;
 }

+/*
+ * Work around a libidn <= 1.30 vulnerability.
+ *
+ * The function checks for a valid UTF-8 character sequence before
+ * passing it to idna_to_ascii_8z().
+ *
+ * [1] http://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
+ * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
+ * [3] http://curl.haxx.se/mail/lib-2015-06/0143.html
+ */
+static bool
+_utf8_is_valid(const char *utf8)
+{
+  const unsigned char *s = (const unsigned char *) utf8;
+
+  while (*s)
+    {
+      if ((*s & 0x80) == 0) /* 0xxxxxxx ASCII char */
+        s++;
+      else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */
+        {
+          if ((s[1] & 0xC0) != 0x80)
+            return false;
+          s+=2;
+        }
+      else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */
+        {
+          if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
+            return false;
+          s+=3;
+        }
+      else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+        {
+          if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80)
+            return false;
+          s+=4;
+        }
+      else
+        return false;
+    }
+
+  return true;
+}
+
 /* Try to "ASCII encode" UTF-8 host. Return the new domain on success or NULL
    on error. */
 char *
@@ -235,6 +279,13 @@ idn_encode (struct iri *i, char *host)
           return NULL;  /* Nothing to encode or an error occured */
     }

+  if (!_utf8_is_valid(utf8_encoded ? utf8_encoded : host))
+    {
+      xfree (utf8_encoded);
+      logprintf (LOG_VERBOSE, _("Invalid UTF-8 sequence: %s\n"), quote(utf8_encoded));
+      return NULL;
+    }
+
   /* Store in ascii_encoded the ASCII UTF-8 NULL terminated string */
   ret = idna_to_ascii_8z (utf8_encoded ? utf8_encoded : host, &ascii_encoded, IDNA_FLAGS);
   xfree (utf8_encoded);
--
2.1.4

Attachment: signature.asc
Description: This is a digitally signed message part.

Reply via email to