Changeset: e62a2afa1ee0 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/e62a2afa1ee0
Modified Files:
        gdk/gdk_atoms.c
        sql/test/Tests/inet6.test
Branch: Dec2025
Log Message:

Add lots of inet6 tests, make them succeed

The new parser rejects square brackets because that's URL syntax, not
inet addr syntax.


diffs (truncated from 714 to 300 lines):

diff --git a/gdk/gdk_atoms.c b/gdk/gdk_atoms.c
--- a/gdk/gdk_atoms.c
+++ b/gdk/gdk_atoms.c
@@ -1564,154 +1564,256 @@ INET6equal(const void *L, const void *R)
        return memcmp(l->hex, r->hex, sizeof(l->hex)) == 0;
 }
 
-static ssize_t
-INET6fromString(allocator *ma, const char *svalue, size_t *len, void **RETVAL, 
bool external)
+/* Classify one character of an inet6 literal.
+ *
+ * Returns 0..15 for a hexadecimal digit (its numeric value), 16 for a
+ * character that may terminate a hex group ('\0', ':' or '.') and 255
+ * for every other character.  Callers can test "< 16" for digits and
+ * "== 16" for terminators. */
+static uint8_t
+inet6_classify(char c)
+{
+       /* The values in this table are all one too high so the invalid
+        * ones can be left at 0.  The table is read-only, hence const. */
+       static const uint8_t table[256] = {
+               ['0'] = 0 + 1, ['1'] = 1 + 1, ['2'] = 2 + 1,
+               ['3'] = 3 + 1, ['4'] = 4 + 1, ['5'] = 5 + 1,
+               ['6'] = 6 + 1, ['7'] = 7 + 1, ['8'] = 8 + 1,
+               ['9'] = 9 + 1,
+               ['A'] = 0xA + 1, ['B'] = 0xB + 1, ['C'] = 0xC + 1,
+               ['D'] = 0xD + 1, ['E'] = 0xE + 1, ['F'] = 0xF + 1,
+               ['a'] = 0xa + 1, ['b'] = 0xb + 1, ['c'] = 0xc + 1,
+               ['d'] = 0xd + 1, ['e'] = 0xe + 1, ['f'] = 0xf + 1,
+               /* not hex digits but valid as terminators */
+               ['\0'] = 16 + 1, [':'] = 16 + 1, ['.'] = 16 + 1,
+       };
+       /* going through uint8_t keeps the index non-negative even where
+        * char is signed; an invalid entry (0) wraps around to 255 */
+       return (uint8_t) (table[(uint8_t) c] - 1);
+}
+
+/* Read one group of 1 to 4 hex digits starting at *pos.
+ *
+ * On success the numeric value of the group is returned and *pos is
+ * advanced to the first character that was not consumed.  If the text
+ * does not start with a hex digit, -1 is returned and *pos is left
+ * unchanged. */
+static int
+inet6_scan_hexdigits(const char **pos)
+{
+       const char *cursor = *pos;
+       int value = inet6_classify(*cursor);
+
+       if (value >= 16) {
+               /* must have at least one digit */
+               return -1;
+       }
+       cursor++;
+
+       /* at most three more digits belong to this group */
+       for (int remaining = 3; remaining > 0; remaining--) {
+               uint8_t digit = inet6_classify(*cursor);
+               if (digit >= 16)
+                       break;
+               value = 16 * value + digit;
+               cursor++;
+       }
+
+       *pos = cursor;
+       return value;
+}
+
+/* Open up the "::" gap: shift the groups that were parsed after the gap
+ * to the end of the max_groups-wide area and zero-fill the hole.
+ *
+ * bytes      holds the groups parsed so far, two bytes per group
+ * gap_start  index of the group where "::" was seen, or -1 if none
+ * groups     number of groups parsed
+ * max_groups 8 for a pure hex address, 6 when an IPv4 tail follows
+ *
+ * Returns false when the gap and the group count do not agree: a gap
+ * without missing groups, or missing groups without a gap. */
+static bool
+inet6_expand_gap(uint8_t *bytes, int gap_start, int groups, int max_groups)
+{
+       assert(gap_start <= groups);
+       assert(groups <= max_groups);
+       assert(max_groups == 6 || max_groups == 8);
+
+       const int missing = max_groups - groups;
+       assert(missing >= 0);
+
+       if (missing == 0) {
+               /* complete address: fine only if there was no :: at all */
+               return gap_start == -1;
+       }
+       if (gap_start == -1) {
+               /* groups are missing but there is no :: to account for them */
+               return false;
+       }
+
+       uint8_t *hole = bytes + 2 * gap_start;
+       size_t tail_bytes = 2 * (size_t) (groups - gap_start);
+       size_t hole_bytes = 2 * (size_t) missing;
+
+       /* regions may overlap, so memmove rather than memcpy */
+       memmove(hole + hole_bytes, hole, tail_bytes);
+       memset(hole, 0, hole_bytes);
+       return true;
+}
+
+/* Scan an address written as exactly 32 hex digits without punctuation.
+ *
+ * Each pair of digits in [s, end) becomes one byte of bytes[0..15].
+ * Returns false if the text is not 32 characters long or contains a
+ * non-hex character; in the latter case the bytes before the offending
+ * pair have already been written. */
+static bool
+inet6_scan_big_hexnumber(uint8_t *bytes, const char *s, const char *end)
+{
+       if (end - s != 32)
+               return false;
+
+       uint8_t *out = bytes;
+       for (const char *pair = s; pair != end; pair += 2) {
+               int high = inet6_classify(pair[0]);
+               int low = inet6_classify(pair[1]);
+               if (high > 15 || low > 15)
+                       return false;
+               *out++ = (uint8_t) (16 * high + low);
+       }
+       return true;
+}
+
+/* Scan the optional decimal bytes at the end of, for example,
+ * ::ffff:127.0.0.1.
+ *
+ * The text in [start, end) must consist of exactly four decimal fields
+ * separated by single dots; every field needs at least one digit and a
+ * value of at most 255.  On success the four values are stored in
+ * bytes[12..15] and true is returned.  On failure false is returned and
+ * bytes is not modified. */
+static bool
+inet6_scan_embedded_inet4(uint8_t *bytes, const char *start, const char *end)
 {
-       (void) ma;
-       inet6 **retval = (inet6 **) RETVAL;
-       const char *s = svalue;
+       /* We cannot use sscanf because its behavior is undefined for invalid
+        * inputs.  Strtol and friends accept whitespace etc. which we don't
+        * want.  We'll just do it by hand. */
+
+       int fields[4] = { 0 };
+       int n = 0;
+       bool need_digit = true;
+       for (const char *s = start; s < end; s++) {
+               /* plain range check: passing a negative char to isdigit()
+                * is undefined behavior */
+               if (*s >= '0' && *s <= '9') {
+                       fields[n] = 10 * fields[n] + (*s - '0');
+                       if (fields[n] > 255)
+                               return false;
+                       need_digit = false;
+               } else if (*s == '.') {
+                       /* a dot must follow a digit, otherwise inputs with
+                        * an empty field such as "1..2.3" would be read
+                        * as 1.0.2.3 */
+                       if (need_digit)
+                               return false;
+                       n++;
+                       if (n == 4)
+                               return false;
+                       need_digit = true;
+               } else {
+                       return false;
+               }
+       }
+       /* need exactly three dots and a digit after the last one */
+       if (n != 3 || need_digit)
+               return false;
+
+       for (int i = 0; i < 4; i++)
+               bytes[12 + i] = (uint8_t) fields[i];
+
+       return true;
+}
+
+/* Parse the IPv6 address text in [s, end) into the 16 bytes at bytes.
+ *
+ * Accepted forms are 32 bare hex digits, or colon separated hex groups
+ * with at most one "::" gap, optionally ending in a dotted-decimal IPv4
+ * tail.  Returns true on success.  On failure false is returned and
+ * bytes may have been partially overwritten.
+ *
+ * NOTE(review): inet6_scan_hexdigits does not look at end, so this
+ * presumably relies on the text being followed by a non-hex character
+ * such as '\0' -- confirm against the caller. */
+static bool
+inet6_scan_address(uint8_t *bytes, const char *s, const char *end)
+{
+       if (inet6_scan_big_hexnumber(bytes, s, end))
+               return true;
+
+       int groups = 0;         /* nr of groups seen */
+       int gap = -1;           /* position of gap, -1 means not seen */
+
+       if (end - s < 3) {
+               /* The smallest legal addresses are ::n and n::.
+                * :: is legal as an IP address but not in MonetDB because it's
+                * our nil representation. */
+               return false;
+       }
 
+       /* Colons at the start are tricky. Two means the gap is at the start,
+        * one is forbidden. */
+       if (s[0] == ':') {
+               if (s[1] == ':') {
+                       s += 2;
+                       gap = 0;
+               } else {
+                       return false;
+               }
+       }
+
+       /* with the leading colons out of the way, read hex groups terminated by
+        * :, . or END */
+       while (s < end) {
+               if (groups >= 8) {
+                       /* too many! */
+                       return false;
+               }
+               if (*s == ':') {
+                       if (gap == -1) {
+                               /* found the gap */
+                               gap = groups;
+                               s++;
+                               continue;
+                       } else {
+                               /* there can only be one gap */
+                               return false;
+                       }
+               }
+
+               const char *group_start = s;
+               int group = inet6_scan_hexdigits(&s);
+               if (group < 0)
+                       return false;
+
+               /* check the terminator */
+               char terminator = s < end ? *s : '\0';
+               if (groups == 7) {
+                       if (terminator != '\0')
+                               return false;
+               } else {
+                       if (inet6_classify(terminator) != 16)
+                               return false;
+               }
+
+               /* A single colon must be followed by something: either a
+                * second colon (the gap) or another group.  Without this
+                * check "1::2:" would be accepted, because the loop simply
+                * ends after skipping the separator. */
+               if (terminator == ':' && s + 1 == end)
+                       return false;
+
+               /* check for trailing decimal bytes */
+               if (terminator == '.') {
+                       if (!inet6_expand_gap(bytes, gap, groups, 6))
+                               return false;
+                       /* the group just scanned was really the first decimal
+                        * field, so rescan from its start */
+                       return inet6_scan_embedded_inet4(bytes, group_start,
+                                                        end);
+               }
+
+               /* record the group, skip the separator and move on */
+               bytes[2 * groups] = (uint8_t)(group >> 8);
+               bytes[2 * groups + 1] = (uint8_t)group;
+               groups++;
+               s++;
+       }
+
+       return inet6_expand_gap(bytes, gap, groups, 8);
+}
+
+
+static ssize_t
+INET6fromString(allocator *ma, const char *svalue, size_t *len, void **retval, 
bool external)
+{
+       /* make room for return value */
        if (*len < 16 || *retval == NULL) {
                if ((*retval = ma_alloc(ma, 16)) == NULL)
                        return -1;
                *len = 16;
        }
+       inet6 *addr = (inet6*)*retval;
+       uint8_t *bytes = &addr->hex[0];
+
+       /* handle nils */
        if (external && strcmp(svalue, "nil") == 0) {
-               **retval = inet6_nil;
+               *addr = inet6_nil;
                return 3;
        }
        if (strNil(svalue)) {
-               **retval = inet6_nil;
+               *addr = inet6_nil;
                return 1;
        }
-       while (GDKisspace(*s))
-               s++;
-       inet6 i6 = {0};
-       bool brkt = *s == '[';
-       if (brkt) {
-               s++;
-               if (!GDKisxdigit(*s) && *s != ':') {
-                       GDKerror("Invalid IPv6 address.");
-                       goto bailout;
-               }
-       } else if (strlen(s) == 32 && strspn(s, "0123456789abcdefABCDEF") == 
32) {
-               /* special case: 32 hex digits without [ ] */
-               for (int i = 0; i < 16; i++) {
-                       uint8_t val = 0;
-                       for (int j = 4; j >= 0; j -= 4) {
-                               if ('0' <= *s && *s <= '9')
-                                       val |= (*s - '0') << j;
-                               else if ('a' <= *s && *s <= 'f')
-                                       val |= (*s - 'a' + 10) << j;
-                               else if ('A' <= *s && *s <= 'F')
-                                       val |= (*s - 'A' + 10) << j;
-                               s++;
-                       }
-                       i6.hex[i] = val;
-               }
-               **retval = i6;
-               return (ssize_t) (s - svalue);
-       }
-       int dcolpos = -1;
-       int i;
-       int maybeip4 = 0;
-       for (i = 0; i < 16; i += 2) {
-               if (s[0] == ':' && s[1] == ':') {
-                       if (dcolpos >= 0) {
-                               GDKerror("Invalid IPv6 address: multiple ::.");
-                               goto bailout;
-                       }
-                       dcolpos = i;
-                       s += 2;
-               } else if (i > 0 && s[0] == ':') {
-                       s++;
-               }
-               if (*s == 0 || (brkt && *s == ']'))
-                       break;
-               char *e;
-               unsigned long ul;
-               if (maybeip4 == 1 && (dcolpos == -1 ? i == 6 : i < 6)) {
-                       ul = strtoul(s, &e, 10);
-                       if (e > s && *e == '.') {
-                               /* address such as ::ffff:192.0.2.128
-                                * i.e. an IPv4 address inside an
-                                * IPv6 */
-                               if (ul > 255) {
-                                       GDKerror("Invalid IPv6 address.");
-                                       goto bailout;
-                               }
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to