Changeset: 2789a121f344 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/2789a121f344
Modified Files:
        gdk/gdk_atoms.c
        monetdb5/modules/atoms/blob.c
        monetdb5/modules/atoms/str.c
        sql/storage/bat/bat_logger.c
        sql/storage/store.c
Branch: Dec2025
Log Message:

Change WAL format to accommodate BLOBs larger than 2 GiB.
Also, length(b) where b is a BLOB of at least 2**31 items now returns -1
(since length returns INTEGER, it cannot return the correct value).
Fixes #7808.


diffs (212 lines):

diff --git a/gdk/gdk_atoms.c b/gdk/gdk_atoms.c
--- a/gdk/gdk_atoms.c
+++ b/gdk/gdk_atoms.c
@@ -1996,13 +1996,13 @@ static void *
 BLOBread(allocator *ma, void *A, size_t *dstlen, stream *s, size_t cnt)
 {
        blob *a = A;
-       int len;
+       lng len;
 
        (void) cnt;
        assert(cnt == 1);
-       if (mnstr_readInt(s, &len) != 1 || len < 0)
+       if (mnstr_readLng(s, &len) != 1 || len < 0)
                return NULL;
-       if (a == NULL || *dstlen < (size_t) len) {
+       if (a == NULL || (lng) *dstlen < len) {
                if (ma) {
                        a = ma_realloc(ma, a, (size_t) len, *dstlen);
                } else {
@@ -2027,8 +2027,7 @@ BLOBwrite(const void *A, stream *s, size
 
        (void) cnt;
        assert(cnt == 1);
-       if (!mnstr_writeInt(s, (int) len) /* 64bit: check for overflow */ ||
-               mnstr_write(s, a, len, 1) < 0)
+       if (!mnstr_writeLng(s, (lng) len) || mnstr_write(s, a, len, 1) < 0)
                return GDK_FAIL;
        return GDK_SUCCEED;
 }
@@ -2037,7 +2036,7 @@ static size_t
 BLOBlength(const void *P)
 {
        const blob *p = P;
-       size_t l = blobsize(p->nitems); /* 64bit: check for overflow */
+       size_t l = blobsize(p->nitems);
        assert(l <= (size_t) GDK_int_max);
        return l;
 }
diff --git a/monetdb5/modules/atoms/blob.c b/monetdb5/modules/atoms/blob.c
--- a/monetdb5/modules/atoms/blob.c
+++ b/monetdb5/modules/atoms/blob.c
@@ -42,8 +42,9 @@ BLOBnitems(Client ctx, int *ret, blob **
        (void) ctx;
        if (is_blob_nil(*b)) {
                *ret = int_nil;
+       } else if ((*b)->nitems > (size_t) GDK_int_max) {
+               *ret = -1;
        } else {
-               assert((*b)->nitems < INT_MAX);
                *ret = (int) (*b)->nitems;
        }
        return MAL_SUCCEED;
@@ -93,8 +94,9 @@ BLOBnitems_bulk(Client cntxt, MalBlkPtr 
                        if (is_blob_nil(b)) {
                                vals[i] = int_nil;
                                nils = true;
+                       } else if (b->nitems > (size_t) GDK_int_max) {
+                               vals[i] = -1;
                        } else {
-                               assert((int) b->nitems < INT_MAX);
                                vals[i] = (int) b->nitems;
                        }
                }
@@ -106,8 +108,9 @@ BLOBnitems_bulk(Client cntxt, MalBlkPtr 
                        if (is_blob_nil(b)) {
                                vals[i] = int_nil;
                                nils = true;
+                       } else if (b->nitems > (size_t) GDK_int_max) {
+                               vals[i] = -1;
                        } else {
-                               assert((int) b->nitems < INT_MAX);
                                vals[i] = (int) b->nitems;
                        }
                }
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -136,11 +136,12 @@ UTF8_strncpy(char *restrict dst, const c
        return dst;
 }
 
-/* return number of Unicode codepoints in s; s is not nil */
+/* return number of Unicode codepoints in s or -1 if larger than
+ * INT_MAX; s is not nil */
 int
 UTF8_strlen(const char *s)
 {                                                              /* This function assumes, s is never nil */
-       size_t pos = 0;
+       size_t len = 0;
 
        UTF8_assert(s);
        assert(!strNil(s));
@@ -148,10 +149,11 @@ UTF8_strlen(const char *s)
        while (*s) {
                /* just count leading bytes of encoded code points; only works
                 * for correctly encoded UTF-8 */
-               pos += (*s++ & 0xC0) != 0x80;
+               len += (*s++ & 0xC0) != 0x80;
        }
-       assert(pos <= (size_t) INT_MAX);
-       return (int) pos;
+       if (len > (size_t) INT_MAX)
+               return -1;
+       return (int) len;
 }
 
 /* return (int) strlen(s); s is not nil; returns -1 for strings that are
diff --git a/sql/storage/bat/bat_logger.c b/sql/storage/bat/bat_logger.c
--- a/sql/storage/bat/bat_logger.c
+++ b/sql/storage/bat/bat_logger.c
@@ -20,11 +20,44 @@
 #define CATALOG_SEP2022 52302  /* first in Sep2022 */
 #define CATALOG_AUG2024 52303  /* first in Aug2024 */
 #define CATALOG_MAR2025 52304  /* first in Mar2025 */
+#define CATALOG_DEC2025 52305  /* first in Dec2025 */
 
 /* Note, CATALOG version 52300 is the first one where the basic system
  * tables (the ones created in store.c) have fixed and unchangeable
  * ids. */
 
+#ifdef CATALOG_DEC2025
+static void *
+BLOBreadOld(allocator *ma, void *A, size_t *dstlen, stream *s, size_t cnt)
+{
+       blob *a = A;
+       int len;
+
+       (void) cnt;
+       assert(cnt == 1);
+       if (mnstr_readInt(s, &len) != 1 || len < 0)
+               return NULL;
+       if (a == NULL || *dstlen < (size_t) len) {
+               if (ma) {
+                       a = ma_realloc(ma, a, (size_t) len, *dstlen);
+               } else {
+                       GDKfree(a);
+                       a = GDKmalloc((size_t) len);
+               }
+               if (a == NULL)
+                       return NULL;
+               *dstlen = (size_t) len;
+       }
+       if (mnstr_read(s, (char *) a, (size_t) len, 1) != 1) {
+               return NULL;
+       }
+       return a;
+}
+
+/* original atomRead function for TYPE_blob */
+static void *(*blobread)(allocator *ma, ptr, size_t *, stream *, size_t);
+#endif
+
 /* return GDK_SUCCEED if we can handle the upgrade from oldversion to
  * newversion */
 static gdk_return
@@ -32,6 +65,16 @@ bl_preversion(sqlstore *store, int oldve
 {
        (void)newversion;
 
+#ifdef CATALOG_DEC2025
+       if (oldversion <= CATALOG_DEC2025) {
+               /* replace atomRead function for blobs with version compatible
+                * with older WAL format; this change is reverted in the
+                * postversion function */
+               blobread = BATatoms[TYPE_blob].atomRead;
+               BATatoms[TYPE_blob].atomRead = BLOBreadOld;
+       }
+#endif
+
 #ifdef CATALOG_JUL2021
        if (oldversion == CATALOG_JUL2021) {
                /* upgrade to default releases */
@@ -72,6 +115,14 @@ bl_preversion(sqlstore *store, int oldve
        }
 #endif
 
+#ifdef CATALOG_DEC2025
+       if (oldversion == CATALOG_DEC2025) {
+               /* upgrade to default releases */
+               store->catalog_version = oldversion;
+               return GDK_SUCCEED;
+       }
+#endif
+
        return GDK_FAIL;
 }
 
@@ -166,6 +217,11 @@ bl_postversion(void *Store, logger *lg)
        sqlstore *store = Store;
        gdk_return rc;
 
+#ifdef CATALOG_DEC2025
+       if (blobread)
+               BATatoms[TYPE_blob].atomRead = blobread;
+#endif
+
 #ifdef CATALOG_JUL2021
        if (store->catalog_version <= CATALOG_JUL2021) {
                /* change the language attribute in sys.functions for sys.env,
diff --git a/sql/storage/store.c b/sql/storage/store.c
--- a/sql/storage/store.c
+++ b/sql/storage/store.c
@@ -20,7 +20,7 @@
 #include "bat/bat_logger.h"
 
 /* version 05.23.05 of catalog */
-#define CATALOG_VERSION 52305  /* first after Mar2025 */
+#define CATALOG_VERSION 52306  /* first in Dec2025-SP1 */
 
 static void
 obj_lock_init( MT_Lock *l, char c, sqlid id)
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to