Changeset: 73b95fae9e21 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/73b95fae9e21
Modified Files:
        monetdb5/modules/atoms/json.c
        sql/test/SQLancer/Tests/sqlancer17.test
Branch: Jul2021
Log Message:

Escape strings in the json.tojsonarray function and clean up the code


diffs (truncated from 594 to 300 lines):

diff --git a/monetdb5/modules/atoms/json.c b/monetdb5/modules/atoms/json.c
--- a/monetdb5/modules/atoms/json.c
+++ b/monetdb5/modules/atoms/json.c
@@ -2377,93 +2377,30 @@ JSONfold(Client cntxt, MalBlkPtr mb, Mal
        return JSONfoldKeyValue(ret, id, key, val);
 }
 
-static str
-JSONgroupStr(str *ret, const bat *bid)
-{
-       BAT *b;
-       BUN p, q;
-       const char *t = NULL;
-       size_t len, size = BUFSIZ, offset, cnt = 0;
-       str buf = GDKmalloc(size);
-       BATiter bi;
-       const char *err = NULL;
-       char temp[128] = "";
-       const double *val = NULL;
-
-       if (buf == NULL)
-               throw(MAL, "json.group", SQLSTATE(HY013) MAL_MALLOC_FAIL);
-       if ((b = BATdescriptor(*bid)) == NULL) {
-               GDKfree(buf);
-               throw(MAL, "json.agg", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
-       }
-       assert(b->ttype == TYPE_str || b->ttype == TYPE_dbl);
-
-       strcpy(buf, str_nil);
-       offset = 0;
-       bi = bat_iterator(b);
-       BATloop(b, p, q) {
-               int n = 0, nil = 0;
-
-               switch (b->ttype) {
-               case TYPE_str:
-                       t = (const char *) BUNtvar(bi, p);
-                       nil = (strNil(t));
-                       break;
-               case TYPE_dbl:
-                       val = (const double *) BUNtloc(bi, p);
-                       nil = is_dbl_nil(*val);
-                       if (!nil)
-                               snprintf(temp, sizeof(temp), "%f", *val);
-                       t = (const char *) temp;
-                       break;
-               }
+#define JSON_STR_CPY   \
+       do {    \
+               for (; *v; v++) {       \
+                       switch (*v) {   \
+                       case '"':       \
+                       case '\\':      \
+                               *dst++ = '\\';  \
+                               /* fall through */      \
+                       default:        \
+                               *dst++ = *v;    \
+                               break;  \
+                       case '\n':      \
+                               *dst++ = '\\';  \
+                               *dst++ = 'n';   \
+                               break;  \
+                       }       \
+               }       \
+       } while (0)
 
-               if (nil)
-                       continue;
-               if (!cnt)
-                       offset = snprintf(buf, size, "[ ");
-               len = strlen(t) + 1 + 4; /* closing bracket and optional ',' */
-               if (len >= size - offset) {
-                       str nbuf;
-                       size += len + 128;
-                       nbuf = GDKrealloc(buf, size);
-                       if (nbuf == NULL) {
-                               err = SQLSTATE(HY013) MAL_MALLOC_FAIL;
-                               goto failed;
-                       }
-                       buf = nbuf;
-               }
-               if (cnt)
-                       offset += snprintf(buf + offset, size - offset, ", ");
-               switch (b->ttype) {
-               case TYPE_str:
-                       n = snprintf(buf + offset, size - offset, "\"%s\"", t);
-                       break;
-               case TYPE_dbl:
-                       n = snprintf(buf + offset, size - offset, "%s", t);
-                       break;
-               }
-               cnt++;
-               offset += n;
-       }
-       bat_iterator_end(&bi);
-       if (cnt)
-               offset += snprintf(buf + offset, size - offset, " ]");
-       BBPunfix(b->batCacheid);
-       *ret = buf;
-       return MAL_SUCCEED;
-  failed:
-       bat_iterator_end(&bi);
-       BBPunfix(b->batCacheid);
-       GDKfree(buf);
-       throw(MAL, "json.agg", "%s", err);
-}
-
-#define JSON_AGGR_CHECK_NEXT_LENGTH(EXTRA)     \
+#define JSON_AGGR_CHECK_NEXT_LENGTH(CALC)      \
        do {    \
-               len = strlen(v) + EXTRA;        \
+               len = CALC;     \
                if (len >= maxlen - buflen) {   \
-                       maxlen = (maxlen + len + BUFSIZ + 8191) & ~8191;        
\
+                       maxlen = maxlen + len + BUFSIZ; \
                        buf2 = GDKrealloc(buf, maxlen); \
                        if (buf2 == NULL) {     \
                                err = SQLSTATE(HY013) MAL_MALLOC_FAIL;  \
@@ -2473,28 +2410,95 @@ JSONgroupStr(str *ret, const bat *bid)
                }       \
        } while (0)
 
+static str
+JSONgroupStr(str *ret, const bat *bid)
+{
+       BAT *b;
+       BUN p, q;
+       size_t len, maxlen = BUFSIZ, buflen = 0;
+       char *buf = GDKmalloc(maxlen), *buf2;
+       BATiter bi;
+       const char *err = NULL;
+       dbl *restrict vals;
+
+       if (buf == NULL)
+               throw(MAL, "json.group", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       if ((b = BATdescriptor(*bid)) == NULL) {
+               GDKfree(buf);
+               throw(MAL, "json.group", SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
+       }
+       assert(b->ttype == TYPE_str || b->ttype == TYPE_dbl);
+
+       bi = bat_iterator(b);
+       vals = (dbl*) Tloc(b, 0);
+       switch (b->ttype) {
+               case TYPE_str:
+                       for (p = 0, q = BATcount(b); p < q; p++) {
+                               const char *v = (const char *) BUNtvar(bi, p);
+
+                               if (strNil(v))
+                                       continue;
+                               JSON_AGGR_CHECK_NEXT_LENGTH(strlen(v) * 2 + 5); 
/* opening bracket and optional ',' */
+                               char *dst = buf + buflen, *odst = dst;
+                               if (buflen == 0)
+                                       *dst++ = '[';
+                               else
+                                       *dst++ = ',';
+                               *dst++ = ' ';
+                               *dst++ = '"';
+                               JSON_STR_CPY;
+                               *dst++ = '"';
+                               *dst = '\0';
+                               buflen += (dst - odst);
+                       }
+                       break;
+               case TYPE_dbl:
+                       for (p = 0, q = BATcount(b); p < q; p++) {
+                               dbl val = vals[p];
+
+                               if (is_dbl_nil(val))
+                                       continue;
+                               JSON_AGGR_CHECK_NEXT_LENGTH(128 + 3); /* 
opening bracket and optional ',' */
+                               len = snprintf(buf + buflen, maxlen - buflen, 
"%c %f", buflen == 0 ? '[' : ',', val);
+                               buflen += len;
+                       }
+                       break;
+               default:
+                       assert(0);
+       }
+       bat_iterator_end(&bi);
+       BBPunfix(b->batCacheid);
+       assert(maxlen > buflen + 3);
+       if (buflen > 0)
+               buflen += snprintf(buf + buflen, maxlen - buflen, " ]");
+       else
+               strcpy(buf, str_nil);
+       *ret = GDKstrdup(buf);
+       GDKfree(buf);
+       if (!*ret) /* Don't return a too large string */
+               throw(MAL, "json.group", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       return MAL_SUCCEED;
+  bunins_failed:
+       bat_iterator_end(&bi);
+       BBPunfix(b->batCacheid);
+       GDKfree(buf);
+       throw(MAL, "json.group", "%s", err);
+}
+
 static const char *
 JSONjsonaggr(BAT **bnp, BAT *b, BAT *g, BAT *e, BAT *s, int skip_nils)
 {
        BAT *bn = NULL, *t1, *t2 = NULL;
        BATiter bi;
-       oid min, max;
-       BUN ngrp;
-       BUN nils = 0;
-       int isnil;
+       oid min, max, mapoff = 0, prev;
+       BUN ngrp, nils = 0, p, q, ncand;
        struct canditer ci;
-       BUN ncand;
-       const char *v = NULL;
+       const char *err = NULL;
        const oid *grps, *map;
-       oid mapoff = 0;
-       oid prev;
-       BUN p, q;
-       int freeb = 0, freeg = 0;
+       int freeb = 0, freeg = 0, isnil = 0;
        char *buf = NULL, *buf2;
-       size_t buflen, maxlen, len;
-       const char *err;
-       char temp[128] = "";
-       const double *val = NULL;
+       size_t buflen, maxlen = BUFSIZ, len;
+       dbl *restrict vals;
 
        assert(b->ttype == TYPE_str || b->ttype == TYPE_dbl);
        if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &ci, 
&ncand)) != NULL) {
@@ -2524,7 +2528,6 @@ JSONjsonaggr(BAT **bnp, BAT *b, BAT *g, 
                }
        }
 
-       maxlen = BUFSIZ;
        if ((buf = GDKmalloc(maxlen)) == NULL) {
                err = SQLSTATE(HY013) MAL_MALLOC_FAIL;
                goto out;
@@ -2536,6 +2539,7 @@ JSONjsonaggr(BAT **bnp, BAT *b, BAT *g, 
                goto out;
        }
        bi = bat_iterator(b);
+       vals = (dbl*) Tloc(b, 0);
        if (g) {
                /* stable sort g */
                if (BATsort(&t1, &t2, NULL, g, NULL, NULL, false, false, true) 
!= GDK_SUCCEED) {
@@ -2554,53 +2558,64 @@ JSONjsonaggr(BAT **bnp, BAT *b, BAT *g, 
                        mapoff = t2->tseqbase;
                }
                if (g && BATtdense(g)) {
-                       for (p = 0, q = BATcount(g); p < q; p++) {
-                               switch (b->ttype) {
-                               case TYPE_str:
-                                       v = (const char *) BUNtvar(bi, (map ? 
(BUN) map[p] - mapoff : p));
-                                       break;
-                               case TYPE_dbl:
-                                       val = (const double *) BUNtloc(bi, (map 
? (BUN) map[p] - mapoff : p));
-                                       if (!is_dbl_nil(*val)) {
-                                               snprintf(temp, sizeof(temp), 
"%f", *val);
-                                               v = (const char *) temp;
-                                       } else {
-                                               v = NULL;
-                                       }
-                                       break;
-                               }
-                               if (strNil(v)) {
-                                       if (skip_nils) {
-                                               /*
-                                                * if q is 1 and the value is
-                                                * null, then we need to fill
-                                                * in a value. Otherwise
-                                                * BATproject will fail.
-                                                */
-                                               if ((p == 0) && (q == 1)) {
-                                                       strcpy(buf, "[ null ]");
-                                                       isnil = 1;
+                       switch (b->ttype) {
+                       case TYPE_str:
+                               for (p = 0, q = BATcount(g); p < q; p++) {
+                                       const char *v = (const char *) 
BUNtvar(bi, (map ? (BUN) map[p] - mapoff : p));
+                                       if (strNil(v)) {
+                                               if (skip_nils) {
+                                                       /*
+                                                       * if q is 1 and the 
value is
+                                                       * null, then we need to 
fill
+                                                       * in a value. Otherwise
+                                                       * BATproject will fail.
+                                                       */
+                                                       if (p == 0 && q == 1)
+                                                               strcpy(buf, "[ 
null ]");
+                                                       else
+                                                               continue;
                                                } else {
-                                                       continue;
+                                                       strcpy(buf, str_nil);
+                                                       nils = 1;
                                                }
                                        } else {
-                                               strcpy(buf, str_nil);
-                                               isnil = 1;
+                                               
JSON_AGGR_CHECK_NEXT_LENGTH(strlen(v) * 2 + 7);
+                                               char *dst = buf;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to