Changeset: a7d5d1ff828f for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a7d5d1ff828f
Added Files:
sql/test/json/Tests/aggregate00.sql
sql/test/json/Tests/aggregate00.stable.err
sql/test/json/Tests/aggregate00.stable.out
Modified Files:
monetdb5/modules/atoms/json.c
monetdb5/modules/atoms/json.h
monetdb5/modules/atoms/json.mal
sql/scripts/40_json.sql
sql/test/json/Tests/All
Branch: default
Log Message:
functions for aggregating column as json array
diffs (truncated from 552 to 300 lines):
diff --git a/monetdb5/modules/atoms/json.c b/monetdb5/modules/atoms/json.c
--- a/monetdb5/modules/atoms/json.c
+++ b/monetdb5/modules/atoms/json.c
@@ -1598,3 +1598,329 @@ JSONtextGrouped(int *ret, int *bid, int
throw(MAL,"json.text","tobeimplemented");
}
+/*
+ * Concatenate all non-nil string values in the tail of the BAT identified
+ * by *bid into one JSON array of the form [ "v1", "v2", ... ].
+ *
+ * ret: receives a GDKmalloc'ed buffer with the array text.  When the BAT
+ *      contains no non-nil value the buffer holds str_nil instead.  The
+ *      buffer is handed over through *ret and is not freed here.
+ * bid: identifier of the input BAT.
+ *
+ * Returns MAL_SUCCEED, or a MAL exception when the BAT descriptor cannot
+ * be obtained or an allocation fails.
+ *
+ * NOTE(review): values are copied verbatim between double quotes; a value
+ * containing '"' or '\' presumably produces malformed JSON -- confirm
+ * whether callers guarantee already-escaped input.
+ */
+str
+JSONgroupStr(str *ret, const bat *bid)
+{
+	BAT *b;
+	BUN p, q;
+	const char *t;
+	size_t len, size = BUFSIZ, offset = 0;
+	str buf = GDKmalloc(size);
+	str nbuf;
+	BATiter bi;
+	const char *err = NULL;
+
+	if (buf == NULL)
+		throw(MAL, "json.group",MAL_MALLOC_FAIL);
+	if ((b = BATdescriptor(*bid)) == NULL) {
+		GDKfree(buf);
+		throw(MAL, "json.agg", RUNTIME_OBJECT_MISSING);
+	}
+
+	/* result stays nil unless at least one non-nil value is appended */
+	strcpy(buf, str_nil);
+	bi = bat_iterator(b);
+	BATloop(b, p, q) {
+		t = (const char *) BUNtail(bi, p);
+
+		if (strNil(t))
+			continue;
+		len = strlen(t);
+		/* An element costs len + 4 bytes ("[ \"" or ", \"" plus the
+		 * closing quote); keep 3 more in reserve for the final " ]"
+		 * and the NUL so snprintf can never truncate. */
+		if (len + 7 > size - offset) {
+			size += len + 128;
+			/* keep the old pointer so it can still be freed on failure */
+			nbuf = GDKrealloc(buf, size);
+			if (nbuf == NULL) {
+				err = MAL_MALLOC_FAIL;
+				goto failed;
+			}
+			buf = nbuf;
+		}
+		if (offset == 0)
+			offset += (size_t) snprintf(buf, size, "[ \"%s\"", t);
+		else
+			offset += (size_t) snprintf(buf + offset, size - offset, ", \"%s\"", t);
+	}
+	/* Close the array based on what was actually written, not on the
+	 * position in the BAT: trailing nil tuples must not leave it open. */
+	if (offset > 0)
+		strcpy(buf + offset, " ]");
+	BBPreleaseref(b->batCacheid);
+	*ret = buf;
+	return MAL_SUCCEED;
+ failed:
+	BBPreleaseref(b->batCacheid);
+	if (buf != NULL)
+		GDKfree(buf);
+	throw(MAL, "json.agg", "%s", err);
+}
+
+/*
+ * Grouped aggregation of a string BAT into JSON array texts.
+ *
+ * bnp:       receives the result BAT (NULL on failure).
+ * b:         input values; must have a TYPE_str tail (asserted).
+ * g:         optional group ids, one per value of b; when NULL all values
+ *            of b are combined into a single result.
+ * e, s:      optional group-extent and candidate-list BATs, passed to
+ *            BATgroupaggrinit; s is additionally used to restrict b and g.
+ * skip_nils: when non-zero nil values are ignored; otherwise one nil
+ *            value makes the result of its group nil.
+ *
+ * Returns NULL on success or a static error message.
+ *
+ * Each result is built in a growable buffer as [ "v1", "v2", ... ]; a
+ * group that contributes no value yields nil.
+ *
+ * NOTE(review): values are copied verbatim between double quotes; a value
+ * containing '"' or '\' presumably produces malformed JSON -- confirm
+ * whether callers guarantee already-escaped input.
+ */
+static const char *
+JSONjsonaggr(BAT **bnp, BAT *b, BAT *g, BAT *e, BAT *s, int skip_nils)
+{
+	BAT *bn = NULL, *t1, *t2 = NULL;
+	BATiter bi;
+	oid min, max;
+	BUN ngrp, start, end, cnt;
+	BUN nils = 0;
+	int isnil;
+	const oid *cand = NULL, *candend = NULL;
+	const char *v;
+	const oid *grps, *map;
+	oid mapoff = 0;
+	oid prev;
+	BUN p, q;
+	int freeb = 0, freeg = 0;
+	char *buf = NULL, *nbuf;
+	size_t buflen, maxlen, len;
+	const char *err;
+
+	if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &start, &end,
+				    &cnt, &cand, &candend)) != NULL) {
+		return err;
+	}
+	assert(b->ttype == TYPE_str);
+	if (BATcount(b) == 0 || ngrp == 0) {
+		/* nothing to aggregate: every group is nil */
+		bn = BATconstant(TYPE_str, ATOMnilptr(TYPE_str), ngrp);
+		if (bn == NULL)
+			return MAL_MALLOC_FAIL;
+		BATseqbase(bn, ngrp == 0 ? 0 : min);
+		*bnp = bn;
+		return NULL;
+	}
+	if (s) {
+		/* restrict b (and g) to the candidates in s */
+		b = BATleftjoin(s, b, BATcount(s));
+		if (b == NULL) {
+			err = "internal leftjoin failed";
+			goto out;
+		}
+		freeb = 1;
+		if (b->htype != TYPE_void) {
+			t1 = BATmirror(BATmark(BATmirror(b), 0));
+			if (t1 == NULL) {
+				err = "internal mark failed";
+				goto out;
+			}
+			BBPunfix(b->batCacheid);
+			b = t1;
+		}
+		if (g) {
+			g = BATleftjoin(s, g, BATcount(s));
+			if (g == NULL) {
+				err = "internal leftjoin failed";
+				goto out;
+			}
+			freeg = 1;
+			if (g->htype != TYPE_void) {
+				t1 = BATmirror(BATmark(BATmirror(g), 0));
+				if (t1 == NULL) {
+					err = "internal mark failed";
+					goto out;
+				}
+				BBPunfix(g->batCacheid);
+				g = t1;
+			}
+		}
+	}
+	if (g && BATtdense(g)) {
+		/* singleton groups: return group ID's (g's tail) and original
+		 * values from b */
+		/* NOTE(review): this returns the raw values, not wrapped as
+		 * [ "v" ], unlike the general path below -- confirm this is
+		 * intended for a JSON array aggregate. */
+		bn = VIEWcreate(BATmirror(g), b);
+		goto out;
+	}
+
+	maxlen = BUFSIZ;
+	if ((buf = GDKmalloc(maxlen)) == NULL) {
+		err = MAL_MALLOC_FAIL;
+		goto out;
+	}
+	/* An empty accumulator (buflen == 0) always holds str_nil, so a group
+	 * without any contributing value is emitted as nil rather than as
+	 * uninitialised memory. */
+	strcpy(buf, str_nil);
+	buflen = 0;
+	bn = BATnew(TYPE_void, TYPE_str, ngrp);
+	if (bn == NULL) {
+		err = MAL_MALLOC_FAIL;
+		goto out;
+	}
+	bi = bat_iterator(b);
+	if (g) {
+		/* stable sort g */
+		if (BATsubsort(&t1, &t2, NULL, g, NULL, NULL, 0, 1) == GDK_FAIL){
+			BBPreclaim(bn);
+			bn = NULL;
+			err = "internal sort failed";
+			goto out;
+		}
+		if (freeg)
+			BBPunfix(g->batCacheid);
+		g = t1;
+		freeg = 1;
+		if (t2->ttype == TYPE_void) {
+			map = NULL;
+			mapoff = b->tseqbase;
+		} else {
+			map = (const oid *) Tloc(t2, BUNfirst(t2));
+		}
+		grps = (const oid *) Tloc(g, BUNfirst(g));
+		prev = grps[0];
+		isnil = 0;
+		/* one extra iteration (p == q) flushes the last group */
+		for (p = 0, q = BATcount(g); p <= q; p++) {
+			if (p == q || grps[p] != prev) {
+				/* room for " ]" plus NUL was reserved whenever
+				 * an element was added */
+				if (buflen > 0)
+					strcpy(buf + buflen, " ]");
+				/* groups without any value get nil */
+				while (BATcount(bn) < prev - min) {
+					bunfastapp_nocheck(bn, BUNlast(bn), str_nil, Tsize(bn));
+					nils++;
+				}
+				bunfastapp_nocheck(bn, BUNlast(bn), buf, Tsize(bn));
+				nils += strNil(buf);
+				strcpy(buf, str_nil);
+				buflen = 0;
+				if (p == q)
+					break;
+				prev = grps[p];
+				isnil = 0;
+			}
+			if (isnil)
+				continue;
+			v = (const char *) BUNtail(bi, BUNfirst(b) + (map ? (BUN) map[p] : p + mapoff));
+
+			if (strNil(v)) {
+				if (skip_nils)
+					continue;
+				/* the whole group becomes nil; drop what was
+				 * collected so far */
+				strcpy(buf, str_nil);
+				buflen = 0;
+				isnil = 1;
+			} else {
+				len = strlen(v);
+				/* element needs len + 4 bytes; reserve 3 more
+				 * for the closing " ]" and the NUL */
+				if (len + 7 > maxlen - buflen) {
+					maxlen += len + BUFSIZ;
+					/* keep buf valid so it is freed at out: */
+					nbuf = GDKrealloc(buf, maxlen);
+					if (nbuf == NULL) {
+						err = MAL_MALLOC_FAIL;
+						goto bunins_failed;
+					}
+					buf = nbuf;
+				}
+				if (buflen == 0)
+					buflen += (size_t) snprintf(buf, maxlen, "[ \"%s\"", v);
+				else
+					buflen += (size_t) snprintf(buf + buflen, maxlen - buflen, ", \"%s\"", v);
+			}
+		}
+		BBPunfix(t2->batCacheid);
+		t2 = NULL;
+	} else {
+		/* no grouping: all values of b form one array */
+		for (p = BUNfirst(b), q = p + BATcount(b); p < q; p++) {
+			v = (const char *) BUNtail(bi, p);
+			if (strNil(v)) {
+				if (skip_nils)
+					continue;
+				/* a single nil makes the whole result nil */
+				strcpy(buf, str_nil);
+				buflen = 0;
+				break;
+			}
+			len = strlen(v);
+			/* element needs len + 4 bytes; reserve 3 more for the
+			 * closing " ]" and the NUL */
+			if (len + 7 > maxlen - buflen) {
+				maxlen += len + BUFSIZ;
+				nbuf = GDKrealloc(buf, maxlen);
+				if (nbuf == NULL) {
+					err = MAL_MALLOC_FAIL;
+					goto bunins_failed;
+				}
+				buf = nbuf;
+			}
+			if (buflen == 0)
+				buflen += (size_t) snprintf(buf, maxlen, "[ \"%s\"", v);
+			else
+				buflen += (size_t) snprintf(buf + buflen, maxlen - buflen, ", \"%s\"", v);
+		}
+		if (buflen > 0)
+			strcpy(buf + buflen, " ]");
+		nils += strNil(buf);
+		bunfastapp_nocheck(bn, BUNlast(bn), buf, Tsize(bn));
+	}
+	BATseqbase(bn, min);
+	bn->T->nil = nils != 0;
+	bn->T->nonil = nils == 0;
+	bn->T->sorted = BATcount(bn) <= 1;
+	bn->T->revsorted = BATcount(bn) <= 1;
+	bn->T->key = BATcount(bn) <= 1;
+
+ out:
+	if (t2)
+		BBPunfix(t2->batCacheid);
+	if (freeb && b)
+		BBPunfix(b->batCacheid);
+	if (freeg && g)
+		BBPunfix(g->batCacheid);
+	if (buf)
+		GDKfree(buf);
+	*bnp = bn;
+	return err;
+
+ bunins_failed:
+	if (bn)
+		BBPreclaim(bn);
+	bn = NULL;
+	if (err == NULL)
+		err = MAL_MALLOC_FAIL;	/* insertion into result BAT failed */
+	goto out;
+}
+
+str
+JSONsubjsoncand(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, bit
*skip_nils)
+{
+ BAT *b, *g, *e, *s, *bn = NULL;
+ const char *err;
+
+ b = BATdescriptor(*bid);
+ g = gid ? BATdescriptor(*gid) : NULL;
+ e = eid ? BATdescriptor(*eid) : NULL;
+ if (b == NULL || (gid != NULL && g == NULL) || (eid != NULL && e ==
NULL)) {
+
+ if (b)
+ BBPreleaseref(b->batCacheid);
+ if (g)
+ BBPreleaseref(g->batCacheid);
+ if (e)
+ BBPreleaseref(e->batCacheid);
+ throw(MAL, "aggr.subjson", RUNTIME_OBJECT_MISSING);
+ }
+ if (sid) {
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list