Changeset: 9ee3b0fb88c4 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9ee3b0fb88c4
Modified Files:
gdk/gdk_firstn.c
gdk/gdk_group.c
gdk/gdk_imprints.c
gdk/gdk_logger.c
gdk/gdk_select.c
monetdb5/extras/rapi/rapi.c
monetdb5/modules/atoms/json.c
monetdb5/modules/kernel/aggr.c
monetdb5/modules/kernel/algebra.c
monetdb5/modules/kernel/bat5.c
monetdb5/modules/kernel/batmmath.c
monetdb5/modules/kernel/batstr.c
monetdb5/modules/kernel/group.c
monetdb5/modules/kernel/microbenchmark.c
monetdb5/modules/mal/batExtensions.c
monetdb5/modules/mal/batcalc.c
monetdb5/modules/mal/mat.c
monetdb5/modules/mal/pcre.c
monetdb5/modules/mal/sample.c
monetdb5/modules/mal/txtsim.c
sql/backends/monet5/UDF/pyapi/connection.c
sql/backends/monet5/generator/generator.c
sql/backends/monet5/sql.c
sql/backends/monet5/sql_rank.c
sql/backends/monet5/sql_result.c
sql/backends/monet5/vaults/bam/bam_lib.c
sql/storage/bat/bat_storage.c
sql/test/leaks/Tests/check1.stable.out
sql/test/leaks/Tests/check1.stable.out.int128
sql/test/leaks/Tests/check2.stable.out
sql/test/leaks/Tests/check3.stable.out
sql/test/leaks/Tests/check4.stable.out
sql/test/leaks/Tests/check5.stable.out
sql/test/leaks/Tests/select1.stable.out
sql/test/leaks/Tests/select1.stable.out.int128
sql/test/leaks/Tests/select2.stable.out
sql/test/leaks/Tests/select2.stable.out.int128
sql/test/leaks/Tests/temp1.stable.out
Branch: default
Log Message:
Merge with Jul2017 branch.
diffs (truncated from 3035 to 300 lines):
diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c
--- a/gdk/gdk_firstn.c
+++ b/gdk/gdk_firstn.c
@@ -102,7 +102,7 @@
#define shuffle_unique(TYPE, OP) \
do { \
- const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \
+ const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \
heapify(OP##fix, SWAP1); \
while (cand ? cand < candend : start < end) { \
i = cand ? *cand++ : start++ + b->hseqbase; \
@@ -121,7 +121,7 @@
* multiple equal values to take us past N, we return a subset of those.
*/
static BAT *
-BATfirstn_unique(BAT *b, BAT *s, BUN n, int asc)
+BATfirstn_unique(BAT *b, BAT *s, BUN n, int asc, oid *lastp)
{
BAT *bn;
BATiter bi = bat_iterator(b);
@@ -140,6 +140,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n,
if (n >= (BUN) (candend - cand)) {
/* trivial: return the candidate list (the
* part that refers to b, that is) */
+ if (lastp)
+ *lastp = 0;
return BATslice(s,
(BUN) (cand - (const oid *) Tloc(s, 0)),
(BUN) (candend - (const oid *) Tloc(s, 0)));
@@ -151,6 +153,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n,
return NULL;
BATsetcount(bn, cnt);
BATtseqbase(bn, start + b->hseqbase);
+ if (lastp)
+ *lastp = 0;
return bn;
}
/* note, we want to do both calls */
@@ -163,11 +167,15 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n,
/* return copy of first relevant part
* of candidate list */
i = (BUN) (cand - (const oid *) Tloc(s, 0));
+ if (lastp)
+ *lastp = cand[n - 1];
return BATslice(s, i, i + n);
}
/* return copy of last relevant part of
* candidate list */
i = (BUN) (candend - (const oid *) Tloc(s, 0));
+ if (lastp)
+ *lastp = candend[-(ssize_t)n];
return BATslice(s, i - n, i);
}
bn = COLnew(0, TYPE_void, n, TRANSIENT);
@@ -177,9 +185,13 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n,
if (asc ? b->tsorted : b->trevsorted) {
/* first n entries from b */
BATtseqbase(bn, start + b->hseqbase);
+ if (lastp)
+ *lastp = start + b->hseqbase + n - 1;
} else {
/* last n entries from b */
BATtseqbase(bn, start + cnt + b->hseqbase - n);
+ if (lastp)
+ *lastp = start + cnt + b->hseqbase - n;
}
return bn;
}
@@ -293,6 +305,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n,
break;
}
}
+ if (lastp)
+ *lastp = oids[0]; /* store id of largest value */
/* output must be sorted since it's a candidate list */
GDKqsort(oids, NULL, NULL, (size_t) n, sizeof(oid), 0, TYPE_oid);
bn->tsorted = 1;
@@ -357,7 +371,7 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n,
} while (0)
static BAT *
-BATfirstn_unique_with_groups(BAT *b, BAT *s, BAT *g, BUN n, int asc)
+BATfirstn_unique_with_groups(BAT *b, BAT *s, BAT *g, BUN n, int asc, oid *lastp, oid *lastgp)
{
BAT *bn;
BATiter bi = bat_iterator(b);
@@ -533,6 +547,10 @@ BATfirstn_unique_with_groups(BAT *b, BAT
break;
}
}
+ if (lastp)
+ *lastp = oids[0];
+ if (lastgp)
+ *lastgp = goids[0];
GDKfree(goids);
/* output must be sorted since it's a candidate list */
GDKqsort(oids, NULL, NULL, (size_t) n, sizeof(oid), 0, TYPE_oid);
@@ -545,597 +563,238 @@ BATfirstn_unique_with_groups(BAT *b, BAT
return bn;
}
-#define shuffle_grouped1_body(COMPARE, EQUAL) \
- do { \
- for (i = cand ? *cand++ - b->hseqbase : start; \
- i < end; \
- cand < candend ? (i = *cand++ - b->hseqbase) : i++) { \
- for (j = 0; j < n; j++) { \
- if (j == top) { \
- assert(top < n); \
- groups[top].cnt = 1; \
- groups[top++].bun = i; \
- break; \
- } else { \
- assert(j < top); \
- assert(groups[j].bun < i); \
- if (COMPARE) { \
- if (top < n) \
- top++; \
- for (k = top - 1; k > j; k--) { \
- groups[k] = groups[k - 1]; \
- } \
- groups[j].bun = i; \
- groups[j].cnt = 1; \
- break; \
- } else if (EQUAL) { \
- groups[j].cnt++; \
- break; \
- } \
- } \
- } \
- } \
- } while (0)
-
-#define shuffle_grouped1(TYPE, OPER) \
- do { \
- const TYPE *restrict v = (const TYPE *) Tloc(b, 0); \
- shuffle_grouped1_body(OPER(v[i], v[groups[j].bun]), \
- v[i] == v[groups[j].bun]); \
- } while (0)
-
-#define shuffle_grouped2(TYPE) \
- do { \
- const TYPE *restrict v = (const TYPE *) Tloc(b, 0); \
- TYPE lastval = v[groups[top - 1].bun]; \
- for (i = cand ? *cand++ - b->hseqbase : start; \
- i < end; \
- cand < candend ? (i = *cand++ - b->hseqbase) : i++) { \
- if (asc ? v[i] > lastval : v[i] < lastval) \
- continue; \
- for (j = 0; j < top; j++) { \
- if (v[i] == v[groups[j].bun]) { \
- if (bp) \
- *bp++ = i + b->hseqbase; \
- *gp++ = j; \
- break; \
- } \
- } \
- } \
- } while (0)
-
static gdk_return
BATfirstn_grouped(BAT **topn, BAT **gids, BAT *b, BAT *s, BUN n, int asc, int distinct)
{
- BAT *bn, *gn;
- BATiter bi = bat_iterator(b);
- oid *restrict bp, *restrict gp;
- BUN top, i, j, k, cnt, start, end;
- const oid *restrict cand, *candend, *oldcand;
- int tpe = b->ttype;
- int c;
- int (*cmp)(const void *, const void *);
- BUN ncnt;
- struct group {
- BUN bun;
- BUN cnt;
- } *restrict groups;
+ BAT *bn, *gn, *su = NULL;
+ oid last;
+ gdk_return rc;
- assert(topn);
-
- CANDINIT(b, s, start, end, cnt, cand, candend);
-
- if (n > cnt)
- n = cnt;
- if (cand && n > (BUN) (candend - cand))
- n = (BUN) (candend - cand);
-
- if (n == 0) {
- /* candidate list might refer only to values outside
- * of the bat and hence be effectively empty */
- bn = COLnew(0, TYPE_void, 0, TRANSIENT);
- if (bn == NULL)
+ if (distinct && !b->tkey) {
+ su = s;
+ s = BATunique(b, s);
+ if (s == NULL)
return GDK_FAIL;
- BATtseqbase(bn, 0);
+ }
+ bn = BATfirstn_unique(b, s, n, asc, &last);
+ if (bn == NULL)
+ return GDK_FAIL;
+ if (BATcount(bn) == 0) {
if (gids) {
gn = COLnew(0, TYPE_void, 0, TRANSIENT);
if (gn == NULL) {
- BBPreclaim(bn);
+ BBPunfix(bn->batCacheid);
return GDK_FAIL;
}
- BATtseqbase(gn, 0);
*gids = gn;
}
*topn = bn;
return GDK_SUCCEED;
}
+ if (!b->tkey) {
+ if (distinct) {
+ BAT *bn1;
- top = 0;
- cmp = ATOMcompare(b->ttype);
- /* if base type has same comparison function as type itself, we
- * can use the base type */
- tpe = ATOMbasetype(tpe); /* takes care of oid */
- groups = GDKmalloc(sizeof(*groups) * n);
- if (groups == NULL)
- return GDK_FAIL;
- oldcand = cand;
- if (asc) {
- switch (tpe) {
- case TYPE_void:
- shuffle_grouped1_body(i < groups[j].bun,
- i == groups[j].bun);
- break;
- case TYPE_bte:
- shuffle_grouped1(bte, LT);
- break;
- case TYPE_sht:
- shuffle_grouped1(sht, LT);
- break;
- case TYPE_int:
- shuffle_grouped1(int, LT);
- break;
- case TYPE_lng:
- shuffle_grouped1(lng, LT);
- break;
-#ifdef HAVE_HGE
- case TYPE_hge:
- shuffle_grouped1(hge, LT);
- break;
-#endif
- case TYPE_flt:
- shuffle_grouped1(flt, LT);
- break;
- case TYPE_dbl:
- shuffle_grouped1(dbl, LT);
- break;
- default:
- shuffle_grouped1_body(
- (c = cmp(BUNtail(bi, i),
- BUNtail(bi, groups[j].bun))) < 0,
- c == 0);
- break;
- }
- } else {
- switch (tpe) {
- case TYPE_void:
- shuffle_grouped1_body(i > groups[j].bun,
- i == groups[j].bun);
- break;
- case TYPE_bte:
- shuffle_grouped1(bte, GT);
- break;
- case TYPE_sht:
- shuffle_grouped1(sht, GT);
- break;
- case TYPE_int:
- shuffle_grouped1(int, GT);
- break;
- case TYPE_lng:
- shuffle_grouped1(lng, GT);
- break;
-#ifdef HAVE_HGE
- case TYPE_hge:
- shuffle_grouped1(hge, GT);
- break;
-#endif
- case TYPE_flt:
- shuffle_grouped1(flt, GT);
- break;
- case TYPE_dbl:
- shuffle_grouped1(dbl, GT);
- break;
- default:
- shuffle_grouped1_body(
- (c = cmp(BUNtail(bi, i),
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list