Changeset: 2802d998e5fe for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2802d998e5fe
Added Files:
sql/test/BugTracker-2017/Tests/side-effect.Bug-6397.sql
Modified Files:
gdk/gdk_bat.c
gdk/gdk_batop.c
gdk/gdk_bbp.c
gdk/gdk_firstn.c
gdk/gdk_group.c
gdk/gdk_imprints.c
gdk/gdk_join.c
gdk/gdk_logger.c
gdk/gdk_sample.c
gdk/gdk_select.c
gdk/gdk_unique.c
monetdb5/extras/rapi/rapi.c
monetdb5/modules/atoms/json.c
monetdb5/modules/kernel/aggr.c
monetdb5/modules/kernel/algebra.c
monetdb5/modules/kernel/bat5.c
monetdb5/modules/kernel/batmmath.c
monetdb5/modules/kernel/batstr.c
monetdb5/modules/kernel/group.c
monetdb5/modules/kernel/microbenchmark.c
monetdb5/modules/mal/batExtensions.c
monetdb5/modules/mal/batcalc.c
monetdb5/modules/mal/mat.c
monetdb5/modules/mal/pcre.c
monetdb5/modules/mal/sample.c
monetdb5/modules/mal/txtsim.c
sql/backends/monet5/UDF/pyapi/connection.c
sql/backends/monet5/generator/generator.c
sql/backends/monet5/sql.c
sql/backends/monet5/sql_rank.c
sql/backends/monet5/sql_result.c
sql/backends/monet5/vaults/bam/bam_lib.c
sql/storage/bat/bat_storage.c
sql/storage/bat/bat_table.c
sql/storage/bat/bat_utils.c
sql/test/BugTracker-2010/Tests/error-truncated.Bug-2615.stable.err.Darwin
sql/test/BugTracker-2010/Tests/error-truncated.Bug-2615.stable.err.Windows
sql/test/BugTracker-2016/Tests/storagemodel.stable.out.Darwin
sql/test/leaks/Tests/check1.stable.out
sql/test/leaks/Tests/check1.stable.out.int128
sql/test/leaks/Tests/check2.stable.out
sql/test/leaks/Tests/check3.stable.out
sql/test/leaks/Tests/check4.stable.out
sql/test/leaks/Tests/check5.stable.out
sql/test/leaks/Tests/select1.stable.out
sql/test/leaks/Tests/select1.stable.out.int128
sql/test/leaks/Tests/select2.stable.out
sql/test/leaks/Tests/select2.stable.out.int128
sql/test/leaks/Tests/temp1.stable.out
sql/test/mapi/Tests/sql_int128.stable.out.int128
sql/test/pg_regress/Tests/float8.stable.err.Windows
sql/test/pg_regress/Tests/oid.stable.err.32bit
Branch: trails
Log Message:
Merge with default
diffs (truncated from 3980 to 300 lines):
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -246,10 +246,10 @@ BATdense(oid hseq, oid tseq, BUN cnt)
BAT *bn;
bn = COLnew(hseq, TYPE_void, 0, TRANSIENT);
- if (bn == NULL)
- return NULL;
- BATtseqbase(bn, tseq);
- BATsetcount(bn, cnt);
+ if (bn != NULL) {
+ BATtseqbase(bn, tseq);
+ BATsetcount(bn, cnt);
+ }
return bn;
}
@@ -1380,6 +1380,7 @@ BATsetcount(BAT *b, BUN cnt)
{
/* head column is always VOID, and some head properties never change */
assert(b->hseqbase != oid_nil);
+ assert(cnt <= BUN_MAX);
b->batCount = cnt;
b->batDirtydesc = TRUE;
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -1306,21 +1306,17 @@ BATsort(BAT **sorted, BAT **order, BAT *
*sorted = bn;
}
if (order) {
- on = COLnew(b->hseqbase, TYPE_void, BATcount(b), TRANSIENT);
+ on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
if (on == NULL)
goto error;
- BATsetcount(on, BATcount(b));
- BATtseqbase(on, b->hseqbase);
*order = on;
}
if (groups) {
if (BATtkey(b)) {
/* singleton groups */
- gn = COLnew(0, TYPE_void, BATcount(b), TRANSIENT);
+ gn = BATdense(0, 0, BATcount(b));
if (gn == NULL)
goto error;
- BATsetcount(gn, BATcount(b));
- BATtseqbase(gn, 0);
} else {
/* single group */
const oid *o = 0;
@@ -1822,15 +1818,9 @@ BATcount_no_nil(BAT *b)
static BAT *
newdensecand(oid first, oid last)
{
- BAT *bn;
-
- if ((bn = COLnew(0, TYPE_void, 0, TRANSIENT)) == NULL)
- return NULL;
if (last < first)
first = last = 0; /* empty range */
- BATsetcount(bn, last - first);
- BATtseqbase(bn, first);
- return bn;
+ return BATdense(0, first, last - first);
}
/* merge two candidate lists and produce a new one
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -321,9 +321,18 @@ BBPselectfarm(int role, int type, enum h
{
int i;
- assert(role >= 0 && role < 32);
(void) type; /* may use in future */
(void) hptype; /* may use in future */
+
+ assert(role >= 0 && role < 32);
+#ifndef PERSISTENTHASH
+ if (hptype == hashheap)
+ role = TRANSIENT;
+#endif
+#ifndef PERSISTENTIDX
+ if (hptype == orderidxheap)
+ role = TRANSIENT;
+#endif
for (i = 0; i < MAXFARMS; i++)
if (BBPfarms[i].dirname && BBPfarms[i].roles & (1 << role))
return i;
@@ -1909,7 +1918,9 @@ BBPdump(void)
vm += HEAPvmsize(b->thash->heap);
}
}
- fprintf(stderr, "\n");
+ fprintf(stderr, " role: %s, persistence: %s\n",
+ b->batRole == PERSISTENT ? "persistent" : "transient",
+ b->batPersistence == PERSISTENT ? "persistent" :
"transient");
}
fprintf(stderr,
"# %d bats: mem=" SZFMT ", vm=" SZFMT " %d cached bats: mem="
SZFMT ", vm=" SZFMT "\n",
diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c
--- a/gdk/gdk_firstn.c
+++ b/gdk/gdk_firstn.c
@@ -102,7 +102,7 @@
#define shuffle_unique(TYPE, OP) \
do { \
- const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \
+ const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \
heapify(OP##fix, SWAP1); \
while (cand ? cand < candend : start < end) { \
i = cand ? *cand++ : start++ + b->hseqbase; \
@@ -119,9 +119,13 @@
* refer to the N smallest/largest (depending on asc) tail values of b
* (taking the optional candidate list s into account). If there are
* multiple equal values to take us past N, we return a subset of those.
+ *
+ * If lastp is non-NULL, it is filled in with the oid of the "last"
+ * value, i.e. the value of which there may be multiple occurrences
+ * that are not all included in the first N.
*/
static BAT *
-BATfirstn_unique(BAT *b, BAT *s, BUN n, int asc)
+BATfirstn_unique(BAT *b, BAT *s, BUN n, int asc, oid *lastp)
{
BAT *bn;
BATiter bi = bat_iterator(b);
@@ -140,17 +144,19 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n,
if (n >= (BUN) (candend - cand)) {
/* trivial: return the candidate list (the
* part that refers to b, that is) */
+ if (lastp)
+ *lastp = 0;
return BATslice(s,
(BUN) (cand - (const oid *) Tloc(s, 0)),
(BUN) (candend - (const oid *) Tloc(s, 0)));
}
} else if (n >= cnt) {
/* trivial: return everything */
- bn = COLnew(0, TYPE_void, cnt, TRANSIENT);
+ bn = BATdense(0, start + b->hseqbase, cnt);
if (bn == NULL)
return NULL;
- BATsetcount(bn, cnt);
- BATtseqbase(bn, start + b->hseqbase);
+ if (lastp)
+ *lastp = 0;
return bn;
}
/* note, we want to do both calls */
@@ -163,23 +169,27 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n,
/* return copy of first relevant part
* of candidate list */
i = (BUN) (cand - (const oid *) Tloc(s, 0));
+ if (lastp)
+ *lastp = cand[n - 1];
return BATslice(s, i, i + n);
}
/* return copy of last relevant part of
* candidate list */
i = (BUN) (candend - (const oid *) Tloc(s, 0));
+ if (lastp)
+ *lastp = candend[-(ssize_t)n];
return BATslice(s, i - n, i);
}
- bn = COLnew(0, TYPE_void, n, TRANSIENT);
- if (bn == NULL)
- return NULL;
- BATsetcount(bn, n);
if (asc ? b->tsorted : b->trevsorted) {
/* first n entries from b */
- BATtseqbase(bn, start + b->hseqbase);
+ bn = BATdense(0, start + b->hseqbase, n);
+ if (lastp)
+ *lastp = start + b->hseqbase + n - 1;
} else {
/* last n entries from b */
- BATtseqbase(bn, start + cnt + b->hseqbase - n);
+ bn = BATdense(0, start + cnt + b->hseqbase - n, n);
+ if (lastp)
+ *lastp = start + cnt + b->hseqbase - n;
}
return bn;
}
@@ -293,6 +303,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n,
break;
}
}
+ if (lastp)
+ *lastp = oids[0]; /* store id of largest value */
/* output must be sorted since it's a candidate list */
GDKqsort(oids, NULL, NULL, (size_t) n, sizeof(oid), 0, TYPE_oid);
bn->tsorted = 1;
@@ -356,8 +368,19 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n,
} \
} while (0)
+/* This version of BATfirstn is like the one above, except that it
+ * also looks at groups. The values of the group IDs are important:
+ * we return only the smallest N (i.e., not dependent on asc which
+ * refers only to the values in the BAT b).
+ *
+ * If lastp is non-NULL, it is filled in with the oid of the "last"
+ * value, i.e. the value of which there may be multiple occurrences
+ * that are not all included in the first N. If lastgp is non-NULL,
+ * it is filled with the group ID (not the oid of the group ID) for
+ * that same value.
+ */
static BAT *
-BATfirstn_unique_with_groups(BAT *b, BAT *s, BAT *g, BUN n, int asc)
+BATfirstn_unique_with_groups(BAT *b, BAT *s, BAT *g, BUN n, int asc, oid *lastp, oid *lastgp)
{
BAT *bn;
BATiter bi = bat_iterator(b);
@@ -387,11 +410,7 @@ BATfirstn_unique_with_groups(BAT *b, BAT
if (n == 0) {
/* candidate list might refer only to values outside
* of the bat and hence be effectively empty */
- bn = COLnew(0, TYPE_void, 0, TRANSIENT);
- if (bn == NULL)
- return NULL;
- BATtseqbase(bn, 0);
- return bn;
+ return BATdense(0, 0, 0);
}
bn = COLnew(0, TYPE_oid, n, TRANSIENT);
@@ -533,6 +552,10 @@ BATfirstn_unique_with_groups(BAT *b, BAT
break;
}
}
+ if (lastp)
+ *lastp = oids[0];
+ if (lastgp)
+ *lastgp = goids[0];
GDKfree(goids);
/* output must be sorted since it's a candidate list */
GDKqsort(oids, NULL, NULL, (size_t) n, sizeof(oid), 0, TYPE_oid);
@@ -545,597 +568,238 @@ BATfirstn_unique_with_groups(BAT *b, BAT
return bn;
}
-#define shuffle_grouped1_body(COMPARE, EQUAL) \
- do { \
- for (i = cand ? *cand++ - b->hseqbase : start; \
- i < end; \
- cand < candend ? (i = *cand++ - b->hseqbase) : i++) { \
- for (j = 0; j < n; j++) { \
- if (j == top) { \
- assert(top < n); \
- groups[top].cnt = 1; \
- groups[top++].bun = i; \
- break; \
- } else { \
- assert(j < top); \
- assert(groups[j].bun < i); \
- if (COMPARE) { \
- if (top < n) \
- top++; \
- for (k = top - 1; k > j; k--) { \
- groups[k] = groups[k - 1]; \
- } \
- groups[j].bun = i; \
- groups[j].cnt = 1; \
- break; \
- } else if (EQUAL) { \
- groups[j].cnt++; \
- break; \
- } \
- } \
- } \
- } \
- } while (0)
-
-#define shuffle_grouped1(TYPE, OPER) \
- do { \
- const TYPE *restrict v = (const TYPE *) Tloc(b, 0); \
- shuffle_grouped1_body(OPER(v[i], v[groups[j].bun]), \
- v[i] == v[groups[j].bun]); \
- } while (0)
-
-#define shuffle_grouped2(TYPE) \
- do { \
- const TYPE *restrict v = (const TYPE *) Tloc(b, 0); \
- TYPE lastval = v[groups[top - 1].bun]; \
- for (i = cand ? *cand++ - b->hseqbase : start; \
- i < end; \
- cand < candend ? (i = *cand++ - b->hseqbase) : i++) { \
- if (asc ? v[i] > lastval : v[i] < lastval) \
- continue; \
- for (j = 0; j < top; j++) { \
- if (v[i] == v[groups[j].bun]) { \
- if (bp) \
- *bp++ = i + b->hseqbase; \
- *gp++ = j; \
- break; \
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list