Changeset: 56f633d64ef4 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=56f633d64ef4
Removed Files:
sql/test/miscellaneous/Tests/simple_plans.stable.out.single
Modified Files:
gdk/gdk_join.c
sql/backends/monet5/sql_result.c
sql/backends/monet5/sql_round_impl.h
sql/backends/monet5/sql_upgrades.c
sql/server/rel_optimizer.c
sql/server/sql_parser.y
sql/test/emptydb-upgrade-chain-hge/Tests/upgrade.stable.out.int128
sql/test/emptydb-upgrade-chain-hge/Tests/upgrade.stable.out.ppc64.int128
sql/test/emptydb-upgrade-chain/Tests/upgrade.stable.out
sql/test/emptydb-upgrade-chain/Tests/upgrade.stable.out.32bit
sql/test/emptydb-upgrade-chain/Tests/upgrade.stable.out.int128
sql/test/emptydb-upgrade-chain/Tests/upgrade.stable.out.ppc64
sql/test/emptydb-upgrade-chain/Tests/upgrade.stable.out.ppc64.int128
sql/test/emptydb-upgrade-hge/Tests/upgrade.stable.out.int128
sql/test/emptydb-upgrade/Tests/upgrade.stable.out
sql/test/emptydb-upgrade/Tests/upgrade.stable.out.32bit
sql/test/emptydb-upgrade/Tests/upgrade.stable.out.int128
sql/test/emptydb/Tests/check.stable.out
sql/test/emptydb/Tests/check.stable.out.32bit
sql/test/emptydb/Tests/check.stable.out.int128
sql/test/miscellaneous/Tests/simple_plans.stable.out
sql/test/testdb-upgrade-chain-hge/Tests/upgrade.stable.out.int128
sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out
sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out.32bit
sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out.int128
sql/test/testdb-upgrade-hge/Tests/upgrade.stable.out.int128
sql/test/testdb-upgrade/Tests/upgrade.stable.out
sql/test/testdb-upgrade/Tests/upgrade.stable.out.32bit
sql/test/testdb-upgrade/Tests/upgrade.stable.out.int128
Branch: default
Log Message:
Merge with Oct2020 branch.
diffs (truncated from 2712 to 300 lines):
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -370,8 +370,10 @@ VIEWreset(BAT *b)
b->batCapacity = cnt;
/* insert all of v in b, and quit */
- if (BATappend2(b, v, NULL, false, false) != GDK_SUCCEED)
+ if (BATappend2(b, v, NULL, false, false) != GDK_SUCCEED) {
+ GDKerror("appending view failed");
goto bailout;
+ }
BBPreclaim(v);
}
return GDK_SUCCEED;
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -2831,6 +2831,233 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
return GDK_FAIL;
}
+/* population count: count number of 1 bits in a value */
+static inline uint32_t __attribute__((__const__))
+pop(uint32_t x)
+{
+#if defined(__GNUC__)
+ return (uint32_t) __builtin_popcount(x);
+#elif defined(_MSC_VER)
+ return (uint32_t) __popcnt((unsigned int) (x));
+#else
+ /* divide and conquer implementation (the two versions are
+ * essentially equivalent, but the first version is written a
+ * bit smarter) */
+#if 1
+ x -= (x >> 1) & ~0U/3 /* 0x55555555 */; /* 3-1=2; 2-1=1; 1-0=1; 0-0=0 */
+ x = (x & ~0U/5) + ((x >> 2) & ~0U/5) /* 0x33333333 */;
+ x = (x + (x >> 4)) & ~0UL/0x11 /* 0x0F0F0F0F */;
+ x = (x + (x >> 8)) & ~0UL/0x101 /* 0x00FF00FF */;
+ x = (x + (x >> 16)) & 0xFFFF /* ~0UL/0x10001 */;
+#else
+ x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+ x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F);
+ x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF);
+ x = (x & 0x0000FFFF) + ((x >> 16) & 0x0000FFFF);
+#endif
+ return x;
+#endif
+}
+
+/* Count the number of unique values for the first half and the complete
+ * set (the sample s of b) and return the two values in *cnt1 and
+ * *cnt2. In case of error, both values are 0. */
+static void
+count_unique(BAT *b, BAT *s, BUN *cnt1, BUN *cnt2)
+{
+ struct canditer ci;
+ BUN half;
+ BUN cnt = 0;
+ const void *v;
+ const char *bvals;
+ const char *bvars;
+ oid bval;
+ int bwidth;
+ oid i, o;
+ const char *nme;
+ BUN hb;
+ BATiter bi;
+ int (*cmp)(const void *, const void *);
+ const char *algomsg = "";
+ lng t0 = 0;
+
+ TRC_DEBUG_IF(ALGO) t0 = GDKusec();
+ canditer_init(&ci, b, s);
+ half = ci.ncand / 2;
+
+ if (b->tkey || ci.ncand <= 1 || BATtdense(b)) {
+ /* trivial: already unique */
+ *cnt1 = half;
+ *cnt2 = ci.ncand;
+ return;
+ }
+
+ if ((BATordered(b) && BATordered_rev(b)) ||
+ (b->ttype == TYPE_void && is_oid_nil(b->tseqbase))) {
+ /* trivial: all values are the same */
+ *cnt1 = *cnt2 = 1;
+ return;
+ }
+
+ assert(b->ttype != TYPE_void);
+
+ bvals = Tloc(b, 0);
+ if (b->tvarsized && b->ttype)
+ bvars = b->tvheap->base;
+ else
+ bvars = NULL;
+ bwidth = Tsize(b);
+ cmp = ATOMcompare(b->ttype);
+ bi = bat_iterator(b);
+
+ *cnt1 = *cnt2 = 0;
+
+ if (BATordered(b) || BATordered_rev(b)) {
+ const void *prev = NULL;
+ algomsg = "sorted";
+ for (i = 0; i < ci.ncand; i++) {
+ if (i == half)
+ *cnt1 = cnt;
+ o = canditer_next(&ci);
+ v = VALUE(b, o - b->hseqbase);
+ if (prev == NULL || (*cmp)(v, prev) != 0) {
+ cnt++;
+ }
+ prev = v;
+ }
+ *cnt2 = cnt;
+ } else if (ATOMbasetype(b->ttype) == TYPE_bte) {
+ unsigned char val;
+ uint32_t seen[256 / 32];
+
+ algomsg = "byte-sized atoms";
+ assert(bvars == NULL);
+ for (i = 0; i < ci.ncand; i++) {
+ if (i == ci.ncand/ 2) {
+ cnt = 0;
+ for (int j = 0; j < 256 / 32; j++)
+ cnt += pop(seen[j]);
+ *cnt1 = cnt;
+ }
+ o = canditer_next(&ci);
+ val = ((const unsigned char *) bvals)[o - b->hseqbase];
+ if (!(seen[val >> 5] & (1U << (val & 0x1F)))) {
+ seen[val >> 5] |= 1U << (val & 0x1F);
+ }
+ }
+ cnt = 0;
+ for (int j = 0; j < 256 / 32; j++)
+ cnt += pop(seen[j]);
+ *cnt2 = cnt;
+ } else if (ATOMbasetype(b->ttype) == TYPE_sht) {
+ unsigned short val;
+ uint32_t *seen = NULL;
+
+ algomsg = "short-sized atoms";
+ assert(bvars == NULL);
+ seen = GDKzalloc((65536 / 32) * sizeof(seen[0]));
+ if (seen == NULL)
+ return;
+ for (i = 0; i < ci.ncand; i++) {
+ if (i == half) {
+ cnt = 0;
+ for (int j = 0; j < 65536 / 32; j++)
+ cnt += pop(seen[j]);
+ *cnt1 = cnt;
+ }
+ o = canditer_next(&ci);
+ val = ((const unsigned short *) bvals)[o - b->hseqbase];
+ if (!(seen[val >> 5] & (1U << (val & 0x1F)))) {
+ seen[val >> 5] |= 1U << (val & 0x1F);
+ }
+ }
+ cnt = 0;
+ for (int j = 0; j < 65536 / 32; j++)
+ cnt += pop(seen[j]);
+ *cnt2 = cnt;
+ GDKfree(seen);
+ seen = NULL;
+ } else {
+ BUN prb;
+ BUN p;
+ BUN mask;
+ Hash hs = {0};
+
+ GDKclrerr(); /* not interested in BAThash errors */
+ algomsg = "new partial hash";
+ nme = BBP_physical(b->batCacheid);
+ mask = HASHmask(ci.ncand);
+ if (mask < ((BUN) 1 << 16))
+ mask = (BUN) 1 << 16;
+ if (snprintf(hs.heaplink.filename,
sizeof(hs.heaplink.filename), "%s.thshunil%x", nme, (unsigned) THRgettid()) >=
(int) sizeof(hs.heaplink.filename) ||
+ snprintf(hs.heapbckt.filename,
sizeof(hs.heapbckt.filename), "%s.thshunib%x", nme, (unsigned) THRgettid()) >=
(int) sizeof(hs.heapbckt.filename) ||
+ HASHnew(&hs, b->ttype, BUNlast(b), mask, BUN_NONE, false)
!= GDK_SUCCEED) {
+ GDKerror("cannot allocate hash table\n");
+ HEAPfree(&hs.heaplink, true);
+ HEAPfree(&hs.heapbckt, true);
+ return;
+ }
+ for (i = 0; i < ci.ncand; i++) {
+ if (i == half)
+ *cnt1 = cnt;
+ o = canditer_next(&ci);
+ v = VALUE(b, o - b->hseqbase);
+ prb = HASHprobe(&hs, v);
+ for (hb = HASHget(&hs, prb);
+ hb != HASHnil(&hs);
+ hb = HASHgetlink(&hs, hb)) {
+ if (cmp(v, BUNtail(bi, hb)) == 0)
+ break;
+ }
+ if (hb == HASHnil(&hs)) {
+ p = o - b->hseqbase;
+ cnt++;
+ /* enter into hash table */
+ HASHputlink(&hs, p, HASHget(&hs, prb));
+ HASHput(&hs, prb, p);
+ }
+ }
+ *cnt2 = cnt;
+ HEAPfree(&hs.heaplink, true);
+ HEAPfree(&hs.heapbckt, true);
+ }
+
+ TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT
+ " -> " BUNFMT " " BUNFMT " (%s -- " LLFMT "usec)\n",
+ ALGOBATPAR(b), ALGOOPTBATPAR(s),
+ *cnt1, *cnt2, algomsg, GDKusec() - t0);
+
+ return;
+}
+
+static double
+guess_uniques(BAT *b, struct canditer *ci)
+{
+ BUN cnt1, cnt2;
+ BAT *s1;
+
+ if (b->tkey)
+ return (double) ci->ncand;
+
+ if (ci->s) {
+ BAT *s2 = BATsample(ci->s, 1000);
+ s1 = BATproject(s2, ci->s);
+ BBPreclaim(s2);
+ } else {
+ s1 = BATsample(b, 1000);
+ }
+ BUN n2 = BATcount(s1);
+ BUN n1 = n2 / 2;
+ count_unique(b, s1, &cnt1, &cnt2);
+ BBPreclaim(s1);
+
+ double A = (double) (cnt2 - cnt1) / (n2 - n1);
+ double B = cnt1 - n1 * A;
+
+ return A * ci->ncand + B;
+}
+
#define MASK_EQ 1
#define MASK_LT 2
#define MASK_GT 4
@@ -3279,7 +3506,15 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
/* no hash table, so cost includes time to build the
* hash table (single scan) plus the time to do the
* lookups (also single scan, we assume some chains) */
- rcost = lci.ncand * 1.1;
+ PROPrec *prop = BATgetprop(r, GDK_NUNIQUE);
+ if (prop) {
+ /* we know number of unique values, assume some
+ * chains */
+ rcost = lci.ncand * 1.1 * ((double) BATcount(r) /
prop->v.val.oval);
+ } else {
+ /* guess number of unique value and work with that */
+ rcost = lci.ncand * 1.1 * ((double) BATcount(r) /
guess_uniques(r, &rci));
+ }
#ifdef PERSISTENTHASH
/* only count the cost of creating the hash for
* non-persistent bats */
@@ -3328,7 +3563,16 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
/* no hash table, so cost includes time to build the
* hash table (single scan) plus the time to do the
* lookups (also single scan, we assume some chains) */
- lcost = rci.ncand * 1.1;
+ PROPrec *prop = BATgetprop(l, GDK_NUNIQUE);
+ if (prop) {
+ /* we know number of unique values, assume some
+ * chains */
+ lcost = rci.ncand * 1.1 * ((double) BATcount(l)
/ prop->v.val.oval);
+ } else {
+ /* guess number of unique value and work
+ * with that */
+ lcost = rci.ncand * 1.1 * ((double) BATcount(l)
/ guess_uniques(l, &lci));
+ }
#ifdef PERSISTENTHASH
/* only count the cost of creating the hash
* for non-persistent bats */
@@ -3650,7 +3894,15 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
/* no hash table, so cost includes time to build the
* hash table (single scan) plus the time to do the
* lookups (also single scan, we assume some chains) */
- lcost = rci.ncand * 1.1;
+ PROPrec *prop = BATgetprop(l, GDK_NUNIQUE);
+ if (prop) {
+ /* we know number of unique values, assume some
+ * chains */
+ lcost = rci.ncand * 1.1 * ((double) BATcount(l) /
prop->v.val.oval);
+ } else {
+ /* guess number of unique value and work with that */
+ lcost = rci.ncand * 1.1 * ((double) BATcount(l) /
guess_uniques(l, &lci));
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list