Changeset: 155b1ef95b0e for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/155b1ef95b0e
Modified Files:
gdk/gdk_batop.c
gdk/gdk_group.c
gdk/gdk_join.c
gdk/gdk_project.c
sql/backends/monet5/sql.c
Branch: default
Log Message:
Propagate the tunique_est a bit more and use it in append (strings). When we
know that only a limited number of (unique) strings is expected, don't reuse
the (largish) vheap.
In group by, set the tunique_est based on the group result.
diffs (118 lines):
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -125,10 +125,13 @@ insert_string_bat(BAT *b, BATiter *ni, s
r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
/* r is estimate of number of strings in
* double-eliminated area */
- if (r < ci->ncand)
- len = GDK_ELIMLIMIT + (ci->ncand - r) * len;
+ BUN ecnt = ci->ncand;
+ if (ni->b->tunique_est > 0 && ecnt > ni->b->tunique_est)
+ ecnt = ni->b->tunique_est;
+ if (r < ecnt)
+ len = GDK_ELIMLIMIT + (ecnt - r) * len;
else
- len = GDK_STRHASHSIZE + ci->ncand * (len + 12);
+ len = GDK_STRHASHSIZE + ecnt * (len + 12);
/* len is total estimated expected size of vheap */
if (len > ni->vhfree / 2) {
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -1282,6 +1282,7 @@ BATgroup_internal(BAT **groups, BAT **ex
en->trevsorted = ngrp == 1;
en->tnonil = true;
en->tnil = false;
+ en->tunique_est = ngrp;
*extents = virtualize(en);
}
if (histo) {
@@ -1304,7 +1305,11 @@ BATgroup_internal(BAT **groups, BAT **ex
gn->tnonil = true;
gn->tnil = false;
gn->tmaxpos = maxgrppos;
+ gn->tunique_est = ngrp;
*groups = gn;
+ if (!g && !e && !s) {
+ b->tunique_est = ngrp;
+ }
TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT
",g=" ALGOOPTBATFMT ",e=" ALGOOPTBATFMT
",h=" ALGOOPTBATFMT ",subsorted=%s -> groups="
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3259,6 +3259,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT **r3p
}
/* also set other bits of heap to correct value to indicate size */
BATsetcount(r1, BATcount(r1));
+ r1->tunique_est = MIN(l->tunique_est, r->tunique_est);
if (BATcount(r1) <= 1) {
r1->tsorted = true;
r1->trevsorted = true;
@@ -3274,11 +3275,13 @@ hashjoin(BAT **r1p, BAT **r2p, BAT **r3p
r2->tkey = true;
r2->tseqbase = 0;
}
+ r2->tunique_est = MIN(l->tunique_est, r->tunique_est);
}
if (r3) {
r3->tnonil = !r3->tnil;
BATsetcount(r3, BATcount(r3));
assert(BATcount(r1) == BATcount(r3));
+ r3->tunique_est = MIN(l->tunique_est, r->tunique_est);
}
if (BATcount(r1) > 0) {
if (BATtdense(r1))
diff --git a/gdk/gdk_project.c b/gdk/gdk_project.c
--- a/gdk/gdk_project.c
+++ b/gdk/gdk_project.c
@@ -571,6 +571,9 @@ project_str(BATiter *restrict li, struct
bn->tnil = false;
bn->tnonil = r1i->nonil & r2i->nonil;
bn->tkey = false;
+ bn->tunique_est =
+ MIN(li->b->tunique_est?li->b->tunique_est:BATcount(li->b),
+ r1i->b->tunique_est?r1i->b->tunique_est:BATcount(r1i->b));
TRC_DEBUG(ALGO, "l=" ALGOBATFMT " r1=" ALGOBATFMT " r2=" ALGOBATFMT
" -> " ALGOBATFMT "%s " LLFMT "us\n",
ALGOBATPAR(li->b), ALGOBATPAR(r1i->b), ALGOBATPAR(r2i->b),
@@ -820,6 +823,9 @@ BATproject2(BAT *restrict l, BAT *restri
bn->tascii = r1i.ascii;
}
+ bn->tunique_est =
+ MIN(li.b->tunique_est?li.b->tunique_est:BATcount(li.b),
+ r1i.b->tunique_est?r1i.b->tunique_est:BATcount(r1i.b));
if (!BATtdensebi(&r1i) || (r2 && !BATtdensebi(&r2i)))
BATtseqbase(bn, oid_nil);
@@ -1138,6 +1144,15 @@ BATprojectchain(BAT **bats)
bn->tnonil = nonil & b->tnonil;
bn->tseqbase = oid_nil;
bn->tkey = (ba[0].cnt <= 1);
+ double est = 0;
+ for (int i = 0; i < n; i++) {
+ double nest = ba[i].b->tunique_est?ba[i].b->tunique_est:BATcount(ba[i].b);
+ if (est)
+ est = MIN(est, nest);
+ else
+ est = nest;
+ }
+ bn->tunique_est = est;
/* note, b may point to one of the bats in tobedeleted, so
* reclaim after the last use of b */
while (ndelete-- > 0)
diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c
--- a/sql/backends/monet5/sql.c
+++ b/sql/backends/monet5/sql.c
@@ -2430,6 +2430,7 @@ SQLtid(Client cntxt, MalBlkPtr mb, MalSt
nr_parts = *getArgReference_int(stk, pci, 5);
}
BAT *b = store->storage_api.bind_cands(tr, t, nr_parts, part_nr);
+ b->tunique_est = BATcount(b);
if (b) {
*res = b->batCacheid;
BBPkeepref(b);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]