Changeset: 567cdb89e55d for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/567cdb89e55d
Modified Files:
gdk/gdk_bat.c
gdk/gdk_cand.c
gdk/gdk_join.c
gdk/gdk_project.c
sql/backends/monet5/UDF/udf/udf.c
sql/backends/monet5/sql.c
Branch: Mar2025
Log Message:
Improvements to unique_est maintenance.
diffs (164 lines):
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -834,7 +834,8 @@ COLcopy(BAT *b, int tt, bool writable, r
bn->tnil = bi.nil;
bn->tminpos = bi.minpos;
bn->tmaxpos = bi.maxpos;
- bn->tunique_est = bi.unique_est;
+ if (!bi.key)
+ bn->tunique_est = bi.unique_est;
} else if (ATOMstorage(tt) == ATOMstorage(b->ttype) &&
ATOMcompare(tt) == ATOMcompare(b->ttype)) {
BUN h = bi.count;
@@ -861,7 +862,8 @@ COLcopy(BAT *b, int tt, bool writable, r
}
bn->tminpos = bi.minpos;
bn->tmaxpos = bi.maxpos;
- bn->tunique_est = bi.unique_est;
+ if (!bi.key)
+ bn->tunique_est = bi.unique_est;
} else {
bn->tsorted = bn->trevsorted = false; /* set based on count later */
bn->tnonil = bn->tnil = false;
@@ -873,6 +875,7 @@ COLcopy(BAT *b, int tt, bool writable, r
bn->tsorted = ATOMlinear(b->ttype);
bn->trevsorted = ATOMlinear(b->ttype);
bn->tkey = true;
+ bn->tunique_est = (double) bn->batCount;
}
bat_iterator_end(&bi);
if (!writable)
@@ -1665,8 +1668,12 @@ BUNinplacemulti(BAT *b, const oid *posit
BUN nunique = b->thash ? b->thash->nunique : 0;
MT_rwlock_wrunlock(&b->thashlock);
MT_lock_set(&b->theaplock);
- if (nunique != 0)
+ if (nunique != 0) {
b->tunique_est = (double) nunique;
+ if (nunique == b->batCount && !b->tkey)
+ BATkey(b, true);
+ } else if (b->tkey)
+ b->tunique_est = (double) b->batCount;
b->tminpos = bi.minpos;
b->tmaxpos = bi.maxpos;
b->theap->dirty = true;
@@ -1954,8 +1961,10 @@ BATkey(BAT *b, bool flag)
b->tkey = flag;
if (!flag) {
b->tseqbase = oid_nil;
- } else
+ } else {
b->tnokey[0] = b->tnokey[1] = 0;
+ b->tunique_est = (double) b->batCount;
+ }
gdk_return rc = GDK_SUCCEED;
if (flag && VIEWtparent(b)) {
/* if a view is key, then so is the parent if the two
@@ -2021,6 +2030,7 @@ BATtseqbase(BAT *b, oid o)
b->trevsorted = b->batCount <= 1;
if (!b->trevsorted)
b->tnorevsorted = 1;
+ b->tunique_est = (double) b->batCount;
}
}
} else {
diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c
--- a/gdk/gdk_cand.c
+++ b/gdk/gdk_cand.c
@@ -1469,6 +1469,7 @@ BATmaskedcands(oid hseq, BUN nr, BAT *ma
GDKfree(msks);
}
BATsetcount(bn, cnt);
+ bn->tunique_est = (double) cnt;
TRC_DEBUG(ALGO, "hseq=" OIDFMT ", masked=" ALGOBATFMT ", selected=%s"
" -> " ALGOBATFMT "\n",
hseq, ALGOBATPAR(masked),
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3259,8 +3259,6 @@ hashjoin(BAT **r1p, BAT **r2p, BAT **r3p
locked = false;
MT_rwlock_rdunlock(&r->thashlock);
}
- bat_iterator_end(&li);
- bat_iterator_end(&ri);
if (hash_cand) {
HEAPfree(&hsh->heaplink, true);
@@ -3269,7 +3267,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT **r3p
}
/* also set other bits of heap to correct value to indicate size */
BATsetcount(r1, BATcount(r1));
- r1->tunique_est = MIN(l->tunique_est, r->tunique_est);
+ r1->tunique_est = MIN(li.unique_est, ri.unique_est);
if (BATcount(r1) <= 1) {
r1->tsorted = true;
r1->trevsorted = true;
@@ -3285,14 +3283,16 @@ hashjoin(BAT **r1p, BAT **r2p, BAT **r3p
r2->tkey = true;
r2->tseqbase = 0;
}
- r2->tunique_est = MIN(l->tunique_est, r->tunique_est);
+ r2->tunique_est = MIN(li.unique_est, ri.unique_est);
}
if (r3) {
r3->tnonil = !r3->tnil;
BATsetcount(r3, BATcount(r3));
assert(BATcount(r1) == BATcount(r3));
- r3->tunique_est = MIN(l->tunique_est, r->tunique_est);
+ r3->tunique_est = MIN(li.unique_est, ri.unique_est);
}
+ bat_iterator_end(&li);
+ bat_iterator_end(&ri);
if (BATcount(r1) > 0) {
if (BATtdense(r1))
r1->tseqbase = ((oid *) r1->theap->base)[0];
diff --git a/gdk/gdk_project.c b/gdk/gdk_project.c
--- a/gdk/gdk_project.c
+++ b/gdk/gdk_project.c
@@ -572,8 +572,8 @@ project_str(BATiter *restrict li, struct
bn->tnonil = r1i->nonil & r2i->nonil;
bn->tkey = false;
bn->tunique_est =
- MIN(li->b->tunique_est?li->b->tunique_est:BATcount(li->b),
- r1i->b->tunique_est?r1i->b->tunique_est:BATcount(r1i->b));
+ MIN(li->unique_est ? li->unique_est : BATcount(li->b),
+ r1i->unique_est ? r1i->unique_est : BATcount(r1i->b));
TRC_DEBUG(ALGO, "l=" ALGOBATFMT " r1=" ALGOBATFMT " r2=" ALGOBATFMT
" -> " ALGOBATFMT "%s " LLFMT "us\n",
ALGOBATPAR(li->b), ALGOBATPAR(r1i->b), ALGOBATPAR(r2i->b),
@@ -824,8 +824,8 @@ BATproject2(BAT *restrict l, BAT *restri
}
bn->tunique_est =
- MIN(li.b->tunique_est?li.b->tunique_est:BATcount(li.b),
- r1i.b->tunique_est?r1i.b->tunique_est:BATcount(r1i.b));
+ MIN(li.unique_est ? li.unique_est : BATcount(li.b),
+ r1i.unique_est ? r1i.unique_est : BATcount(r1i.b));
if (!BATtdensebi(&r1i) || (r2 && !BATtdensebi(&r2i)))
BATtseqbase(bn, oid_nil);
diff --git a/sql/backends/monet5/UDF/udf/udf.c b/sql/backends/monet5/UDF/udf/udf.c
--- a/sql/backends/monet5/UDF/udf/udf.c
+++ b/sql/backends/monet5/UDF/udf/udf.c
@@ -366,7 +366,7 @@ UDFBATfuse_(BAT **ret, BAT *bone, BAT *b
bres->trevsorted = true;
else
bres->trevsorted = (BATcount(bres) <= 1);
- /* result tail is key (unique), iff both input tails are */
+ /* result tail is key (unique) if either input tail is */
BATkey(bres, BATtkey(bone) || BATtkey(btwo));
*ret = bres;
diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c
--- a/sql/backends/monet5/sql.c
+++ b/sql/backends/monet5/sql.c
@@ -2431,7 +2431,6 @@ SQLtid(Client cntxt, MalBlkPtr mb, MalSt
nr_parts = *getArgReference_int(stk, pci, 5);
}
BAT *b = store->storage_api.bind_cands(tr, t, nr_parts, part_nr);
- b->tunique_est = (double)BATcount(b);
if (b) {
*res = b->batCacheid;
BBPkeepref(b);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]