Changeset: 99be915dc963 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=99be915dc963
Modified Files:
gdk/gdk.h
gdk/gdk_bat.c
gdk/gdk_batop.c
gdk/gdk_join.c
Branch: Jun2020
Log Message:
Cache estimated count of unique values; don't project sample in certain cases.
diffs (97 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2022,6 +2022,7 @@ enum prop_t {
GDK_MAX_VALUE, /* largest non-nil value in BAT */
GDK_HASH_BUCKETS, /* last used hash bucket size */
GDK_NUNIQUE, /* number of unique values */
+ GDK_UNIQUE_ESTIMATE, /* estimate of number of distinct values */
};
/*
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1073,6 +1073,7 @@ BUNappend(BAT *b, const void *t, bool fo
IMPSdestroy(b); /* no support for inserts in imprints yet */
OIDXdestroy(b);
BATrmprop(b, GDK_NUNIQUE);
+ BATrmprop(b, GDK_UNIQUE_ESTIMATE);
#if 0 /* enable if we have more properties than just min/max */
PROPrec *prop;
do {
@@ -1159,6 +1160,7 @@ BUNdelete(BAT *b, oid o)
OIDXdestroy(b);
HASHdestroy(b);
BATrmprop(b, GDK_NUNIQUE);
+ BATrmprop(b, GDK_UNIQUE_ESTIMATE);
#if 0 /* enable if we have more properties than just min/max */
do {
for (prop = b->tprops; prop; prop = prop->next)
@@ -1248,6 +1250,7 @@ BUNinplace(BAT *b, BUN p, const void *t,
}
}
BATrmprop(b, GDK_NUNIQUE);
+ BATrmprop(b, GDK_UNIQUE_ESTIMATE);
#if 0 /* enable if we have more properties than just min/max */
do {
for (prop = b->tprops; prop; prop = prop->next)
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -573,6 +573,7 @@ BATappend2(BAT *b, BAT *n, BAT *s, bool
}
}
BATrmprop(b, GDK_NUNIQUE);
+ BATrmprop(b, GDK_UNIQUE_ESTIMATE);
#if 0 /* enable if we have more properties than just min/max */
do {
for (prop = b->tprops; prop; prop = prop->next)
@@ -890,6 +891,7 @@ BATreplace(BAT *b, BAT *p, BAT *n, bool
OIDXdestroy(b);
IMPSdestroy(b);
BATrmprop(b, GDK_NUNIQUE);
+ BATrmprop(b, GDK_UNIQUE_ESTIMATE);
b->tsorted = b->trevsorted = false;
b->tnosorted = b->tnorevsorted = 0;
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3023,12 +3023,19 @@ guess_uniques(BAT *b, struct canditer *c
if (b->tkey)
return (double) ci->ncand;
- if (ci->s) {
+ if (ci->s == NULL ||
+ (ci->tpe == cand_dense && ci->ncand == BATcount(b))) {
+ PROPrec *p = BATgetprop(b, GDK_UNIQUE_ESTIMATE);
+ if (p) {
+ TRC_DEBUG(ALGO, "b=" ALGOBATFMT " use cached value\n",
+ ALGOBATPAR(b));
+ return p->v.val.dval;
+ }
+ s1 = BATsample(b, 1000);
+ } else {
BAT *s2 = BATsample(ci->s, 1000);
s1 = BATproject(s2, ci->s);
BBPreclaim(s2);
- } else {
- s1 = BATsample(b, 1000);
}
BUN n2 = BATcount(s1);
BUN n1 = n2 / 2;
@@ -3038,7 +3045,12 @@ guess_uniques(BAT *b, struct canditer *c
double A = (double) (cnt2 - cnt1) / (n2 - n1);
double B = cnt1 - n1 * A;
- return A * ci->ncand + B;
+ B += A * ci->ncand;
+ if (ci->s == NULL ||
+ (ci->tpe == cand_dense && ci->ncand == BATcount(b))) {
+ BATsetprop(b, GDK_UNIQUE_ESTIMATE, TYPE_dbl, &B);
+ }
+ return B;
}
#define MASK_EQ 1
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list