Changeset: eabe0b36be21 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/eabe0b36be21
Modified Files:
gdk/gdk.h
gdk/gdk_bat.c
gdk/gdk_batop.c
gdk/gdk_hash.c
gdk/gdk_private.h
gdk/gdk_select.c
gdk/gdk_unique.c
sql/server/rel_schema.c
sql/storage/bat/bat_logger.c
sql/test/emptydb-upgrade-chain-hge/Tests/upgrade.stable.out.int128
sql/test/emptydb-upgrade-chain/Tests/upgrade.stable.out
sql/test/emptydb-upgrade-chain/Tests/upgrade.stable.out.int128
sql/test/emptydb/Tests/check.stable.out
sql/test/emptydb/Tests/check.stable.out.32bit
sql/test/emptydb/Tests/check.stable.out.int128
sql/test/testdb-upgrade-chain-hge/Tests/upgrade.stable.out.int128
sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out
sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out.int128
sql/test/testdb-upgrade-hge/Tests/upgrade.stable.out.int128
sql/test/testdb-upgrade/Tests/upgrade.stable.out
sql/test/testdb-upgrade/Tests/upgrade.stable.out.32bit
sql/test/testdb-upgrade/Tests/upgrade.stable.out.int128
Branch: default
Log Message:
Merge with Oct2020 branch.
diffs (truncated from 57385 to 300 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2243,13 +2243,14 @@ gdk_export void VIEWbounds(BAT *b, BAT *
*/
enum prop_t {
GDK_MIN_VALUE = 3, /* smallest non-nil value in BAT */
- GDK_MIN_POS, /* BUN position of smallest value */
+ GDK_MIN_POS, /* BUN position of smallest value (oid) */
GDK_MAX_VALUE, /* largest non-nil value in BAT */
- GDK_MAX_POS, /* BUN position of largest value */
- GDK_HASH_BUCKETS, /* last used hash bucket size */
- GDK_NUNIQUE, /* number of unique values */
- GDK_UNIQUE_ESTIMATE, /* estimate of number of distinct values */
+ GDK_MAX_POS, /* BUN position of largest value (oid) */
+ GDK_HASH_BUCKETS, /* last used hash bucket size (oid) */
+ GDK_NUNIQUE, /* number of unique values (oid) */
+ GDK_UNIQUE_ESTIMATE, /* estimate of number of distinct values (dbl) */
};
+
gdk_export ValPtr BATgetprop(BAT *b, enum prop_t idx);
/*
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -255,6 +255,7 @@ BATmaterialize(BAT *b)
b->tbaseoff = 0;
b->theap->dirty = true;
BATsetprop_nolock(b, GDK_NUNIQUE, TYPE_oid, &(oid){is_oid_nil(t) ? 1 :
b->batCount});
+ BATsetprop_nolock(b, GDK_UNIQUE_ESTIMATE, TYPE_dbl, &(dbl){is_oid_nil(t) ? 1 : b->batCount});
MT_lock_unset(&b->theaplock);
b->ttype = TYPE_oid;
BATsetdims(b);
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -674,6 +674,7 @@ BATfree(BAT *b)
MT_lock_set(&b->theaplock);
if (nunique != BUN_NONE) {
BATsetprop_nolock(b, GDK_NUNIQUE, TYPE_oid, &(oid){nunique});
+ BATsetprop_nolock(b, GDK_UNIQUE_ESTIMATE, TYPE_dbl, &(dbl){nunique});
BATsetprop_nolock(b, GDK_HASH_BUCKETS, TYPE_oid,
&(oid){nbucket});
}
if (b->theap) {
@@ -1041,7 +1042,8 @@ BUNappendmulti(BAT *b, const void *value
return rc;
}
- BATrmprop(b, GDK_UNIQUE_ESTIMATE);
+ if (count > BATcount(b) / GDK_UNIQUE_ESTIMATE_KEEP_FRACTION)
+ BATrmprop(b, GDK_UNIQUE_ESTIMATE);
b->theap->dirty = true;
const void *t = b->ttype == TYPE_msk ? &(msk){false} :
ATOMnilptr(b->ttype);
if (b->ttype == TYPE_oid) {
@@ -1277,7 +1279,8 @@ BUNdelete(BAT *b, oid o)
b->tnorevsorted = 0;
MT_lock_set(&b->theaplock);
b->batCount--;
- BATrmprop_nolock(b, GDK_UNIQUE_ESTIMATE);
+ if (BATcount(b) < GDK_UNIQUE_ESTIMATE_KEEP_FRACTION)
+ BATrmprop_nolock(b, GDK_UNIQUE_ESTIMATE);
MT_lock_unset(&b->theaplock);
if (b->batCount <= 1) {
/* some trivial properties */
@@ -1405,7 +1408,8 @@ BUNinplacemulti(BAT *b, const oid *posit
} else {
BATrmprop_nolock(b, GDK_MIN_POS);
}
- BATrmprop_nolock(b, GDK_UNIQUE_ESTIMATE);
+ if (count > BATcount(b) / GDK_UNIQUE_ESTIMATE_KEEP_FRACTION)
+ BATrmprop_nolock(b, GDK_UNIQUE_ESTIMATE);
MT_lock_unset(&b->theaplock);
} else {
PROPdestroy(b);
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -787,7 +787,8 @@ BATappend2(BAT *b, BAT *n, BAT *s, bool
BATrmprop(b, GDK_MIN_POS);
}
}
- BATrmprop(b, GDK_UNIQUE_ESTIMATE);
+ if (cnt > BATcount(b) / GDK_UNIQUE_ESTIMATE_KEEP_FRACTION)
+ BATrmprop(b, GDK_UNIQUE_ESTIMATE);
/* load hash so that we can maintain it */
(void) BATcheckhash(b);
@@ -1163,7 +1164,8 @@ BATappend_or_update(BAT *b, BAT *p, cons
OIDXdestroy(b);
IMPSdestroy(b);
- BATrmprop(b, GDK_UNIQUE_ESTIMATE);
+ if (ni.count > BATcount(b) / GDK_UNIQUE_ESTIMATE_KEEP_FRACTION)
+ BATrmprop(b, GDK_UNIQUE_ESTIMATE);
/* load hash so that we can maintain it */
(void) BATcheckhash(b);
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -818,6 +818,9 @@ BAThash_impl(BAT *restrict b, struct can
maxmask = HASHmask(ci->ncand);
if (mask > maxmask)
mask = maxmask;
+ } else if (!hascand && (prop = BATgetprop_try(b, GDK_UNIQUE_ESTIMATE)) != NULL) {
+ assert(prop->vtype == TYPE_dbl);
+ mask = (BUN) (prop->val.dval * 8 / 7);
} else {
/* dynamic hash: we start with HASHmask(ci->ncand)/64, or,
* if ci->ncand large enough, HASHmask(ci->ncand)/256; if there
@@ -981,6 +984,9 @@ BAThash_impl(BAT *restrict b, struct can
}
bat_iterator_end(&bi);
if (!hascand) {
+ /* don't keep these properties while we have a hash
+ * structure: they get added again when the hash is
+ * freed */
MT_lock_set(&b->theaplock);
BATrmprop_nolock(b, GDK_HASH_BUCKETS);
BATrmprop_nolock(b, GDK_NUNIQUE);
@@ -1120,9 +1126,14 @@ HASHappend_locked(BAT *b, BUN i, const v
return;
}
assert(i * h->width == h->heaplink.free);
- if (HASHfix(h, false, true) != GDK_SUCCEED) {
+ if (h->nunique < b->batCount / HASH_DESTROY_UNIQUES_FRACTION) {
+ b->thash = NULL;
doHASHdestroy(b, h);
+ return;
+ }
+ if (HASHfix(h, false, true) != GDK_SUCCEED) {
b->thash = NULL;
+ doHASHdestroy(b, h);
return;
}
if (HASHwidth(i + 1) > h->width &&
@@ -1184,9 +1195,14 @@ HASHinsert_locked(BAT *b, BUN p, const v
return;
}
assert(p * h->width < h->heaplink.free);
- if (HASHfix(h, false, true) != GDK_SUCCEED) {
+ if (h->nunique < b->batCount / HASH_DESTROY_UNIQUES_FRACTION) {
+ b->thash = NULL;
doHASHdestroy(b, h);
+ return;
+ }
+ if (HASHfix(h, false, true) != GDK_SUCCEED) {
b->thash = NULL;
+ doHASHdestroy(b, h);
return;
}
BUN c = HASHprobe(h, v);
@@ -1260,9 +1276,14 @@ HASHdelete_locked(BAT *b, BUN p, const v
return;
}
assert(p * h->width < h->heaplink.free);
- if (HASHfix(h, false, true) != GDK_SUCCEED) {
+ if (h->nunique < b->batCount / HASH_DESTROY_UNIQUES_FRACTION) {
+ b->thash = NULL;
doHASHdestroy(b, h);
+ return;
+ }
+ if (HASHfix(h, false, true) != GDK_SUCCEED) {
b->thash = NULL;
+ doHASHdestroy(b, h);
return;
}
BUN c = HASHprobe(h, v);
diff --git a/gdk/gdk_logger.c b/gdk/gdk_logger.c
--- a/gdk/gdk_logger.c
+++ b/gdk/gdk_logger.c
@@ -342,11 +342,13 @@ string_reader(logger *lg, BAT *b, lng nr
sz = (size_t)SZ;
char *buf = lg->buf;
if (lg->bufsize < sz) {
- lg->buf = buf = GDKrealloc(buf, sz);
+ if (!(buf = GDKrealloc(lg->buf, sz)))
+ return LOG_ERR;
+ lg->buf = buf;
lg->bufsize = sz;
}
- if (!buf || mnstr_read(lg->input_log, buf, sz, 1) != 1)
+ if (mnstr_read(lg->input_log, buf, sz, 1) != 1)
return LOG_EOF;
/* handle strings */
char *t = buf;
@@ -2363,7 +2365,7 @@ string_writer(logger *lg, BAT *b, lng of
size_t bufsz = lg->bufsize, resize = 0;
BUN end = (BUN)(offset + nr);
char *buf = lg->buf;
- gdk_return res = GDK_FAIL;
+ gdk_return res = GDK_SUCCEED;
if (!buf)
return GDK_FAIL;
@@ -2372,11 +2374,11 @@ string_writer(logger *lg, BAT *b, lng of
for ( ; p < end; ) {
size_t sz = 0;
if (resize) {
- lg->buf = buf = GDKrealloc(buf, resize);
- if (!buf) {
+ if (!(buf = GDKrealloc(lg->buf, resize))) {
res = GDK_FAIL;
break;
}
+ lg->buf = buf;
lg->bufsize = bufsz = resize;
resize = 0;
}
@@ -2394,8 +2396,10 @@ string_writer(logger *lg, BAT *b, lng of
sz += len;
}
}
- if (sz && buf && mnstr_writeLng(lg->output_log, (lng) sz) && mnstr_write(lg->output_log, buf, sz, 1) == 1)
- res = GDK_SUCCEED;
+ if (sz && (!mnstr_writeLng(lg->output_log, (lng) sz) || mnstr_write(lg->output_log, buf, sz, 1) != 1)) {
+ res = GDK_FAIL;
+ break;
+ }
}
bat_iterator_end(&bi);
return res;
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -463,6 +463,16 @@ extern MT_Lock GDKtmLock;
#define GDKcacheLock(y) GDKbbpLock[y].cache
#define BBP_free(y) GDKbbpLock[y].free
+/* when the number of updates to a BAT is less than 1 in this number, we
+ * keep the GDK_UNIQUE_ESTIMATE property */
+extern BUN GDK_UNIQUE_ESTIMATE_KEEP_FRACTION; /* should become a define once */
+/* if the number of unique values is less than 1 in this number, we
+ * destroy the hash rather than update it in HASH{append,insert,delete} */
+extern BUN HASH_DESTROY_UNIQUES_FRACTION; /* likewise */
+/* if the estimated number of unique values is less than 1 in this
+ * number, don't build a hash table to do a hashselect */
+extern dbl NO_HASH_SELECT_FRACTION; /* same here */
+
#if !defined(NDEBUG) && !defined(__COVERITY__)
/* see comment in gdk.h */
#ifdef __GNUC__
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -1576,6 +1576,14 @@ BATselect(BAT *b, BAT *s, const void *tl
(!b->batTransient &&
ATOMsize(b->ttype) >= sizeof(BUN) / 4 &&
BATcount(b) * (ATOMsize(b->ttype) + 2 * sizeof(BUN)) <
GDK_mem_maxsize / 2);
+ if (wanthash && !havehash) {
+ const ValRecord *prop;
+ if ((prop = BATgetprop(b, GDK_UNIQUE_ESTIMATE)) != NULL &&
+ prop->val.dval < BATcount(b) / NO_HASH_SELECT_FRACTION) {
+ /* too many duplicates: not worth it */
+ wanthash = false;
+ }
+ }
}
if (equi && !havehash && parent != 0) {
diff --git a/gdk/gdk_unique.c b/gdk/gdk_unique.c
--- a/gdk/gdk_unique.c
+++ b/gdk/gdk_unique.c
@@ -92,6 +92,8 @@ BATunique(BAT *b, BAT *s)
MT_lock_set(&b->theaplock);
if ((prop = BATgetprop_nolock(b, GDK_NUNIQUE)) != NULL)
initsize = prop->val.oval;
+ else if ((prop = BATgetprop_nolock(b, GDK_UNIQUE_ESTIMATE)) != NULL)
+ initsize = (BUN) prop->val.dval;
MT_lock_unset(&b->theaplock);
}
}
diff --git a/gdk/gdk_utils.c b/gdk/gdk_utils.c
--- a/gdk/gdk_utils.c
+++ b/gdk/gdk_utils.c
@@ -57,6 +57,16 @@ static void GDKunlockHome(int farmid);
#undef realloc
#undef free
+/* when the number of updates to a BAT is less than 1 in this number, we
+ * keep the GDK_UNIQUE_ESTIMATE property */
+BUN GDK_UNIQUE_ESTIMATE_KEEP_FRACTION = 1000; /* should become a define once */
+/* if the number of unique values is less than 1 in this number, we
+ * destroy the hash rather than update it in HASH{append,insert,delete} */
+BUN HASH_DESTROY_UNIQUES_FRACTION = 1000; /* likewise */
+/* if the estimated number of unique values is less than 1 in this
+ * number, don't build a hash table to do a hashselect */
+dbl NO_HASH_SELECT_FRACTION = 1000; /* same here */
+
/*
* @+ Monet configuration file
* Parse a possible MonetDB config file (if specified by command line
@@ -1146,6 +1156,21 @@ GDKinit(opt *set, int setlen, bool embed
TRC_CRITICAL(GDK, "GDKsetenv revision failed");
return GDK_FAIL;
}
+ GDK_UNIQUE_ESTIMATE_KEEP_FRACTION = 0;
+ if ((p = GDKgetenv("gdk_unique_estimate_keep_fraction")) != NULL)
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list