Changeset: 237093560788 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/237093560788
Modified Files:
gdk/gdk_select.c
Branch: Aug2024
Log Message:
Use binary search instead of hash on sorted bat with available hash table.
It turns out, even with large bats, binary search is faster than using a
hash, even if the hash chain has length 1.
diffs (30 lines):
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -1873,11 +1873,12 @@ BATselect(BAT *b, BAT *s, const void *tl
else
pb = NULL;
pbi = bat_iterator(pb);
- /* use hash only for equi-join, and then only if b or its
- * parent already has a hash, or if b or its parent is
- * persistent and the total size wouldn't be too large; check
- * for existence of hash last since that may involve I/O */
- if (equi || antiequi) {
+ /* use hash only for equi-join if the bat is not sorted, but
+ * only if b or its parent already has a hash, or if b or its
+ * parent is persistent and the total size wouldn't be too
+ * large; check for existence of hash last since that may
+ * involve I/O */
+ if ((equi || antiequi) && !bi.sorted && !bi.revsorted) {
double cost = joincost(b, 1, &ci, &havehash, &phash, NULL);
if (cost > 0 && cost < ci.ncand) {
wanthash = true;
@@ -1980,7 +1981,7 @@ BATselect(BAT *b, BAT *s, const void *tl
}
}
- if (!havehash && (bi.sorted || bi.revsorted || oidxh != NULL)) {
+ if (bi.sorted || bi.revsorted || (!havehash && oidxh != NULL)) {
BUN low = 0;
BUN high = bi.count;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]