Changeset: 0d2d8a07cd9b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/0d2d8a07cd9b
Modified Files:
	gdk/gdk_select.c
Branch: Dec2023
Log Message:
Handle anti select on duplicate-eliminated string bats without nils more
efficiently.
diffs (truncated from 435 to 300 lines):
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -778,11 +778,35 @@ fullscan_str(BATiter *bi, struct candite
timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ?
(qry_ctx->starttime + qry_ctx->querytimeout) : 0;
}
- if (!equi || !GDK_ELIMDOUBLES(bi->vh))
+ if (anti && tl == th && !bi->nonil && GDK_ELIMDOUBLES(bi->vh) &&
+ strcmp(tl, str_nil) != 0 &&
+ strLocate(bi->vh, str_nil) == (var_t) -2) {
+ /* anti-equi select for non-nil value, and there are no
+ * nils, so we can use fast path; trigger by setting
+ * nonil */
+ bi->nonil = true;
+ }
+ if (!((equi ||
+ (anti && tl == th && (bi->nonil || strcmp(tl, str_nil) == 0))) &&
+ GDK_ELIMDOUBLES(bi->vh)))
return fullscan_any(bi, ci, bn, tl, th, li, hi, equi, anti,
lval, hval, lnil, cnt, hseq, dst,
maximum, imprints, algo);
if ((pos = strLocate(bi->vh, tl)) == (var_t) -2) {
+ if (anti) {
+ /* return the whole shebang */
+ *algo = "select: fullscan anti-equi strelim (all)";
+ if (BATextend(bn, ncand) != GDK_SUCCEED) {
+ BBPreclaim(bn);
+ return BUN_NONE;
+ }
+ dst = Tloc(bn, 0);
+ TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+ dst[p] = canditer_next(ci);
+ }
+ TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout));
+ return ncand;
+ }
*algo = "select: fullscan equi strelim (nomatch)";
return 0;
}
@@ -790,40 +814,74 @@ fullscan_str(BATiter *bi, struct candite
BBPreclaim(bn);
return BUN_NONE;
}
- *algo = "select: fullscan equi strelim";
+ *algo = anti ? "select: fullscan anti-equi strelim" : "select: fullscan equi strelim";
assert(pos >= GDK_VAROFFSET);
switch (bi->width) {
case 1: {
const unsigned char *ptr = (const unsigned char *) bi->base;
pos -= GDK_VAROFFSET;
if (ci->tpe == cand_dense) {
- TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
- o = canditer_next_dense(ci);
- if (ptr[o - hseq] == pos) {
- dst = buninsfix(bn, dst, cnt, o,
- (BUN) ((dbl) cnt /
(dbl) (p == 0 ? 1 : p)
- * (dbl)
(ncand-p) * 1.1 + 1024),
- maximum);
- if (dst == NULL) {
- BBPreclaim(bn);
- return BUN_NONE;
+ if (anti) {
+ TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+ o = canditer_next_dense(ci);
+ if (ptr[o - hseq] != pos) {
+ dst = buninsfix(bn, dst, cnt, o,
+ (BUN) ((dbl)
cnt / (dbl) (p == 0 ? 1 : p)
+ * (dbl)
(ncand-p) * 1.1 + 1024),
+ maximum);
+ if (dst == NULL) {
+ BBPreclaim(bn);
+ return BUN_NONE;
+ }
+ cnt++;
}
- cnt++;
+ }
+ } else {
+ TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+ o = canditer_next_dense(ci);
+ if (ptr[o - hseq] == pos) {
+ dst = buninsfix(bn, dst, cnt, o,
+ (BUN) ((dbl)
cnt / (dbl) (p == 0 ? 1 : p)
+ * (dbl)
(ncand-p) * 1.1 + 1024),
+ maximum);
+ if (dst == NULL) {
+ BBPreclaim(bn);
+ return BUN_NONE;
+ }
+ cnt++;
+ }
}
}
} else {
- TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
- o = canditer_next(ci);
- if (ptr[o - hseq] == pos) {
- dst = buninsfix(bn, dst, cnt, o,
- (BUN) ((dbl) cnt /
(dbl) (p == 0 ? 1 : p)
- * (dbl)
(ncand-p) * 1.1 + 1024),
- maximum);
- if (dst == NULL) {
- BBPreclaim(bn);
- return BUN_NONE;
+ if (anti) {
+ TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+ o = canditer_next(ci);
+ if (ptr[o - hseq] != pos) {
+ dst = buninsfix(bn, dst, cnt, o,
+ (BUN) ((dbl)
cnt / (dbl) (p == 0 ? 1 : p)
+ * (dbl)
(ncand-p) * 1.1 + 1024),
+ maximum);
+ if (dst == NULL) {
+ BBPreclaim(bn);
+ return BUN_NONE;
+ }
+ cnt++;
}
- cnt++;
+ }
+ } else {
+ TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+ o = canditer_next(ci);
+ if (ptr[o - hseq] == pos) {
+ dst = buninsfix(bn, dst, cnt, o,
+ (BUN) ((dbl)
cnt / (dbl) (p == 0 ? 1 : p)
+ * (dbl)
(ncand-p) * 1.1 + 1024),
+ maximum);
+ if (dst == NULL) {
+ BBPreclaim(bn);
+ return BUN_NONE;
+ }
+ cnt++;
+ }
}
}
}
@@ -833,33 +891,67 @@ fullscan_str(BATiter *bi, struct candite
const unsigned short *ptr = (const unsigned short *) bi->base;
pos -= GDK_VAROFFSET;
if (ci->tpe == cand_dense) {
- TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
- o = canditer_next_dense(ci);
- if (ptr[o - hseq] == pos) {
- dst = buninsfix(bn, dst, cnt, o,
- (BUN) ((dbl) cnt /
(dbl) (p == 0 ? 1 : p)
- * (dbl)
(ncand-p) * 1.1 + 1024),
- maximum);
- if (dst == NULL) {
- BBPreclaim(bn);
- return BUN_NONE;
+ if (anti) {
+ TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+ o = canditer_next_dense(ci);
+ if (ptr[o - hseq] != pos) {
+ dst = buninsfix(bn, dst, cnt, o,
+ (BUN) ((dbl)
cnt / (dbl) (p == 0 ? 1 : p)
+ * (dbl)
(ncand-p) * 1.1 + 1024),
+ maximum);
+ if (dst == NULL) {
+ BBPreclaim(bn);
+ return BUN_NONE;
+ }
+ cnt++;
}
- cnt++;
+ }
+ } else {
+ TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+ o = canditer_next_dense(ci);
+ if (ptr[o - hseq] == pos) {
+ dst = buninsfix(bn, dst, cnt, o,
+ (BUN) ((dbl)
cnt / (dbl) (p == 0 ? 1 : p)
+ * (dbl)
(ncand-p) * 1.1 + 1024),
+ maximum);
+ if (dst == NULL) {
+ BBPreclaim(bn);
+ return BUN_NONE;
+ }
+ cnt++;
+ }
}
}
} else {
- TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
- o = canditer_next(ci);
- if (ptr[o - hseq] == pos) {
- dst = buninsfix(bn, dst, cnt, o,
- (BUN) ((dbl) cnt /
(dbl) (p == 0 ? 1 : p)
- * (dbl)
(ncand-p) * 1.1 + 1024),
- maximum);
- if (dst == NULL) {
- BBPreclaim(bn);
- return BUN_NONE;
+ if (anti) {
+ TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+ o = canditer_next(ci);
+ if (ptr[o - hseq] != pos) {
+ dst = buninsfix(bn, dst, cnt, o,
+ (BUN) ((dbl)
cnt / (dbl) (p == 0 ? 1 : p)
+ * (dbl)
(ncand-p) * 1.1 + 1024),
+ maximum);
+ if (dst == NULL) {
+ BBPreclaim(bn);
+ return BUN_NONE;
+ }
+ cnt++;
}
- cnt++;
+ }
+ } else {
+ TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+ o = canditer_next(ci);
+ if (ptr[o - hseq] == pos) {
+ dst = buninsfix(bn, dst, cnt, o,
+ (BUN) ((dbl)
cnt / (dbl) (p == 0 ? 1 : p)
+ * (dbl)
(ncand-p) * 1.1 + 1024),
+ maximum);
+ if (dst == NULL) {
+ BBPreclaim(bn);
+ return BUN_NONE;
+ }
+ cnt++;
+ }
}
}
}
@@ -869,33 +961,67 @@ fullscan_str(BATiter *bi, struct candite
case 4: {
const unsigned int *ptr = (const unsigned int *) bi->base;
if (ci->tpe == cand_dense) {
- TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
- o = canditer_next_dense(ci);
- if (ptr[o - hseq] == pos) {
- dst = buninsfix(bn, dst, cnt, o,
- (BUN) ((dbl) cnt /
(dbl) (p == 0 ? 1 : p)
- * (dbl)
(ncand-p) * 1.1 + 1024),
- maximum);
- if (dst == NULL) {
- BBPreclaim(bn);
- return BUN_NONE;
+ if (anti) {
+ TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+ o = canditer_next_dense(ci);
+ if (ptr[o - hseq] != pos) {
+ dst = buninsfix(bn, dst, cnt, o,
+ (BUN) ((dbl)
cnt / (dbl) (p == 0 ? 1 : p)
+ * (dbl)
(ncand-p) * 1.1 + 1024),
+ maximum);
+ if (dst == NULL) {
+ BBPreclaim(bn);
+ return BUN_NONE;
+ }
+ cnt++;
}
- cnt++;
+ }
+ } else {
+ TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+ o = canditer_next_dense(ci);
+ if (ptr[o - hseq] == pos) {
+ dst = buninsfix(bn, dst, cnt, o,
+ (BUN) ((dbl)
cnt / (dbl) (p == 0 ? 1 : p)
+ * (dbl)
(ncand-p) * 1.1 + 1024),
+ maximum);
+ if (dst == NULL) {
+ BBPreclaim(bn);
+ return BUN_NONE;
+ }
+ cnt++;
+ }
}
}
} else {
- TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
- o = canditer_next(ci);
- if (ptr[o - hseq] == pos) {
- dst = buninsfix(bn, dst, cnt, o,
- (BUN) ((dbl) cnt /
(dbl) (p == 0 ? 1 : p)
- * (dbl)
(ncand-p) * 1.1 + 1024),
- maximum);
- if (dst == NULL) {
- BBPreclaim(bn);
- return BUN_NONE;
+ if (anti) {
+ TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+ o = canditer_next(ci);
+ if (ptr[o - hseq] != pos) {
+ dst = buninsfix(bn, dst, cnt, o,
+ (BUN) ((dbl)
cnt / (dbl) (p == 0 ? 1 : p)
+ * (dbl)
(ncand-p) * 1.1 + 1024),
+ maximum);
+ if (dst == NULL) {
+ BBPreclaim(bn);
+ return BUN_NONE;
+ }
+ cnt++;
}
- cnt++;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]
