Changeset: 6ef3eb53d6ea for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/6ef3eb53d6ea
Modified Files:
	monetdb5/modules/atoms/str.c
Branch: Jun2023
Log Message:
String select with strimps. diffs (189 lines): diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c --- a/monetdb5/modules/atoms/str.c +++ b/monetdb5/modules/atoms/str.c @@ -5046,9 +5046,36 @@ STRasciify(str *r, const str *s) } \ } while (0) +/* scan select loop with or without candidates */ +#define scanloop_anti(TEST, KEEP_NULLS) \ + do { \ + TRC_DEBUG(ALGO, \ + "scanselect(b=%s#"BUNFMT",anti=%d): " \ + "scanselect %s\n", BATgetId(b), BATcount(b), \ + anti, #TEST); \ + if (!s || BATtdense(s)) { \ + for (; p < q; p++) { \ + GDK_CHECK_TIMEOUT(timeoffset, counter, \ + GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \ + const char *restrict v = BUNtvar(bi, p - off); \ + if ((TEST) || ((KEEP_NULLS) && *v == '\200')) \ + vals[cnt++] = p; \ + } \ + } else { \ + for (; p < ncands; p++) { \ + GDK_CHECK_TIMEOUT(timeoffset, counter, \ + GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \ + oid o = canditer_next(ci); \ + const char *restrict v = BUNtvar(bi, o - off); \ + if ((TEST) || ((KEEP_NULLS) && *v == '\200')) \ + vals[cnt++] = o; \ + } \ + } \ + } while (0) + static str do_string_select(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q, BUN *rcnt, const char *key, bool anti, - bit (*str_cmp)(const char*, const char*, int)) + bit (*str_cmp)(const char*, const char*, int), bool keep_nulls) { BATiter bi = bat_iterator(b); BUN cnt = 0, ncands = ci->ncand; @@ -5063,9 +5090,9 @@ do_string_select(BAT *bn, BAT *b, BAT *s timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0; if (anti) /* keep nulls ? 
(use false for now) */ - scanloop(v && *v != '\200' && str_cmp(v, key, klen) == 0, false); + scanloop_anti(v && *v != '\200' && str_cmp(v, key, klen) == 0, keep_nulls); else - scanloop(v && *v != '\200' && str_cmp(v, key, klen) != 0, false); + scanloop(v && *v != '\200' && str_cmp(v, key, klen) != 0, keep_nulls); bailout: bat_iterator_end(&bi); @@ -5074,12 +5101,15 @@ bailout: } static str -string_select(bat *ret, const bat *bid, const bat *sid, const str *key, const bit *anti, bit (*str_cmp)(const char*, const char*, int), const str fname) +string_select(bat *ret, const bat *bid, const bat *sid, const str *key, const bit *anti, + bit (*str_cmp)(const char*, const char*, int), const str fname) { - BAT *b, *s = NULL, *bn = NULL; + BAT *b, *s = NULL, *bn = NULL, *old_s = NULL;; str msg = MAL_SUCCEED; BUN p = 0, q = 0, rcnt = 0; struct canditer ci; + bool with_strimps = false, + with_strimps_anti = false; if ((b = BATdescriptor(*bid)) == NULL) { msg = createException(MAL, fname , SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); @@ -5092,6 +5122,28 @@ string_select(bat *ret, const bat *bid, assert(ATOMstorage(b->ttype) == TYPE_str); + if (BAThasstrimps(b)) { + if (STRMPcreate(b, NULL) == GDK_SUCCEED) { + BAT *tmp_s = STRMPfilter(b, s, *key, *anti); + if (tmp_s) { + old_s = s; + s = tmp_s; + if (!*anti) + with_strimps = true; + else + with_strimps_anti = true; + } + } else { /* If we cannot filter with the strimp just continue normally */ + GDKclrerr(); + } + } + + MT_thread_setalgorithm(with_strimps ? + "string_select: strcmp function using strimps" : + (with_strimps_anti ? 
+ "string_select: strcmp function using strimps anti" : + "string_select: strcmp function with no accelerator")); + canditer_init(&ci, b, s); if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) { msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); @@ -5113,7 +5165,7 @@ string_select(bat *ret, const bat *bid, } } - msg = do_string_select(bn, b, s, &ci, p, q, &rcnt, *key, *anti, str_cmp); + msg = do_string_select(bn, b, s, &ci, p, q, &rcnt, *key, *anti && !with_strimps_anti, str_cmp, with_strimps_anti); if (!msg) { /* set some properties */ BATsetcount(bn, rcnt); @@ -5122,12 +5174,27 @@ string_select(bat *ret, const bat *bid, bn->tkey = true; bn->tnil = false; bn->tnonil = true; - bn->tseqbase = rcnt == 0 ? 0 : rcnt == 1 ? *(const oid*)Tloc(bn, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil; + bn->tseqbase = rcnt == 0 ? 0 : rcnt == 1 ? + *(const oid*)Tloc(bn, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil; + if(with_strimps_anti) { + BAT *rev; + if (old_s) { + rev = BATdiffcand(old_s, bn); + assert (BATintersectcand(old_s, bn)->batCount == bn->batCount); + assert (rev->batCount == old_s->batCount - bn->batCount); + } + + else + rev = BATnegcands(b->batCount, bn); + BBPunfix(bn->batCacheid); + bn = rev; + } } bailout: BBPreclaim(b); BBPreclaim(s); + BBPreclaim(old_s); if (bn && !msg) { *ret = bn->batCacheid; BBPkeepref(bn); @@ -5142,11 +5209,12 @@ STRstartswithselect(Client cntxt, MalBlk (void)cntxt; (void)mb; bat *ret = getArgReference(stk, pci, 0); - const bat *bid = getArgReference(stk, pci, 1); - const bat *sid = getArgReference(stk, pci, 2); + const bat *bid = getArgReference(stk, pci, 1), + *sid = getArgReference(stk, pci, 2); const str *key = getArgReference_str(stk, pci, 3); - const bit icase = pci->argc == 5 ? false : true; - const bit *anti = pci->argc == 5 ? getArgReference_bit(stk, pci, 4) : getArgReference_bit(stk, pci, 5); + const bit icase = pci->argc == 5 ? false : true, + *anti = pci->argc == 5 ? 
getArgReference_bit(stk, pci, 4) : getArgReference_bit(stk, pci, 5); + return string_select(ret, bid, sid, key, anti, icase ? str_is_iprefix : str_is_prefix, "str.startswithselect"); } @@ -5156,11 +5224,12 @@ STRendswithselect(Client cntxt, MalBlkPt (void)cntxt; (void)mb; bat *ret = getArgReference(stk, pci, 0); - const bat *bid = getArgReference(stk, pci, 1); - const bat *sid = getArgReference(stk, pci, 2); + const bat *bid = getArgReference(stk, pci, 1), + *sid = getArgReference(stk, pci, 2); const str *key = getArgReference_str(stk, pci, 3); - const bit icase = pci->argc == 5 ? false : true; - const bit *anti = pci->argc == 5 ? getArgReference_bit(stk, pci, 4) : getArgReference_bit(stk, pci, 5); + const bit icase = pci->argc == 5 ? false : true, + *anti = pci->argc == 5 ? getArgReference_bit(stk, pci, 4) : getArgReference_bit(stk, pci, 5); + return string_select(ret, bid, sid, key, anti, icase ? str_is_isuffix : str_is_suffix, "str.endswithselect"); } @@ -5170,11 +5239,12 @@ STRcontainsselect(Client cntxt, MalBlkPt (void)cntxt; (void)mb; bat *ret = getArgReference(stk, pci, 0); - const bat *bid = getArgReference(stk, pci, 1); - const bat *sid = getArgReference(stk, pci, 2); + const bat *bid = getArgReference(stk, pci, 1), + *sid = getArgReference(stk, pci, 2); const str *key = getArgReference_str(stk, pci, 3); - const bit icase = pci->argc == 5 ? false : true; - const bit *anti = pci->argc == 5 ? getArgReference_bit(stk, pci, 4) : getArgReference_bit(stk, pci, 5); + const bit icase = pci->argc == 5 ? false : true, + *anti = pci->argc == 5 ? getArgReference_bit(stk, pci, 4) : getArgReference_bit(stk, pci, 5); + return string_select(ret, bid, sid, key, anti, icase ? str_icontains : str_contains, "str.containsselect"); } _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org