Changeset: 559f0ca5ec18 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/559f0ca5ec18
Modified Files:
monetdb5/modules/atoms/str.c
monetdb5/modules/atoms/str.h
Branch: strimps_v3
Log Message:
Refactor str select and other minor alloc changes
diffs (truncated from 913 to 300 lines):
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -1834,150 +1834,108 @@ BBPreclaim_n(int nargs, ...)
va_end(valist);
}
-#define HANDLE_TIMEOUT(qc) \
- do { \
- TIMEOUT_ERROR(qc, __FILE__, __func__, __LINE__); \
- msg = createException(MAL, fname, GDK_EXCEPTION); \
- } while (0)
+#define VALUE(s, x) (s##_vars + VarHeapVal(s##_vals, (x), s##i->width))
+#define APPEND(b, o) (((oid *) b->theap->base)[b->batCount++] = (o))
-#define scanloop(TEST, canditer_next) \
+#define SCAN_LOOP(STR_CMP) \
do { \
- const oid off = b->hseqbase; \
- TIMEOUT_LOOP(ci.ncand, qry_ctx) { \
- oid o = canditer_next(&ci); \
- const char *restrict v = BUNtvar(bi, o - off); \
- assert(rcnt < BATcapacity(bn)); \
- if (TEST) \
- vals[rcnt++] = o; \
+ TIMEOUT_LOOP(lci->ncand, qry_ctx) { \
+ oid lo = canditer_next(lci); \
+ const char *ls = VALUE(l, lo - l_base); \
+ if (!strNil(ls) && (STR_CMP)) \
+ APPEND(rl, lo); \
} \
} while (0)
static str
-STRselect(MalStkPtr stk, InstrPtr pci,
- int (*str_icmp)(const char *, const char *, int),
- int (*str_cmp)(const char *, const char *, int),
- const char *fname)
+scan_loop_strselect(BAT *rl, BATiter *li, struct canditer *lci, const char *r,
+ int (*str_cmp)(const char *, const char *, int),
+ bool anti, const char *fname, QryCtx *qry_ctx)
{
- str msg = MAL_SUCCEED;
+ oid l_base = li->b->hseqbase;
+ const char *l_vars = li->vh->base, *l_vals = li->base;
+ int r_len = str_strlen(r);
- bat *r_id = getArgReference_bat(stk, pci, 0);
- bat b_id = *getArgReference_bat(stk, pci, 1);
- bat cb_id = *getArgReference_bat(stk, pci, 2);
- const char *key = *getArgReference_str(stk, pci, 3);
- bit icase = pci->argc != 5;
- bit anti = pci->argc == 5 ? *getArgReference_bit(stk, pci, 4) :
- *getArgReference_bit(stk, pci, 5);
-
- BAT *b, *cb = NULL, *bn = NULL, *old_s = NULL;;
- BUN rcnt = 0;
- struct canditer ci;
- bool with_strimps = false,
- with_strimps_anti = false;
-
- if (!(b = BATdescriptor(b_id)))
- throw(MAL, fname, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+ lng t0 = 0;
+ TRC_DEBUG_IF(ALGO) t0 = GDKusec();
- if (!is_bat_nil(cb_id) && !(cb = BATdescriptor(cb_id))) {
- BBPreclaim(b);
- throw(MAL, fname, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
- }
-
- assert(ATOMstorage(b->ttype) == TYPE_str);
+ if (anti)
+ SCAN_LOOP(str_cmp(ls, r, r_len) != 0);
+ else
+ SCAN_LOOP(str_cmp(ls, r, r_len) == 0);
- if (BAThasstrimps(b)) {
- BAT *tmp_s;
- if (STRMPcreate(b, NULL) == GDK_SUCCEED && (tmp_s = STRMPfilter(b, cb, key, anti)) != NULL) {
- old_s = cb;
- cb = tmp_s;
- if (!anti)
- with_strimps = true;
- else
- with_strimps_anti = true;
- } else {
- /* strimps failed, continue without */
- GDKclrerr();
- }
- }
-
- MT_thread_setalgorithm(with_strimps ?
- "string_select: strcmp function using strimps" :
- (with_strimps_anti ?
- "string_select: strcmp function using strimps anti"
- : "string_select: strcmp function with no accelerator"));
-
- canditer_init(&ci, b, cb);
- if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
- BBPreclaim_n(2, b, cb);
- throw(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ BATsetcount(rl, BATcount(rl));
+ if (BATcount(rl) > 0) {
+ BATnegateprops(rl);
+ rl->tnonil = true;
+ rl->tnil = false;
}
- if (!strNil(key)) {
- BATiter bi = bat_iterator(b);
- QryCtx *qry_ctx = MT_thread_get_qry_ctx();
- if (icase)
- str_cmp = str_icmp;
- oid *vals = Tloc(bn, 0);
- const int klen = str_strlen(key);
- if (ci.tpe == cand_dense) {
- if (with_strimps_anti)
- scanloop(strNil(v) || str_cmp(v, key, klen) == 0, canditer_next_dense);
- else if (anti)
- scanloop(!strNil(v) && str_cmp(v, key, klen) != 0, canditer_next_dense);
- else
- scanloop(!strNil(v) && str_cmp(v, key, klen) == 0, canditer_next_dense);
- } else {
- if (with_strimps_anti)
- scanloop(strNil(v) || str_cmp(v, key, klen) == 0, canditer_next);
- else if (anti)
- scanloop(!strNil(v) && str_cmp(v, key, klen) != 0, canditer_next);
- else
- scanloop(!strNil(v) && str_cmp(v, key, klen) == 0, canditer_next);
- }
- bat_iterator_end(&bi);
- TIMEOUT_CHECK(qry_ctx, HANDLE_TIMEOUT(qry_ctx));
+ TRC_DEBUG(ALGO, "(%s, %s, l=%s #%zu [%s], cl=%s #%zu, time="LLFMT"usecs)\n",
+ fname, "scan_loop_strselect",
+ BATgetId(li->b), li->count, ATOMname(li->b->ttype),
+ lci ? BATgetId(lci->s) : "NULL", lci ? lci->ncand : 0,
+ GDKusec() - t0);
+
+ return MAL_SUCCEED;
+}
- if (!msg) {
- BATsetcount(bn, rcnt);
- bn->tsorted = true;
- bn->trevsorted = bn->batCount <= 1;
- bn->tkey = true;
- bn->tnil = false;
- bn->tnonil = true;
- bn->tseqbase = rcnt == 0 ?
- 0 : rcnt == 1 ?
- *(const oid *) Tloc(bn, 0) : rcnt == ci.ncand && ci.tpe == cand_dense ? ci.seq : oid_nil;
+static str
+STRselect(MalStkPtr stk, InstrPtr pci, const str fname,
+ int (*str_cmp)(const char *, const char *, int))
+{
+ str msg = MAL_SUCCEED;
+ QryCtx *qry_ctx = MT_thread_get_qry_ctx();
+ BAT *l = NULL, *cl = NULL, *rl = NULL;
- if (with_strimps_anti) {
- BAT *rev;
- if (old_s) {
- rev = BATdiffcand(old_s, bn);
-#ifndef NDEBUG
- BAT *is = BATintersectcand(old_s, bn);
- if (is) {
- assert(is->batCount == bn->batCount);
- BBPreclaim(is);
- }
- assert(rev->batCount == old_s->batCount - bn->batCount);
-#endif
- } else
- rev = BATnegcands(0, b->batCount, bn);
+ bat *RL = getArgReference_bat(stk, pci, 0);
+ bat *L = getArgReference_bat(stk, pci, 1);
+ bat *CL = getArgReference_bat(stk, pci, 2);
+ const char *r = *getArgReference_str(stk, pci, 3);
+ bool icase = pci->argc != 5;
+ bool anti = pci->argc == 5 ? *getArgReference_bit(stk, pci, 4) :
+ *getArgReference_bit(stk, pci, 5);
- BBPreclaim(bn);
- bn = rev;
- if (bn == NULL)
- msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
- }
- }
+ if (!(l = BATdescriptor(*L)))
+ throw(MAL, fname, RUNTIME_OBJECT_MISSING);
+
+ if (CL && !is_bat_nil(*CL) && !(cl = BATdescriptor(*CL))) {
+ BBPreclaim(l);
+ throw(MAL, fname, RUNTIME_OBJECT_MISSING);
}
- if (bn && !msg) {
- *r_id = bn->batCacheid;
- BBPkeepref(bn);
- } else {
- BBPreclaim(bn);
+ BATiter li = bat_iterator(l);
+ struct canditer lci;
+ canditer_init(&lci, l, cl);
+ size_t l_cnt = lci.ncand;
+
+ rl = COLnew(0, TYPE_oid, l_cnt, TRANSIENT);
+ if (!rl) {
+ BBPreclaim_n(2, l, cl);
+ throw(MAL, fname, MAL_MALLOC_FAIL);
}
- BBPreclaim_n(3, b, cb, old_s);
+ if (icase) {
+ if (str_cmp == str_is_prefix)
+ str_cmp = str_is_iprefix;
+ else if (str_cmp == str_is_suffix)
+ str_cmp = str_is_isuffix;
+ else
+ str_cmp = str_icontains;
+ }
+
+ msg = scan_loop_strselect(rl, &li, &lci, r, str_cmp, anti, fname, qry_ctx);
+
+ bat_iterator_end(&li);
+
+ if (!msg) {
+ *RL = rl->batCacheid;
+ BBPkeepref(rl);
+ } else {
+ BBPreclaim(rl);
+ }
+
+ BBPreclaim_n(2, l, cl);
return msg;
}
@@ -1986,8 +1944,7 @@ STRstartswithselect(Client cntxt, MalBlk
{
(void) cntxt;
(void) mb;
- return STRselect(stk, pci,
- str_is_iprefix, str_is_prefix, "str.startswithselect");
+ return STRselect(stk, pci, "str.startswithselect", str_is_prefix);
}
static str
@@ -1995,8 +1952,7 @@ STRendswithselect(Client cntxt, MalBlkPt
{
(void) cntxt;
(void) mb;
- return STRselect(stk, pci,
- str_is_isuffix, str_is_suffix, "str.endswithselect");
+ return STRselect(stk, pci, "str.endswithselect", str_is_suffix);
}
static str
@@ -2004,8 +1960,7 @@ STRcontainsselect(Client cntxt, MalBlkPt
{
(void) cntxt;
(void) mb;
- return STRselect(stk, pci,
- str_icontains, str_contains, "str.containsselect");
+ return STRselect(stk, pci, "str.containsselect", str_contains);
}
static void
@@ -2105,7 +2060,7 @@ strbat_reverse(BAT *b)
return bn;
}
-#define NESTED_LOOP_STRJOIN(STR_CMP) \
+#define NESTED_LOOP(STR_CMP) \
do { \
canditer_reset(lci); \
TIMEOUT_LOOP(rci->ncand, qry_ctx) { \
@@ -2117,8 +2072,7 @@ strbat_reverse(BAT *b)
TIMEOUT_LOOP(lci->ncand, qry_ctx) { \
ol = canditer_next(lci); \
const char *ls = VALUE(l, ol - lbase); \
- if (!strNil(ls)) { \
- if (STR_CMP) { \
+ if (!strNil(ls) && STR_CMP) { \
APPEND(rl, ol); \
if (rr) APPEND(rr, or); \
if (BATcount(rl) == BATcapacity(rl)) { \
@@ -2128,7 +2082,6 @@ strbat_reverse(BAT *b)
throw(MAL, fname, GDK_EXCEPTION); \
} \
} \
- } \
} \
} \
} \
@@ -2149,9 +2102,9 @@ nested_loop_strjoin(BAT *rl, BAT *rr, BA
TRC_DEBUG_IF(ALGO) t0 = GDKusec();
if (anti)
- NESTED_LOOP_STRJOIN(str_cmp(ls, rs, str_strlen(rs)) != 0);
+ NESTED_LOOP(str_cmp(ls, rs, str_strlen(rs)) != 0);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]