Changeset: 559f0ca5ec18 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/559f0ca5ec18
Modified Files:
        monetdb5/modules/atoms/str.c
        monetdb5/modules/atoms/str.h
Branch: strimps_v3
Log Message:

Refactor str select and other minor alloc changes


diffs (truncated from 913 to 300 lines):

diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -1834,150 +1834,108 @@ BBPreclaim_n(int nargs, ...)
        va_end(valist);
 }
 
-#define HANDLE_TIMEOUT(qc)                                                                     \
-       do {                                                                                                    \
-               TIMEOUT_ERROR(qc, __FILE__, __func__, __LINE__);        \
-               msg = createException(MAL, fname, GDK_EXCEPTION);       \
-       } while (0)
+#define VALUE(s, x)  (s##_vars + VarHeapVal(s##_vals, (x), s##i->width))
+#define APPEND(b, o) (((oid *) b->theap->base)[b->batCount++] = (o))
 
-#define scanloop(TEST, canditer_next)                                          \
+#define SCAN_LOOP(STR_CMP)                                                                     \
        do {                                                                                                    \
-               const oid off = b->hseqbase;                                            \
-               TIMEOUT_LOOP(ci.ncand, qry_ctx) {                                       \
-                       oid o = canditer_next(&ci);                                             \
-                       const char *restrict v = BUNtvar(bi, o - off);  \
-                       assert(rcnt < BATcapacity(bn));                                 \
-                       if (TEST)                                                                               \
-                               vals[rcnt++] = o;                                                       \
+               TIMEOUT_LOOP(lci->ncand, qry_ctx) {                                     \
+                       oid lo = canditer_next(lci);                                    \
+                       const char *ls = VALUE(l, lo - l_base);                 \
+                       if (!strNil(ls) && (STR_CMP))                                   \
+                               APPEND(rl, lo);                                                         \
                }                                                                                                       \
        } while (0)
 
 static str
-STRselect(MalStkPtr stk, InstrPtr pci,
-                 int (*str_icmp)(const char *, const char *, int),
-                 int (*str_cmp)(const char *, const char *, int),
-                 const char *fname)
+scan_loop_strselect(BAT *rl, BATiter *li, struct canditer *lci, const char *r,
+                                       int (*str_cmp)(const char *, const char *, int),
+                                       bool anti, const char *fname, QryCtx *qry_ctx)
 {
-       str msg = MAL_SUCCEED;
+       oid l_base = li->b->hseqbase;
+       const char *l_vars = li->vh->base, *l_vals = li->base;
+       int r_len = str_strlen(r);
 
-       bat *r_id = getArgReference_bat(stk, pci, 0);
-       bat b_id = *getArgReference_bat(stk, pci, 1);
-       bat cb_id = *getArgReference_bat(stk, pci, 2);
-       const char *key = *getArgReference_str(stk, pci, 3);
-       bit icase = pci->argc != 5;
-       bit anti = pci->argc == 5 ? *getArgReference_bit(stk, pci, 4) :
-               *getArgReference_bit(stk, pci, 5);
-
-       BAT *b, *cb = NULL, *bn = NULL, *old_s = NULL;;
-       BUN rcnt = 0;
-       struct canditer ci;
-       bool with_strimps = false,
-               with_strimps_anti = false;
-
-       if (!(b = BATdescriptor(b_id)))
-               throw(MAL, fname, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+       lng t0 = 0;
+       TRC_DEBUG_IF(ALGO) t0 = GDKusec();
 
-       if (!is_bat_nil(cb_id) && !(cb = BATdescriptor(cb_id))) {
-               BBPreclaim(b);
-               throw(MAL, fname, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
-       }
-
-       assert(ATOMstorage(b->ttype) == TYPE_str);
+       if (anti)
+               SCAN_LOOP(str_cmp(ls, r, r_len) != 0);
+       else
+               SCAN_LOOP(str_cmp(ls, r, r_len) == 0);
 
-       if (BAThasstrimps(b)) {
-               BAT *tmp_s;
-               if (STRMPcreate(b, NULL) == GDK_SUCCEED && (tmp_s = STRMPfilter(b, cb, key, anti)) != NULL) {
-                       old_s = cb;
-                       cb = tmp_s;
-                       if (!anti)
-                               with_strimps = true;
-                       else
-                               with_strimps_anti = true;
-               } else {
-                       /* strimps failed, continue without */
-                       GDKclrerr();
-               }
-       }
-
-       MT_thread_setalgorithm(with_strimps ?
-                                                  "string_select: strcmp function using strimps" :
-                                                  (with_strimps_anti ?
-                                                       "string_select: strcmp function using strimps anti"
-                                                       : "string_select: strcmp function with no accelerator"));
-
-       canditer_init(&ci, b, cb);
-       if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
-               BBPreclaim_n(2, b, cb);
-               throw(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       BATsetcount(rl, BATcount(rl));
+       if (BATcount(rl) > 0) {
+               BATnegateprops(rl);
+               rl->tnonil = true;
+               rl->tnil = false;
        }
 
-       if (!strNil(key)) {
-               BATiter bi = bat_iterator(b);
-               QryCtx *qry_ctx = MT_thread_get_qry_ctx();
-               if (icase)
-                       str_cmp = str_icmp;
-               oid *vals = Tloc(bn, 0);
-               const int klen = str_strlen(key);
-               if (ci.tpe == cand_dense) {
-                       if (with_strimps_anti)
-                               scanloop(strNil(v) || str_cmp(v, key, klen) == 0, canditer_next_dense);
-                       else if (anti)
-                               scanloop(!strNil(v) && str_cmp(v, key, klen) != 0, canditer_next_dense);
-                       else
-                               scanloop(!strNil(v) && str_cmp(v, key, klen) == 0, canditer_next_dense);
-               } else {
-                       if (with_strimps_anti)
-                               scanloop(strNil(v) || str_cmp(v, key, klen) == 0, canditer_next);
-                       else if (anti)
-                               scanloop(!strNil(v) && str_cmp(v, key, klen) != 0, canditer_next);
-                       else
-                               scanloop(!strNil(v) && str_cmp(v, key, klen) == 0, canditer_next);
-               }
-               bat_iterator_end(&bi);
-               TIMEOUT_CHECK(qry_ctx, HANDLE_TIMEOUT(qry_ctx));
+       TRC_DEBUG(ALGO, "(%s, %s, l=%s #%zu [%s], cl=%s #%zu, time="LLFMT"usecs)\n",
+                         fname, "scan_loop_strselect",
+                         BATgetId(li->b), li->count, ATOMname(li->b->ttype),
+                         lci ? BATgetId(lci->s) : "NULL", lci ? lci->ncand : 0,
+                         GDKusec() - t0);
+
+       return MAL_SUCCEED;
+}
 
-               if (!msg) {
-                       BATsetcount(bn, rcnt);
-                       bn->tsorted = true;
-                       bn->trevsorted = bn->batCount <= 1;
-                       bn->tkey = true;
-                       bn->tnil = false;
-                       bn->tnonil = true;
-                       bn->tseqbase = rcnt == 0 ?
-                               0 : rcnt == 1 ?
-                               *(const oid *) Tloc(bn, 0) : rcnt == ci.ncand && ci.tpe == cand_dense ? ci.seq : oid_nil;
+static str
+STRselect(MalStkPtr stk, InstrPtr pci, const str fname,
+                 int (*str_cmp)(const char *, const char *, int))
+{
+       str msg = MAL_SUCCEED;
+       QryCtx *qry_ctx = MT_thread_get_qry_ctx();
+       BAT *l = NULL, *cl = NULL, *rl = NULL;
 
-                       if (with_strimps_anti) {
-                               BAT *rev;
-                               if (old_s) {
-                                       rev = BATdiffcand(old_s, bn);
-#ifndef NDEBUG
-                                       BAT *is = BATintersectcand(old_s, bn);
-                                       if (is) {
-                                               assert(is->batCount == bn->batCount);
-                                               BBPreclaim(is);
-                                       }
-                                       assert(rev->batCount == old_s->batCount - bn->batCount);
-#endif
-                               } else
-                                       rev = BATnegcands(0, b->batCount, bn);
+       bat *RL = getArgReference_bat(stk, pci, 0);
+       bat *L = getArgReference_bat(stk, pci, 1);
+       bat *CL = getArgReference_bat(stk, pci, 2);
+       const char *r = *getArgReference_str(stk, pci, 3);
+       bool icase = pci->argc != 5;
+       bool anti = pci->argc == 5 ? *getArgReference_bit(stk, pci, 4) :
+               *getArgReference_bit(stk, pci, 5);
 
-                               BBPreclaim(bn);
-                               bn = rev;
-                               if (bn == NULL)
-                                       msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
-                       }
-               }
+       if (!(l = BATdescriptor(*L)))
+               throw(MAL, fname, RUNTIME_OBJECT_MISSING);
+
+       if (CL && !is_bat_nil(*CL) && !(cl = BATdescriptor(*CL))) {
+               BBPreclaim(l);
+               throw(MAL, fname, RUNTIME_OBJECT_MISSING);
        }
 
-       if (bn && !msg) {
-               *r_id = bn->batCacheid;
-               BBPkeepref(bn);
-       } else {
-               BBPreclaim(bn);
+       BATiter li = bat_iterator(l);
+       struct canditer lci;
+       canditer_init(&lci, l, cl);
+       size_t l_cnt = lci.ncand;
+
+       rl = COLnew(0, TYPE_oid, l_cnt, TRANSIENT);
+       if (!rl) {
+               BBPreclaim_n(2, l, cl);
+               throw(MAL, fname, MAL_MALLOC_FAIL);
        }
 
-       BBPreclaim_n(3, b, cb, old_s);
+       if (icase) {
+               if (str_cmp == str_is_prefix)
+                       str_cmp = str_is_iprefix;
+               else if (str_cmp == str_is_suffix)
+                       str_cmp = str_is_isuffix;
+               else
+                       str_cmp = str_icontains;
+       }
+
+       msg = scan_loop_strselect(rl, &li, &lci, r, str_cmp, anti, fname, qry_ctx);
+
+       bat_iterator_end(&li);
+
+       if (!msg) {
+               *RL = rl->batCacheid;
+               BBPkeepref(rl);
+       } else {
+               BBPreclaim(rl);
+       }
+
+       BBPreclaim_n(2, l, cl);
        return msg;
 }
 
@@ -1986,8 +1944,7 @@ STRstartswithselect(Client cntxt, MalBlk
 {
        (void) cntxt;
        (void) mb;
-       return STRselect(stk, pci,
-                                        str_is_iprefix, str_is_prefix, "str.startswithselect");
+       return STRselect(stk, pci, "str.startswithselect", str_is_prefix);
 }
 
 static str
@@ -1995,8 +1952,7 @@ STRendswithselect(Client cntxt, MalBlkPt
 {
        (void) cntxt;
        (void) mb;
-       return STRselect(stk, pci,
-                                        str_is_isuffix, str_is_suffix, "str.endswithselect");
+       return STRselect(stk, pci, "str.endswithselect", str_is_suffix);
 }
 
 static str
@@ -2004,8 +1960,7 @@ STRcontainsselect(Client cntxt, MalBlkPt
 {
        (void) cntxt;
        (void) mb;
-       return STRselect(stk, pci,
-                                        str_icontains, str_contains, "str.containsselect");
+       return STRselect(stk, pci, "str.containsselect", str_contains);
 }
 
 static void
@@ -2105,7 +2060,7 @@ strbat_reverse(BAT *b)
        return bn;
 }
 
-#define NESTED_LOOP_STRJOIN(STR_CMP)                                                                   \
+#define NESTED_LOOP(STR_CMP)                                                                                   \
        do {                                                                                                                            \
                canditer_reset(lci);                                                                                    \
                TIMEOUT_LOOP(rci->ncand, qry_ctx) {                                                             \
@@ -2117,8 +2072,7 @@ strbat_reverse(BAT *b)
                        TIMEOUT_LOOP(lci->ncand, qry_ctx) {                                                     \
                                ol = canditer_next(lci);                                                                \
                                const char *ls = VALUE(l, ol - lbase);                                  \
-                               if (!strNil(ls)) {                                                                              \
-                                       if (STR_CMP) {                                                                          \
+                               if (!strNil(ls) && STR_CMP) {                                                   \
                                                APPEND(rl, ol);                                                                 \
                                                if (rr) APPEND(rr, or);                                                 \
                                                if (BATcount(rl) == BATcapacity(rl)) {                  \
@@ -2128,7 +2082,6 @@ strbat_reverse(BAT *b)
                                                                throw(MAL, fname, GDK_EXCEPTION);               \
                                                        }                                                                                       \
                                                }                                                                                               \
-                                       }                                                                                                       \
                                }                                                                                                               \
                        }                                                                                                                       \
                }                                                                                                                               \
@@ -2149,9 +2102,9 @@ nested_loop_strjoin(BAT *rl, BAT *rr, BA
        TRC_DEBUG_IF(ALGO) t0 = GDKusec();
 
        if (anti)
-               NESTED_LOOP_STRJOIN(str_cmp(ls, rs, str_strlen(rs)) != 0);
+               NESTED_LOOP(str_cmp(ls, rs, str_strlen(rs)) != 0);
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-leave@monetdb.org

Reply via email to