Changeset: 21898ffd1ba3 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/21898ffd1ba3
Modified Files:
        monetdb5/modules/atoms/str.c
        monetdb5/modules/atoms/str.h
        monetdb5/modules/kernel/batstr.c
Branch: txtsim
Log Message:

Move functions to proper module(batstr.c to str.c)


diffs (truncated from 1020 to 300 lines):

diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3665,7 +3665,7 @@ str_lower(str *buf, size_t *buflen, cons
        return convertCase(UTF8_toLowerFrom, UTF8_toLowerTo, buf, buflen, s, 
"str.lower");
 }
 
-str
+static inline str
 STRlower(str *res, const str *arg1)
 {
        str buf = NULL, msg = MAL_SUCCEED;
@@ -5080,6 +5080,480 @@ STRreverse(str *ret, const str *arg)
        return MAL_SUCCEED;
 }
 
+/* scan select loop with or without candidates */
+#define scanloop(TEST, KEEP_NULLS)                                             
                            \
+       do {                                                                    
                                                        \
+               TRC_DEBUG(ALGO,                                                 
                                        \
+                                 "scanselect(b=%s#"BUNFMT",anti=%d): "         
                \
+                                 "scanselect %s\n", BATgetId(b), BATcount(b),  
                \
+                                 anti, #TEST);                                 
                                        \
+               if (!s || BATtdense(s)) {                                       
                                        \
+                       for (; p < q; p++) {                                    
                                        \
+                               GDK_CHECK_TIMEOUT(timeoffset, counter,          
                        \
+                                                                 
GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
+                               const char *restrict v = BUNtvar(bi, p - off);  
                \
+                               if ((TEST) || ((KEEP_NULLS) && *v == '\200'))   
                \
+                                       vals[cnt++] = p;                        
                                                \
+                       }                                                       
                                                                \
+               } else {                                                        
                                                        \
+                       for (; p < ncands; p++) {                               
                                        \
+                               GDK_CHECK_TIMEOUT(timeoffset, counter,          
                        \
+                                                                 
GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
+                               oid o = canditer_next(ci);                      
                                        \
+                               const char *restrict v = BUNtvar(bi, o - off);  
                \
+                               if ((TEST) || ((KEEP_NULLS) && *v == '\200'))   
                \
+                                       vals[cnt++] = o;                        
                                                \
+                       }                                                       
                                                                \
+               }                                                               
                                                                \
+       } while (0)
+
+static str
+do_string_select(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q, 
BUN *rcnt, const char *key, bool anti,
+               bit (*str_cmp)(const char*, const char*, int))
+{
+       BATiter bi = bat_iterator(b);
+       BUN cnt = 0, ncands = ci->ncand;
+       oid off = b->hseqbase, *restrict vals = Tloc(bn, 0);
+       str msg = MAL_SUCCEED;
+       int klen = str_strlen(key);
+
+       size_t counter = 0;
+       lng timeoffset = 0;
+       QryCtx *qry_ctx = MT_thread_get_qry_ctx();
+       if (qry_ctx != NULL)
+               timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? 
(qry_ctx->starttime + qry_ctx->querytimeout) : 0;
+
+       if (anti) /* keep nulls ? (use false for now) */
+               scanloop(v && *v != '\200' && str_cmp(v, key, klen) != 0, 
false);
+       else
+               scanloop(v && *v != '\200' && str_cmp(v, key, klen) == 0, 
false);
+
+bailout:
+       bat_iterator_end(&bi);
+       *rcnt = cnt;
+       return msg;
+}
+
+static str
+string_select(bat *ret, const bat *bid, const bat *sid, const str *key, const 
bit *anti, bit (*str_cmp)(const char*, const char*, int), const str fname)
+{
+       BAT *b, *s = NULL, *bn = NULL;
+       str msg = MAL_SUCCEED;
+       BUN p = 0, q = 0, rcnt = 0;
+       struct canditer ci;
+
+       if ((b = BATdescriptor(*bid)) == NULL) {
+               msg = createException(MAL, fname , SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
+               goto bailout;
+       }
+       if (sid && !is_bat_nil(*sid) && (s = BATdescriptor(*sid)) == NULL) {
+               msg = createException(MAL, fname, SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
+               goto bailout;
+       }
+
+       assert(ATOMstorage(b->ttype) == TYPE_str);
+
+       canditer_init(&ci, b, s);
+       if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
+               msg = createException(MAL, fname, SQLSTATE(HY013) 
MAL_MALLOC_FAIL);
+               goto bailout;
+       }
+
+       if (!s || BATtdense(s)) {
+               if (s) {
+                       assert(BATtdense(s));
+                       p = (BUN) s->tseqbase;
+                       q = p + BATcount(s);
+                       if ((oid) p < b->hseqbase)
+                               p = b->hseqbase;
+                       if ((oid) q > b->hseqbase + BATcount(b))
+                               q = b->hseqbase + BATcount(b);
+               } else {
+                       p = b->hseqbase;
+                       q = BATcount(b) + b->hseqbase;
+               }
+       }
+
+       msg = do_string_select(bn, b, s, &ci, p, q, &rcnt, *key, *anti, 
str_cmp);
+
+       if (!msg) { /* set some properties */
+               BATsetcount(bn, rcnt);
+               bn->tsorted = true;
+               bn->trevsorted = bn->batCount <= 1;
+               bn->tkey = true;
+               bn->tnil = false;
+               bn->tnonil = true;
+               bn->tseqbase = rcnt == 0 ? 0 : rcnt == 1 ? *(const 
oid*)Tloc(bn, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil;
+       }
+
+bailout:
+       BBPreclaim(b);
+       BBPreclaim(s);
+       if (bn && !msg) {
+               *ret = bn->batCacheid;
+               BBPkeepref(bn);
+       } else if (bn)
+               BBPreclaim(bn);
+       return msg;
+}
+
+static str
+STRstartswithselect(bat *ret, const bat *bid, const bat *sid, const str *key, 
const bit *caseignore, const bit *anti)
+{
+               return string_select(ret, bid, sid, key, anti, 
(*caseignore)?str_is_iprefix:str_is_prefix,
+                               "str.startswithselect");
+}
+
+static str
+STRendswithselect(bat *ret, const bat *bid, const bat *sid, const str *key, 
const bit *caseignore, const bit *anti)
+{
+               return string_select(ret, bid, sid, key, anti, 
(*caseignore)?str_is_isuffix:str_is_suffix,
+                               "str.endswithselect");
+}
+
+static str
+STRcontainsselect(bat *ret, const bat *bid, const bat *sid, const str *key, 
const bit *caseignore, const bit *anti)
+{
+               return string_select(ret, bid, sid, key, anti, 
(*caseignore)?str_icontains:str_contains,
+                               "str.containsselect");
+}
+
+#define APPEND(b, o)   (((oid *) b->theap->base)[b->batCount++] = (o))
+#define VALUE(s, x)            (s##vars + VarHeapVal(s##vals, (x), s##i.width))
+
+/* nested loop implementation for batstr joins */
+#define batstr_join_loop(STRCMP, STR_LEN) \
+       do { \
+               for (BUN ridx = 0; ridx < rci.ncand; ridx++) { \
+                       GDK_CHECK_TIMEOUT(timeoffset, counter, \
+                                       GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
+                       ro = canditer_next(&rci); \
+                       vr = VALUE(r, ro - rbase); \
+                       rlen = STR_LEN; \
+                       nl = 0; \
+                       canditer_reset(&lci); \
+                       for (BUN lidx = 0; lidx < lci.ncand; lidx++) { \
+                               lo = canditer_next(&lci); \
+                               vl = VALUE(l, lo - lbase); \
+                               if (strNil(vl)) { \
+                                       continue; \
+                               } else if (!(STRCMP)) { \
+                                       continue; \
+                               } \
+                               if (BATcount(r1) == BATcapacity(r1)) { \
+                                       newcap = BATgrows(r1); \
+                                       BATsetcount(r1, BATcount(r1)); \
+                                       if (r2) \
+                                               BATsetcount(r2, BATcount(r2)); \
+                                       if (BATextend(r1, newcap) != 
GDK_SUCCEED || (r2 && BATextend(r2, newcap) != GDK_SUCCEED)) { \
+                                               msg = createException(MAL, 
"pcre.join", SQLSTATE(HY013) MAL_MALLOC_FAIL); \
+                                               goto bailout; \
+                                       } \
+                                       assert(!r2 || BATcapacity(r1) == 
BATcapacity(r2)); \
+                               } \
+                               if (BATcount(r1) > 0) { \
+                                       if (lastl + 1 != lo) \
+                                               r1->tseqbase = oid_nil; \
+                                       if (nl == 0) { \
+                                               if (r2) \
+                                                       r2->trevsorted = false; 
\
+                                               if (lastl > lo) { \
+                                                       r1->tsorted = false; \
+                                                       r1->tkey = false; \
+                                               } else if (lastl < lo) { \
+                                                       r1->trevsorted = false; 
\
+                                               } else { \
+                                                       r1->tkey = false; \
+                                               } \
+                                       } \
+                               } \
+                               APPEND(r1, lo); \
+                               if (r2) \
+                                       APPEND(r2, ro); \
+                               lastl = lo; \
+                               nl++; \
+                       } \
+                       if (r2) { \
+                               if (nl > 1) { \
+                                       r2->tkey = false; \
+                                       r2->tseqbase = oid_nil; \
+                                       r1->trevsorted = false; \
+                               } else if (nl == 0) { \
+                                       rskipped = BATcount(r2) > 0; \
+                               } else if (rskipped) { \
+                                       r2->tseqbase = oid_nil; \
+                               } \
+                       } else if (nl > 1) { \
+                               r1->trevsorted = false; \
+                       } \
+               } \
+       } while (0)
+
+static str
+strjoin(BAT *r1, BAT *r2, BAT *l, BAT *r, BAT *sl, BAT *sr, bit anti, bit 
(*str_cmp)(const char*, const char*, int), const str fname)
+{
+       struct canditer lci, rci;
+       const char *lvals, *rvals, *lvars, *rvars, *vl, *vr;
+       int rskipped = 0, rlen = 0;                     /* whether we skipped 
values in r */
+       oid lbase, rbase, lo, ro, lastl = 0;            /* last value inserted 
into r1 */
+       BUN nl, newcap;
+       char *msg = MAL_SUCCEED;
+
+       size_t counter = 0;
+       lng timeoffset = 0;
+       QryCtx *qry_ctx = MT_thread_get_qry_ctx();
+       if (qry_ctx != NULL) {
+               timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? 
(qry_ctx->starttime + qry_ctx->querytimeout) : 0;
+       }
+
+       TRC_DEBUG(ALGO,
+                         "%s(l=%s#" BUNFMT "[%s]%s%s,"
+                         "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
+                         "sr=%s#" BUNFMT "%s%s)\n",
+                         fname,
+                         BATgetId(l), BATcount(l), ATOMname(l->ttype),
+                         l->tsorted ? "-sorted" : "",
+                         l->trevsorted ? "-revsorted" : "",
+                         BATgetId(r), BATcount(r), ATOMname(r->ttype),
+                         r->tsorted ? "-sorted" : "",
+                         r->trevsorted ? "-revsorted" : "",
+                         sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
+                         sl && sl->tsorted ? "-sorted" : "",
+                         sl && sl->trevsorted ? "-revsorted" : "",
+                         sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
+                         sr && sr->tsorted ? "-sorted" : "",
+                         sr && sr->trevsorted ? "-revsorted" : "");
+
+       assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
+       assert(ATOMtype(l->ttype) == TYPE_str);
+
+       canditer_init(&lci, l, sl);
+       canditer_init(&rci, r, sr);
+
+       BATiter li = bat_iterator(l);
+       BATiter ri = bat_iterator(r);
+       lbase = l->hseqbase;
+       rbase = r->hseqbase;
+       lvals = (const char *) li.base;
+       rvals = (const char *) ri.base;
+       assert(ri.vh && r->ttype);
+       lvars = li.vh->base;
+       rvars = ri.vh->base;
+
+       r1->tkey = true;
+       r1->tsorted = true;
+       r1->trevsorted = true;
+       r1->tnil = false;
+       r1->tnonil = true;
+       if (r2) {
+               r2->tkey = true;
+               r2->tsorted = true;
+               r2->trevsorted = true;
+               r2->tnil = false;
+               r2->tnonil = true;
+       }
+
+       if (anti) {
+               batstr_join_loop(str_cmp(vl, vr, rlen) == 0, str_strlen(vr));
+       } else {
+               batstr_join_loop(str_cmp(vl, vr, rlen) != 0, str_strlen(vr));
+       }
+       bat_iterator_end(&li);
+       bat_iterator_end(&ri);
+
+       assert(!r2 || BATcount(r1) == BATcount(r2));
+       /* also set other bits of heap to correct value to indicate size */
+       BATsetcount(r1, BATcount(r1));
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to