Changeset: 214d97b4f4e5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=214d97b4f4e5
Modified Files:
        clients/Tests/exports.stable.out
        monetdb5/modules/mal/pcre.c
        monetdb5/modules/mal/pcre.mal
Branch: default
Log Message:

Implemented algebra.likesubselect.
This is a member of the subselect family, but the selection criterion
is a SQL-style LIKE or ILIKE pattern.


diffs (226 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -2111,6 +2111,8 @@ str PCRElike3(bit *ret, str *s, str *pat
 str PCRElike_join_pcre(int *ret, int *b, int *pat, str *esc);
 str PCRElike_select_pcre(int *ret, int *b, str *pat, str *esc);
 str PCRElike_uselect_pcre(int *ret, int *b, str *pat, str *esc);
+str PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit *caseignore, bit *anti);
+str PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str *esc, bit *caseignore, bit *anti);
 str PCREmatch(bit *ret, str *val, str *pat);
 str PCREnotilike2(bit *ret, str *s, str *pat);
 str PCREnotilike3(bit *ret, str *s, str *pat, str *esc);
diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -97,6 +97,8 @@ pcre_export str PCREilike_join_pcre(int 
 pcre_export str PCRElike_select_pcre(int *ret, int *b, str *pat, str *esc);
 pcre_export str PCREilike_select_pcre(int *ret, int *b, str *pat, str *esc);
 pcre_export str pcre_init(void);
+/* entry points for algebra.likesubselect: PCRElikesubselect1 takes no
+ * candidate list, PCRElikesubselect2 takes one (sid) */
+pcre_export str PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit *caseignore, bit *anti);
+pcre_export str PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str *esc, bit *caseignore, bit *anti);
 
 /* current implementation assumes simple %keyword% [keyw%]* */
 typedef struct RE {
@@ -368,6 +370,132 @@ pcre_index(int *res, pcre * pattern, str
        return MAL_SUCCEED;
 }
 
+/* these two defines are copies from gdk_select.c */
+
+/* scan select loop with candidates
+ *
+ * Walks the candidate oids in candlist while p < q.  For each
+ * candidate o, r = o - off is used as the position in b, v is set to
+ * the tail value at that position, and o is appended to bn when TEST
+ * evaluates to true.
+ *
+ * The macro is not self-contained: it must be expanded in a scope
+ * that provides b, s, anti, bi, bn, p, q, r, o, off, v and candlist.
+ * NOTE(review): bunfastins presumably jumps to a bunins_failed label
+ * in the enclosing function when the insert fails -- confirm against
+ * the GDK definition.
+ * TEST is also stringified into the ALGODEBUG trace line. */
+#define candscanloop(TEST) \
+       do { \
+               ALGODEBUG fprintf(stderr, \
+                           "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): " \
+                           "scanselect %s\n", BATgetId(b), BATcount(b), \
+                           s ? BATgetId(s) : "NULL", anti, #TEST); \
+               while (p < q) { \
+                       o = *candlist++; \
+                       r = (BUN) (o - off); \
+                       v = BUNtail(bi, r); \
+                       if (TEST) \
+                               bunfastins(bn, NULL, &o); \
+                       p++; \
+               } \
+       } while (0)
+
+/* scan select loop without candidates
+ *
+ * Iterates over all of b with BATloop.  For each position p, v is set
+ * to the tail value at p, and when TEST evaluates to true the oid
+ * p + off is appended to bn.
+ *
+ * The macro is not self-contained: it must be expanded in a scope
+ * that provides b, s, anti, bi, bn, p, q, o, off and v.
+ * NOTE(review): bunfastins presumably jumps to a bunins_failed label
+ * in the enclosing function when the insert fails -- confirm against
+ * the GDK definition.
+ * TEST is also stringified into the ALGODEBUG trace line. */
+#define scanloop(TEST) \
+       do { \
+               ALGODEBUG fprintf(stderr, \
+                           "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): " \
+                           "scanselect %s\n", BATgetId(b), BATcount(b), \
+                           s ? BATgetId(s) : "NULL", anti, #TEST); \
+               BATloop(b, p, q) { \
+                       v = BUNtail(bi, p); \
+                       if (TEST) { \
+                               o = (oid) p + off; \
+                               bunfastins(bn, NULL, &o); \
+                       } \
+               } \
+       } while (0)
+
+/* Select the head oids of b whose string tail matches the PCRE
+ * pattern pat, or -- when anti is set -- whose tail explicitly does
+ * not match it.
+ *
+ * bnp        receives the result BAT on success, NULL on failure
+ * b          dense-headed BAT with a str-like tail (asserted)
+ * s          optional candidate list; when non-NULL only the oids in
+ *            its tail that fall inside b's head range are tested
+ * pat        pattern in PCRE syntax (already translated from SQL)
+ * caseignore non-zero to compile the pattern with PCRE_CASELESS
+ * anti       0 to keep matches, 1 to keep non-matches
+ *
+ * Values for which v is NULL or whose first byte is '\200' are never
+ * selected, in either mode (presumably '\200' marks the str nil
+ * value -- confirm).  In anti mode only PCRE_ERROR_NOMATCH counts as
+ * "does not match"; other pcre_exec errors select nothing.
+ *
+ * Returns MAL_SUCCEED, or the exception produced by throw() when the
+ * pattern cannot be compiled or studied, the result BAT cannot be
+ * allocated, or an insert into it fails. */
+static str
+pcre_likesubselect(BAT **bnp, BAT *b, BAT *s, const char *pat, int caseignore, int anti)
+{
+       int options = PCRE_UTF8 | PCRE_MULTILINE;
+       pcre *re;
+       pcre_extra *pe;
+       const char *error;
+       int errpos;
+       BATiter bi = bat_iterator(b);
+       BAT *bn;
+       BUN p, q;
+       oid o, off;
+       const char *v;
+       /* capture positions are not used; PCRE only needs the space */
+       int ovector[10];
+
+       assert(BAThdense(b));
+       assert(ATOMstorage(b->ttype) == TYPE_str);
+       assert(anti == 0 || anti == 1);
+
+       /* make sure the output is defined on every error return */
+       *bnp = NULL;
+
+       if (caseignore)
+               options |= PCRE_CASELESS;
+       if ((re = pcre_compile(pat, options, &error, &errpos, NULL)) == NULL)
+               throw(MAL, "pcre.likesubselect",
+                         OPERATION_FAILED ": compilation of pattern \"%s\" failed\n", pat);
+       /* pcre_study may legitimately return NULL; failure is signalled
+        * through error only */
+       pe = pcre_study(re, 0, &error);
+       if (error != NULL) {
+               my_pcre_free(re);
+               my_pcre_free(pe);
+               throw(MAL, "pcre.likesubselect",
+                         OPERATION_FAILED ": studying pattern \"%s\" failed\n", pat);
+       }
+       /* the result can never hold more entries than were tested */
+       bn = BATnew(TYPE_void, TYPE_oid, s ? BATcount(s) : BATcount(b));
+       if (bn == NULL) {
+               my_pcre_free(re);
+               my_pcre_free(pe);
+               throw(MAL, "pcre.likesubselect", MAL_MALLOC_FAIL);
+       }
+       /* difference between a head oid of b and its BUN position */
+       off = b->hseqbase - BUNfirst(b);
+
+       if (s) {
+               const oid *candlist;
+               BUN r;
+
+               assert(BAThdense(s));
+               /* NOTE(review): a TYPE_void tail passes this assert, yet
+                * candlist below is read through Tloc as an array of
+                * oids -- confirm a void-tailed candidate list is
+                * materialized, or handle it separately */
+               assert(s->ttype == TYPE_oid || s->ttype == TYPE_void);
+               assert(s->tsorted);
+               assert(s->tkey);
+               /* setup candscanloop loop vars to only iterate over
+                * part of s that has values that are in range of b */
+               o = b->hseqbase + BATcount(b);
+               q = SORTfndfirst(s, &o);
+               p = SORTfndfirst(s, &b->hseqbase);
+               candlist = (const oid *) Tloc(s, p);
+               if (anti)
+                       candscanloop(v && *v != '\200' &&
+                               pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) == PCRE_ERROR_NOMATCH);
+               else
+                       candscanloop(v && *v != '\200' &&
+                               pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) >= 0);
+       } else {
+               if (anti)
+                       scanloop(v && *v != '\200' &&
+                               pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) == PCRE_ERROR_NOMATCH);
+               else
+                       scanloop(v && *v != '\200' &&
+                               pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) >= 0);
+       }
+       my_pcre_free(re);
+       my_pcre_free(pe);
+       /* oids were appended in increasing order and each at most once */
+       bn->tsorted = 1;
+       bn->trevsorted = bn->U->count <= 1;
+       bn->tkey = 1;
+       bn->tdense = bn->U->count <= 1;
+       if (bn->U->count == 1)
+               bn->tseqbase =  * (oid *) Tloc(bn, BUNfirst(bn));
+       /* the head is a dense sequence starting at 0 */
+       bn->hsorted = 1;
+       bn->hdense = 1;
+       bn->hseqbase = 0;
+       bn->hkey = 1;
+       bn->hrevsorted = bn->U->count <= 1;
+       *bnp = bn;
+       return MAL_SUCCEED;
+
+  bunins_failed:
+       /* no explicit goto in this function targets this label;
+        * presumably reached from bunfastins inside the scan macros */
+       BBPreclaim(bn);
+       my_pcre_free(re);
+       my_pcre_free(pe);
+       *bnp = NULL;
+       throw(MAL, "pcre.likesubselect", OPERATION_FAILED);
+}
+
 static str
 pcre_select(BAT **res, str pattern, BAT *strs, bit insensitive)
 {
@@ -1367,6 +1495,43 @@ BATPCREnotilike2(bat *ret, int *bid, str
        return(BATPCREnotilike(ret, bid, pat, &esc));
 }
 
+/* MAL entry point for algebra.likesubselect with an optional
+ * candidate list.
+ *
+ * ret        receives the id of the result BAT
+ * bid        id of the BAT whose string tail is matched
+ * sid        id of the candidate list BAT, or NULL for none
+ * pat        SQL-style LIKE pattern
+ * esc        escape string for pat; a backslash is used when *esc
+ *            equals str_nil
+ * caseignore non-zero for a case-insensitive (ILIKE) match
+ * anti       non-zero to select the values that do not match
+ *
+ * The SQL pattern is translated with sql2pcre and handed to
+ * pcre_likesubselect.  Returns MAL_SUCCEED, or the error from
+ * sql2pcre / pcre_likesubselect, or RUNTIME_OBJECT_MISSING when one
+ * of the input BATs cannot be obtained. */
+str
+PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str *esc, bit *caseignore, bit *anti)
+{
+       BAT *b, *s = NULL, *bn = NULL;
+       str res;
+       char *ppat = NULL;
+
+       res = sql2pcre(&ppat, *pat, strcmp(*esc, str_nil) != 0 ? *esc : "\\");
+       if (res != MAL_SUCCEED)
+               return res;
+       if ((b = BATdescriptor(*bid)) == NULL) {
+               GDKfree(ppat);
+               throw(MAL, "algebra.likesubselect", RUNTIME_OBJECT_MISSING);
+       }
+       if (sid && (s = BATdescriptor(*sid)) == NULL) {
+               GDKfree(ppat);
+               BBPreleaseref(b->batCacheid);
+               throw(MAL, "algebra.likesubselect", RUNTIME_OBJECT_MISSING);
+       }
+       res = pcre_likesubselect(&bn, b, s, ppat, *caseignore, *anti);
+       /* the inputs and the translated pattern are no longer needed,
+        * whether or not the selection succeeded */
+       BBPreleaseref(b->batCacheid);
+       if (s)
+               BBPreleaseref(s->batCacheid);
+       GDKfree(ppat);
+       if (res != MAL_SUCCEED)
+               return res;
+       *ret = bn->batCacheid;
+       BBPkeepref(bn->batCacheid);
+       return MAL_SUCCEED;
+}
+
+/* MAL entry point for algebra.likesubselect without a candidate
+ * list: forwards to PCRElikesubselect2 with a NULL sid, so every
+ * value of the input BAT is tested. */
+str
+PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit *caseignore, bit *anti)
+{
+       return PCRElikesubselect2(ret, bid, NULL, pat, esc, caseignore, anti);
+}
+
 static int
 re_simple(char *pat)
 {
diff --git a/monetdb5/modules/mal/pcre.mal b/monetdb5/modules/mal/pcre.mal
--- a/monetdb5/modules/mal/pcre.mal
+++ b/monetdb5/modules/mal/pcre.mal
@@ -166,3 +166,21 @@ address BATPCREnotilike;
 command batstr.not_ilike(s:bat[:oid,:str], pat:str):bat[:oid,:bit]
 address BATPCREnotilike2;
 
+module algebra;
+command algebra.likesubselect(b:bat[:oid,:str], pat:str, esc:str, caseignore:bit, anti:bit) :bat[:oid,:oid]
+address PCRElikesubselect1
+comment "Select all head values for which the tail value is \"like\"
+       the given (SQL-style) pattern.
+       Input is a dense-headed BAT, output is a dense-headed BAT with in
+       the tail the head value of the input BAT for which the
+       relationship holds.  The output BAT is sorted on the tail value.";
+
+command algebra.likesubselect(b:bat[:oid,:str], s:bat[:oid,:oid], pat:str, esc:str, caseignore:bit, anti:bit) :bat[:oid,:oid]
+address PCRElikesubselect2
+comment "Select all head values of the first input BAT for which the
+       tail value is \"like\" the given (SQL-style) pattern and for
+       which the head value occurs in the tail of the second input
+       BAT.
+       Input is a dense-headed BAT, output is a dense-headed BAT with in
+       the tail the head value of the input BAT for which the
+       relationship holds.  The output BAT is sorted on the tail value.";
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to