Changeset: 214d97b4f4e5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=214d97b4f4e5
Modified Files:
clients/Tests/exports.stable.out
monetdb5/modules/mal/pcre.c
monetdb5/modules/mal/pcre.mal
Branch: default
Log Message:
Implemented algebra.likesubselect.
This is a member of the subselect family, but the selection criterion
is a SQL-style LIKE or ILIKE pattern.
diffs (226 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -2111,6 +2111,8 @@ str PCRElike3(bit *ret, str *s, str *pat
str PCRElike_join_pcre(int *ret, int *b, int *pat, str *esc);
str PCRElike_select_pcre(int *ret, int *b, str *pat, str *esc);
str PCRElike_uselect_pcre(int *ret, int *b, str *pat, str *esc);
+str PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit *caseignore, bit *anti);
+str PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str *esc, bit *caseignore, bit *anti);
str PCREmatch(bit *ret, str *val, str *pat);
str PCREnotilike2(bit *ret, str *s, str *pat);
str PCREnotilike3(bit *ret, str *s, str *pat, str *esc);
diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -97,6 +97,8 @@ pcre_export str PCREilike_join_pcre(int
pcre_export str PCRElike_select_pcre(int *ret, int *b, str *pat, str *esc);
pcre_export str PCREilike_select_pcre(int *ret, int *b, str *pat, str *esc);
pcre_export str pcre_init(void);
+/* LIKE/ILIKE members of the subselect family: without (1) and with (2) a candidate list */
+pcre_export str PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit *caseignore, bit *anti);
+pcre_export str PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str *esc, bit *caseignore, bit *anti);
/* current implementation assumes simple %keyword% [keyw%]* */
typedef struct RE {
@@ -368,6 +370,132 @@ pcre_index(int *res, pcre * pattern, str
return MAL_SUCCEED;
}
+/* these two defines are copies from gdk_select.c */
+
+/*
+ * Scan select loop with candidates.
+ *
+ * Not self-contained: the expansion uses these names from the enclosing
+ * function: b, s, anti (debug message only), bi, bn, p, q, o, off, r, v
+ * and candlist.
+ *
+ * While p < q it takes the next oid from candlist, turns it into a
+ * position in b by subtracting off, loads the tail value at that
+ * position into v and, when TEST holds, appends the oid to bn with
+ * bunfastins.  TEST is also stringified into the ALGODEBUG message.
+ *
+ * NOTE(review): bunfastins presumably jumps to a bunins_failed label in
+ * the enclosing function when the insert fails -- confirm against gdk.h.
+ */
+#define candscanloop(TEST)						\
+	do {								\
+		ALGODEBUG fprintf(stderr,				\
+			    "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): " \
+			    "scanselect %s\n", BATgetId(b), BATcount(b), \
+			    s ? BATgetId(s) : "NULL", anti, #TEST);	\
+		while (p < q) {						\
+			o = *candlist++;				\
+			r = (BUN) (o - off);				\
+			v = BUNtail(bi, r);				\
+			if (TEST)					\
+				bunfastins(bn, NULL, &o);		\
+			p++;						\
+		}							\
+	} while (0)
+
+/*
+ * Scan select loop without candidates.
+ *
+ * Not self-contained: the expansion uses these names from the enclosing
+ * function: b, s, anti (debug message only), bi, bn, p, q, o, off and v.
+ *
+ * Visits every position of b with BATloop, loads the tail value into v
+ * and, when TEST holds, appends the oid (position + off) to bn with
+ * bunfastins.  TEST is also stringified into the ALGODEBUG message.
+ *
+ * NOTE(review): bunfastins presumably jumps to a bunins_failed label in
+ * the enclosing function when the insert fails -- confirm against gdk.h.
+ */
+#define scanloop(TEST)							\
+	do {								\
+		ALGODEBUG fprintf(stderr,				\
+			    "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): " \
+			    "scanselect %s\n", BATgetId(b), BATcount(b), \
+			    s ? BATgetId(s) : "NULL", anti, #TEST);	\
+		BATloop(b, p, q) {					\
+			v = BUNtail(bi, p);				\
+			if (TEST) {					\
+				o = (oid) p + off;			\
+				bunfastins(bn, NULL, &o);		\
+			}						\
+		}							\
+	} while (0)
+
+/*
+ * Select from the string BAT b the rows whose tail value matches the
+ * PCRE pattern pat or, when anti is 1, the rows for which pcre_exec
+ * reports "no match".
+ *
+ *   bnp        - out: the newly created result BAT on success; set to
+ *                NULL when inserting into the result fails
+ *   b          - dense-headed BAT with a string tail (asserted)
+ *   s          - optional candidate list (dense head, sorted and unique
+ *                oid/void tail; asserted); NULL means scan all of b
+ *   pat        - pattern, handed to pcre_compile as is
+ *   caseignore - non-zero adds PCRE_CASELESS to the compile options
+ *   anti       - 0 or 1 (asserted); 1 inverts the selection
+ *
+ * Rows whose value is NULL or starts with '\200' are never selected,
+ * with or without anti (presumably '\200' marks the string nil value --
+ * TODO confirm against the GDK definition of str_nil).  With anti set,
+ * only PCRE_ERROR_NOMATCH selects a row; any other pcre_exec error
+ * leaves the row out, just as in the non-anti case.
+ *
+ * The result has a dense head starting at 0 and a sorted, unique oid
+ * tail holding the head oids of the selected rows of b.
+ *
+ * Returns MAL_SUCCEED, or an exception if compiling or studying the
+ * pattern, allocating the result, or inserting into it fails.
+ */
+static str
+pcre_likesubselect(BAT **bnp, BAT *b, BAT *s, const char *pat, int caseignore, int anti)
+{
+	int options = PCRE_UTF8 | PCRE_MULTILINE;
+	pcre *re;
+	pcre_extra *pe;
+	const char *error;
+	int errpos;
+	BATiter bi = bat_iterator(b);
+	BAT *bn;
+	BUN p, q;
+	oid o, off;
+	const char *v;
+	int ovector[10];
+
+	assert(BAThdense(b));
+	assert(ATOMstorage(b->ttype) == TYPE_str);
+	assert(anti == 0 || anti == 1);
+
+	if (caseignore)
+		options |= PCRE_CASELESS;
+	if ((re = pcre_compile(pat, options, &error, &errpos, NULL)) == NULL)
+		throw(MAL, "pcre.likesubselect",
+		      OPERATION_FAILED ": compilation of pattern \"%s\" failed\n", pat);
+	pe = pcre_study(re, 0, &error);
+	if (error != NULL) {
+		my_pcre_free(re);
+		my_pcre_free(pe);
+		throw(MAL, "pcre.likesubselect",
+		      OPERATION_FAILED ": studying pattern \"%s\" failed\n", pat);
+	}
+	/* the result can never hold more oids than there are rows to test */
+	bn = BATnew(TYPE_void, TYPE_oid, s ? BATcount(s) : BATcount(b));
+	if (bn == NULL) {
+		my_pcre_free(re);
+		my_pcre_free(pe);
+		throw(MAL, "pcre.likesubselect", MAL_MALLOC_FAIL);
+	}
+	/* adding off to a position in b gives the head oid of that row */
+	off = b->hseqbase - BUNfirst(b);
+
+	if (s) {
+		const oid *candlist;
+		BUN r;
+
+		assert(BAThdense(s));
+		assert(s->ttype == TYPE_oid || s->ttype == TYPE_void);
+		assert(s->tsorted);
+		assert(s->tkey);
+		/* setup candscanloop loop vars to only iterate over
+		 * part of s that has values that are in range of b */
+		o = b->hseqbase + BATcount(b);
+		q = SORTfndfirst(s, &o);
+		p = SORTfndfirst(s, &b->hseqbase);
+		candlist = (const oid *) Tloc(s, p);
+		if (anti) {
+			candscanloop(v && *v != '\200' &&
+				     pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) == PCRE_ERROR_NOMATCH);
+		} else {
+			candscanloop(v && *v != '\200' &&
+				     pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) >= 0);
+		}
+	} else {
+		if (anti) {
+			scanloop(v && *v != '\200' &&
+				 pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) == PCRE_ERROR_NOMATCH);
+		} else {
+			scanloop(v && *v != '\200' &&
+				 pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) >= 0);
+		}
+	}
+	my_pcre_free(re);
+	my_pcre_free(pe);
+
+	/* tail: oids were appended in ascending order without duplicates */
+	bn->tsorted = 1;
+	bn->trevsorted = bn->U->count <= 1;
+	bn->tkey = 1;
+	bn->tdense = bn->U->count <= 1;
+	if (bn->U->count == 1)
+		bn->tseqbase = * (oid *) Tloc(bn, BUNfirst(bn));
+	/* head: dense, starting at 0 */
+	bn->hsorted = 1;
+	bn->hdense = 1;
+	bn->hseqbase = 0;
+	bn->hkey = 1;
+	bn->hrevsorted = bn->U->count <= 1;
+	*bnp = bn;
+	return MAL_SUCCEED;
+
+  bunins_failed:
+	/* not referenced explicitly in this function; presumably the
+	 * target of bunfastins inside the scan loop macros */
+	BBPreclaim(bn);
+	my_pcre_free(re);
+	my_pcre_free(pe);
+	*bnp = NULL;
+	throw(MAL, "pcre.likesubselect", OPERATION_FAILED);
+}
+
static str
pcre_select(BAT **res, str pattern, BAT *strs, bit insensitive)
{
@@ -1367,6 +1495,43 @@ BATPCREnotilike2(bat *ret, int *bid, str
return(BATPCREnotilike(ret, bid, pat, &esc));
}
+/*
+ * MAL entry point for algebra.likesubselect with a candidate list.
+ *
+ *   ret        - out: id of the result BAT
+ *   bid        - id of the string BAT to select from
+ *   sid        - id of the candidate list BAT; a NULL pointer means
+ *                "no candidate list" (this is how PCRElikesubselect1
+ *                calls this function)
+ *   pat        - SQL-style LIKE pattern, converted with sql2pcre
+ *   esc        - escape string; when it equals str_nil, "\\" is used
+ *   caseignore - passed on to pcre_likesubselect
+ *   anti       - passed on to pcre_likesubselect
+ *
+ * Returns MAL_SUCCEED, the exception from sql2pcre or
+ * pcre_likesubselect, or a RUNTIME_OBJECT_MISSING exception when one
+ * of the input BATs cannot be obtained.
+ */
+str
+PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str *esc, bit *caseignore, bit *anti)
+{
+	BAT *b, *s = NULL, *bn;
+	str res;
+	char *ppat = NULL;
+
+	res = sql2pcre(&ppat, *pat, strcmp(*esc, str_nil) != 0 ? *esc : "\\");
+	if (res != MAL_SUCCEED)
+		return res;
+	if ((b = BATdescriptor(*bid)) == NULL) {
+		GDKfree(ppat);
+		throw(MAL, "algebra.likesubselect", RUNTIME_OBJECT_MISSING);
+	}
+	if (sid && (s = BATdescriptor(*sid)) == NULL) {
+		GDKfree(ppat);
+		BBPreleaseref(b->batCacheid);
+		throw(MAL, "algebra.likesubselect", RUNTIME_OBJECT_MISSING);
+	}
+	res = pcre_likesubselect(&bn, b, s, ppat, *caseignore, *anti);
+	/* the inputs and the converted pattern are no longer needed,
+	 * whether or not the selection succeeded */
+	BBPreleaseref(b->batCacheid);
+	if (s)
+		BBPreleaseref(s->batCacheid);
+	GDKfree(ppat);
+	if (res != MAL_SUCCEED)
+		return res;
+	*ret = bn->batCacheid;
+	BBPkeepref(bn->batCacheid);
+	return MAL_SUCCEED;
+}
+
+/*
+ * MAL entry point for algebra.likesubselect without a candidate list:
+ * forwards to PCRElikesubselect2 with a NULL candidate BAT id, so that
+ * all of the input BAT is scanned.
+ */
+str
+PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit *caseignore, bit *anti)
+{
+	bat *no_candidates = NULL;
+
+	return PCRElikesubselect2(ret, bid, no_candidates, pat, esc, caseignore, anti);
+}
+
static int
re_simple(char *pat)
{
diff --git a/monetdb5/modules/mal/pcre.mal b/monetdb5/modules/mal/pcre.mal
--- a/monetdb5/modules/mal/pcre.mal
+++ b/monetdb5/modules/mal/pcre.mal
@@ -166,3 +166,21 @@ address BATPCREnotilike;
command batstr.not_ilike(s:bat[:oid,:str], pat:str):bat[:oid,:bit]
address BATPCREnotilike2;
+module algebra;
+# Variant without a candidate list: every row of b is tested.
+# esc is the escape string for pat; a nil esc makes the implementation
+# fall back to a backslash.  caseignore requests case-insensitive
+# matching; anti inverts the selection (rows that do not match).
+command algebra.likesubselect(b:bat[:oid,:str], pat:str, esc:str,
caseignore:bit, anti:bit) :bat[:oid,:oid]
+address PCRElikesubselect1
+comment "Select all head values for which the tail value is \"like\"
+ the given (SQL-style) pattern.
+ Input is a dense-headed BAT, output is a dense-headed BAT with in
+ the tail the head value of the input BAT for which the
+ relationship holds. The output BAT is sorted on the tail value.";
+
+# Variant with a candidate list: only rows of b whose head value occurs
+# in the tail of s are tested.
+# esc is the escape string for pat; a nil esc makes the implementation
+# fall back to a backslash.  caseignore requests case-insensitive
+# matching; anti inverts the selection (rows that do not match).
+command algebra.likesubselect(b:bat[:oid,:str], s:bat[:oid,:oid], pat:str,
esc:str, caseignore:bit, anti:bit) :bat[:oid,:oid]
+address PCRElikesubselect2
+comment "Select all head values of the first input BAT for which the
+ tail value is \"like\" the given (SQL-style) pattern and for
+ which the head value occurs in the tail of the second input
+ BAT.
+ Input is a dense-headed BAT, output is a dense-headed BAT with in
+ the tail the head value of the input BAT for which the
+ relationship holds. The output BAT is sorted on the tail value.";
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list