Changeset: a3d5bebe13df for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a3d5bebe13df
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk_select.c
monetdb5/modules/mal/pcre.c
monetdb5/modules/mal/pcre.mal
Branch: default
Log Message:
merge
diffs (truncated from 445 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -176,6 +176,7 @@ BAT *BATsubselect(BAT *b, BAT *s, const
BAT *BATsunion(BAT *b, BAT *c);
BAT *BATsunique(BAT *b);
BAT *BATthetajoin(BAT *l, BAT *r, int mode, BUN estimate);
+BAT *BATthetasubselect(BAT *b, BAT *s, const void *val, const char *op);
int BATtopN(BAT *b, BUN topN);
BAT *BATundo(BAT *b);
BAT *BATuselect(BAT *b, const void *tl, const void *th);
@@ -746,6 +747,8 @@ str ALGtdiff(int *result, int *lid, int
str ALGthetajoin(int *result, int *lid, int *rid, int *opc);
str ALGthetajoinEstimate(int *result, int *lid, int *rid, int *opc, lng *estimate);
str ALGthetaselect(int *result, int *bid, ptr low, str *op);
+str ALGthetasubselect1(bat *result, bat *bid, const void *val, const char **op);
+str ALGthetasubselect2(bat *result, bat *bid, bat *sid, const void *val, const char **op);
str ALGthetauselect(int *result, int *bid, ptr value, str *op);
str ALGthsort(int *result, int *lid);
str ALGtintersect(int *result, int *lid, int *rid);
@@ -2108,6 +2111,8 @@ str PCRElike3(bit *ret, str *s, str *pat
str PCRElike_join_pcre(int *ret, int *b, int *pat, str *esc);
str PCRElike_select_pcre(int *ret, int *b, str *pat, str *esc);
str PCRElike_uselect_pcre(int *ret, int *b, str *pat, str *esc);
+str PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit *caseignore, bit *anti);
+str PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str *esc, bit *caseignore, bit *anti);
str PCREmatch(bit *ret, str *val, str *pat);
str PCREnotilike2(bit *ret, str *s, str *pat);
str PCREnotilike3(bit *ret, str *s, str *pat, str *esc);
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -161,18 +161,13 @@ BAT_hashselect(BAT *b, BAT *s, BAT *bn,
"#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): " \
"scanselect %s\n", BATgetId(b), BATcount(b), \
s ? BATgetId(s) : "NULL", anti, #TEST); \
- BATloop(s, p, q) { \
- o = * (oid *) BUNtloc(si, p); \
- if (o == oid_nil || \
- o < seqbase || \
- o - seqbase >= b->U->count) { \
- /* XXX return an error? */ \
- continue; \
- } \
- p = (BUN) (o - off); \
- v = BUNtail(bi, p); \
+ while (p < q) { \
+ o = *candlist++; \
+ r = (BUN) (o - off); \
+ v = BUNtail(bi, r); \
if (TEST) \
bunfastins(bn, NULL, &o); \
+ p++; \
} \
} while (0)
@@ -199,7 +194,7 @@ BAT_scanselect(BAT *b, BAT *s, BAT *bn,
BATiter bi = bat_iterator(b);
int (*cmp)(const void *, const void *);
BUN p, q;
- oid o, seqbase, off;
+ oid o, off;
const void *nil, *v;
int c;
@@ -216,14 +211,22 @@ BAT_scanselect(BAT *b, BAT *s, BAT *bn,
assert(!lval || !hval || (*cmp)(tl, th) <= 0);
nil = b->T->nonil ? NULL : ATOMnilptr(b->ttype);
- seqbase = b->hseqbase;
- off = seqbase - BUNfirst(b);
+ off = b->hseqbase - BUNfirst(b);
if (s) {
- BATiter si = bat_iterator(s);
+ const oid *candlist;
+ BUN r;
assert(s->tsorted);
assert(s->tkey);
+ /* setup candscanloop loop vars to only iterate over
+ * part of s that has values that are in range of b */
+ o = b->hseqbase + BATcount(b);
+ q = SORTfndfirst(s, &o);
+ p = SORTfndfirst(s, &b->hseqbase);
+ /* should we return an error if p > BUNfirst(s) || q <
+ * BUNlast(s) (i.e. s not fully used)? */
+ candlist = (const oid *) Tloc(s, p);
if (equi) {
assert(li && hi);
assert(!anti);
@@ -324,7 +327,6 @@ BATsubselect(BAT *b, BAT *s, const void
{
int hval, lval, equi, t, lnil;
const void *nil;
- BAT *orig_s = s;
BAT *bn;
BUN estimate;
@@ -433,10 +435,7 @@ BATsubselect(BAT *b, BAT *s, const void
* any: i.e. return everything */
ALGODEBUG fprintf(stderr, "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): everything, nonil\n", BATgetId(b), BATcount(b), s ? BATgetId(s) : "NULL", anti);
if (s) {
- if (s == orig_s)
- return BATcopy(s, TYPE_void, s->ttype, 0);
- else
- return s; /* already made a copy: return it */
+ return BATcopy(s, TYPE_void, s->ttype, 0);
} else {
return BATmirror(BATmark(b, 0));
}
@@ -526,8 +525,6 @@ BATsubselect(BAT *b, BAT *s, const void
o = (oid) high;
high = SORTfndfirst(s, &o);
v = VIEWhead(BATmirror(s));
- if (s != orig_s)
- BBPunfix(s->batCacheid);
} else {
v = VIEWhead(b); /* [oid,nil] */
}
@@ -540,8 +537,6 @@ BATsubselect(BAT *b, BAT *s, const void
o = (oid) high;
high = SORTfndfirst(s, &o);
v = VIEWhead(BATmirror(s));
- if (s != orig_s)
- BBPunfix(s->batCacheid);
} else {
v = VIEWhead(b); /* [oid,nil] */
}
@@ -584,11 +579,8 @@ BATsubselect(BAT *b, BAT *s, const void
}
bn = BATnew(TYPE_void, TYPE_oid, estimate);
- if (bn == NULL) {
- if (s && s != orig_s)
- BBPreclaim(s);
+ if (bn == NULL)
return NULL;
- }
if (equi &&
(b->T->hash ||
@@ -602,9 +594,6 @@ BATsubselect(BAT *b, BAT *s, const void
bn = BAT_scanselect(b, s, bn, tl, th, li, hi, equi, anti, lval,
hval);
}
- if (bn == NULL && s && s != orig_s)
- BBPreclaim(s);
-
return bn;
}
diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -97,6 +97,8 @@ pcre_export str PCREilike_join_pcre(int
pcre_export str PCRElike_select_pcre(int *ret, int *b, str *pat, str *esc);
pcre_export str PCREilike_select_pcre(int *ret, int *b, str *pat, str *esc);
pcre_export str pcre_init(void);
+pcre_export str PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit *caseignore, bit *anti);
+pcre_export str PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str *esc, bit *caseignore, bit *anti);
/* current implementation assumes simple %keyword% [keyw%]* */
typedef struct RE {
@@ -368,6 +370,132 @@ pcre_index(int *res, pcre * pattern, str
return MAL_SUCCEED;
}
+/* these two defines are copies from gdk_select.c */
+
+/* scan select loop with candidates */
+#define candscanloop(TEST) \
+ do { \
+ ALGODEBUG fprintf(stderr, \
+ "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): " \
+ "scanselect %s\n", BATgetId(b), BATcount(b), \
+ s ? BATgetId(s) : "NULL", anti, #TEST); \
+ while (p < q) { \
+ o = *candlist++; \
+ r = (BUN) (o - off); \
+ v = BUNtail(bi, r); \
+ if (TEST) \
+ bunfastins(bn, NULL, &o); \
+ p++; \
+ } \
+ } while (0)
+
+/* scan select loop without candidates */
+#define scanloop(TEST) \
+ do { \
+ ALGODEBUG fprintf(stderr, \
+ "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): " \
+ "scanselect %s\n", BATgetId(b), BATcount(b), \
+ s ? BATgetId(s) : "NULL", anti, #TEST); \
+ BATloop(b, p, q) { \
+ v = BUNtail(bi, p); \
+ if (TEST) { \
+ o = (oid) p + off; \
+ bunfastins(bn, NULL, &o); \
+ } \
+ } \
+ } while (0)
+
+static str
+pcre_likesubselect(BAT **bnp, BAT *b, BAT *s, const char *pat, int caseignore, int anti)
+{
+ int options = PCRE_UTF8 | PCRE_MULTILINE;
+ pcre *re;
+ pcre_extra *pe;
+ const char *error;
+ int errpos;
+ BATiter bi = bat_iterator(b);
+ BAT *bn;
+ BUN p, q;
+ oid o, off;
+ const char *v;
+ int ovector[10];
+
+ assert(BAThdense(b));
+ assert(ATOMstorage(b->ttype) == TYPE_str);
+ assert(anti == 0 || anti == 1);
+
+ if (caseignore)
+ options |= PCRE_CASELESS;
+ if ((re = pcre_compile(pat, options, &error, &errpos, NULL)) == NULL)
+ throw(MAL, "pcre.likesubselect",
+ OPERATION_FAILED ": compilation of pattern \"%s\" failed\n", pat);
+ pe = pcre_study(re, 0, &error);
+ if (error != NULL) {
+ my_pcre_free(re);
+ my_pcre_free(pe);
+ throw(MAL, "pcre.likesubselect",
+ OPERATION_FAILED ": studying pattern \"%s\" failed\n", pat);
+ }
+ bn = BATnew(TYPE_void, TYPE_oid, s ? BATcount(s) : BATcount(b));
+ if (bn == NULL) {
+ my_pcre_free(re);
+ my_pcre_free(pe);
+ throw(MAL, "pcre.likesubselect", MAL_MALLOC_FAIL);
+ }
+ off = b->hseqbase - BUNfirst(b);
+
+ if (s) {
+ const oid *candlist;
+ BUN r;
+
+ assert(BAThdense(s));
+ assert(s->ttype == TYPE_oid || s->ttype == TYPE_void);
+ assert(s->tsorted);
+ assert(s->tkey);
+ /* setup candscanloop loop vars to only iterate over
+ * part of s that has values that are in range of b */
+ o = b->hseqbase + BATcount(b);
+ q = SORTfndfirst(s, &o);
+ p = SORTfndfirst(s, &b->hseqbase);
+ candlist = (const oid *) Tloc(s, p);
+ if (anti)
+ candscanloop(v && *v != '\200' &&
+ pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) == -1);
+ else
+ candscanloop(v && *v != '\200' &&
+ pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) >= 0);
+ } else {
+ if (anti)
+ scanloop(v && *v != '\200' &&
+ pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) == -1);
+ else
+ scanloop(v && *v != '\200' &&
+ pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) >= 0);
+ }
+ my_pcre_free(re);
+ my_pcre_free(pe);
+ bn->tsorted = 1;
+ bn->trevsorted = bn->U->count <= 1;
+ bn->tkey = 1;
+ bn->tdense = bn->U->count <= 1;
+ if (bn->U->count == 1)
+ bn->tseqbase = * (oid *) Tloc(bn, BUNfirst(bn));
+ bn->hsorted = 1;
+ bn->hdense = 1;
+ bn->hseqbase = 0;
+ bn->hkey = 1;
+ bn->hrevsorted = bn->U->count <= 1;
+ *bnp = bn;
+ return MAL_SUCCEED;
+
+ bunins_failed:
+ BBPreclaim(bn);
+ my_pcre_free(re);
+ my_pcre_free(pe);
+ *bnp = NULL;
+ throw(MAL, "pcre.likesubselect", OPERATION_FAILED);
+}
+
static str
pcre_select(BAT **res, str pattern, BAT *strs, bit insensitive)
{
@@ -839,7 +967,7 @@ pcre_heap(Heap *heap, size_t capacity)
}
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list