Changeset: d7643e277afc for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d7643e277afc
Added Files:
gdk/gdk_cand.h
Modified Files:
clients/Tests/MAL-signatures_all.stable.out
clients/Tests/MAL-signatures_fits_geom.stable.out
clients/Tests/MAL-signatures_geom.stable.out
clients/Tests/MAL-signatures_none.stable.out
clients/Tests/exports.stable.out
gdk/Makefile.ag
gdk/gdk_calc_private.h
monetdb5/modules/mal/pcre.c
monetdb5/modules/mal/pcre.mal
Branch: Oct2014
Log Message:
Reimplemented PCRE like join as a proper "subjoin".
New interfaces algebra.ilikesubjoin and algebra.likesubjoin; the
backward-compatible interfaces algebra.(i)likesubselect with a BAT
parameter for patterns (and without a candidate list) still exist.
diffs (truncated from 982 to 300 lines):
diff --git a/clients/Tests/MAL-signatures_all.stable.out b/clients/Tests/MAL-signatures_all.stable.out
--- a/clients/Tests/MAL-signatures_all.stable.out
+++ b/clients/Tests/MAL-signatures_all.stable.out
@@ -2450,6 +2450,10 @@ command algebra.groupby(gids:bat[:oid,:o
address ALGgroupby;
comment Produces a new BAT with groups identified by the head column. The
result contains tail times the head value, ie the tail contains the result
group sizes.
+command
algebra.ilikesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
(X_6:bat[:oid,:oid],X_7:bat[:oid,:oid])
+address ILIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate
lists SL and SR using pattern escape string ESCand doing a case insensitive
match.The result is two aligned bats with oids of matching rows.
+
command algebra.ilikesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str)
(l:bat[:oid,:oid],r:bat[:oid,:oid])
address PCREilike_join_pcre;
function
algebra.ilikesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
@@ -2502,6 +2506,10 @@ pattern algebra.leftjoinPath(l:bat[:any,
address ALGjoinPath;
comment Routine to handle join paths. The type analysis is rather tricky.
+command
algebra.likesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
(X_6:bat[:oid,:oid],X_7:bat[:oid,:oid])
+address LIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate
lists SL and SR using pattern escape string ESCand doing a case sensitive
match.The result is two aligned bats with oids of matching rows.
+
command algebra.likesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str)
(l:bat[:oid,:oid],r:bat[:oid,:oid])
address PCRElike_join_pcre;
function
algebra.likesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
diff --git a/clients/Tests/MAL-signatures_fits_geom.stable.out b/clients/Tests/MAL-signatures_fits_geom.stable.out
--- a/clients/Tests/MAL-signatures_fits_geom.stable.out
+++ b/clients/Tests/MAL-signatures_fits_geom.stable.out
@@ -2451,6 +2451,10 @@ command algebra.groupby(gids:bat[:oid,:o
address ALGgroupby;
comment Produces a new BAT with groups identified by the head column. The
result contains tail times the head value, ie the tail contains the result
group sizes.
+command
algebra.ilikesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
(X_6:bat[:oid,:oid],X_7:bat[:oid,:oid])
+address ILIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate
lists SL and SR using pattern escape string ESCand doing a case insensitive
match.The result is two aligned bats with oids of matching rows.
+
command algebra.ilikesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str)
(l:bat[:oid,:oid],r:bat[:oid,:oid])
address PCREilike_join_pcre;
function
algebra.ilikesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
@@ -2503,6 +2507,10 @@ pattern algebra.leftjoinPath(l:bat[:any,
address ALGjoinPath;
comment Routine to handle join paths. The type analysis is rather tricky.
+command
algebra.likesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
(X_6:bat[:oid,:oid],X_7:bat[:oid,:oid])
+address LIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate
lists SL and SR using pattern escape string ESCand doing a case sensitive
match.The result is two aligned bats with oids of matching rows.
+
command algebra.likesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str)
(l:bat[:oid,:oid],r:bat[:oid,:oid])
address PCRElike_join_pcre;
function
algebra.likesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
diff --git a/clients/Tests/MAL-signatures_geom.stable.out b/clients/Tests/MAL-signatures_geom.stable.out
--- a/clients/Tests/MAL-signatures_geom.stable.out
+++ b/clients/Tests/MAL-signatures_geom.stable.out
@@ -2451,6 +2451,10 @@ command algebra.groupby(gids:bat[:oid,:o
address ALGgroupby;
comment Produces a new BAT with groups identified by the head column. The
result contains tail times the head value, ie the tail contains the result
group sizes.
+command
algebra.ilikesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
(X_6:bat[:oid,:oid],X_7:bat[:oid,:oid])
+address ILIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate
lists SL and SR using pattern escape string ESCand doing a case insensitive
match.The result is two aligned bats with oids of matching rows.
+
command algebra.ilikesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str)
(l:bat[:oid,:oid],r:bat[:oid,:oid])
address PCREilike_join_pcre;
function
algebra.ilikesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
@@ -2503,6 +2507,10 @@ pattern algebra.leftjoinPath(l:bat[:any,
address ALGjoinPath;
comment Routine to handle join paths. The type analysis is rather tricky.
+command
algebra.likesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
(X_6:bat[:oid,:oid],X_7:bat[:oid,:oid])
+address LIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate
lists SL and SR using pattern escape string ESCand doing a case sensitive
match.The result is two aligned bats with oids of matching rows.
+
command algebra.likesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str)
(l:bat[:oid,:oid],r:bat[:oid,:oid])
address PCRElike_join_pcre;
function
algebra.likesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
diff --git a/clients/Tests/MAL-signatures_none.stable.out b/clients/Tests/MAL-signatures_none.stable.out
--- a/clients/Tests/MAL-signatures_none.stable.out
+++ b/clients/Tests/MAL-signatures_none.stable.out
@@ -2450,6 +2450,10 @@ command algebra.groupby(gids:bat[:oid,:o
address ALGgroupby;
comment Produces a new BAT with groups identified by the head column. The
result contains tail times the head value, ie the tail contains the result
group sizes.
+command
algebra.ilikesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
(X_6:bat[:oid,:oid],X_7:bat[:oid,:oid])
+address ILIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate
lists SL and SR using pattern escape string ESCand doing a case insensitive
match.The result is two aligned bats with oids of matching rows.
+
command algebra.ilikesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str)
(l:bat[:oid,:oid],r:bat[:oid,:oid])
address PCREilike_join_pcre;
function
algebra.ilikesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
@@ -2502,6 +2506,10 @@ pattern algebra.leftjoinPath(l:bat[:any,
address ALGjoinPath;
comment Routine to handle join paths. The type analysis is rather tricky.
+command
algebra.likesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
(X_6:bat[:oid,:oid],X_7:bat[:oid,:oid])
+address LIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate
lists SL and SR using pattern escape string ESCand doing a case sensitive
match.The result is two aligned bats with oids of matching rows.
+
command algebra.likesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str)
(l:bat[:oid,:oid],r:bat[:oid,:oid])
address PCRElike_join_pcre;
function
algebra.likesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -1297,6 +1297,7 @@ str IDentifier(str *retval, str *in);
int IDfromString(str src, int *len, str *retval);
str IDprelude(void);
int IDtoString(str *retval, int *len, str handle);
+str ILIKEsubjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, str *esc);
str INET_comp_CS(bit *retval, inet *val1, inet *val2);
str INET_comp_CSE(bit *retval, inet *val1, inet *val2);
str INET_comp_CW(bit *retval, inet *val1, inet *val2);
@@ -1408,6 +1409,7 @@ int JSONtoString(str *s, int *len, json
str JSONunfold(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
str JSONvalueArray(json *ret, json *arg);
str JSONvalueTable(int *ret, json *j);
+str LIKEsubjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, str *esc);
char *M5OutOfMemory;
str MACROprocessor(Client cntxt, MalBlkPtr mb, Symbol t);
int MAL_MAXCLIENTS;
diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag
--- a/gdk/Makefile.ag
+++ b/gdk/Makefile.ag
@@ -23,7 +23,7 @@ lib_gdk = {
VERSION = $(GDK_VERSION)
NAME = bat
SOURCES = \
- gdk.h gdk_atomic.h gdk_batop.c \
+ gdk.h gdk_cand.h gdk_atomic.h gdk_batop.c \
gdk_select.c gdk_select_legacy.c \
gdk_search.c gdk_search.h gdk_tm.c \
gdk_align.c gdk_bbp.c gdk_bbp.h \
diff --git a/gdk/gdk_calc_private.h b/gdk/gdk_calc_private.h
--- a/gdk/gdk_calc_private.h
+++ b/gdk/gdk_calc_private.h
@@ -53,50 +53,7 @@ typedef unsigned __int64 ulng;
#define GT(a, b) ((bit) ((a) > (b)))
-#define CANDINIT(b, s, start, end, cnt, cand, candend) \
- do { \
- start = 0; \
- end = cnt = BATcount(b); \
- cand = candend = NULL; \
- if (s) { \
- assert(BATttype(s) == TYPE_oid); \
- if (BATcount(s) == 0) { \
- start = end = 0; \
- } else { \
- if (BATtdense(s)) { \
- start = (s)->T->seq; \
- end = start + BATcount(s); \
- } else { \
- oid x = (b)->H->seq; \
- start = SORTfndfirst((s), &x); \
- x += BATcount(b); \
- end = SORTfndfirst((s), &x); \
- cand = (const oid *) Tloc((s), start); \
- candend = (const oid *) Tloc((s), end); \
- if (cand == candend) { \
- start = end = 0; \
- } else { \
- assert(cand < candend); \
- start = *cand; \
- end = candend[-1] + 1; \
- } \
- } \
- assert(start <= end); \
- if (start <= (b)->H->seq) \
- start = 0; \
- else if (start >= (b)->H->seq + cnt) \
- start = cnt; \
- else \
- start -= (b)->H->seq; \
- if (end >= (b)->H->seq + cnt) \
- end = cnt; \
- else if (end <= (b)->H->seq) \
- end = 0; \
- else \
- end -= (b)->H->seq; \
- } \
- } \
- } while (0)
+#include "gdk_cand.h"
/* dst = lft + rgt with overflow check */
#define ADD_WITH_CHECK(TYPE1, lft, TYPE2, rgt, TYPE3, dst, on_overflow) \
diff --git a/gdk/gdk_cand.h b/gdk/gdk_cand.h
new file mode 100644
--- /dev/null
+++ b/gdk/gdk_cand.h
@@ -0,0 +1,63 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is the MonetDB Database System.
+ *
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2014 MonetDB B.V.
+ * All Rights Reserved.
+ */
+
+#define CANDINIT(b, s, start, end, cnt, cand, candend) \
+ do { \
+ start = 0; \
+ end = cnt = BATcount(b); \
+ cand = candend = NULL; \
+ if (s) { \
+ assert(BATttype(s) == TYPE_oid); \
+ if (BATcount(s) == 0) { \
+ start = end = 0; \
+ } else { \
+ if (BATtdense(s)) { \
+ start = (s)->T->seq; \
+ end = start + BATcount(s); \
+ } else { \
+ oid x = (b)->H->seq; \
+ start = SORTfndfirst((s), &x); \
+ x += BATcount(b); \
+ end = SORTfndfirst((s), &x); \
+ cand = (const oid *) Tloc((s), start); \
+ candend = (const oid *) Tloc((s), end); \
+ if (cand == candend) { \
+ start = end = 0; \
+ } else { \
+ assert(cand < candend); \
+ start = *cand; \
+ end = candend[-1] + 1; \
+ } \
+ } \
+ assert(start <= end); \
+ if (start <= (b)->H->seq) \
+ start = 0; \
+ else if (start >= (b)->H->seq + cnt) \
+ start = cnt; \
+ else \
+ start -= (b)->H->seq; \
+ if (end >= (b)->H->seq + cnt) \
+ end = cnt; \
+ else if (end <= (b)->H->seq) \
+ end = 0; \
+ else \
+ end -= (b)->H->seq; \
+ } \
+ } \
+ } while (0)
diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -224,54 +224,6 @@ re_destroy( RE *p)
}
}
-static BAT *
-re_uselect(RE *pattern, BAT *strs, int ignore)
-{
- BATiter strsi = bat_iterator(strs);
- BAT *r;
- BUN p, q;
-
- assert(strs->htype==TYPE_void);
- if (strs->htype == TYPE_void)
- r = BATnew(TYPE_oid, TYPE_void, BATcount(strs), TRANSIENT);
- else
- r = BATnew(strs->htype, TYPE_void, BATcount(strs), TRANSIENT);
- if (r == NULL)
- return NULL;
-
- if (ignore) {
- BATloop(strs, p, q) {
- const char *s = BUNtail(strsi, p);
-
- if (re_match_ignore(s, pattern) &&
- BUNfastins(r, BUNhead(strsi, p), NULL) == NULL) {
- BBPreclaim(r);
- return NULL;
- }
- }
- } else {
- BATloop(strs, p, q) {
- const char *s = BUNtail(strsi, p);
-
- if (re_match_no_ignore(s, pattern) &&
- BUNfastins(r, BUNhead(strsi, p), NULL) == NULL) {
- BBPreclaim(r);
- return NULL;
- }
- }
- }
- r->H->nonil = strs->H->nonil;
- r->hsorted = strs->hsorted;
- r->hrevsorted = strs->hrevsorted;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list