Changeset: d7643e277afc for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d7643e277afc
Added Files:
        gdk/gdk_cand.h
Modified Files:
        clients/Tests/MAL-signatures_all.stable.out
        clients/Tests/MAL-signatures_fits_geom.stable.out
        clients/Tests/MAL-signatures_geom.stable.out
        clients/Tests/MAL-signatures_none.stable.out
        clients/Tests/exports.stable.out
        gdk/Makefile.ag
        gdk/gdk_calc_private.h
        monetdb5/modules/mal/pcre.c
        monetdb5/modules/mal/pcre.mal
Branch: Oct2014
Log Message:

Reimplemented the PCRE like join as a proper "subjoin".
The new interfaces are algebra.ilikesubjoin and algebra.likesubjoin, but
the backward-compatible interfaces algebra.(i)likesubselect with a BAT
parameter for patterns (and without candidate list) still exist.


diffs (truncated from 982 to 300 lines):

diff --git a/clients/Tests/MAL-signatures_all.stable.out 
b/clients/Tests/MAL-signatures_all.stable.out
--- a/clients/Tests/MAL-signatures_all.stable.out
+++ b/clients/Tests/MAL-signatures_all.stable.out
@@ -2450,6 +2450,10 @@ command algebra.groupby(gids:bat[:oid,:o
 address ALGgroupby;
 comment Produces a new BAT with groups identified by the head column. The 
result contains tail times the head value, ie the tail contains the result 
group sizes.
 
+command 
algebra.ilikesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
 (X_6:bat[:oid,:oid],X_7:bat[:oid,:oid]) 
+address ILIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate 
lists SL and SR using pattern escape string ESCand doing a case insensitive 
match.The result is two aligned bats with oids of matching rows.
+
 command algebra.ilikesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str) 
(l:bat[:oid,:oid],r:bat[:oid,:oid]) 
 address PCREilike_join_pcre;
 function 
algebra.ilikesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
@@ -2502,6 +2506,10 @@ pattern algebra.leftjoinPath(l:bat[:any,
 address ALGjoinPath;
 comment Routine to handle join paths.  The type analysis is rather tricky.
 
+command 
algebra.likesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
 (X_6:bat[:oid,:oid],X_7:bat[:oid,:oid]) 
+address LIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate 
lists SL and SR using pattern escape string ESCand doing a case sensitive 
match.The result is two aligned bats with oids of matching rows.
+
 command algebra.likesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str) 
(l:bat[:oid,:oid],r:bat[:oid,:oid]) 
 address PCRElike_join_pcre;
 function 
algebra.likesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
diff --git a/clients/Tests/MAL-signatures_fits_geom.stable.out 
b/clients/Tests/MAL-signatures_fits_geom.stable.out
--- a/clients/Tests/MAL-signatures_fits_geom.stable.out
+++ b/clients/Tests/MAL-signatures_fits_geom.stable.out
@@ -2451,6 +2451,10 @@ command algebra.groupby(gids:bat[:oid,:o
 address ALGgroupby;
 comment Produces a new BAT with groups identified by the head column. The 
result contains tail times the head value, ie the tail contains the result 
group sizes.
 
+command 
algebra.ilikesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
 (X_6:bat[:oid,:oid],X_7:bat[:oid,:oid]) 
+address ILIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate 
lists SL and SR using pattern escape string ESCand doing a case insensitive 
match.The result is two aligned bats with oids of matching rows.
+
 command algebra.ilikesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str) 
(l:bat[:oid,:oid],r:bat[:oid,:oid]) 
 address PCREilike_join_pcre;
 function 
algebra.ilikesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
@@ -2503,6 +2507,10 @@ pattern algebra.leftjoinPath(l:bat[:any,
 address ALGjoinPath;
 comment Routine to handle join paths.  The type analysis is rather tricky.
 
+command 
algebra.likesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
 (X_6:bat[:oid,:oid],X_7:bat[:oid,:oid]) 
+address LIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate 
lists SL and SR using pattern escape string ESCand doing a case sensitive 
match.The result is two aligned bats with oids of matching rows.
+
 command algebra.likesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str) 
(l:bat[:oid,:oid],r:bat[:oid,:oid]) 
 address PCRElike_join_pcre;
 function 
algebra.likesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
diff --git a/clients/Tests/MAL-signatures_geom.stable.out 
b/clients/Tests/MAL-signatures_geom.stable.out
--- a/clients/Tests/MAL-signatures_geom.stable.out
+++ b/clients/Tests/MAL-signatures_geom.stable.out
@@ -2451,6 +2451,10 @@ command algebra.groupby(gids:bat[:oid,:o
 address ALGgroupby;
 comment Produces a new BAT with groups identified by the head column. The 
result contains tail times the head value, ie the tail contains the result 
group sizes.
 
+command 
algebra.ilikesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
 (X_6:bat[:oid,:oid],X_7:bat[:oid,:oid]) 
+address ILIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate 
lists SL and SR using pattern escape string ESCand doing a case insensitive 
match.The result is two aligned bats with oids of matching rows.
+
 command algebra.ilikesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str) 
(l:bat[:oid,:oid],r:bat[:oid,:oid]) 
 address PCREilike_join_pcre;
 function 
algebra.ilikesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
@@ -2503,6 +2507,10 @@ pattern algebra.leftjoinPath(l:bat[:any,
 address ALGjoinPath;
 comment Routine to handle join paths.  The type analysis is rather tricky.
 
+command 
algebra.likesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
 (X_6:bat[:oid,:oid],X_7:bat[:oid,:oid]) 
+address LIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate 
lists SL and SR using pattern escape string ESCand doing a case sensitive 
match.The result is two aligned bats with oids of matching rows.
+
 command algebra.likesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str) 
(l:bat[:oid,:oid],r:bat[:oid,:oid]) 
 address PCRElike_join_pcre;
 function 
algebra.likesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
diff --git a/clients/Tests/MAL-signatures_none.stable.out 
b/clients/Tests/MAL-signatures_none.stable.out
--- a/clients/Tests/MAL-signatures_none.stable.out
+++ b/clients/Tests/MAL-signatures_none.stable.out
@@ -2450,6 +2450,10 @@ command algebra.groupby(gids:bat[:oid,:o
 address ALGgroupby;
 comment Produces a new BAT with groups identified by the head column. The 
result contains tail times the head value, ie the tail contains the result 
group sizes.
 
+command 
algebra.ilikesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
 (X_6:bat[:oid,:oid],X_7:bat[:oid,:oid]) 
+address ILIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate 
lists SL and SR using pattern escape string ESCand doing a case insensitive 
match.The result is two aligned bats with oids of matching rows.
+
 command algebra.ilikesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str) 
(l:bat[:oid,:oid],r:bat[:oid,:oid]) 
 address PCREilike_join_pcre;
 function 
algebra.ilikesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
@@ -2502,6 +2506,10 @@ pattern algebra.leftjoinPath(l:bat[:any,
 address ALGjoinPath;
 comment Routine to handle join paths.  The type analysis is rather tricky.
 
+command 
algebra.likesubjoin(l:bat[:oid,:str],r:bat[:oid,:str],sl:bat[:oid,:oid],sr:bat[:oid,:oid],esc:str)
 (X_6:bat[:oid,:oid],X_7:bat[:oid,:oid]) 
+address LIKEsubjoin;
+comment Join the string bat L with the pattern bat Rwith optional candidate 
lists SL and SR using pattern escape string ESCand doing a case sensitive 
match.The result is two aligned bats with oids of matching rows.
+
 command algebra.likesubselect(s:bat[:oid,:str],pat:bat[:oid,:str],esc:str) 
(l:bat[:oid,:oid],r:bat[:oid,:oid]) 
 address PCRElike_join_pcre;
 function 
algebra.likesubselect(b:bat[:oid,:str],cand:bat[:oid,:oid],pat:str,esc:str,anti:bit):bat[:oid,:oid];
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -1297,6 +1297,7 @@ str IDentifier(str *retval, str *in);
 int IDfromString(str src, int *len, str *retval);
 str IDprelude(void);
 int IDtoString(str *retval, int *len, str handle);
+str ILIKEsubjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, 
str *esc);
 str INET_comp_CS(bit *retval, inet *val1, inet *val2);
 str INET_comp_CSE(bit *retval, inet *val1, inet *val2);
 str INET_comp_CW(bit *retval, inet *val1, inet *val2);
@@ -1408,6 +1409,7 @@ int JSONtoString(str *s, int *len, json 
 str JSONunfold(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
 str JSONvalueArray(json *ret, json *arg);
 str JSONvalueTable(int *ret, json *j);
+str LIKEsubjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, 
str *esc);
 char *M5OutOfMemory;
 str MACROprocessor(Client cntxt, MalBlkPtr mb, Symbol t);
 int MAL_MAXCLIENTS;
diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag
--- a/gdk/Makefile.ag
+++ b/gdk/Makefile.ag
@@ -23,7 +23,7 @@ lib_gdk = {
        VERSION = $(GDK_VERSION)
        NAME = bat
        SOURCES = \
-               gdk.h gdk_atomic.h gdk_batop.c \
+               gdk.h gdk_cand.h gdk_atomic.h gdk_batop.c \
                gdk_select.c gdk_select_legacy.c \
                gdk_search.c gdk_search.h gdk_tm.c \
                gdk_align.c gdk_bbp.c gdk_bbp.h \
diff --git a/gdk/gdk_calc_private.h b/gdk/gdk_calc_private.h
--- a/gdk/gdk_calc_private.h
+++ b/gdk/gdk_calc_private.h
@@ -53,50 +53,7 @@ typedef unsigned __int64 ulng;
 
 #define GT(a, b)       ((bit) ((a) > (b)))
 
-#define CANDINIT(b, s, start, end, cnt, cand, candend)                 \
-       do {                                                            \
-               start = 0;                                              \
-               end = cnt = BATcount(b);                                \
-               cand = candend = NULL;                                  \
-               if (s) {                                                \
-                       assert(BATttype(s) == TYPE_oid);                \
-                       if (BATcount(s) == 0) {                         \
-                               start = end = 0;                        \
-                       } else {                                        \
-                               if (BATtdense(s)) {                     \
-                                       start = (s)->T->seq;            \
-                                       end = start + BATcount(s);      \
-                               } else {                                \
-                                       oid x = (b)->H->seq;            \
-                                       start = SORTfndfirst((s), &x);  \
-                                       x += BATcount(b);               \
-                                       end = SORTfndfirst((s), &x);    \
-                                       cand = (const oid *) Tloc((s), start); \
-                                       candend = (const oid *) Tloc((s), end); 
\
-                                       if (cand == candend) {          \
-                                               start = end = 0;        \
-                                       } else {                        \
-                                               assert(cand < candend); \
-                                               start = *cand;          \
-                                               end = candend[-1] + 1;  \
-                                       }                               \
-                               }                                       \
-                               assert(start <= end);                   \
-                               if (start <= (b)->H->seq)               \
-                                       start = 0;                      \
-                               else if (start >= (b)->H->seq + cnt)    \
-                                       start = cnt;                    \
-                               else                                    \
-                                       start -= (b)->H->seq;           \
-                               if (end >= (b)->H->seq + cnt)           \
-                                       end = cnt;                      \
-                               else if (end <= (b)->H->seq)            \
-                                       end = 0;                        \
-                               else                                    \
-                                       end -= (b)->H->seq;             \
-                       }                                               \
-               }                                                       \
-       } while (0)
+#include "gdk_cand.h"
 
 /* dst = lft + rgt with overflow check */
 #define ADD_WITH_CHECK(TYPE1, lft, TYPE2, rgt, TYPE3, dst, on_overflow)        
\
diff --git a/gdk/gdk_cand.h b/gdk/gdk_cand.h
new file mode 100644
--- /dev/null
+++ b/gdk/gdk_cand.h
@@ -0,0 +1,63 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is the MonetDB Database System.
+ *
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2014 MonetDB B.V.
+ * All Rights Reserved.
+ */
+
+#define CANDINIT(b, s, start, end, cnt, cand, candend)                 \
+       do {                                                            \
+               start = 0;                                              \
+               end = cnt = BATcount(b);                                \
+               cand = candend = NULL;                                  \
+               if (s) {                                                \
+                       assert(BATttype(s) == TYPE_oid);                \
+                       if (BATcount(s) == 0) {                         \
+                               start = end = 0;                        \
+                       } else {                                        \
+                               if (BATtdense(s)) {                     \
+                                       start = (s)->T->seq;            \
+                                       end = start + BATcount(s);      \
+                               } else {                                \
+                                       oid x = (b)->H->seq;            \
+                                       start = SORTfndfirst((s), &x);  \
+                                       x += BATcount(b);               \
+                                       end = SORTfndfirst((s), &x);    \
+                                       cand = (const oid *) Tloc((s), start); \
+                                       candend = (const oid *) Tloc((s), end); 
\
+                                       if (cand == candend) {          \
+                                               start = end = 0;        \
+                                       } else {                        \
+                                               assert(cand < candend); \
+                                               start = *cand;          \
+                                               end = candend[-1] + 1;  \
+                                       }                               \
+                               }                                       \
+                               assert(start <= end);                   \
+                               if (start <= (b)->H->seq)               \
+                                       start = 0;                      \
+                               else if (start >= (b)->H->seq + cnt)    \
+                                       start = cnt;                    \
+                               else                                    \
+                                       start -= (b)->H->seq;           \
+                               if (end >= (b)->H->seq + cnt)           \
+                                       end = cnt;                      \
+                               else if (end <= (b)->H->seq)            \
+                                       end = 0;                        \
+                               else                                    \
+                                       end -= (b)->H->seq;             \
+                       }                                               \
+               }                                                       \
+       } while (0)
diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -224,54 +224,6 @@ re_destroy( RE *p)
        }
 }
 
-static BAT *
-re_uselect(RE *pattern, BAT *strs, int ignore)
-{
-       BATiter strsi = bat_iterator(strs);
-       BAT *r;
-       BUN p, q;
-
-       assert(strs->htype==TYPE_void);
-       if (strs->htype == TYPE_void)
-               r = BATnew(TYPE_oid, TYPE_void, BATcount(strs), TRANSIENT);
-       else
-               r = BATnew(strs->htype, TYPE_void, BATcount(strs), TRANSIENT);
-       if (r == NULL)
-               return NULL;
-
-       if (ignore) {
-               BATloop(strs, p, q) {
-                       const char *s = BUNtail(strsi, p);
-
-                       if (re_match_ignore(s, pattern) &&
-                               BUNfastins(r, BUNhead(strsi, p), NULL) == NULL) 
{
-                               BBPreclaim(r);
-                               return NULL;
-                       }
-               }
-       } else {
-               BATloop(strs, p, q) {
-                       const char *s = BUNtail(strsi, p);
-
-                       if (re_match_no_ignore(s, pattern) &&
-                               BUNfastins(r, BUNhead(strsi, p), NULL) == NULL) 
{
-                               BBPreclaim(r);
-                               return NULL;
-                       }
-               }
-       }
-       r->H->nonil = strs->H->nonil;
-       r->hsorted = strs->hsorted;
-       r->hrevsorted = strs->hrevsorted;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to