Changeset: c86217f8da15 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c86217f8da15
Modified Files:
        clients/Tests/exports.stable.out
        monetdb5/modules/mal/pcre.c
        sql/server/sql_scan.c
        sql/test/pg_regress/Tests/comments.stable.err
        sql/test/pg_regress/Tests/comments.stable.out
Branch: default
Log Message:

Merge with Dec2016 branch.


diffs (truncated from 405 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -1572,12 +1572,10 @@ int OPTvolcanoImplementation(Client cntx
 str OPTwrapper(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p);
 str PCREilike2(bit *ret, const str *s, const str *pat);
 str PCREilike3(bit *ret, const str *s, const str *pat, const str *esc);
-str PCREilike_join_pcre(bat *l, bat *r, const bat *b, const bat *pat, const 
str *esc);
 str PCREimatch(bit *ret, const str *val, const str *pat);
 str PCREindex(int *ret, const pcre *pat, const str *val);
 str PCRElike2(bit *ret, const str *s, const str *pat);
 str PCRElike3(bit *ret, const str *s, const str *pat, const str *esc);
-str PCRElike_join_pcre(bat *l, bat *r, const bat *b, const bat *pat, const str 
*esc);
 str PCRElikesubselect1(bat *ret, const bat *bid, const bat *cid, const str 
*pat, const str *esc, const bit *anti);
 str PCRElikesubselect2(bat *ret, const bat *bid, const bat *sid, const str 
*pat, const str *esc, const bit *caseignore, const bit *anti);
 str PCRElikesubselect3(bat *ret, const bat *bid, const bat *sid, const str 
*pat, const str *esc, const bit *anti);
@@ -2313,7 +2311,6 @@ str partitionRef;
 str passRef;
 str pcreRef;
 str pcre_init(void *ret);
-var_t pcre_put(Heap *h, var_t *bun, pcre *val);
 str pinRef;
 str plusRef;
 str postludeRef;
diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -31,17 +31,17 @@
 #define pcre_free_study my_pcre_free
 #endif
 
+mal_export str pcre_init(void *ret);
+
 mal_export str PCREquote(str *r, const str *v);
 mal_export str PCREmatch(bit *ret, const str *val, const str *pat);
 mal_export str PCREimatch(bit *ret, const str *val, const str *pat);
 mal_export str PCREindex(int *ret, const pcre *pat, const str *val);
 mal_export str PCREpatindex(int *ret, const str *pat, const str *val);
-
 mal_export str PCREreplace_wrap(str *res, const str *or, const str *pat, const 
str *repl, const str *flags);
 mal_export str PCREreplace_bat_wrap(bat *res, const bat *or, const str *pat, 
const str *repl, const str *flags);
+mal_export str PCREsql2pcre(str *ret, const str *pat, const str *esc);
 
-mal_export var_t pcre_put(Heap *h, var_t *bun, pcre *val);
-mal_export str PCREsql2pcre(str *ret, const str *pat, const str *esc);
 mal_export str PCRElike3(bit *ret, const str *s, const str *pat, const str 
*esc);
 mal_export str PCRElike2(bit *ret, const str *s, const str *pat);
 mal_export str PCREnotlike3(bit *ret, const str *s, const str *pat, const str 
*esc);
@@ -58,15 +58,18 @@ mal_export str BATPCREilike(bat *ret, co
 mal_export str BATPCREilike2(bat *ret, const bat *b, const str *pat);
 mal_export str BATPCREnotilike(bat *ret, const bat *b, const str *pat, const 
str *esc);
 mal_export str BATPCREnotilike2(bat *ret, const bat *b, const str *pat);
-mal_export str PCRElike_join_pcre(bat *l, bat *r, const bat *b, const bat 
*pat, const str *esc);
-mal_export str PCREilike_join_pcre(bat *l, bat *r, const bat *b, const bat 
*pat, const str *esc);
-mal_export str pcre_init(void *ret);
+
 mal_export str PCRElikesubselect2(bat *ret, const bat *bid, const bat *sid, 
const str *pat, const str *esc, const bit *caseignore, const bit *anti);
 mal_export str PCRElikesubselect1(bat *ret, const bat *bid, const bat *cid, 
const str *pat, const str *esc, const bit *anti);
 mal_export str PCRElikesubselect3(bat *ret, const bat *bid, const bat *sid, 
const str *pat, const str *esc, const bit *anti);
 mal_export str PCRElikesubselect4(bat *ret, const bat *bid, const bat *cid, 
const str *pat, const bit *anti);
 mal_export str PCRElikesubselect5(bat *ret, const bat *bid, const bat *sid, 
const str *pat, const bit *anti);
 
+mal_export str LIKEsubjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, 
const str *esc, const bat *slid, const bat *srid, const bit *nil_matches, const 
lng *estimate);
+mal_export str LIKEsubjoin1(bat *r1, bat *r2, const bat *lid, const bat *rid, 
const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate);
+mal_export str ILIKEsubjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, 
const str *esc, const bat *slid, const bat *srid, const bit *nil_matches, const 
lng *estimate);
+mal_export str ILIKEsubjoin1(bat *r1, bat *r2, const bat *lid, const bat *rid, 
const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate);
+
 /* current implementation assumes simple %keyword% [keyw%]* */
 typedef struct RE {
        char *k;
@@ -105,24 +108,31 @@ static int
 re_simple(const char *pat)
 {
        int nr = 0;
-       const char *s = pat;
 
-       if (s == 0)
+       if (pat == 0)
                return 0;
-       if (*s == '%')
-               s++;
-       while(*s) {
-               if (*s == '_')
+       if (*pat == '%')
+               pat++;
+       while (*pat) {
+               if (*pat == '_')
                        return 0;
-               if (*s++ == '%')
+               if (*pat++ == '%')
                        nr++;
        }
-       if (*(s-1) != '%')
+       if (*(pat-1) != '%')
                return 0;
        return nr;
 }
 
 static int
+is_strcmpable(const char *pat, const str esc)
+{
+       if (pat[strcspn(pat, "%_")])
+               return 0;
+       return strlen(esc) == 0 || strstr(pat, esc) == NULL;
+}
+
+static int
 re_match_ignore(const char *s, RE *pattern)
 {
        RE *r;
@@ -153,7 +163,7 @@ re_match_no_ignore(const char *s, RE *pa
 }
 
 static RE *
-re_create( const char *pat, int nr)
+re_create(const char *pat, int nr)
 {
        char *x = GDKstrdup(pat);
        RE *r = (RE*)GDKmalloc(sizeof(RE)), *n = r;
@@ -387,7 +397,7 @@ pcre_likesubselect(BAT **bnp, BAT *b, BA
 }
 
 static str
-re_likesubselect(BAT **bnp, BAT *b, BAT *s, const char *pat, int caseignore, 
int anti)
+re_likesubselect(BAT **bnp, BAT *b, BAT *s, const char *pat, int caseignore, 
int anti, int use_strcmp)
 {
        BATiter bi = bat_iterator(b);
        BAT *bn;
@@ -405,10 +415,12 @@ re_likesubselect(BAT **bnp, BAT *b, BAT 
                throw(MAL, "pcre.likesubselect", MAL_MALLOC_FAIL);
        off = b->hseqbase;
 
-       nr = re_simple(pat);
-       re = re_create(pat, nr);
-       if (!re)
-               throw(MAL, "pcre.likesubselect", MAL_MALLOC_FAIL);
+       if (!use_strcmp) {
+               nr = re_simple(pat);
+               re = re_create(pat, nr);
+               if (!re)
+                       throw(MAL, "pcre.likesubselect", MAL_MALLOC_FAIL);
+       }
        if (s && !BATtdense(s)) {
                const oid *candlist;
                BUN r;
@@ -422,20 +434,38 @@ re_likesubselect(BAT **bnp, BAT *b, BAT 
                q = SORTfndfirst(s, &o);
                p = SORTfndfirst(s, &b->hseqbase);
                candlist = (const oid *) Tloc(s, p);
-               if (caseignore) {
-                       if (anti)
-                               candscanloop(v && *v != '\200' &&
-                                       re_match_ignore(v, re) == 0);
-                       else
-                               candscanloop(v && *v != '\200' &&
-                                       re_match_ignore(v, re));
+               if (use_strcmp) {
+                       if (caseignore) {
+                               if (anti)
+                                       candscanloop(v && *v != '\200' &&
+                                                                strcasecmp(v, 
pat) != 0);
+                               else
+                                       candscanloop(v && *v != '\200' &&
+                                                                strcasecmp(v, 
pat) == 0);
+                       } else {
+                               if (anti)
+                                       candscanloop(v && *v != '\200' &&
+                                                                strcmp(v, pat) 
!= 0);
+                               else
+                                       candscanloop(v && *v != '\200' &&
+                                                                strcmp(v, pat) 
== 0);
+                       }
                } else {
-                       if (anti)
-                               candscanloop(v && *v != '\200' &&
-                                       re_match_no_ignore(v, re) == 0);
-                       else
-                               candscanloop(v && *v != '\200' &&
-                                       re_match_no_ignore(v, re));
+                       if (caseignore) {
+                               if (anti)
+                                       candscanloop(v && *v != '\200' &&
+                                                                
re_match_ignore(v, re) == 0);
+                               else
+                                       candscanloop(v && *v != '\200' &&
+                                                                
re_match_ignore(v, re));
+                       } else {
+                               if (anti)
+                                       candscanloop(v && *v != '\200' &&
+                                                                
re_match_no_ignore(v, re) == 0);
+                               else
+                                       candscanloop(v && *v != '\200' &&
+                                                                
re_match_no_ignore(v, re));
+                       }
                }
        } else {
                if (s) {
@@ -450,20 +480,38 @@ re_likesubselect(BAT **bnp, BAT *b, BAT 
                        p = off;
                        q = BUNlast(b) + off;
                }
-               if (caseignore) {
-                       if (anti)
-                               scanloop(v && *v != '\200' &&
-                                       re_match_ignore(v, re) == 0);
-                       else
-                               scanloop(v && *v != '\200' &&
-                                       re_match_ignore(v, re));
+               if (use_strcmp) {
+                       if (caseignore) {
+                               if (anti)
+                                       scanloop(v && *v != '\200' &&
+                                                        strcasecmp(v, pat) != 
0);
+                               else
+                                       scanloop(v && *v != '\200' &&
+                                                        strcasecmp(v, pat) == 
0);
+                       } else {
+                               if (anti)
+                                       scanloop(v && *v != '\200' &&
+                                                        strcmp(v, pat) != 0);
+                               else
+                                       scanloop(v && *v != '\200' &&
+                                                        strcmp(v, pat) == 0);
+                       }
                } else {
-                       if (anti)
-                               scanloop(v && *v != '\200' &&
-                                       re_match_no_ignore(v, re) == 0);
-                       else
-                               scanloop(v && *v != '\200' &&
-                                       re_match_no_ignore(v, re));
+                       if (caseignore) {
+                               if (anti)
+                                       scanloop(v && *v != '\200' &&
+                                                        re_match_ignore(v, re) 
== 0);
+                               else
+                                       scanloop(v && *v != '\200' &&
+                                                        re_match_ignore(v, 
re));
+                       } else {
+                               if (anti)
+                                       scanloop(v && *v != '\200' &&
+                                                        re_match_no_ignore(v, 
re) == 0);
+                               else
+                                       scanloop(v && *v != '\200' &&
+                                                        re_match_no_ignore(v, 
re));
+                       }
                }
        }
        BATsetcount(bn, BATcount(bn)); /* set some properties */
@@ -1260,6 +1308,7 @@ PCRElikesubselect2(bat *ret, const bat *
        str res;
        char *ppat = NULL;
        int use_re = 0;
+       int use_strcmp = 0;
 
        if ((b = BATdescriptor(*bid)) == NULL) {
                throw(MAL, "algebra.likeselect", RUNTIME_OBJECT_MISSING);
@@ -1270,7 +1319,10 @@ PCRElikesubselect2(bat *ret, const bat *
        }
 
        /* no escape, try if a simple list of keywords works */
-       if ((strcmp(*esc, str_nil) == 0 || strlen(*esc) == 0) &&
+       if (is_strcmpable(*pat, *esc)) {
+               use_re = 1;
+               use_strcmp = 1;
+       } else if ((strcmp(*esc, str_nil) == 0 || strlen(*esc) == 0) &&
              re_simple(*pat) > 0) {
                use_re = 1;
        } else {
@@ -1296,7 +1348,7 @@ PCRElikesubselect2(bat *ret, const bat *
        }
 
        if (use_re) {
-               res = re_likesubselect(&bn, b, s, *pat, *caseignore, *anti);
+               res = re_likesubselect(&bn, b, s, *pat, *caseignore, *anti, 
use_strcmp);
        } else if (ppat == NULL) {
                /* no pattern and no special characters: can use normal select 
*/
                bn = BATselect(b, s, *pat, NULL, 1, 1, *anti);
@@ -1669,7 +1721,6 @@ PCREsubjoin(bat *r1, bat *r2, bat lid, b
        throw(MAL, "pcre.join", RUNTIME_OBJECT_MISSING);
 }
 
-mal_export str LIKEsubjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, 
const str *esc, const bat *slid, const bat *srid, const bit *nil_matches, const 
lng *estimate);
 str
 LIKEsubjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, const str *esc, 
const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate)
 {
@@ -1678,7 +1729,6 @@ LIKEsubjoin(bat *r1, bat *r2, const bat 
        return PCREsubjoin(r1, r2, *lid, *rid, slid ? *slid : 0, srid ? *srid : 
0, *esc, 0);
 }
 
-mal_export str LIKEsubjoin1(bat *r1, bat *r2, const bat *lid, const bat *rid, 
const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate);
 str
 LIKEsubjoin1(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat 
*slid, const bat *srid, const bit *nil_matches, const lng *estimate)
 {
@@ -1686,7 +1736,6 @@ LIKEsubjoin1(bat *r1, bat *r2, const bat
        return LIKEsubjoin(r1, r2, lid, rid, &esc, slid, srid, nil_matches, 
estimate);
 }
 
-mal_export str ILIKEsubjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, 
const str *esc, const bat *slid, const bat *srid, const bit *nil_matches, const 
lng *estimate);
 str
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to