Changeset: e6b7116d2d4c for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/e6b7116d2d4c
Modified Files:
        monetdb5/modules/atoms/str.c
        monetdb5/modules/atoms/str.h
        monetdb5/modules/kernel/batstr.c
Branch: txtsim
Log Message:

Finished the case-insensitive (icase) variants of startsWith, endsWith and
search, for both BAT and non-BAT arguments. Still needs tests and cleanup of
some duplicated code.


diffs (truncated from 1125 to 300 lines):

diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3548,7 +3548,7 @@ str_lower(str *buf, size_t *buflen, cons
        return convertCase(UTF8_toLowerFrom, UTF8_toLowerTo, buf, buflen, s, 
"str.lower");
 }
 
-static str
+str
 STRlower(str *res, const str *arg1)
 {
        str buf = NULL, msg = MAL_SUCCEED;
@@ -3635,14 +3635,12 @@ STRstartsWith(Client cntxt, MalBlkPtr mb
        bit *res = getArgReference(stk, pci, 0);
        const str *arg1 = getArgReference(stk, pci, 1);
        const str *arg2 = getArgReference(stk, pci, 2);
-       bit cs = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true : 
false;
-
-       str s = *arg1, prefix = *arg2;
-       str s_lower, prefix_lower, msg = MAL_SUCCEED;
-
-       if (cs) {
-               if ((msg = STRlower(&s, &s_lower)) != MAL_SUCCEED ||
-                       (msg = STRlower(&prefix, &prefix_lower)) != MAL_SUCCEED)
+       bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true 
: false;
+       str s = *arg1, prefix = *arg2, msg = MAL_SUCCEED;
+
+       if (icase) {
+               if ((msg = STRlower(&s, &s)) != MAL_SUCCEED ||
+                       (msg = STRlower(&prefix, &prefix)) != MAL_SUCCEED)
                        goto bail;
        }
        *res = (strNil(s) || strNil(prefix)) ? bit_nil :str_is_prefix(s, 
prefix) ;
@@ -3670,12 +3668,12 @@ STRendsWith(Client cntxt, MalBlkPtr mb, 
        bit *res = getArgReference(stk, pci, 0);
        const str *arg1 = getArgReference(stk, pci, 1);
        const str *arg2 = getArgReference(stk, pci, 2);
-       bit cs = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true : 
false;
+       bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true 
: false;
 
        str s = *arg1, suffix = *arg2;
        str s_lower, suffix_lower, msg = MAL_SUCCEED;
 
-       if (cs) {
+       if (icase) {
                if ((msg = STRlower(&s, &s_lower)) != MAL_SUCCEED ||
                        (msg = STRlower(&suffix, &suffix_lower)) != MAL_SUCCEED)
                        goto bail;
@@ -3704,12 +3702,12 @@ STRstrSearch(Client cntxt, MalBlkPtr mb,
        bit *res = getArgReference(stk, pci, 0);
        const str *haystack = getArgReference(stk, pci, 1);
        const str *needle = getArgReference(stk, pci, 2);
-       bit cs = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true : 
false;
+       bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true 
: false;
 
        str s = *haystack, h = *needle;
        str s_lower, h_lower, msg = MAL_SUCCEED;
 
-       if (cs) {
+       if (icase) {
                if ((msg = STRlower(&s, &s_lower)) != MAL_SUCCEED ||
                        (msg = STRlower(&h, &h_lower)) != MAL_SUCCEED)
                        goto bail;
@@ -4898,13 +4896,13 @@ mel_func str_init_funcs[] = {
  command("str", "unicodeAt", STRWChrAt, false, "get a unicode character\n(as 
an int) from a string position.", args(1,3, 
arg("",int),arg("s",str),arg("index",int))),
  command("str", "unicode", STRFromWChr, false, "convert a unicode to a 
character.", args(1,2, arg("",str),arg("wchar",int))),
  pattern("str", "startsWith", STRstartsWith, false, "Check if string starts 
with substring.", args(1,3, arg("",bit),arg("s",str),arg("prefix",str))),
- pattern("str", "startsWith", STRstartsWith, false, "Check if string starts 
with substring, case insensitive flag.", args(1,4, 
arg("",bit),arg("s",str),arg("prefix",str),arg("cs",bit))),
+ pattern("str", "startsWith", STRstartsWith, false, "Check if string starts 
with substring, icase flag.", args(1,4, 
arg("",bit),arg("s",str),arg("prefix",str),arg("icase",bit))),
  pattern("str", "endsWith", STRendsWith, false, "Check if string ends with 
substring.", args(1,3, arg("",bit),arg("s",str),arg("suffix",str))),
- pattern("str", "endsWith", STRendsWith, false, "Check if string ends with 
substring, case insensitive flag.", args(1,4, 
arg("",bit),arg("s",str),arg("suffix",str),arg("cs",bit))),
+ pattern("str", "endsWith", STRendsWith, false, "Check if string ends with 
substring, icase flag.", args(1,4, 
arg("",bit),arg("s",str),arg("suffix",str),arg("icase",bit))),
  command("str", "toLower", STRlower, false, "Convert a string to lower case.", 
args(1,2, arg("",str),arg("s",str))),
  command("str", "toUpper", STRupper, false, "Convert a string to upper case.", 
args(1,2, arg("",str),arg("s",str))),
  pattern("str", "search", STRstrSearch, false, "Search for a substring. 
Returns\nposition, -1 if not found.", args(1,3, 
arg("",int),arg("s",str),arg("c",str))),
- pattern("str", "search", STRstrSearch, false, "Search for a substring, case 
insensitive flag. Returns\nposition, -1 if not found.", args(1,4, 
arg("",int),arg("s",str),arg("c",str),arg("cs",bit))),
+ pattern("str", "search", STRstrSearch, false, "Search for a substring, icase 
flag. Returns\nposition, -1 if not found.", args(1,4, 
arg("",int),arg("s",str),arg("c",str),arg("icase",bit))),
  command("str", "r_search", STRReverseStrSearch, false, "Reverse search for a 
substring. Returns\nposition, -1 if not found.", args(1,3, 
arg("",int),arg("s",str),arg("c",str))),
  command("str", "splitpart", STRsplitpart, false, "Split string on delimiter. 
Returns\ngiven field (counting from one.)", args(1,4, 
arg("",str),arg("s",str),arg("needle",str),arg("field",int))),
  command("str", "trim", STRStrip, false, "Strip whitespaces around a string.", 
args(1,2, arg("",str),arg("s",str))),
diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h
--- a/monetdb5/modules/atoms/str.h
+++ b/monetdb5/modules/atoms/str.h
@@ -254,6 +254,7 @@ UTF8_strwidth(const char *restrict s)
 }
 
 mal_export bool batstr_func_has_candidates(const char *func);
+mal_export str STRlower(str *res, const str *arg1);
 
 /* For str returning functions, the result is passed as the input parameter 
buf. The returned str indicates
    if the function succeeded (ie malloc failure or invalid unicode character). 
str_wchr_at function also
diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c
--- a/monetdb5/modules/kernel/batstr.c
+++ b/monetdb5/modules/kernel/batstr.c
@@ -1632,10 +1632,12 @@ STRbatRpad3_bat_bat(Client cntxt, MalBlk
  * head column. This is not checked and may be mis-used to deploy the
  * implementation for shifted window arithmetic as well.
  */
-
 static str
-prefix_or_suffix(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, 
const char *name, bit (*func)(const char*, const char*))
+prefix_or_suffix(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, 
const char *name, bit (*func)(const char*, const char*), bit *icase)
 {
+       (void) cntxt;
+       (void) mb;
+
        BATiter lefti, righti;
        BAT *bn = NULL, *left = NULL, *lefts = NULL, *right = NULL, *rights = 
NULL;
        bit *restrict vals;
@@ -1648,8 +1650,6 @@ prefix_or_suffix(Client cntxt, MalBlkPtr
                *sid1 = pci->argc == 5 ? getArgReference_bat(stk, pci, 3) : 
NULL,
                *sid2 = pci->argc == 5 ? getArgReference_bat(stk, pci, 4) : 
NULL;
 
-       (void) cntxt;
-       (void) mb;
        if (!(left = BATdescriptor(*l)) || !(right = BATdescriptor(*r))) {
                msg = createException(MAL, name, SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
                goto bailout;
@@ -1677,26 +1677,36 @@ prefix_or_suffix(Client cntxt, MalBlkPtr
        if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) {
                for (BUN i = 0; i < ci1.ncand; i++) {
                        oid p1 = (canditer_next_dense(&ci1) - off1), p2 = 
(canditer_next_dense(&ci2) - off2);
-                       const char *x = BUNtvar(lefti, p1);
-                       const char *y = BUNtvar(righti, p2);
+                       char *x = BUNtvar(lefti, p1);
+                       char *y = BUNtvar(righti, p2);
 
                        if (strNil(x) || strNil(y)) {
                                vals[i] = int_nil;
                                nils = true;
                        } else {
+                               if (icase && *icase) {
+                                       if ((msg = STRlower(&x, &x)) != 
MAL_SUCCEED ||
+                                               (msg = STRlower(&y, &y)) != 
MAL_SUCCEED)
+                                               goto bailout;
+                               }
                                vals[i] = func(x, y);
                        }
                }
        } else {
                for (BUN i = 0; i < ci1.ncand; i++) {
                        oid p1 = (canditer_next(&ci1) - off1), p2 = 
(canditer_next(&ci2) - off2);
-                       const char *x = BUNtvar(lefti, p1);
-                       const char *y = BUNtvar(righti, p2);
+                       char *x = BUNtvar(lefti, p1);
+                       char *y = BUNtvar(righti, p2);
 
                        if (strNil(x) || strNil(y)) {
                                vals[i] = int_nil;
                                nils = true;
                        } else {
+                               if (icase && *icase) {
+                                       if ((msg = STRlower(&x, &x)) != 
MAL_SUCCEED ||
+                                               (msg = STRlower(&y, &y)) != 
MAL_SUCCEED)
+                                               goto bailout;
+                               }
                                vals[i] = func(x, y);
                        }
                }
@@ -1710,33 +1720,69 @@ bailout:
 }
 
 static str
-STRbatPrefix(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+BATSTRstarts_with(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
-       return prefix_or_suffix(cntxt, mb, stk, pci, "batstr.startsWith", 
str_is_prefix);
-}
-
-static str
-STRbatSuffix(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
-{
-       return prefix_or_suffix(cntxt, mb, stk, pci, "batstr.endsWith", 
str_is_suffix);
+       bit *icase;
+       switch (pci->argc) {
+       case 4:
+               icase = getArgReference_bit(stk, pci, 3);
+               break;
+       case 6:
+               icase = getArgReference_bit(stk, pci, 5);
+               break;
+       default:
+               icase = NULL;
+               break;
+       }
+       return prefix_or_suffix(cntxt, mb, stk, pci, "batstr.startsWith", 
str_is_prefix, icase);
 }
 
 static str
-prefix_or_suffix_cst(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, 
const char *name, bit (*func)(const char*, const char*))
+BATSTRends_with(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
+       bit *icase;
+       switch (pci->argc) {
+       case 4:
+               icase = getArgReference_bit(stk, pci, 3);
+               break;
+       case 6:
+               icase = getArgReference_bit(stk, pci, 5);
+               break;
+       default:
+               icase = NULL;
+               break;
+       }
+       return prefix_or_suffix(cntxt, mb, stk, pci, "batstr.endsWith", 
str_is_suffix, icase);
+}
+
+static str
+prefix_or_suffix_cst(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, 
const char *name, bit (*func)(const char*, const char*), bit *icase)
+{
+       (void) cntxt;
+       (void) mb;
+
        BATiter bi;
        BAT *bn = NULL, *b = NULL, *bs = NULL;
        bit *restrict vals;
-       const char *y = *getArgReference_str(stk, pci, 2);
-       str msg = MAL_SUCCEED;
+       str y = *getArgReference_str(stk, pci, 2), msg = MAL_SUCCEED;
        bool nils = false;
        struct canditer ci1 = {0};
        oid off1;
-       bat *res = getArgReference_bat(stk, pci, 0), *bid = 
getArgReference_bat(stk, pci, 1),
-               *sid1 = pci->argc == 4 ? getArgReference_bat(stk, pci, 3) : 
NULL;
-
-       (void) cntxt;
-       (void) mb;
+       bat *res = getArgReference_bat(stk, pci, 0), *bid = 
getArgReference_bat(stk, pci, 1), *sid1;
+       /* checking if icase is ~NULL and not if it is true or false */
+       if (pci->argc == 4 || pci->argc == 5) {
+               if (icase)
+                       sid1 = NULL;
+               else {
+                       assert(getArgType(mb, pci, 3) == TYPE_bat);
+                       sid1 = getArgReference_bat(stk, pci, 3);
+               }
+       }
+
+       if (icase && *icase) {
+               if ((msg = STRlower(&y, &y)) != MAL_SUCCEED)
+                       goto bailout;
+       }
        if (!(b = BATdescriptor(*bid))) {
                msg = createException(MAL, name, SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
                goto bailout;
@@ -1757,24 +1803,32 @@ prefix_or_suffix_cst(Client cntxt, MalBl
        if (ci1.tpe == cand_dense) {
                for (BUN i = 0; i < ci1.ncand; i++) {
                        oid p1 = (canditer_next_dense(&ci1) - off1);
-                       const char *x = BUNtvar(bi, p1);
+                       char *x = BUNtvar(bi, p1);
 
                        if (strNil(x) || strNil(y)) {
                                vals[i] = bit_nil;
                                nils = true;
                        } else {
+                               if (icase && *icase) {
+                                       if ((msg = STRlower(&x, &x)) != 
MAL_SUCCEED)
+                                               goto bailout;
+                               }
                                vals[i] = func(x, y);
                        }
                }
        } else {
                for (BUN i = 0; i < ci1.ncand; i++) {
                        oid p1 = (canditer_next(&ci1) - off1);
-                       const char *x = BUNtvar(bi, p1);
+                       char *x = BUNtvar(bi, p1);
 
                        if (strNil(x) || strNil(y)) {
                                vals[i] = int_nil;
                                nils = true;
                        } else {
+                               if(icase && *icase) {
+                                       if ((msg = STRlower(&x, &x)) != 
MAL_SUCCEED)
+                                               goto bailout;
+                               }
                                vals[i] = func(x, y);
                        }
                }
@@ -1787,33 +1841,74 @@ bailout:
 }
 
 static str
-STRbatPrefixcst(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+BATSTRstarts_with_cst(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
-       return prefix_or_suffix_cst(cntxt, mb, stk, pci, "batstr.startsWith", 
str_is_prefix);
+       bit *icase = NULL;
+       switch (pci->argc) {
+       case 4:
+               if (getArgType(mb, pci, 3) == TYPE_bit) {
+                       icase = getArgReference_bit(stk, pci, 3);
+               }
+               break;
+       case 5:
+               icase = getArgReference_bit(stk, pci, 4);
+               break;
+       default:
+               icase = NULL;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to