Changeset: fe08edb4ab0f for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/fe08edb4ab0f
Branch: txtsim
Log Message:

Merge with txtsim-sub.


diffs (truncated from 3414 to 300 lines):

diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -68,6 +68,8 @@
 #endif
 #include "mal_interpreter.h"
 
+#include "utf8.h"
+
 /*
  * UTF-8 Handling
  * UTF-8 is a way to store Unicode strings in zero-terminated byte
@@ -3737,9 +3739,16 @@ STRupper(str *res, const str *arg1)
 
 /* returns whether arg1 starts with arg2 */
 bit
-str_is_prefix(const char *s, const char *prefix)
+str_is_prefix(const char *s, const char *prefix, int plen)
 {
-       return strncmp(s, prefix, strlen(prefix)) == 0;
+       return strncmp(s, prefix, plen) == 0;
+}
+
+bit
+str_is_iprefix(const char *s, const char *prefix, int plen)
+{
+       /* Case-insensitive counterpart of str_is_prefix: utf8ncasecmp is used in place of plain strncasecmp(s, prefix, plen). NOTE(review): the visible caller passes strlen(prefix), i.e. a byte count - confirm utf8ncasecmp expects bytes. */
+       return utf8ncasecmp(s, prefix, plen) == 0;
 }
 
 static str
@@ -3751,24 +3760,17 @@ STRstartsWith(Client cntxt, MalBlkPtr mb
        const str *arg1 = getArgReference(stk, pci, 1), *arg2 = 
getArgReference(stk, pci, 2);
        bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true 
: false;
        str s = *arg1, prefix = *arg2, msg = MAL_SUCCEED;
-
-       if (icase) {
-               if ((msg = STRlower(&s, &s)) != MAL_SUCCEED)
-                       goto bail;
-               if ((msg = STRlower(&prefix, &prefix)) != MAL_SUCCEED) {
-                       GDKfree(s);
-                       goto bail;
-               }
-       }
-       *res = (strNil(s) || strNil(prefix)) ? bit_nil : str_is_prefix(s, 
prefix);
- bail:
+       int plen = strlen(prefix);
+
+       *res = (strNil(s) || strNil(prefix)) ? bit_nil :
+               icase ? str_is_iprefix(s, prefix, plen) : str_is_prefix(s, 
prefix, plen);
        return msg;
 }
 
 bit
-str_is_suffix(const char *s, const char *suffix)
+str_is_suffix(const char *s, const char *suffix, int sul)
 {
-       size_t sl = strlen(s), sul = strlen(suffix);
+       int sl = strlen(s);
 
        if (sl < sul)
                return 0;
@@ -3776,6 +3778,19 @@ str_is_suffix(const char *s, const char 
                return strcmp(s + sl - sul, suffix) == 0;
 }
 
+bit
+str_is_isuffix(const char *s, const char *suffix, int sul)
+{
+       int sl = strlen(s); /* byte length of s; sul is the caller-supplied length of suffix */
+
+       if (sl < sul)
+               return 0;
+       else
+               /* utf8casecmp is used in place of plain strcasecmp(s + sl - sul, suffix). NOTE(review): sl - sul is a byte offset - confirm it always lands on a UTF-8 character boundary when case variants differ in encoded length. */
+               return utf8casecmp(s + sl - sul, suffix) == 0;
+}
+
+
 /* returns whether arg1 ends with arg2 */
 static str
 STRendsWith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
@@ -3786,23 +3801,56 @@ STRendsWith(Client cntxt, MalBlkPtr mb, 
        const str *arg1 = getArgReference(stk, pci, 1), *arg2 = 
getArgReference(stk, pci, 2);
        bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true 
: false;
        str s = *arg1, suffix = *arg2, msg = MAL_SUCCEED;
-
-       if (icase) {
-               if ((msg = STRlower(&s, &s)) != MAL_SUCCEED)
-                       goto bail;
-               if ((msg = STRlower(&suffix, &suffix)) != MAL_SUCCEED) {
-                       GDKfree(s);
-                       goto bail;
-               }
-       }
-       *res = (strNil(s) || strNil(suffix)) ? bit_nil : str_is_suffix(s, 
suffix);
- bail:
+       int sul = strlen(suffix);
+
+       *res = (strNil(s) || strNil(suffix)) ? bit_nil :
+               icase ? str_is_isuffix(s, suffix, sul) : str_is_suffix(s, 
suffix, sul);
+       return msg;
+}
+
+bit
+str_contains(const char *h, const char *n, int nlen)
+{
+       (void)nlen;
+       /* Returns TRUE when n occurs in h, FALSE otherwise; nlen is unused because strstr works on the NUL-terminated n. */
+       if (strstr(h, n) != NULL)
+               return TRUE;
+       else
+               return FALSE;
+}
+
+bit
+str_icontains(const char *h, const char *n, int nlen)
+{
+       (void)nlen;
+       /* Returns TRUE when strcasestr finds n in h, FALSE otherwise; nlen is unused. NOTE(review): strcasestr is a non-standard extension, whereas str_is_iprefix/str_is_isuffix call utf8ncasecmp/utf8casecmp - confirm the difference is intended for UTF-8 input. */
+       if (strcasestr(h, n) != NULL)
+               return TRUE;
+       else
+               return FALSE;
+}
+
+/* Sets *res to bit_nil when haystack or needle is nil, otherwise to whether haystack contains needle; a true 4th (bit) argument, when present, selects the case-insensitive str_icontains. Always returns MAL_SUCCEED. */
+static str
+STRcontains(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void)cntxt;
+       (void)mb;
+       bit *res = getArgReference(stk, pci, 0);
+       const str *arg1 = getArgReference(stk, pci, 1), *arg2 = 
getArgReference(stk, pci, 2);
+       bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true 
: false;
+       str haystack = *arg1, needle = *arg2, msg = MAL_SUCCEED;
+       int needle_len = strlen(needle); /* NOTE(review): taken before the nil check below; the size_t result is narrowed to int */
+
+       *res = (strNil(haystack) || strNil(needle)) ? bit_nil :
+               icase ? str_icontains(haystack, needle, needle_len) : 
str_contains(haystack, needle, needle_len);
        return msg;
 }
 
 int
-str_search(const char *s, const char *s2)
+str_search(const char *s, const char *s2, int slen)
 {
+       (void)slen;
        /* 64bit: should return lng */
        if ((s2 = strstr(s, s2)) != NULL)
                return UTF8_strpos(s, s2);
@@ -3810,6 +3858,17 @@ str_search(const char *s, const char *s2
                return -1;
 }
 
+int
+str_isearch(const char *s, const char *s2, int slen)
+{
+       (void)slen;
+       /* 64bit: should return lng. Returns UTF8_strpos of the first strcasestr match of s2 in s, or -1 when there is none; slen is unused. NOTE(review): strcasestr is a non-standard extension - confirm it is suitable for UTF-8 input. */
+       if ((s2 = strcasestr(s, s2)) != NULL)
+               return UTF8_strpos(s, s2);
+       else
+               return -1;
+}
+
 /* find first occurrence of needle in haystack */
 static str
 STRstr_search(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
@@ -3820,25 +3879,18 @@ STRstr_search(Client cntxt, MalBlkPtr mb
        const str *haystack = getArgReference(stk, pci, 1), *needle = 
getArgReference(stk, pci, 2);
        bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true 
: false;
        str s = *haystack, h = *needle, msg = MAL_SUCCEED;
-
-       if (icase) {
-               if ((msg = STRlower(&s, &s)) != MAL_SUCCEED)
-                       goto bail;
-               if ((msg = STRlower(&h, &h)) != MAL_SUCCEED) {
-                       GDKfree(s);
-                       goto bail;
-               }
-       }
-       *res = (strNil(s) || strNil(h)) ? bit_nil : str_search(s, h);
- bail:
+       int needle_len = strlen(h);
+
+       *res = (strNil(s) || strNil(h)) ? bit_nil :
+               icase ? str_isearch(s, h, needle_len) : str_search(s, h, 
needle_len);
        return msg;
 }
 
 int
-str_reverse_str_search(const char *s, const char *s2)
+str_reverse_str_search(const char *s, const char *s2, int slen)
 {
        /* 64bit: should return lng */
-       size_t len = strlen(s), slen = strlen(s2);
+       int len = strlen(s);
        int res = -1; /* changed if found */
 
        if (len >= slen) {
@@ -3853,6 +3905,26 @@ str_reverse_str_search(const char *s, co
        return res;
 }
 
+int
+str_reverse_str_isearch(const char *s, const char *s2, int slen)
+{
+       /* 64bit: should return lng */
+       int len = strlen(s);
+       int res = -1; /* changed if found */
+
+       if (len >= slen) {
+               const char *p = s + len - slen; /* last byte offset at which slen bytes still fit in s */
+               do {
+                       /* utf8ncasecmp is used in place of plain strncasecmp(p, s2, slen). NOTE(review): p moves back one byte per iteration, so it can point inside a multi-byte character - confirm utf8ncasecmp copes. */
+                       if (utf8ncasecmp(p, s2, slen) == 0) {
+                               res = UTF8_strpos(s, p);
+                               break;
+                       }
+               } while (p-- > s); /* NOTE(review): when p == s the test fails but p-- still forms s - 1, a pointer before the string start (undefined in ISO C) */
+       }
+       return res;
+}
+
 /* find last occurrence of arg2 in arg1 */
 static str
 STRrevstr_search(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
@@ -3864,17 +3936,10 @@ STRrevstr_search(Client cntxt, MalBlkPtr
        const str *needle = getArgReference(stk, pci, 2);
        bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true 
: false;
        str s = *haystack, h = *needle, msg = MAL_SUCCEED;
-
-       if (icase) {
-               if ((msg = STRlower(&s, &s)) != MAL_SUCCEED)
-                       goto bail;
-               if ((msg = STRlower(&h, &h)) != MAL_SUCCEED) {
-                       GDKfree(s);
-                       goto bail;
-               }
-       }
-       *res = (strNil(s) || strNil(h)) ? bit_nil : str_reverse_str_search(s, 
h);
- bail:
+       int needle_len = strlen(h);
+
+       *res = (strNil(s) || strNil(h)) ? bit_nil :
+               icase ? str_reverse_str_isearch(s, h, needle_len) : 
str_reverse_str_search(s, h, needle_len);
        return msg;
 }
 
@@ -4761,7 +4826,7 @@ str_locate2(const char *needle, const ch
 
        off = start <= 0 ? 1 : start;
        s = UTF8_strtail(haystack, off - 1);
-       res = str_search(s, needle);
+       res = str_search(s, needle, strlen(needle));
        return res >= 0 ? res + off : 0;
 }
 
@@ -5031,6 +5096,8 @@ mel_func str_init_funcs[] = {
  pattern("str", "startsWith", STRstartsWith, false, "Check if string starts 
with substring, icase flag.", args(1,4, 
arg("",bit),arg("s",str),arg("prefix",str),arg("icase",bit))),
  pattern("str", "endsWith", STRendsWith, false, "Check if string ends with 
substring.", args(1,3, arg("",bit),arg("s",str),arg("suffix",str))),
  pattern("str", "endsWith", STRendsWith, false, "Check if string ends with 
substring, icase flag.", args(1,4, 
arg("",bit),arg("s",str),arg("suffix",str),arg("icase",bit))),
+ pattern("str", "contains", STRcontains, false, "Check if string haystack 
contains string needle.", args(1,3, 
arg("",bit),arg("haystack",str),arg("needle",str))),
+ pattern("str", "contains", STRcontains, false, "Check if string chaystack 
contains string needle, icase flag.", args(1,4, 
arg("",bit),arg("haystack",str),arg("needle",str),arg("icase",bit))),
  command("str", "toLower", STRlower, false, "Convert a string to lower case.", 
args(1,2, arg("",str),arg("s",str))),
  command("str", "toUpper", STRupper, false, "Convert a string to upper case.", 
args(1,2, arg("",str),arg("s",str))),
  pattern("str", "search", STRstr_search, false, "Search for a substring. 
Returns\nposition, -1 if not found.", args(1,3, 
arg("",int),arg("s",str),arg("c",str))),
diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h
--- a/monetdb5/modules/atoms/str.h
+++ b/monetdb5/modules/atoms/str.h
@@ -155,9 +155,17 @@ extern str str_from_wchr(str *buf, size_
 extern str str_wchr_at(int *res, const char *s, int at)
 __attribute__((__visibility__("hidden")));
 
-extern bit str_is_prefix(const char *s, const char *prefix)
+extern bit str_is_prefix(const char *s, const char *prefix, int plen)
+__attribute__((__visibility__("hidden")));
+extern bit str_is_iprefix(const char *s, const char *prefix, int plen)
+__attribute__((__visibility__("hidden")));
+extern bit str_is_suffix(const char *s, const char *suffix, int sul)
 __attribute__((__visibility__("hidden")));
-extern bit str_is_suffix(const char *s, const char *suffix)
+extern bit str_is_isuffix(const char *s, const char *suffix, int sul)
+__attribute__((__visibility__("hidden")));
+extern bit str_contains(const char *h, const char *n, int nlen)
+__attribute__((__visibility__("hidden")));
+extern bit str_icontains(const char *h, const char *n, int nlen)
 __attribute__((__visibility__("hidden")));
 
 extern str str_tail(str *buf, size_t *buflen, const char *s, int off)
@@ -205,9 +213,13 @@ extern str str_lpad3(str *buf, size_t *b
 extern str str_rpad3(str *buf, size_t *buflen, const char *s, int len, const 
char *s2)
 __attribute__((__visibility__("hidden")));
 
-extern int str_search(const char *s, const char *s2)
+extern int str_search(const char *s, const char *needle, int needle_len)
+__attribute__((__visibility__("hidden")));
+extern int str_isearch(const char *s, const char *needle, int needle_len)
 __attribute__((__visibility__("hidden")));
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to