Changeset: fe08edb4ab0f for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/fe08edb4ab0f
Branch: txtsim
Log Message:
Merge with txtsim-sub.
diffs (truncated from 3414 to 300 lines):
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -68,6 +68,8 @@
#endif
#include "mal_interpreter.h"
+#include "utf8.h"
+
/*
* UTF-8 Handling
* UTF-8 is a way to store Unicode strings in zero-terminated byte
@@ -3737,9 +3739,16 @@ STRupper(str *res, const str *arg1)
/* returns whether arg1 starts with arg2 */
bit
-str_is_prefix(const char *s, const char *prefix)
+str_is_prefix(const char *s, const char *prefix, int plen)
{
- return strncmp(s, prefix, strlen(prefix)) == 0;
+ return strncmp(s, prefix, plen) == 0;
+}
+
+bit
+str_is_iprefix(const char *s, const char *prefix, int plen)
+{
+ //return strncasecmp(s, prefix, plen) == 0;
+ return utf8ncasecmp(s, prefix, plen) == 0;
}
static str
@@ -3751,24 +3760,17 @@ STRstartsWith(Client cntxt, MalBlkPtr mb
const str *arg1 = getArgReference(stk, pci, 1), *arg2 =
getArgReference(stk, pci, 2);
bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true
: false;
str s = *arg1, prefix = *arg2, msg = MAL_SUCCEED;
-
- if (icase) {
- if ((msg = STRlower(&s, &s)) != MAL_SUCCEED)
- goto bail;
- if ((msg = STRlower(&prefix, &prefix)) != MAL_SUCCEED) {
- GDKfree(s);
- goto bail;
- }
- }
- *res = (strNil(s) || strNil(prefix)) ? bit_nil : str_is_prefix(s,
prefix);
- bail:
+ int plen = strlen(prefix);
+
+ *res = (strNil(s) || strNil(prefix)) ? bit_nil :
+ icase ? str_is_iprefix(s, prefix, plen) : str_is_prefix(s,
prefix, plen);
return msg;
}
bit
-str_is_suffix(const char *s, const char *suffix)
+str_is_suffix(const char *s, const char *suffix, int sul)
{
- size_t sl = strlen(s), sul = strlen(suffix);
+ int sl = strlen(s);
if (sl < sul)
return 0;
@@ -3776,6 +3778,19 @@ str_is_suffix(const char *s, const char
return strcmp(s + sl - sul, suffix) == 0;
}
+bit
+str_is_isuffix(const char *s, const char *suffix, int sul)
+{
+ int sl = strlen(s);
+
+ if (sl < sul)
+ return 0;
+ else
+ //return strcasecmp(s + sl - sul, suffix) == 0;
+ return utf8casecmp(s + sl - sul, suffix) == 0;
+}
+
+
/* returns whether arg1 ends with arg2 */
static str
STRendsWith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
@@ -3786,23 +3801,56 @@ STRendsWith(Client cntxt, MalBlkPtr mb,
const str *arg1 = getArgReference(stk, pci, 1), *arg2 =
getArgReference(stk, pci, 2);
bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true
: false;
str s = *arg1, suffix = *arg2, msg = MAL_SUCCEED;
-
- if (icase) {
- if ((msg = STRlower(&s, &s)) != MAL_SUCCEED)
- goto bail;
- if ((msg = STRlower(&suffix, &suffix)) != MAL_SUCCEED) {
- GDKfree(s);
- goto bail;
- }
- }
- *res = (strNil(s) || strNil(suffix)) ? bit_nil : str_is_suffix(s,
suffix);
- bail:
+ int sul = strlen(suffix);
+
+ *res = (strNil(s) || strNil(suffix)) ? bit_nil :
+ icase ? str_is_isuffix(s, suffix, sul) : str_is_suffix(s,
suffix, sul);
+ return msg;
+}
+
+bit
+str_contains(const char *h, const char *n, int nlen)
+{
+ (void)nlen;
+ /* 64bit: should return lng */
+ if (strstr(h, n) != NULL)
+ return TRUE;
+ else
+ return FALSE;
+}
+
+bit
+str_icontains(const char *h, const char *n, int nlen)
+{
+ (void)nlen;
+ /* 64bit: should return lng */
+ if (strcasestr(h, n) != NULL)
+ return TRUE;
+ else
+ return FALSE;
+}
+
+/* returns whether haystack contains needle */
+static str
+STRcontains(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+ (void)cntxt;
+ (void)mb;
+ bit *res = getArgReference(stk, pci, 0);
+ const str *arg1 = getArgReference(stk, pci, 1), *arg2 =
getArgReference(stk, pci, 2);
+ bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true
: false;
+ str haystack = *arg1, needle = *arg2, msg = MAL_SUCCEED;
+ int needle_len = strlen(needle);
+
+ *res = (strNil(haystack) || strNil(needle)) ? bit_nil :
+ icase ? str_icontains(haystack, needle, needle_len) :
str_contains(haystack, needle, needle_len);
return msg;
}
int
-str_search(const char *s, const char *s2)
+str_search(const char *s, const char *s2, int slen)
{
+ (void)slen;
/* 64bit: should return lng */
if ((s2 = strstr(s, s2)) != NULL)
return UTF8_strpos(s, s2);
@@ -3810,6 +3858,17 @@ str_search(const char *s, const char *s2
return -1;
}
+int
+str_isearch(const char *s, const char *s2, int slen)
+{
+ (void)slen;
+ /* 64bit: should return lng */
+ if ((s2 = strcasestr(s, s2)) != NULL)
+ return UTF8_strpos(s, s2);
+ else
+ return -1;
+}
+
/* find first occurrence of needle in haystack */
static str
STRstr_search(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
@@ -3820,25 +3879,18 @@ STRstr_search(Client cntxt, MalBlkPtr mb
const str *haystack = getArgReference(stk, pci, 1), *needle =
getArgReference(stk, pci, 2);
bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true
: false;
str s = *haystack, h = *needle, msg = MAL_SUCCEED;
-
- if (icase) {
- if ((msg = STRlower(&s, &s)) != MAL_SUCCEED)
- goto bail;
- if ((msg = STRlower(&h, &h)) != MAL_SUCCEED) {
- GDKfree(s);
- goto bail;
- }
- }
- *res = (strNil(s) || strNil(h)) ? bit_nil : str_search(s, h);
- bail:
+ int needle_len = strlen(h);
+
+ *res = (strNil(s) || strNil(h)) ? bit_nil :
+ icase ? str_isearch(s, h, needle_len) : str_search(s, h,
needle_len);
return msg;
}
int
-str_reverse_str_search(const char *s, const char *s2)
+str_reverse_str_search(const char *s, const char *s2, int slen)
{
/* 64bit: should return lng */
- size_t len = strlen(s), slen = strlen(s2);
+ int len = strlen(s);
int res = -1; /* changed if found */
if (len >= slen) {
@@ -3853,6 +3905,26 @@ str_reverse_str_search(const char *s, co
return res;
}
+int
+str_reverse_str_isearch(const char *s, const char *s2, int slen)
+{
+ /* 64bit: should return lng */
+ int len = strlen(s);
+ int res = -1; /* changed if found */
+
+ if (len >= slen) {
+ const char *p = s + len - slen;
+ do {
+ //if (strncasecmp(p, s2, slen) == 0) {
+ if (utf8ncasecmp(p, s2, slen) == 0) {
+ res = UTF8_strpos(s, p);
+ break;
+ }
+ } while (p-- > s);
+ }
+ return res;
+}
+
/* find last occurrence of arg2 in arg1 */
static str
STRrevstr_search(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
@@ -3864,17 +3936,10 @@ STRrevstr_search(Client cntxt, MalBlkPtr
const str *needle = getArgReference(stk, pci, 2);
bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true
: false;
str s = *haystack, h = *needle, msg = MAL_SUCCEED;
-
- if (icase) {
- if ((msg = STRlower(&s, &s)) != MAL_SUCCEED)
- goto bail;
- if ((msg = STRlower(&h, &h)) != MAL_SUCCEED) {
- GDKfree(s);
- goto bail;
- }
- }
- *res = (strNil(s) || strNil(h)) ? bit_nil : str_reverse_str_search(s,
h);
- bail:
+ int needle_len = strlen(h);
+
+ *res = (strNil(s) || strNil(h)) ? bit_nil :
+ icase ? str_reverse_str_isearch(s, h, needle_len) :
str_reverse_str_search(s, h, needle_len);
return msg;
}
@@ -4761,7 +4826,7 @@ str_locate2(const char *needle, const ch
off = start <= 0 ? 1 : start;
s = UTF8_strtail(haystack, off - 1);
- res = str_search(s, needle);
+ res = str_search(s, needle, strlen(needle));
return res >= 0 ? res + off : 0;
}
@@ -5031,6 +5096,8 @@ mel_func str_init_funcs[] = {
pattern("str", "startsWith", STRstartsWith, false, "Check if string starts
with substring, icase flag.", args(1,4,
arg("",bit),arg("s",str),arg("prefix",str),arg("icase",bit))),
pattern("str", "endsWith", STRendsWith, false, "Check if string ends with
substring.", args(1,3, arg("",bit),arg("s",str),arg("suffix",str))),
pattern("str", "endsWith", STRendsWith, false, "Check if string ends with
substring, icase flag.", args(1,4,
arg("",bit),arg("s",str),arg("suffix",str),arg("icase",bit))),
+ pattern("str", "contains", STRcontains, false, "Check if string haystack
contains string needle.", args(1,3,
arg("",bit),arg("haystack",str),arg("needle",str))),
+ pattern("str", "contains", STRcontains, false, "Check if string haystack
contains string needle, icase flag.", args(1,4,
arg("",bit),arg("haystack",str),arg("needle",str),arg("icase",bit))),
command("str", "toLower", STRlower, false, "Convert a string to lower case.",
args(1,2, arg("",str),arg("s",str))),
command("str", "toUpper", STRupper, false, "Convert a string to upper case.",
args(1,2, arg("",str),arg("s",str))),
pattern("str", "search", STRstr_search, false, "Search for a substring.
Returns\nposition, -1 if not found.", args(1,3,
arg("",int),arg("s",str),arg("c",str))),
diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h
--- a/monetdb5/modules/atoms/str.h
+++ b/monetdb5/modules/atoms/str.h
@@ -155,9 +155,17 @@ extern str str_from_wchr(str *buf, size_
extern str str_wchr_at(int *res, const char *s, int at)
__attribute__((__visibility__("hidden")));
-extern bit str_is_prefix(const char *s, const char *prefix)
+extern bit str_is_prefix(const char *s, const char *prefix, int plen)
+__attribute__((__visibility__("hidden")));
+extern bit str_is_iprefix(const char *s, const char *prefix, int plen)
+__attribute__((__visibility__("hidden")));
+extern bit str_is_suffix(const char *s, const char *suffix, int sul)
__attribute__((__visibility__("hidden")));
-extern bit str_is_suffix(const char *s, const char *suffix)
+extern bit str_is_isuffix(const char *s, const char *suffix, int sul)
+__attribute__((__visibility__("hidden")));
+extern bit str_contains(const char *h, const char *n, int nlen)
+__attribute__((__visibility__("hidden")));
+extern bit str_icontains(const char *h, const char *n, int nlen)
__attribute__((__visibility__("hidden")));
extern str str_tail(str *buf, size_t *buflen, const char *s, int off)
@@ -205,9 +213,13 @@ extern str str_lpad3(str *buf, size_t *b
extern str str_rpad3(str *buf, size_t *buflen, const char *s, int len, const
char *s2)
__attribute__((__visibility__("hidden")));
-extern int str_search(const char *s, const char *s2)
+extern int str_search(const char *s, const char *needle, int needle_len)
+__attribute__((__visibility__("hidden")));
+extern int str_isearch(const char *s, const char *needle, int needle_len)
__attribute__((__visibility__("hidden")));
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]