Changeset: e6b7116d2d4c for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/e6b7116d2d4c
Modified Files:
monetdb5/modules/atoms/str.c
monetdb5/modules/atoms/str.h
monetdb5/modules/kernel/batstr.c
Branch: txtsim
Log Message:
Finished case-insensitive startsWith, endsWith and search for both the BAT and
non-BAT variants. Still needs tests and cleanup of some duplicated code.
diffs (truncated from 1125 to 300 lines):
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3548,7 +3548,7 @@ str_lower(str *buf, size_t *buflen, cons
return convertCase(UTF8_toLowerFrom, UTF8_toLowerTo, buf, buflen, s,
"str.lower");
}
-static str
+str
STRlower(str *res, const str *arg1)
{
str buf = NULL, msg = MAL_SUCCEED;
@@ -3635,14 +3635,12 @@ STRstartsWith(Client cntxt, MalBlkPtr mb
bit *res = getArgReference(stk, pci, 0);
const str *arg1 = getArgReference(stk, pci, 1);
const str *arg2 = getArgReference(stk, pci, 2);
- bit cs = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true :
false;
-
- str s = *arg1, prefix = *arg2;
- str s_lower, prefix_lower, msg = MAL_SUCCEED;
-
- if (cs) {
- if ((msg = STRlower(&s, &s_lower)) != MAL_SUCCEED ||
- (msg = STRlower(&prefix, &prefix_lower)) != MAL_SUCCEED)
+ bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true
: false;
+ str s = *arg1, prefix = *arg2, msg = MAL_SUCCEED;
+
+ if (icase) {
+ if ((msg = STRlower(&s, &s)) != MAL_SUCCEED ||
+ (msg = STRlower(&prefix, &prefix)) != MAL_SUCCEED)
goto bail;
}
*res = (strNil(s) || strNil(prefix)) ? bit_nil :str_is_prefix(s,
prefix) ;
@@ -3670,12 +3668,12 @@ STRendsWith(Client cntxt, MalBlkPtr mb,
bit *res = getArgReference(stk, pci, 0);
const str *arg1 = getArgReference(stk, pci, 1);
const str *arg2 = getArgReference(stk, pci, 2);
- bit cs = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true :
false;
+ bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true
: false;
str s = *arg1, suffix = *arg2;
str s_lower, suffix_lower, msg = MAL_SUCCEED;
- if (cs) {
+ if (icase) {
if ((msg = STRlower(&s, &s_lower)) != MAL_SUCCEED ||
(msg = STRlower(&suffix, &suffix_lower)) != MAL_SUCCEED)
goto bail;
@@ -3704,12 +3702,12 @@ STRstrSearch(Client cntxt, MalBlkPtr mb,
bit *res = getArgReference(stk, pci, 0);
const str *haystack = getArgReference(stk, pci, 1);
const str *needle = getArgReference(stk, pci, 2);
- bit cs = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true :
false;
+ bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3) ? true
: false;
str s = *haystack, h = *needle;
str s_lower, h_lower, msg = MAL_SUCCEED;
- if (cs) {
+ if (icase) {
if ((msg = STRlower(&s, &s_lower)) != MAL_SUCCEED ||
(msg = STRlower(&h, &h_lower)) != MAL_SUCCEED)
goto bail;
@@ -4898,13 +4896,13 @@ mel_func str_init_funcs[] = {
command("str", "unicodeAt", STRWChrAt, false, "get a unicode character\n(as
an int) from a string position.", args(1,3,
arg("",int),arg("s",str),arg("index",int))),
command("str", "unicode", STRFromWChr, false, "convert a unicode to a
character.", args(1,2, arg("",str),arg("wchar",int))),
pattern("str", "startsWith", STRstartsWith, false, "Check if string starts
with substring.", args(1,3, arg("",bit),arg("s",str),arg("prefix",str))),
- pattern("str", "startsWith", STRstartsWith, false, "Check if string starts
with substring, case insensitive flag.", args(1,4,
arg("",bit),arg("s",str),arg("prefix",str),arg("cs",bit))),
+ pattern("str", "startsWith", STRstartsWith, false, "Check if string starts
with substring, icase flag.", args(1,4,
arg("",bit),arg("s",str),arg("prefix",str),arg("icase",bit))),
pattern("str", "endsWith", STRendsWith, false, "Check if string ends with
substring.", args(1,3, arg("",bit),arg("s",str),arg("suffix",str))),
- pattern("str", "endsWith", STRendsWith, false, "Check if string ends with
substring, case insensitive flag.", args(1,4,
arg("",bit),arg("s",str),arg("suffix",str),arg("cs",bit))),
+ pattern("str", "endsWith", STRendsWith, false, "Check if string ends with
substring, icase flag.", args(1,4,
arg("",bit),arg("s",str),arg("suffix",str),arg("icase",bit))),
command("str", "toLower", STRlower, false, "Convert a string to lower case.",
args(1,2, arg("",str),arg("s",str))),
command("str", "toUpper", STRupper, false, "Convert a string to upper case.",
args(1,2, arg("",str),arg("s",str))),
pattern("str", "search", STRstrSearch, false, "Search for a substring.
Returns\nposition, -1 if not found.", args(1,3,
arg("",int),arg("s",str),arg("c",str))),
- pattern("str", "search", STRstrSearch, false, "Search for a substring, case
insensitive flag. Returns\nposition, -1 if not found.", args(1,4,
arg("",int),arg("s",str),arg("c",str),arg("cs",bit))),
+ pattern("str", "search", STRstrSearch, false, "Search for a substring, icase
flag. Returns\nposition, -1 if not found.", args(1,4,
arg("",int),arg("s",str),arg("c",str),arg("icase",bit))),
command("str", "r_search", STRReverseStrSearch, false, "Reverse search for a
substring. Returns\nposition, -1 if not found.", args(1,3,
arg("",int),arg("s",str),arg("c",str))),
command("str", "splitpart", STRsplitpart, false, "Split string on delimiter.
Returns\ngiven field (counting from one.)", args(1,4,
arg("",str),arg("s",str),arg("needle",str),arg("field",int))),
command("str", "trim", STRStrip, false, "Strip whitespaces around a string.",
args(1,2, arg("",str),arg("s",str))),
diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h
--- a/monetdb5/modules/atoms/str.h
+++ b/monetdb5/modules/atoms/str.h
@@ -254,6 +254,7 @@ UTF8_strwidth(const char *restrict s)
}
mal_export bool batstr_func_has_candidates(const char *func);
+mal_export str STRlower(str *res, const str *arg1);
/* For str returning functions, the result is passed as the input parameter
buf. The returned str indicates
if the function succeeded (ie malloc failure or invalid unicode character).
str_wchr_at function also
diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c
--- a/monetdb5/modules/kernel/batstr.c
+++ b/monetdb5/modules/kernel/batstr.c
@@ -1632,10 +1632,12 @@ STRbatRpad3_bat_bat(Client cntxt, MalBlk
* head column. This is not checked and may be mis-used to deploy the
* implementation for shifted window arithmetic as well.
*/
-
static str
-prefix_or_suffix(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
const char *name, bit (*func)(const char*, const char*))
+prefix_or_suffix(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
const char *name, bit (*func)(const char*, const char*), bit *icase)
{
+ (void) cntxt;
+ (void) mb;
+
BATiter lefti, righti;
BAT *bn = NULL, *left = NULL, *lefts = NULL, *right = NULL, *rights =
NULL;
bit *restrict vals;
@@ -1648,8 +1650,6 @@ prefix_or_suffix(Client cntxt, MalBlkPtr
*sid1 = pci->argc == 5 ? getArgReference_bat(stk, pci, 3) :
NULL,
*sid2 = pci->argc == 5 ? getArgReference_bat(stk, pci, 4) :
NULL;
- (void) cntxt;
- (void) mb;
if (!(left = BATdescriptor(*l)) || !(right = BATdescriptor(*r))) {
msg = createException(MAL, name, SQLSTATE(HY002)
RUNTIME_OBJECT_MISSING);
goto bailout;
@@ -1677,26 +1677,36 @@ prefix_or_suffix(Client cntxt, MalBlkPtr
if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) {
for (BUN i = 0; i < ci1.ncand; i++) {
oid p1 = (canditer_next_dense(&ci1) - off1), p2 =
(canditer_next_dense(&ci2) - off2);
- const char *x = BUNtvar(lefti, p1);
- const char *y = BUNtvar(righti, p2);
+ char *x = BUNtvar(lefti, p1);
+ char *y = BUNtvar(righti, p2);
if (strNil(x) || strNil(y)) {
vals[i] = int_nil;
nils = true;
} else {
+ if (icase && *icase) {
+ if ((msg = STRlower(&x, &x)) !=
MAL_SUCCEED ||
+ (msg = STRlower(&y, &y)) !=
MAL_SUCCEED)
+ goto bailout;
+ }
vals[i] = func(x, y);
}
}
} else {
for (BUN i = 0; i < ci1.ncand; i++) {
oid p1 = (canditer_next(&ci1) - off1), p2 =
(canditer_next(&ci2) - off2);
- const char *x = BUNtvar(lefti, p1);
- const char *y = BUNtvar(righti, p2);
+ char *x = BUNtvar(lefti, p1);
+ char *y = BUNtvar(righti, p2);
if (strNil(x) || strNil(y)) {
vals[i] = int_nil;
nils = true;
} else {
+ if (icase && *icase) {
+ if ((msg = STRlower(&x, &x)) !=
MAL_SUCCEED ||
+ (msg = STRlower(&y, &y)) !=
MAL_SUCCEED)
+ goto bailout;
+ }
vals[i] = func(x, y);
}
}
@@ -1710,33 +1720,69 @@ bailout:
}
static str
-STRbatPrefix(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+BATSTRstarts_with(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
- return prefix_or_suffix(cntxt, mb, stk, pci, "batstr.startsWith",
str_is_prefix);
-}
-
-static str
-STRbatSuffix(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
-{
- return prefix_or_suffix(cntxt, mb, stk, pci, "batstr.endsWith",
str_is_suffix);
+ bit *icase;
+ switch (pci->argc) {
+ case 4:
+ icase = getArgReference_bit(stk, pci, 3);
+ break;
+ case 6:
+ icase = getArgReference_bit(stk, pci, 5);
+ break;
+ default:
+ icase = NULL;
+ break;
+ }
+ return prefix_or_suffix(cntxt, mb, stk, pci, "batstr.startsWith",
str_is_prefix, icase);
}
static str
-prefix_or_suffix_cst(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
const char *name, bit (*func)(const char*, const char*))
+BATSTRends_with(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
+ bit *icase;
+ switch (pci->argc) {
+ case 4:
+ icase = getArgReference_bit(stk, pci, 3);
+ break;
+ case 6:
+ icase = getArgReference_bit(stk, pci, 5);
+ break;
+ default:
+ icase = NULL;
+ break;
+ }
+ return prefix_or_suffix(cntxt, mb, stk, pci, "batstr.endsWith",
str_is_suffix, icase);
+}
+
+static str
+prefix_or_suffix_cst(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
const char *name, bit (*func)(const char*, const char*), bit *icase)
+{
+ (void) cntxt;
+ (void) mb;
+
BATiter bi;
BAT *bn = NULL, *b = NULL, *bs = NULL;
bit *restrict vals;
- const char *y = *getArgReference_str(stk, pci, 2);
- str msg = MAL_SUCCEED;
+ str y = *getArgReference_str(stk, pci, 2), msg = MAL_SUCCEED;
bool nils = false;
struct canditer ci1 = {0};
oid off1;
- bat *res = getArgReference_bat(stk, pci, 0), *bid =
getArgReference_bat(stk, pci, 1),
- *sid1 = pci->argc == 4 ? getArgReference_bat(stk, pci, 3) :
NULL;
-
- (void) cntxt;
- (void) mb;
+ bat *res = getArgReference_bat(stk, pci, 0), *bid =
getArgReference_bat(stk, pci, 1), *sid1;
+ /* checking if icase is ~NULL and not if it is true or false */
+ if (pci->argc == 4 || pci->argc == 5) {
+ if (icase)
+ sid1 = NULL;
+ else {
+ assert(getArgType(mb, pci, 3) == TYPE_bat);
+ sid1 = getArgReference_bat(stk, pci, 3);
+ }
+ }
+
+ if (icase && *icase) {
+ if ((msg = STRlower(&y, &y)) != MAL_SUCCEED)
+ goto bailout;
+ }
if (!(b = BATdescriptor(*bid))) {
msg = createException(MAL, name, SQLSTATE(HY002)
RUNTIME_OBJECT_MISSING);
goto bailout;
@@ -1757,24 +1803,32 @@ prefix_or_suffix_cst(Client cntxt, MalBl
if (ci1.tpe == cand_dense) {
for (BUN i = 0; i < ci1.ncand; i++) {
oid p1 = (canditer_next_dense(&ci1) - off1);
- const char *x = BUNtvar(bi, p1);
+ char *x = BUNtvar(bi, p1);
if (strNil(x) || strNil(y)) {
vals[i] = bit_nil;
nils = true;
} else {
+ if (icase && *icase) {
+ if ((msg = STRlower(&x, &x)) !=
MAL_SUCCEED)
+ goto bailout;
+ }
vals[i] = func(x, y);
}
}
} else {
for (BUN i = 0; i < ci1.ncand; i++) {
oid p1 = (canditer_next(&ci1) - off1);
- const char *x = BUNtvar(bi, p1);
+ char *x = BUNtvar(bi, p1);
if (strNil(x) || strNil(y)) {
vals[i] = int_nil;
nils = true;
} else {
+ if(icase && *icase) {
+ if ((msg = STRlower(&x, &x)) !=
MAL_SUCCEED)
+ goto bailout;
+ }
vals[i] = func(x, y);
}
}
@@ -1787,33 +1841,74 @@ bailout:
}
static str
-STRbatPrefixcst(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+BATSTRstarts_with_cst(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
- return prefix_or_suffix_cst(cntxt, mb, stk, pci, "batstr.startsWith",
str_is_prefix);
+ bit *icase = NULL;
+ switch (pci->argc) {
+ case 4:
+ if (getArgType(mb, pci, 3) == TYPE_bit) {
+ icase = getArgReference_bit(stk, pci, 3);
+ }
+ break;
+ case 5:
+ icase = getArgReference_bit(stk, pci, 4);
+ break;
+ default:
+ icase = NULL;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]