Changeset: 5d409d65fac0 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5d409d65fac0
Modified Files:
monetdb5/modules/atoms/str.c
monetdb5/modules/atoms/str.h
monetdb5/modules/kernel/batstr.c
Branch: alloc-less-str
Log Message:
Implementations for locate, locate2 and search functions
diffs (truncated from 708 to 300 lines):
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3646,22 +3646,24 @@ STRUpper(str *res, const str *arg1)
return msg;
}
-/* find first occurrence of needle in haystack */
-str
-STRstrSearch(int *res, const str *haystack, const str *needle)
+int
+str_search(const char *s, const char *s2)
{
/* 64bit: should return lng */
- const char *s = *haystack;
- const char *s2 = *needle;
-
if (strNil(s) || strNil(s2)) {
- *res = int_nil;
- return MAL_SUCCEED;
+ return int_nil;
}
if ((s2 = strstr(s, s2)) != NULL)
- *res = UTF8_strpos(s, s2);
+ return UTF8_strpos(s, s2);
else
- *res = -1;
+ return -1;
+}
+
+/* find first occurrence of needle in haystack */
+static str
+STRstrSearch(int *res, const str *haystack, const str *needle)
+{
+ *res = str_search(*haystack, *needle);
return MAL_SUCCEED;
}
@@ -4258,7 +4260,7 @@ str_sub_string(str *buf, int *buflen, co
return str_Sub_String(buf, buflen, s, start, l);
}
-str
+static str
STRsubstring(str *res, const str *s, const int *start, const int *l)
{
int buflen = INITIAL_STR_BUFFER_LENGTH;
@@ -4323,29 +4325,34 @@ STRsuffix(str *res, const str *s, const
return msg;
}
-str
-STRlocate2(int *ret, const str *needle, const str *haystack, const int *start)
+int
+str_locate2(const char *needle, const char *haystack, int start)
{
int off, res;
char *s;
- if (strNil(*needle) || strNil(*haystack) || is_int_nil(*start)) {
- *ret = int_nil;
- return MAL_SUCCEED;
+ if (strNil(needle) || strNil(haystack) || is_int_nil(start)) {
+ return int_nil;
}
- off = *start <= 0 ? 1 : *start;
- s = UTF8_strtail(*haystack, off - 1);
- STRstrSearch(&res, &s, needle);
- *ret = res >= 0 ? res + off : 0;
+ off = start <= 0 ? 1 : start;
+ s = UTF8_strtail(haystack, off - 1);
+ res = str_search(s, needle);
+ return res >= 0 ? res + off : 0;
+}
+
+static str
+STRlocate2(int *ret, const str *needle, const str *haystack, const int *start)
+{
+ *ret = str_locate2(*needle, *haystack, *start);
return MAL_SUCCEED;
}
-str
+static str
STRlocate(int *ret, const str *needle, const str *haystack)
{
- int p = 1;
- return STRlocate2(ret, needle, haystack, &p);
+ *ret = str_locate2(*needle, *haystack, 1);
+ return MAL_SUCCEED;
}
str
diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h
--- a/monetdb5/modules/atoms/str.h
+++ b/monetdb5/modules/atoms/str.h
@@ -39,7 +39,9 @@ extern str str_strip(str *buf, int *bufl
extern str str_ltrim(str *buf, int *buflen, const char *s);
extern str str_rtrim(str *buf, int *buflen, const char *s);
-mal_export str STRstrSearch(int *res, const str *arg1, const str *arg2);
+extern int str_search(const char *s, const char *s2);
+extern int str_locate2(const char *needle, const char *haystack, int start);
+
mal_export str STRReverseStrSearch(int *res, const str *arg1, const str *arg2);
mal_export str STRsplitpart(str *res, str *haystack, str *needle, int *field);
@@ -52,9 +54,6 @@ mal_export str STRLpad2(str *res, const
mal_export str STRRpad2(str *res, const str *arg1, const int *len, const str *arg2);
mal_export str STRSubstitute(str *res, const str *arg1, const str *arg2, const str *arg3, const bit *g);
-mal_export str STRsubstring(str *ret, const str *s, const int *start, const int *l);
-mal_export str STRlocate(int *ret, const str *s1, const str *s2);
-mal_export str STRlocate2(int *ret, const str *s1, const str *s2, const int *start);
mal_export str STRinsert(str *ret, const str *s, const int *start, const int *l, const str *s2);
mal_export str STRreplace(str *ret, const str *s1, const str *s2, const str *s3);
diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c
--- a/monetdb5/modules/kernel/batstr.c
+++ b/monetdb5/modules/kernel/batstr.c
@@ -1040,66 +1040,103 @@ STRbatSuffixcst(bat *res, const bat *l,
}
static str
-STRbatstrSearch(bat *ret, const bat *l, const bat *r)
+STRbatstrSearch(bat *res, const bat *l, const bat *r)
{
BATiter lefti, righti;
- BAT *bn, *left, *right;
- BUN p,q;
- int v;
+ BAT *bn = NULL, *left = NULL, *right = NULL;
+ BUN p, q;
+ int *restrict vals, next;
+ str x, y, msg = MAL_SUCCEED;
+ bool nils = false;
- prepareOperand2(left,l,right,r,"batstr.search");
- if(BATcount(left) != BATcount(right)) {
- BBPunfix(left->batCacheid);
- BBPunfix(right->batCacheid);
- throw(MAL, "batstr.search", ILLEGAL_ARGUMENT " Requires bats of identical size");
+ if (!(left = BATdescriptor(*l)) || !(right = BATdescriptor(*r))) {
+ msg = createException(MAL, "batstr.search", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
+ goto bailout;
}
- prepareResult2(bn,left,right,TYPE_int,"batstr.search");
+ if (BATcount(left) != BATcount(right)) {
+ msg = createException(MAL, "batstr.search", ILLEGAL_ARGUMENT " Requires bats of identical size");
+ goto bailout;
+ }
+ q = BATcount(left);
+ if (!(bn = COLnew(left->hseqbase, TYPE_int, q, TRANSIENT))) {
+ msg = createException(MAL, "batstr.search", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ goto bailout;
+ }
lefti = bat_iterator(left);
righti = bat_iterator(right);
+ vals = Tloc(bn, 0);
+ for (p = 0; p < q ; p++) {
+ x = (str) BUNtail(lefti, p);
+ y = (str) BUNtail(righti, p);
- BATloop(left, p, q) {
- str tl = (str) BUNtvar(lefti,p);
- str tr = (str) BUNtvar(righti,p);
- STRstrSearch(&v, &tl, &tr);
- if (bunfastappTYPE(int, bn, &v) != GDK_SUCCEED) {
- BBPunfix(left->batCacheid);
- BBPunfix(right->batCacheid);
- BBPunfix(*ret);
- throw(MAL, "batstr.search", OPERATION_FAILED " During bulk operation");
- }
+ next = str_search(x, y);
+ vals[p] = next;
+ nils |= is_int_nil(next);
}
- bn->tnonil = false;
- BBPunfix(right->batCacheid);
- finalizeResult(ret,bn,left);
- return MAL_SUCCEED;
+
+bailout:
+ if (left)
+ BBPunfix(left->batCacheid);
+ if (right)
+ BBPunfix(right->batCacheid);
+ if (bn && !msg) {
+ BATsetcount(bn, q);
+ bn->tnil = nils;
+ bn->tnonil = !nils;
+ bn->tkey = BATcount(bn) <= 1;
+ bn->tsorted = BATcount(bn) <= 1;
+ bn->trevsorted = BATcount(bn) <= 1;
+ BBPkeepref(*res = bn->batCacheid);
+ } else if (bn)
+ BBPreclaim(bn);
+ return msg;
}
static str
-STRbatstrSearchcst(bat *ret, const bat *l, const str *cst)
+STRbatstrSearchcst(bat *res, const bat *l, const str *cst)
{
BATiter lefti;
- BAT *bn, *left;
- BUN p,q;
- int v;
+ BAT *bn = NULL, *left = NULL;
+ BUN p, q;
+ int *restrict vals, next;
+ str x, y = *cst, msg = MAL_SUCCEED;
+ bool nils = false;
- prepareOperand(left,l,"batstr.search");
- prepareResult(bn,left,TYPE_int,"batstr.search");
+ if (!(left = BATdescriptor(*l))) {
+ msg = createException(MAL, "batstr.search", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
+ goto bailout;
+ }
+ q = BATcount(left);
+ if (!(bn = COLnew(left->hseqbase, TYPE_int, q, TRANSIENT))) {
+ msg = createException(MAL, "batstr.search", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ goto bailout;
+ }
lefti = bat_iterator(left);
+ vals = Tloc(bn, 0);
+ for (p = 0; p < q ; p++) {
+ x = (str) BUNtail(lefti, p);
- BATloop(left, p, q) {
- str tl = (str) BUNtvar(lefti,p);
- STRstrSearch(&v, &tl, cst);
- if (bunfastappTYPE(int, bn, &v) != GDK_SUCCEED) {
- BBPunfix(left->batCacheid);
- BBPunfix(*ret);
- throw(MAL, "batstr.search", OPERATION_FAILED " During bulk operation");
- }
+ next = str_search(x, y);
+ vals[p] = next;
+ nils |= is_int_nil(next);
}
- bn->tnonil = false;
- finalizeResult(ret,bn,left);
- return MAL_SUCCEED;
+
+bailout:
+ if (left)
+ BBPunfix(left->batCacheid);
+ if (bn && !msg) {
+ BATsetcount(bn, q);
+ bn->tnil = nils;
+ bn->tnonil = !nils;
+ bn->tkey = BATcount(bn) <= 1;
+ bn->tsorted = BATcount(bn) <= 1;
+ bn->trevsorted = BATcount(bn) <= 1;
+ BBPkeepref(*res = bn->batCacheid);
+ } else if (bn)
+ BBPreclaim(bn);
+ return msg;
}
static str
@@ -1972,224 +2009,270 @@ bailout:
}
static str
-STRbatsubstring(bat *ret, const bat *l, const bat *r, const bat *t)
+STRbatsubstring(bat *res, const bat *l, const bat *r, const bat *t)
{
- BATiter lefti, starti, lengthi;
- BAT *bn, *left, *start, *length;
- BUN p,q;
- str v;
+ BATiter lefti;
+ BAT *bn = NULL, *left = NULL, *start = NULL, *length = NULL;
+ BUN p, q;
+ int buflen = INITIAL_STR_BUFFER_LENGTH, *starti, *lengthi;
+ str x, buf = GDKmalloc(buflen), msg = MAL_SUCCEED;
+ bool nils = false;
- if( (left= BATdescriptor(*l)) == NULL )
- throw(MAL, "batstr.substring", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
- if( (start= BATdescriptor(*r)) == NULL ){
- BBPunfix(left->batCacheid);
- throw(MAL, "batstr.substring", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+ if (!buf) {
+ msg = createException(MAL, "batstr.substring", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ goto bailout;
+ }
+ if (!(left = BATdescriptor(*l)) || !(start = BATdescriptor(*r)) || !(length = BATdescriptor(*t))) {
+ msg = createException(MAL, "batstr.substring", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
+ goto bailout;
}
- if( (length= BATdescriptor(*t)) == NULL ){
- BBPunfix(left->batCacheid);
- BBPunfix(start->batCacheid);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list