Changeset: 9c55b51ba92c for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9c55b51ba92c
Modified Files:
monetdb5/modules/atoms/str.c
monetdb5/modules/atoms/str.h
monetdb5/modules/kernel/batstr.c
Branch: alloc-less-str
Log Message:
Cleaned reverse string search and splitpart
diffs (truncated from 484 to 300 lines):
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3667,54 +3667,51 @@ STRstrSearch(int *res, const str *haysta
return MAL_SUCCEED;
}
-/* find last occurrence of arg2 in arg1 */
-str
-STRReverseStrSearch(int *res, const str *arg1, const str *arg2)
+int
+str_reverse_str_search(const char *s, const char *s2)
{
/* 64bit: should return lng */
size_t len, slen;
- const char *s = *arg1;
- const char *s2 = *arg2;
+ int res = -1; /* changed if found */
if (strNil(s) || strNil(s2)) {
- *res = int_nil;
- return MAL_SUCCEED;
+ return int_nil;
}
- *res = -1;
len = strlen(s);
slen = strlen(s2);
- *res = -1; /* changed if found */
if (len >= slen) {
const char *p = s + len - slen;
do {
if (strncmp(p, s2, slen) == 0) {
- *res = UTF8_strpos(s, p);
+ res = UTF8_strpos(s, p);
break;
}
} while (p-- > s);
}
+ return res;
+}
+
+/* find last occurrence of arg2 in arg1 */
+static str
+STRReverseStrSearch(int *res, const str *arg1, const str *arg2)
+{
+ *res = str_reverse_str_search(*arg1, *arg2);
return MAL_SUCCEED;
}
str
-STRsplitpart(str *res, str *haystack, str *needle, int *field)
+str_splitpart(str *buf, int *buflen, const char *s, const char *s2, int f)
{
size_t len;
- int f = *field;
char *p = NULL;
- const char *s = *haystack;
- const char *s2 = *needle;
- if (strNil(s) || strNil(s2) || is_int_nil(*field)) {
- *res = GDKstrdup(str_nil);
- if (*res == NULL)
- throw(MAL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ if (strNil(s) || strNil(s2) || is_int_nil(f)) {
+ strcpy(*buf, str_nil);
return MAL_SUCCEED;
}
- if (*field <= 0) {
+ if (f <= 0)
throw(MAL, "str.splitpart", SQLSTATE(42000) "field position must be greater than zero");
- }
len = strlen(s2);
if (len) {
@@ -3725,9 +3722,7 @@ STRsplitpart(str *res, str *haystack, st
}
if (f != 1) {
- *res = GDKstrdup("");
- if (*res == NULL)
- throw(MAL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ strcpy(*buf, "");
return MAL_SUCCEED;
}
@@ -3737,12 +3732,30 @@ STRsplitpart(str *res, str *haystack, st
len = (size_t) (p - s);
}
- *res = GDKstrndup(s, len);
- if (*res == NULL)
- throw(MAL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ len++;
+ CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) len, "str.splitpart");
+ strcpy_len(*buf, s, (int) len);
return MAL_SUCCEED;
}
+static str
+STRsplitpart(str *res, str *haystack, str *needle, int *field)
+{
+ int buflen = INITIAL_STR_BUFFER_LENGTH;
+ str buf = GDKmalloc(buflen), msg;
+
+ *res = NULL;
+ if (!buf)
+ throw(SQL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ msg = str_splitpart(&buf, &buflen, *haystack, *needle, *field);
+ if (!msg && !(*res = GDKstrdup(buf))) {
+ msg = createException(MAL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ }
+
+ GDKfree(buf);
+ return msg;
+}
+
/* returns number of bytes to remove from left to strip the codepoints in rm */
static size_t
lstrip(const char *s, size_t len, const int *rm, size_t nrm)
diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h
--- a/monetdb5/modules/atoms/str.h
+++ b/monetdb5/modules/atoms/str.h
@@ -40,10 +40,10 @@ extern str str_ltrim(str *buf, int *bufl
extern str str_rtrim(str *buf, int *buflen, const char *s);
extern int str_search(const char *s, const char *s2);
+extern int str_reverse_str_search(const char *s, const char *s2);
extern int str_locate2(const char *needle, const char *haystack, int start);
-mal_export str STRReverseStrSearch(int *res, const str *arg1, const str *arg2);
-mal_export str STRsplitpart(str *res, str *haystack, str *needle, int *field);
+extern str str_splitpart(str *buf, int *buflen, const char *s, const char *s2, int f);
mal_export str STRStrip2(str *res, const str *arg1, const str *arg2);
mal_export str STRLtrim2(str *res, const str *arg1, const str *arg2);
diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c
--- a/monetdb5/modules/kernel/batstr.c
+++ b/monetdb5/modules/kernel/batstr.c
@@ -1140,66 +1140,103 @@ bailout:
}
static str
-STRbatRstrSearch(bat *ret, const bat *l, const bat *r)
+STRbatRstrSearch(bat *res, const bat *l, const bat *r)
{
BATiter lefti, righti;
- BAT *bn, *left, *right;
- BUN p,q;
- int v;
+ BAT *bn = NULL, *left = NULL, *right = NULL;
+ BUN p, q;
+ int *restrict vals, next;
+ str x, y, msg = MAL_SUCCEED;
+ bool nils = false;
- prepareOperand2(left,l,right,r,"batstr.r_search");
- if(BATcount(left) != BATcount(right)) {
- BBPunfix(left->batCacheid);
- BBPunfix(right->batCacheid);
- throw(MAL, "batstr.r_search", ILLEGAL_ARGUMENT " Requires bats of identical size");
+ if (!(left = BATdescriptor(*l)) || !(right = BATdescriptor(*r))) {
+ msg = createException(MAL, "batstr.r_search", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
+ goto bailout;
}
- prepareResult2(bn,left,right,TYPE_int,"batstr.r_search");
+ if (BATcount(left) != BATcount(right)) {
+ msg = createException(MAL, "batstr.r_search", ILLEGAL_ARGUMENT " Requires bats of identical size");
+ goto bailout;
+ }
+ q = BATcount(left);
+ if (!(bn = COLnew(left->hseqbase, TYPE_int, q, TRANSIENT))) {
+ msg = createException(MAL, "batstr.r_search", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ goto bailout;
+ }
lefti = bat_iterator(left);
righti = bat_iterator(right);
+ vals = Tloc(bn, 0);
+ for (p = 0; p < q ; p++) {
+ x = (str) BUNtail(lefti, p);
+ y = (str) BUNtail(righti, p);
- BATloop(left, p, q) {
- str tl = (str) BUNtvar(lefti,p);
- str tr = (str) BUNtvar(righti,p);
- STRReverseStrSearch(&v, &tl, &tr);
- if (bunfastappTYPE(int, bn, &v) != GDK_SUCCEED) {
- BBPunfix(left->batCacheid);
- BBPunfix(right->batCacheid);
- BBPunfix(*ret);
- throw(MAL, "batstr.r_search", OPERATION_FAILED " During bulk operation");
- }
+ next = str_reverse_str_search(x, y);
+ vals[p] = next;
+ nils |= is_int_nil(next);
}
- bn->tnonil = false;
- BBPunfix(right->batCacheid);
- finalizeResult(ret,bn,left);
- return MAL_SUCCEED;
+
+bailout:
+ if (left)
+ BBPunfix(left->batCacheid);
+ if (right)
+ BBPunfix(right->batCacheid);
+ if (bn && !msg) {
+ BATsetcount(bn, q);
+ bn->tnil = nils;
+ bn->tnonil = !nils;
+ bn->tkey = BATcount(bn) <= 1;
+ bn->tsorted = BATcount(bn) <= 1;
+ bn->trevsorted = BATcount(bn) <= 1;
+ BBPkeepref(*res = bn->batCacheid);
+ } else if (bn)
+ BBPreclaim(bn);
+ return msg;
}
static str
-STRbatRstrSearchcst(bat *ret, const bat *l, const str *cst)
+STRbatRstrSearchcst(bat *res, const bat *l, const str *cst)
{
- BATiter lefti;
- BAT *bn, *left;
- BUN p,q;
- int v;
+ BATiter bi;
+ BAT *bn = NULL, *b = NULL;
+ BUN p, q;
+ int *restrict vals, next;
+ str x, y = *cst, msg = MAL_SUCCEED;
+ bool nils = false;
- prepareOperand(left,l,"batstr.r_search");
- prepareResult(bn,left,TYPE_int,"batstr.r_search");
-
- lefti = bat_iterator(left);
+ if (!(b = BATdescriptor(*l))) {
+ msg = createException(MAL, "batstr.r_search", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
+ goto bailout;
+ }
+ q = BATcount(b);
+ if (!(bn = COLnew(b->hseqbase, TYPE_int, q, TRANSIENT))) {
+ msg = createException(MAL, "batstr.r_search", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ goto bailout;
+ }
- BATloop(left, p, q) {
- str tl = (str) BUNtvar(lefti,p);
- STRReverseStrSearch(&v, &tl, cst);
- if (bunfastappTYPE(int, bn, &v) != GDK_SUCCEED) {
- BBPunfix(left->batCacheid);
- BBPunfix(*ret);
- throw(MAL, "batstr.r_search", OPERATION_FAILED " During bulk operation");
- }
+ bi = bat_iterator(b);
+ vals = Tloc(bn, 0);
+ for (p = 0; p < q ; p++) {
+ x = (str) BUNtail(bi, p);
+
+ next = str_reverse_str_search(x, y);
+ vals[p] = next;
+ nils |= is_int_nil(next);
}
- bn->tnonil = false;
- finalizeResult(ret,bn,left);
- return MAL_SUCCEED;
+
+bailout:
+ if (b)
+ BBPunfix(b->batCacheid);
+ if (bn && !msg) {
+ BATsetcount(bn, q);
+ bn->tnil = nils;
+ bn->tnonil = !nils;
+ bn->tkey = BATcount(bn) <= 1;
+ bn->tsorted = BATcount(bn) <= 1;
+ bn->trevsorted = BATcount(bn) <= 1;
+ BBPkeepref(*res = bn->batCacheid);
+ } else if (bn)
+ BBPreclaim(bn);
+ return msg;
}
static str
@@ -1607,117 +1644,122 @@ bailout:
}
static str
-STRbatsplitpartcst(bat *ret, const bat *bid, const str *needle, const int *field)
+STRbatsplitpartcst(bat *res, const bat *bid, const str *needle, const int *field)
{
- BATiter lefti;
- BAT *bn = NULL, *left;
- BUN p,q;
- str y = NULL, err = MAL_SUCCEED;
+ BATiter bi;
+ BAT *bn = NULL, *b = NULL;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list