Changeset: 9c55b51ba92c for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9c55b51ba92c
Modified Files:
        monetdb5/modules/atoms/str.c
        monetdb5/modules/atoms/str.h
        monetdb5/modules/kernel/batstr.c
Branch: alloc-less-str
Log Message:

Cleaned reverse string search and splitpart


diffs (truncated from 484 to 300 lines):

diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3667,54 +3667,51 @@ STRstrSearch(int *res, const str *haysta
        return MAL_SUCCEED;
 }
 
-/* find last occurrence of arg2 in arg1 */
-str
-STRReverseStrSearch(int *res, const str *arg1, const str *arg2)
+int
+str_reverse_str_search(const char *s, const char *s2)
 {
 /* 64bit: should return lng */
        size_t len, slen;
-       const char *s = *arg1;
-       const char *s2 = *arg2;
+       int res = -1; /* changed if found */
 
        if (strNil(s) || strNil(s2)) {
-               *res = int_nil;
-               return MAL_SUCCEED;
+               return int_nil;
        }
-       *res = -1;
        len = strlen(s);
        slen = strlen(s2);
-       *res = -1;                                      /* changed if found */
        if (len >= slen) {
                const char *p = s + len - slen;
                do {
                        if (strncmp(p, s2, slen) == 0) {
-                               *res = UTF8_strpos(s, p);
+                               res = UTF8_strpos(s, p);
                                break;
                        }
                } while (p-- > s);
        }
+       return res;
+}
+
+/* find last occurrence of arg2 in arg1 */
+static str
+STRReverseStrSearch(int *res, const str *arg1, const str *arg2)
+{
+       *res = str_reverse_str_search(*arg1, *arg2);
        return MAL_SUCCEED;
 }
 
 str
-STRsplitpart(str *res, str *haystack, str *needle, int *field)
+str_splitpart(str *buf, int *buflen, const char *s, const char *s2, int f)
 {
        size_t len;
-       int f = *field;
        char *p = NULL;
-       const char *s = *haystack;
-       const char *s2 = *needle;
 
-       if (strNil(s) || strNil(s2) || is_int_nil(*field)) {
-               *res = GDKstrdup(str_nil);
-               if (*res == NULL)
-                       throw(MAL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       if (strNil(s) || strNil(s2) || is_int_nil(f)) {
+               strcpy(*buf, str_nil);
                return MAL_SUCCEED;
        }
 
-       if (*field <= 0) {
+       if (f <= 0)
                throw(MAL, "str.splitpart", SQLSTATE(42000) "field position must be greater than zero");
-       }
 
        len = strlen(s2);
        if (len) {
@@ -3725,9 +3722,7 @@ STRsplitpart(str *res, str *haystack, st
        }
 
        if (f != 1) {
-               *res = GDKstrdup("");
-               if (*res == NULL)
-                       throw(MAL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+               strcpy(*buf, "");
                return MAL_SUCCEED;
        }
 
@@ -3737,12 +3732,30 @@ STRsplitpart(str *res, str *haystack, st
                len = (size_t) (p - s);
        }
 
-       *res = GDKstrndup(s, len);
-       if (*res == NULL)
-               throw(MAL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       len++;
+       CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) len, "str.splitpart");
+       strcpy_len(*buf, s, (int) len);
        return MAL_SUCCEED;
 }
 
+static str
+STRsplitpart(str *res, str *haystack, str *needle, int *field)
+{
+       int buflen = INITIAL_STR_BUFFER_LENGTH;
+       str buf = GDKmalloc(buflen), msg;
+
+       *res = NULL;
+       if (!buf)
+               throw(SQL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       msg = str_splitpart(&buf, &buflen, *haystack, *needle, *field);
+       if (!msg && !(*res = GDKstrdup(buf))) {
+               msg = createException(MAL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       }
+
+       GDKfree(buf);
+       return msg;
+}
+
 /* returns number of bytes to remove from left to strip the codepoints in rm */
 static size_t
 lstrip(const char *s, size_t len, const int *rm, size_t nrm)
diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h
--- a/monetdb5/modules/atoms/str.h
+++ b/monetdb5/modules/atoms/str.h
@@ -40,10 +40,10 @@ extern str str_ltrim(str *buf, int *bufl
 extern str str_rtrim(str *buf, int *buflen, const char *s);
 
 extern int str_search(const char *s, const char *s2);
+extern int str_reverse_str_search(const char *s, const char *s2);
 extern int str_locate2(const char *needle, const char *haystack, int start);
 
-mal_export str STRReverseStrSearch(int *res, const str *arg1, const str *arg2);
-mal_export str STRsplitpart(str *res, str *haystack, str *needle, int *field);
+extern str str_splitpart(str *buf, int *buflen, const char *s, const char *s2, int f);
 
 mal_export str STRStrip2(str *res, const str *arg1, const str *arg2);
 mal_export str STRLtrim2(str *res, const str *arg1, const str *arg2);
diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c
--- a/monetdb5/modules/kernel/batstr.c
+++ b/monetdb5/modules/kernel/batstr.c
@@ -1140,66 +1140,103 @@ bailout:
 }
 
 static str
-STRbatRstrSearch(bat *ret, const bat *l, const bat *r)
+STRbatRstrSearch(bat *res, const bat *l, const bat *r)
 {
        BATiter lefti, righti;
-       BAT *bn, *left, *right;
-       BUN p,q;
-       int v;
+       BAT *bn = NULL, *left = NULL, *right = NULL;
+       BUN p, q;
+       int *restrict vals, next;
+       str x, y, msg = MAL_SUCCEED;
+       bool nils = false;
 
-       prepareOperand2(left,l,right,r,"batstr.r_search");
-       if(BATcount(left) != BATcount(right)) {
-               BBPunfix(left->batCacheid);
-               BBPunfix(right->batCacheid);
-               throw(MAL, "batstr.r_search", ILLEGAL_ARGUMENT " Requires bats of identical size");
+       if (!(left = BATdescriptor(*l)) || !(right = BATdescriptor(*r))) {
+               msg = createException(MAL, "batstr.r_search", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
+               goto bailout;
        }
-       prepareResult2(bn,left,right,TYPE_int,"batstr.r_search");
+       if (BATcount(left) != BATcount(right)) {
+               msg = createException(MAL, "batstr.r_search", ILLEGAL_ARGUMENT " Requires bats of identical size");
+               goto bailout;
+       }
+       q = BATcount(left);
+       if (!(bn = COLnew(left->hseqbase, TYPE_int, q, TRANSIENT))) {
+               msg = createException(MAL, "batstr.r_search", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+               goto bailout;
+       }
 
        lefti = bat_iterator(left);
        righti = bat_iterator(right);
+       vals = Tloc(bn, 0);
+       for (p = 0; p < q ; p++) {
+               x = (str) BUNtail(lefti, p);
+               y = (str) BUNtail(righti, p);
 
-       BATloop(left, p, q) {
-               str tl = (str) BUNtvar(lefti,p);
-               str tr = (str) BUNtvar(righti,p);
-               STRReverseStrSearch(&v, &tl, &tr);
-               if (bunfastappTYPE(int, bn, &v) != GDK_SUCCEED) {
-                       BBPunfix(left->batCacheid);
-                       BBPunfix(right->batCacheid);
-                       BBPunfix(*ret);
-                       throw(MAL, "batstr.r_search", OPERATION_FAILED " During bulk operation");
-               }
+               next = str_reverse_str_search(x, y);
+               vals[p] = next;
+               nils |= is_int_nil(next);
        }
-       bn->tnonil = false;
-       BBPunfix(right->batCacheid);
-       finalizeResult(ret,bn,left);
-       return MAL_SUCCEED;
+
+bailout:
+       if (left)
+               BBPunfix(left->batCacheid);
+       if (right)
+               BBPunfix(right->batCacheid);
+       if (bn && !msg) {
+               BATsetcount(bn, q);
+               bn->tnil = nils;
+               bn->tnonil = !nils;
+               bn->tkey = BATcount(bn) <= 1;
+               bn->tsorted = BATcount(bn) <= 1;
+               bn->trevsorted = BATcount(bn) <= 1;
+               BBPkeepref(*res = bn->batCacheid);
+       } else if (bn)
+               BBPreclaim(bn);
+       return msg;
 }
 
 static str
-STRbatRstrSearchcst(bat *ret, const bat *l, const str *cst)
+STRbatRstrSearchcst(bat *res, const bat *l, const str *cst)
 {
-       BATiter lefti;
-       BAT *bn, *left;
-       BUN p,q;
-       int v;
+       BATiter bi;
+       BAT *bn = NULL, *b = NULL;
+       BUN p, q;
+       int *restrict vals, next;
+       str x, y = *cst, msg = MAL_SUCCEED;
+       bool nils = false;
 
-       prepareOperand(left,l,"batstr.r_search");
-       prepareResult(bn,left,TYPE_int,"batstr.r_search");
-
-       lefti = bat_iterator(left);
+       if (!(b = BATdescriptor(*l))) {
+               msg = createException(MAL, "batstr.r_search", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
+               goto bailout;
+       }
+       q = BATcount(b);
+       if (!(bn = COLnew(b->hseqbase, TYPE_int, q, TRANSIENT))) {
+               msg = createException(MAL, "batstr.r_search", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+               goto bailout;
+       }
 
-       BATloop(left, p, q) {
-               str tl = (str) BUNtvar(lefti,p);
-               STRReverseStrSearch(&v, &tl, cst);
-               if (bunfastappTYPE(int, bn, &v) != GDK_SUCCEED) {
-                       BBPunfix(left->batCacheid);
-                       BBPunfix(*ret);
-                       throw(MAL, "batstr.r_search", OPERATION_FAILED " During bulk operation");
-               }
+       bi = bat_iterator(b);
+       vals = Tloc(bn, 0);
+       for (p = 0; p < q ; p++) {
+               x = (str) BUNtail(bi, p);
+
+               next = str_reverse_str_search(x, y);
+               vals[p] = next;
+               nils |= is_int_nil(next);
        }
-       bn->tnonil = false;
-       finalizeResult(ret,bn,left);
-       return MAL_SUCCEED;
+
+bailout:
+       if (b)
+               BBPunfix(b->batCacheid);
+       if (bn && !msg) {
+               BATsetcount(bn, q);
+               bn->tnil = nils;
+               bn->tnonil = !nils;
+               bn->tkey = BATcount(bn) <= 1;
+               bn->tsorted = BATcount(bn) <= 1;
+               bn->trevsorted = BATcount(bn) <= 1;
+               BBPkeepref(*res = bn->batCacheid);
+       } else if (bn)
+               BBPreclaim(bn);
+       return msg;
 }
 
 static str
@@ -1607,117 +1644,122 @@ bailout:
 }
 
 static str
-STRbatsplitpartcst(bat *ret, const bat *bid, const str *needle, const int *field)
+STRbatsplitpartcst(bat *res, const bat *bid, const str *needle, const int *field)
 {
-       BATiter lefti;
-       BAT *bn = NULL, *left;
-       BUN p,q;
-       str y = NULL, err = MAL_SUCCEED;
+       BATiter bi;
+       BAT *bn = NULL, *b = NULL;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to