Changeset: 315b3c29c703 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=315b3c29c703
Modified Files:
        monetdb5/modules/atoms/str.c
        monetdb5/modules/atoms/str.h
        monetdb5/modules/kernel/batstr.c
Branch: alloc-less-str
Log Message:

More efficient implementations for insert and substitute


diffs (truncated from 674 to 300 lines):

diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3472,9 +3472,9 @@ str_Sub_String(str *buf, int *buflen, co
                return MAL_SUCCEED;
        }
        s = UTF8_strtail(s, off);
-       len = UTF8_strtail(s, l) - s;
-       CHECK_STR_BUFFER_LENGTH(buf, buflen, len + 1, "str.substring");
-       strcpy_len(*buf, s, (size_t)(len + 1));
+       len = UTF8_strtail(s, l) - s + 1;
+       CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.substring");
+       strcpy_len(*buf, s, (size_t) len);
        return MAL_SUCCEED;
 }
 
@@ -3734,7 +3734,7 @@ str_splitpart(str *buf, int *buflen, con
 
        len++;
        CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) len, "str.splitpart");
-       strcpy_len(*buf, s, (int) len);
+       strcpy_len(*buf, s, len);
        return MAL_SUCCEED;
 }
 
@@ -3843,8 +3843,9 @@ str_strip(str *buf, int *buflen, const c
                len -= n;
                n = rstrip(s, len, whitespace, NSPACES);
 
-               CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) (n + 1), "str.strip");
-               strcpy_len(*buf, s, n + 1);
+               n++;
+               CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) n, "str.strip");
+               strcpy_len(*buf, s, n);
                return MAL_SUCCEED;
        }
 }
@@ -3877,9 +3878,10 @@ str_ltrim(str *buf, int *buflen, const c
        } else {
                size_t len = strlen(s);
                size_t n = lstrip(s, len, whitespace, NSPACES);
+               size_t ncast = len - n + 1;
 
-               CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) (len - n + 1), "str.ltrim");
-               strcpy_len(*buf, s + n, len - n + 1);
+               CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) ncast, "str.ltrim");
+               strcpy_len(*buf, s + n, ncast);
                return MAL_SUCCEED;
        }
 }
@@ -3913,8 +3915,9 @@ str_rtrim(str *buf, int *buflen, const c
                size_t len = strlen(s);
                size_t n = rstrip(s, len, whitespace, NSPACES);
 
-               CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) (n + 1), "str.strip");
-               strcpy_len(*buf, s, n + 1);
+               n++;
+               CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) n, "str.strip");
+               strcpy_len(*buf, s, n);
                return MAL_SUCCEED;
        }
 }
@@ -4170,59 +4173,66 @@ STRRpad2(str *res, const str *arg1, cons
 }
 
 str
+str_substitute(str *buf, int *buflen, const char *s, const char *src, const char *dst, bit repeat)
+{
+       if (strNil(s) || strNil(src) || strNil(dst)) {
+               strcpy(*buf, str_nil);
+               return MAL_SUCCEED;
+       } else {
+               size_t lsrc = strlen(src), ldst = strlen(dst), n, l = strLen(s);
+               char *b, *fnd;
+               const char *pfnd;
+
+               if (!lsrc || !l) { /* s/src is an empty string, there's nothing to substitute */
+                       strcpy(*buf, "");
+                       return MAL_SUCCEED;
+               }
+
+               n = l + ldst;
+               if (repeat && ldst > lsrc)
+                       n = (ldst * l) / lsrc;  /* max length */
+
+               CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) n, "str.substitute");
+               b = *buf;
+               pfnd = s;
+               do {
+                       fnd = strstr(pfnd, src);
+                       if (fnd == NULL)
+                               break;
+                       n = fnd - pfnd;
+                       if (n > 0) {
+                               strncpy(b, pfnd, n);
+                               b += n;
+                       }
+                       if (ldst > 0) {
+                               strncpy(b, dst, ldst);
+                               b += ldst;
+                       }
+                       if (*fnd == 0)
+                               break;
+                       pfnd = fnd + lsrc;
+               } while (repeat);
+               strcpy(b, pfnd);
+               return MAL_SUCCEED;
+       }
+}
+
+static str
 STRSubstitute(str *res, const str *arg1, const str *arg2, const str *arg3, const bit *g)
 {
-       const char *s = *arg1;
-       const char *src = *arg2 ? *arg2 : "";
-       const char *dst = *arg3 ? *arg3 : "";
-       int repeat = *g;
-       size_t lsrc = strlen(src);
-       size_t ldst = strlen(dst);
-       size_t l = strLen(s);
-       size_t n;
-       char *buf;
-       const char *pfnd;
-       char *fnd;
+       int buflen = INITIAL_STR_BUFFER_LENGTH;
+       str buf = GDKmalloc(buflen), msg;
 
-       if (strNil(s) || strNil(src) || strNil(dst)) {
-               if ((*res = GDKstrdup(str_nil)) == NULL)
-                       throw(MAL, "str.substitute", SQLSTATE(HY013) MAL_MALLOC_FAIL);
-               return MAL_SUCCEED;
-       }
-       if (!lsrc || !l) { /* s/src is an empty string, there's nothing to substitute */
-               if ((*res = GDKstrdup(s)) == NULL)
-                       throw(MAL, "str.substitute", SQLSTATE(HY013) MAL_MALLOC_FAIL);
-               return MAL_SUCCEED;
+       *res = NULL;
+       if (!buf)
+               throw(SQL, "str.substitute", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       msg = str_substitute(&buf, &buflen, *arg1, *arg2, *arg3, *g);
+       if (!msg && !(*res = GDKstrdup(buf))) {
+               msg = createException(MAL, "str.substitute", SQLSTATE(HY013) MAL_MALLOC_FAIL);
        }
 
-       n = l + ldst;
-       if (repeat && ldst > lsrc)
-               n = (ldst * l) / lsrc;  /* max length */
-
-       buf = *res = GDKmalloc(n);
-       if (*res == NULL)
-               throw(MAL, "str.substitute", SQLSTATE(HY013) MAL_MALLOC_FAIL);
-
-       pfnd = s;
-       do {
-               fnd = strstr(pfnd, src);
-               if (fnd == NULL)
-                       break;
-               n = fnd - pfnd;
-               if (n > 0) {
-                       strncpy(buf, pfnd, n);
-                       buf += n;
-               }
-               if (ldst > 0) {
-                       strncpy(buf, dst, ldst);
-                       buf += ldst;
-               }
-               if (*fnd == 0)
-                       break;
-               pfnd = fnd + lsrc;
-       } while (repeat);
-       strcpy(buf, pfnd);
-       return MAL_SUCCEED;
+       GDKfree(buf);
+       return msg;
 }
 
 static str
@@ -4369,16 +4379,15 @@ STRlocate(int *ret, const str *needle, c
 }
 
 str
-STRinsert(str *ret, const str *input, const int *start, const int *nchars, const str *input2)
+str_insert(str *buf, int *buflen, const char *s, int strt, int l, const char *s2)
 {
-       str v, s = *input, s2 = *input2;
-       int strt = *start, l = *nchars;
-
        if (strNil(s) || strNil(s2) || is_int_nil(strt) || is_int_nil(l)) {
-               if ((*ret = GDKstrdup(str_nil)) == NULL)
-                       throw(MAL, "str.insert", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+               strcpy(*buf, str_nil);
+               return MAL_SUCCEED;
        } else {
+               str v;
                size_t l1 = UTF8_strlen(s);
+               int nextlen;
 
                if (l < 0)
                        throw(MAL, "str.insert", SQLSTATE(42000) "The number of characters for insert function must be non negative");
@@ -4390,16 +4399,35 @@ STRinsert(str *ret, const str *input, co
                }
                if ((size_t) strt > l1)
                        strt = (int) l1;
-               v = *ret = GDKmalloc(strlen(s) + strlen(s2) + 1);
-               if (v == NULL)
-                       throw(MAL, "str.insert", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+
+               nextlen = (int) (strlen(s) + strlen(s2) + 1);
+               CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.insert");
+               v = *buf;
                if (strt > 0)
                        v = UTF8_strncpy(v, s, strt);
                strcpy(v, s2);
                if (strt + l < (int) l1)
-                       strcat(v, UTF8_offset(s, strt + l));
+                       strcat(v, UTF8_offset((char *)s, strt + l));
+               return MAL_SUCCEED;
        }
-       return MAL_SUCCEED;
+}
+
+static str
+STRinsert(str *res, const str *input, const int *start, const int *nchars, const str *input2)
+{
+       int buflen = INITIAL_STR_BUFFER_LENGTH;
+       str buf = GDKmalloc(buflen), msg;
+
+       *res = NULL;
+       if (!buf)
+               throw(SQL, "str.insert", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       msg = str_insert(&buf, &buflen, *input, *start, *nchars, *input2);
+       if (!msg && !(*res = GDKstrdup(buf))) {
+               msg = createException(MAL, "str.insert", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       }
+
+       GDKfree(buf);
+       return msg;
 }
 
 str
diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h
--- a/monetdb5/modules/atoms/str.h
+++ b/monetdb5/modules/atoms/str.h
@@ -44,6 +44,8 @@ extern int str_reverse_str_search(const 
 extern int str_locate2(const char *needle, const char *haystack, int start);
 
 extern str str_splitpart(str *buf, int *buflen, const char *s, const char *s2, int f);
+extern str str_insert(str *buf, int *buflen, const char *s, int strt, int l, const char *s2);
+extern str str_substitute(str *buf, int *buflen, const char *s, const char *src, const char *dst, bit repeat);
 
 mal_export str STRStrip2(str *res, const str *arg1, const str *arg2);
 mal_export str STRLtrim2(str *res, const str *arg1, const str *arg2);
@@ -52,9 +54,7 @@ mal_export str STRLpad(str *res, const s
 mal_export str STRRpad(str *res, const str *arg1, const int *len);
 mal_export str STRLpad2(str *res, const str *arg1, const int *len, const str *arg2);
 mal_export str STRRpad2(str *res, const str *arg1, const int *len, const str *arg2);
-mal_export str STRSubstitute(str *res, const str *arg1, const str *arg2, const str *arg3, const bit *g);
 
-mal_export str STRinsert(str *ret, const str *s, const int *start, const int *l, const str *s2);
 mal_export str STRreplace(str *ret, const str *s1, const str *s2, const str *s3);
 
 #endif /* __string_H__ */
diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c
--- a/monetdb5/modules/kernel/batstr.c
+++ b/monetdb5/modules/kernel/batstr.c
@@ -1526,121 +1526,128 @@ STRbatsubstringTail(bat *ret, const bat 
 }
 
 static str
-STRbatSubstitutecst(bat *ret, const bat *l, const str *arg2, const str *arg3, const bit *rep)
+STRbatSubstitutecst(bat *res, const bat *bid, const str *arg2, const str *arg3, const bit *rep)
 {
-       BATiter lefti;
-       BAT *bn = NULL, *left;
-       BUN p,q;
-       str y = NULL, err = MAL_SUCCEED;
+       BATiter bi;
+       BAT *bn = NULL, *b = NULL;
+       BUN p, q;
+       int buflen = INITIAL_STR_BUFFER_LENGTH;
+       str x, y = *arg2, z = *arg3, buf = GDKmalloc(buflen), msg = MAL_SUCCEED;
+       bool nils = false;
+       bit r = *rep;
 
-       if (!(left = BATdescriptor(*l))) {
-               err = createException(MAL, "batstr.substritute", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+       if (!buf) {
+               msg = createException(MAL, "batstr.substritute", SQLSTATE(HY013) MAL_MALLOC_FAIL);
                goto bailout;
        }
-       if (!(bn = COLnew(left->hseqbase, TYPE_str,BATcount(left), TRANSIENT))) {
-               err = createException(MAL, "batstr.substritute", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       if ((b = BATdescriptor(*bid)) == NULL) {
+               msg = createException(MAL, "batstr.substritute", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
                goto bailout;
        }
-       bn->tnonil=true;
-       bn->tnil=false;
+       q = BATcount(b);
+       if (!(bn = COLnew(b->hseqbase, TYPE_str, q, TRANSIENT))) {
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to