Changeset: 315b3c29c703 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=315b3c29c703
Modified Files:
monetdb5/modules/atoms/str.c
monetdb5/modules/atoms/str.h
monetdb5/modules/kernel/batstr.c
Branch: alloc-less-str
Log Message:
More efficient implementations for insert and substitute
diffs (truncated from 674 to 300 lines):
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3472,9 +3472,9 @@ str_Sub_String(str *buf, int *buflen, co
return MAL_SUCCEED;
}
s = UTF8_strtail(s, off);
- len = UTF8_strtail(s, l) - s;
- CHECK_STR_BUFFER_LENGTH(buf, buflen, len + 1, "str.substring");
- strcpy_len(*buf, s, (size_t)(len + 1));
+ len = UTF8_strtail(s, l) - s + 1;
+ CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.substring");
+ strcpy_len(*buf, s, (size_t) len);
return MAL_SUCCEED;
}
@@ -3734,7 +3734,7 @@ str_splitpart(str *buf, int *buflen, con
len++;
CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) len, "str.splitpart");
- strcpy_len(*buf, s, (int) len);
+ strcpy_len(*buf, s, len);
return MAL_SUCCEED;
}
@@ -3843,8 +3843,9 @@ str_strip(str *buf, int *buflen, const c
len -= n;
n = rstrip(s, len, whitespace, NSPACES);
- CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) (n + 1),
"str.strip");
- strcpy_len(*buf, s, n + 1);
+ n++;
+ CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) n, "str.strip");
+ strcpy_len(*buf, s, n);
return MAL_SUCCEED;
}
}
@@ -3877,9 +3878,10 @@ str_ltrim(str *buf, int *buflen, const c
} else {
size_t len = strlen(s);
size_t n = lstrip(s, len, whitespace, NSPACES);
+ size_t ncast = len - n + 1;
- CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) (len - n + 1),
"str.ltrim");
- strcpy_len(*buf, s + n, len - n + 1);
+ CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) ncast, "str.ltrim");
+ strcpy_len(*buf, s + n, ncast);
return MAL_SUCCEED;
}
}
@@ -3913,8 +3915,9 @@ str_rtrim(str *buf, int *buflen, const c
size_t len = strlen(s);
size_t n = rstrip(s, len, whitespace, NSPACES);
- CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) (n + 1),
"str.strip");
- strcpy_len(*buf, s, n + 1);
+ n++;
+ CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) n, "str.strip");
+ strcpy_len(*buf, s, n);
return MAL_SUCCEED;
}
}
@@ -4170,59 +4173,66 @@ STRRpad2(str *res, const str *arg1, cons
}
str
+str_substitute(str *buf, int *buflen, const char *s, const char *src, const
char *dst, bit repeat)
+{
+ if (strNil(s) || strNil(src) || strNil(dst)) {
+ strcpy(*buf, str_nil);
+ return MAL_SUCCEED;
+ } else {
+ size_t lsrc = strlen(src), ldst = strlen(dst), n, l = strLen(s);
+ char *b, *fnd;
+ const char *pfnd;
+
+ if (!lsrc || !l) { /* s/src is an empty string, there's nothing
to substitute */
+ strcpy(*buf, "");
+ return MAL_SUCCEED;
+ }
+
+ n = l + ldst;
+ if (repeat && ldst > lsrc)
+ n = (ldst * l) / lsrc; /* max length */
+
+ CHECK_STR_BUFFER_LENGTH(buf, buflen, (int) n, "str.substitute");
+ b = *buf;
+ pfnd = s;
+ do {
+ fnd = strstr(pfnd, src);
+ if (fnd == NULL)
+ break;
+ n = fnd - pfnd;
+ if (n > 0) {
+ strncpy(b, pfnd, n);
+ b += n;
+ }
+ if (ldst > 0) {
+ strncpy(b, dst, ldst);
+ b += ldst;
+ }
+ if (*fnd == 0)
+ break;
+ pfnd = fnd + lsrc;
+ } while (repeat);
+ strcpy(b, pfnd);
+ return MAL_SUCCEED;
+ }
+}
+
+static str
STRSubstitute(str *res, const str *arg1, const str *arg2, const str *arg3,
const bit *g)
{
- const char *s = *arg1;
- const char *src = *arg2 ? *arg2 : "";
- const char *dst = *arg3 ? *arg3 : "";
- int repeat = *g;
- size_t lsrc = strlen(src);
- size_t ldst = strlen(dst);
- size_t l = strLen(s);
- size_t n;
- char *buf;
- const char *pfnd;
- char *fnd;
+ int buflen = INITIAL_STR_BUFFER_LENGTH;
+ str buf = GDKmalloc(buflen), msg;
- if (strNil(s) || strNil(src) || strNil(dst)) {
- if ((*res = GDKstrdup(str_nil)) == NULL)
- throw(MAL, "str.substitute", SQLSTATE(HY013)
MAL_MALLOC_FAIL);
- return MAL_SUCCEED;
- }
- if (!lsrc || !l) { /* s/src is an empty string, there's nothing to
substitute */
- if ((*res = GDKstrdup(s)) == NULL)
- throw(MAL, "str.substitute", SQLSTATE(HY013)
MAL_MALLOC_FAIL);
- return MAL_SUCCEED;
+ *res = NULL;
+ if (!buf)
+ throw(SQL, "str.substitute", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ msg = str_substitute(&buf, &buflen, *arg1, *arg2, *arg3, *g);
+ if (!msg && !(*res = GDKstrdup(buf))) {
+ msg = createException(MAL, "str.substitute", SQLSTATE(HY013)
MAL_MALLOC_FAIL);
}
- n = l + ldst;
- if (repeat && ldst > lsrc)
- n = (ldst * l) / lsrc; /* max length */
-
- buf = *res = GDKmalloc(n);
- if (*res == NULL)
- throw(MAL, "str.substitute", SQLSTATE(HY013) MAL_MALLOC_FAIL);
-
- pfnd = s;
- do {
- fnd = strstr(pfnd, src);
- if (fnd == NULL)
- break;
- n = fnd - pfnd;
- if (n > 0) {
- strncpy(buf, pfnd, n);
- buf += n;
- }
- if (ldst > 0) {
- strncpy(buf, dst, ldst);
- buf += ldst;
- }
- if (*fnd == 0)
- break;
- pfnd = fnd + lsrc;
- } while (repeat);
- strcpy(buf, pfnd);
- return MAL_SUCCEED;
+ GDKfree(buf);
+ return msg;
}
static str
@@ -4369,16 +4379,15 @@ STRlocate(int *ret, const str *needle, c
}
str
-STRinsert(str *ret, const str *input, const int *start, const int *nchars,
const str *input2)
+str_insert(str *buf, int *buflen, const char *s, int strt, int l, const char
*s2)
{
- str v, s = *input, s2 = *input2;
- int strt = *start, l = *nchars;
-
if (strNil(s) || strNil(s2) || is_int_nil(strt) || is_int_nil(l)) {
- if ((*ret = GDKstrdup(str_nil)) == NULL)
- throw(MAL, "str.insert", SQLSTATE(HY013)
MAL_MALLOC_FAIL);
+ strcpy(*buf, str_nil);
+ return MAL_SUCCEED;
} else {
+ str v;
size_t l1 = UTF8_strlen(s);
+ int nextlen;
if (l < 0)
throw(MAL, "str.insert", SQLSTATE(42000) "The number of
characters for insert function must be non negative");
@@ -4390,16 +4399,35 @@ STRinsert(str *ret, const str *input, co
}
if ((size_t) strt > l1)
strt = (int) l1;
- v = *ret = GDKmalloc(strlen(s) + strlen(s2) + 1);
- if (v == NULL)
- throw(MAL, "str.insert", SQLSTATE(HY013)
MAL_MALLOC_FAIL);
+
+ nextlen = (int) (strlen(s) + strlen(s2) + 1);
+ CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.insert");
+ v = *buf;
if (strt > 0)
v = UTF8_strncpy(v, s, strt);
strcpy(v, s2);
if (strt + l < (int) l1)
- strcat(v, UTF8_offset(s, strt + l));
+ strcat(v, UTF8_offset((char *)s, strt + l));
+ return MAL_SUCCEED;
}
- return MAL_SUCCEED;
+}
+
+static str
+STRinsert(str *res, const str *input, const int *start, const int *nchars,
const str *input2)
+{
+ int buflen = INITIAL_STR_BUFFER_LENGTH;
+ str buf = GDKmalloc(buflen), msg;
+
+ *res = NULL;
+ if (!buf)
+ throw(SQL, "str.insert", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ msg = str_insert(&buf, &buflen, *input, *start, *nchars, *input2);
+ if (!msg && !(*res = GDKstrdup(buf))) {
+ msg = createException(MAL, "str.insert", SQLSTATE(HY013)
MAL_MALLOC_FAIL);
+ }
+
+ GDKfree(buf);
+ return msg;
}
str
diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h
--- a/monetdb5/modules/atoms/str.h
+++ b/monetdb5/modules/atoms/str.h
@@ -44,6 +44,8 @@ extern int str_reverse_str_search(const
extern int str_locate2(const char *needle, const char *haystack, int start);
extern str str_splitpart(str *buf, int *buflen, const char *s, const char *s2,
int f);
+extern str str_insert(str *buf, int *buflen, const char *s, int strt, int l,
const char *s2);
+extern str str_substitute(str *buf, int *buflen, const char *s, const char
*src, const char *dst, bit repeat);
mal_export str STRStrip2(str *res, const str *arg1, const str *arg2);
mal_export str STRLtrim2(str *res, const str *arg1, const str *arg2);
@@ -52,9 +54,7 @@ mal_export str STRLpad(str *res, const s
mal_export str STRRpad(str *res, const str *arg1, const int *len);
mal_export str STRLpad2(str *res, const str *arg1, const int *len, const str
*arg2);
mal_export str STRRpad2(str *res, const str *arg1, const int *len, const str
*arg2);
-mal_export str STRSubstitute(str *res, const str *arg1, const str *arg2, const
str *arg3, const bit *g);
-mal_export str STRinsert(str *ret, const str *s, const int *start, const int
*l, const str *s2);
mal_export str STRreplace(str *ret, const str *s1, const str *s2, const str
*s3);
#endif /* __string_H__ */
diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c
--- a/monetdb5/modules/kernel/batstr.c
+++ b/monetdb5/modules/kernel/batstr.c
@@ -1526,121 +1526,128 @@ STRbatsubstringTail(bat *ret, const bat
}
static str
-STRbatSubstitutecst(bat *ret, const bat *l, const str *arg2, const str *arg3,
const bit *rep)
+STRbatSubstitutecst(bat *res, const bat *bid, const str *arg2, const str
*arg3, const bit *rep)
{
- BATiter lefti;
- BAT *bn = NULL, *left;
- BUN p,q;
- str y = NULL, err = MAL_SUCCEED;
+ BATiter bi;
+ BAT *bn = NULL, *b = NULL;
+ BUN p, q;
+ int buflen = INITIAL_STR_BUFFER_LENGTH;
+ str x, y = *arg2, z = *arg3, buf = GDKmalloc(buflen), msg = MAL_SUCCEED;
+ bool nils = false;
+ bit r = *rep;
- if (!(left = BATdescriptor(*l))) {
- err = createException(MAL, "batstr.substritute",
SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+ if (!buf) {
+ msg = createException(MAL, "batstr.substritute",
SQLSTATE(HY013) MAL_MALLOC_FAIL);
goto bailout;
}
- if (!(bn = COLnew(left->hseqbase, TYPE_str,BATcount(left), TRANSIENT)))
{
- err = createException(MAL, "batstr.substritute",
SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ if ((b = BATdescriptor(*bid)) == NULL) {
+ msg = createException(MAL, "batstr.substritute",
SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
goto bailout;
}
- bn->tnonil=true;
- bn->tnil=false;
+ q = BATcount(b);
+ if (!(bn = COLnew(b->hseqbase, TYPE_str, q, TRANSIENT))) {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list