Changeset: 1d869b0d669f for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1d869b0d669f
Modified Files:
monetdb5/modules/atoms/str.c
monetdb5/modules/atoms/str.h
monetdb5/modules/kernel/batstr.c
sql/backends/monet5/UDF/udf/udf.c
Branch: alloc-less-str
Log Message:
Re-allocate the string buffer in multiples of 1024 bytes, check for nils in the
reverse function, and give the ltrim2, rtrim2 and strip2 functions a larger
initial buffer, because they also use it to hold codepoints stored as ints.
diffs (209 lines):
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3934,7 +3934,7 @@ trimchars(str *buf, size_t *buflen, size
int c, *cbuf;
CHECK_STR_BUFFER_LENGTH(buf, buflen, nlen, malfunc);
- cbuf = *((int**)buf);
+ cbuf = *(int**)buf;
while (*s) {
UTF8_GETCHAR(c, s);
@@ -3965,10 +3965,10 @@ str_strip2(str *buf, size_t *buflen, str
if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.strip2"))
!= MAL_SUCCEED)
return msg;
len = strlen(s);
- n = lstrip(s, len, *((int**)buf), n3);
+ n = lstrip(s, len, *(int**)buf, n3);
s += n;
len -= n;
- n = rstrip(s, len, *((int**)buf), n3);
+ n = rstrip(s, len, *(int**)buf, n3);
n++;
CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.strip2");
@@ -3982,7 +3982,7 @@ str_strip2(str *buf, size_t *buflen, str
static str
STRStrip2(str *res, const str *arg1, const str *arg2)
{
- size_t buflen = INITIAL_STR_BUFFER_LENGTH;
+ size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
str buf = GDKmalloc(buflen), msg;
*res = NULL;
@@ -4015,7 +4015,7 @@ str_ltrim2(str *buf, size_t *buflen, str
if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2"))
!= MAL_SUCCEED)
return msg;
len = strlen(s);
- n = lstrip(s, len, *((int**)buf), n3);
+ n = lstrip(s, len, *(int**)buf, n3);
nallocate = len - n + 1;
CHECK_STR_BUFFER_LENGTH(buf, buflen, nallocate, "str.ltrim2");
@@ -4029,7 +4029,7 @@ str_ltrim2(str *buf, size_t *buflen, str
static str
STRLtrim2(str *res, const str *arg1, const str *arg2)
{
- size_t buflen = INITIAL_STR_BUFFER_LENGTH;
+ size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
str buf = GDKmalloc(buflen), msg;
*res = NULL;
@@ -4062,7 +4062,7 @@ str_rtrim2(str *buf, size_t *buflen, str
if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2"))
!= MAL_SUCCEED)
return msg;
len = strlen(s);
- n = rstrip(s, len, *((int**)buf), n3);
+ n = rstrip(s, len, *(int**)buf, n3);
n++;
CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.rtrim2");
@@ -4076,7 +4076,7 @@ str_rtrim2(str *buf, size_t *buflen, str
static str
STRRtrim2(str *res, const str *arg1, const str *arg2)
{
- size_t buflen = INITIAL_STR_BUFFER_LENGTH;
+ size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
str buf = GDKmalloc(buflen), msg;
*res = NULL;
diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h
--- a/monetdb5/modules/atoms/str.h
+++ b/monetdb5/modules/atoms/str.h
@@ -15,14 +15,14 @@
/* The batstr module functions use a single buffer to avoid malloc/free
overhead.
Note the buffer should be always large enough to hold null strings, so less
testing will be required */
-#define INITIAL_STR_BUFFER_LENGTH MAX(strlen(str_nil) + 1, 1024)
+#define INITIAL_STR_BUFFER_LENGTH (MAX(strlen(str_nil) + 1, 1024))
/* The batstr module functions use a single buffer to avoid malloc/free
overhead.
Note the buffer should be always large enough to hold null strings, so less
testing will be required */
#define CHECK_STR_BUFFER_LENGTH(BUFFER, BUFFER_LEN, NEXT_LEN, OP) \
- do { \
- if (NEXT_LEN > *BUFFER_LEN) { \
- size_t newlen = NEXT_LEN + 1024; \
+ do { \
+ if ((NEXT_LEN) > *BUFFER_LEN) { \
+ size_t newlen = (((NEXT_LEN) + 1023) & ~1023); /* align
to a multiple of 1024 bytes */ \
str newbuf = GDKmalloc(newlen); \
if (!newbuf) \
throw(MAL, OP, SQLSTATE(HY013)
MAL_MALLOC_FAIL); \
diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c
--- a/monetdb5/modules/kernel/batstr.c
+++ b/monetdb5/modules/kernel/batstr.c
@@ -296,12 +296,11 @@ bailout:
* Output type: str (a BAT of strings)
*/
static str
-do_batstr_conststr_str(bat *res, const bat *l, const str *s2, const char
*name, str (*func)(str*, size_t*, str, str))
+do_batstr_conststr_str(bat *res, const bat *l, const str *s2, const char
*name, size_t buflen, str (*func)(str*, size_t*, str, str))
{
BATiter bi;
BAT *bn = NULL, *b = NULL;
BUN p, q;
- size_t buflen = INITIAL_STR_BUFFER_LENGTH;
str x, y = *s2, buf = GDKmalloc(buflen), msg = MAL_SUCCEED;
bool nils = false;
@@ -353,12 +352,11 @@ bailout:
* Output type: str (a BAT of strings)
*/
static str
-do_batstr_batstr_str(bat *res, const bat *l, const bat *l2, const char *name,
str (*func)(str*, size_t*, str, str))
+do_batstr_batstr_str(bat *res, const bat *l, const bat *l2, const char *name,
size_t buflen, str (*func)(str*, size_t*, str, str))
{
BATiter lefti, righti;
BAT *bn = NULL, *left = NULL, *right = NULL;
BUN p, q;
- size_t buflen = INITIAL_STR_BUFFER_LENGTH;
str x, y, buf = GDKmalloc(buflen), msg = MAL_SUCCEED;
bool nils = false;
@@ -828,37 +826,37 @@ STRbatRtrim(bat *ret, const bat *l)
static str
STRbatStrip2_const(bat *ret, const bat *l, const str *s2)
{
- return do_batstr_conststr_str(ret, l, s2, "batstr.strip", str_strip2);
+ return do_batstr_conststr_str(ret, l, s2, "batstr.strip",
INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_strip2);
}
static str
STRbatLtrim2_const(bat *ret, const bat *l, const str *s2)
{
- return do_batstr_conststr_str(ret, l, s2, "batstr.ltrim", str_ltrim2);
+ return do_batstr_conststr_str(ret, l, s2, "batstr.ltrim",
INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_ltrim2);
}
static str
STRbatRtrim2_const(bat *ret, const bat *l, const str *s2)
{
- return do_batstr_conststr_str(ret, l, s2, "batstr.rtrim", str_rtrim2);
+ return do_batstr_conststr_str(ret, l, s2, "batstr.rtrim",
INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_rtrim2);
}
static str
STRbatStrip2_bat(bat *ret, const bat *l, const bat *l2)
{
- return do_batstr_batstr_str(ret, l, l2, "batstr.strip", str_strip2);
+ return do_batstr_batstr_str(ret, l, l2, "batstr.strip",
INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_strip2);
}
static str
STRbatLtrim2_bat(bat *ret, const bat *l, const bat *l2)
{
- return do_batstr_batstr_str(ret, l, l2, "batstr.ltrim", str_ltrim2);
+ return do_batstr_batstr_str(ret, l, l2, "batstr.ltrim",
INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_ltrim2);
}
static str
STRbatRtrim2_bat(bat *ret, const bat *l, const bat *l2)
{
- return do_batstr_batstr_str(ret, l, l2, "batstr.rtrim", str_rtrim2);
+ return do_batstr_batstr_str(ret, l, l2, "batstr.rtrim",
INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_rtrim2);
}
static str
diff --git a/sql/backends/monet5/UDF/udf/udf.c
b/sql/backends/monet5/UDF/udf/udf.c
--- a/sql/backends/monet5/UDF/udf/udf.c
+++ b/sql/backends/monet5/UDF/udf/udf.c
@@ -118,6 +118,7 @@ UDFBATreverse_(BAT **ret, BAT *src)
BUN p = 0, q = 0;
size_t buflen = INITIAL_STR_BUFFER_LENGTH;
str msg = MAL_SUCCEED, buf;
+ bool nils = false;
/* assert calling sanity */
assert(ret);
@@ -158,14 +159,15 @@ UDFBATreverse_(BAT **ret, BAT *src)
msg = createException(MAL, "batudf.reverse",
SQLSTATE(HY013) MAL_MALLOC_FAIL);
goto bailout;
}
+ nils |= strNil(buf);
}
bailout:
GDKfree(buf);
if (bn && !msg) {
BATsetcount(bn, q);
- bn->tnil = src->tnil;
- bn->tnonil = src->tnonil;
+ bn->tnil = nils;
+ bn->tnonil = !nils;
bn->tkey = BATcount(bn) <= 1;
bn->tsorted = BATcount(bn) <= 1;
bn->trevsorted = BATcount(bn) <= 1;
@@ -189,10 +191,10 @@ UDFBATreverse(bat *ret, const bat *arg)
/* bat-id -> BAT-descriptor */
if ((src = BATdescriptor(*arg)) == NULL)
- throw(MAL, "batudf.reverse", SQLSTATE(HY002)
RUNTIME_OBJECT_MISSING);
+ throw(MAL, "batudf.reverse", SQLSTATE(HY002)
RUNTIME_OBJECT_MISSING);
/* do the work */
- msg = UDFBATreverse_ ( &res, src );
+ msg = UDFBATreverse_( &res, src );
/* release input BAT-descriptor */
BBPunfix(src->batCacheid);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list