MonetDB: alloc-less-str - Added string buffer for more string functions and cleanup

Pedro Ferreira Thu, 24 Sep 2020 07:44:29 -0700

Changeset: b0c02c519446 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b0c02c519446
Modified Files:
        monetdb5/modules/atoms/str.c
        monetdb5/modules/atoms/str.h
        monetdb5/modules/kernel/batstr.c
        sql/backends/monet5/sql_cast.c
        sql/backends/monet5/sql_result.c
Branch: alloc-less-str
Log Message:


Added string buffer for more string functions and cleanup


diffs (truncated from 746 to 300 lines):

diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3209,8 +3209,23 @@ UTF8_offset(char *restrict s, int n)
        return s;
 }
 
+/* The batstr module functions use a single buffer to avoid malloc/free overhead.
+   Note the buffer should be always large enough to hold null strings, so less testing will be required */
+#define CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, op) \
+       do {  \
+               if (nextlen > *buflen) { \
+                       int newlen = nextlen + 1024; \
+                       str newbuf = GDKmalloc(newlen); \
+                       if (!newbuf) \
+                               throw(MAL, op, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
+                       GDKfree(*buf); \
+                       *buf = newbuf; \
+                       *buflen = newlen; \
+               } \
+       } while (0)
+
 static str
-convertCase(BAT *from, BAT *to, str *res, const char *src, const char *malfunc)
+convertCase(BAT *from, BAT *to, str *buf, int *buflen, const char *src, const char *malfunc)
 {
        size_t len = strlen(src);
        char *dst;
@@ -3218,14 +3233,17 @@ convertCase(BAT *from, BAT *to, str *res
        bool lower_to_upper = from == UTF8_toUpperFrom;
 
        if (strNil(src)) {
-               *res = GDKstrdup(str_nil);
+               strcpy(*buf, str_nil);
+               return MAL_SUCCEED;
        } else {
-               if (BAThash(from) != GDK_SUCCEED ||
-                       (*res = GDKmalloc(len + 1)) == NULL) {
+               Hash *h;
+               int nextlen = len + 1;
+               if (BAThash(from) != GDK_SUCCEED)
                        throw(MAL, malfunc, SQLSTATE(HY013) MAL_MALLOC_FAIL);
-               }
-               Hash *h = from->thash;
-               dst = *res;
+               CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, malfunc);
+
+               h = from->thash;
+               dst = *buf;
                while (src < end) {
                        int c;
 
@@ -3251,29 +3269,22 @@ convertCase(BAT *from, BAT *to, str *res
                                        }
                                }
                        }
-                       if (dst + UTF8_CHARLEN(c) > *res + len) {
+                       if (dst + UTF8_CHARLEN(c) > *buf + len) {
                                /* doesn't fit, so allocate more space;
                                 * also allocate enough for the rest of the
                                 * source */
-                               size_t off = dst - *res;
+                               size_t off = dst - *buf;
+                               int nextlen = (len += 4 + (end - src)) + 1;
 
-                               dst = GDKrealloc(*res, (len += 4 + (end - src)) + 1);
-                               if (dst == NULL) {
-                                       /* if realloc fails, original buffer is still
-                                        * allocated, so free it */
-                                       GDKfree(*res);
-                                       throw(MAL, malfunc, SQLSTATE(HY013) MAL_MALLOC_FAIL);
-                               }
-                               *res = dst;
-                               dst = *res + off;
+                               CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, malfunc);
+                               dst = *buf + off;
                        }
                        UTF8_PUTCHAR(c, dst);
                }
                *dst = 0;
+               return MAL_SUCCEED;
        }
-       if (*res != NULL)
-               return MAL_SUCCEED;
-  illegal:
+illegal:
        throw(MAL, malfunc, SQLSTATE(42000) "Illegal Unicode code point");
 }
 
@@ -3323,7 +3334,7 @@ STRlike(const char *s, const char *pat, 
        return *t == 0 && *p == 0;
 }
 
-str
+static str
 STRlikewrap(bit *ret, const str *s, const str *pat, const str *esc)
 {
        if (strNil(*s) || strNil(*pat) || strNil(*esc))
@@ -3333,7 +3344,7 @@ STRlikewrap(bit *ret, const str *s, cons
        return MAL_SUCCEED;
 }
 
-str
+static str
 STRlikewrap2(bit *ret, const str *s, const str *pat)
 {
        if (strNil(*s) || strNil(*pat))
@@ -3343,7 +3354,7 @@ STRlikewrap2(bit *ret, const str *s, con
        return MAL_SUCCEED;
 }
 
-str
+static str
 STRtostr(str *res, const str *src)
 {
        if( *src == 0)
@@ -3369,7 +3380,7 @@ str_length(const char *s)
        return (int) l;
 }
 
-str
+static str
 STRLength(int *res, const str *arg1)
 {
        *res = str_length(*arg1);
@@ -3388,26 +3399,13 @@ str_bytes(const char *s)
        return (int) l;
 }
 
-str
+static str
 STRBytes(int *res, const str *arg1)
 {
        *res = str_bytes(*arg1);
        return MAL_SUCCEED;
 }
 
-#define CHECK_BUFFER_LENGTH(buf, buflen, nextlen, op) \
-       do {  \
-               if (nextlen > *buflen) { \
-                       int newlen = nextlen + 1024; \
-                       str newbuf = GDKmalloc(newlen); \
-                       if (!newbuf) \
-                               throw(MAL, op, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
-                       GDKfree(*buf); \
-                       *buf = newbuf; \
-                       *buflen = newlen; \
-               } \
-       } while (0)
-
 str
 str_tail(str *buf, int *buflen, const char *s, int off)
 {
@@ -3425,12 +3423,12 @@ str_tail(str *buf, int *buflen, const ch
        }
        str tail = UTF8_strtail(s, off);
        int nextlen = (int) strlen(tail) + 1;
-       CHECK_BUFFER_LENGTH(buf, buflen, nextlen, "str.tail");
+       CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.tail");
        strcpy(*buf, tail);
        return MAL_SUCCEED;
 }
 
-str
+static str
 STRTail(str *res, const str *arg1, const int *offset)
 {
        int buflen = INITIAL_STR_BUFFER_LENGTH;
@@ -3475,7 +3473,7 @@ str_Sub_String(str *buf, int *buflen, co
        }
        s = UTF8_strtail(s, off);
        len = UTF8_strtail(s, l) - s;
-       CHECK_BUFFER_LENGTH(buf, buflen, len + 1, "str.substring");
+       CHECK_STR_BUFFER_LENGTH(buf, buflen, len + 1, "str.substring");
        strcpy_len(*buf, s, (size_t)(len + 1));
        return MAL_SUCCEED;
 }
@@ -3505,7 +3503,7 @@ str_from_wchr(str *buf, int *buflen, int
                strcpy(*buf, str_nil);
                return MAL_SUCCEED;
        }
-       CHECK_BUFFER_LENGTH(buf, buflen, 5, "str.unicode");
+       CHECK_STR_BUFFER_LENGTH(buf, buflen, 5, "str.unicode");
        str s = *buf;
        UTF8_PUTCHAR(c, s);
        *s = 0;
@@ -3514,7 +3512,7 @@ illegal:
        throw(MAL, "str.unicode", SQLSTATE(42000) "Illegal Unicode code point");
 }
 
-str
+static str
 STRFromWChr(str *res, const int *c)
 {
        int buflen = INITIAL_STR_BUFFER_LENGTH;
@@ -3552,7 +3550,7 @@ illegal:
        throw(MAL, "str.unicodeAt", SQLSTATE(42000) "Illegal Unicode code point");
 }
 
-str
+static str
 STRWChrAt(int *res, const str *arg1, const int *at)
 {
        return str_wchr_at(res, *arg1, *at);
@@ -3568,7 +3566,7 @@ str_is_prefix(const char *s, const char 
        return strncmp(s, prefix, strlen(prefix)) == 0;
 }
 
-str
+static str
 STRPrefix(bit *res, const str *arg1, const str *arg2)
 {
        *res = str_is_prefix(*arg1, *arg2);
@@ -3593,7 +3591,7 @@ str_is_suffix(const char *s, const char 
 }
 
 /* returns whether arg1 ends with arg2 */
-str
+static str
 STRSuffix(bit *res, const str *arg1, const str *arg2)
 {
        *res = str_is_suffix(*arg1, *arg2);
@@ -3601,15 +3599,51 @@ STRSuffix(bit *res, const str *arg1, con
 }
 
 str
+str_lower(str *buf, int *buflen, const char *s)
+{
+       return convertCase(UTF8_toLowerFrom, UTF8_toLowerTo, buf, buflen, s, "str.lower");
+}
+
+static str
 STRLower(str *res, const str *arg1)
 {
-       return convertCase(UTF8_toLowerFrom, UTF8_toLowerTo, res, *arg1, "str.lower");
+       int buflen = INITIAL_STR_BUFFER_LENGTH;
+       str buf = GDKmalloc(buflen), msg;
+
+       *res = NULL;
+       if (!buf)
+               throw(SQL, "str.lower", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       msg = str_lower(&buf, &buflen, *arg1);
+       if (!msg && !(*res = GDKstrdup(buf))) {
+               msg = createException(MAL, "str.lower", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       }
+
+       GDKfree(buf);
+       return msg;
 }
 
 str
+str_upper(str *buf, int *buflen, const char *s)
+{
+       return convertCase(UTF8_toUpperFrom, UTF8_toUpperTo, buf, buflen, s, "str.upper");
+}
+
+static str
 STRUpper(str *res, const str *arg1)
 {
-       return convertCase(UTF8_toUpperFrom, UTF8_toUpperTo, res, *arg1, "str.upper");
+       int buflen = INITIAL_STR_BUFFER_LENGTH;
+       str buf = GDKmalloc(buflen), msg;
+
+       *res = NULL;
+       if (!buf)
+               throw(SQL, "str.upper", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       msg = str_upper(&buf, &buflen, *arg1);
+       if (!msg && !(*res = GDKstrdup(buf))) {
+               msg = createException(MAL, "str.upper", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       }
+
+       GDKfree(buf);
+       return msg;
 }
 
 /* find first occurrence of needle in haystack */
@@ -3781,67 +3815,112 @@ const int whitespace[] = {
 };
 #define NSPACES                (sizeof(whitespace) / sizeof(whitespace[0]))
 
-/* remove all whitespace from either side of arg1 */
 str
-STRStrip(str *res, const str *arg1)
+str_strip(str *buf, int *buflen, const char *s)
 {
-       const char *s = *arg1;
-       size_t len;
-       size_t n;
-
        if (strNil(s)) {
-               *res = GDKstrdup(str_nil);
+               strcpy(*buf, str_nil);
+               return MAL_SUCCEED;
        } else {
-               len = strlen(s);
-               n = lstrip(s, len, whitespace, NSPACES);
+               size_t len = strlen(s);
+               size_t n = lstrip(s, len, whitespace, NSPACES);
                s += n;
                len -= n;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

MonetDB: alloc-less-str - Added string buffer for more string fu...

Reply via email to