Changeset: de1c35aedd6f for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/de1c35aedd6f
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_string.c
        monetdb5/modules/atoms/str.c
        monetdb5/modules/atoms/str.h
        monetdb5/modules/kernel/batstr.c
Branch: ascii-flag
Log Message:

Move case conversion fully to GDK.


diffs (242 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -206,6 +206,8 @@ gdk_return BATsum(void *res, int tp, BAT
 const char *BATtailname(const BAT *b);
 gdk_return BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int op, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__));
 BAT *BATthetaselect(BAT *b, BAT *s, const void *val, const char *op);
+BAT *BATtolower(BAT *b, BAT *s);
+BAT *BATtoupper(BAT *b, BAT *s);
 void BATtseqbase(BAT *b, oid o);
 BAT *BATunique(BAT *b, BAT *s);
 BAT *BATunmask(BAT *b);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2346,6 +2346,8 @@ gdk_export gdk_return GDKtolower(char **
 gdk_export int GDKstrncasecmp(const char *str1, const char *str2, size_t l1, size_t l2);
 gdk_export int GDKstrcasecmp(const char *s1, const char *s2);
 gdk_export char *GDKstrcasestr(const char *haystack, const char *needle);
+gdk_export BAT *BATtolower(BAT *b, BAT *s);
+gdk_export BAT *BATtoupper(BAT *b, BAT *s);
 
 /*
  * @- BAT sample operators
diff --git a/gdk/gdk_string.c b/gdk/gdk_string.c
--- a/gdk/gdk_string.c
+++ b/gdk/gdk_string.c
@@ -4501,7 +4501,7 @@ convertcase(char **buf, size_t *buflen, 
        size_t dstoff = 0;
        size_t bl = *buflen;
 
-       if (buf == NULL)
+       if (*buf == NULL)
                bl = 0;
        while (*s) {
                /* we are at the start of a Unicode codepoint encoded in
@@ -4553,6 +4553,16 @@ convertcase(char **buf, size_t *buflen, 
                        dst[dstoff++] = 0x80 | (v & 0x3F);
                }
        }
+       if (dstoff + 1 > bl) {
+               size_t newlen = dstoff + 1;
+               dst = GDKrealloc(*buf, newlen);
+               if (dst == NULL) {
+                       *buflen = bl;
+                       return GDK_FAIL;
+               }
+               *buf = (char *) dst;
+               bl = newlen;
+       }
        dst[dstoff] = '\0';
        *buflen = bl;
        return GDK_SUCCEED;
@@ -4572,6 +4582,64 @@ GDKtolower(char **buf, size_t *buflen, c
        return convertcase(buf, buflen, (const uint8_t *) s, lowercase);
 }
 
+static BAT *
+BATcaseconvert(BAT *b, BAT *s, const int *convtab, const char *func)
+{
+       lng t0 = 0;
+       BAT *bn;
+       struct canditer ci;
+       BATiter bi;
+       oid bhseqbase = b->hseqbase;
+       QryCtx *qry_ctx = MT_thread_get_qry_ctx();
+       qry_ctx = qry_ctx ? qry_ctx : &(QryCtx) {.endtime = 0};
+
+       TRC_DEBUG_IF(ALGO) t0 = GDKusec();
+       BATcheck(b, NULL);
+       canditer_init(&ci, b, s);
+       bn = COLnew(ci.hseq, TYPE_str, ci.ncand, TRANSIENT);
+       if (bn == NULL)
+               return NULL;
+       bi = bat_iterator(b);
+       char *buf = NULL;
+       size_t buflen = 0;
+       TIMEOUT_LOOP(ci.ncand, qry_ctx) {
+               BUN x = canditer_next(&ci) - bhseqbase;
+               if (convertcase(&buf, &buflen, (const uint8_t *) BUNtvar(bi, x),
+                               convtab) != GDK_SUCCEED ||
+                   BUNappend(bn, buf, false) != GDK_SUCCEED) {
+                       goto bailout;
+               }
+       }
+       GDKfree(buf);
+       bat_iterator_end(&bi);
+       TIMEOUT_CHECK(qry_ctx,
+                     GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
+       TRC_DEBUG(ALGO, "%s: b=" ALGOBATFMT ",s=" ALGOOPTBATFMT
+                 " -> " ALGOOPTBATFMT " " LLFMT "usec\n",
+                 func,
+                 ALGOBATPAR(b), ALGOOPTBATPAR(s),
+                 ALGOOPTBATPAR(bn), GDKusec() - t0);
+       return bn;
+
+  bailout:
+       GDKfree(buf);
+       bat_iterator_end(&bi);
+       BBPreclaim(bn);
+       return NULL;
+}
+
+BAT *
+BATtolower(BAT *b, BAT *s)
+{
+       return BATcaseconvert(b, s, lowercase, __func__);
+}
+
+BAT *
+BATtoupper(BAT *b, BAT *s)
+{
+       return BATcaseconvert(b, s, uppercase, __func__);
+}
+
 /* Unicode-aware case insensitive string comparison of two UTF-8 encoded
  * string; do not look beyond the lengths in bytes of the two strings;
  * if either one reaches the end of the buffer (as given by the length
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -719,14 +719,6 @@ STRWChrAt(int *res, const str *arg1, con
        return str_wchr_at(res, *arg1, *at);
 }
 
-str
-str_lower(str *buf, size_t *buflen, const char *s)
-{
-       if (GDKtolower(buf, buflen, s) != GDK_SUCCEED)
-               throw(MAL, "str.lower", SQLSTATE(HY013) MAL_MALLOC_FAIL);
-       return MAL_SUCCEED;
-}
-
 static inline str
 STRlower(str *res, const str *arg1)
 {
@@ -741,10 +733,9 @@ STRlower(str *res, const str *arg1)
                *res = NULL;
                if (!(buf = GDKmalloc(buflen)))
                        throw(MAL, "str.lower", SQLSTATE(HY013) MAL_MALLOC_FAIL);
-               msg = str_lower(&buf, &buflen, s);
-               if (msg != MAL_SUCCEED) {
+               if (GDKtolower(&buf, &buflen, s) != GDK_SUCCEED) {
                        GDKfree(buf);
-                       return msg;
+                       throw(MAL, "str.lower", GDK_EXCEPTION);
                }
                *res = GDKstrdup(buf);
        }
@@ -756,14 +747,6 @@ STRlower(str *res, const str *arg1)
        return msg;
 }
 
-str
-str_upper(str *buf, size_t *buflen, const char *s)
-{
-       if (GDKtoupper(buf, buflen, s) != GDK_SUCCEED)
-               throw(MAL, "str.upper", SQLSTATE(HY013) MAL_MALLOC_FAIL);
-       return MAL_SUCCEED;
-}
-
 static str
 STRupper(str *res, const str *arg1)
 {
@@ -778,10 +761,9 @@ STRupper(str *res, const str *arg1)
                *res = NULL;
                if (!(buf = GDKmalloc(buflen)))
                        throw(MAL, "str.upper", SQLSTATE(HY013) MAL_MALLOC_FAIL);
-               msg = str_upper(&buf, &buflen, s);
-               if (msg != MAL_SUCCEED) {
+               if (GDKtoupper(&buf, &buflen, s) != GDK_SUCCEED) {
                        GDKfree(buf);
-                       return msg;
+                       throw(MAL, "str.upper", GDK_EXCEPTION);
                }
                *res = GDKstrdup(buf);
        }
diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h
--- a/monetdb5/modules/atoms/str.h
+++ b/monetdb5/modules/atoms/str.h
@@ -185,13 +185,6 @@ extern str str_suffix(str *buf, size_t *
 extern str str_repeat(str *buf, size_t *buflen, const char *s, int c)
                __attribute__((__visibility__("hidden")));
 
-/* Make sure the UTF8_toLowerFrom hash is locked! */
-extern str str_lower(str *buf, size_t *buflen, const char *s)
-               __attribute__((__visibility__("hidden")));
-/* Make sure the UTF8_toUpperFrom hash is locked! */
-extern str str_upper(str *buf, size_t *buflen, const char *s)
-               __attribute__((__visibility__("hidden")));
-
 extern str str_strip(str *buf, size_t *buflen, const char *s)
                __attribute__((__visibility__("hidden")));
 extern str str_ltrim(str *buf, size_t *buflen, const char *s)
diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c
--- a/monetdb5/modules/kernel/batstr.c
+++ b/monetdb5/modules/kernel/batstr.c
@@ -1605,15 +1605,42 @@ do_batstr_batint_batstr_str(Client cntxt
 }
 
 static str
+STRbatConvert(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
+                         BAT *(*func)(BAT *, BAT *), const char *malfunc)
+{
+       BAT *bn = NULL, *b = NULL, *bs = NULL;
+       bat *res = getArgReference_bat(stk, pci, 0),
+               *bid = getArgReference_bat(stk, pci, 1),
+               *sid1 = pci->argc == 3 ? getArgReference_bat(stk, pci, 2) : NULL;
+
+       (void) cntxt;
+       (void) mb;
+       if (!(b = BATdescriptor(*bid))) {
+               throw(MAL, malfunc, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+       }
+       if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) {
+               BBPreclaim(b);
+               throw(MAL, malfunc, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+       }
+       bn = (*func)(b, bs);
+       unfix_inputs(2, b, bs);
+       if (bn == NULL)
+               throw(MAL, malfunc, GDK_EXCEPTION);
+       *res = bn->batCacheid;
+       BBPkeepref(bn);
+       return MAL_SUCCEED;
+}
+
+static str
 STRbatLower(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
-       return do_batstr_str(cntxt, mb, stk, pci, "batstr.lower", str_lower);
+       return STRbatConvert(cntxt, mb, stk, pci, BATtolower, "batstr.toLower");
 }
 
 static str
 STRbatUpper(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
-       return do_batstr_str(cntxt, mb, stk, pci, "batstr.upper", str_upper);
+       return STRbatConvert(cntxt, mb, stk, pci, BATtoupper, "batstr.toUpper");
 }
 
 static str
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to