Changeset: 43fb721ed302 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=43fb721ed302
Modified Files:
        monetdb5/modules/atoms/str.c
Branch: Jul2015
Log Message:

More efficient code to convert to upper/lower case.
Strings are mostly ASCII, and for those the conversion is simple.
Change inspired by a developers-list message from Roberto Cornacchia.


diffs (36 lines):

diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -1446,6 +1446,7 @@ convertCase(BAT *from, BAT *to, str *res
        const unsigned char *src = (const unsigned char *) s;
        const unsigned char *end = (const unsigned char *) (src + len);
        BUN UTF8_CONV_r;
+       int lower_to_upper = from == UTF8_lowerBat;
 
        if (strNil(s)) {
                *res = GDKstrdup(str_nil);
@@ -1457,9 +1458,21 @@ convertCase(BAT *from, BAT *to, str *res
                                int c;
 
                                UTF8_GETCHAR(c, src);
-                               HASHfnd_int(UTF8_CONV_r, fromi, &c);
-                               if (UTF8_CONV_r != BUN_NONE)
-                                       c = *(int*) BUNtloc(toi, UTF8_CONV_r);
+                               if (c < 0x80) {
+                                       /* for ASCII characters we don't need to do a hash
+                                        * lookup */
+                                       if (lower_to_upper) {
+                                               if ('a' <= c && c <= 'z')
+                                                       c += 'A' - 'a';
+                                       } else {
+                                               if ('A' <= c && c <= 'Z')
+                                                       c += 'a' - 'A';
+                                       }
+                               } else {
+                                       HASHfnd_int(UTF8_CONV_r, fromi, &c);
+                                       if (UTF8_CONV_r != BUN_NONE)
+                                               c = *(int*) BUNtloc(toi, UTF8_CONV_r);
+                               }
                                if (dst + 6 > (unsigned char *) *res + len) {
                                        /* not guaranteed to fit, so allocate more space;
                                         * also allocate enough for the rest of the
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to