Changeset: 43fb721ed302 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=43fb721ed302
Modified Files:
monetdb5/modules/atoms/str.c
Branch: Jul2015
Log Message:
More efficient code to convert to upper/lower case.
Strings are mostly ASCII, and for those the conversion is simple.
Change inspired by a developers-list message from Roberto Cornacchia.
diffs (36 lines):
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -1446,6 +1446,7 @@ convertCase(BAT *from, BAT *to, str *res
const unsigned char *src = (const unsigned char *) s;
const unsigned char *end = (const unsigned char *) (src + len);
BUN UTF8_CONV_r;
+ int lower_to_upper = from == UTF8_lowerBat;
if (strNil(s)) {
*res = GDKstrdup(str_nil);
@@ -1457,9 +1458,21 @@ convertCase(BAT *from, BAT *to, str *res
int c;
UTF8_GETCHAR(c, src);
- HASHfnd_int(UTF8_CONV_r, fromi, &c);
- if (UTF8_CONV_r != BUN_NONE)
- c = *(int*) BUNtloc(toi, UTF8_CONV_r);
+ if (c < 0x80) {
+ /* for ASCII characters we don't need to do a hash
+ * lookup */
+ if (lower_to_upper) {
+ if ('a' <= c && c <= 'z')
+ c += 'A' - 'a';
+ } else {
+ if ('A' <= c && c <= 'Z')
+ c += 'a' - 'A';
+ }
+ } else {
+ HASHfnd_int(UTF8_CONV_r, fromi, &c);
+ if (UTF8_CONV_r != BUN_NONE)
+ c = *(int*) BUNtloc(toi, UTF8_CONV_r);
+ }
if (dst + 6 > (unsigned char *) *res + len) {
/* not guaranteed to fit, so allocate more space;
* also allocate enough for the rest of the
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list