Changeset: f6605069493d for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f6605069493d
Modified Files:
	monetdb5/modules/atoms/str.c
	sql/test/SQLancer/Tests/sqlancer03.sql
	sql/test/SQLancer/Tests/sqlancer03.stable.out
Branch: Jun2020
Log Message:
Handle UTF-8 strings at str.insert function diffs (238 lines): diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c --- a/monetdb5/modules/atoms/str.c +++ b/monetdb5/modules/atoms/str.c @@ -3156,6 +3156,59 @@ UTF8_strtail(const char *s, int pos) return (str) s; } +static inline str +UTF8_strncpy(char *restrict dst, const char *restrict s, int n) +{ + UTF8_assert(s); + while (*s && n) { + if ((*s & 0xF8) == 0xF0) { + /* 4 byte UTF-8 sequence */ + *dst++ = *s++; + *dst++ = *s++; + *dst++ = *s++; + *dst++ = *s++; + } else if ((*s & 0xF0) == 0xE0) { + /* 3 byte UTF-8 sequence */ + *dst++ = *s++; + *dst++ = *s++; + *dst++ = *s++; + } else if ((*s & 0xE0) == 0xC0) { + /* 2 byte UTF-8 sequence */ + *dst++ = *s++; + *dst++ = *s++; + } else { + /* 1 byte UTF-8 "sequence" */ + *dst++ = *s++; + } + n--; + } + *dst = '\0'; + return dst; +} + +static inline str +UTF8_offset(char *restrict s, int n) +{ + UTF8_assert(s); + while (*s && n) { + if ((*s & 0xF8) == 0xF0) { + /* 4 byte UTF-8 sequence */ + s += 4; + } else if ((*s & 0xF0) == 0xE0) { + /* 3 byte UTF-8 sequence */ + s += 3; + } else if ((*s & 0xE0) == 0xC0) { + /* 2 byte UTF-8 sequence */ + s += 2; + } else { + /* 1 byte UTF-8 "sequence" */ + s++; + } + n--; + } + return s; +} + static str convertCase(BAT *from, BAT *to, str *res, const char *src, const char *malfunc) { @@ -4120,20 +4173,18 @@ STRlocate(int *ret, const str *needle, c } str -STRinsert(str *ret, const str *s, const int *start, const int *l, const str *s2) +STRinsert(str *ret, const str *input, const int *start, const int *nchars, const str *input2) { - str v; - int strt = *start; - if (strNil(*s) || strNil(*s2) || is_int_nil(*start) || is_int_nil(*l)) { + str v, s = *input, s2 = *input2; + int strt = *start, l = *nchars; + + if (strNil(s) || strNil(s2) || is_int_nil(strt) || is_int_nil(l)) { if ((*ret = GDKstrdup(str_nil)) == NULL) throw(MAL, "str.insert", SQLSTATE(HY013) MAL_MALLOC_FAIL); } else { - size_t l1 = strlen(*s); - size_t l2 
= strlen(*s2); + size_t l1 = UTF8_strlen(s); - if (l1 + l2 + 1 >= INT_MAX) - throw(MAL, "str.insert", SQLSTATE(HY013) MAL_MALLOC_FAIL); - if (*l < 0) + if (l < 0) throw(MAL, "str.insert", SQLSTATE(42000) "The number of characters for insert function must be non negative"); if (strt < 0) { if ((size_t) -strt <= l1) @@ -4143,15 +4194,14 @@ STRinsert(str *ret, const str *s, const } if ((size_t) strt > l1) strt = (int) l1; - v = *ret = GDKmalloc(strlen(*s) + strlen(*s2) + 1); + v = *ret = GDKmalloc(strlen(s) + strlen(s2) + 1); if (v == NULL) throw(MAL, "str.insert", SQLSTATE(HY013) MAL_MALLOC_FAIL); if (strt > 0) - strncpy(v, *s, strt); - v[strt] = 0; - strcpy(v + strt, *s2); - if (strt + *l < (int) l1) - strcat(v, *s + strt + *l); + v = UTF8_strncpy(v, s, strt); + strcpy(v, s2); + if (strt + l < (int) l1) + strcat(v, UTF8_offset(s, strt + l)); } return MAL_SUCCEED; } diff --git a/sql/test/SQLancer/Tests/sqlancer03.sql b/sql/test/SQLancer/Tests/sqlancer03.sql --- a/sql/test/SQLancer/Tests/sqlancer03.sql +++ b/sql/test/SQLancer/Tests/sqlancer03.sql @@ -24,3 +24,44 @@ SELECT sql_min(sql_max(NULL, ''), ''); SELECT ALL length(upper(MIN(ALL CAST(((trim(CAST(r'' AS STRING(659)), CAST(r'o3%+i]抔DCöf▟nßOpNbybಜ7' AS STRING)))||(sql_min(sql_max(NULL, r''), splitpart(r'x', r',7+.', t0.c1)))) AS STRING(151))))), 0.4179268710155164 FROM v0 LEFT OUTER JOIN t0 ON NOT (t0.c0) WHERE t0.c0 GROUP BY 0.3584962, CAST(t0.c1 AS STRING(601)), t0.c1; ROLLBACK; + +START TRANSACTION; -- Bug 6919 +CREATE TABLE "sys"."t0" ( + "c0" INTEGER NOT NULL, + "c1" DOUBLE, + CONSTRAINT "t0_c0_pkey" PRIMARY KEY ("c0") +); +COPY 29 RECORDS INTO "sys"."t0" FROM stdin USING DELIMITERS E'\t',E'\n','"'; +6 0.01926179604972278 +7 0.01926179604972278 +8 0.01926179604972278 +9 0.01926179604972278 +10 0.01926179604972278 +11 0.01926179604972278 +12 0.01926179604972278 +13 0.01926179604972278 +14 0.01926179604972278 +15 0.01926179604972278 +16 0.01926179604972278 +17 0.01926179604972278 +954233931 0.01926179604972278 
+-890980732 0.01926179604972278 +18 0.9441921149477416 +19 0.8647722974466762 +20 0.6303259287607281 +21 0.7198562388857971 +22 1905034025 +1927464158 0.827299544139285 +421223489 0.03854140660184213 +-906851618 0.01926179604972278 +23 0.44641096314987394 +24 0.5358519423727929 +25 0.8490801972106654 +911090097 1 +-708085857 0.7843275143974144 +26 1130231849 +27 0.1052118441396751 + +select "insert"('屁{珙', 1, 1, '1'), "insert"('屁{珙', 1, 1, '抔'), "insert"('屁抔珙', 1, 1, 'ಜ'), "insert"('a', 0, 1, 'ಜ'), "insert"('a', 0, 0, 'ಜ'); +select "insert"('屁{珙', 1, 1, '1'), "insert"('屁{珙', 1, 1, '抔'), "insert"('屁抔珙', 1, 1, 'ಜ') from t0; +ROLLBACK; diff --git a/sql/test/SQLancer/Tests/sqlancer03.stable.out b/sql/test/SQLancer/Tests/sqlancer03.stable.out --- a/sql/test/SQLancer/Tests/sqlancer03.stable.out +++ b/sql/test/SQLancer/Tests/sqlancer03.stable.out @@ -60,6 +60,74 @@ stdout of test 'sqlancer03` in directory % int, decimal # type % 1, 19 # length #ROLLBACK; +#START TRANSACTION; -- Bug 6919 +#CREATE TABLE "sys"."t0" ( +# "c0" INTEGER NOT NULL, +# "c1" DOUBLE, +# CONSTRAINT "t0_c0_pkey" PRIMARY KEY ("c0") +#); +#COPY 29 RECORDS INTO "sys"."t0" FROM stdin USING DELIMITERS E'\t',E'\n','"'; +#6 0.01926179604972278 +#7 0.01926179604972278 +#8 0.01926179604972278 +#9 0.01926179604972278 +#10 0.01926179604972278 +#11 0.01926179604972278 +#12 0.01926179604972278 +#13 0.01926179604972278 +#14 0.01926179604972278 +#15 0.01926179604972278 +#16 0.01926179604972278 +#17 0.01926179604972278 +#954233931 0.01926179604972278 +#-890980732 0.01926179604972278 +#18 0.9441921149477416 +#19 0.8647722974466762 +#20 0.6303259287607281 +#21 0.7198562388857971 +#22 1905034025 +[ 29 ] +#select "insert"('屁{珙', 1, 1, '1'), "insert"('屁{珙', 1, 1, '抔'), "insert"('屁抔珙', 1, 1, 'ಜ'), "insert"('a', 0, 1, 'ಜ'), "insert"('a', 0, 0, 'ಜ'); +% .%2, .%3, .%4, .%5, .%6 # table_name +% %2, %3, %4, %5, %6 # name +% clob, clob, clob, clob, clob # type +% 5, 6, 5, 1, 2 # length +[ "屁1珙", "屁抔珙", "屁ಜ珙", "ಜ", "ಜa" ] +#select 
"insert"('屁{珙', 1, 1, '1'), "insert"('屁{珙', 1, 1, '抔'), "insert"('屁抔珙', 1, 1, 'ಜ') from t0; +% .%1, .%2, .%3 # table_name +% %1, %2, %3 # name +% clob, clob, clob # type +% 5, 6, 5 # length +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +[ "屁1珙", "屁抔珙", "屁ಜ珙" ] +#ROLLBACK; # 17:14:16 > # 17:14:16 > "Done." _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list