Hi Sergei, Please review a patch for MDEV-7055.
Thanks.
diff --git a/mysql-test/include/ctype_date_format.inc b/mysql-test/include/ctype_date_format.inc new file mode 100644 index 0000000..d836090 --- /dev/null +++ b/mysql-test/include/ctype_date_format.inc @@ -0,0 +1,14 @@ +--echo # +--echo # MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 845 +--echo # +SELECT HEX(date_format('0001-01-01', '%Y')); +SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%f')); +SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f')); +SELECT date_format('2001-01-01','%W rubb ish %w'); +SELECT date_format('2001-01-01','%W rubb ish %'); + +CREATE TABLE t1 AS SELECT IF(0=0,'Y','N') AS a LIMIT 0; +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES (date_format('2001-01-01','%W')); +SELECT * FROM t1; +DROP TABLE IF EXISTS t1; diff --git a/mysql-test/r/ctype_gbk.result b/mysql-test/r/ctype_gbk.result index 9da3cf9..0c593bc 100644 --- a/mysql-test/r/ctype_gbk.result +++ b/mysql-test/r/ctype_gbk.result @@ -621,5 +621,14 @@ A8BD Å A8BE Å DROP TABLE t1; # +# MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 845 +# +SET NAMES gbk; +CREATE TABLE t1 AS SELECT DATE_FORMAT('2001-01-01',_gbk 0xA1402557) AS a; +SELECT HEX(a), CONVERT(a USING utf8) FROM t1; +HEX(a) CONVERT(a USING utf8) +A1404D6F6E646179 ?Monday +DROP TABLE t1; +# # End of 5.5 tests # diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result index f9e9a69..6d70b79 100644 --- a/mysql-test/r/ctype_ucs.result +++ b/mysql-test/r/ctype_ucs.result @@ -124,6 +124,37 @@ select 'a a' > 'a', 'a \0' < 'a'; select binary 'a a' > 'a', binary 'a \0' > 'a', binary 'a\0' > 'a'; binary 'a a' > 'a' binary 'a \0' > 'a' binary 'a\0' > 'a' 1 1 1 +# +# MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 
845 +# +SELECT HEX(date_format('0001-01-01', '%Y')); +HEX(date_format('0001-01-01', '%Y')) +0030003000300031 +SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%f')); +HEX(date_format('0001-01-01 10:20:30.000009', '%f')) +003000300030003000300039 +SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f')); +HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f')) +00300030003000310020003000300030003000300039 +SELECT date_format('2001-01-01','%W rubb ish %w'); +date_format('2001-01-01','%W rubb ish %w') +Monday rubb ish 1 +SELECT date_format('2001-01-01','%W rubb ish %'); +date_format('2001-01-01','%W rubb ish %') +Monday rubb ish % +CREATE TABLE t1 AS SELECT IF(0=0,'Y','N') AS a LIMIT 0; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(1) CHARACTER SET ucs2 NOT NULL DEFAULT '' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (date_format('2001-01-01','%W')); +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SELECT * FROM t1; +a +M +DROP TABLE IF EXISTS t1; SET CHARACTER SET koi8r; create table t1 (a varchar(2) character set ucs2 collate ucs2_bin, key(a)); insert into t1 values ('A'),('A'),('B'),('C'),('D'),('A\t'); diff --git a/mysql-test/r/ctype_ucs2_innodb.result b/mysql-test/r/ctype_ucs2_innodb.result new file mode 100644 index 0000000..ecd0315 --- /dev/null +++ b/mysql-test/r/ctype_ucs2_innodb.result @@ -0,0 +1,45 @@ +SET default_storage_engine=InnoDB; +SET TIME_ZONE = '+03:00'; +DROP TABLE IF EXISTS t1; +# +# Start of 5.5 tests +# +SET NAMES latin1; +SET character_set_connection=ucs2; +SELECT HEX('a'), HEX('a '); +HEX('a') HEX('a ') +0061 00610020 +# +# MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 845 +# +SELECT HEX(date_format('0001-01-01', '%Y')); +HEX(date_format('0001-01-01', '%Y')) +0030003000300031 +SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%f')); 
+HEX(date_format('0001-01-01 10:20:30.000009', '%f')) +003000300030003000300039 +SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f')); +HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f')) +00300030003000310020003000300030003000300039 +SELECT date_format('2001-01-01','%W rubb ish %w'); +date_format('2001-01-01','%W rubb ish %w') +Monday rubb ish 1 +SELECT date_format('2001-01-01','%W rubb ish %'); +date_format('2001-01-01','%W rubb ish %') +Monday rubb ish % +CREATE TABLE t1 AS SELECT IF(0=0,'Y','N') AS a LIMIT 0; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(1) CHARACTER SET ucs2 NOT NULL DEFAULT '' +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (date_format('2001-01-01','%W')); +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SELECT * FROM t1; +a +M +DROP TABLE IF EXISTS t1; +# +# End of 5.5 tests +# diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result index 1f316b7..c0249cc 100644 --- a/mysql-test/r/ctype_utf32.result +++ b/mysql-test/r/ctype_utf32.result @@ -29,6 +29,37 @@ select 'a a' > 'a', 'a \0' < 'a'; select binary 'a a' > 'a', binary 'a \0' > 'a', binary 'a\0' > 'a'; binary 'a a' > 'a' binary 'a \0' > 'a' binary 'a\0' > 'a' 1 1 1 +# +# MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 845 +# +SELECT HEX(date_format('0001-01-01', '%Y')); +HEX(date_format('0001-01-01', '%Y')) +00000030000000300000003000000031 +SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%f')); +HEX(date_format('0001-01-01 10:20:30.000009', '%f')) +000000300000003000000030000000300000003000000039 +SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f')); +HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f')) +0000003000000030000000300000003100000020000000300000003000000030000000300000003000000039 +SELECT date_format('2001-01-01','%W rubb ish %w'); 
+date_format('2001-01-01','%W rubb ish %w') +Monday rubb ish 1 +SELECT date_format('2001-01-01','%W rubb ish %'); +date_format('2001-01-01','%W rubb ish %') +Monday rubb ish % +CREATE TABLE t1 AS SELECT IF(0=0,'Y','N') AS a LIMIT 0; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(1) CHARACTER SET utf32 NOT NULL DEFAULT '' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (date_format('2001-01-01','%W')); +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SELECT * FROM t1; +a +M +DROP TABLE IF EXISTS t1; select hex(_utf32 0x44); hex(_utf32 0x44) 00000044 diff --git a/mysql-test/r/ctype_utf32_innodb.result b/mysql-test/r/ctype_utf32_innodb.result new file mode 100644 index 0000000..8362724 --- /dev/null +++ b/mysql-test/r/ctype_utf32_innodb.result @@ -0,0 +1,45 @@ +SET default_storage_engine=InnoDB; +SET TIME_ZONE = '+03:00'; +DROP TABLE IF EXISTS t1; +# +# Start of 5.5 tests +# +SET NAMES latin1; +SET character_set_connection=utf32; +SELECT HEX('a'), HEX('a '); +HEX('a') HEX('a ') +00000061 0000006100000020 +# +# MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 845 +# +SELECT HEX(date_format('0001-01-01', '%Y')); +HEX(date_format('0001-01-01', '%Y')) +00000030000000300000003000000031 +SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%f')); +HEX(date_format('0001-01-01 10:20:30.000009', '%f')) +000000300000003000000030000000300000003000000039 +SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f')); +HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f')) +0000003000000030000000300000003100000020000000300000003000000030000000300000003000000039 +SELECT date_format('2001-01-01','%W rubb ish %w'); +date_format('2001-01-01','%W rubb ish %w') +Monday rubb ish 1 +SELECT date_format('2001-01-01','%W rubb ish %'); +date_format('2001-01-01','%W rubb ish %') +Monday rubb ish % +CREATE TABLE t1 AS SELECT 
IF(0=0,'Y','N') AS a LIMIT 0; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(1) CHARACTER SET utf32 NOT NULL DEFAULT '' +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (date_format('2001-01-01','%W')); +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SELECT * FROM t1; +a +M +DROP TABLE IF EXISTS t1; +# +# End of 5.5 tests +# diff --git a/mysql-test/t/ctype_gbk.test b/mysql-test/t/ctype_gbk.test index b9e25e9..3493cec 100644 --- a/mysql-test/t/ctype_gbk.test +++ b/mysql-test/t/ctype_gbk.test @@ -154,7 +154,19 @@ WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <> DROP TABLE t1; +--echo # +--echo # MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 845 +--echo # + +# Testing format string 0xA140 + '%' + 'W' +# 0xA140 is an unassigned character in gbk. +# It should be preserved in the DATE_FORMAT output +# (should not be replaced to question mark) +SET NAMES gbk; +CREATE TABLE t1 AS SELECT DATE_FORMAT('2001-01-01',_gbk 0xA1402557) AS a; +SELECT HEX(a), CONVERT(a USING utf8) FROM t1; +DROP TABLE t1; --echo # --echo # End of 5.5 tests diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test index 7fd3768..7211c59 100644 --- a/mysql-test/t/ctype_ucs.test +++ b/mysql-test/t/ctype_ucs.test @@ -11,6 +11,7 @@ SET @test_collation= 'ucs2_general_ci'; SET NAMES latin1; SET character_set_connection=ucs2; -- source include/endspace.inc +-- source include/ctype_date_format.inc SET CHARACTER SET koi8r; diff --git a/mysql-test/t/ctype_ucs2_innodb.test b/mysql-test/t/ctype_ucs2_innodb.test new file mode 100644 index 0000000..144e08f --- /dev/null +++ b/mysql-test/t/ctype_ucs2_innodb.test @@ -0,0 +1,23 @@ +-- source include/have_innodb.inc +-- source include/have_ucs2.inc + +SET default_storage_engine=InnoDB; + +SET TIME_ZONE = '+03:00'; + +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings + 
+--echo # +--echo # Start of 5.5 tests +--echo # + +SET NAMES latin1; +SET character_set_connection=ucs2; +SELECT HEX('a'), HEX('a '); +-- source include/ctype_date_format.inc + +--echo # +--echo # End of 5.5 tests +--echo # diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test index 1be8925..1694f43 100644 --- a/mysql-test/t/ctype_utf32.test +++ b/mysql-test/t/ctype_utf32.test @@ -14,6 +14,7 @@ SET NAMES latin1; SET character_set_connection=utf32; select hex('a'), hex('a '); -- source include/endspace.inc +-- source include/ctype_date_format.inc # # Check that incomplete utf32 characters in HEX notation diff --git a/mysql-test/t/ctype_utf32_innodb.test b/mysql-test/t/ctype_utf32_innodb.test new file mode 100644 index 0000000..ceb90ff --- /dev/null +++ b/mysql-test/t/ctype_utf32_innodb.test @@ -0,0 +1,23 @@ +-- source include/have_innodb.inc +-- source include/have_utf32.inc + +SET default_storage_engine=InnoDB; + +SET TIME_ZONE = '+03:00'; + +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings + +--echo # +--echo # Start of 5.5 tests +--echo # + +SET NAMES latin1; +SET character_set_connection=utf32; +SELECT HEX('a'), HEX('a '); +-- source include/ctype_date_format.inc + +--echo # +--echo # End of 5.5 tests +--echo # diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index 522004e..a2a6fff 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -447,6 +447,70 @@ static bool extract_date_time(DATE_TIME_FORMAT *format, /** + A multi-byte safe helper class to read characters from a string. + + QQ: Serg: which file to put this new class in? 
+ It can be helpful for some other purposes + (not only here in item_timefunc.cc) + I remember you don't like such things in sql_string.h :) +*/ +class Wchar_reader +{ + CHARSET_INFO *m_cs; + const char *m_ptr; + const char *m_end; +public: + Wchar_reader(CHARSET_INFO *cs, const char *str, size_t length) + :m_cs(cs), m_ptr(str), m_end(str + length) + { } + bool eol() const { return m_ptr >= m_end; } + /** + Read a character, return its Unicode code point and octet length. + + @param [OUT] wc - a pointer to a Unicode code point variable + @param [OUT] chlen - a pointer to a character length variable + @return false - on success + @return true - on error (end of line, or a bad byte sequence) + + Converts negative lengths in the range -6..-1 + (which mb_wc() returns for valid but unassigned characters) + to positive lengths 1..6, so the caller does not have + to care about unassigned characters. The caller will just see + such characters as "U+003F QUESTION MARK", but with length + not necessarily equal to 1. + */ + bool read(my_wc_t *wc, int *chlen) + { + *chlen= m_cs->cset->mb_wc(m_cs, wc, (uchar *) m_ptr, (uchar *) m_end); + if (*chlen <= 0) + { + if (*chlen < -6 || *chlen == 0) + return true; // End of line, or a bad byte sequence + *chlen= -(*chlen); // An unassigned (but a valid) character found + *wc= '?'; // Initialize *wc to QUESTION MARK + } + m_ptr+= *chlen; // Shift the pointer to the next character. + return false; + } + /** + Read a character when its length is not important for the caller + @param [OUT] wc - a pointer to a Unicode code point variable + @return false - on success + @return true - on error (end of line, or an invalid byte sequence) + */ + bool read(my_wc_t *wc) + { + int chlen; + return read(wc, &chlen); + } + /** + Return a pointer to the next character in the queue. + */ + const char *ptr() const { return m_ptr; } +}; + + +/** Create a formated date/time value in a string.
*/ @@ -457,21 +521,29 @@ static bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time, uint hours_i; uint weekday; ulong length; - const char *ptr, *end; + my_wc_t wc; + int chlen; + Wchar_reader reader(str->charset(), format->format.str, + format->format.length); str->length(0); if (l_time->neg) str->append('-'); - end= (ptr= format->format.str) + format->format.length; - for (; ptr != end ; ptr++) + for ( ; !reader.read(&wc, &chlen) ; ) { - if (*ptr != '%' || ptr+1 == end) - str->append(*ptr); + if (wc != '%' || reader.eol()) + { + DBUG_ASSERT(chlen > 0); // A regular character, or a trailing '%' + str->append(reader.ptr() - chlen, chlen, &my_charset_bin); + } else { - switch (*++ptr) { + // A '%' sequence found, scan the next character after '%' + if (reader.read(&wc, &chlen)) + break; + switch (wc) { case 'M': if (!l_time->month) return 1; @@ -617,7 +689,7 @@ static bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time, if (type == MYSQL_TIMESTAMP_TIME) return 1; length= (uint) (int10_to_str(calc_week(l_time, - (*ptr) == 'U' ? + wc == 'U' ? WEEK_FIRST_WEEKDAY : WEEK_MONDAY_FIRST, &year), intbuff, 10) - intbuff); @@ -631,7 +703,7 @@ static bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time, if (type == MYSQL_TIMESTAMP_TIME) return 1; length= (uint) (int10_to_str(calc_week(l_time, - ((*ptr) == 'V' ? + (wc == 'V' ? (WEEK_YEAR | WEEK_FIRST_WEEKDAY) : (WEEK_YEAR | WEEK_MONDAY_FIRST)), &year), @@ -646,7 +718,7 @@ static bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time, if (type == MYSQL_TIMESTAMP_TIME) return 1; (void) calc_week(l_time, - ((*ptr) == 'X' ? + (wc == 'X' ?
WEEK_YEAR | WEEK_FIRST_WEEKDAY : WEEK_YEAR | WEEK_MONDAY_FIRST), &year); @@ -664,7 +736,8 @@ static bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time, break; default: - str->append(*ptr); + DBUG_ASSERT(chlen > 0); + str->append(reader.ptr() - chlen, (size_t) chlen, &my_charset_bin); break; } } @@ -1778,20 +1851,22 @@ bool Item_func_date_format::eq(const Item *item, bool binary_cmp) const } - uint Item_func_date_format::format_length(const String *format) { uint size=0; - const char *ptr=format->ptr(); - const char *end=ptr+format->length(); + my_wc_t wc; + Wchar_reader reader(format->charset(), format->ptr(), format->length()); - for (; ptr != end ; ptr++) + for ( ; !reader.read(&wc) ; ) { - if (*ptr != '%' || ptr == end-1) - size++; + if (wc != '%' || reader.eol()) + size++; // A regular character, or a trailing '%' else { - switch(*++ptr) { + // A '%' sequence found, scan the next character after '%' + if (reader.read(&wc)) + break; + switch (wc) { case 'M': /* month, textual */ case 'W': /* day (of the week), textual */ size += 64; /* large for UTF8 locale data */ diff --git a/sql/sql_string.cc b/sql/sql_string.cc index 885f53a..21fba79 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -537,6 +537,15 @@ bool String::append(IO_CACHE* file, uint32 arg_length) return FALSE; } +/** + Append an ASCII string, optionally fill a prefix. + @param s - a pointer to an ASCII string + @param arg_length - length of the ASCII string + @param full_length - the desired character length of the piece + to be added + @param fill_char - make a prefix consisting of this character, + if the desired full_length is bigger than arg_length.
+*/ bool String::append_with_prefill(const char *s,uint32 arg_length, uint32 full_length, char fill_char) { @@ -547,8 +556,28 @@ bool String::append_with_prefill(const char *s,uint32 arg_length, t_length= full_length - arg_length; if (t_length > 0) { - bfill(Ptr+str_length, t_length, fill_char); - str_length=str_length + t_length; + if (charset()->mbminlen == 1) + { + /* + An ASCII string can be appended directly + to an ASCII-compatible string. This includes + multi-byte character sets, like utf8, sjis, etc. + */ + bfill(Ptr+str_length, t_length, fill_char); + str_length=str_length + t_length; + } + else + { + /* + Needs conversion to append an ASCII string to ASCII-incompatible + character sets, such as ucs2, utf16, utf16le, utf32. + */ + for (int i= 0; i < t_length; i++) + { + if (append(&fill_char, 1, &my_charset_latin1)) + return true; + } + } } append(s, arg_length); return FALSE;
_______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp