Hi Sergei, please review a patch for MDEV-9842.
This is a prerequisite for MDEV-6353 (my_ismbchar() and my_mbcharlen() refactoring). Thanks.
commit 4d615db357a5b513c915ab677afd130a0673f0da Author: Alexander Barkov <[email protected]> Date: Fri Apr 1 14:53:00 2016 +0400 MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis Fixing READ_INFO to store the loaded data in a String object. Making sure to reserve MY_CS_MBMAXLEN bytes before loading the next character in read_field(). diff --git a/mysql-test/r/ctype_sjis.result b/mysql-test/r/ctype_sjis.result index 4668693..4edd900 100644 --- a/mysql-test/r/ctype_sjis.result +++ b/mysql-test/r/ctype_sjis.result @@ -18730,3 +18730,23 @@ DROP TABLE t1; # # End of 10.0 tests # +# +# Start of 10.2 tests +# +# +# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis +# +CREATE TABLE t1 (a TEXT CHARACTER SET sjis); +LOAD DATA INFILE '../../std_data/loaddata/mdev9842.txt' INTO TABLE t1 CHARACTER SET sjis; +SHOW WARNINGS; +Level Code Message +SELECT HEX(a) FROM t1; +HEX(a) +78835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C +SELECT a=CONCAT('x', REPEAT(_sjis 0x835C, 200)) FROM t1; +a=CONCAT('x', REPEAT(_sjis 0x835C, 200)) +1 +DROP TABLE t1; +# +# End of 10.2 tests +# diff --git a/mysql-test/std_data/loaddata/mdev9842.txt b/mysql-test/std_data/loaddata/mdev9842.txt new file mode 
100644 index 0000000..1aed9a9 --- /dev/null +++ b/mysql-test/std_data/loaddata/mdev9842.txt @@ -0,0 +1 @@ +x\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ diff --git a/mysql-test/t/ctype_sjis.test b/mysql-test/t/ctype_sjis.test index 2777cf6..da1f963 100644 --- a/mysql-test/t/ctype_sjis.test +++ b/mysql-test/t/ctype_sjis.test @@ -231,3 +231,22 @@ SET NAMES sjis; --echo # --echo # End of 10.0 tests --echo # + +--echo # +--echo # Start of 10.2 tests +--echo # + +--echo # +--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis +--echo # +CREATE TABLE t1 (a TEXT CHARACTER SET sjis); +LOAD DATA INFILE '../../std_data/loaddata/mdev9842.txt' INTO TABLE t1 CHARACTER SET sjis; +SHOW WARNINGS; +SELECT HEX(a) FROM t1; +SELECT a=CONCAT('x', REPEAT(_sjis 0x835C, 200)) FROM t1; +DROP TABLE t1; + + +--echo # +--echo # End of 10.2 tests +--echo # diff --git a/sql/sql_load.cc b/sql/sql_load.cc index f1c2920..d7ca8ef 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -66,10 +66,9 @@ XML_TAG::XML_TAG(int l, String f, String v) class READ_INFO { File file; - uchar *buffer, /* Buffer for read text */ - *end_of_buff; /* Data in bufferts ends here */ - uint buff_length, /* Length of buffert */ - max_length; /* Max length of row */ + String data; /* Read buffer */ + uint fixed_length; /* Length of the fixed length record */ + uint max_length; /* Max length of row */ const uchar *field_term_ptr,*line_term_ptr; const char *line_start_ptr,*line_start_end; uint field_term_length,line_term_length,enclosed_length; @@ -1349,10 +1348,11 @@ READ_INFO::READ_INFO(THD *thd, File file_par, uint tot_length, CHARSET_INFO *cs, String &field_term, String &line_start, String &line_term, String &enclosed_par, int escape, bool get_it_from_net, bool is_fifo) - :file(file_par), buffer(NULL), buff_length(tot_length), 
escape_char(escape), + :file(file_par), fixed_length(tot_length), escape_char(escape), found_end_of_line(false), eof(false), error(false), line_cuted(false), found_null(false), read_charset(cs) { + data.set_thread_specific(); /* Field and line terminators must be interpreted as sequence of unsigned char. Otherwise, non-ascii terminators will be negative on some platforms, @@ -1394,18 +1394,16 @@ READ_INFO::READ_INFO(THD *thd, File file_par, uint tot_length, CHARSET_INFO *cs, set_if_bigger(length,line_start.length()); stack= stack_pos= (int*) thd->alloc(sizeof(int) * length); - if (!(buffer=(uchar*) my_malloc(buff_length+1,MYF(MY_THREAD_SPECIFIC)))) + if (data.reserve(tot_length)) error=1; /* purecov: inspected */ else { - end_of_buff=buffer+buff_length; if (init_io_cache(&cache,(get_it_from_net) ? -1 : file, 0, (get_it_from_net) ? READ_NET : (is_fifo ? READ_FIFO : READ_CACHE),0L,1, MYF(MY_WME | MY_THREAD_SPECIFIC))) { - my_free(buffer); /* purecov: inspected */ - buffer= NULL; + data.free(); error=1; } else @@ -1428,7 +1426,7 @@ READ_INFO::READ_INFO(THD *thd, File file_par, uint tot_length, CHARSET_INFO *cs, READ_INFO::~READ_INFO() { ::end_io_cache(&cache); - my_free(buffer); + data.free(); List_iterator<XML_TAG> xmlit(taglist); XML_TAG *t; while ((t= xmlit++)) @@ -1459,7 +1457,6 @@ inline int READ_INFO::terminator(const uchar *ptr,uint length) int READ_INFO::read_field() { int chr,found_enclosed_char; - uchar *to,*new_buffer; found_null=0; if (found_end_of_line) @@ -1478,11 +1475,11 @@ int READ_INFO::read_field() found_end_of_line=eof=1; return 1; } - to=buffer; + data.length(0); if (chr == enclosed_char) { found_enclosed_char=enclosed_char; - *to++=(uchar) chr; // If error + data.append(chr); // If error } else { @@ -1492,7 +1489,7 @@ int READ_INFO::read_field() for (;;) { - while ( to < end_of_buff) + while (data.length() + MY_CS_MBMAXLEN < data.alloced_length()) { chr = GET; if (chr == my_b_EOF) @@ -1501,7 +1498,7 @@ int READ_INFO::read_field() { if ((chr=GET) 
== my_b_EOF) { - *to++= (uchar) escape_char; + data.append(escape_char); goto found_eof; } /* @@ -1513,7 +1510,7 @@ int READ_INFO::read_field() */ if (escape_char != enclosed_char || chr == escape_char) { - *to++ = (uchar) unescape((char) chr); + data.append(unescape((char) chr)); continue; } PUSH(chr); @@ -1529,8 +1526,8 @@ int READ_INFO::read_field() { // Maybe unexpected linefeed enclosed=0; found_end_of_line=1; - row_start=buffer; - row_end= to; + row_start= (uchar *) data.ptr(); + row_end= (uchar *) data.end(); return 0; } } @@ -1538,7 +1535,7 @@ int READ_INFO::read_field() { if ((chr=GET) == found_enclosed_char) { // Remove dupplicated - *to++ = (uchar) chr; + data.append(chr); continue; } // End of enclosed field if followed by field_term or line_term @@ -1549,16 +1546,16 @@ int READ_INFO::read_field() /* Maybe unexpected linefeed */ enclosed=1; found_end_of_line=1; - row_start=buffer+1; - row_end= to; + row_start= (uchar *) data.ptr() + 1; + row_end= (uchar *) data.end(); return 0; } if (chr == field_term_char && terminator(field_term_ptr,field_term_length)) { enclosed=1; - row_start=buffer+1; - row_end= to; + row_start= (uchar *) data.ptr() + 1; + row_end= (uchar *) data.end(); return 0; } /* @@ -1574,22 +1571,19 @@ int READ_INFO::read_field() if (terminator(field_term_ptr,field_term_length)) { enclosed=0; - row_start=buffer; - row_end= to; + row_start= (uchar *) data.ptr(); + row_end= (uchar *) data.end(); return 0; } } #ifdef USE_MB - if (my_mbcharlen(read_charset, chr) > 1 && - to + my_mbcharlen(read_charset, chr) <= end_of_buff) + if (my_mbcharlen(read_charset, chr) > 1) { - uchar* p= to; - int ml, i; - *to++ = chr; + uint32 length0= data.length(); + int ml= my_mbcharlen(read_charset, chr); + data.append(chr); - ml= my_mbcharlen(read_charset, chr); - - for (i= 1; i < ml; i++) + for (int i= 1; i < ml; i++) { chr= GET; if (chr == my_b_EOF) @@ -1598,39 +1592,35 @@ int READ_INFO::read_field() Need to back up the bytes already ready from illformed 
multi-byte char */ - to-= i; + data.length(length0); goto found_eof; } - *to++ = chr; + data.append(chr); } if (my_ismbchar(read_charset, - (const char *)p, - (const char *)to)) + (const char *) data.ptr() + length0, + (const char *) data.end())) continue; - for (i= 0; i < ml; i++) - PUSH(*--to); + for (int i= 0; i < ml; i++) + PUSH(data.end()[-1 - i]); + data.length(length0); chr= GET; } #endif - *to++ = (uchar) chr; + data.append(chr); } /* ** We come here if buffer is too small. Enlarge it and continue */ - if (!(new_buffer=(uchar*) my_realloc((char*) buffer,buff_length+1+IO_SIZE, - MYF(MY_WME | MY_THREAD_SPECIFIC)))) - return (error=1); - to=new_buffer + (to-buffer); - buffer=new_buffer; - buff_length+=IO_SIZE; - end_of_buff=buffer+buff_length; + if (data.reserve(IO_SIZE)) + return (error= 1); } found_eof: enclosed=0; found_end_of_line=eof=1; - row_start=buffer; - row_end=to; + row_start= (uchar *) data.ptr(); + row_end= (uchar *) data.end(); return 0; } @@ -1652,7 +1642,6 @@ int READ_INFO::read_field() int READ_INFO::read_fixed_length() { int chr; - uchar *to; if (found_end_of_line) return 1; // One have to call next_line @@ -1663,8 +1652,7 @@ int READ_INFO::read_fixed_length() return 1; } - to=row_start=buffer; - while (to < end_of_buff) + for (data.length(0); data.length() < fixed_length ; ) { if ((chr=GET) == my_b_EOF) goto found_eof; @@ -1672,31 +1660,31 @@ int READ_INFO::read_fixed_length() { if ((chr=GET) == my_b_EOF) { - *to++= (uchar) escape_char; + data.append(escape_char); goto found_eof; } - *to++ =(uchar) unescape((char) chr); + data.append((uchar) unescape((char) chr)); continue; } if (chr == line_term_char) { if (terminator(line_term_ptr,line_term_length)) { // Maybe unexpected linefeed - found_end_of_line=1; - row_end= to; - return 0; + found_end_of_line=1; + break; } } - *to++ = (uchar) chr; + data.append(chr); } - row_end=to; // Found full line + row_start= (uchar *) data.ptr(); + row_end= (uchar *) data.end(); // Found full line return 0; 
found_eof: found_end_of_line=eof=1; - row_start=buffer; - row_end=to; - return to == buffer ? 1 : 0; + row_start= (uchar *) data.ptr(); + row_end= (uchar *) data.end(); + return data.length() == 0 ? 1 : 0; }
_______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : [email protected] Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp

