Repository: incubator-impala Updated Branches: refs/heads/master 0d689d362 -> 27b3b4d47
IMPALA-2700: ASCII NUL characters are doubled on insert into text tables Currently the scanner processes the '\0' character as a non-special character whereas the writer treats it as a special character. The writer emits the escape character before writing each special character, which causes ASCII NUL characters to be doubled since NUL is the default escape character. This adds a check to treat '\0' as a non-special character in the writer. Change-Id: Ia30fa314d1ee1e99f9e7598466eb1570ca7940fc Reviewed-on: http://gerrit.cloudera.org:8080/3876 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/b66829f1 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/b66829f1 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/b66829f1 Branch: refs/heads/master Commit: b66829f15f5d3072ba56b966ef2ca21620a59390 Parents: 0d689d3 Author: Anuj Phadke <[email protected]> Authored: Wed Jul 20 14:26:17 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Wed Aug 10 04:09:38 2016 +0000 ---------------------------------------------------------------------- be/src/exec/hdfs-text-table-writer.cc | 11 +++++++---- tests/query_test/test_insert_behaviour.py | 9 +++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b66829f1/be/src/exec/hdfs-text-table-writer.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-text-table-writer.cc b/be/src/exec/hdfs-text-table-writer.cc index 59ecd9c..053c821 100644 --- a/be/src/exec/hdfs-text-table-writer.cc +++ b/be/src/exec/hdfs-text-table-writer.cc @@ -208,11 +208,14 @@ Status HdfsTextTableWriter::Flush() { inline void HdfsTextTableWriter::PrintEscaped(const StringValue* str_val) 
{ for (int i = 0; i < str_val->len; ++i) { - if (UNLIKELY(str_val->ptr[i] == field_delim_ || str_val->ptr[i] == escape_char_)) { - rowbatch_stringstream_ << escape_char_; + if (escape_char_ == '\0') { + rowbatch_stringstream_ << str_val->ptr[i]; + } else { + if (UNLIKELY(str_val->ptr[i] == field_delim_ || str_val->ptr[i] == escape_char_)) { + rowbatch_stringstream_ << escape_char_; + } + rowbatch_stringstream_ << str_val->ptr[i]; } - rowbatch_stringstream_ << str_val->ptr[i]; } } - } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b66829f1/tests/query_test/test_insert_behaviour.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_insert_behaviour.py b/tests/query_test/test_insert_behaviour.py index 7dcb89d..53c9dbf 100644 --- a/tests/query_test/test_insert_behaviour.py +++ b/tests/query_test/test_insert_behaviour.py @@ -83,6 +83,15 @@ class TestInsertBehaviour(ImpalaTestSuite): assert self.filesystem_client.exists(table_dir + dir_), "Directory {0} was " \ "unexpectedly deleted by INSERT OVERWRITE".format(table_dir + dir_) + def test_insert_ascii_nulls(self, unique_database): + TBL_NAME = '`{0}`.`null_insert`'.format(unique_database) + self.execute_query_expect_success(self.client, "DROP TABLE IF EXISTS %s" % TBL_NAME) + self.execute_query_expect_success(self.client, "create table %s as select '\0' s" + % TBL_NAME) + result = self.execute_query_expect_success(self.client, + "SELECT LENGTH(s) FROM %s" % TBL_NAME) + assert int(result.get_data()) == 1 + @UniqueDatabase.parametrize(name_prefix='test_insert_alter_partition_location_db') def test_insert_alter_partition_location(self, unique_database): """Test that inserts after changing the location of a partition work correctly,
