Repository: incubator-impala
Updated Branches:
  refs/heads/master 0d689d362 -> 27b3b4d47


IMPALA-2700: ASCII NUL characters are doubled on insert into text tables

Currently the scanner processes the '\0' character as a non-special character,
whereas the writer treats it as a special character. The writer
emits the escape character before writing such a character, which causes ASCII
NUL characters to be doubled, since '\0' is the default escape character.
This adds a check to treat '\0' as a non-special character in the
writer.

Change-Id: Ia30fa314d1ee1e99f9e7598466eb1570ca7940fc
Reviewed-on: http://gerrit.cloudera.org:8080/3876
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/b66829f1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/b66829f1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/b66829f1

Branch: refs/heads/master
Commit: b66829f15f5d3072ba56b966ef2ca21620a59390
Parents: 0d689d3
Author: Anuj Phadke <[email protected]>
Authored: Wed Jul 20 14:26:17 2016 -0700
Committer: Internal Jenkins <[email protected]>
Committed: Wed Aug 10 04:09:38 2016 +0000

----------------------------------------------------------------------
 be/src/exec/hdfs-text-table-writer.cc     | 11 +++++++----
 tests/query_test/test_insert_behaviour.py |  9 +++++++++
 2 files changed, 16 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b66829f1/be/src/exec/hdfs-text-table-writer.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-text-table-writer.cc 
b/be/src/exec/hdfs-text-table-writer.cc
index 59ecd9c..053c821 100644
--- a/be/src/exec/hdfs-text-table-writer.cc
+++ b/be/src/exec/hdfs-text-table-writer.cc
@@ -208,11 +208,14 @@ Status HdfsTextTableWriter::Flush() {
 
 inline void HdfsTextTableWriter::PrintEscaped(const StringValue* str_val) {
   for (int i = 0; i < str_val->len; ++i) {
-    if (UNLIKELY(str_val->ptr[i] == field_delim_ || str_val->ptr[i] == 
escape_char_)) {
-      rowbatch_stringstream_ << escape_char_;
+    if (escape_char_ == '\0') {
+      rowbatch_stringstream_ << str_val->ptr[i];
+    } else {
+      if (UNLIKELY(str_val->ptr[i] == field_delim_ || str_val->ptr[i] == 
escape_char_)) {
+        rowbatch_stringstream_ << escape_char_;
+      }
+      rowbatch_stringstream_ << str_val->ptr[i];
     }
-    rowbatch_stringstream_ << str_val->ptr[i];
   }
 }
-
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b66829f1/tests/query_test/test_insert_behaviour.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_insert_behaviour.py 
b/tests/query_test/test_insert_behaviour.py
index 7dcb89d..53c9dbf 100644
--- a/tests/query_test/test_insert_behaviour.py
+++ b/tests/query_test/test_insert_behaviour.py
@@ -83,6 +83,15 @@ class TestInsertBehaviour(ImpalaTestSuite):
       assert self.filesystem_client.exists(table_dir + dir_), "Directory {0} 
was " \
           "unexpectedly deleted by INSERT OVERWRITE".format(table_dir + dir_)
 
+  def test_insert_ascii_nulls(self, unique_database):
+    TBL_NAME = '`{0}`.`null_insert`'.format(unique_database)
+    self.execute_query_expect_success(self.client, "DROP TABLE IF EXISTS %s" % 
TBL_NAME)
+    self.execute_query_expect_success(self.client, "create table %s as select 
'\0' s"
+        % TBL_NAME)
+    result = self.execute_query_expect_success(self.client,
+        "SELECT LENGTH(s) FROM %s" % TBL_NAME)
+    assert int(result.get_data()) == 1
+
   
@UniqueDatabase.parametrize(name_prefix='test_insert_alter_partition_location_db')
   def test_insert_alter_partition_location(self, unique_database):
     """Test that inserts after changing the location of a partition work 
correctly,

Reply via email to