This is an automated email from the ASF dual-hosted git repository.

jackylk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new b3e6e80  [CARBONDATA-3644] Support Configuration of Complex Delimiters 
in Carbon Properties
b3e6e80 is described below

commit b3e6e801bfd5222a42b8f1c29f7150560732c29a
Author: h00424960 <haoxing...@huawei.com>
AuthorDate: Tue Dec 31 17:35:11 2019 +0800

    [CARBONDATA-3644] Support Configuration of Complex Delimiters in Carbon 
Properties
    
    Why is this PR needed?
    
    Modification reason: When inserting into a carbon table by selecting from a 
parquet table, if a binary column contains '\001' (for example 'col1\001col2'), 
the content before '\001' is truncated, because '\001' is the complex delimiter. 
The problem is that the complex delimiter cannot be configured in the insert 
flow, which needs to be improved.
    
    What changes were proposed in this PR?
    
    Modification content: Add configuration properties for the complex 
delimiters to CarbonProperties; LoadOption reads them when a delimiter is not 
given in the load options.
    
    Does this PR introduce any user interface change?
    
    No
    
    Is any new testcase added?
    
    Yes
    
    This closes #3552
---
 .../core/constants/CarbonCommonConstants.java      | 24 ++++++++++++++++++++++
 .../complexType/TestComplexDataType.scala          | 21 +++++++++++++++++++
 .../command/management/CarbonLoadDataCommand.scala |  3 ---
 .../processing/loading/model/LoadOption.java       | 18 +++++++++++++---
 4 files changed, 60 insertions(+), 6 deletions(-)

diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index 9ad276c..10b2d89 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -146,6 +146,30 @@ public final class CarbonCommonConstants {
   public static final String DEFAULT_COMPRESSOR = "snappy";
 
   /**
+   * the level 1 complex delimiter
+   */
+  @CarbonProperty
+  public static final String COMPLEX_DELIMITERS_LEVEL_1 = 
"carbon.complex.delimiter.level.1";
+
+  /**
+   * the level 2 complex delimiter
+   */
+  @CarbonProperty
+  public static final String COMPLEX_DELIMITERS_LEVEL_2 = 
"carbon.complex.delimiter.level.2";
+
+  /**
+   * the level 3 complex delimiter
+   */
+  @CarbonProperty
+  public static final String COMPLEX_DELIMITERS_LEVEL_3 = 
"carbon.complex.delimiter.level.3";
+
+  /**
+   * the level 4 complex delimiter
+   */
+  @CarbonProperty
+  public static final String COMPLEX_DELIMITERS_LEVEL_4 = 
"carbon.complex.delimiter.level.4";
+
+  /**
    * ZOOKEEPER_ENABLE_LOCK if this is set to true then zookeeper
    * will be used to handle locking
    * mechanism of carbon
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
index 2dbae36..32a5d92 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
@@ -42,6 +42,8 @@ class TestComplexDataType extends QueryTest with 
BeforeAndAfterAll {
   override def beforeAll(): Unit = {
     sql("DROP TABLE IF EXISTS table1")
     sql("DROP TABLE IF EXISTS test")
+    sql("DROP TABLE IF EXISTS datatype_struct_carbondata")
+    sql("DROP TABLE IF EXISTS datatype_struct_parquet")
   }
 
   override def afterAll(): Unit = {
@@ -54,6 +56,8 @@ class TestComplexDataType extends QueryTest with 
BeforeAndAfterAll {
         CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, 
badRecordAction)
+    CarbonProperties.getInstance()
+      .removeProperty(CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_1)
   }
 
   test("test Projection PushDown for Struct - Integer type") {
@@ -1064,6 +1068,23 @@ class TestComplexDataType extends QueryTest with 
BeforeAndAfterAll {
     sql("drop table if exists hive_table")
   }
 
+  test("test when insert select from a parquet table with an struct with 
binary and custom complex delimiter") {
+    var carbonProperties = CarbonProperties.getInstance()
+    
carbonProperties.addProperty(CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_1, 
"#")
+
+    sql("create table datatype_struct_parquet(price struct<a:binary>) stored 
as parquet")
+    sql("insert into table datatype_struct_parquet values(named_struct('a', 
'col1\001col2'))")
+    sql("create table datatype_struct_carbondata(price struct<a:binary>) 
stored as carbondata")
+    sql("insert into datatype_struct_carbondata select * from 
datatype_struct_parquet")
+    checkAnswer(
+      sql("SELECT * FROM datatype_struct_carbondata"),
+      sql("SELECT * FROM datatype_struct_parquet"))
+    sql("DROP TABLE IF EXISTS datatype_struct_carbondata")
+    sql("DROP TABLE IF EXISTS datatype_struct_parquet")
+
+    
carbonProperties.removeProperty(CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_1)
+  }
+
   test("[CARBONDATA-3527] Fix 'String length cannot exceed 32000 characters' 
issue when load data with 'GLOBAL_SORT' from csv files which include big 
complex type data") {
     val tableName = "complexdata3_table"
     sql(s"drop table if exists ${tableName}")
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
index 7a853b9..0309e91 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
@@ -169,9 +169,6 @@ case class CarbonLoadDataCommand(
     val carbonLoadModel = new CarbonLoadModel()
     val tableProperties = table.getTableInfo.getFactTable.getTableProperties
     val optionsFinal = LoadOption.fillOptionWithDefaultValue(options.asJava)
-    optionsFinal
-      .put("complex_delimiter_level_4",
-        ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_4.value())
 
     /**
     * Priority of sort_scope assignment :
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
index b0206bc..7915fdd 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
@@ -107,15 +107,27 @@ public class LoadOption {
 
     optionsFinal.put("complex_delimiter_level_1",
         Maps.getOrDefault(options, "complex_delimiter_level_1",
-            ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_1.value()));
+            CarbonProperties.getInstance().getProperty(
+                CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_1,
+                ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_1.value())));
 
     optionsFinal.put("complex_delimiter_level_2",
         Maps.getOrDefault(options, "complex_delimiter_level_2",
-            ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_2.value()));
+            CarbonProperties.getInstance().getProperty(
+                CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_2,
+                ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_2.value())));
 
     optionsFinal.put("complex_delimiter_level_3",
         Maps.getOrDefault(options, "complex_delimiter_level_3",
-            ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_3.value()));
+            CarbonProperties.getInstance().getProperty(
+                CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_3,
+                ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_3.value())));
+
+    optionsFinal.put("complex_delimiter_level_4",
+        Maps.getOrDefault(options, "complex_delimiter_level_4",
+            CarbonProperties.getInstance().getProperty(
+                CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_4,
+                ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_4.value())));
 
     optionsFinal.put(
         "dateformat",

Reply via email to