[21/50] [abbrv] carbondata git commit: [CARBONDATA-1944][PARTITION]Special character like comma (, ) cannot be loaded on partition columns

ravipesala Mon, 08 Jan 2018 20:02:28 -0800

[CARBONDATA-1944][PARTITION]Special character like comma (,) cannot be loaded 
on partition columns


There is an issue while setting the partition information to the 
CarbonTableInputFormat , it is fixed in this PR.
And also removed the old CarbonInputFormat class and its references.

This closes #1733


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/45787fb9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/45787fb9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/45787fb9

Branch: refs/heads/branch-1.3
Commit: 45787fb99bd9327f180b40b288794edcaed5296c
Parents: a83d029
Author: ravipesala <[email protected]>
Authored: Wed Dec 27 23:33:54 2017 +0530
Committer: Jacky Li <[email protected]>
Committed: Thu Jan 4 16:01:18 2018 +0800

----------------------------------------------------------------------
 .../hadoop/api/CarbonTableInputFormat.java      | 21 +++++++++++++-------
 .../test/resources/data_with_special_char.csv   | 11 ++++++++++
 .../StandardPartitionTableLoadingTestCase.scala | 19 ++++++++++++++++++
 .../datasources/CarbonFileFormat.scala          | 14 +++++--------
 4 files changed, 49 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/45787fb9/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
----------------------------------------------------------------------
diff --git 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
index c02e03e..ed4684d 100644
--- 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
+++ 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
@@ -295,19 +295,26 @@ public class CarbonTableInputFormat<T> extends 
FileInputFormat<Void, T> {
    * set list of partitions to prune
    */
   public static void setPartitionsToPrune(Configuration configuration, 
List<String> partitions) {
-    configuration.set(
-        CarbonTableInputFormat.PARTITIONS_TO_PRUNE, 
CarbonUtil.convertToString(partitions));
+    if (partitions == null) {
+      return;
+    }
+    try {
+      String partitionString = 
ObjectSerializationUtil.convertObjectToString(partitions);
+      configuration.set(PARTITIONS_TO_PRUNE, partitionString);
+    } catch (Exception e) {
+      throw new RuntimeException("Error while setting patition information to 
Job", e);
+    }
   }
 
   /**
    * get list of partitions to prune
    */
-  public static List<String> getPartitionsToPrune(Configuration configuration) 
{
-    String partitionString = configuration.get(PARTITIONS_TO_PRUNE, "");
-    if (partitionString.trim().isEmpty()) {
-      return null;
+  public static List<String> getPartitionsToPrune(Configuration configuration) 
throws IOException {
+    String partitionString = configuration.get(PARTITIONS_TO_PRUNE);
+    if (partitionString != null) {
+      return (List<String>) 
ObjectSerializationUtil.convertStringToObject(partitionString);
     }
-    return Arrays.asList(partitionString.split(","));
+    return null;
   }
   /**
    * Set list of files to access

http://git-wip-us.apache.org/repos/asf/carbondata/blob/45787fb9/integration/spark-common-test/src/test/resources/data_with_special_char.csv
----------------------------------------------------------------------
diff --git 
a/integration/spark-common-test/src/test/resources/data_with_special_char.csv 
b/integration/spark-common-test/src/test/resources/data_with_special_char.csv
new file mode 100644
index 0000000..f4d313a
--- /dev/null
+++ 
b/integration/spark-common-test/src/test/resources/data_with_special_char.csv
@@ -0,0 +1,11 @@
+empno,empname,designation,doj,workgroupcategory,workgroupcategoryname,deptno,deptname,projectcode,projectjoindate,projectenddate,attendance,utilization,salary
+11,"arvind,ss",SE,17-01-2007,1,developer,10,network,928478,17-02-2007,29-11-2016,96,96,5040
+12,"krithin$ks",SSE,29-05-2008,1,developer,11,protocol,928378,29-06-2008,30-12-2016,85,95,7124
+13,"madhan%rr",TPL,07-07-2009,2,tester,10,network,928478,07-08-2009,30-12-2016,88,99,9054
+14,"anandh(y)",SA,29-12-2010,3,manager,11,protocol,928278,29-01-2011,29-06-2016,77,92,11248
+15,"ayushi*ty",SSA,09-11-2011,1,developer,12,security,928375,09-12-2011,29-05-2016,99,91,13245
+16,"pramod&56",SE,14-10-2012,1,developer,13,configManagement,928478,14-11-2012,29-12-2016,86,93,5040
+17,"gawrav@66",PL,22-09-2013,2,tester,12,security,928778,22-10-2013,15-11-2016,78,97,9574
+18,"sibi=56",TL,15-08-2014,2,tester,14,Learning,928176,15-09-2014,29-05-2016,84,98,7245
+19,shivani,PL,12-05-2015,1,developer,10,network,928977,12-06-2015,12-11-2016,88,91,11254
+20,bill,PM,01-12-2015,3,manager,14,Learning,928479,01-01-2016,30-11-2016,75,94,13547

http://git-wip-us.apache.org/repos/asf/carbondata/blob/45787fb9/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala
index d3ea5aa..bd4252f 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala
@@ -239,6 +239,24 @@ class StandardPartitionTableLoadingTestCase extends 
QueryTest with BeforeAndAfte
     checkAnswer(sql("select count(*) from loadstaticpartitiononeoverwrite"), 
rows)
   }
 
+  test("test partition column with special characters") {
+    sql(
+      """
+        | CREATE TABLE loadpartitionwithspecialchar (empno int, designation 
String, doj Timestamp,
+        |  workgroupcategory int, workgroupcategoryname String, deptno int, 
deptname String,
+        |  projectcode int, projectjoindate Timestamp, projectenddate 
Timestamp,attendance int,
+        |  utilization int,salary int)
+        | PARTITIONED BY (empname String)
+        | STORED BY 'org.apache.carbondata.format'
+      """.stripMargin)
+
+    sql(s"""LOAD DATA local inpath '$resourcesPath/data_with_special_char.csv' 
INTO TABLE loadpartitionwithspecialchar OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= 
'"')""")
+
+    checkAnswer(sql("select count(*) from loadpartitionwithspecialchar"), 
Seq(Row(10)))
+    checkAnswer(sql("select count(*) from loadpartitionwithspecialchar where 
empname='sibi=56'"), Seq(Row(1)))
+    checkAnswer(sql("select count(*) from loadpartitionwithspecialchar where 
empname='arvind,ss'"), Seq(Row(1)))
+  }
+
   test("Restrict streaming on partitioned table") {
     intercept[AnalysisException] {
       sql(
@@ -290,6 +308,7 @@ class StandardPartitionTableLoadingTestCase extends 
QueryTest with BeforeAndAfte
     sql("drop table if exists loadstaticpartitiononeoverwrite")
     sql("drop table if exists streamingpartitionedtable")
     sql("drop table if exists loadstaticpartitiononeissue")
+    sql("drop table if exists loadpartitionwithspecialchar")
   }
 
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/45787fb9/integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/CarbonFileFormat.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/CarbonFileFormat.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/CarbonFileFormat.scala
index a8b7c74..954c67a 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/CarbonFileFormat.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/CarbonFileFormat.scala
@@ -190,16 +190,12 @@ private class CarbonOutputWriter(path: String,
   val partitions = getPartitionsFromPath(path, 
context).map(ExternalCatalogUtils.unescapePathName)
   val partitionData = if (partitions.nonEmpty) {
     partitions.map{ p =>
-      val splitData = p.split("=")
-      if (splitData.length > 1) {
-        // NUll handling case. For null hive creates with this special name
-        if (splitData(1).equals("__HIVE_DEFAULT_PARTITION__")) {
-          null
-        } else {
-          splitData(1)
-        }
+      val value = p.substring(p.indexOf("=") + 1, p.length)
+      // NUll handling case. For null hive creates with this special name
+      if (value.equals("__HIVE_DEFAULT_PARTITION__")) {
+        null
       } else {
-        ""
+        value
       }
     }
   } else {

[21/50] [abbrv] carbondata git commit: [CARBONDATA-1944][PARTITION]Special character like comma (, ) cannot be loaded on partition columns

Reply via email to