[CARBONDATA-1944][PARTITION] Special characters like comma (,) cannot be loaded on partition columns
There is an issue while setting the partition information to the CarbonTableInputFormat; it is fixed in this PR. This PR also removes the old CarbonInputFormat class and its references. This closes #1733 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/45787fb9 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/45787fb9 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/45787fb9 Branch: refs/heads/branch-1.3 Commit: 45787fb99bd9327f180b40b288794edcaed5296c Parents: a83d029 Author: ravipesala <[email protected]> Authored: Wed Dec 27 23:33:54 2017 +0530 Committer: Jacky Li <[email protected]> Committed: Thu Jan 4 16:01:18 2018 +0800 ---------------------------------------------------------------------- .../hadoop/api/CarbonTableInputFormat.java | 21 +++++++++++++------- .../test/resources/data_with_special_char.csv | 11 ++++++++++ .../StandardPartitionTableLoadingTestCase.scala | 19 ++++++++++++++++++ .../datasources/CarbonFileFormat.scala | 14 +++++-------- 4 files changed, 49 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/45787fb9/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java ---------------------------------------------------------------------- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java index c02e03e..ed4684d 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java @@ -295,19 +295,26 @@ public class CarbonTableInputFormat<T> extends FileInputFormat<Void, T> { * set list of partitions to prune */ public static void setPartitionsToPrune(Configuration configuration, 
List<String> partitions) { - configuration.set( - CarbonTableInputFormat.PARTITIONS_TO_PRUNE, CarbonUtil.convertToString(partitions)); + if (partitions == null) { + return; + } + try { + String partitionString = ObjectSerializationUtil.convertObjectToString(partitions); + configuration.set(PARTITIONS_TO_PRUNE, partitionString); + } catch (Exception e) { + throw new RuntimeException("Error while setting patition information to Job", e); + } } /** * get list of partitions to prune */ - public static List<String> getPartitionsToPrune(Configuration configuration) { - String partitionString = configuration.get(PARTITIONS_TO_PRUNE, ""); - if (partitionString.trim().isEmpty()) { - return null; + public static List<String> getPartitionsToPrune(Configuration configuration) throws IOException { + String partitionString = configuration.get(PARTITIONS_TO_PRUNE); + if (partitionString != null) { + return (List<String>) ObjectSerializationUtil.convertStringToObject(partitionString); } - return Arrays.asList(partitionString.split(",")); + return null; } /** * Set list of files to access http://git-wip-us.apache.org/repos/asf/carbondata/blob/45787fb9/integration/spark-common-test/src/test/resources/data_with_special_char.csv ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/resources/data_with_special_char.csv b/integration/spark-common-test/src/test/resources/data_with_special_char.csv new file mode 100644 index 0000000..f4d313a --- /dev/null +++ b/integration/spark-common-test/src/test/resources/data_with_special_char.csv @@ -0,0 +1,11 @@ +empno,empname,designation,doj,workgroupcategory,workgroupcategoryname,deptno,deptname,projectcode,projectjoindate,projectenddate,attendance,utilization,salary +11,"arvind,ss",SE,17-01-2007,1,developer,10,network,928478,17-02-2007,29-11-2016,96,96,5040 +12,"krithin$ks",SSE,29-05-2008,1,developer,11,protocol,928378,29-06-2008,30-12-2016,85,95,7124 
+13,"madhan%rr",TPL,07-07-2009,2,tester,10,network,928478,07-08-2009,30-12-2016,88,99,9054 +14,"anandh(y)",SA,29-12-2010,3,manager,11,protocol,928278,29-01-2011,29-06-2016,77,92,11248 +15,"ayushi*ty",SSA,09-11-2011,1,developer,12,security,928375,09-12-2011,29-05-2016,99,91,13245 +16,"pramod&56",SE,14-10-2012,1,developer,13,configManagement,928478,14-11-2012,29-12-2016,86,93,5040 +17,"gawrav@66",PL,22-09-2013,2,tester,12,security,928778,22-10-2013,15-11-2016,78,97,9574 +18,"sibi=56",TL,15-08-2014,2,tester,14,Learning,928176,15-09-2014,29-05-2016,84,98,7245 +19,shivani,PL,12-05-2015,1,developer,10,network,928977,12-06-2015,12-11-2016,88,91,11254 +20,bill,PM,01-12-2015,3,manager,14,Learning,928479,01-01-2016,30-11-2016,75,94,13547 http://git-wip-us.apache.org/repos/asf/carbondata/blob/45787fb9/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala index d3ea5aa..bd4252f 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala @@ -239,6 +239,24 @@ class StandardPartitionTableLoadingTestCase extends QueryTest with BeforeAndAfte checkAnswer(sql("select count(*) from loadstaticpartitiononeoverwrite"), rows) } + test("test partition column with special characters") { + sql( + """ + | CREATE TABLE loadpartitionwithspecialchar (empno int, designation String, doj 
Timestamp, + | workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, + | projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int, + | utilization int,salary int) + | PARTITIONED BY (empname String) + | STORED BY 'org.apache.carbondata.format' + """.stripMargin) + + sql(s"""LOAD DATA local inpath '$resourcesPath/data_with_special_char.csv' INTO TABLE loadpartitionwithspecialchar OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '"')""") + + checkAnswer(sql("select count(*) from loadpartitionwithspecialchar"), Seq(Row(10))) + checkAnswer(sql("select count(*) from loadpartitionwithspecialchar where empname='sibi=56'"), Seq(Row(1))) + checkAnswer(sql("select count(*) from loadpartitionwithspecialchar where empname='arvind,ss'"), Seq(Row(1))) + } + test("Restrict streaming on partitioned table") { intercept[AnalysisException] { sql( @@ -290,6 +308,7 @@ class StandardPartitionTableLoadingTestCase extends QueryTest with BeforeAndAfte sql("drop table if exists loadstaticpartitiononeoverwrite") sql("drop table if exists streamingpartitionedtable") sql("drop table if exists loadstaticpartitiononeissue") + sql("drop table if exists loadpartitionwithspecialchar") } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/45787fb9/integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/CarbonFileFormat.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/CarbonFileFormat.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/CarbonFileFormat.scala index a8b7c74..954c67a 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/CarbonFileFormat.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/CarbonFileFormat.scala @@ -190,16 +190,12 @@ private class CarbonOutputWriter(path: String, val partitions = 
getPartitionsFromPath(path, context).map(ExternalCatalogUtils.unescapePathName) val partitionData = if (partitions.nonEmpty) { partitions.map{ p => - val splitData = p.split("=") - if (splitData.length > 1) { - // NUll handling case. For null hive creates with this special name - if (splitData(1).equals("__HIVE_DEFAULT_PARTITION__")) { - null - } else { - splitData(1) - } + val value = p.substring(p.indexOf("=") + 1, p.length) + // NUll handling case. For null hive creates with this special name + if (value.equals("__HIVE_DEFAULT_PARTITION__")) { + null } else { - "" + value } } } else {
