Repository: carbondata
Updated Branches:
  refs/heads/master ce53b48a1 -> 1345dc6a3


[CARBONDATA-2512][32k] Support writing longstring through SDK

Support writing longstring columns through the SDK.
Users can specify the data type 'varchar' for longstring columns.
Note that a 'varchar' column cannot be included in sort_columns.

This closes #2455


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1345dc6a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1345dc6a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1345dc6a

Branch: refs/heads/master
Commit: 1345dc6a3c8822ce732df61c5d46ce7ece0c348f
Parents: ce53b48
Author: xuchuanyin <xuchuan...@hust.edu.cn>
Authored: Fri Jul 6 11:21:45 2018 +0800
Committer: Jacky Li <jacky.li...@qq.com>
Committed: Tue Jul 24 11:02:09 2018 +0800

----------------------------------------------------------------------
 .../schema/table/TableSchemaBuilder.java        |  1 +
 .../sdv/generated/SDKwriterTestCase.scala       | 25 ++++++++++++++++++++
 .../sdk/file/CarbonWriterBuilder.java           |  5 ++--
 .../org/apache/carbondata/sdk/file/Field.java   |  4 ++++
 4 files changed, 33 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
index 4780e60..6a41eee 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
@@ -169,6 +169,7 @@ public class TableSchemaBuilder {
     newColumn.setDataType(field.getDataType());
     if (isSortColumn ||
         field.getDataType() == DataTypes.STRING ||
+        field.getDataType() == DataTypes.VARCHAR ||
         field.getDataType() == DataTypes.DATE ||
         field.getDataType() == DataTypes.TIMESTAMP ||
         field.getDataType().isComplexType() ||

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
index 98ff99c..de0d731 100644
--- 
a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
+++ 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
@@ -32,6 +32,7 @@ import org.apache.avro.file.DataFileWriter
 import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, 
GenericRecord}
 import org.apache.avro.io.{DecoderFactory, Encoder}
 import org.apache.commons.lang.CharEncoding
+import org.apache.commons.lang3.RandomStringUtils
 import org.junit.Assert
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants
@@ -730,6 +731,30 @@ class SDKwriterTestCase extends QueryTest with 
BeforeAndAfterEach {
     checkAnswer(sql(s"""select count(*) from sdkTable"""),
       Seq(Row(3)))
   }
+
+  test("Test sdk with longstring") {
+    // here we specify the longstring column as varchar
+    val schema = new StringBuilder()
+      .append("[ \n")
+      .append("   {\"name\":\"string\"},\n")
+      .append("   {\"address\":\"varchar\"},\n")
+      .append("   {\"age\":\"int\"}\n")
+      .append("]")
+      .toString()
+    val builder = CarbonWriter.builder()
+    val writer = builder.outputPath(writerPath)
+      .buildWriterForCSVInput(Schema.parseJson(schema))
+
+    for (i <- 0 until 5) {
+      writer.write(Array[String](s"name_$i", 
RandomStringUtils.randomAlphabetic(33000), i.toString))
+    }
+    writer.close()
+
+    assert(FileFactory.getCarbonFile(writerPath).exists)
+    sql("DROP TABLE IF EXISTS sdkTable")
+    sql(s"CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION 
'$writerPath'")
+    checkAnswer(sql("select count(*) from sdkTable"), Seq(Row(5)))
+  }
 }
 
 object avroUtil{

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
----------------------------------------------------------------------
diff --git 
a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
 
b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
index d4b1c5b..225d373 100644
--- 
a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
+++ 
b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
@@ -495,9 +495,10 @@ public class CarbonWriterBuilder {
         if (isSortColumn > -1) {
           // unsupported types for ("array", "struct", "double", "float", 
"decimal")
           if (field.getDataType() == DataTypes.DOUBLE || field.getDataType() 
== DataTypes.FLOAT
-              || DataTypes.isDecimal(field.getDataType()) || 
field.getDataType().isComplexType()) {
+              || DataTypes.isDecimal(field.getDataType()) || 
field.getDataType().isComplexType()
+              || field.getDataType() == DataTypes.VARCHAR) {
             throw new RuntimeException(
-                " sort columns not supported for " + "array, struct, double, 
float, decimal ");
+                " sort columns not supported for array, struct, double, float, 
decimal, varchar");
           }
         }
         if (field.getChildren() != null && field.getChildren().size() > 0) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java 
b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java
index 6d4cfd9..e1e1186 100644
--- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java
+++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java
@@ -54,6 +54,8 @@ public class Field {
     this.name = name;
     if (type.equalsIgnoreCase("string")) {
       this.type = DataTypes.STRING;
+    } else if (type.equalsIgnoreCase("varchar")) {
+      this.type = DataTypes.VARCHAR;
     } else if (type.equalsIgnoreCase("date")) {
       this.type = DataTypes.DATE;
     } else if (type.equalsIgnoreCase("timestamp")) {
@@ -87,6 +89,8 @@ public class Field {
     this.children = fields;
     if (type.equalsIgnoreCase("string")) {
       this.type = DataTypes.STRING;
+    } else if (type.equalsIgnoreCase("varchar")) {
+      this.type = DataTypes.VARCHAR;
     } else if (type.equalsIgnoreCase("date")) {
       this.type = DataTypes.DATE;
     } else if (type.equalsIgnoreCase("timestamp")) {

Reply via email to