[01/50] carbondata git commit: [CARBONDATA-2512][32k] Support writing longstring through SDK

2018-07-30 Thread ravipesala
Repository: carbondata
Updated Branches:
  refs/heads/branch-1.4 e07b832f5 -> 92f021122


[CARBONDATA-2512][32k] Support writing longstring through SDK

Support writing longstring columns through the SDK.
Users can specify the data type 'varchar' for longstring columns.
Note that a 'varchar' column cannot be included in sort_columns.

This closes #2455


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/700ad4be
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/700ad4be
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/700ad4be

Branch: refs/heads/branch-1.4
Commit: 700ad4be53e4c1bc4c13e401da467690ec60b31f
Parents: 7dfc0ef
Author: xuchuanyin 
Authored: Fri Jul 6 11:21:45 2018 +0800
Committer: ravipesala 
Committed: Tue Jul 31 00:10:41 2018 +0530

--
 .../schema/table/TableSchemaBuilder.java        |  1 +
 .../sdv/generated/SDKwriterTestCase.scala       | 25 +++++++++++++++++++++++++
 .../sdk/file/CarbonWriterBuilder.java           |  5 +++--
 .../org/apache/carbondata/sdk/file/Field.java   |  4 ++++
 4 files changed, 33 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/700ad4be/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
index 4780e60..6a41eee 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
@@ -169,6 +169,7 @@ public class TableSchemaBuilder {
 newColumn.setDataType(field.getDataType());
 if (isSortColumn ||
 field.getDataType() == DataTypes.STRING ||
+field.getDataType() == DataTypes.VARCHAR ||
 field.getDataType() == DataTypes.DATE ||
 field.getDataType() == DataTypes.TIMESTAMP ||
 field.getDataType().isComplexType() ||

http://git-wip-us.apache.org/repos/asf/carbondata/blob/700ad4be/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
--
diff --git 
a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
index 98ff99c..de0d731 100644
--- 
a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
+++ 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
@@ -32,6 +32,7 @@ import org.apache.avro.file.DataFileWriter
 import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, 
GenericRecord}
 import org.apache.avro.io.{DecoderFactory, Encoder}
 import org.apache.commons.lang.CharEncoding
+import org.apache.commons.lang3.RandomStringUtils
 import org.junit.Assert
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants
@@ -730,6 +731,30 @@ class SDKwriterTestCase extends QueryTest with 
BeforeAndAfterEach {
 checkAnswer(sql(s"""select count(*) from sdkTable"""),
   Seq(Row(3)))
   }
+
+  test("Test sdk with longstring") {
+// here we specify the longstring column as varchar
+val schema = new StringBuilder()
+  .append("[ \n")
+  .append("   {\"name\":\"string\"},\n")
+  .append("   {\"address\":\"varchar\"},\n")
+  .append("   {\"age\":\"int\"}\n")
+  .append("]")
+  .toString()
+val builder = CarbonWriter.builder()
+val writer = builder.outputPath(writerPath)
+  .buildWriterForCSVInput(Schema.parseJson(schema))
+
+for (i <- 0 until 5) {
+  writer.write(Array[String](s"name_$i", 
RandomStringUtils.randomAlphabetic(33000), i.toString))
+}
+writer.close()
+
+assert(FileFactory.getCarbonFile(writerPath).exists)
+sql("DROP TABLE IF EXISTS sdkTable")
+sql(s"CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION 
'$writerPath'")
+checkAnswer(sql("select count(*) from sdkTable"), Seq(Row(5)))
+  }
 }
 
 object avroUtil{

http://git-wip-us.apache.org/repos/asf/carbondata/blob/700ad4be/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
--
diff --git 
a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
 

carbondata git commit: [CARBONDATA-2512][32k] Support writing longstring through SDK

2018-07-23 Thread jackylk
Repository: carbondata
Updated Branches:
  refs/heads/master ce53b48a1 -> 1345dc6a3


[CARBONDATA-2512][32k] Support writing longstring through SDK

Support writing longstring columns through the SDK.
Users can specify the data type 'varchar' for longstring columns.
Note that a 'varchar' column cannot be included in sort_columns.

This closes #2455


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1345dc6a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1345dc6a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1345dc6a

Branch: refs/heads/master
Commit: 1345dc6a3c8822ce732df61c5d46ce7ece0c348f
Parents: ce53b48
Author: xuchuanyin 
Authored: Fri Jul 6 11:21:45 2018 +0800
Committer: Jacky Li 
Committed: Tue Jul 24 11:02:09 2018 +0800

--
 .../schema/table/TableSchemaBuilder.java        |  1 +
 .../sdv/generated/SDKwriterTestCase.scala       | 25 +++++++++++++++++++++++++
 .../sdk/file/CarbonWriterBuilder.java           |  5 +++--
 .../org/apache/carbondata/sdk/file/Field.java   |  4 ++++
 4 files changed, 33 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
index 4780e60..6a41eee 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
@@ -169,6 +169,7 @@ public class TableSchemaBuilder {
 newColumn.setDataType(field.getDataType());
 if (isSortColumn ||
 field.getDataType() == DataTypes.STRING ||
+field.getDataType() == DataTypes.VARCHAR ||
 field.getDataType() == DataTypes.DATE ||
 field.getDataType() == DataTypes.TIMESTAMP ||
 field.getDataType().isComplexType() ||

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
--
diff --git 
a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
index 98ff99c..de0d731 100644
--- 
a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
+++ 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
@@ -32,6 +32,7 @@ import org.apache.avro.file.DataFileWriter
 import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, 
GenericRecord}
 import org.apache.avro.io.{DecoderFactory, Encoder}
 import org.apache.commons.lang.CharEncoding
+import org.apache.commons.lang3.RandomStringUtils
 import org.junit.Assert
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants
@@ -730,6 +731,30 @@ class SDKwriterTestCase extends QueryTest with 
BeforeAndAfterEach {
 checkAnswer(sql(s"""select count(*) from sdkTable"""),
   Seq(Row(3)))
   }
+
+  test("Test sdk with longstring") {
+// here we specify the longstring column as varchar
+val schema = new StringBuilder()
+  .append("[ \n")
+  .append("   {\"name\":\"string\"},\n")
+  .append("   {\"address\":\"varchar\"},\n")
+  .append("   {\"age\":\"int\"}\n")
+  .append("]")
+  .toString()
+val builder = CarbonWriter.builder()
+val writer = builder.outputPath(writerPath)
+  .buildWriterForCSVInput(Schema.parseJson(schema))
+
+for (i <- 0 until 5) {
+  writer.write(Array[String](s"name_$i", 
RandomStringUtils.randomAlphabetic(33000), i.toString))
+}
+writer.close()
+
+assert(FileFactory.getCarbonFile(writerPath).exists)
+sql("DROP TABLE IF EXISTS sdkTable")
+sql(s"CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION 
'$writerPath'")
+checkAnswer(sql("select count(*) from sdkTable"), Seq(Row(5)))
+  }
 }
 
 object avroUtil{

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
--
diff --git 
a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java