[01/50] carbondata git commit: [CARBONDATA-2512][32k] Support writing longstring through SDK
Repository: carbondata Updated Branches: refs/heads/branch-1.4 e07b832f5 -> 92f021122 [CARBONDATA-2512][32k] Support writing longstring through SDK Support writing longstring through SDK. User can specify the datatype as 'varchar' for longstring columns. Please note that a 'varchar' column cannot be used in sort_columns. This closes #2455 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/700ad4be Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/700ad4be Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/700ad4be Branch: refs/heads/branch-1.4 Commit: 700ad4be53e4c1bc4c13e401da467690ec60b31f Parents: 7dfc0ef Author: xuchuanyin Authored: Fri Jul 6 11:21:45 2018 +0800 Committer: ravipesala Committed: Tue Jul 31 00:10:41 2018 +0530 -- .../schema/table/TableSchemaBuilder.java | 1 + .../sdv/generated/SDKwriterTestCase.scala | 25 ++++++++++++++++++++ .../sdk/file/CarbonWriterBuilder.java | 5 ++-- .../org/apache/carbondata/sdk/file/Field.java | 4 ++++ 4 files changed, 33 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/700ad4be/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java index 4780e60..6a41eee 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java @@ -169,6 +169,7 @@ public class TableSchemaBuilder { newColumn.setDataType(field.getDataType()); if (isSortColumn || field.getDataType() == DataTypes.STRING || +field.getDataType() == DataTypes.VARCHAR || field.getDataType() == DataTypes.DATE || field.getDataType() == DataTypes.TIMESTAMP || field.getDataType().isComplexType() || 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/700ad4be/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala -- diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala index 98ff99c..de0d731 100644 --- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala +++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala @@ -32,6 +32,7 @@ import org.apache.avro.file.DataFileWriter import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.io.{DecoderFactory, Encoder} import org.apache.commons.lang.CharEncoding +import org.apache.commons.lang3.RandomStringUtils import org.junit.Assert import org.apache.carbondata.core.constants.CarbonCommonConstants @@ -730,6 +731,30 @@ class SDKwriterTestCase extends QueryTest with BeforeAndAfterEach { checkAnswer(sql(s"""select count(*) from sdkTable"""), Seq(Row(3))) } + + test("Test sdk with longstring") { +// here we specify the longstring column as varchar +val schema = new StringBuilder() + .append("[ \n") + .append(" {\"name\":\"string\"},\n") + .append(" {\"address\":\"varchar\"},\n") + .append(" {\"age\":\"int\"}\n") + .append("]") + .toString() +val builder = CarbonWriter.builder() +val writer = builder.outputPath(writerPath) + .buildWriterForCSVInput(Schema.parseJson(schema)) + +for (i <- 0 until 5) { + writer.write(Array[String](s"name_$i", RandomStringUtils.randomAlphabetic(33000), i.toString)) +} +writer.close() + +assert(FileFactory.getCarbonFile(writerPath).exists) +sql("DROP TABLE IF EXISTS sdkTable") +sql(s"CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' 
LOCATION '$writerPath'") +checkAnswer(sql("select count(*) from sdkTable"), Seq(Row(5))) + } } object avroUtil{ http://git-wip-us.apache.org/repos/asf/carbondata/blob/700ad4be/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java -- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
carbondata git commit: [CARBONDATA-2512][32k] Support writing longstring through SDK
Repository: carbondata Updated Branches: refs/heads/master ce53b48a1 -> 1345dc6a3 [CARBONDATA-2512][32k] Support writing longstring through SDK Support writing longstring through SDK. User can specify the datatype as 'varchar' for longstring columns. Please note that a 'varchar' column cannot be used in sort_columns. This closes #2455 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1345dc6a Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1345dc6a Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1345dc6a Branch: refs/heads/master Commit: 1345dc6a3c8822ce732df61c5d46ce7ece0c348f Parents: ce53b48 Author: xuchuanyin Authored: Fri Jul 6 11:21:45 2018 +0800 Committer: Jacky Li Committed: Tue Jul 24 11:02:09 2018 +0800 -- .../schema/table/TableSchemaBuilder.java | 1 + .../sdv/generated/SDKwriterTestCase.scala | 25 ++++++++++++++++++++ .../sdk/file/CarbonWriterBuilder.java | 5 ++-- .../org/apache/carbondata/sdk/file/Field.java | 4 ++++ 4 files changed, 33 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java index 4780e60..6a41eee 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java @@ -169,6 +169,7 @@ public class TableSchemaBuilder { newColumn.setDataType(field.getDataType()); if (isSortColumn || field.getDataType() == DataTypes.STRING || +field.getDataType() == DataTypes.VARCHAR || field.getDataType() == DataTypes.DATE || field.getDataType() == DataTypes.TIMESTAMP || field.getDataType().isComplexType() || 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala -- diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala index 98ff99c..de0d731 100644 --- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala +++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala @@ -32,6 +32,7 @@ import org.apache.avro.file.DataFileWriter import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.io.{DecoderFactory, Encoder} import org.apache.commons.lang.CharEncoding +import org.apache.commons.lang3.RandomStringUtils import org.junit.Assert import org.apache.carbondata.core.constants.CarbonCommonConstants @@ -730,6 +731,30 @@ class SDKwriterTestCase extends QueryTest with BeforeAndAfterEach { checkAnswer(sql(s"""select count(*) from sdkTable"""), Seq(Row(3))) } + + test("Test sdk with longstring") { +// here we specify the longstring column as varchar +val schema = new StringBuilder() + .append("[ \n") + .append(" {\"name\":\"string\"},\n") + .append(" {\"address\":\"varchar\"},\n") + .append(" {\"age\":\"int\"}\n") + .append("]") + .toString() +val builder = CarbonWriter.builder() +val writer = builder.outputPath(writerPath) + .buildWriterForCSVInput(Schema.parseJson(schema)) + +for (i <- 0 until 5) { + writer.write(Array[String](s"name_$i", RandomStringUtils.randomAlphabetic(33000), i.toString)) +} +writer.close() + +assert(FileFactory.getCarbonFile(writerPath).exists) +sql("DROP TABLE IF EXISTS sdkTable") +sql(s"CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' 
LOCATION '$writerPath'") +checkAnswer(sql("select count(*) from sdkTable"), Seq(Row(5))) + } } object avroUtil{ http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java -- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java