Repository: carbondata Updated Branches: refs/heads/master ce53b48a1 -> 1345dc6a3
[CARBONDATA-2512][32k] Support writing longstring through SDK Support writing longstring through SDK. Users can specify the datatype as 'varchar' for longstring columns. Please note that a 'varchar' column cannot be used in sort_columns. This closes #2455 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1345dc6a Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1345dc6a Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1345dc6a Branch: refs/heads/master Commit: 1345dc6a3c8822ce732df61c5d46ce7ece0c348f Parents: ce53b48 Author: xuchuanyin <xuchuan...@hust.edu.cn> Authored: Fri Jul 6 11:21:45 2018 +0800 Committer: Jacky Li <jacky.li...@qq.com> Committed: Tue Jul 24 11:02:09 2018 +0800 ---------------------------------------------------------------------- .../schema/table/TableSchemaBuilder.java | 1 + .../sdv/generated/SDKwriterTestCase.scala | 25 ++++++++++++++++++++ .../sdk/file/CarbonWriterBuilder.java | 5 ++-- .../org/apache/carbondata/sdk/file/Field.java | 4 ++++ 4 files changed, 33 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java index 4780e60..6a41eee 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java @@ -169,6 +169,7 @@ public class TableSchemaBuilder { newColumn.setDataType(field.getDataType()); if (isSortColumn || field.getDataType() == 
DataTypes.STRING || + field.getDataType() == DataTypes.VARCHAR || field.getDataType() == DataTypes.DATE || field.getDataType() == DataTypes.TIMESTAMP || field.getDataType().isComplexType() || http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala index 98ff99c..de0d731 100644 --- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala +++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala @@ -32,6 +32,7 @@ import org.apache.avro.file.DataFileWriter import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.io.{DecoderFactory, Encoder} import org.apache.commons.lang.CharEncoding +import org.apache.commons.lang3.RandomStringUtils import org.junit.Assert import org.apache.carbondata.core.constants.CarbonCommonConstants @@ -730,6 +731,30 @@ class SDKwriterTestCase extends QueryTest with BeforeAndAfterEach { checkAnswer(sql(s"""select count(*) from sdkTable"""), Seq(Row(3))) } + + test("Test sdk with longstring") { + // here we specify the longstring column as varchar + val schema = new StringBuilder() + .append("[ \n") + .append(" {\"name\":\"string\"},\n") + .append(" {\"address\":\"varchar\"},\n") + .append(" {\"age\":\"int\"}\n") + .append("]") + .toString() + val builder = CarbonWriter.builder() + val writer = builder.outputPath(writerPath) + .buildWriterForCSVInput(Schema.parseJson(schema)) + + for (i <- 0 until 5) { + 
writer.write(Array[String](s"name_$i", RandomStringUtils.randomAlphabetic(33000), i.toString)) + } + writer.close() + + assert(FileFactory.getCarbonFile(writerPath).exists) + sql("DROP TABLE IF EXISTS sdkTable") + sql(s"CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION '$writerPath'") + checkAnswer(sql("select count(*) from sdkTable"), Seq(Row(5))) + } } object avroUtil{ http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java ---------------------------------------------------------------------- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java index d4b1c5b..225d373 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java @@ -495,9 +495,10 @@ public class CarbonWriterBuilder { if (isSortColumn > -1) { // unsupported types for ("array", "struct", "double", "float", "decimal") if (field.getDataType() == DataTypes.DOUBLE || field.getDataType() == DataTypes.FLOAT - || DataTypes.isDecimal(field.getDataType()) || field.getDataType().isComplexType()) { + || DataTypes.isDecimal(field.getDataType()) || field.getDataType().isComplexType() + || field.getDataType() == DataTypes.VARCHAR) { throw new RuntimeException( - " sort columns not supported for " + "array, struct, double, float, decimal "); + " sort columns not supported for array, struct, double, float, decimal, varchar"); } } if (field.getChildren() != null && field.getChildren().size() > 0) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/1345dc6a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java ---------------------------------------------------------------------- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java 
b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java index 6d4cfd9..e1e1186 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java @@ -54,6 +54,8 @@ public class Field { this.name = name; if (type.equalsIgnoreCase("string")) { this.type = DataTypes.STRING; + } else if (type.equalsIgnoreCase("varchar")) { + this.type = DataTypes.VARCHAR; } else if (type.equalsIgnoreCase("date")) { this.type = DataTypes.DATE; } else if (type.equalsIgnoreCase("timestamp")) { @@ -87,6 +89,8 @@ public class Field { this.children = fields; if (type.equalsIgnoreCase("string")) { this.type = DataTypes.STRING; + } else if (type.equalsIgnoreCase("varchar")) { + this.type = DataTypes.VARCHAR; } else if (type.equalsIgnoreCase("date")) { this.type = DataTypes.DATE; } else if (type.equalsIgnoreCase("timestamp")) {