[17/50] [abbrv] carbondata git commit: [CARBONDATA-2443][SDK] Multi level complex type support for AVRO based SDK
[CARBONDATA-2443][SDK] Multi level complex type support for AVRO based SDK Problem: Problem inferring the complex type schema with boolean array type from the store created using SDK writer. Analysis: When we create an external table and infer the schema from the store created using SDK writer, the operation fails because of a complex type field with a boolean array dataType. This is because during schema creation by SDK writer, for array type children a child with column name val is added. While parsing, the logic to append the parent name to the child column name is missing for the boolean type, which is causing this problem. Solution: Handle the parsing for boolean type. This closes #2294 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/35a7b5e9 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/35a7b5e9 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/35a7b5e9 Branch: refs/heads/spark-2.3 Commit: 35a7b5e9af5cabe85794274e07cab9a6c53f Parents: ff5166e Author: manishgupta88 Authored: Thu May 10 17:09:17 2018 +0530 Committer: ravipesala Committed: Thu May 10 20:49:28 2018 +0530 -- .../schema/table/TableSchemaBuilder.java | 33 .../schema/table/TableSchemaBuilderSuite.java | 13 +--- .../spark/sql/catalyst/CarbonDDLSqlParser.scala | 5 +++ .../sdk/file/CarbonWriterBuilder.java | 13 ++-- 4 files changed, 36 insertions(+), 28 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/35a7b5e9/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java index ca082e1..b078400 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java +++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants; @@ -114,12 +115,12 @@ public class TableSchemaBuilder { this.sortColumns = sortColumns; } - public ColumnSchema addColumn(StructField field, boolean isSortColumn) { -return addColumn(field, null, isSortColumn, false); + public ColumnSchema addColumn(StructField field, AtomicInteger valIndex, boolean isSortColumn) { +return addColumn(field, null, valIndex, isSortColumn, false); } - private ColumnSchema addColumn(StructField field, String parentName, boolean isSortColumn, - boolean isComplexChild) { + private ColumnSchema addColumn(StructField field, String parentName, AtomicInteger valIndex, + boolean isSortColumn, boolean isComplexChild) { Objects.requireNonNull(field); checkRepeatColumnName(field); ColumnSchema newColumn = new ColumnSchema(); @@ -184,33 +185,25 @@ public class TableSchemaBuilder { if (field.getDataType().isComplexType()) { String parentFieldName = newColumn.getColumnName(); if (field.getDataType().getName().equalsIgnoreCase("ARRAY")) { -String colName = getColNameForArray(parentFieldName); -addColumn(new StructField(colName, -((ArrayType) field.getDataType()).getElementType()), field.getFieldName(), false, true); +String colName = getColNameForArray(valIndex); +addColumn(new StructField(colName, ((ArrayType) field.getDataType()).getElementType()), +field.getFieldName(), valIndex, false, true); } else if (field.getDataType().getName().equalsIgnoreCase("STRUCT") && ((StructType) field.getDataType()).getFields().size() > 0) { // This field has children. 
List fields = ((StructType) field.getDataType()).getFields(); for (int i = 0; i < fields.size(); i++) { - addColumn(fields.get(i), parentFieldName, false, true); + addColumn(fields.get(i), parentFieldName, valIndex, false, true); } } } return newColumn; } - private String getColNameForArray(String parentFieldName) { -if (!parentFieldName.endsWith(".val")) { - return "val"; -} else { - String[] splits = parentFieldName.split("val"); - if (splits.length == 1) { -return "val" + 1; - } else { -return "val" + (Integer.parseInt(parentFieldName -
carbondata git commit: [CARBONDATA-2443][SDK] Multi level complex type support for AVRO based SDK
Repository: carbondata Updated Branches: refs/heads/master ff5166ef7 -> 35a7b5e9a [CARBONDATA-2443][SDK] Multi level complex type support for AVRO based SDK Problem: Problem inferring the complex type schema with boolean array type from the store created using SDK writer. Analysis: When we create an external table and infer the schema from the store created using SDK writer, the operation fails because of a complex type field with a boolean array dataType. This is because during schema creation by SDK writer, for array type children a child with column name val is added. While parsing, the logic to append the parent name to the child column name is missing for the boolean type, which is causing this problem. Solution: Handle the parsing for boolean type. This closes #2294 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/35a7b5e9 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/35a7b5e9 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/35a7b5e9 Branch: refs/heads/master Commit: 35a7b5e9af5cabe85794274e07cab9a6c53f Parents: ff5166e Author: manishgupta88 Authored: Thu May 10 17:09:17 2018 +0530 Committer: ravipesala Committed: Thu May 10 20:49:28 2018 +0530 -- .../schema/table/TableSchemaBuilder.java | 33 .../schema/table/TableSchemaBuilderSuite.java | 13 +--- .../spark/sql/catalyst/CarbonDDLSqlParser.scala | 5 +++ .../sdk/file/CarbonWriterBuilder.java | 13 ++-- 4 files changed, 36 insertions(+), 28 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/35a7b5e9/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java index ca082e1..b078400 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants; @@ -114,12 +115,12 @@ public class TableSchemaBuilder { this.sortColumns = sortColumns; } - public ColumnSchema addColumn(StructField field, boolean isSortColumn) { -return addColumn(field, null, isSortColumn, false); + public ColumnSchema addColumn(StructField field, AtomicInteger valIndex, boolean isSortColumn) { +return addColumn(field, null, valIndex, isSortColumn, false); } - private ColumnSchema addColumn(StructField field, String parentName, boolean isSortColumn, - boolean isComplexChild) { + private ColumnSchema addColumn(StructField field, String parentName, AtomicInteger valIndex, + boolean isSortColumn, boolean isComplexChild) { Objects.requireNonNull(field); checkRepeatColumnName(field); ColumnSchema newColumn = new ColumnSchema(); @@ -184,33 +185,25 @@ public class TableSchemaBuilder { if (field.getDataType().isComplexType()) { String parentFieldName = newColumn.getColumnName(); if (field.getDataType().getName().equalsIgnoreCase("ARRAY")) { -String colName = getColNameForArray(parentFieldName); -addColumn(new StructField(colName, -((ArrayType) field.getDataType()).getElementType()), field.getFieldName(), false, true); +String colName = getColNameForArray(valIndex); +addColumn(new StructField(colName, ((ArrayType) field.getDataType()).getElementType()), +field.getFieldName(), valIndex, false, true); } else if (field.getDataType().getName().equalsIgnoreCase("STRUCT") && ((StructType) field.getDataType()).getFields().size() > 0) { // This field has children. 
List fields = ((StructType) field.getDataType()).getFields(); for (int i = 0; i < fields.size(); i++) { - addColumn(fields.get(i), parentFieldName, false, true); + addColumn(fields.get(i), parentFieldName, valIndex, false, true); } } } return newColumn; } - private String getColNameForArray(String parentFieldName) { -if (!parentFieldName.endsWith(".val")) { - return "val"; -} else { - String[] splits = parentFieldName.split("val"); - if (splits.length == 1) { -return "val" + 1; - }
[1/2] carbondata git commit: [CARBONDATA-2443][SDK] Multi level complex type support for AVRO based SDK
Repository: carbondata Updated Branches: refs/heads/master 51db049c4 -> 74ea24d14 [CARBONDATA-2443][SDK] Multi level complex type support for AVRO based SDK Multi level complex type support for AVRO based SDK This closes #2276 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ec33c112 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ec33c112 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ec33c112 Branch: refs/heads/master Commit: ec33c11286ebe8009ac07698bf23ffb3bd3e7711 Parents: 51db049 Author: sounakr Authored: Mon May 7 06:51:54 2018 +0530 Committer: ravipesala Committed: Tue May 8 16:49:08 2018 +0530 -- .../schema/table/TableSchemaBuilder.java | 17 +- .../TestNonTransactionalCarbonTable.scala | 10 +- .../datasources/SparkCarbonTableFormat.scala | 2 +- .../loading/DataLoadProcessBuilder.java | 12 +- .../converter/impl/FieldEncoderFactory.java | 2 +- .../loading/model/CarbonLoadModel.java | 14 +- .../InputProcessorStepForPartitionImpl.java | 251 --- .../InputProcessorStepWithNoConverterImpl.java | 306 +++ .../util/CarbonDataProcessorUtil.java | 29 ++ .../carbondata/sdk/file/AvroCarbonWriter.java | 60 ++-- .../sdk/file/CarbonWriterBuilder.java | 6 + 11 files changed, 401 insertions(+), 308 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/ec33c112/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java index 42bb958..e3c07fa 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java @@ -184,7 +184,8 @@ public class TableSchemaBuilder { if 
(field.getDataType().isComplexType()) { String parentFieldName = newColumn.getColumnName(); if (field.getDataType().getName().equalsIgnoreCase("ARRAY")) { -addColumn(new StructField("val", +String colName = getColNameForArray(parentFieldName); +addColumn(new StructField(colName, ((ArrayType) field.getDataType()).getElementType()), field.getFieldName(), false, true); } else if (field.getDataType().getName().equalsIgnoreCase("STRUCT") && ((StructType) field.getDataType()).getFields().size() > 0) { @@ -198,6 +199,20 @@ public class TableSchemaBuilder { return newColumn; } + private String getColNameForArray(String parentFieldName) { +if (!parentFieldName.contains(".val")) { + return "val"; +} else { + String[] splits = parentFieldName.split("val"); + if (splits.length == 1) { +return "val" + 1; + } else { +return "val" + (Integer.parseInt(parentFieldName +.substring(parentFieldName.lastIndexOf("val") + 3, parentFieldName.length())) + 1); + } +} + } + /** * Throw exception if {@param field} name is repeated */ http://git-wip-us.apache.org/repos/asf/carbondata/blob/ec33c112/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala index fabcd02..6b02d5a 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala @@ -678,8 +678,8 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { val exception = intercept[RuntimeException] { - buildTestDataWithBadRecordFail() -} 
+buildTestDataWithBadRecordFail() + } assert(exception.getMessage() .contains("Data load failed due to bad record")) @@ -780,7 +780,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { | } | ] | } - """.stripMargin +