[CARBONDATA-2722] [CARBONDATA-2721] JsonWriter issue fixes [CARBONDATA-2722][SDK] [JsonWriter] NPE when schema and data are not of same length or Data is null.
problem: Null data is not handled in the json object to carbon row conversion. solution: add a null check when object is fetched from json map. [CARBONDATA-2721][SDK] [JsonWriter] Json writer is writing only first element of an array and discarding the rest of the elements. problem: converting json object to carbon row array object is based on array children count, instead of array element count. Hence, as the array will always have one child, only one element is filled. solution: use array element count instead of array children count. This closes #2485 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/653efee0 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/653efee0 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/653efee0 Branch: refs/heads/carbonstore Commit: 653efee0283701d928562379f91e5df8fec73c24 Parents: 637a974 Author: ajantha-bhat <ajanthab...@gmail.com> Authored: Mon Jul 9 18:45:30 2018 +0530 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Fri Jul 13 20:49:56 2018 +0530 ---------------------------------------------------------------------- .../jsonFiles/data/PrimitiveTypeWithNull.json | 4 + .../jsonFiles/data/StructOfAllTypes.json | 2 +- .../jsonFiles/data/allPrimitiveType.json | 2 +- ...tNonTransactionalCarbonTableJsonWriter.scala | 88 +++++++++++++++++--- .../loading/parser/impl/JsonRowParser.java | 51 ++++++++---- 5 files changed, 115 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/653efee0/integration/spark-common-test/src/test/resources/jsonFiles/data/PrimitiveTypeWithNull.json ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/resources/jsonFiles/data/PrimitiveTypeWithNull.json 
b/integration/spark-common-test/src/test/resources/jsonFiles/data/PrimitiveTypeWithNull.json new file mode 100644 index 0000000..a5cd3d7 --- /dev/null +++ b/integration/spark-common-test/src/test/resources/jsonFiles/data/PrimitiveTypeWithNull.json @@ -0,0 +1,4 @@ +{ + "stringField": null, + "intField": 26 +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/653efee0/integration/spark-common-test/src/test/resources/jsonFiles/data/StructOfAllTypes.json ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/resources/jsonFiles/data/StructOfAllTypes.json b/integration/spark-common-test/src/test/resources/jsonFiles/data/StructOfAllTypes.json index 9806325..3beab07 100644 --- a/integration/spark-common-test/src/test/resources/jsonFiles/data/StructOfAllTypes.json +++ b/integration/spark-common-test/src/test/resources/jsonFiles/data/StructOfAllTypes.json @@ -5,7 +5,7 @@ "longField": 12345678, "doubleField": 123400.78, "boolField": true, - "FloorNum": [ 1, 2], + "FloorNum": [1,2,3,4,5,6], "FloorString": [ "abc", "def"], "FloorLong": [ 1234567, 2345678], "FloorDouble": [ 1.0, 2.0, 33.33], http://git-wip-us.apache.org/repos/asf/carbondata/blob/653efee0/integration/spark-common-test/src/test/resources/jsonFiles/data/allPrimitiveType.json ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/resources/jsonFiles/data/allPrimitiveType.json b/integration/spark-common-test/src/test/resources/jsonFiles/data/allPrimitiveType.json index 86648c3..6d81ec7 100644 --- a/integration/spark-common-test/src/test/resources/jsonFiles/data/allPrimitiveType.json +++ b/integration/spark-common-test/src/test/resources/jsonFiles/data/allPrimitiveType.json @@ -1,5 +1,5 @@ { - "stringField": "ajantha", + "stringField": "ajantha\"bhat\"", "intField": 26, "shortField": 26, "longField": 1234567, 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/653efee0/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableJsonWriter.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableJsonWriter.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableJsonWriter.scala index 299c966..ff5c062 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableJsonWriter.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableJsonWriter.scala @@ -32,7 +32,6 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.metadata.datatype.DataTypes import org.apache.carbondata.core.util.CarbonProperties import org.apache.carbondata.sdk.file._ - import scala.collection.JavaConverters._ class TestNonTransactionalCarbonTableJsonWriter extends QueryTest with BeforeAndAfterAll { @@ -94,7 +93,7 @@ class TestNonTransactionalCarbonTableJsonWriter extends QueryTest with BeforeAnd private def writeCarbonFileFromJsonRowInput(jsonRow: String, carbonSchema: Schema) = { try { - var options: util.Map[String, String] = Map("bAd_RECords_action" -> "FAIL").asJava + var options: util.Map[String, String] = Map("bAd_RECords_action" -> "FAIL", "quotechar" -> "\"").asJava val writer = CarbonWriter.builder .outputPath(writerPath).isTransactionalTable(false) .uniqueIdentifier(System.currentTimeMillis()) @@ -138,7 +137,7 @@ class TestNonTransactionalCarbonTableJsonWriter extends QueryTest with BeforeAnd s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION |'$writerPath' 
""".stripMargin) checkAnswer(sql("select * from sdkOutputTable"), - Seq(Row("ajantha", + Seq(Row("ajantha\"bhat\"", 26, 26, 1234567, @@ -266,21 +265,86 @@ class TestNonTransactionalCarbonTableJsonWriter extends QueryTest with BeforeAnd sql( s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION |'$writerPath' """.stripMargin) + assert(sql("select * from sdkOutputTable").collectAsList().toString.equals( + "[[[bob,10,12345678,123400.78,true,WrappedArray(1, 2, 3, 4, 5, 6),WrappedArray(abc, def)," + + "WrappedArray(1234567, 2345678),WrappedArray(1.0, 2.0, 33.33),WrappedArray(true, false, " + + "false, true)]]]")) + sql("DROP TABLE sdkOutputTable") + // drop table should not delete the files + assert(new File(writerPath).listFiles().length > 0) + FileUtils.deleteDirectory(new File(writerPath)) + } - sql("select * from sdkOutputTable").show(false) - /* - * +--------------------------------------------------------------------+ - |StructColumn | - +--------------------------------------------------------------------+ - |[bob,10,12345678,123400.78,true,WrappedArray(1),WrappedArray(abc), | - | WrappedArray(1234567),WrappedArray(1.0),WrappedArray(true)] | - +--------------------------------------------------------------------+ - * */ + // test : One element as null + test("Read sdk writer Json output of primitive type with one element as null") { + FileUtils.deleteDirectory(new File(writerPath)) + var dataPath: String = null + dataPath = resourcesPath + "/jsonFiles/data/PrimitiveTypeWithNull.json" + val fields = new Array[Field](2) + fields(0) = new Field("stringField", DataTypes.STRING) + fields(1) = new Field("intField", DataTypes.INT) + val jsonRow = readFromFile(dataPath) + writeCarbonFileFromJsonRowInput(jsonRow, new Schema(fields)) + assert(new File(writerPath).exists()) + sql("DROP TABLE IF EXISTS sdkOutputTable") + sql( + s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + 
checkAnswer(sql("select * from sdkOutputTable"), + Seq(Row(null, + 26))) + sql("DROP TABLE sdkOutputTable") + // drop table should not delete the files + assert(new File(writerPath).listFiles().length > 0) + FileUtils.deleteDirectory(new File(writerPath)) + } + // test : Schema length is greater than array length + test("Read Json output of primitive type with Schema length is greater than array length") { + FileUtils.deleteDirectory(new File(writerPath)) + var dataPath: String = null + dataPath = resourcesPath + "/jsonFiles/data/PrimitiveTypeWithNull.json" + val fields = new Array[Field](5) + fields(0) = new Field("stringField", DataTypes.STRING) + fields(1) = new Field("intField", DataTypes.INT) + fields(2) = new Field("shortField", DataTypes.SHORT) + fields(3) = new Field("longField", DataTypes.LONG) + fields(4) = new Field("doubleField", DataTypes.DOUBLE) + val jsonRow = readFromFile(dataPath) + writeCarbonFileFromJsonRowInput(jsonRow, new Schema(fields)) + assert(new File(writerPath).exists()) + sql("DROP TABLE IF EXISTS sdkOutputTable") + sql( + s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + checkAnswer(sql("select * from sdkOutputTable"), + Seq(Row(null, 26, null, null, null))) sql("DROP TABLE sdkOutputTable") // drop table should not delete the files assert(new File(writerPath).listFiles().length > 0) FileUtils.deleteDirectory(new File(writerPath)) } + // test : Schema length is lesser than array length + test("Read Json output of primitive type with Schema length is lesser than array length") { + FileUtils.deleteDirectory(new File(writerPath)) + var dataPath: String = null + dataPath = resourcesPath + "/jsonFiles/data/allPrimitiveType.json" + val fields = new Array[Field](2) + fields(0) = new Field("stringField", DataTypes.STRING) + fields(1) = new Field("intField", DataTypes.INT) + val jsonRow = readFromFile(dataPath) + writeCarbonFileFromJsonRowInput(jsonRow, new Schema(fields)) + assert(new 
File(writerPath).exists()) + sql("DROP TABLE IF EXISTS sdkOutputTable") + sql( + s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + checkAnswer(sql("select * from sdkOutputTable"), + Seq(Row("ajantha\"bhat\"", 26))) + sql("DROP TABLE sdkOutputTable") + // drop table should not delete the files + assert(new File(writerPath).listFiles().length > 0) + FileUtils.deleteDirectory(new File(writerPath)) + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/653efee0/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java index 6b06d89..c4f6074 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java +++ b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java @@ -56,6 +56,9 @@ public class JsonRowParser implements RowParser { Map<String, Object> jsonNodeMap = objectMapper.readValue(jsonString, new TypeReference<Map<String, Object>>() { }); + if (jsonNodeMap == null) { + return null; + } return jsonToCarbonRecord(jsonNodeMap, dataFields); } catch (IOException e) { throw new IOException("Failed to parse Json String: " + e.getMessage()); @@ -66,9 +69,7 @@ public class JsonRowParser implements RowParser { List<Object> fields = new ArrayList<>(); for (DataField dataField : dataFields) { Object field = jsonToCarbonObject(jsonNodeMap, dataField.getColumn()); - if (field != null) { - fields.add(field); - } + fields.add(field); } // use this array object to form carbonRow return fields.toArray(); @@ -78,12 +79,16 @@ public class JsonRowParser implements RowParser { DataType type = column.getDataType(); if 
(DataTypes.isArrayType(type)) { CarbonDimension carbonDimension = (CarbonDimension) column; - int size = carbonDimension.getNumberOfChild(); ArrayList array = (ArrayList) jsonNodeMap.get(extractChildColumnName(column)); + if ((array == null) || (array.size() == 0)) { + return null; + } // stored as array in carbonObject - Object[] arrayChildObjects = new Object[size]; - for (int i = 0; i < size; i++) { - CarbonDimension childCol = carbonDimension.getListOfChildDimensions().get(i); + Object[] arrayChildObjects = new Object[array.size()]; + for (int i = 0; i < array.size(); i++) { + // array column will have only one child, hence get(0). + // But data can have n elements, hence the loop. + CarbonDimension childCol = carbonDimension.getListOfChildDimensions().get(0); arrayChildObjects[i] = jsonChildElementToCarbonChildElement(array.get(i), childCol); } return new ArrayObject(arrayChildObjects); @@ -92,33 +97,45 @@ public class JsonRowParser implements RowParser { int size = carbonDimension.getNumberOfChild(); Map<String, Object> jsonMap = (Map<String, Object>) jsonNodeMap.get(extractChildColumnName(column)); + if (jsonMap == null) { + return null; + } Object[] structChildObjects = new Object[size]; for (int i = 0; i < size; i++) { CarbonDimension childCol = carbonDimension.getListOfChildDimensions().get(i); Object childObject = jsonChildElementToCarbonChildElement(jsonMap.get(extractChildColumnName(childCol)), childCol); - if (childObject != null) { - structChildObjects[i] = childObject; - } + structChildObjects[i] = childObject; } return new StructObject(structChildObjects); } else { // primitive type + if (jsonNodeMap.get(extractChildColumnName(column)) == null) { + return null; + } return jsonNodeMap.get(extractChildColumnName(column)).toString(); } } private Object jsonChildElementToCarbonChildElement(Object childObject, CarbonDimension column) { + if (childObject == null) { + return null; + } DataType type = column.getDataType(); if (DataTypes.isArrayType(type)) 
{ - int size = column.getNumberOfChild(); ArrayList array = (ArrayList) childObject; + if (array.size() == 0) { + // handling empty array + return null; + } // stored as array in carbonObject - Object[] arrayChildObjects = new Object[size]; - for (int i = 0; i < size; i++) { - CarbonDimension childCol = column.getListOfChildDimensions().get(i); + Object[] arrayChildObjects = new Object[array.size()]; + for (int i = 0; i < array.size(); i++) { + // array column will have only one child, hence get(0). + // But data can have n elements, hence the loop. + CarbonDimension childCol = column.getListOfChildDimensions().get(0); arrayChildObjects[i] = jsonChildElementToCarbonChildElement(array.get(i), childCol); } return new ArrayObject(arrayChildObjects); @@ -130,9 +147,7 @@ public class JsonRowParser implements RowParser { CarbonDimension childCol = column.getListOfChildDimensions().get(i); Object child = jsonChildElementToCarbonChildElement( childFieldsMap.get(extractChildColumnName(childCol)), childCol); - if (child != null) { - structChildObjects[i] = child; - } + structChildObjects[i] = child; } return new StructObject(structChildObjects); } else { @@ -140,6 +155,7 @@ public class JsonRowParser implements RowParser { return childObject.toString(); } } + private static String extractChildColumnName(CarbonColumn column) { String columnName = column.getColName(); if (columnName.contains(".")) { @@ -153,5 +169,4 @@ public class JsonRowParser implements RowParser { } return columnName; } - }