This is an automated email from the ASF dual-hosted git repository.
suvasude pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 5a9ec60 [GOBBLIN-794] fix JsonIntermediateToAvroConverter for nested
array/object use cases
5a9ec60 is described below
commit 5a9ec60baf741f03a3965e7d4ce06d7bfdd18eb4
Author: Chris Li <[email protected]>
AuthorDate: Wed Jun 5 11:00:44 2019 -0700
[GOBBLIN-794] fix JsonIntermediateToAvroConverter for nested array/object
use cases
This commit fixes a defect in converting a nested JSON
schema in which an array has objects as its items,
each of those objects contains another array, and
that array in turn contains another
layer of objects.
Converting the lowest level of objects requires a
namespace, but the array converter did not pass
the namespace on to its nested converter.
The fix is to add a namespace parameter to
ArrayConverter and let it pass the namespace on to
subsequent calls to RecordConverter.
Closes #2661 from chris9692/gobblin_794_fix
---
.../avro/JsonElementConversionFactory.java | 12 +-
.../avro/JsonElementConversionFactoryTest.java | 10 +-
.../avro/JsonIntermediateToAvroConverterTest.java | 33 +++++
.../src/test/resources/converter/nested_json.json | 49 ++++++++
.../test/resources/converter/nested_schema.json | 140 +++++++++++++++++++++
5 files changed, 233 insertions(+), 11 deletions(-)
diff --git
a/gobblin-core/src/main/java/org/apache/gobblin/converter/avro/JsonElementConversionFactory.java
b/gobblin-core/src/main/java/org/apache/gobblin/converter/avro/JsonElementConversionFactory.java
index dba68a5..c6d9f23 100644
---
a/gobblin-core/src/main/java/org/apache/gobblin/converter/avro/JsonElementConversionFactory.java
+++
b/gobblin-core/src/main/java/org/apache/gobblin/converter/avro/JsonElementConversionFactory.java
@@ -142,7 +142,7 @@ public class JsonElementConversionFactory {
return new BooleanConverter(schemaNode);
case ARRAY:
- return new ArrayConverter(schemaNode, state);
+ return new ArrayConverter(schemaNode, state, namespace);
case MAP:
return new MapConverter(schemaNode, state);
@@ -501,7 +501,7 @@ public class JsonElementConversionFactory {
return this.elementConverter;
}
- protected void processNestedItems(JsonSchema schema, WorkUnitState state)
+ protected void processNestedItems(JsonSchema schema, WorkUnitState state,
String namespace)
throws UnsupportedDateTypeException {
JsonSchema nestedItem = null;
if (schema.isType(ARRAY)) {
@@ -510,16 +510,16 @@ public class JsonElementConversionFactory {
if (schema.isType(MAP)) {
nestedItem = schema.getValuesWithinDataType();
}
- this.setElementConverter(getConvertor(nestedItem, null, state));
+ this.setElementConverter(getConvertor(nestedItem, namespace, state));
}
}
public static class ArrayConverter extends ComplexConverter {
- public ArrayConverter(JsonSchema schema, WorkUnitState state)
+ public ArrayConverter(JsonSchema schema, WorkUnitState state, String
namespace)
throws UnsupportedDateTypeException {
super(schema);
- processNestedItems(schema, state);
+ processNestedItems(schema, state, namespace);
}
@Override
@@ -558,7 +558,7 @@ public class JsonElementConversionFactory {
public MapConverter(JsonSchema schema, WorkUnitState state)
throws UnsupportedDateTypeException {
super(schema);
- processNestedItems(schema, state);
+ processNestedItems(schema, state, null);
}
@Override
diff --git
a/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonElementConversionFactoryTest.java
b/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonElementConversionFactoryTest.java
index 7f39d30..1cf260d 100644
---
a/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonElementConversionFactoryTest.java
+++
b/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonElementConversionFactoryTest.java
@@ -83,7 +83,7 @@ public class JsonElementConversionFactoryTest {
JsonSchema jsonSchema = new JsonSchema(schema);
jsonSchema.setColumnName("dummy");
- ArrayConverter converter = new ArrayConverter(jsonSchema, state);
+ ArrayConverter converter = new ArrayConverter(jsonSchema, state, null);
Assert.assertEquals(avroSchemaToJsonElement(converter), expected);
}
@@ -96,7 +96,7 @@ public class JsonElementConversionFactoryTest {
JsonSchema jsonSchema = new JsonSchema(schema);
jsonSchema.setColumnName("dummy1");
- ArrayConverter converter = new ArrayConverter(jsonSchema, state);
+ ArrayConverter converter = new ArrayConverter(jsonSchema, state, null);
Assert.assertEquals(avroSchemaToJsonElement(converter), expected);
}
@@ -123,7 +123,7 @@ public class JsonElementConversionFactoryTest {
JsonObject schema = getSchemaData(testName).getAsJsonObject();
JsonObject expected = getExpectedSchema(testName).getAsJsonObject();
- ArrayConverter converter = new ArrayConverter(new JsonSchema(schema),
state);
+ ArrayConverter converter = new ArrayConverter(new JsonSchema(schema),
state, null);
Assert.assertEquals(avroSchemaToJsonElement(converter), expected);
}
@@ -145,7 +145,7 @@ public class JsonElementConversionFactoryTest {
JsonObject schema = getSchemaData(testName).getAsJsonObject();
JsonObject expected = getExpectedSchema(testName).getAsJsonObject();
- ArrayConverter converter = new ArrayConverter(new JsonSchema(schema),
state);
+ ArrayConverter converter = new ArrayConverter(new JsonSchema(schema),
state, null);
Assert.assertEquals(avroSchemaToJsonElement(converter), expected);
}
@@ -378,7 +378,7 @@ public class JsonElementConversionFactoryTest {
JsonObject schema = getSchemaData(testName).getAsJsonObject();
JsonArray expected = getExpectedSchema(testName).getAsJsonArray();
- ArrayConverter converter = new ArrayConverter(new JsonSchema(schema),
state);
+ ArrayConverter converter = new ArrayConverter(new JsonSchema(schema),
state, null);
Assert.assertEquals(avroSchemaToJsonElement(converter), expected);
}
diff --git
a/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonIntermediateToAvroConverterTest.java
b/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonIntermediateToAvroConverterTest.java
index be2f417..4ec4d33 100644
---
a/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonIntermediateToAvroConverterTest.java
+++
b/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonIntermediateToAvroConverterTest.java
@@ -162,4 +162,37 @@ public class JsonIntermediateToAvroConverterTest {
throws Exception {
complexSchemaTest("/converter/complex3.json");
}
+
+ @Test
+ public void testConverterWithNestJson() throws Exception {
+ Gson gson = new Gson();
+ jsonSchema = gson.fromJson(new InputStreamReader(
+
this.getClass().getResourceAsStream("/converter/nested_schema.json")),
+ JsonArray.class);
+
+ jsonRecord = gson.fromJson(new InputStreamReader(
+
this.getClass().getResourceAsStream("/converter/nested_json.json")),
+ JsonObject.class);
+
+ WorkUnit workUnit = new WorkUnit(new SourceState(),
+ new Extract(new SourceState(), Extract.TableType.SNAPSHOT_ONLY,
"namespace", "dummy_table"));
+ state = new WorkUnitState(workUnit);
+ state.setProp(ConfigurationKeys.CONVERTER_AVRO_TIME_FORMAT, "HH:mm:ss");
+ state.setProp(ConfigurationKeys.CONVERTER_AVRO_DATE_TIMEZONE, "PST");
+
+ JsonIntermediateToAvroConverter converter = new
JsonIntermediateToAvroConverter();
+
+ Schema avroSchema = converter.convertSchema(jsonSchema, state);
+ GenericRecord record = converter.convertRecord(avroSchema,
+ jsonRecord.getAsJsonObject(), state).iterator().next();
+
+
Assert.assertEquals(jsonRecord.getAsJsonObject().get("metaData").getAsJsonObject(),
+ gson.fromJson(record.get("metaData").toString(), JsonObject.class));
+
+
Assert.assertEquals(jsonRecord.getAsJsonObject().get("context").getAsJsonArray(),
+ gson.fromJson(record.get("context").toString(), JsonArray.class));
+
+
Assert.assertEquals(jsonRecord.getAsJsonObject().get("metaData").getAsJsonObject().get("id").getAsString(),
+ ((GenericRecord)(record.get("metaData"))).get("id").toString());
+ }
}
diff --git a/gobblin-core/src/test/resources/converter/nested_json.json
b/gobblin-core/src/test/resources/converter/nested_json.json
new file mode 100644
index 0000000..544f295
--- /dev/null
+++ b/gobblin-core/src/test/resources/converter/nested_json.json
@@ -0,0 +1,49 @@
+{
+ "metaData": {
+ "id": "12345",
+ "url": "https://www.domain.com",
+ "title": "title 1",
+ "scheduled": "2019-04-14T20:00:00-07:00",
+ "started": "2019-04-14T19:45:07.596-07:00",
+ "duration": 3893,
+ "primaryUserId": "123456",
+ "direction": "Conference",
+ "system": "BlueJeans",
+ "scope": "External",
+ "media": "Video",
+ "language": "eng"
+ },
+ "context": [{
+ "system": "System 1",
+ "objects": [{
+ "objectType": "Opportunity",
+ "objectId": "abcdefg",
+ "fields": [{
+ "name": "Type",
+ "value": "New Business"
+ }, {
+ "name": "StageName",
+ "value": "Closed Won - Pending Delivery"
+ }, {
+ "name": "CloseDate",
+ "value": "2019-01-01"
+ }, {
+ "name": "Name",
+ "value": "University 1"
+ }]
+ }, {
+ "objectType": "Account",
+ "objectId": "defghijk",
+ "fields": [{
+ "name": "Industry",
+ "value": "Higher Education"
+ }, {
+ "name": "Website",
+ "value": "www.domain.com"
+ }, {
+ "name": "Name",
+ "value": "University 2"
+ }]
+ }]
+ }]
+}
\ No newline at end of file
diff --git a/gobblin-core/src/test/resources/converter/nested_schema.json
b/gobblin-core/src/test/resources/converter/nested_schema.json
new file mode 100644
index 0000000..0592222
--- /dev/null
+++ b/gobblin-core/src/test/resources/converter/nested_schema.json
@@ -0,0 +1,140 @@
+[{
+ "columnName": "metaData",
+ "dataType": {
+ "type": "record",
+ "name": "metaData",
+ "values": [{
+ "columnName": "id",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "url",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "title",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "scheduled",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "started",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "duration",
+ "dataType": {
+ "type": "int"
+ }
+ }, {
+ "columnName": "primaryUserId",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "direction",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "system",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "scope",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "media",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "language",
+ "dataType": {
+ "type": "string"
+ }
+ }]
+ }
+}, {
+ "columnName": "context",
+ "dataType": {
+ "type": "array",
+ "name": "context",
+ "items": {
+ "name": "contextItem",
+ "dataType": {
+ "name": "contextItem",
+ "type": "record",
+ "values": [{
+ "columnName": "system",
+ "isNullable": "true",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "objects",
+ "dataType": {
+ "type": "array",
+ "name": "objects",
+ "items": {
+ "name": "objectsItem",
+ "dataType": {
+ "name": "objectsItem",
+ "type": "record",
+ "values": [{
+ "columnName": "objectType",
+ "isNullable": "true",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "objectId",
+ "isNullable": "true",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "fields",
+ "dataType": {
+ "type": "array",
+ "name": "fields",
+ "items": {
+ "name": "fieldsItem",
+ "dataType": {
+ "name": "fieldsItem",
+ "type": "record",
+ "values": [{
+ "columnName": "name",
+ "isNullable": "true",
+ "dataType": {
+ "type": "string"
+ }
+ }, {
+ "columnName": "value",
+ "isNullable": "true",
+ "dataType": {
+ "type": "string"
+ }
+ }]
+ }
+ }
+ }
+ }]
+ }
+ }
+ }
+ }]
+ }
+ }
+ }
+}]
\ No newline at end of file