This is an automated email from the ASF dual-hosted git repository.

suvasude pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 5a9ec60  [GOBBLIN-794] fix JsonIntermediateToAvroConverter for nested 
array/object use cases
5a9ec60 is described below

commit 5a9ec60baf741f03a3965e7d4ce06d7bfdd18eb4
Author: Chris Li <[email protected]>
AuthorDate: Wed Jun 5 11:00:44 2019 -0700

    [GOBBLIN-794] fix JsonIntermediateToAvroConverter for nested array/object 
use cases
    
    This commit fixes a defect in converting nested JSON
    schemas, where an array had objects as its items,
    and those objects had another array, which in turn
    had another layer of objects.
    
    Converting the lowest level of objects requires a
    namespace, but the array converter did not carry
    the namespace to its nested converter.
    
    The fix is to add a namespace parameter to
    ArrayConverter, and let it pass the namespace to
    subsequent calls to RecordConverter.
    
    Closes #2661 from chris9692/gobblin_794_fix
---
 .../avro/JsonElementConversionFactory.java         |  12 +-
 .../avro/JsonElementConversionFactoryTest.java     |  10 +-
 .../avro/JsonIntermediateToAvroConverterTest.java  |  33 +++++
 .../src/test/resources/converter/nested_json.json  |  49 ++++++++
 .../test/resources/converter/nested_schema.json    | 140 +++++++++++++++++++++
 5 files changed, 233 insertions(+), 11 deletions(-)

diff --git 
a/gobblin-core/src/main/java/org/apache/gobblin/converter/avro/JsonElementConversionFactory.java
 
b/gobblin-core/src/main/java/org/apache/gobblin/converter/avro/JsonElementConversionFactory.java
index dba68a5..c6d9f23 100644
--- 
a/gobblin-core/src/main/java/org/apache/gobblin/converter/avro/JsonElementConversionFactory.java
+++ 
b/gobblin-core/src/main/java/org/apache/gobblin/converter/avro/JsonElementConversionFactory.java
@@ -142,7 +142,7 @@ public class JsonElementConversionFactory {
         return new BooleanConverter(schemaNode);
 
       case ARRAY:
-        return new ArrayConverter(schemaNode, state);
+        return new ArrayConverter(schemaNode, state, namespace);
 
       case MAP:
         return new MapConverter(schemaNode, state);
@@ -501,7 +501,7 @@ public class JsonElementConversionFactory {
       return this.elementConverter;
     }
 
-    protected void processNestedItems(JsonSchema schema, WorkUnitState state)
+    protected void processNestedItems(JsonSchema schema, WorkUnitState state, 
String namespace)
         throws UnsupportedDateTypeException {
       JsonSchema nestedItem = null;
       if (schema.isType(ARRAY)) {
@@ -510,16 +510,16 @@ public class JsonElementConversionFactory {
       if (schema.isType(MAP)) {
         nestedItem = schema.getValuesWithinDataType();
       }
-      this.setElementConverter(getConvertor(nestedItem, null, state));
+      this.setElementConverter(getConvertor(nestedItem, namespace, state));
     }
   }
 
   public static class ArrayConverter extends ComplexConverter {
 
-    public ArrayConverter(JsonSchema schema, WorkUnitState state)
+    public ArrayConverter(JsonSchema schema, WorkUnitState state, String 
namespace)
         throws UnsupportedDateTypeException {
       super(schema);
-      processNestedItems(schema, state);
+      processNestedItems(schema, state, namespace);
     }
 
     @Override
@@ -558,7 +558,7 @@ public class JsonElementConversionFactory {
     public MapConverter(JsonSchema schema, WorkUnitState state)
         throws UnsupportedDateTypeException {
       super(schema);
-      processNestedItems(schema, state);
+      processNestedItems(schema, state, null);
     }
 
     @Override
diff --git 
a/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonElementConversionFactoryTest.java
 
b/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonElementConversionFactoryTest.java
index 7f39d30..1cf260d 100644
--- 
a/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonElementConversionFactoryTest.java
+++ 
b/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonElementConversionFactoryTest.java
@@ -83,7 +83,7 @@ public class JsonElementConversionFactoryTest {
     JsonSchema jsonSchema = new JsonSchema(schema);
     jsonSchema.setColumnName("dummy");
 
-    ArrayConverter converter = new ArrayConverter(jsonSchema, state);
+    ArrayConverter converter = new ArrayConverter(jsonSchema, state, null);
     Assert.assertEquals(avroSchemaToJsonElement(converter), expected);
   }
 
@@ -96,7 +96,7 @@ public class JsonElementConversionFactoryTest {
     JsonSchema jsonSchema = new JsonSchema(schema);
     jsonSchema.setColumnName("dummy1");
 
-    ArrayConverter converter = new ArrayConverter(jsonSchema, state);
+    ArrayConverter converter = new ArrayConverter(jsonSchema, state, null);
 
     Assert.assertEquals(avroSchemaToJsonElement(converter), expected);
   }
@@ -123,7 +123,7 @@ public class JsonElementConversionFactoryTest {
     JsonObject schema = getSchemaData(testName).getAsJsonObject();
     JsonObject expected = getExpectedSchema(testName).getAsJsonObject();
 
-    ArrayConverter converter = new ArrayConverter(new JsonSchema(schema), 
state);
+    ArrayConverter converter = new ArrayConverter(new JsonSchema(schema), 
state, null);
 
     Assert.assertEquals(avroSchemaToJsonElement(converter), expected);
   }
@@ -145,7 +145,7 @@ public class JsonElementConversionFactoryTest {
     JsonObject schema = getSchemaData(testName).getAsJsonObject();
     JsonObject expected = getExpectedSchema(testName).getAsJsonObject();
 
-    ArrayConverter converter = new ArrayConverter(new JsonSchema(schema), 
state);
+    ArrayConverter converter = new ArrayConverter(new JsonSchema(schema), 
state, null);
 
     Assert.assertEquals(avroSchemaToJsonElement(converter), expected);
   }
@@ -378,7 +378,7 @@ public class JsonElementConversionFactoryTest {
     JsonObject schema = getSchemaData(testName).getAsJsonObject();
     JsonArray expected = getExpectedSchema(testName).getAsJsonArray();
 
-    ArrayConverter converter = new ArrayConverter(new JsonSchema(schema), 
state);
+    ArrayConverter converter = new ArrayConverter(new JsonSchema(schema), 
state, null);
 
     Assert.assertEquals(avroSchemaToJsonElement(converter), expected);
   }
diff --git 
a/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonIntermediateToAvroConverterTest.java
 
b/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonIntermediateToAvroConverterTest.java
index be2f417..4ec4d33 100644
--- 
a/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonIntermediateToAvroConverterTest.java
+++ 
b/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonIntermediateToAvroConverterTest.java
@@ -162,4 +162,37 @@ public class JsonIntermediateToAvroConverterTest {
       throws Exception {
     complexSchemaTest("/converter/complex3.json");
   }
+
+  @Test
+  public void testConverterWithNestJson() throws Exception {
+    Gson gson = new Gson();
+    jsonSchema = gson.fromJson(new InputStreamReader(
+            
this.getClass().getResourceAsStream("/converter/nested_schema.json")),
+        JsonArray.class);
+
+    jsonRecord = gson.fromJson(new InputStreamReader(
+            
this.getClass().getResourceAsStream("/converter/nested_json.json")),
+        JsonObject.class);
+
+    WorkUnit workUnit = new WorkUnit(new SourceState(),
+        new Extract(new SourceState(), Extract.TableType.SNAPSHOT_ONLY, 
"namespace", "dummy_table"));
+    state = new WorkUnitState(workUnit);
+    state.setProp(ConfigurationKeys.CONVERTER_AVRO_TIME_FORMAT, "HH:mm:ss");
+    state.setProp(ConfigurationKeys.CONVERTER_AVRO_DATE_TIMEZONE, "PST");
+
+    JsonIntermediateToAvroConverter converter = new 
JsonIntermediateToAvroConverter();
+
+    Schema avroSchema = converter.convertSchema(jsonSchema, state);
+    GenericRecord record = converter.convertRecord(avroSchema,
+        jsonRecord.getAsJsonObject(), state).iterator().next();
+
+    
Assert.assertEquals(jsonRecord.getAsJsonObject().get("metaData").getAsJsonObject(),
+        gson.fromJson(record.get("metaData").toString(), JsonObject.class));
+
+    
Assert.assertEquals(jsonRecord.getAsJsonObject().get("context").getAsJsonArray(),
+        gson.fromJson(record.get("context").toString(), JsonArray.class));
+
+    
Assert.assertEquals(jsonRecord.getAsJsonObject().get("metaData").getAsJsonObject().get("id").getAsString(),
+        ((GenericRecord)(record.get("metaData"))).get("id").toString());
+  }
 }
diff --git a/gobblin-core/src/test/resources/converter/nested_json.json 
b/gobblin-core/src/test/resources/converter/nested_json.json
new file mode 100644
index 0000000..544f295
--- /dev/null
+++ b/gobblin-core/src/test/resources/converter/nested_json.json
@@ -0,0 +1,49 @@
+{
+  "metaData": {
+    "id": "12345",
+    "url": "https://www.domain.com",
+    "title": "title 1",
+    "scheduled": "2019-04-14T20:00:00-07:00",
+    "started": "2019-04-14T19:45:07.596-07:00",
+    "duration": 3893,
+    "primaryUserId": "123456",
+    "direction": "Conference",
+    "system": "BlueJeans",
+    "scope": "External",
+    "media": "Video",
+    "language": "eng"
+  },
+  "context": [{
+    "system": "System 1",
+    "objects": [{
+      "objectType": "Opportunity",
+      "objectId": "abcdefg",
+      "fields": [{
+        "name": "Type",
+        "value": "New Business"
+      }, {
+        "name": "StageName",
+        "value": "Closed Won - Pending Delivery"
+      }, {
+        "name": "CloseDate",
+        "value": "2019-01-01"
+      }, {
+        "name": "Name",
+        "value": "University 1"
+      }]
+    }, {
+      "objectType": "Account",
+      "objectId": "defghijk",
+      "fields": [{
+        "name": "Industry",
+        "value": "Higher Education"
+      }, {
+        "name": "Website",
+        "value": "www.domain.com"
+      }, {
+        "name": "Name",
+        "value": "University 2"
+      }]
+    }]
+  }]
+}
\ No newline at end of file
diff --git a/gobblin-core/src/test/resources/converter/nested_schema.json 
b/gobblin-core/src/test/resources/converter/nested_schema.json
new file mode 100644
index 0000000..0592222
--- /dev/null
+++ b/gobblin-core/src/test/resources/converter/nested_schema.json
@@ -0,0 +1,140 @@
+[{
+  "columnName": "metaData",
+  "dataType": {
+    "type": "record",
+    "name": "metaData",
+    "values": [{
+      "columnName": "id",
+      "dataType": {
+        "type": "string"
+      }
+    }, {
+      "columnName": "url",
+      "dataType": {
+        "type": "string"
+      }
+    }, {
+      "columnName": "title",
+      "dataType": {
+        "type": "string"
+      }
+    }, {
+      "columnName": "scheduled",
+      "dataType": {
+        "type": "string"
+      }
+    }, {
+      "columnName": "started",
+      "dataType": {
+        "type": "string"
+      }
+    }, {
+      "columnName": "duration",
+      "dataType": {
+        "type": "int"
+      }
+    }, {
+      "columnName": "primaryUserId",
+      "dataType": {
+        "type": "string"
+      }
+    }, {
+      "columnName": "direction",
+      "dataType": {
+        "type": "string"
+      }
+    }, {
+      "columnName": "system",
+      "dataType": {
+        "type": "string"
+      }
+    }, {
+      "columnName": "scope",
+      "dataType": {
+        "type": "string"
+      }
+    }, {
+      "columnName": "media",
+      "dataType": {
+        "type": "string"
+      }
+    }, {
+      "columnName": "language",
+      "dataType": {
+        "type": "string"
+      }
+    }]
+  }
+}, {
+  "columnName": "context",
+  "dataType": {
+    "type": "array",
+    "name": "context",
+    "items": {
+      "name": "contextItem",
+      "dataType": {
+        "name": "contextItem",
+        "type": "record",
+        "values": [{
+          "columnName": "system",
+          "isNullable": "true",
+          "dataType": {
+            "type": "string"
+          }
+        }, {
+          "columnName": "objects",
+          "dataType": {
+            "type": "array",
+            "name": "objects",
+            "items": {
+              "name": "objectsItem",
+              "dataType": {
+                "name": "objectsItem",
+                "type": "record",
+                "values": [{
+                  "columnName": "objectType",
+                  "isNullable": "true",
+                  "dataType": {
+                    "type": "string"
+                  }
+                }, {
+                  "columnName": "objectId",
+                  "isNullable": "true",
+                  "dataType": {
+                    "type": "string"
+                  }
+                }, {
+                  "columnName": "fields",
+                  "dataType": {
+                    "type": "array",
+                    "name": "fields",
+                    "items": {
+                      "name": "fieldsItem",
+                      "dataType": {
+                        "name": "fieldsItem",
+                        "type": "record",
+                        "values": [{
+                          "columnName": "name",
+                          "isNullable": "true",
+                          "dataType": {
+                            "type": "string"
+                          }
+                        }, {
+                          "columnName": "value",
+                          "isNullable": "true",
+                          "dataType": {
+                            "type": "string"
+                          }
+                        }]
+                      }
+                    }
+                  }
+                }]
+              }
+            }
+          }
+        }]
+      }
+    }
+  }
+}]
\ No newline at end of file

Reply via email to