This is an automated email from the ASF dual-hosted git repository.
hutran pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 20a5a36 [GOBBLIN-987] Reject unrecognized Enum symbols in JsonRecordAvroSchemaToAvroConverter
20a5a36 is described below
commit 20a5a362d8dcbf22b9930e0e6ef88f0520b3479f
Author: Ahmed Abdul Hamid <[email protected]>
AuthorDate: Mon Dec 2 10:42:57 2019 -0800
[GOBBLIN-987] Reject unrecognized Enum symbols in JsonRecordAvroSchemaToAvroConverter
Closes #2833 from ahmedahamid/master
---
...JsonElementConversionWithAvroSchemaFactory.java | 8 ++-
.../JsonRecordAvroSchemaToAvroConverterTest.java | 82 ++++++++++++++--------
2 files changed, 60 insertions(+), 30 deletions(-)
diff --git a/gobblin-core/src/main/java/org/apache/gobblin/converter/avro/JsonElementConversionWithAvroSchemaFactory.java b/gobblin-core/src/main/java/org/apache/gobblin/converter/avro/JsonElementConversionWithAvroSchemaFactory.java
index 1990514..758a123 100644
--- a/gobblin-core/src/main/java/org/apache/gobblin/converter/avro/JsonElementConversionWithAvroSchemaFactory.java
+++ b/gobblin-core/src/main/java/org/apache/gobblin/converter/avro/JsonElementConversionWithAvroSchemaFactory.java
@@ -17,6 +17,7 @@
package org.apache.gobblin.converter.avro;
+import com.google.common.base.Preconditions;
import com.sun.javafx.binding.StringFormatter;
import java.util.ArrayList;
import java.util.HashMap;
@@ -158,14 +159,17 @@ public class JsonElementConversionWithAvroSchemaFactory extends JsonElementConve
this.enumSet.addAll(schemaNode.getEnumSymbols());
- this.enumName = schemaNode.getType().getName();
+ this.enumName = schemaNode.getFullName();
this.schema = schemaNode;
}
@Override
Object convertField(JsonElement value) {
- return new GenericData.EnumSymbol(this.schema, value.getAsString());
+ String valueString = value.getAsString();
+ Preconditions.checkArgument(this.enumSet.contains(valueString),
+ "%s is not one of the valid symbols for the %s enum: %s", valueString, this.enumName, this.enumSet);
+ return new GenericData.EnumSymbol(this.schema, valueString);
}
@Override
diff --git a/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonRecordAvroSchemaToAvroConverterTest.java b/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonRecordAvroSchemaToAvroConverterTest.java
index c46c0d7..21e7080 100644
--- a/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonRecordAvroSchemaToAvroConverterTest.java
+++ b/gobblin-core/src/test/java/org/apache/gobblin/converter/avro/JsonRecordAvroSchemaToAvroConverterTest.java
@@ -17,6 +17,7 @@
package org.apache.gobblin.converter.avro;
+import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import org.apache.avro.Schema;
@@ -26,9 +27,9 @@ import org.apache.commons.io.IOUtils;
import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.configuration.SourceState;
import org.apache.gobblin.configuration.WorkUnitState;
+import org.apache.gobblin.converter.DataConversionException;
import org.apache.gobblin.source.workunit.Extract.TableType;
import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
@@ -39,37 +40,20 @@ import com.google.gson.JsonParser;
*/
@Test(groups = {"gobblin.converter"})
public class JsonRecordAvroSchemaToAvroConverterTest {
- private JsonObject jsonRecord;
- private WorkUnitState state;
-
- @BeforeClass
- public void setUp()
- throws Exception {
- String avroSchemaString = IOUtils.toString(this.getClass().getResourceAsStream("/converter/jsonToAvroSchema.avsc"), StandardCharsets.UTF_8);
-
- this.jsonRecord = new JsonParser().parse(IOUtils.toString(this.getClass().getResourceAsStream(
- "/converter/jsonToAvroRecord.json"), StandardCharsets.UTF_8)).getAsJsonObject();
-
- SourceState source = new SourceState();
- this.state = new WorkUnitState(
- source.createWorkUnit(source.createExtract(TableType.SNAPSHOT_ONLY, "test_table", "test_namespace")));
- this.state.setProp(ConfigurationKeys.CONVERTER_AVRO_SCHEMA_KEY, avroSchemaString);
- this.state.setProp(ConfigurationKeys.CONVERTER_IGNORE_FIELDS, "fieldToIgnore");
- }
@Test
- public void testConverter()
- throws Exception {
- JsonRecordAvroSchemaToAvroConverter<String> converter = new JsonRecordAvroSchemaToAvroConverter<>();
-
- converter.init(this.state);
-
- Schema avroSchema = converter.convertSchema("dummy", this.state);
+ public void testConverter() throws Exception {
+ String avroSchemaString = readResource("/converter/jsonToAvroSchema.avsc");
+ WorkUnitState workUnitState = createWorkUnitState(avroSchemaString, "fieldToIgnore");
+ JsonObject jsonObject = new JsonParser().parse(readResource("/converter/jsonToAvroRecord.json")).getAsJsonObject();
- GenericRecord record = converter.convertRecord(avroSchema, this.jsonRecord, this.state).iterator().next();
+ JsonRecordAvroSchemaToAvroConverter<String> converter = new JsonRecordAvroSchemaToAvroConverter<>();
+ converter.init(workUnitState);
+ Schema avroSchema = converter.convertSchema(avroSchemaString, workUnitState);
+ GenericRecord record = converter.convertRecord(avroSchema, jsonObject, workUnitState).iterator().next();
- Assert.assertEquals(record.get("fieldToIgnore"), null);
- Assert.assertEquals(record.get("nullableField"), null);
+ Assert.assertNull(record.get("fieldToIgnore"));
+ Assert.assertNull(record.get("nullableField"));
Assert.assertEquals(record.get("longField"), 1234L);
Assert.assertTrue(record.get("arrayField") instanceof GenericArray);
@@ -105,4 +89,46 @@ public class JsonRecordAvroSchemaToAvroConverterTest {
Assert.assertEquals(arrayWithUnion2.get(0).toString(), "arrU1");
Assert.assertNull(arrayWithUnion2.get(1));
Assert.assertEquals(arrayWithUnion2.get(2).toString(), "arrU3"); }
+
+ @Test(expectedExceptions = DataConversionException.class)
+ public void testConverterThrowsOnUnrecognizedEnumSymbols() throws IOException, DataConversionException {
+ String avroSchemaString = "{\"name\": \"TestRecord\", "
+ + "\"type\": \"record\","
+ + "\"namespace\": \"org.apache.gobblin.test\", "
+ + "\"fields\": [ "
+ + " { "
+ + " \"name\": \"color\", "
+ + " \"type\": { "
+ + " \"type\": \"enum\","
+ + " \"name\": \"Colors\","
+ + " \"symbols\" : [\"RED\", \"GREEN\", \"BLUE\"]"
+ + " }"
+ + " } "
+ + " ]"
+ + "}";
+ String jsonString = "{\"color\": \"PURPLE\"}"; // PURPLE isn't a member of the Colors enum
+
+ WorkUnitState workUnitState = createWorkUnitState(avroSchemaString, null);
+ JsonObject jsonObject = new JsonParser().parse(jsonString).getAsJsonObject();
+ JsonRecordAvroSchemaToAvroConverter<String> converter = new JsonRecordAvroSchemaToAvroConverter<>();
+ converter.init(workUnitState);
+ Schema avroSchema = converter.convertSchema(avroSchemaString, workUnitState);
+
+ converter.convertRecord(avroSchema, jsonObject, workUnitState);
+ }
+
+ private static WorkUnitState createWorkUnitState(String avroSchemaString, String fieldToIgnore) {
+ SourceState sourceState = new SourceState();
+ WorkUnitState workUnitState = new WorkUnitState(
+ sourceState.createWorkUnit(sourceState.createExtract(TableType.SNAPSHOT_ONLY, "test_table", "test_namespace")));
+ workUnitState.setProp(ConfigurationKeys.CONVERTER_AVRO_SCHEMA_KEY, avroSchemaString);
+ if (fieldToIgnore != null) {
+ workUnitState.setProp(ConfigurationKeys.CONVERTER_IGNORE_FIELDS, fieldToIgnore);
+ }
+ return workUnitState;
+ }
+
+ private static String readResource(String path) throws IOException {
+ return IOUtils.toString(JsonRecordAvroSchemaToAvroConverterTest.class.getResourceAsStream(path), StandardCharsets.UTF_8);
+ }
}
\ No newline at end of file