This is an automated email from the ASF dual-hosted git repository. vinoth pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git
The following commit(s) were added to refs/heads/master by this push: new 6a0aa9a [HUDI-803] Replaced use of NullNode with JsonProperties.NULL_VALUE in HoodieAvroUtils (#1538) 6a0aa9a is described below commit 6a0aa9a645d11ed7b50e18aa0563dafcd9d145f7 Author: Pratyaksh Sharma <pratyaks...@gmail.com> AuthorDate: Wed May 20 21:34:43 2020 +0530 [HUDI-803] Replaced use of NullNode with JsonProperties.NULL_VALUE in HoodieAvroUtils (#1538) - added more test cases in TestHoodieAvroUtils.class Co-authored-by: Vinoth Chandar <vin...@apache.org> --- .../java/org/apache/hudi/avro/HoodieAvroUtils.java | 17 ++-- .../org/apache/hudi/avro/TestHoodieAvroUtils.java | 93 +++++++++++++++++++++- 2 files changed, 97 insertions(+), 13 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index 8c22122..38b9d32 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -18,7 +18,7 @@ package org.apache.hudi.avro; -import org.apache.avro.JsonProperties.Null; +import org.apache.avro.JsonProperties; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.SchemaCompatabilityException; @@ -64,7 +64,7 @@ public class HoodieAvroUtils { private static ThreadLocal<BinaryDecoder> reuseDecoder = ThreadLocal.withInitial(() -> null); // All metadata fields are optional strings. 
- private static final Schema METADATA_FIELD_SCHEMA = + static final Schema METADATA_FIELD_SCHEMA = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING))); private static final Schema RECORD_KEY_SCHEMA = initRecordKeySchema(); @@ -96,7 +96,6 @@ public class HoodieAvroUtils { writer.write(record, jsonEncoder); jsonEncoder.flush(); return out.toByteArray(); - //metadata.toJsonString().getBytes(StandardCharsets.UTF_8)); } /** @@ -142,15 +141,15 @@ public class HoodieAvroUtils { List<Schema.Field> parentFields = new ArrayList<>(); Schema.Field commitTimeField = - new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance()); + new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); Schema.Field commitSeqnoField = - new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance()); + new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); Schema.Field recordKeyField = - new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance()); + new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); Schema.Field partitionPathField = - new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance()); + new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); Schema.Field fileNameField = - new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance()); + new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); parentFields.add(commitTimeField); parentFields.add(commitSeqnoField); @@ -272,7 +271,7 @@ public class HoodieAvroUtils { 
GenericRecord newRecord = new GenericData.Record(newSchema); for (Schema.Field f : fieldsToWrite) { if (record.get(f.name()) == null) { - if (f.defaultVal() instanceof Null) { + if (f.defaultVal() instanceof JsonProperties.Null) { newRecord.put(f.name(), null); } else { newRecord.put(f.name(), f.defaultVal()); diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java index 9c5e046..7d5cf04 100644 --- a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java @@ -18,16 +18,24 @@ package org.apache.hudi.avro; +import org.apache.avro.JsonProperties; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.exception.SchemaCompatabilityException; + import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; +import org.codehaus.jackson.node.NullNode; import org.junit.jupiter.api.Test; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; /** @@ -47,13 +55,28 @@ public class TestHoodieAvroUtils { + "{\"name\": \"non_pii_col\", \"type\": \"string\"}," + "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\": \"user_profile\"}]}"; - private static String SCHEMA_WITH_METADATA_FIELD = - "{\"type\": \"record\",\"name\": \"testrec2\",\"fields\": [ " + private static String SCHEMA_WITH_METADATA_FIELD = "{\"type\": \"record\",\"name\": \"testrec2\",\"fields\": [ " + "{\"name\": \"timestamp\",\"type\": \"double\"},{\"name\": \"_row_key\", \"type\": \"string\"}," + "{\"name\": 
\"non_pii_col\", \"type\": \"string\"}," + "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\": \"user_profile\"}," + "{\"name\": \"_hoodie_commit_time\", \"type\": [\"null\", \"string\"]}," - + "{\"name\": \"nullable_field\",\"type\": [\"null\" ,\"string\"],\"default\": null}]}"; + + "{\"name\": \"nullable_field\",\"type\": [\"null\" ,\"string\"],\"default\": null}," + + "{\"name\": \"nullable_field_wo_default\",\"type\": [\"null\" ,\"string\"]}]}"; + + private static String SCHEMA_WITH_NON_NULLABLE_FIELD = "{\"type\": \"record\",\"name\": \"testrec3\",\"fields\": [ " + + "{\"name\": \"timestamp\",\"type\": \"double\"},{\"name\": \"_row_key\", \"type\": \"string\"}," + + "{\"name\": \"non_pii_col\", \"type\": \"string\"}," + + "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\": \"user_profile\"}," + + "{\"name\": \"nullable_field\",\"type\": [\"null\" ,\"string\"],\"default\": null}," + + "{\"name\": \"non_nullable_field_wo_default\",\"type\": \"string\"}," + + "{\"name\": \"non_nullable_field_with_default\",\"type\": \"string\", \"default\": \"dummy\"}]}"; + + private static String SCHEMA_WITH_NON_NULLABLE_FIELD_WITH_DEFAULT = "{\"type\": \"record\",\"name\": \"testrec4\",\"fields\": [ " + + "{\"name\": \"timestamp\",\"type\": \"double\"},{\"name\": \"_row_key\", \"type\": \"string\"}," + + "{\"name\": \"non_pii_col\", \"type\": \"string\"}," + + "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\": \"user_profile\"}," + + "{\"name\": \"nullable_field\",\"type\": [\"null\" ,\"string\"],\"default\": null}," + + "{\"name\": \"non_nullable_field_with_default\",\"type\": \"string\", \"default\": \"dummy\"}]}"; @Test public void testPropsPresent() { @@ -85,9 +108,11 @@ public class TestHoodieAvroUtils { rec.put("non_pii_col", "val1"); rec.put("pii_col", "val2"); rec.put("timestamp", 3.5); - GenericRecord rec1 = HoodieAvroUtils.rewriteRecord(rec, new Schema.Parser().parse(EVOLVED_SCHEMA)); + Schema schemaWithMetadata = 
HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(EVOLVED_SCHEMA)); + GenericRecord rec1 = HoodieAvroUtils.rewriteRecord(rec, schemaWithMetadata); assertEquals(rec1.get("new_col1"), "dummy_val"); assertNull(rec1.get("new_col2")); + assertNull(rec1.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); } @Test @@ -111,5 +136,65 @@ public class TestHoodieAvroUtils { rec.put("timestamp", 3.5); GenericRecord rec1 = HoodieAvroUtils.rewriteRecord(rec, new Schema.Parser().parse(SCHEMA_WITH_METADATA_FIELD)); assertNull(rec1.get("_hoodie_commit_time")); + assertNull(rec1.get("nullable_field")); + assertNull(rec1.get("nullable_field_wo_default")); + } + + @Test + public void testNonNullableFieldWithoutDefault() { + GenericRecord rec = new GenericData.Record(new Schema.Parser().parse(EXAMPLE_SCHEMA)); + rec.put("_row_key", "key1"); + rec.put("non_pii_col", "val1"); + rec.put("pii_col", "val2"); + rec.put("timestamp", 3.5); + assertThrows(SchemaCompatabilityException.class, () -> HoodieAvroUtils.rewriteRecord(rec, new Schema.Parser().parse(SCHEMA_WITH_NON_NULLABLE_FIELD))); + } + + @Test + public void testNonNullableFieldWithDefault() { + GenericRecord rec = new GenericData.Record(new Schema.Parser().parse(EXAMPLE_SCHEMA)); + rec.put("_row_key", "key1"); + rec.put("non_pii_col", "val1"); + rec.put("pii_col", "val2"); + rec.put("timestamp", 3.5); + GenericRecord rec1 = HoodieAvroUtils.rewriteRecord(rec, new Schema.Parser().parse(SCHEMA_WITH_NON_NULLABLE_FIELD_WITH_DEFAULT)); + assertEquals(rec1.get("non_nullable_field_with_default"), "dummy"); + } + + @Test + public void testJsonNodeNullWithDefaultValues() { + List<Schema.Field> fields = new ArrayList<>(); + Schema initialSchema = Schema.createRecord("test_record", "test record", "org.test.namespace", false); + Schema.Field field1 = new Schema.Field("key", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); + Schema.Field field2 = new Schema.Field("key1", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", 
JsonProperties.NULL_VALUE); + Schema.Field field3 = new Schema.Field("key2", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); + fields.add(field1); + fields.add(field2); + fields.add(field3); + initialSchema.setFields(fields); + GenericRecord rec = new GenericData.Record(initialSchema); + rec.put("key", "val"); + rec.put("key1", "val1"); + rec.put("key2", "val2"); + + List<Schema.Field> evolvedFields = new ArrayList<>(); + Schema evolvedSchema = Schema.createRecord("evolved_record", "evolved record", "org.evolved.namespace", false); + Schema.Field evolvedField1 = new Schema.Field("key", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); + Schema.Field evolvedField2 = new Schema.Field("key1", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); + Schema.Field evolvedField3 = new Schema.Field("key2", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); + Schema.Field evolvedField4 = new Schema.Field("evolved_field", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", NullNode.getInstance()); + Schema.Field evolvedField5 = new Schema.Field("evolved_field1", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); + evolvedFields.add(evolvedField1); + evolvedFields.add(evolvedField2); + evolvedFields.add(evolvedField3); + evolvedFields.add(evolvedField4); + evolvedFields.add(evolvedField5); + evolvedSchema.setFields(evolvedFields); + + GenericRecord rec1 = HoodieAvroUtils.rewriteRecord(rec, evolvedSchema); + //evolvedField4.defaultVal() returns a JsonProperties.Null instance. + assertNull(rec1.get("evolved_field")); + //evolvedField5.defaultVal() returns null. + assertNull(rec1.get("evolved_field1")); } }