Guosmilesmile commented on code in PR #16638:
URL: https://github.com/apache/iceberg/pull/16638#discussion_r3360570025
##########
data/src/test/java/org/apache/iceberg/data/BaseFormatModelTests.java:
##########
@@ -696,49 +733,390 @@ void testReaderBuilderReuseContainers(FileFormat
fileFormat) throws IOException
}
@ParameterizedTest
- @FieldSource("FILE_FORMATS")
- void testReaderSchemaEvolutionNewColumnWithDefault(FileFormat fileFormat)
throws IOException {
+ @FieldSource("FORMAT_AND_PRIMITIVE_DEFAULTS")
+ void testReaderSchemaEvolutionNewColumnWithDefault(
+ FileFormat fileFormat, Type.PrimitiveType type, Literal<?> defaultValue)
throws IOException {
assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
DataGenerator dataGenerator = new DataGenerators.DefaultSchema();
Schema writeSchema = dataGenerator.schema();
List<Record> genericRecords = dataGenerator.generateRecords();
writeGenericRecords(fileFormat, writeSchema, genericRecords);
- String defaultStringValue = "default_value";
- int defaultIntValue = 42;
-
int maxFieldId =
writeSchema.columns().stream().mapToInt(Types.NestedField::fieldId).max().orElse(0);
List<Types.NestedField> evolvedColumns =
Lists.newArrayList(writeSchema.columns());
evolvedColumns.add(
- Types.NestedField.required("col_f")
+ Types.NestedField.optional("col_with_default")
.withId(maxFieldId + 1)
- .ofType(Types.StringType.get())
- .withInitialDefault(Literal.of(defaultStringValue))
- .build());
- evolvedColumns.add(
- Types.NestedField.optional("col_g")
- .withId(maxFieldId + 2)
- .ofType(Types.IntegerType.get())
- .withInitialDefault(Literal.of(defaultIntValue))
+ .ofType(type)
+ .withInitialDefault(defaultValue)
.build());
Schema evolvedSchema = new Schema(evolvedColumns);
- readAndAssertGenericRecords(
+ readAndAssertGenericRecords(fileFormat, evolvedSchema, genericRecords);
+ }
+
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testDefaultValues(FileFormat fileFormat) throws IOException {
Review Comment:
Makes sense, we can keep it for now.
##########
data/src/test/java/org/apache/iceberg/data/BaseFormatModelTests.java:
##########
@@ -697,48 +705,420 @@ void testReaderBuilderReuseContainers(FileFormat
fileFormat) throws IOException
@ParameterizedTest
@FieldSource("FILE_FORMATS")
- void testReaderSchemaEvolutionNewColumnWithDefault(FileFormat fileFormat)
throws IOException {
+ void testDefaultValues(FileFormat fileFormat) throws IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema = new Schema(idField, dataField);
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.required("missing_str")
+ .withId(6)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("orange"))
+ .build(),
+ Types.NestedField.optional("missing_int")
+ .withId(7)
+ .ofType(Types.IntegerType.get())
+ .withInitialDefault(Literal.of(34))
+ .build());
+
+ readAndAssertEngineRecords(
+ fileFormat,
+ expectedSchema,
+ genericRecords,
+ record -> {
+ Record expected = GenericRecord.create(expectedSchema);
+ expected.setField("id", record.getField("id"));
+ expected.setField("data", record.getField("data"));
+ expected.setField("missing_str", "orange");
+ expected.setField("missing_int", 34);
+
+ return expected;
+ });
+ }
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testNullDefaultValue(FileFormat fileFormat) throws IOException {
assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
- DataGenerator dataGenerator = new DataGenerators.DefaultSchema();
- Schema writeSchema = dataGenerator.schema();
- List<Record> genericRecords = dataGenerator.generateRecords();
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema = new Schema(idField, dataField);
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
writeGenericRecords(fileFormat, writeSchema, genericRecords);
- String defaultStringValue = "default_value";
- int defaultIntValue = 42;
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("missing_date")
+ .withId(3)
+ .ofType(Types.DateType.get())
+ .build());
- int maxFieldId =
-
writeSchema.columns().stream().mapToInt(Types.NestedField::fieldId).max().orElse(0);
+ readAndAssertEngineRecords(
+ fileFormat,
+ expectedSchema,
+ genericRecords,
+ record -> {
+ Record expected = GenericRecord.create(expectedSchema);
+ expected.setField("id", record.getField("id"));
+ expected.setField("data", record.getField("data"));
+ expected.setField("missing_date", null);
- List<Types.NestedField> evolvedColumns =
Lists.newArrayList(writeSchema.columns());
- evolvedColumns.add(
- Types.NestedField.required("col_f")
- .withId(maxFieldId + 1)
+ return expected;
+ });
+ }
+
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testNestedDefaultValue(FileFormat fileFormat) throws IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
.ofType(Types.StringType.get())
- .withInitialDefault(Literal.of(defaultStringValue))
- .build());
- evolvedColumns.add(
- Types.NestedField.optional("col_g")
- .withId(maxFieldId + 2)
- .ofType(Types.IntegerType.get())
- .withInitialDefault(Literal.of(defaultIntValue))
- .build());
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested")
+ .withId(3)
+ .ofType(
+ Types.StructType.of(
+ Types.NestedField.required(4, "inner",
Types.StringType.get())))
+ .withDoc("Used to test nested field defaults")
+ .build());
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested")
+ .withId(3)
+ .ofType(
+ Types.StructType.of(
+ Types.NestedField.required(4, "inner",
Types.StringType.get()),
+ Types.NestedField.optional("missing_inner_float")
+ .withId(5)
+ .ofType(Types.FloatType.get())
+ .withInitialDefault(Literal.of(-0.0F))
+ .build()))
+ .withDoc("Used to test nested field defaults")
+ .build());
- Schema evolvedSchema = new Schema(evolvedColumns);
- readAndAssertGenericRecords(
+ readAndAssertEngineRecords(
fileFormat,
- evolvedSchema,
+ expectedSchema,
genericRecords,
record -> {
- Record expected = copy(record, writeSchema, evolvedSchema);
- expected.setField("col_f", defaultStringValue);
- expected.setField("col_g", defaultIntValue);
+ Record expected = copy(record, writeSchema, expectedSchema);
+
+ Record writtenNested = (Record) record.getField("nested");
+ if (writtenNested != null) {
+ Record expectedNested =
+
GenericRecord.create(expectedSchema.findField("nested").type().asStructType());
+ expectedNested.setField("inner", writtenNested.getField("inner"));
+ expectedNested.setField("missing_inner_float", -0.0F);
+ expected.setField("nested", expectedNested);
+ }
+
return expected;
});
}
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testMapNestedDefaultValue(FileFormat fileFormat) throws IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested_map")
+ .withId(3)
+ .ofType(
+ Types.MapType.ofOptional(
+ 4,
+ 5,
+ Types.StringType.get(),
+ Types.StructType.of(
+ Types.NestedField.required(6, "value_str",
Types.StringType.get()))))
+ .withDoc("Used to test nested map value field defaults")
+ .build());
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested_map")
+ .withId(3)
+ .ofType(
+ Types.MapType.ofOptional(
+ 4,
+ 5,
+ Types.StringType.get(),
+ Types.StructType.of(
+ Types.NestedField.required(6, "value_str",
Types.StringType.get()),
+ Types.NestedField.optional("value_int")
+ .withId(7)
+ .ofType(Types.IntegerType.get())
+ .withInitialDefault(Literal.of(34))
+ .build())))
+ .withDoc("Used to test nested field defaults")
+ .build());
+
+ readAndAssertEngineRecords(
+ fileFormat,
+ expectedSchema,
+ genericRecords,
+ record -> {
+ Record expected = copy(record, writeSchema, expectedSchema);
+
+ @SuppressWarnings("unchecked")
+ Map<Object, Record> writtenMap = (Map<Object, Record>)
expected.getField("nested_map");
+ if (writtenMap != null) {
+ Types.StructType valueType =
+ expectedSchema
+ .findField("nested_map")
+ .type()
+ .asMapType()
+ .valueType()
+ .asStructType();
+ Map<Object, Record> rebuilt = Maps.newLinkedHashMap();
+ writtenMap.forEach(
+ (key, value) ->
+ rebuilt.put(
+ key,
+ value == null
+ ? null
+ : GenericRecord.create(valueType)
+ .copy("value_str",
value.getField("value_str"), "value_int", 34)));
+ expected.setField("nested_map", rebuilt);
+ }
+
+ return expected;
+ });
+ }
+
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testListNestedDefaultValue(FileFormat fileFormat) throws IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested_list")
+ .withId(3)
+ .ofType(
+ Types.ListType.ofOptional(
+ 4,
+ Types.StructType.of(
+ Types.NestedField.required(5, "element_str",
Types.StringType.get()))))
+ .withDoc("Used to test nested field defaults")
+ .build());
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested_list")
+ .withId(3)
+ .ofType(
+ Types.ListType.ofOptional(
+ 4,
+ Types.StructType.of(
+ Types.NestedField.required(5, "element_str",
Types.StringType.get()),
+ Types.NestedField.optional("element_int")
+ .withId(7)
+ .ofType(Types.IntegerType.get())
+ .withInitialDefault(Literal.of(34))
+ .build())))
+ .withDoc("Used to test nested field defaults")
+ .build());
+
+ readAndAssertEngineRecords(
+ fileFormat,
+ expectedSchema,
+ genericRecords,
+ record -> {
+ Record expected = copy(record, writeSchema, expectedSchema);
+
+ @SuppressWarnings("unchecked")
+ List<Record> writtenList = (List<Record>)
expected.getField("nested_list");
+ if (writtenList != null) {
+ Types.StructType elementType =
+ expectedSchema
+ .findField("nested_list")
+ .type()
+ .asListType()
+ .elementType()
+ .asStructType();
+ List<Record> rebuilt =
+ writtenList.stream()
+ .map(
+ element ->
+ element == null
+ ? null
+ : GenericRecord.create(elementType)
+ .copy(
+ "element_str",
+ element.getField("element_str"),
+ "element_int",
+ 34))
+ .collect(Collectors.toList());
+ expected.setField("nested_list", rebuilt);
+ }
+
+ return expected;
+ });
+ }
+
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testMissingRequiredWithoutDefault(FileFormat fileFormat) throws
IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Schema writeSchema = new Schema(Types.NestedField.required(1, "id",
Types.LongType.get()));
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ Types.NestedField.required(1, "id", Types.LongType.get()),
+ Types.NestedField.required("missing_str")
+ .withId(6)
+ .ofType(Types.StringType.get())
+ .withDoc("Missing required field with no default")
+ .build());
+
+ assertThatThrownBy(
+ () -> readAndAssertGenericRecords(fileFormat, expectedSchema,
genericRecords))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Missing required field: missing_str");
+ }
+
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testPrimitiveDefaultValues(FileFormat fileFormat) throws IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Schema writeSchema = DataGenerators.PrimitiveDefaults.WRITE_SCHEMA;
+ Schema readSchema =
+ supportsTime()
+ ? DataGenerators.PrimitiveDefaults.READ_SCHEMA
+ : TypeUtil.selectNot(
+ DataGenerators.PrimitiveDefaults.READ_SCHEMA,
+ Set.of(
+ DataGenerators.PrimitiveDefaults.READ_SCHEMA
+ .findField("time_with_default")
+ .fieldId()));
+
+ List<Record> sourceRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, sourceRecords);
+
+ readAndAssertEngineRecords(
+ fileFormat,
+ readSchema,
+ sourceRecords,
+ record -> {
+ Record expected = GenericRecord.create(readSchema);
+ expected.setField("id", record.getField("id"));
+ for (Types.NestedField field : readSchema.columns()) {
+ if (!field.name().equals("id")) {
+ expected.setField(
+ field.name(),
+ GenericDataUtil.internalToGeneric(field.type(),
field.initialDefault()));
+ }
+ }
+
+ return expected;
+ });
+ }
+
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testPrimitiveDefaultValuesNotApplied(FileFormat fileFormat) throws
IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Schema readSchema =
+ supportsTime()
+ ? DataGenerators.PrimitiveDefaults.READ_SCHEMA
+ : TypeUtil.selectNot(
+ DataGenerators.PrimitiveDefaults.READ_SCHEMA,
+ Set.of(
+ DataGenerators.PrimitiveDefaults.READ_SCHEMA
+ .findField("time_with_default")
+ .fieldId()));
+
+ List<Record> sourceRecords = RandomGenericData.generate(readSchema, 10,
1L);
+ writeGenericRecords(fileFormat, readSchema, sourceRecords);
+
+ readAndAssertEngineRecords(fileFormat, readSchema, sourceRecords, record
-> record);
Review Comment:
The setup here is quite similar to testPrimitiveDefaultValues (for example, the
readSchema construction is identical). Can we extract the shared parts into a helper?
##########
data/src/test/java/org/apache/iceberg/data/BaseFormatModelTests.java:
##########
@@ -697,48 +705,420 @@ void testReaderBuilderReuseContainers(FileFormat
fileFormat) throws IOException
@ParameterizedTest
@FieldSource("FILE_FORMATS")
- void testReaderSchemaEvolutionNewColumnWithDefault(FileFormat fileFormat)
throws IOException {
+ void testDefaultValues(FileFormat fileFormat) throws IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema = new Schema(idField, dataField);
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.required("missing_str")
+ .withId(6)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("orange"))
+ .build(),
+ Types.NestedField.optional("missing_int")
+ .withId(7)
+ .ofType(Types.IntegerType.get())
+ .withInitialDefault(Literal.of(34))
+ .build());
+
+ readAndAssertEngineRecords(
+ fileFormat,
+ expectedSchema,
+ genericRecords,
+ record -> {
+ Record expected = GenericRecord.create(expectedSchema);
+ expected.setField("id", record.getField("id"));
+ expected.setField("data", record.getField("data"));
+ expected.setField("missing_str", "orange");
+ expected.setField("missing_int", 34);
+
+ return expected;
+ });
+ }
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testNullDefaultValue(FileFormat fileFormat) throws IOException {
assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
- DataGenerator dataGenerator = new DataGenerators.DefaultSchema();
- Schema writeSchema = dataGenerator.schema();
- List<Record> genericRecords = dataGenerator.generateRecords();
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema = new Schema(idField, dataField);
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
writeGenericRecords(fileFormat, writeSchema, genericRecords);
- String defaultStringValue = "default_value";
- int defaultIntValue = 42;
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("missing_date")
+ .withId(3)
+ .ofType(Types.DateType.get())
+ .build());
- int maxFieldId =
-
writeSchema.columns().stream().mapToInt(Types.NestedField::fieldId).max().orElse(0);
+ readAndAssertEngineRecords(
+ fileFormat,
+ expectedSchema,
+ genericRecords,
+ record -> {
+ Record expected = GenericRecord.create(expectedSchema);
+ expected.setField("id", record.getField("id"));
+ expected.setField("data", record.getField("data"));
+ expected.setField("missing_date", null);
- List<Types.NestedField> evolvedColumns =
Lists.newArrayList(writeSchema.columns());
- evolvedColumns.add(
- Types.NestedField.required("col_f")
- .withId(maxFieldId + 1)
+ return expected;
+ });
+ }
+
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testNestedDefaultValue(FileFormat fileFormat) throws IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
.ofType(Types.StringType.get())
- .withInitialDefault(Literal.of(defaultStringValue))
- .build());
- evolvedColumns.add(
- Types.NestedField.optional("col_g")
- .withId(maxFieldId + 2)
- .ofType(Types.IntegerType.get())
- .withInitialDefault(Literal.of(defaultIntValue))
- .build());
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested")
+ .withId(3)
+ .ofType(
+ Types.StructType.of(
+ Types.NestedField.required(4, "inner",
Types.StringType.get())))
+ .withDoc("Used to test nested field defaults")
+ .build());
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested")
+ .withId(3)
+ .ofType(
+ Types.StructType.of(
+ Types.NestedField.required(4, "inner",
Types.StringType.get()),
+ Types.NestedField.optional("missing_inner_float")
+ .withId(5)
+ .ofType(Types.FloatType.get())
+ .withInitialDefault(Literal.of(-0.0F))
+ .build()))
+ .withDoc("Used to test nested field defaults")
+ .build());
- Schema evolvedSchema = new Schema(evolvedColumns);
- readAndAssertGenericRecords(
+ readAndAssertEngineRecords(
fileFormat,
- evolvedSchema,
+ expectedSchema,
genericRecords,
record -> {
- Record expected = copy(record, writeSchema, evolvedSchema);
- expected.setField("col_f", defaultStringValue);
- expected.setField("col_g", defaultIntValue);
+ Record expected = copy(record, writeSchema, expectedSchema);
+
+ Record writtenNested = (Record) record.getField("nested");
+ if (writtenNested != null) {
+ Record expectedNested =
+
GenericRecord.create(expectedSchema.findField("nested").type().asStructType());
+ expectedNested.setField("inner", writtenNested.getField("inner"));
+ expectedNested.setField("missing_inner_float", -0.0F);
+ expected.setField("nested", expectedNested);
+ }
+
return expected;
});
}
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testMapNestedDefaultValue(FileFormat fileFormat) throws IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested_map")
+ .withId(3)
+ .ofType(
+ Types.MapType.ofOptional(
+ 4,
+ 5,
+ Types.StringType.get(),
+ Types.StructType.of(
+ Types.NestedField.required(6, "value_str",
Types.StringType.get()))))
+ .withDoc("Used to test nested map value field defaults")
+ .build());
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested_map")
+ .withId(3)
+ .ofType(
+ Types.MapType.ofOptional(
+ 4,
+ 5,
+ Types.StringType.get(),
+ Types.StructType.of(
+ Types.NestedField.required(6, "value_str",
Types.StringType.get()),
+ Types.NestedField.optional("value_int")
+ .withId(7)
+ .ofType(Types.IntegerType.get())
+ .withInitialDefault(Literal.of(34))
+ .build())))
+ .withDoc("Used to test nested field defaults")
+ .build());
+
+ readAndAssertEngineRecords(
+ fileFormat,
+ expectedSchema,
+ genericRecords,
+ record -> {
+ Record expected = copy(record, writeSchema, expectedSchema);
+
+ @SuppressWarnings("unchecked")
+ Map<Object, Record> writtenMap = (Map<Object, Record>)
expected.getField("nested_map");
+ if (writtenMap != null) {
+ Types.StructType valueType =
+ expectedSchema
+ .findField("nested_map")
+ .type()
+ .asMapType()
+ .valueType()
+ .asStructType();
+ Map<Object, Record> rebuilt = Maps.newLinkedHashMap();
+ writtenMap.forEach(
+ (key, value) ->
+ rebuilt.put(
+ key,
+ value == null
+ ? null
+ : GenericRecord.create(valueType)
+ .copy("value_str",
value.getField("value_str"), "value_int", 34)));
+ expected.setField("nested_map", rebuilt);
+ }
+
+ return expected;
+ });
+ }
+
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testListNestedDefaultValue(FileFormat fileFormat) throws IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested_list")
+ .withId(3)
+ .ofType(
+ Types.ListType.ofOptional(
+ 4,
+ Types.StructType.of(
+ Types.NestedField.required(5, "element_str",
Types.StringType.get()))))
+ .withDoc("Used to test nested field defaults")
+ .build());
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested_list")
+ .withId(3)
+ .ofType(
+ Types.ListType.ofOptional(
+ 4,
+ Types.StructType.of(
+ Types.NestedField.required(5, "element_str",
Types.StringType.get()),
+ Types.NestedField.optional("element_int")
+ .withId(7)
+ .ofType(Types.IntegerType.get())
+ .withInitialDefault(Literal.of(34))
+ .build())))
+ .withDoc("Used to test nested field defaults")
+ .build());
+
+ readAndAssertEngineRecords(
+ fileFormat,
+ expectedSchema,
+ genericRecords,
+ record -> {
+ Record expected = copy(record, writeSchema, expectedSchema);
+
+ @SuppressWarnings("unchecked")
+ List<Record> writtenList = (List<Record>)
expected.getField("nested_list");
+ if (writtenList != null) {
+ Types.StructType elementType =
+ expectedSchema
+ .findField("nested_list")
+ .type()
+ .asListType()
+ .elementType()
+ .asStructType();
+ List<Record> rebuilt =
+ writtenList.stream()
+ .map(
+ element ->
+ element == null
+ ? null
+ : GenericRecord.create(elementType)
+ .copy(
+ "element_str",
+ element.getField("element_str"),
+ "element_int",
+ 34))
+ .collect(Collectors.toList());
+ expected.setField("nested_list", rebuilt);
+ }
+
+ return expected;
+ });
+ }
+
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testMissingRequiredWithoutDefault(FileFormat fileFormat) throws
IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Schema writeSchema = new Schema(Types.NestedField.required(1, "id",
Types.LongType.get()));
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ Types.NestedField.required(1, "id", Types.LongType.get()),
+ Types.NestedField.required("missing_str")
+ .withId(6)
+ .ofType(Types.StringType.get())
+ .withDoc("Missing required field with no default")
+ .build());
+
+ assertThatThrownBy(
+ () -> readAndAssertGenericRecords(fileFormat, expectedSchema,
genericRecords))
Review Comment:
This test only checks the generic reader component. Do we need to add tests
for the engine-related components as well?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]