joyhaldar commented on code in PR #16638:
URL: https://github.com/apache/iceberg/pull/16638#discussion_r3361415997
##########
data/src/test/java/org/apache/iceberg/data/BaseFormatModelTests.java:
##########
@@ -697,48 +705,420 @@ void testReaderBuilderReuseContainers(FileFormat
fileFormat) throws IOException
@ParameterizedTest
@FieldSource("FILE_FORMATS")
- void testReaderSchemaEvolutionNewColumnWithDefault(FileFormat fileFormat)
throws IOException {
+ void testDefaultValues(FileFormat fileFormat) throws IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema = new Schema(idField, dataField);
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.required("missing_str")
+ .withId(6)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("orange"))
+ .build(),
+ Types.NestedField.optional("missing_int")
+ .withId(7)
+ .ofType(Types.IntegerType.get())
+ .withInitialDefault(Literal.of(34))
+ .build());
+
+ readAndAssertEngineRecords(
+ fileFormat,
+ expectedSchema,
+ genericRecords,
+ record -> {
+ Record expected = GenericRecord.create(expectedSchema);
+ expected.setField("id", record.getField("id"));
+ expected.setField("data", record.getField("data"));
+ expected.setField("missing_str", "orange");
+ expected.setField("missing_int", 34);
+
+ return expected;
+ });
+ }
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testNullDefaultValue(FileFormat fileFormat) throws IOException {
assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
- DataGenerator dataGenerator = new DataGenerators.DefaultSchema();
- Schema writeSchema = dataGenerator.schema();
- List<Record> genericRecords = dataGenerator.generateRecords();
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema = new Schema(idField, dataField);
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
writeGenericRecords(fileFormat, writeSchema, genericRecords);
- String defaultStringValue = "default_value";
- int defaultIntValue = 42;
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("missing_date")
+ .withId(3)
+ .ofType(Types.DateType.get())
+ .build());
- int maxFieldId =
-
writeSchema.columns().stream().mapToInt(Types.NestedField::fieldId).max().orElse(0);
+ readAndAssertEngineRecords(
+ fileFormat,
+ expectedSchema,
+ genericRecords,
+ record -> {
+ Record expected = GenericRecord.create(expectedSchema);
+ expected.setField("id", record.getField("id"));
+ expected.setField("data", record.getField("data"));
+ expected.setField("missing_date", null);
- List<Types.NestedField> evolvedColumns =
Lists.newArrayList(writeSchema.columns());
- evolvedColumns.add(
- Types.NestedField.required("col_f")
- .withId(maxFieldId + 1)
+ return expected;
+ });
+ }
+
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testNestedDefaultValue(FileFormat fileFormat) throws IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
.ofType(Types.StringType.get())
- .withInitialDefault(Literal.of(defaultStringValue))
- .build());
- evolvedColumns.add(
- Types.NestedField.optional("col_g")
- .withId(maxFieldId + 2)
- .ofType(Types.IntegerType.get())
- .withInitialDefault(Literal.of(defaultIntValue))
- .build());
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested")
+ .withId(3)
+ .ofType(
+ Types.StructType.of(
+ Types.NestedField.required(4, "inner",
Types.StringType.get())))
+ .withDoc("Used to test nested field defaults")
+ .build());
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested")
+ .withId(3)
+ .ofType(
+ Types.StructType.of(
+ Types.NestedField.required(4, "inner",
Types.StringType.get()),
+ Types.NestedField.optional("missing_inner_float")
+ .withId(5)
+ .ofType(Types.FloatType.get())
+ .withInitialDefault(Literal.of(-0.0F))
+ .build()))
+ .withDoc("Used to test nested field defaults")
+ .build());
- Schema evolvedSchema = new Schema(evolvedColumns);
- readAndAssertGenericRecords(
+ readAndAssertEngineRecords(
fileFormat,
- evolvedSchema,
+ expectedSchema,
genericRecords,
record -> {
- Record expected = copy(record, writeSchema, evolvedSchema);
- expected.setField("col_f", defaultStringValue);
- expected.setField("col_g", defaultIntValue);
+ Record expected = copy(record, writeSchema, expectedSchema);
+
+ Record writtenNested = (Record) record.getField("nested");
+ if (writtenNested != null) {
+ Record expectedNested =
+
GenericRecord.create(expectedSchema.findField("nested").type().asStructType());
+ expectedNested.setField("inner", writtenNested.getField("inner"));
+ expectedNested.setField("missing_inner_float", -0.0F);
+ expected.setField("nested", expectedNested);
+ }
+
return expected;
});
}
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testMapNestedDefaultValue(FileFormat fileFormat) throws IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested_map")
+ .withId(3)
+ .ofType(
+ Types.MapType.ofOptional(
+ 4,
+ 5,
+ Types.StringType.get(),
+ Types.StructType.of(
+ Types.NestedField.required(6, "value_str",
Types.StringType.get()))))
+ .withDoc("Used to test nested map value field defaults")
+ .build());
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested_map")
+ .withId(3)
+ .ofType(
+ Types.MapType.ofOptional(
+ 4,
+ 5,
+ Types.StringType.get(),
+ Types.StructType.of(
+ Types.NestedField.required(6, "value_str",
Types.StringType.get()),
+ Types.NestedField.optional("value_int")
+ .withId(7)
+ .ofType(Types.IntegerType.get())
+ .withInitialDefault(Literal.of(34))
+ .build())))
+ .withDoc("Used to test nested field defaults")
+ .build());
+
+ readAndAssertEngineRecords(
+ fileFormat,
+ expectedSchema,
+ genericRecords,
+ record -> {
+ Record expected = copy(record, writeSchema, expectedSchema);
+
+ @SuppressWarnings("unchecked")
+ Map<Object, Record> writtenMap = (Map<Object, Record>)
expected.getField("nested_map");
+ if (writtenMap != null) {
+ Types.StructType valueType =
+ expectedSchema
+ .findField("nested_map")
+ .type()
+ .asMapType()
+ .valueType()
+ .asStructType();
+ Map<Object, Record> rebuilt = Maps.newLinkedHashMap();
+ writtenMap.forEach(
+ (key, value) ->
+ rebuilt.put(
+ key,
+ value == null
+ ? null
+ : GenericRecord.create(valueType)
+ .copy("value_str",
value.getField("value_str"), "value_int", 34)));
+ expected.setField("nested_map", rebuilt);
+ }
+
+ return expected;
+ });
+ }
+
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testListNestedDefaultValue(FileFormat fileFormat) throws IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Types.NestedField idField = Types.NestedField.required(1, "id",
Types.LongType.get());
+ Types.NestedField dataField =
+ Types.NestedField.optional("data")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withInitialDefault(Literal.of("wrong!"))
+ .withDoc("Should not produce default value")
+ .build();
+
+ Schema writeSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested_list")
+ .withId(3)
+ .ofType(
+ Types.ListType.ofOptional(
+ 4,
+ Types.StructType.of(
+ Types.NestedField.required(5, "element_str",
Types.StringType.get()))))
+ .withDoc("Used to test nested field defaults")
+ .build());
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ idField,
+ dataField,
+ Types.NestedField.optional("nested_list")
+ .withId(3)
+ .ofType(
+ Types.ListType.ofOptional(
+ 4,
+ Types.StructType.of(
+ Types.NestedField.required(5, "element_str",
Types.StringType.get()),
+ Types.NestedField.optional("element_int")
+ .withId(7)
+ .ofType(Types.IntegerType.get())
+ .withInitialDefault(Literal.of(34))
+ .build())))
+ .withDoc("Used to test nested field defaults")
+ .build());
+
+ readAndAssertEngineRecords(
+ fileFormat,
+ expectedSchema,
+ genericRecords,
+ record -> {
+ Record expected = copy(record, writeSchema, expectedSchema);
+
+ @SuppressWarnings("unchecked")
+ List<Record> writtenList = (List<Record>)
expected.getField("nested_list");
+ if (writtenList != null) {
+ Types.StructType elementType =
+ expectedSchema
+ .findField("nested_list")
+ .type()
+ .asListType()
+ .elementType()
+ .asStructType();
+ List<Record> rebuilt =
+ writtenList.stream()
+ .map(
+ element ->
+ element == null
+ ? null
+ : GenericRecord.create(elementType)
+ .copy(
+ "element_str",
+ element.getField("element_str"),
+ "element_int",
+ 34))
+ .collect(Collectors.toList());
+ expected.setField("nested_list", rebuilt);
+ }
+
+ return expected;
+ });
+ }
+
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testMissingRequiredWithoutDefault(FileFormat fileFormat) throws
IOException {
+ assumeSupports(fileFormat, FEATURE_READER_DEFAULT);
+
+ Schema writeSchema = new Schema(Types.NestedField.required(1, "id",
Types.LongType.get()));
+
+ List<Record> genericRecords = RandomGenericData.generate(writeSchema, 10,
1L);
+ writeGenericRecords(fileFormat, writeSchema, genericRecords);
+
+ Schema expectedSchema =
+ new Schema(
+ Types.NestedField.required(1, "id", Types.LongType.get()),
+ Types.NestedField.required("missing_str")
+ .withId(6)
+ .ofType(Types.StringType.get())
+ .withDoc("Missing required field with no default")
+ .build());
+
+ assertThatThrownBy(
+ () -> readAndAssertGenericRecords(fileFormat, expectedSchema,
genericRecords))
Review Comment:
You are right. I switched it to `readAndAssertEngineRecords`; it throws the same
`IllegalArgumentException`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]