rdblue commented on a change in pull request #1744:
URL: https://github.com/apache/iceberg/pull/1744#discussion_r521767305



##########
File path: core/src/test/java/org/apache/iceberg/avro/TestReadProjection.java
##########
@@ -526,4 +527,216 @@ public void testListOfStructsProjection() throws 
IOException {
     Assert.assertNull("Should not project y", projectedP2.get("y"));
     Assert.assertEquals("Should project null z", null, projectedP2.get("z"));
   }
+
+  @Test
+  public void testEmptyStructProjection() throws Exception {
+    Schema writeSchema = new Schema(
+        Types.NestedField.required(0, "id", Types.LongType.get()),
+        Types.NestedField.optional(3, "location", Types.StructType.of(
+            Types.NestedField.required(1, "lat", Types.FloatType.get()),
+            Types.NestedField.required(2, "long", Types.FloatType.get())
+        ))
+    );
+
+    Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table"));
+    record.put("id", 34L);
+    Record location = new Record(
+        
AvroSchemaUtil.fromOption(record.getSchema().getField("location").schema()));
+    location.put("lat", 52.995143f);
+    location.put("long", -1.539054f);
+    record.put("location", location);
+
+    Schema emptyStruct = new Schema(
+        Types.NestedField.required(3, "location", Types.StructType.of())
+    );
+
+    Record projected = writeAndRead("empty_proj", writeSchema, emptyStruct, 
record);
+    Assert.assertNull("Should not project data", projected.get("data"));
+    Record result = (Record) projected.get("location");
+    Assert.assertNotNull("Should contain an empty record", result);
+    Assert.assertNull("Should not project lat", result.get("lat"));
+    Assert.assertNull("Should not project long", result.get("long"));
+  }
+
+  @Test
+  public void testEmptyStructRequiredProjection() throws Exception {
+    Schema writeSchema = new Schema(
+        Types.NestedField.required(0, "id", Types.LongType.get()),
+        Types.NestedField.required(3, "location", Types.StructType.of(
+            Types.NestedField.required(1, "lat", Types.FloatType.get()),
+            Types.NestedField.required(2, "long", Types.FloatType.get())
+        ))
+    );
+
+    Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table"));
+    record.put("id", 34L);
+    Record location = new 
Record(record.getSchema().getField("location").schema());
+    location.put("lat", 52.995143f);
+    location.put("long", -1.539054f);
+    record.put("location", location);
+
+    Schema emptyStruct = new Schema(
+        Types.NestedField.required(3, "location", Types.StructType.of())
+    );
+
+    Record projected = writeAndRead("empty_req_proj", writeSchema, 
emptyStruct, record);
+    Assert.assertNull("Should not project data", projected.get("data"));
+    Record result = (Record) projected.get("location");
+    Assert.assertNotNull("Should contain an empty record", result);
+    Assert.assertNull("Should not project lat", result.get("lat"));
+    Assert.assertNull("Should not project long", result.get("long"));
+  }
+
+  @Test
+  public void testEmptyNestedStructProjection() throws Exception {
+    Schema writeSchema = new Schema(
+        Types.NestedField.required(0, "id", Types.LongType.get()),
+        Types.NestedField.optional(3, "outer", Types.StructType.of(
+            Types.NestedField.required(1, "lat", Types.FloatType.get()),
+            Types.NestedField.optional(2, "inner", Types.StructType.of(
+                Types.NestedField.required(5, "lon", Types.FloatType.get())
+                )
+            )
+        ))
+    );
+
+    Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table"));
+    record.put("id", 34L);
+    Record outer = new Record(
+        
AvroSchemaUtil.fromOption(record.getSchema().getField("outer").schema()));
+    Record inner = new 
Record(AvroSchemaUtil.fromOption(outer.getSchema().getField("inner").schema()));
+    inner.put("lon", 32.14f);
+    outer.put("lat", 52.995143f);
+    outer.put("inner", inner);
+    record.put("outer", outer);
+
+    Schema emptyStruct = new Schema(
+        Types.NestedField.required(3, "outer", Types.StructType.of(
+            Types.NestedField.required(2, "inner", Types.StructType.of())
+        )));
+
+    Record projected = writeAndRead("nested_empty_proj", writeSchema, 
emptyStruct, record);
+    Assert.assertNull("Should not project data", projected.get("id"));
+    Record outerResult = (Record) projected.get("outer");
+    Assert.assertNotNull("Should contain the outer record", outerResult);
+    Assert.assertNull("Should not contain lat", outerResult.get("lat"));
+    Record innerResult = (Record) outerResult.get("inner");
+    Assert.assertNotNull("Should contain the inner record", innerResult);
+    Assert.assertNull("Should not contain lon", innerResult.get("lon"));
+  }
+
+  @Test
+  public void testEmptyNestedStructRequiredProjection() throws Exception {
+    Schema writeSchema = new Schema(
+        Types.NestedField.required(0, "id", Types.LongType.get()),
+        Types.NestedField.required(3, "outer", Types.StructType.of(
+            Types.NestedField.required(1, "lat", Types.FloatType.get()),
+            Types.NestedField.required(2, "inner", Types.StructType.of(
+                Types.NestedField.required(5, "lon", Types.FloatType.get())
+                )
+            )
+        ))
+    );
+
+    Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table"));
+    record.put("id", 34L);
+    Record outer = new Record(record.getSchema().getField("outer").schema());
+    Record inner = new Record(outer.getSchema().getField("inner").schema());
+    inner.put("lon", 32.14f);
+    outer.put("lat", 52.995143f);
+    outer.put("inner", inner);
+    record.put("outer", outer);
+
+    Schema emptyStruct = new Schema(
+        Types.NestedField.required(3, "outer", Types.StructType.of(
+            Types.NestedField.required(2, "inner", Types.StructType.of())
+        )));
+
+    Record projected = writeAndRead("nested_empty_req_proj", writeSchema, 
emptyStruct, record);
+    Assert.assertNull("Should not project data", projected.get("id"));
+    Record outerResult = (Record) projected.get("outer");
+    Assert.assertNotNull("Should contain the outer record", outerResult);
+    Assert.assertNull("Should not contain lat", outerResult.get("lat"));
+    Record innerResult = (Record) outerResult.get("inner");
+    Assert.assertNotNull("Should contain the inner record", innerResult);
+    Assert.assertNull("Should not contain lon", innerResult.get("lon"));
+  }
+
+  @Test
+  public void testMetadataFieldProjection() throws Exception {
+    Schema writeSchema = new Schema(
+        Types.NestedField.required(0, "id", Types.LongType.get()),
+        Types.NestedField.required(3, "outer", Types.StructType.of(
+            Types.NestedField.required(1, "lat", Types.FloatType.get()),
+            Types.NestedField.required(2, "inner", Types.StructType.of(
+                Types.NestedField.required(5, "lon", Types.FloatType.get())
+                )
+            )
+        ))
+    );
+
+    Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table"));
+    record.put("id", 34L);
+    Record outer = new Record(record.getSchema().getField("outer").schema());
+    Record inner = new Record(outer.getSchema().getField("inner").schema());
+    inner.put("lon", 32.14f);
+    outer.put("lat", 52.995143f);
+    outer.put("inner", inner);
+    record.put("outer", outer);
+
+    Schema metadataStruct = new Schema(
+        Types.NestedField.required(3, "outer", Types.StructType.of(
+            Types.NestedField.required(2, "inner", 
Types.StructType.of(MetadataColumns.ROW_POSITION)
+            ))
+        ));
+
+    Record projected = writeAndRead("metadata_field_proj", writeSchema, 
metadataStruct, record);
+    Assert.assertNull("Should not project data", projected.get("id"));
+    Record outerResult = (Record) projected.get("outer");
+    Assert.assertNotNull("Should contain the outer record", outerResult);
+    Assert.assertNull("Should not contain lat", outerResult.get("lat"));
+    Record innerResult = (Record) outerResult.get("inner");
+    Assert.assertNotNull("Should contain the inner record", innerResult);
+    Assert.assertNull("Should not contain lon", innerResult.get("lon"));
+    String metaName = MetadataColumns.ROW_POSITION.name() + "_r" + 
MetadataColumns.ROW_POSITION.fieldId();
+    Assert.assertNotNull("Should contain metadata field", 
innerResult.get(metaName));
+  }
+
+  @Test
+  public void testNonExistentProjection() throws Exception {
+    Schema writeSchema = new Schema(
+        Types.NestedField.required(0, "id", Types.LongType.get()),
+        Types.NestedField.optional(3, "location", Types.StructType.of(
+            Types.NestedField.required(1, "lat", Types.FloatType.get()),
+            Types.NestedField.required(2, "long", Types.FloatType.get())
+        ))
+    );
+
+    Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table"));
+    record.put("id", 34L);
+    Record location = new Record(
+        
AvroSchemaUtil.fromOption(record.getSchema().getField("location").schema()));
+    location.put("lat", 52.995143f);
+    location.put("long", -1.539054f);
+    record.put("location", location);
+
+    Schema emptyStruct = new Schema(
+        Types.NestedField.required(3, "location", Types.StructType.of(
+            Types.NestedField.optional(10000, "foo", Types.StructType.of(
+                Types.NestedField.optional(10001, "bar", 
Types.IntegerType.get())
+            ))
+        ))
+    );
+
+    Record projected = writeAndRead("non_existant_proj", writeSchema, 
emptyStruct, record);
+    Assert.assertNull("Should not project data", projected.get("data"));
+    Record result = (Record) projected.get("location");
+    Assert.assertNotNull("Should contain an fake optional record", result);
+    Assert.assertNull("Should not project lat", result.get("lat"));
+    Assert.assertNull("Should not project long", result.get("long"));

Review comment:
       I think this also needs to assert that `foo` is null, because it is not 
in the source data. A record that is missing from the source data should be 
read as null if it is optional, and should result in an exception if it is 
required.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to