ahmedabu98 commented on code in PR #34102:
URL: https://github.com/apache/beam/pull/34102#discussion_r1979496089
##########
sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProtoTest.java:
##########
@@ -1671,6 +1673,85 @@ public void testIgnoreUnknownRepeatedNestedField()
throws Exception {
assertEquals("foobar_doubly_nested",
unknownDoublyNestedStruct.get("unknown_doubly_nested"));
}
+ @Test
+ public void testIgnoreUnknownRepeatedNestedFieldWithNoUnknowns() throws
Exception {
+
+ List<TableFieldSchema> fields = new ArrayList<>();
+ fields.add(new TableFieldSchema().setName("foo").setType("STRING"));
+ fields.add(
+ new TableFieldSchema()
+ .setName("repeated1")
+ .setMode("REPEATED")
+ .setType("RECORD")
+ .setFields(
+ ImmutableList.of(
+ new
TableFieldSchema().setName("key1").setType("STRING").setMode("REQUIRED"),
+ new
TableFieldSchema().setName("key2").setType("STRING"))));
+ TableSchema schema = new TableSchema().setFields(fields);
+ TableRow tableRow =
+ new TableRow()
+ .set("foo", "bar")
+ .set(
+ "repeated1",
+ ImmutableList.of(
+ new TableCell().set("key1", "valueA").set("key2",
"valueC"),
+ new TableCell().set("key1", "valueB").set("key2",
"valueD")));
+
+ Descriptor descriptor =
+ TableRowToStorageApiProto.getDescriptorFromTableSchema(schema, true,
false);
+ TableRowToStorageApiProto.SchemaInformation schemaInformation =
+ TableRowToStorageApiProto.SchemaInformation.fromTableSchema(schema);
+ TableRow unknown = new TableRow();
+ DynamicMessage msg =
+ TableRowToStorageApiProto.messageFromTableRow(
+ schemaInformation, descriptor, tableRow, true, false, unknown,
null, -1);
+ assertEquals(2, msg.getAllFields().size());
+ assertTrue(unknown.isEmpty());
+ }
+
+ @Test
+ public void testIgnoreUnknownRepeatedNestedFieldWithUknownInRepeatedField()
throws Exception {
+
+ List<TableFieldSchema> fields = new ArrayList<>();
+ fields.add(new TableFieldSchema().setName("foo").setType("STRING"));
+ fields.add(
+ new TableFieldSchema()
+ .setName("repeated1")
+ .setMode("REPEATED")
+ .setType("RECORD")
+ .setFields(
+ ImmutableList.of(
+ new
TableFieldSchema().setName("key1").setType("STRING").setMode("REQUIRED"),
+ new
TableFieldSchema().setName("key2").setType("STRING"))));
+ TableSchema schema = new TableSchema().setFields(fields);
+ TableRow tableRow =
+ new TableRow()
+ .set("foo", "bar")
+ .set(
+ "repeated1",
+ ImmutableList.of(
+ new TableCell().set("key1", "valueA").set("key2",
"valueC"),
+ new TableCell()
+ .set("key1", "valueB")
+ .set("key2", "valueD")
+ .set("unknown", "valueE")));
+
+ Descriptor descriptor =
+ TableRowToStorageApiProto.getDescriptorFromTableSchema(schema, true,
false);
+ TableRowToStorageApiProto.SchemaInformation schemaInformation =
+ TableRowToStorageApiProto.SchemaInformation.fromTableSchema(schema);
+ TableRow unknown = new TableRow();
+ DynamicMessage msg =
+ TableRowToStorageApiProto.messageFromTableRow(
+ schemaInformation, descriptor, tableRow, true, false, unknown,
null, -1);
+ assertEquals(2, msg.getAllFields().size());
+ assertFalse(unknown.isEmpty());
+ assertEquals(2, ((List<?>) unknown.get("repeated1")).size());
+ assertNotNull(((List<?>) unknown.get("repeated1")).get(0));
+ assertNotNull(((List<?>) unknown.get("repeated1")).get(1));
+ assertEquals("valueE", ((TableRow) ((List<?>)
unknown.get("repeated1")).get(1)).get("unknown"));
Review Comment:
So if I'm following correctly, index 1 of the `repeated1` list is
`{unknown: valueE}`, and index 0 is an empty map? Can we add a check that
index 0 is empty?
##########
sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProtoTest.java:
##########
@@ -1671,6 +1673,85 @@ public void testIgnoreUnknownRepeatedNestedField()
throws Exception {
assertEquals("foobar_doubly_nested",
unknownDoublyNestedStruct.get("unknown_doubly_nested"));
}
+ @Test
+ public void testIgnoreUnknownRepeatedNestedFieldWithNoUnknowns() throws
Exception {
+
+ List<TableFieldSchema> fields = new ArrayList<>();
+ fields.add(new TableFieldSchema().setName("foo").setType("STRING"));
+ fields.add(
+ new TableFieldSchema()
+ .setName("repeated1")
+ .setMode("REPEATED")
+ .setType("RECORD")
+ .setFields(
+ ImmutableList.of(
+ new
TableFieldSchema().setName("key1").setType("STRING").setMode("REQUIRED"),
+ new
TableFieldSchema().setName("key2").setType("STRING"))));
+ TableSchema schema = new TableSchema().setFields(fields);
+ TableRow tableRow =
+ new TableRow()
+ .set("foo", "bar")
+ .set(
+ "repeated1",
+ ImmutableList.of(
+ new TableCell().set("key1", "valueA").set("key2",
"valueC"),
+ new TableCell().set("key1", "valueB").set("key2",
"valueD")));
+
+ Descriptor descriptor =
+ TableRowToStorageApiProto.getDescriptorFromTableSchema(schema, true,
false);
+ TableRowToStorageApiProto.SchemaInformation schemaInformation =
+ TableRowToStorageApiProto.SchemaInformation.fromTableSchema(schema);
+ TableRow unknown = new TableRow();
+ DynamicMessage msg =
+ TableRowToStorageApiProto.messageFromTableRow(
+ schemaInformation, descriptor, tableRow, true, false, unknown,
null, -1);
+ assertEquals(2, msg.getAllFields().size());
+ assertTrue(unknown.isEmpty());
+ }
+
+ @Test
+ public void testIgnoreUnknownRepeatedNestedFieldWithUknownInRepeatedField()
throws Exception {
Review Comment:
nit
```suggestion
public void
testIgnoreUnknownRepeatedNestedFieldWithUnknownInRepeatedField() throws
Exception {
```
##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProto.java:
##########
@@ -536,11 +536,20 @@ public static DynamicMessage messageFromMap(
// For STRUCT fields, we add a placeholder to unknownFields using the
getNestedUnknown
// supplier (in case we encounter unknown nested fields). If the
placeholder comes out
// to be empty, we should clean it up
+
if
(fieldSchemaInformation.getType().equals(TableFieldSchema.Type.STRUCT)
- && unknownFields != null
- && unknownFields.get(key) instanceof Map
- && ((Map<?, ?>) unknownFields.get(key)).isEmpty()) {
- unknownFields.remove(key);
+ && unknownFields != null) {
+ if (unknownFields.get(key) instanceof Map
+ && ((Map<?, ?>) unknownFields.get(key)).isEmpty()) {
+ unknownFields.remove(key);
+ } else if (unknownFields.get(key) instanceof List) { // repeated
+
+ if (((List<?>) unknownFields.get(key)).isEmpty()
+ || ((List<?>) unknownFields.get(key))
+ .stream().allMatch(row -> row == null || ((Map<?, ?>)
row).isEmpty())) {
Review Comment:
Can we combine these two if-clauses into a single condition?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]