chamikaramj commented on code in PR #31958:
URL: https://github.com/apache/beam/pull/31958#discussion_r1694514891
##########
sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergUtils.java:
##########
@@ -99,36 +105,130 @@ public static Schema icebergSchemaToBeamSchema(final
org.apache.iceberg.Schema s
return builder.build();
}
- public static Schema icebergStructTypeToBeamSchema(final Types.StructType
struct) {
+ private static Schema icebergStructTypeToBeamSchema(final Types.StructType
struct) {
Schema.Builder builder = Schema.builder();
for (Types.NestedField f : struct.fields()) {
builder.addField(icebergFieldToBeamField(f));
}
return builder.build();
}
- public static Types.NestedField beamFieldToIcebergField(int fieldId, final
Schema.Field field) {
- @Nullable Type icebergType =
BEAM_TYPES_TO_ICEBERG_TYPES.get(field.getType());
+ /**
+ * Represents an Object (in practice, either {@link Type} or {@link
Types.NestedField}) along with
+ * the most recent (max) ID that has been used to build this object.
+ *
+ * <p>Iceberg Schema fields are required to have unique IDs. This includes
unique IDs for a {@link
+ * Types.ListType}'s collection type, a {@link Types.MapType}'s key type and
value type, and
+ * nested {@link Types.StructType}s. When constructing any of these types,
we use multiple unique
+ * IDs for the type's components. The {@code maxId} in this object
represents the most recent ID
+ * used after building this type. This helps signal that the next field we
construct should have
+ * an ID greater than this one.
+ */
+ private static class ObjectAndMaxId<T> {
+ int maxId;
+ T object;
- if (icebergType != null) {
- return Types.NestedField.of(
- fieldId, field.getType().getNullable(), field.getName(),
icebergType);
- } else {
- return Types.NestedField.of(
- fieldId, field.getType().getNullable(), field.getName(),
Types.StringType.get());
+ ObjectAndMaxId(int id, T object) {
+ this.maxId = id;
+ this.object = object;
}
}
+ private static ObjectAndMaxId<Type> beamFieldTypeToIcebergFieldType(
+ int fieldId, Schema.FieldType beamType) {
+ if (BEAM_TYPES_TO_ICEBERG_TYPES.containsKey(beamType.getTypeName())) {
+ return new ObjectAndMaxId<>(fieldId,
BEAM_TYPES_TO_ICEBERG_TYPES.get(beamType.getTypeName()));
+ } else if (beamType.getTypeName().isCollectionType()) { // ARRAY or
ITERABLE
+ // List ID needs to be unique from the NestedField that contains this
ListType
+ int listId = fieldId + 1;
+ Schema.FieldType beamCollectionType =
+
Preconditions.checkArgumentNotNull(beamType.getCollectionElementType());
Review Comment:
Let's make sure that the logic here is covered by unit tests.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]