openinx commented on a change in pull request #2465:
URL: https://github.com/apache/iceberg/pull/2465#discussion_r618164919



##########
File path: core/src/main/java/org/apache/iceberg/SchemaUpdate.java
##########
@@ -408,11 +427,55 @@ private TableMetadata applyChangesToMapping(TableMetadata 
metadata) {
   private static Schema applyChanges(Schema schema, List<Integer> deletes,
                                      Map<Integer, Types.NestedField> updates,
                                      Multimap<Integer, Types.NestedField> adds,
-                                     Multimap<Integer, Move> moves) {
+                                     Multimap<Integer, Move> moves,
+                                     Set<String> identifierNames) {
+    // validate existing identifier fields are not deleted
+    for (String name : identifierNames) {
+      Types.NestedField field = schema.findField(name);
+      if (field != null) {
+        Preconditions.checkArgument(!deletes.contains(field.fieldId()),
+            "Cannot delete identifier field %s. To force deletion, " +
+                "also call setIdentifierFields to update identifier fields.", 
field);
+      }
+    }
+
+    // apply schema changes
     Types.StructType struct = TypeUtil
         .visit(schema, new ApplyChanges(deletes, updates, adds, moves))
         .asNestedType().asStructType();
-    return new Schema(struct.fields());
+
+    // validate identifier requirements based on latest schema
+    Schema noIdentifierSchema = new Schema(struct.fields());
+    Set<Integer> validatedIdentifiers = identifierNames.stream()
+        .map(n -> validateIdentifierField(n, noIdentifierSchema))
+        .collect(Collectors.toSet());
+
+    return new Schema(struct.fields(), validatedIdentifiers);
+  }
+
+  private static int validateIdentifierField(String name, Schema schema) {
+    Types.NestedField field = schema.findField(name);
+    Preconditions.checkArgument(field != null,
+        "Cannot add field %s as an identifier field, not found in current 
schema or added columns");
+    Preconditions.checkArgument(field.type().isPrimitiveType(),
+        "Cannot add field %s as an identifier field: not a primitive type 
field", name);
+    Map<Integer, Integer> newIdToParent = 
TypeUtil.indexParents(schema.asStruct());
+    validateIdentifierFieldParent(field.name(), field.fieldId(), 
newIdToParent, schema);

Review comment:
       I would prefer to use a while loop (rather than a recursive method) to 
express the parent field validation logic, because that makes it clearer:
   
   ```java
     private static int validateIdentifierField(String name, Schema schema) {
       Types.NestedField field = schema.findField(name);
       Preconditions.checkArgument(field != null,
           "Cannot add field %s as an identifier field, not found in current 
schema or added columns");
       Preconditions.checkArgument(field.type().isPrimitiveType(),
           "Cannot add field %s as an identifier field: not a primitive type 
field", name);
   
       // Check whether the nested column is in a chain of struct field list.
       Map<Integer, Integer> newIdToParent = 
TypeUtil.indexParents(schema.asStruct());
       Integer parentId = newIdToParent.get(field.fieldId());
       while (parentId != null) {
         Types.NestedField parent = schema.findField(parentId);
         ValidationException.check(parent.type().isStructType(),
             "Cannot add field %s as an identifier field: must not be nested in 
%s", name, parent);
         parentId = newIdToParent.get(parent.fieldId());
       }
       return field.fieldId();
     }
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to