Re: [PR] [python] Support schema evolution of nested struct sub-fields [paimon]

via GitHub Sun, 14 Jun 2026 05:19:30 -0700


JingsongLi commented on code in PR #8187:
URL: https://github.com/apache/paimon/pull/8187#discussion_r3409517470



##########
paimon-python/pypaimon/schema/schema_manager.py:
##########
@@ -55,49 +174,87 @@ def _get_rename_mappings(changes: List[SchemaChange]) -> dict:
 def _handle_update_column_comment(
     change: UpdateColumnComment, new_fields: List[DataField]
 ):
-    field_name = change.field_names[-1]
-    field_index = _find_field_index(new_fields, field_name)
-    if field_index is None:
-        raise ColumnNotExistException(field_name)
-    field = new_fields[field_index]
-    new_fields[field_index] = DataField(
-        field.id, field.name, field.type, change.new_comment, field.default_value
-    )
+    def update_func(field: DataField, depth: int) -> DataField:
+        return DataField(
+            field.id, field.name, field.type, change.new_comment, field.default_value
+        )
+    _update_nested_column(new_fields, change.field_names, update_func)
+
+
+def _assert_nullability_change(old_nullability: bool, new_nullability: bool,
+                               field_name: str, disable_null_to_not_null: bool):
+    if disable_null_to_not_null and old_nullability and not new_nullability:
+        raise ValueError(
+            "Cannot update column type from nullable to non nullable for {}. "
+            "You can set table configuration option "
+            "'alter-column-null-to-not-null.disabled' = 'false' "
+            "to allow converting null columns to not null".format(field_name)
+        )
 
 
 def _handle_update_column_nullability(
-    change: UpdateColumnNullability, new_fields: List[DataField]
+    change: UpdateColumnNullability, new_fields: List[DataField],
+    disable_null_to_not_null: bool
 ):
-    field_name = change.field_names[-1]
-    field_index = _find_field_index(new_fields, field_name)
-    if field_index is None:
-        raise ColumnNotExistException(field_name)
-    field = new_fields[field_index]
     from pypaimon.schema.data_types import DataTypeParser
-    field_type_dict = field.type.to_dict()
-    new_type = DataTypeParser.parse_data_type(field_type_dict)
-    new_type.nullable = change.new_nullability
-    new_fields[field_index] = DataField(
-        field.id, field.name, new_type, field.description, field.default_value
-    )
+    field_names = change.field_names
+    max_depth = len(field_names)
+
+    def update_func(field: DataField, depth: int) -> DataField:
+        source_root = _get_root_type(field.type, depth, max_depth)
+        _assert_nullability_change(
+            source_root.nullable, change.new_nullability,
+            '.'.join(field_names), disable_null_to_not_null)
+        new_root = DataTypeParser.parse_data_type(source_root.to_dict())
+        new_root.nullable = change.new_nullability
+        new_type = _get_array_map_type_with_target_type_root(
+            field.type, new_root, depth, max_depth)
+        return DataField(
+            field.id, field.name, new_type, field.description, field.default_value
+        )
+    _update_nested_column(new_fields, field_names, update_func)
 
 
 def _handle_update_column_type(
-    change: UpdateColumnType, new_fields: List[DataField]
+    change: UpdateColumnType, new_fields: List[DataField],
+    disable_null_to_not_null: bool
 ):
-    field_name = change.field_names[-1]
-    field_index = _find_field_index(new_fields, field_name)
-    if field_index is None:
-        raise ColumnNotExistException(field_name)
-    field = new_fields[field_index]
     from pypaimon.schema.data_types import DataTypeParser
-    new_type_dict = change.new_data_type.to_dict()
-    new_type = DataTypeParser.parse_data_type(new_type_dict)
-    if change.keep_nullability:
-        new_type.nullable = field.type.nullable
-    new_fields[field_index] = DataField(
-        field.id, field.name, new_type, field.description, field.default_value
-    )
+    field_names = change.field_names
+    max_depth = len(field_names)
+
+    def update_func(field: DataField, depth: int) -> DataField:
+        source_root = _get_root_type(field.type, depth, max_depth)
+        target_root = DataTypeParser.parse_data_type(change.new_data_type.to_dict())

Review Comment:
   This reparses `change.new_data_type.to_dict()` directly, but parameterized 
atomic types keep the `NOT NULL` suffix inside `AtomicType.type`. For example, 
`SchemaChange.update_column_type("v", AtomicType("DECIMAL(12, 2)", 
nullable=False))` on an existing non-null `INT` column is a valid widening, but 
`to_dict()` produces `DECIMAL(12, 2) NOT NULL`, the parser stores that whole 
string as the atomic type, and the new `can_execute_cast` check now fails 
during alter with `Unsupported data type: DECIMAL(12, 2) NOT NULL NOT NULL`. 
Could we normalize the parsed target so nullability is stored only in 
`nullable` (or fix the atomic parser to strip the suffix for parameterized 
types) before running the executable-cast check?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] [python] Support schema evolution of nested struct sub-fields [paimon]

Reply via email to