JingsongLi commented on code in PR #8187:
URL: https://github.com/apache/paimon/pull/8187#discussion_r3393444513
##########
paimon-python/pypaimon/schema/schema_manager.py:
##########
@@ -44,6 +46,108 @@ def _find_field_index(fields: List[DataField], field_name:
str) -> Optional[int]
return None
+def _extract_row_data_fields(data_type, out_fields: List[DataField]) -> int:
+ """Collect the immediate sub-fields reachable from *data_type* into
+ *out_fields* and return the path depth consumed. A ROW contributes its
+ fields (depth 1); an ARRAY/MAP is transparent and descends into its
+ element/value (consuming the ``element``/``value`` path token); anything
+ else contributes nothing (depth 1)."""
+ if isinstance(data_type, RowType):
+ out_fields.extend(data_type.fields)
+ return 1
+ if isinstance(data_type, ArrayType):
+ return _extract_row_data_fields(data_type.element, out_fields) + 1
+ if isinstance(data_type, MapType):
+ return _extract_row_data_fields(data_type.value, out_fields) + 1
+ return 1
+
+
+def _wrap_new_row_type(data_type, nested_fields: List[DataField]):
+ """Rebuild *data_type* substituting *nested_fields* at its innermost ROW,
+ preserving any ARRAY/MAP wrappers."""
+ if isinstance(data_type, RowType):
+ return RowType(data_type.nullable, nested_fields)
+ if isinstance(data_type, ArrayType):
+ return ArrayType(data_type.nullable,
_wrap_new_row_type(data_type.element, nested_fields))
+ if isinstance(data_type, MapType):
+ return MapType(
+ data_type.nullable, data_type.key,
+ _wrap_new_row_type(data_type.value, nested_fields))
+ return data_type
+
+
+def _get_root_type(data_type, curr_depth: int, max_depth: int):
+ """Return the type sitting at ``max_depth`` when walking ARRAY/MAP wrappers
+ from *data_type* (e.g. the INT in ARRAY<MAP<STRING, ARRAY<INT>>>)."""
+ if curr_depth == max_depth - 1:
+ return data_type
+ if isinstance(data_type, ArrayType):
+ return _get_root_type(data_type.element, curr_depth + 1, max_depth)
+ if isinstance(data_type, MapType):
+ return _get_root_type(data_type.value, curr_depth + 1, max_depth)
+ return data_type
+
+
+def _get_array_map_type_with_target_type_root(source, target, curr_depth: int,
max_depth: int):
+ """Rebuild *source* with *target* substituted at ``max_depth``, keeping the
+ ARRAY/MAP wrappers around it intact."""
+ if curr_depth == max_depth - 1:
+ return target
+ if isinstance(source, ArrayType):
+ return ArrayType(
+ source.nullable,
+ _get_array_map_type_with_target_type_root(
+ source.element, target, curr_depth + 1, max_depth))
+ if isinstance(source, MapType):
+ return MapType(
+ source.nullable, source.key,
+ _get_array_map_type_with_target_type_root(
+ source.value, target, curr_depth + 1, max_depth))
+ return target
+
+
+def _update_intermediate_column(new_fields, previous_fields, depth, prev_depth,
+ field_names, update_last_fn):
+ """Walk *field_names* into nested ROW (transparently through ARRAY/MAP),
+ then run *update_last_fn(depth, fields, name)* on the field list that
+ directly contains the final path element, rebuilding parent types
upward."""
+ if depth == len(field_names) - 1:
+ update_last_fn(depth, new_fields, field_names[depth])
+ return
+ if depth >= len(field_names):
+ # Path descended through ARRAY/MAP past the last ROW; operate on the
+ # field that owns the wrapper at the previous depth.
+ update_last_fn(prev_depth, previous_fields, field_names[prev_depth])
+ return
+ for i, field in enumerate(new_fields):
+ if field.name != field_names[depth]:
+ continue
+ nested_fields: List[DataField] = []
+ new_depth = depth + _extract_row_data_fields(field.type, nested_fields)
Review Comment:
This path walker does not validate the wrapper token it consumes for
ARRAY/MAP. For example, on `arr ARRAY<ROW<a BIGINT, b STRING>>`,
`SchemaChange.add_column(["arr", "wrong", "c"], INT)` is accepted and adds `c`
to the array element exactly like `["arr", "element", "c"]`; the same applies
to map values. That makes invalid column paths silently mutate the schema.
Please require the consumed token to be `element` for arrays and `value` for
maps (or otherwise reject unknown wrapper steps) before descending.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]