damccorm commented on code in PR #35672: URL: https://github.com/apache/beam/pull/35672#discussion_r2236918841
########## sdks/python/apache_beam/yaml/yaml_provider.py: ########## @@ -932,6 +932,129 @@ def __init__(self): # pylint: disable=useless-parent-delegation super().__init__() + def _unify_field_types(self, existing_type, field_type): + """Unify two field types, handling Optional and List types.""" + # Extract inner types from Optional if needed + existing_inner = ( + existing_type.__args__[0] if hasattr(existing_type, '__args__') and + len(existing_type.__args__) == 1 else existing_type) + field_inner = ( + field_type.__args__[0] if hasattr(field_type, '__args__') and + len(field_type.__args__) == 1 else field_type) + + # Handle type unification more carefully + if existing_inner == Any or field_inner == Any: + return Optional[Any] + elif existing_inner == field_inner: + return Optional[existing_inner] Review Comment: I see - could you add a comment here explaining that this function expects all iterables to already be coerced to lists? I agree we do that correctly below, but it is probably a little brittle to rely on this without being explicit about the API ########## sdks/python/apache_beam/yaml/yaml_provider.py: ########## @@ -932,6 +932,129 @@ def __init__(self): # pylint: disable=useless-parent-delegation super().__init__() + def _unify_field_types(self, existing_type, field_type): + """Unify two field types, handling Optional and List types.""" + # Extract inner types from Optional if needed + existing_inner = ( + existing_type.__args__[0] if hasattr(existing_type, '__args__') and + len(existing_type.__args__) == 1 else existing_type) + field_inner = ( + field_type.__args__[0] if hasattr(field_type, '__args__') and + len(field_type.__args__) == 1 else field_type) + + # Handle type unification more carefully + if existing_inner == Any or field_inner == Any: + return Optional[Any] + elif existing_inner == field_inner: + return Optional[existing_inner] + else: + # Check for list types and prioritize them over other types + from apache_beam.typehints import typehints as th + existing_is_list = ( + hasattr(existing_inner, '__origin__') and + existing_inner.__origin__ in (list, th.List)) + field_is_list = ( + hasattr(field_inner, '__origin__') and + field_inner.__origin__ in (list, th.List)) + + if existing_is_list and field_is_list: + # Both are list types, unify their element types + existing_elem = existing_inner.__args__[ + 0] if existing_inner.__args__ else Any + field_elem = field_inner.__args__[0] if field_inner.__args__ else Any + if existing_elem == field_elem: + return Optional[th.List[existing_elem]] + else: + return Optional[th.List[Any]] + elif existing_is_list: + # Existing is list, keep it as list type + return Optional[existing_inner] + elif field_is_list: + # New field is list, use list type + return Optional[field_inner] Review Comment: Right, but if I'm unifying `List[int]` and `int`, right now it unifies to `Optional[List[int]]`, right? But that isn't right if I'm flattening `{foo: 1}` and `{foo: [1,2,3]}` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@beam.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org