TheNeuralBit commented on code in PR #22066: URL: https://github.com/apache/beam/pull/22066#discussion_r910527379
########## sdks/python/apache_beam/typehints/row_type.py: ########## @@ -17,19 +17,89 @@ # pytype: skip-file +from typing import List +from typing import Optional +from typing import Sequence +from typing import Tuple + from apache_beam.typehints import typehints +from apache_beam.typehints.native_type_compatibility import match_is_named_tuple + +# Name of the attribute added to user types (existing and generated) to store +# the corresponding schema ID +_BEAM_SCHEMA_ID = "_beam_schema_id" class RowTypeConstraint(typehints.TypeConstraint): - def __init__(self, fields): - self._fields = tuple(fields) + def __init__(self, fields: List[Tuple[str, type]], user_type=None): + """For internal use only, no backwards compatibility guarantees. See + https://beam.apache.org/documentation/programming-guide/#schemas-for-pl-types + for guidance on creating PCollections with inferred schemas. + + Note RowTypeConstraint does not currently store functions for converting + to/from the user type. Currently we only support a few types that satisfy + some assumptions: + + - **to:** We assume that the user type can be constructed with field values + in order. + - **from:** We assume that field values can be accessed from instances of + the type by attribute (i.e. with ``getattr(obj, field_name)``). + + The RowTypeConstraint constructor should not be called directly (even + internally to Beam). Prefer static methods ``from_user_type`` or + ``from_fields``. + + Parameters: + fields: a list of (name, type) tuples, representing the schema inferred + from user_type. + user_type: constructor for a user type (e.g. NamedTuple class) that is + used to represent this schema in user code. 
+ """ + # Recursively wrap row types in a RowTypeConstraint + self._fields = tuple((name, RowTypeConstraint.from_user_type(typ) or typ) + for name, + typ in fields) + + self._user_type = user_type + if self._user_type is not None and hasattr(self._user_type, + _BEAM_SCHEMA_ID): + self._schema_id = getattr(self._user_type, _BEAM_SCHEMA_ID) + else: + self._schema_id = None + + @staticmethod + def from_user_type(user_type: type) -> Optional['RowTypeConstraint']: + if match_is_named_tuple(user_type): + fields = [(name, user_type.__annotations__[name]) + for name in user_type._fields] + + return RowTypeConstraint(fields=fields, user_type=user_type) + + return None + + @staticmethod + def from_fields(fields: Sequence[Tuple[str, type]]) -> 'RowTypeConstraint': Review Comment: Well it looks like PyLint fails here because flake8 can't handle these postponed annotations ``` 16:00:57 Running flake8... 16:00:57 /home/jenkins/jenkins-slave/workspace/beam_PreCommit_PythonLint_Commit/src/sdks/python/test-suites/tox/py37/build/srcs/sdks/python/target/.tox-py37-lint/py37-lint/lib/python3.7/site-packages/pycodestyle.py:113: FutureWarning: Possible nested set at position 1 16:00:57 EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]') 16:01:05 apache_beam/typehints/row_type.py:76:51: F821 undefined name 'RowTypeConstraint' 16:01:05 def from_user_type(user_type: type) -> Optional[RowTypeConstraint]: 16:01:05 ^ 16:01:05 apache_beam/typehints/row_type.py:86:58: F821 undefined name 'RowTypeConstraint' 16:01:05 def from_fields(fields: Sequence[Tuple[str, type]]) -> RowTypeConstraint: 16:01:05 ^ 16:01:05 2 F821 undefined name 'RowTypeConstraint' 16:01:05 2 16:01:05 Command exited with non-zero status 1 16:01:05 582.99user 20.95system 1:17.53elapsed 778%CPU (0avgtext+0avgdata 677744maxresident)k 16:01:05 16inputs+400outputs (2major+1959397minor)pagefaults 0swaps 16:01:05 ERROR: InvocationError for command /usr/bin/time scripts/run_pylint.sh (exited with code 1) ``` This is 
https://github.com/PyCQA/pyflakes/issues/356. I sent https://github.com/apache/beam/pull/22110 to upgrade flake8. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
