robertwb commented on a change in pull request #12881: URL: https://github.com/apache/beam/pull/12881#discussion_r492466948
########## File path: sdks/python/apache_beam/runners/worker/bundle_processor.py ########## @@ -119,10 +125,10 @@ class RunnerIOOperation(operations.Operation): def __init__(self, name_context, # type: Union[str, common.NameContext] - step_name, + step_name, # type: Any Review comment: Is this not str or Optional[str]? ########## File path: sdks/python/apache_beam/runners/worker/data_plane.py ########## @@ -331,6 +369,7 @@ def add_to_inverse_output(timer): is_last=False)) def close_stream(timer): + # type: (bytes) -> None Review comment: I wonder if we should call this encoded_timer[s]? ########## File path: sdks/python/apache_beam/coders/coder_impl.py ########## @@ -725,7 +726,7 @@ def __init__(self, key_coder_impl, window_coder_impl): self._tag_coder_impl = StrUtf8Coder().get_impl() def encode_to_stream(self, value, out, nested): - # type: (dict, create_OutputStream, bool) -> None + # type: (userstate.Timer, create_OutputStream, bool) -> None Review comment: I think it used to be correct back when timers were being implemented. This code changed a couple of months ago too. ########## File path: sdks/python/apache_beam/runners/worker/bundle_processor.py ########## @@ -1070,23 +1078,24 @@ def delayed_bundle_application(self, return beam_fn_api_pb2.DelayedBundleApplication( requested_time_delay=proto_deferred_watermark, application=self.construct_bundle_application( - op, current_watermark, element_and_restriction)) + op.input_info, current_watermark, element_and_restriction)) def bundle_application(self, op, # type: operations.DoOperation primary # type: SplitResultPrimary ): # type: (...) 
-> beam_fn_api_pb2.BundleApplication - return self.construct_bundle_application(op, None, primary.primary_value) + assert op.input_info is not None + return self.construct_bundle_application( + op.input_info, None, primary.primary_value) def construct_bundle_application(self, - op, # type: operations.DoOperation + op_input_info, # type: operations.OpInputInfo Review comment: Sounds good to me. ########## File path: sdks/python/apache_beam/runners/worker/data_plane.py ########## @@ -81,7 +85,11 @@ class ClosableOutputStream(OutputStream): """A Outputstream for use with CoderImpls that has a close() method.""" - def __init__(self, close_callback=None): + def __init__( + self, + close_callback=None # type: Optional[Optional[Callable[[bytes], None]]] Review comment: Why the double Optional (here and elsewhere below)? ########## File path: sdks/python/apache_beam/runners/worker/data_plane.py ########## @@ -218,7 +249,7 @@ class DataChannel(with_metaclass(abc.ABCMeta, object)): # type: ignore[misc] @abc.abstractmethod def input_elements(self, instruction_id, # type: str - expected_inputs, # type: Collection[str] + expected_inputs, # type: Sized Review comment: Don't we call `__contains__` as well? I'd rather keep it more fully typed. ########## File path: sdks/python/apache_beam/runners/worker/bundle_processor.py ########## @@ -947,11 +955,11 @@ def process_bundle(self, instruction_id): # (transform_id, timer_family_id). data_channels = collections.defaultdict( list - ) # type: DefaultDict[data_plane.GrpcClientDataChannel, List[str]] + ) # type: DefaultDict[data_plane.GrpcClientDataChannel, List[Union[str, Tuple[str, str]]]] Review comment: Yep, this changed a couple of months ago. It'll be good to finally have these type annotations checked. ########## File path: sdks/python/apache_beam/runners/worker/data_plane.py ########## @@ -243,7 +274,7 @@ def output_stream( instruction_id, # type: str transform_id # type: str ): - # type: (...) 
-> ClosableOutputStream + # type: (...) -> SizeBasedBufferingClosableOutputStream Review comment: We're also thinking about adding a time-based one. Let's add a no-op maybe_flush method to the base class and keep ClosableOutputStream everywhere. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org