robertwb commented on a change in pull request #12881:
URL: https://github.com/apache/beam/pull/12881#discussion_r492466948
##########
File path: sdks/python/apache_beam/runners/worker/bundle_processor.py
##########
@@ -119,10 +125,10 @@ class RunnerIOOperation(operations.Operation):
def __init__(self,
name_context, # type: Union[str, common.NameContext]
- step_name,
+ step_name, # type: Any
Review comment:
Is this not str or Optional[str]?
##########
File path: sdks/python/apache_beam/runners/worker/data_plane.py
##########
@@ -331,6 +369,7 @@ def add_to_inverse_output(timer):
is_last=False))
def close_stream(timer):
+ # type: (bytes) -> None
Review comment:
I wonder if we should call this encoded_timer[s]?
##########
File path: sdks/python/apache_beam/coders/coder_impl.py
##########
@@ -725,7 +726,7 @@ def __init__(self, key_coder_impl, window_coder_impl):
self._tag_coder_impl = StrUtf8Coder().get_impl()
def encode_to_stream(self, value, out, nested):
- # type: (dict, create_OutputStream, bool) -> None
+ # type: (userstate.Timer, create_OutputStream, bool) -> None
Review comment:
I think it used to be correct back when timers were being implemented.
This code changed a couple of months ago too.
##########
File path: sdks/python/apache_beam/runners/worker/bundle_processor.py
##########
@@ -1070,23 +1078,24 @@ def delayed_bundle_application(self,
return beam_fn_api_pb2.DelayedBundleApplication(
requested_time_delay=proto_deferred_watermark,
application=self.construct_bundle_application(
- op, current_watermark, element_and_restriction))
+ op.input_info, current_watermark, element_and_restriction))
def bundle_application(self,
op, # type: operations.DoOperation
primary # type: SplitResultPrimary
):
# type: (...) -> beam_fn_api_pb2.BundleApplication
- return self.construct_bundle_application(op, None, primary.primary_value)
+ assert op.input_info is not None
+ return self.construct_bundle_application(
+ op.input_info, None, primary.primary_value)
def construct_bundle_application(self,
- op, # type: operations.DoOperation
+ op_input_info, # type: operations.OpInputInfo
Review comment:
Sounds good to me.
##########
File path: sdks/python/apache_beam/runners/worker/data_plane.py
##########
@@ -81,7 +85,11 @@
class ClosableOutputStream(OutputStream):
"""A Outputstream for use with CoderImpls that has a close() method."""
- def __init__(self, close_callback=None):
+ def __init__(
+ self,
+ close_callback=None # type: Optional[Optional[Callable[[bytes], None]]]
Review comment:
Why the double Optional (here and elsewhere below)?
##########
File path: sdks/python/apache_beam/runners/worker/data_plane.py
##########
@@ -218,7 +249,7 @@ class DataChannel(with_metaclass(abc.ABCMeta, object)): # type: ignore[misc]
@abc.abstractmethod
def input_elements(self,
instruction_id, # type: str
- expected_inputs, # type: Collection[str]
+ expected_inputs, # type: Sized
Review comment:
Don't we call `__contains__` as well? I'd rather keep it more fully
typed.
##########
File path: sdks/python/apache_beam/runners/worker/bundle_processor.py
##########
@@ -947,11 +955,11 @@ def process_bundle(self, instruction_id):
# (transform_id, timer_family_id).
data_channels = collections.defaultdict(
list
- ) # type: DefaultDict[data_plane.GrpcClientDataChannel, List[str]]
+ ) # type: DefaultDict[data_plane.GrpcClientDataChannel, List[Union[str, Tuple[str, str]]]]
Review comment:
Yep, this changed a couple of months ago. It'll be good to finally have
these type annotations checked.
##########
File path: sdks/python/apache_beam/runners/worker/data_plane.py
##########
@@ -243,7 +274,7 @@ def output_stream(
instruction_id, # type: str
transform_id # type: str
):
- # type: (...) -> ClosableOutputStream
+ # type: (...) -> SizeBasedBufferingClosableOutputStream
Review comment:
We're also thinking about adding a time-based one.
Let's add a no-op `maybe_flush` method to the base class and keep
`ClosableOutputStream` everywhere.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org