robertwb commented on a change in pull request #11270: [BEAM-9639][BEAM-9608] 
Improvements for FnApiRunner
URL: https://github.com/apache/beam/pull/11270#discussion_r407663916
 
 

 ##########
 File path: 
sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py
 ##########
 @@ -367,6 +413,73 @@ def _build_process_bundle_descriptor(self):
         state_api_service_descriptor=self.state_api_service_descriptor(),
         timer_api_service_descriptor=self.data_api_service_descriptor())
 
+  def commit_output_views_to_state(self):
+    """Commit bundle outputs to state to be consumed as side inputs later.
+
+    Only the outputs that should be side inputs are committed to state.
+
+    Builds a dictionary keyed by (consumer_transform_name, tag) whose values
+    are (buffer_id, access_pattern) pairs, one entry for every side input
+    listed in `self.stage.downstream_side_inputs`, and hands it to
+    `self.execution_context.commit_side_inputs_to_state`.
+    """
+    data_side_input = {}  # type: DataSideInput
+    # downstream_side_inputs maps each PCollection to a dict of
+    # {(consumer_transform_name, tag): access_pattern}.
+    for pcoll, si_ids in self.stage.downstream_side_inputs.items():
+      for (consumer_transform_name, tag), access_pattern in si_ids.items():
+        # The buffer id is derived from the producing PCollection, so every
+        # consumer of the same PCollection refers to the same buffer id.
+        data_side_input[consumer_transform_name, tag] = (
+            translations.create_buffer_id(pcoll), access_pattern)
+    self.execution_context.commit_side_inputs_to_state(data_side_input)
+
+  def extract_bundle_inputs(self):
+    # type: (...) -> Tuple[Dict[str, PartitionableBuffer], DataOutput, Dict[Tuple[str, str], str]]
+
+    """Returns maps of transform names to PCollection identifiers.
+
+    Also mutates IO stages to point to the data ApiServiceDescriptor: the
+    payload of every data input/output transform in the stage is replaced by
+    a serialized `RemoteGrpcPort`.
+
+    Returns:
+      A tuple of (data_input, data_output, expected_timer_output)
+        dictionaries.
+        `data_input` is a dictionary mapping the unique name of each data
+        input transform to a PCollection buffer; `data_output` is a
+        dictionary mapping the unique name of each data output transform to
+        the PCollection ID read from its original payload;
+        `expected_timer_output` is a dictionary mapping
+        (transform unique name, timer_family_id) to a timer buffer id.
+    """
+    data_input = {}  # type: Dict[str, PartitionableBuffer]
+    data_output = {}  # type: DataOutput
+    # A mapping of {(transform_id, timer_family_id) : buffer_id}
+    expected_timer_output = {}  # type: Dict[Tuple[str, str], str]
+    for transform in self.stage.transforms:
+      if transform.spec.urn in (bundle_processor.DATA_INPUT_URN,
+                                bundle_processor.DATA_OUTPUT_URN):
+        # Until it is rewritten below, the payload of an IO transform holds
+        # the id of the PCollection it reads from or writes to.
+        pcoll_id = transform.spec.payload
+        if transform.spec.urn == bundle_processor.DATA_INPUT_URN:
+          # A data input is encoded with the coder of its single output.
+          coder_id = self.execution_context.data_channel_coders[only_element(
+              transform.outputs.values())]
+          # Use the registered safe coder when there is one, otherwise the
+          # data channel coder itself.
+          coder = self.execution_context.pipeline_context.coders[
+              self.execution_context.safe_coders.get(coder_id, coder_id)]
+          if pcoll_id == translations.IMPULSE_BUFFER:
+            # Impulse inputs get a fresh buffer seeded with the single
+            # encoded impulse value.
+            data_input[transform.unique_name] = ListBuffer(
+                coder_impl=coder.get_impl())
+            data_input[transform.unique_name].append(ENCODED_IMPULSE_VALUE)
+          else:
+            # Create the shared buffer for this PCollection on first use and
+            # reuse it afterwards.
+            if pcoll_id not in self.execution_context.pcoll_buffers:
+              self.execution_context.pcoll_buffers[pcoll_id] = ListBuffer(
+                  coder_impl=coder.get_impl())
+            data_input[transform.unique_name] = \
+              self.execution_context.pcoll_buffers[pcoll_id]
+        elif transform.spec.urn == bundle_processor.DATA_OUTPUT_URN:
+          data_output[transform.unique_name] = pcoll_id
+          # A data output is encoded with the coder of its single input.
+          coder_id = self.execution_context.data_channel_coders[only_element(
+              transform.inputs.values())]
+        else:
+          # Not reachable given the enclosing URN check; defensive guard.
+          raise NotImplementedError
+        # Replace the payload with a RemoteGrpcPort carrying the coder id
+        # and, when a data API service descriptor is available, its URL.
+        data_spec = beam_fn_api_pb2.RemoteGrpcPort(coder_id=coder_id)
+        data_api_service_descriptor = \
+          self.data_api_service_descriptor()
+        if data_api_service_descriptor:
+          data_spec.api_service_descriptor.url = (
+              data_api_service_descriptor.url)
+        transform.spec.payload = data_spec.SerializeToString()
+      elif transform.spec.urn in translations.PAR_DO_URNS:
+        # NOTE(review): `payload` is not assigned anywhere in this method as
+        # shown; presumably it should be this transform's parsed ParDo
+        # payload - confirm, otherwise this raises NameError.
+        # NOTE(review): `create_buffer_id` is unqualified here while
+        # commit_output_views_to_state calls translations.create_buffer_id;
+        # assumes the name is imported directly into this module - confirm.
+        for timer_family_id in payload.timer_family_specs.keys():
+          expected_timer_output[(transform.unique_name, timer_family_id)] = (
+              create_buffer_id(timer_family_id, 'timers'))
+    return data_input, data_output, expected_timer_output
 
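 Review comment:
   Please update the docstring and the `# type:` comment to match: the method now returns three values (`data_input`, `data_output`, `expected_timer_output`).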

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to