chamikaramj commented on a change in pull request #11185: [BEAM-8019] Updates 
Python SDK to handle remote SDK coders and preserve tags added by remote SDKs 
and propagate restriction coders.
URL: https://github.com/apache/beam/pull/11185#discussion_r403354509
 
 

 ##########
 File path: sdks/python/apache_beam/pipeline.py
 ##########
 @@ -1128,29 +1136,67 @@ def from_runner_api(proto,  # type: 
beam_runner_api_pb2.PTransform
                       context  # type: PipelineContext
                      ):
     # type: (...) -> AppliedPTransform
-    def is_side_input(tag):
+    def is_python_side_input(tag):
       # type: (str) -> bool
       # As per named_inputs() above.
-      return tag.startswith('side')
+      return re.match(SIDE_INPUT_REGEX, tag)
+
+    side_input_tags = []
+    if common_urns.primitives.PAR_DO.urn == proto.spec.urn:
+      # Preserving side input tags.
+      from apache_beam.utils import proto_utils
+      from apache_beam.portability.api import beam_runner_api_pb2
+      payload = (
+          proto_utils.parse_Bytes(
+              proto.spec.payload, beam_runner_api_pb2.ParDoPayload))
+      for tag, si in payload.side_inputs.items():
+        side_input_tags.append(tag)
 
     main_inputs = [
         context.pcollections.get_by_id(id) for tag,
-        id in proto.inputs.items() if not is_side_input(tag)
+        id in proto.inputs.items() if tag not in side_input_tags
     ]
 
-    # Ordering is important here.
-    indexed_side_inputs = [
-        (get_sideinput_index(tag), context.pcollections.get_by_id(id)) for tag,
-        id in proto.inputs.items() if is_side_input(tag)
-    ]
+    # Using a list here so that we can pass this into a function
+    # TODO: use nonlocal after fully migrated to Python3.
+    next_index = [0]
+
+    def _get_sideinput_index(tag, next_index):
+      if is_python_side_input(tag):
+        return get_sideinput_index(tag)
+      else:
+        index = next_index[0]
+        next_index[0] = next_index[0] + 1
+        return index
+
+    # Ordering is important here for Python sideinputs.
+    indexed_side_inputs = [(
+        _get_sideinput_index(tag, next_index),
+        context.pcollections.get_by_id(id)) for tag,
+                           id in proto.inputs.items() if tag in 
side_input_tags]
     side_inputs = [si for _, si in sorted(indexed_side_inputs)]
+
+    input_tags_to_preserve = {}
+
     transform = ptransform.PTransform.from_runner_api(proto, context)
+    if isinstance(transform, RunnerAPIPTransformHolder):
+      # For external transforms that are ParDos, we have to set side-inputs
+      # manually and preserve input tags.
+      transform.side_inputs = [pvalue.AsMultiMap(pc) for pc in side_inputs]
 
 Review comment:
   Unfortunately, 'pvalue.SideInputData.from_runner_api' is Python-only and fails for external side inputs:
https://github.com/apache/beam/blob/master/sdks/python/apache_beam/pvalue.py#L463
   
   So I enumerated the two supported types here and added a TODO.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to