[ 
https://issues.apache.org/jira/browse/BEAM-4150?focusedWorklogId=114621&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-114621
 ]

ASF GitHub Bot logged work on BEAM-4150:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 22/Jun/18 05:45
            Start Date: 22/Jun/18 05:45
    Worklog Time Spent: 10m 
      Work Description: robertwb closed pull request #5700: [BEAM-4150] Extract IO coders from the RemoteGrpcPort.
URL: https://github.com/apache/beam/pull/5700
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py
index b807eae9d1c..607e6ca92c1 100644
--- a/sdks/python/apache_beam/runners/portability/fn_api_runner.py
+++ b/sdks/python/apache_beam/runners/portability/fn_api_runner.py
@@ -881,18 +881,20 @@ def extract_endpoints(stage):
           if transform.spec.urn == bundle_processor.DATA_INPUT_URN:
             target = transform.unique_name, only_element(transform.outputs)
             data_input[target] = pcoll_buffers[pcoll_id]
+            coder_id = pipeline_components.pcollections[
+                only_element(transform.outputs.values())].coder_id
           elif transform.spec.urn == bundle_processor.DATA_OUTPUT_URN:
             target = transform.unique_name, only_element(transform.inputs)
             data_output[target] = pcoll_id
+            coder_id = pipeline_components.pcollections[
+                only_element(transform.inputs.values())].coder_id
           else:
             raise NotImplementedError
+          data_spec = beam_fn_api_pb2.RemoteGrpcPort(coder_id=coder_id)
           if data_api_service_descriptor:
-            data_spec = beam_fn_api_pb2.RemoteGrpcPort()
             data_spec.api_service_descriptor.url = (
                 data_api_service_descriptor.url)
-            transform.spec.payload = data_spec.SerializeToString()
-          else:
-            transform.spec.payload = ""
+          transform.spec.payload = data_spec.SerializeToString()
         elif transform.spec.urn == common_urns.primitives.PAR_DO.urn:
           payload = proto_utils.parse_Bytes(
               transform.spec.payload, beam_runner_api_pb2.ParDoPayload)
diff --git a/sdks/python/apache_beam/runners/worker/bundle_processor.py b/sdks/python/apache_beam/runners/worker/bundle_processor.py
index 7727c87c4c7..da0052aefad 100644
--- a/sdks/python/apache_beam/runners/worker/bundle_processor.py
+++ b/sdks/python/apache_beam/runners/worker/bundle_processor.py
@@ -359,6 +359,8 @@ def create_operation(self, transform_id, consumers):
     return creator(self, transform_id, transform_proto, payload, consumers)
 
   def get_coder(self, coder_id):
+    if coder_id not in self.descriptor.coders:
+      raise KeyError("No such coder: %s" % coder_id)
     coder_proto = self.descriptor.coders[coder_id]
     if coder_proto.spec.spec.urn:
       return self.context.coders.get_by_id(coder_id)
@@ -418,7 +420,9 @@ def create(factory, transform_id, transform_proto, grpc_port, consumers):
       consumers,
       factory.counter_factory,
       factory.state_sampler,
-      factory.get_only_output_coder(transform_proto),
+      factory.get_coder(grpc_port.coder_id)
+      if grpc_port.coder_id
+      else factory.get_only_output_coder(transform_proto),
       input_target=target,
       data_channel=factory.data_channel_factory.create_data_channel(grpc_port))
 
@@ -435,8 +439,9 @@ def create(factory, transform_id, transform_proto, grpc_port, consumers):
       consumers,
       factory.counter_factory,
       factory.state_sampler,
-      # TODO(robertwb): Perhaps this could be distinct from the input coder?
-      factory.get_only_input_coder(transform_proto),
+      factory.get_coder(grpc_port.coder_id)
+      if grpc_port.coder_id
+      else factory.get_only_input_coder(transform_proto),
       target=target,
       data_channel=factory.data_channel_factory.create_data_channel(grpc_port))
 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 114621)
    Time Spent: 2h  (was: 1h 50m)

> Standardize use of PCollection coder proto attribute
> ----------------------------------------------------
>
>                 Key: BEAM-4150
>                 URL: https://issues.apache.org/jira/browse/BEAM-4150
>             Project: Beam
>          Issue Type: Task
>          Components: beam-model
>            Reporter: Robert Bradshaw
>            Assignee: Kenneth Knowles
>            Priority: Major
>             Fix For: Not applicable
>
>          Time Spent: 2h
>  Remaining Estimate: 0h
>
> In some places the PCollection coder attribute is expected to be a
> WindowedCoder, in others the raw ElementCoder. We should use the same
> convention (decided in discussion to be the raw ElementCoder) everywhere.
> The WindowCoder can be pulled out of the attached windowing strategy, and the
> input/output ports should specify the encoding directly rather than read the
> adjacent PCollection coder fields.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to