Repository: beam Updated Branches: refs/heads/master 650ee8ea6 -> 3961ce46c
Do not depend on message id in DataflowRunner

This field is deprecated and causing messages to be repeated. Hash
message to avoid printing duplicate messages.

Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bcde998f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bcde998f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bcde998f

Branch: refs/heads/master
Commit: bcde998f9fff72d59d74107db317de7b20a9f003
Parents: 650ee8e
Author: Ahmet Altay <[email protected]>
Authored: Tue Apr 25 18:42:03 2017 -0700
Committer: Ahmet Altay <[email protected]>
Committed: Wed Apr 26 18:06:03 2017 -0700

----------------------------------------------------------------------
 .../apache_beam/runners/dataflow/dataflow_runner.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/beam/blob/bcde998f/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
index 4534895..05f6833 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
@@ -76,7 +76,7 @@ class DataflowRunner(PipelineRunner):
   def poll_for_job_completion(runner, result):
     """Polls for the specified job to finish running (successfully or not)."""
     last_message_time = None
-    last_message_id = None
+    last_message_hash = None
     last_error_rank = float('-inf')
     last_error_msg = None
 
@@ -126,19 +126,20 @@ class DataflowRunner(PipelineRunner):
         messages, page_token = runner.dataflow_client.list_messages(
             job_id, page_token=page_token, start_time=last_message_time)
         for m in messages:
-          if last_message_id is not None and m.id == last_message_id:
+          message = '%s: %s: %s' % (m.time, m.messageImportance, m.messageText)
+          m_hash = hash(message)
+
+          if last_message_hash is not None and m_hash == last_message_hash:
             # Skip the first message if it is the last message we got in the
             # previous round. This can happen because we use the
             # last_message_time as a parameter of the query for new messages.
             continue
           last_message_time = m.time
-          last_message_id = m.id
+          last_message_hash = m_hash
           # Skip empty messages.
           if m.messageImportance is None:
             continue
-          logging.info(
-              '%s: %s: %s: %s', m.id, m.time, m.messageImportance,
-              m.messageText)
+          logging.info(message)
           if str(m.messageImportance) == 'JOB_MESSAGE_ERROR':
             if rank_error(m.messageText) >= last_error_rank:
               last_error_rank = rank_error(m.messageText)
