[
https://issues.apache.org/jira/browse/BEAM-5039?focusedWorklogId=129014&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129014
]
ASF GitHub Bot logged work on BEAM-5039:
----------------------------------------
Author: ASF GitHub Bot
Created on: 30/Jul/18 23:23
Start Date: 30/Jul/18 23:23
Worklog Time Spent: 10m
Work Description: charlesccychen closed pull request #6085: [BEAM-5039]
Fix streaming_wordcount_it_test
URL: https://github.com/apache/beam/pull/6085
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign pull request
(one opened from a fork) once it has been merged, the diff is
reproduced below for the sake of provenance:
diff --git a/sdks/python/apache_beam/examples/streaming_wordcount.py
b/sdks/python/apache_beam/examples/streaming_wordcount.py
index db3b97cda18..2bb8e4fc27a 100644
--- a/sdks/python/apache_beam/examples/streaming_wordcount.py
+++ b/sdks/python/apache_beam/examples/streaming_wordcount.py
@@ -60,10 +60,16 @@ def run(argv=None):
# Read from PubSub into a PCollection.
if known_args.input_subscription:
- lines = p | beam.io.ReadFromPubSub(
- subscription=known_args.input_subscription)
+ messages = (p
+ | beam.io.ReadFromPubSub(
+ subscription=known_args.input_subscription)
+ .with_output_types(six.binary_type))
else:
- lines = p | beam.io.ReadFromPubSub(topic=known_args.input_topic)
+ messages = (p
+ | beam.io.ReadFromPubSub(topic=known_args.input_topic)
+ .with_output_types(six.binary_type))
+
+ lines = messages | 'decode' >> beam.Map(lambda x: x.decode('utf-8'))
# Count the occurrences of each word.
def count_ones(word_ones):
@@ -83,7 +89,10 @@ def format_result(word_count):
(word, count) = word_count
return '%s: %d' % (word, count)
- output = counts | 'format' >> beam.Map(format_result)
+ output = (counts
+ | 'format' >> beam.Map(format_result)
+ | 'encode' >> beam.Map(lambda x: x.encode('utf-8'))
+ .with_output_types(six.binary_type))
# Write to PubSub.
# pylint: disable=expression-not-assigned
diff --git a/sdks/python/apache_beam/io/gcp/pubsub.py
b/sdks/python/apache_beam/io/gcp/pubsub.py
index a8e8d4de81a..3b65fac80e7 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub.py
@@ -187,7 +187,7 @@ def to_runner_api_parameter(self, context):
return self.to_runner_api_pickled(context)
-@deprecated(since='2.6.0', extra_message='Use ReadFromPubSub instead.')
+@deprecated(since='2.7.0', extra_message='Use ReadFromPubSub instead.')
def ReadStringsFromPubSub(topic=None, subscription=None, id_label=None):
return _ReadStringsFromPubSub(topic, subscription, id_label)
@@ -210,7 +210,7 @@ def expand(self, pvalue):
return p
-@deprecated(since='2.6.0', extra_message='Use WriteToPubSub instead.')
+@deprecated(since='2.7.0', extra_message='Use WriteToPubSub instead.')
def WriteStringsToPubSub(topic):
return _WriteStringsToPubSub(topic)
@@ -238,7 +238,7 @@ class WriteToPubSub(PTransform):
"""A ``PTransform`` for writing messages to Cloud Pub/Sub."""
# Implementation note: This ``PTransform`` is overridden by Directrunner.
- def __init__(self, topic, with_attributes, id_label=None,
+ def __init__(self, topic, with_attributes=False, id_label=None,
timestamp_attribute=None):
"""Initializes ``WriteToPubSub``.
diff --git a/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher.py
b/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher.py
index 9890dcd92e1..6217faf569d 100644
--- a/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher.py
+++ b/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher.py
@@ -102,6 +102,7 @@ def _wait_for_messages(self, subscription, expected_num,
timeout):
while time.time() - start_time <= timeout:
pulled = subscription.pull(max_messages=MAX_MESSAGES_IN_ONE_PULL)
for ack_id, message in pulled:
+ subscription.acknowledge([ack_id])
if not self.with_attributes:
total_messages.append(message.data)
continue
@@ -116,7 +117,6 @@ def _wait_for_messages(self, subscription, expected_num,
timeout):
'expected attribute not found.')
total_messages.append(msg)
- subscription.acknowledge([ack_id])
if len(total_messages) >= expected_num:
return total_messages
time.sleep(1)
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
Issue Time Tracking
-------------------
Worklog Id: (was: 129014)
Time Spent: 1h (was: 50m)
> python postcommit broken in call to WriteToPubSub
> -------------------------------------------------
>
> Key: BEAM-5039
> URL: https://issues.apache.org/jira/browse/BEAM-5039
> Project: Beam
> Issue Type: Bug
> Components: sdk-py-core
> Reporter: Udi Meiri
> Assignee: Udi Meiri
> Priority: Major
> Time Spent: 1h
> Remaining Estimate: 0h
>
> ERROR: test_streaming_wordcount_it
> (apache_beam.examples.streaming_wordcount_it_test.StreamingWordCountIT)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
> File
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/src/sdks/python/apache_beam/examples/streaming_wordcount_it_test.py",
> line 105, in test_streaming_wordcount_it
> self.test_pipeline.get_full_options_as_args(**extra_opts))
> File
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/src/sdks/python/apache_beam/examples/streaming_wordcount.py",
> line 90, in run
> output | beam.io.WriteToPubSub(known_args.output_topic)
> TypeError: __init__() takes at least 3 arguments (2 given)
> https://builds.apache.org/job/beam_PostCommit_Python_Verify/5597/consoleText
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)