[beam] branch master updated: [BEAM-7630] add ITs for writing and reading bytes from pubsub

pabloem Wed, 17 Jul 2019 11:52:15 -0700

This is an automated email from the ASF dual-hosted git repository.

pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git



The following commit(s) were added to refs/heads/master by this push:
     new 3b11a4c  [BEAM-7630] add ITs for writing and reading bytes from pubsub
     new 2b5e30d  Merge pull request #8985 from Juta/bytes
3b11a4c is described below

commit 3b11a4cf491ef49ea090c26ad4a2ebd72a94ab8c
Author: Juta <[email protected]>
AuthorDate: Mon Jul 8 10:48:43 2019 +0200

    [BEAM-7630] add ITs for writing and reading bytes from pubsub
---
 .../examples/streaming_wordcount_it_test.py        |  3 ++-
 .../apache_beam/io/gcp/pubsub_integration_test.py  | 24 ++++++++++++++++++++--
 .../apache_beam/io/gcp/tests/pubsub_matcher.py     |  2 +-
 .../io/gcp/tests/pubsub_matcher_test.py            |  6 +++---
 4 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/sdks/python/apache_beam/examples/streaming_wordcount_it_test.py b/sdks/python/apache_beam/examples/streaming_wordcount_it_test.py
index eddca52..c194d52 100644
--- a/sdks/python/apache_beam/examples/streaming_wordcount_it_test.py
+++ b/sdks/python/apache_beam/examples/streaming_wordcount_it_test.py
@@ -82,7 +82,8 @@ class StreamingWordCountIT(unittest.TestCase):
   @attr('IT')
   def test_streaming_wordcount_it(self):
     # Build expected dataset.
-    expected_msg = [('%d: 1' % num) for num in range(DEFAULT_INPUT_NUMBERS)]
+    expected_msg = [('%d: 1' % num).encode('utf-8')
+                    for num in range(DEFAULT_INPUT_NUMBERS)]
 
     # Set extra options to the pipeline for test purpose
     state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
diff --git a/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py b/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py
index c8a743e..2c43786 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py
@@ -63,6 +63,10 @@ class PubSubIntegrationTest(unittest.TestCase):
           PubsubMessage(b'data002', {
               TIMESTAMP_ATTRIBUTE: '2018-07-11T02:02:50.149000Z',
           }),
+          PubsubMessage(b'data003\xab\xac', {}),
+          PubsubMessage(b'data004\xab\xac', {
+              TIMESTAMP_ATTRIBUTE: '2018-07-11T02:02:50.149000Z',
+          })
       ],
       'TestDataflowRunner': [
           # Use ID_LABEL attribute to deduplicate messages with the same ID.
@@ -74,6 +78,12 @@ class PubSubIntegrationTest(unittest.TestCase):
           # by Beam), as a TIMESTAMP_ATTRIBUTE + '_out' attribute.
           PubsubMessage(b'data002', {
               TIMESTAMP_ATTRIBUTE: '2018-07-11T02:02:50.149000Z',
+          }),
+          PubsubMessage(b'data003\xab\xac', {ID_LABEL: 'foo2'}),
+          PubsubMessage(b'data003\xab\xac', {ID_LABEL: 'foo2'}),
+          PubsubMessage(b'data003\xab\xac', {ID_LABEL: 'foo2'}),
+          PubsubMessage(b'data004\xab\xac', {
+              TIMESTAMP_ATTRIBUTE: '2018-07-11T02:02:50.149000Z',
           })
       ],
   }
@@ -85,6 +95,12 @@ class PubSubIntegrationTest(unittest.TestCase):
               TIMESTAMP_ATTRIBUTE + '_out': '2018-07-11T02:02:50.149000Z',
               'processed': 'IT',
           }),
+          PubsubMessage(b'data003\xab\xac-seen', {'processed': 'IT'}),
+          PubsubMessage(b'data004\xab\xac-seen', {
+              TIMESTAMP_ATTRIBUTE: '2018-07-11T02:02:50.149000Z',
+              TIMESTAMP_ATTRIBUTE + '_out': '2018-07-11T02:02:50.149000Z',
+              'processed': 'IT',
+          })
       ],
       'TestDataflowRunner': [
           PubsubMessage(b'data001-seen', {'processed': 'IT'}),
@@ -92,6 +108,11 @@ class PubSubIntegrationTest(unittest.TestCase):
               TIMESTAMP_ATTRIBUTE + '_out': '2018-07-11T02:02:50.149000Z',
               'processed': 'IT',
           }),
+          PubsubMessage(b'data003\xab\xac-seen', {'processed': 'IT'}),
+          PubsubMessage(b'data004\xab\xac-seen', {
+              TIMESTAMP_ATTRIBUTE + '_out': '2018-07-11T02:02:50.149000Z',
+              'processed': 'IT',
+          })
       ],
   }
 
@@ -139,8 +160,7 @@ class PubSubIntegrationTest(unittest.TestCase):
     state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
     expected_messages = self.EXPECTED_OUTPUT_MESSAGES[self.runner_name]
     if not with_attributes:
-      expected_messages = [pubsub_msg.data.decode('utf-8')
-                           for pubsub_msg in expected_messages]
+      expected_messages = [pubsub_msg.data for pubsub_msg in expected_messages]
     if self.runner_name == 'TestDirectRunner':
       strip_attributes = None
     else:
diff --git a/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher.py b/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher.py
index ba7a674..7a0b5c8 100644
--- a/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher.py
+++ b/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher.py
@@ -103,7 +103,7 @@ class PubSubMessageMatcher(BaseMatcher):
       for rm in response.received_messages:
         msg = PubsubMessage._from_message(rm.message)
         if not self.with_attributes:
-          total_messages.append(msg.data.decode('utf-8'))
+          total_messages.append(msg.data)
           continue
 
         if self.strip_attributes:
diff --git a/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher_test.py b/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher_test.py
index 6a58ddf..cb9fbb9 100644
--- a/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher_test.py
+++ b/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher_test.py
@@ -59,7 +59,7 @@ class PubSubMatcherTest(unittest.TestCase):
 
   def test_message_matcher_success(self, mock_get_sub, unsued_mock):
     self.init_matcher()
-    self.pubsub_matcher.expected_msg = ['a', 'b']
+    self.pubsub_matcher.expected_msg = [b'a', b'b']
     mock_sub = mock_get_sub.return_value
     mock_sub.pull.side_effect = [
         create_pull_response([PullResponseMessage(b'a', {})]),
@@ -121,7 +121,7 @@ class PubSubMatcherTest(unittest.TestCase):
 
   def test_message_matcher_mismatch(self, mock_get_sub, unused_mock):
     self.init_matcher()
-    self.pubsub_matcher.expected_msg = ['a']
+    self.pubsub_matcher.expected_msg = [b'a']
     mock_sub = mock_get_sub.return_value
     mock_sub.pull.side_effect = [
         create_pull_response([PullResponseMessage(b'c', {}),
@@ -130,7 +130,7 @@ class PubSubMatcherTest(unittest.TestCase):
     with self.assertRaises(AssertionError) as error:
       hc_assert_that(self.mock_presult, self.pubsub_matcher)
     self.assertEqual(mock_sub.pull.call_count, 1)
-    self.assertCountEqual(['c', 'd'], self.pubsub_matcher.messages)
+    self.assertCountEqual([b'c', b'd'], self.pubsub_matcher.messages)
     self.assertTrue(
         '\nExpected: Expected 1 messages.\n     but: Got 2 messages.'
         in str(error.exception.args[0]))

[beam] branch master updated: [BEAM-7630] add ITs for writing and reading bytes from pubsub

Reply via email to