This is an automated email from the ASF dual-hosted git repository.
altay pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new c34c367 Finish Python 3 porting for coders subpackage (#6310)
c34c367 is described below
commit c34c367f5da6f9bef8a46471195470923a201af9
Author: Robbe Sneyders <[email protected]>
AuthorDate: Fri Sep 7 03:44:46 2018 +0200
Finish Python 3 porting for coders subpackage (#6310)
---
sdks/python/apache_beam/coders/coders.py | 5 +++--
.../apache_beam/coders/coders_test_common.py | 12 +++++++----
sdks/python/apache_beam/coders/slow_stream.py | 22 ++++++++++++++++++--
.../apache_beam/coders/standard_coders_test.py | 2 +-
sdks/python/apache_beam/coders/stream_test.py | 24 +++++++++++-----------
sdks/python/tox.ini | 4 +++-
6 files changed, 47 insertions(+), 22 deletions(-)
diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py
index cf4b9b5..ad4edbb 100644
--- a/sdks/python/apache_beam/coders/coders.py
+++ b/sdks/python/apache_beam/coders/coders.py
@@ -62,12 +62,13 @@ __all__ = ['Coder',
def serialize_coder(coder):
from apache_beam.internal import pickler
- return '%s$%s' % (coder.__class__.__name__, pickler.dumps(coder))
+ return b'%s$%s' % (coder.__class__.__name__.encode('utf-8'),
+ pickler.dumps(coder))
def deserialize_coder(serialized):
from apache_beam.internal import pickler
- return pickler.loads(serialized.split('$', 1)[1])
+ return pickler.loads(serialized.split(b'$', 1)[1])
# pylint: enable=wrong-import-order, wrong-import-position
diff --git a/sdks/python/apache_beam/coders/coders_test_common.py b/sdks/python/apache_beam/coders/coders_test_common.py
index 0b8b4c2..969c1de 100644
--- a/sdks/python/apache_beam/coders/coders_test_common.py
+++ b/sdks/python/apache_beam/coders/coders_test_common.py
@@ -20,6 +20,7 @@ from __future__ import absolute_import
import logging
import math
+import sys
import unittest
from builtins import range
@@ -41,7 +42,7 @@ from . import observable
class CustomCoder(coders.Coder):
def encode(self, x):
- return str(x+1)
+ return str(x+1).encode('utf-8')
def decode(self, encoded):
return int(encoded) - 1
@@ -56,6 +57,9 @@ class CodersTest(unittest.TestCase):
def setUpClass(cls):
cls.seen = set()
cls.seen_nested = set()
+ # Method has been renamed in Python 3
+ if sys.version_info[0] < 3:
+ cls.assertCountEqual = cls.assertItemsEqual
@classmethod
def tearDownClass(cls):
@@ -272,7 +276,7 @@ class CodersTest(unittest.TestCase):
yield i
iterable_coder = coders.IterableCoder(coders.VarIntCoder())
- self.assertItemsEqual(list(iter_generator(count)),
+ self.assertCountEqual(list(iter_generator(count)),
iterable_coder.decode(
iterable_coder.encode(iter_generator(count))))
@@ -374,8 +378,8 @@ class CodersTest(unittest.TestCase):
self.assertEqual({'@type': 'kind:global_window'},
coder.as_cloud_object())
# Test binary representation
- self.assertEqual('', coder.encode(value))
- self.assertEqual(value, coder.decode(''))
+ self.assertEqual(b'', coder.encode(value))
+ self.assertEqual(value, coder.decode(b''))
# Test unnested
self.check_coder(coder, value)
# Test nested
diff --git a/sdks/python/apache_beam/coders/slow_stream.py b/sdks/python/apache_beam/coders/slow_stream.py
index da27a49..4bdece6 100644
--- a/sdks/python/apache_beam/coders/slow_stream.py
+++ b/sdks/python/apache_beam/coders/slow_stream.py
@@ -22,6 +22,7 @@ For internal use only; no backwards-compatibility guarantees.
from __future__ import absolute_import
import struct
+import sys
from builtins import chr
from builtins import object
@@ -70,7 +71,7 @@ class OutputStream(object):
self.write(struct.pack('>d', v))
def get(self):
- return ''.join(self.data)
+ return b''.join(self.data)
def size(self):
return len(self.data)
@@ -114,6 +115,19 @@ class InputStream(object):
self.data = data
self.pos = 0
+ # The behavior of looping over a byte-string and obtaining byte characters
+ # has been changed between python 2 and 3.
+ # b = b'\xff\x01'
+ # Python 2:
+ # b[0] = '\xff'
+ # ord(b[0]) = 255
+ # Python 3:
+ # b[0] = 255
+ if sys.version_info[0] >= 3:
+ self.read_byte = self.read_byte_py3
+ else:
+ self.read_byte = self.read_byte_py2
+
def size(self):
return len(self.data) - self.pos
@@ -124,10 +138,14 @@ class InputStream(object):
def read_all(self, nested):
return self.read(self.read_var_int64() if nested else self.size())
- def read_byte(self):
+ def read_byte_py2(self):
self.pos += 1
return ord(self.data[self.pos - 1])
+ def read_byte_py3(self):
+ self.pos += 1
+ return self.data[self.pos - 1]
+
def read_var_int64(self):
shift = 0
result = 0
diff --git a/sdks/python/apache_beam/coders/standard_coders_test.py b/sdks/python/apache_beam/coders/standard_coders_test.py
index f704c49..031406f 100644
--- a/sdks/python/apache_beam/coders/standard_coders_test.py
+++ b/sdks/python/apache_beam/coders/standard_coders_test.py
@@ -67,7 +67,7 @@ class StandardCodersTest(unittest.TestCase):
}
_urn_to_json_value_parser = {
- 'beam:coder:bytes:v1': lambda x: x,
+ 'beam:coder:bytes:v1': lambda x: x.encode('utf-8'),
'beam:coder:varint:v1': lambda x: x,
'beam:coder:kv:v1':
lambda x, key_parser, value_parser: (key_parser(x['key']),
diff --git a/sdks/python/apache_beam/coders/stream_test.py b/sdks/python/apache_beam/coders/stream_test.py
index 641fefa..ad046fb 100644
--- a/sdks/python/apache_beam/coders/stream_test.py
+++ b/sdks/python/apache_beam/coders/stream_test.py
@@ -36,20 +36,20 @@ class StreamTest(unittest.TestCase):
def test_read_write(self):
out_s = self.OutputStream()
- out_s.write('abc')
- out_s.write('\0\t\n')
- out_s.write('xyz', True)
- out_s.write('', True)
+ out_s.write(b'abc')
+ out_s.write(b'\0\t\n')
+ out_s.write(b'xyz', True)
+ out_s.write(b'', True)
in_s = self.InputStream(out_s.get())
- self.assertEquals('abc\0\t\n', in_s.read(6))
- self.assertEquals('xyz', in_s.read_all(True))
- self.assertEquals('', in_s.read_all(True))
+ self.assertEquals(b'abc\0\t\n', in_s.read(6))
+ self.assertEquals(b'xyz', in_s.read_all(True))
+ self.assertEquals(b'', in_s.read_all(True))
def test_read_all(self):
out_s = self.OutputStream()
- out_s.write('abc')
+ out_s.write(b'abc')
in_s = self.InputStream(out_s.get())
- self.assertEquals('abc', in_s.read_all(False))
+ self.assertEquals(b'abc', in_s.read_all(False))
def test_read_write_byte(self):
out_s = self.OutputStream()
@@ -129,15 +129,15 @@ class StreamTest(unittest.TestCase):
def test_byte_counting(self):
bc_s = self.ByteCountingOutputStream()
self.assertEquals(0, bc_s.get_count())
- bc_s.write('def')
+ bc_s.write(b'def')
self.assertEquals(3, bc_s.get_count())
- bc_s.write('')
+ bc_s.write(b'')
self.assertEquals(3, bc_s.get_count())
bc_s.write_byte(10)
self.assertEquals(4, bc_s.get_count())
# "nested" also writes the length of the string, which should
# cause 1 extra byte to be counted.
- bc_s.write('2345', nested=True)
+ bc_s.write(b'2345', nested=True)
self.assertEquals(9, bc_s.get_count())
bc_s.write_var_int64(63)
self.assertEquals(10, bc_s.get_count())
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index f425ba7..b12e35d 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -56,11 +56,13 @@ commands =
[testenv:py3]
setenv =
BEAM_EXPERIMENTAL_PY3=1
+modules =
+ apache_beam.coders,apache_beam.tools
commands =
python --version
pip --version
{toxinidir}/scripts/run_tox_cleanup.sh
- python setup.py nosetests --tests apache_beam.runners.direct.direct_metrics_test:DirectMetricsTest.test_combiner_functions
+ python setup.py nosetests --tests {[testenv:py3]modules}
{toxinidir}/scripts/run_tox_cleanup.sh
[testenv:py27-cython]