[
https://issues.apache.org/jira/browse/BEAM-3981?focusedWorklogId=100665&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-100665
]
ASF GitHub Bot logged work on BEAM-3981:
----------------------------------------
Author: ASF GitHub Bot
Created on: 10/May/18 16:09
Start Date: 10/May/18 16:09
Worklog Time Spent: 10m
Work Description: aaltay closed pull request #5053: [BEAM-3981] Futurize
coders subpackage
URL: https://github.com/apache/beam/pull/5053
This is a PR merged from a forked repository. Because GitHub hides the
original diff once a foreign (forked) pull request is merged, the diff is
reproduced below for the sake of provenance:
diff --git a/sdks/python/apache_beam/coders/__init__.py
b/sdks/python/apache_beam/coders/__init__.py
index acca89f70f4..3192494ebbf 100644
--- a/sdks/python/apache_beam/coders/__init__.py
+++ b/sdks/python/apache_beam/coders/__init__.py
@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+from __future__ import absolute_import
from apache_beam.coders.coders import *
from apache_beam.coders.typecoders import registry
diff --git a/sdks/python/apache_beam/coders/coder_impl.pxd
b/sdks/python/apache_beam/coders/coder_impl.pxd
index 98dd508556a..8a85d085448 100644
--- a/sdks/python/apache_beam/coders/coder_impl.pxd
+++ b/sdks/python/apache_beam/coders/coder_impl.pxd
@@ -70,11 +70,11 @@ cdef class DeterministicFastPrimitivesCoderImpl(CoderImpl):
cdef object NoneType
cdef char UNKNOWN_TYPE, NONE_TYPE, INT_TYPE, FLOAT_TYPE, BOOL_TYPE
-cdef char STR_TYPE, UNICODE_TYPE, LIST_TYPE, TUPLE_TYPE, DICT_TYPE, SET_TYPE
+cdef char BYTES_TYPE, UNICODE_TYPE, LIST_TYPE, TUPLE_TYPE, DICT_TYPE, SET_TYPE
cdef class FastPrimitivesCoderImpl(StreamCoderImpl):
cdef CoderImpl fallback_coder_impl
- @cython.locals(unicode_value=unicode, dict_value=dict)
+ @cython.locals(dict_value=dict)
cpdef encode_to_stream(self, value, OutputStream stream, bint nested)
diff --git a/sdks/python/apache_beam/coders/coder_impl.py
b/sdks/python/apache_beam/coders/coder_impl.py
index cc7ed87c3ad..2fcd0f22a31 100644
--- a/sdks/python/apache_beam/coders/coder_impl.py
+++ b/sdks/python/apache_beam/coders/coder_impl.py
@@ -24,13 +24,17 @@
This module may be optionally compiled with Cython, using the corresponding
coder_impl.pxd file for type hints.
+Py2/3 porting: Native range is used on both python versions instead of
+future.builtins.range to avoid performance regression in Cython compiled code.
+
For internal use only; no backwards-compatibility guarantees.
"""
from __future__ import absolute_import
+from __future__ import division
-from types import NoneType
-
-import six
+import sys
+from builtins import chr
+from builtins import object
from apache_beam.coders import observable
from apache_beam.utils import windowed_value
@@ -54,10 +58,12 @@
from .slow_stream import get_varint_size
# pylint: enable=wrong-import-order, wrong-import-position, ungrouped-imports
-try:
- long # Python 2
-except NameError:
- long = int # Python 3
+try: # Python 2
+ long # pylint: disable=long-builtin
+ unicode # pylint: disable=unicode-builtin
+except NameError: # Python 3
+ long = int
+ unicode = str
class CoderImpl(object):
@@ -199,7 +205,7 @@ def __init__(self, coder, step_label):
self._step_label = step_label
def _check_safe(self, value):
- if isinstance(value, (str, six.text_type, long, int, float)):
+ if isinstance(value, (bytes, unicode, long, int, float)):
pass
elif value is None:
pass
@@ -253,7 +259,7 @@ def decode(self, encoded):
NONE_TYPE = 0
INT_TYPE = 1
FLOAT_TYPE = 2
-STR_TYPE = 3
+BYTES_TYPE = 3
UNICODE_TYPE = 4
BOOL_TYPE = 9
LIST_TYPE = 5
@@ -279,7 +285,7 @@ def get_estimated_size_and_observables(self, value,
nested=False):
def encode_to_stream(self, value, stream, nested):
t = type(value)
- if t is NoneType:
+ if value is None:
stream.write_byte(NONE_TYPE)
elif t is int:
stream.write_byte(INT_TYPE)
@@ -287,13 +293,13 @@ def encode_to_stream(self, value, stream, nested):
elif t is float:
stream.write_byte(FLOAT_TYPE)
stream.write_bigendian_double(value)
- elif t is str:
- stream.write_byte(STR_TYPE)
+ elif t is bytes:
+ stream.write_byte(BYTES_TYPE)
stream.write(value, nested)
- elif t is six.text_type:
- unicode_value = value # for typing
+ elif t is unicode:
+ text_value = value # for typing
stream.write_byte(UNICODE_TYPE)
- stream.write(unicode_value.encode('utf-8'), nested)
+ stream.write(text_value.encode('utf-8'), nested)
elif t is list or t is tuple or t is set:
stream.write_byte(
LIST_TYPE if t is list else TUPLE_TYPE if t is tuple else SET_TYPE)
@@ -304,7 +310,13 @@ def encode_to_stream(self, value, stream, nested):
dict_value = value # for typing
stream.write_byte(DICT_TYPE)
stream.write_var_int64(len(dict_value))
- for k, v in dict_value.iteritems():
+ # Use iteritems() on Python 2 instead of future.builtins.iteritems to
+ # avoid performance regression in Cython compiled code.
+ if sys.version_info[0] == 2:
+ items = dict_value.iteritems() # pylint: disable=dict-iter-method
+ else:
+ items = dict_value.items()
+ for k, v in items:
self.encode_to_stream(k, stream, True)
self.encode_to_stream(v, stream, True)
elif t is bool:
@@ -322,7 +334,7 @@ def decode_from_stream(self, stream, nested):
return stream.read_var_int64()
elif t == FLOAT_TYPE:
return stream.read_bigendian_double()
- elif t == STR_TYPE:
+ elif t == BYTES_TYPE:
return stream.read_all(nested)
elif t == UNICODE_TYPE:
return stream.read_all(nested).decode('utf-8')
@@ -394,8 +406,9 @@ def _from_normal_time(self, value):
def encode_to_stream(self, value, out, nested):
span_micros = value.end.micros - value.start.micros
- out.write_bigendian_uint64(self._from_normal_time(value.end.micros / 1000))
- out.write_var_int64(span_micros / 1000)
+ out.write_bigendian_uint64(
+ self._from_normal_time(value.end.micros // 1000))
+ out.write_var_int64(span_micros // 1000)
def decode_from_stream(self, in_, nested):
end_millis = self._to_normal_time(in_.read_bigendian_uint64())
@@ -409,7 +422,7 @@ def estimate_size(self, value, nested=False):
# An IntervalWindow is context-insensitive, with a timestamp (8 bytes)
# and a varint timespan.
span = value.end.micros - value.start.micros
- return 8 + get_varint_size(span / 1000)
+ return 8 + get_varint_size(span // 1000)
class TimestampCoderImpl(StreamCoderImpl):
@@ -427,7 +440,7 @@ def estimate_size(self, unused_value, nested=False):
return 8
-small_ints = [chr(_) for _ in range(128)]
+small_ints = [chr(_).encode('latin-1') for _ in range(128)]
class VarIntCoderImpl(StreamCoderImpl):
@@ -474,7 +487,7 @@ def decode_from_stream(self, stream, nested):
return self._value
def encode(self, value):
- b = '' # avoid byte vs str vs unicode error
+ b = b'' # avoid byte vs str vs unicode error
return b
def decode(self, encoded):
@@ -783,7 +796,7 @@ def encode_to_stream(self, value, out, nested):
# TODO(BEAM-1524): Clean this up once we have a BEAM wide consensus on
# precision of timestamps.
self._from_normal_time(
- restore_sign * (abs(wv.timestamp_micros) / 1000)))
+ restore_sign * (abs(wv.timestamp_micros) // 1000)))
self._windows_coder.encode_to_stream(wv.windows, out, True)
# Default PaneInfo encoded byte representing NO_FIRING.
self._pane_info_coder.encode_to_stream(wv.pane_info, out, True)
@@ -797,9 +810,9 @@ def decode_from_stream(self, in_stream, nested):
# were indeed MIN/MAX timestamps.
# TODO(BEAM-1524): Clean this up once we have a BEAM wide consensus on
# precision of timestamps.
- if timestamp == -(abs(MIN_TIMESTAMP.micros) / 1000):
+ if timestamp == -(abs(MIN_TIMESTAMP.micros) // 1000):
timestamp = MIN_TIMESTAMP.micros
- elif timestamp == (MAX_TIMESTAMP.micros / 1000):
+ elif timestamp == (MAX_TIMESTAMP.micros // 1000):
timestamp = MAX_TIMESTAMP.micros
else:
timestamp *= 1000
diff --git a/sdks/python/apache_beam/coders/coders.py
b/sdks/python/apache_beam/coders/coders.py
index d1ebe6b15da..b6aa40d6c82 100644
--- a/sdks/python/apache_beam/coders/coders.py
+++ b/sdks/python/apache_beam/coders/coders.py
@@ -22,7 +22,7 @@
from __future__ import absolute_import
import base64
-import cPickle as pickle
+from builtins import object
import google.protobuf
from google.protobuf import wrappers_pb2
@@ -33,6 +33,12 @@
from apache_beam.portability.api import beam_runner_api_pb2
from apache_beam.utils import proto_utils
+# This is for py2/3 compatibility. cPickle was renamed pickle in python 3.
+try:
+ import cPickle as pickle # Python 2
+except ImportError:
+ import pickle # Python 3
+
# pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports
try:
from .stream import get_varint_size
@@ -210,11 +216,15 @@ def as_cloud_object(self):
def __repr__(self):
return self.__class__.__name__
+ # pylint: disable=protected-access
def __eq__(self, other):
- # pylint: disable=protected-access
return (self.__class__ == other.__class__
and self._dict_without_impl() == other._dict_without_impl())
- # pylint: enable=protected-access
+
+ def __hash__(self):
+ return hash((self.__class__,) +
+ tuple(sorted(self._dict_without_impl().items())))
+ # pylint: enable=protected-access
_known_urns = {}
@@ -312,7 +322,7 @@ class ToStringCoder(Coder):
def encode(self, value):
try: # Python 2
- if isinstance(value, unicode):
+ if isinstance(value, unicode): # pylint: disable=unicode-builtin
return value.encode('utf-8')
except NameError: # Python 3
pass
diff --git a/sdks/python/apache_beam/coders/coders_test.py
b/sdks/python/apache_beam/coders/coders_test.py
index 705de8920d5..a6c456f619f 100644
--- a/sdks/python/apache_beam/coders/coders_test.py
+++ b/sdks/python/apache_beam/coders/coders_test.py
@@ -14,11 +14,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-
+from __future__ import absolute_import
import base64
import logging
import unittest
+from builtins import object
from apache_beam.coders import proto2_coder_test_messages_pb2 as test_message
from apache_beam.coders import coders
@@ -47,17 +48,11 @@ def test_equality(self):
class CodersTest(unittest.TestCase):
def test_str_utf8_coder(self):
- real_coder = coders_registry.get_coder(str)
- expected_coder = coders.BytesCoder()
- self.assertEqual(
- real_coder.encode('abc'), expected_coder.encode('abc'))
- self.assertEqual('abc', real_coder.decode(real_coder.encode('abc')))
-
real_coder = coders_registry.get_coder(bytes)
expected_coder = coders.BytesCoder()
self.assertEqual(
- real_coder.encode('abc'), expected_coder.encode('abc'))
- self.assertEqual('abc', real_coder.decode(real_coder.encode('abc')))
+ real_coder.encode(b'abc'), expected_coder.encode(b'abc'))
+ self.assertEqual(b'abc', real_coder.decode(real_coder.encode(b'abc')))
# The test proto message file was generated by running the following:
@@ -99,6 +94,9 @@ def __eq__(self, other):
return True
return False
+ def __hash__(self):
+ return hash(type(self))
+
class FallbackCoderTest(unittest.TestCase):
diff --git a/sdks/python/apache_beam/coders/coders_test_common.py
b/sdks/python/apache_beam/coders/coders_test_common.py
index 0ea7da2b6ad..0b8b4c20fde 100644
--- a/sdks/python/apache_beam/coders/coders_test_common.py
+++ b/sdks/python/apache_beam/coders/coders_test_common.py
@@ -21,6 +21,7 @@
import logging
import math
import unittest
+from builtins import range
import dill
@@ -103,7 +104,7 @@ def test_custom_coder(self):
self.check_coder(CustomCoder(), 1, -10, 5)
self.check_coder(coders.TupleCoder((CustomCoder(), coders.BytesCoder())),
- (1, 'a'), (-10, 'b'), (5, 'c'))
+ (1, b'a'), (-10, b'b'), (5, b'c'))
def test_pickle_coder(self):
self.check_coder(coders.PickleCoder(), 'a', 1, 1.5, (1, 2, 3))
@@ -129,7 +130,7 @@ def test_dill_coder(self):
def test_fast_primitives_coder(self):
coder = coders.FastPrimitivesCoder(coders.SingletonCoder(len))
- self.check_coder(coder, None, 1, -1, 1.5, 'str\0str', u'unicode\0\u0101')
+ self.check_coder(coder, None, 1, -1, 1.5, b'str\0str', u'unicode\0\u0101')
self.check_coder(coder, (), (1, 2, 3))
self.check_coder(coder, [], [1, 2, 3])
self.check_coder(coder, dict(), {'a': 'b'}, {0: dict(), 1: len})
@@ -139,7 +140,7 @@ def test_fast_primitives_coder(self):
self.check_coder(coders.TupleCoder((coder,)), ('a',), (1,))
def test_bytes_coder(self):
- self.check_coder(coders.BytesCoder(), 'a', '\0', 'z' * 1000)
+ self.check_coder(coders.BytesCoder(), b'a', b'\0', b'z' * 1000)
def test_varint_coder(self):
# Small ints.
@@ -190,7 +191,7 @@ def test_timestamp_coder(self):
timestamp.Timestamp(micros=1234567890123456789))
self.check_coder(
coders.TupleCoder((coders.TimestampCoder(), coders.BytesCoder())),
- (timestamp.Timestamp.of(27), 'abc'))
+ (timestamp.Timestamp.of(27), b'abc'))
def test_tuple_coder(self):
kv_coder = coders.TupleCoder((coders.VarIntCoder(), coders.BytesCoder()))
@@ -206,14 +207,14 @@ def test_tuple_coder(self):
kv_coder.as_cloud_object())
# Test binary representation
self.assertEqual(
- '\x04abc',
- kv_coder.encode((4, 'abc')))
+ b'\x04abc',
+ kv_coder.encode((4, b'abc')))
# Test unnested
self.check_coder(
kv_coder,
- (1, 'a'),
- (-2, 'a' * 100),
- (300, 'abc\0' * 5))
+ (1, b'a'),
+ (-2, b'a' * 100),
+ (300, b'abc\0' * 5))
# Test nested
self.check_coder(
coders.TupleCoder(
@@ -322,12 +323,12 @@ def test_windowed_value_coder(self):
},
coder.as_cloud_object())
# Test binary representation
- self.assertEqual('\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01',
+ self.assertEqual(b'\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01',
coder.encode(window.GlobalWindows.windowed_value(1)))
# Test decoding large timestamp
self.assertEqual(
- coder.decode('\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00'),
+ coder.decode(b'\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00'),
windowed_value.create(0, MIN_TIMESTAMP.micros, (GlobalWindow(),)))
# Test unnested
@@ -364,7 +365,7 @@ def test_proto_coder(self):
proto_coder = coders.ProtoCoder(ma.__class__)
self.check_coder(proto_coder, ma)
self.check_coder(coders.TupleCoder((proto_coder, coders.BytesCoder())),
- (ma, 'a'), (mb, 'b'))
+ (ma, b'a'), (mb, b'b'))
def test_global_window_coder(self):
coder = coders.GlobalWindowCoder()
@@ -391,16 +392,16 @@ def test_length_prefix_coder(self):
},
coder.as_cloud_object())
# Test binary representation
- self.assertEqual('\x00', coder.encode(''))
- self.assertEqual('\x01a', coder.encode('a'))
- self.assertEqual('\x02bc', coder.encode('bc'))
- self.assertEqual('\xff\x7f' + 'z' * 16383, coder.encode('z' * 16383))
+ self.assertEqual(b'\x00', coder.encode(b''))
+ self.assertEqual(b'\x01a', coder.encode(b'a'))
+ self.assertEqual(b'\x02bc', coder.encode(b'bc'))
+ self.assertEqual(b'\xff\x7f' + b'z' * 16383, coder.encode(b'z' * 16383))
# Test unnested
- self.check_coder(coder, '', 'a', 'bc', 'def')
+ self.check_coder(coder, b'', b'a', b'bc', b'def')
# Test nested
self.check_coder(coders.TupleCoder((coder, coder)),
- ('', 'a'),
- ('bc', 'def'))
+ (b'', b'a'),
+ (b'bc', b'def'))
def test_nested_observables(self):
class FakeObservableIterator(observable.ObservableMixin):
diff --git a/sdks/python/apache_beam/coders/fast_coders_test.py
b/sdks/python/apache_beam/coders/fast_coders_test.py
index a13334a2c26..eb4077344f6 100644
--- a/sdks/python/apache_beam/coders/fast_coders_test.py
+++ b/sdks/python/apache_beam/coders/fast_coders_test.py
@@ -16,6 +16,7 @@
#
"""Unit tests for compiled implementation of coder impls."""
+from __future__ import absolute_import
import logging
import unittest
diff --git a/sdks/python/apache_beam/coders/observable.py
b/sdks/python/apache_beam/coders/observable.py
index fc952cf4e55..3d0a7fc10bf 100644
--- a/sdks/python/apache_beam/coders/observable.py
+++ b/sdks/python/apache_beam/coders/observable.py
@@ -20,6 +20,9 @@
For internal use only; no backwards-compatibility guarantees.
"""
+from __future__ import absolute_import
+
+from builtins import object
class ObservableMixin(object):
diff --git a/sdks/python/apache_beam/coders/observable_test.py
b/sdks/python/apache_beam/coders/observable_test.py
index 09ca3041c29..ce32bf05ef2 100644
--- a/sdks/python/apache_beam/coders/observable_test.py
+++ b/sdks/python/apache_beam/coders/observable_test.py
@@ -16,6 +16,7 @@
#
"""Tests for the Observable mixin class."""
+from __future__ import absolute_import
import logging
import unittest
diff --git a/sdks/python/apache_beam/coders/slow_coders_test.py
b/sdks/python/apache_beam/coders/slow_coders_test.py
index 97aa39ca094..b543b56dd26 100644
--- a/sdks/python/apache_beam/coders/slow_coders_test.py
+++ b/sdks/python/apache_beam/coders/slow_coders_test.py
@@ -16,6 +16,7 @@
#
"""Unit tests for uncompiled implementation of coder impls."""
+from __future__ import absolute_import
import logging
import unittest
diff --git a/sdks/python/apache_beam/coders/slow_stream.py
b/sdks/python/apache_beam/coders/slow_stream.py
index 1ab55d90f98..da27a49883a 100644
--- a/sdks/python/apache_beam/coders/slow_stream.py
+++ b/sdks/python/apache_beam/coders/slow_stream.py
@@ -19,8 +19,11 @@
For internal use only; no backwards-compatibility guarantees.
"""
+from __future__ import absolute_import
import struct
+from builtins import chr
+from builtins import object
class OutputStream(object):
@@ -32,13 +35,13 @@ def __init__(self):
self.data = []
def write(self, b, nested=False):
- assert isinstance(b, str)
+ assert isinstance(b, bytes)
if nested:
self.write_var_int64(len(b))
self.data.append(b)
def write_byte(self, val):
- self.data.append(chr(val))
+ self.data.append(chr(val).encode('latin-1'))
def write_var_int64(self, v):
if v < 0:
diff --git a/sdks/python/apache_beam/coders/standard_coders_test.py
b/sdks/python/apache_beam/coders/standard_coders_test.py
index b595cfd5922..f704c490c97 100644
--- a/sdks/python/apache_beam/coders/standard_coders_test.py
+++ b/sdks/python/apache_beam/coders/standard_coders_test.py
@@ -17,6 +17,7 @@
"""Unit tests for coders that must be consistent across all Beam SDKs.
"""
+from __future__ import absolute_import
from __future__ import print_function
import json
@@ -24,6 +25,7 @@
import os.path
import sys
import unittest
+from builtins import map
import yaml
@@ -74,7 +76,7 @@ class StandardCodersTest(unittest.TestCase):
lambda x: IntervalWindow(
start=Timestamp(micros=(x['end'] - x['span']) * 1000),
end=Timestamp(micros=x['end'] * 1000)),
- 'beam:coder:iterable:v1': lambda x, parser: map(parser, x),
+ 'beam:coder:iterable:v1': lambda x, parser: list(map(parser, x)),
'beam:coder:global_window:v1': lambda x: window.GlobalWindow(),
'beam:coder:windowed_value:v1':
lambda x, value_parser, window_parser: windowed_value.create(
diff --git a/sdks/python/apache_beam/coders/stream_test.py
b/sdks/python/apache_beam/coders/stream_test.py
index 15bc5eb9ba9..641fefad45d 100644
--- a/sdks/python/apache_beam/coders/stream_test.py
+++ b/sdks/python/apache_beam/coders/stream_test.py
@@ -16,10 +16,13 @@
#
"""Tests for the stream implementations."""
+from __future__ import absolute_import
+from __future__ import division
import logging
import math
import unittest
+from builtins import range
from apache_beam.coders import slow_stream
diff --git a/sdks/python/apache_beam/coders/typecoders.py
b/sdks/python/apache_beam/coders/typecoders.py
index 355c6230f92..e4efa2c7ffd 100644
--- a/sdks/python/apache_beam/coders/typecoders.py
+++ b/sdks/python/apache_beam/coders/typecoders.py
@@ -63,12 +63,18 @@ def MakeXyzs(v):
See apache_beam.typehints.decorators module for more details.
"""
+from __future__ import absolute_import
-import six
+from builtins import object
from apache_beam.coders import coders
from apache_beam.typehints import typehints
+try:
+ unicode # pylint: disable=unicode-builtin
+except NameError:
+ unicode = str
+
__all__ = ['registry']
@@ -84,9 +90,8 @@ def register_standard_coders(self, fallback_coder):
"""Register coders for all basic and composite types."""
self._register_coder_internal(int, coders.VarIntCoder)
self._register_coder_internal(float, coders.FloatCoder)
- self._register_coder_internal(str, coders.BytesCoder)
self._register_coder_internal(bytes, coders.BytesCoder)
- self._register_coder_internal(six.text_type, coders.StrUtf8Coder)
+ self._register_coder_internal(unicode, coders.StrUtf8Coder)
self._register_coder_internal(typehints.TupleConstraint, coders.TupleCoder)
# Default fallback coders applied in that order until the first matching
# coder found.
diff --git a/sdks/python/apache_beam/coders/typecoders_test.py
b/sdks/python/apache_beam/coders/typecoders_test.py
index 2b6aa7a5129..b64cb653e56 100644
--- a/sdks/python/apache_beam/coders/typecoders_test.py
+++ b/sdks/python/apache_beam/coders/typecoders_test.py
@@ -16,8 +16,10 @@
#
"""Unit tests for the typecoders module."""
+from __future__ import absolute_import
import unittest
+from builtins import object
from apache_beam.coders import coders
from apache_beam.coders import typecoders
@@ -33,6 +35,9 @@ def __init__(self, n):
def __eq__(self, other):
return self.number == other.number
+ def __hash__(self):
+ return self.number
+
class CustomCoder(coders.Coder):
@@ -75,7 +80,7 @@ def test_get_coder_with_composite_custom_coder(self):
def test_get_coder_with_standard_coder(self):
self.assertEqual(coders.BytesCoder,
- typecoders.registry.get_coder(str).__class__)
+ typecoders.registry.get_coder(bytes).__class__)
def test_fallbackcoder(self):
coder = typecoders.registry.get_coder(typehints.Any)
@@ -100,22 +105,16 @@ def test_standard_int_coder(self):
real_coder.decode(real_coder.encode(0x040404040404)))
def test_standard_str_coder(self):
- real_coder = typecoders.registry.get_coder(str)
- expected_coder = coders.BytesCoder()
- self.assertEqual(
- real_coder.encode('abc'), expected_coder.encode('abc'))
- self.assertEqual('abc', real_coder.decode(real_coder.encode('abc')))
-
real_coder = typecoders.registry.get_coder(bytes)
expected_coder = coders.BytesCoder()
self.assertEqual(
- real_coder.encode('abc'), expected_coder.encode('abc'))
- self.assertEqual('abc', real_coder.decode(real_coder.encode('abc')))
+ real_coder.encode(b'abc'), expected_coder.encode(b'abc'))
+ self.assertEqual(b'abc', real_coder.decode(real_coder.encode(b'abc')))
def test_iterable_coder(self):
- real_coder = typecoders.registry.get_coder(typehints.Iterable[str])
+ real_coder = typecoders.registry.get_coder(typehints.Iterable[bytes])
expected_coder = coders.IterableCoder(coders.BytesCoder())
- values = ['abc', 'xyz']
+ values = [b'abc', b'xyz']
self.assertEqual(expected_coder, real_coder)
self.assertEqual(real_coder.encode(values), expected_coder.encode(values))
diff --git a/sdks/python/generate_pydoc.sh b/sdks/python/generate_pydoc.sh
index ae8d043d376..68b7006a1b6 100755
--- a/sdks/python/generate_pydoc.sh
+++ b/sdks/python/generate_pydoc.sh
@@ -121,6 +121,9 @@ ignore_identifiers = [
# Ignore broken built-in type references
'tuple',
+ # Ignore future.builtin type references
+ 'future.types.newobject.newobject',
+
# Ignore private classes
'apache_beam.coders.coders._PickleCoderBase',
'apache_beam.coders.coders.FastCoder',
diff --git a/sdks/python/run_pylint.sh b/sdks/python/run_pylint.sh
index 7150fa6f50e..4e7dc9d8218 100755
--- a/sdks/python/run_pylint.sh
+++ b/sdks/python/run_pylint.sh
@@ -89,25 +89,6 @@ done
isort ${MODULE} -p apache_beam --line-width 120 --check-only --order-by-type \
--combine-star --force-single-line-imports --diff --recursive ${SKIP_PARAM}
-FUTURIZE_EXCLUDED=(
- "typehints.py"
- "pb2"
- "trivial_infernce.py"
-)
-FUTURIZE_GREP_PARAM=$( IFS='|'; echo "${ids[*]}" )
-echo "Checking for files requiring stage 1 refactoring from futurize"
-futurize_results=$(futurize -j 8 --stage1 apache_beam 2>&1 |grep Refactored)
-futurize_filtered=$(echo "$futurize_results" |grep -v "$FUTURIZE_GREP_PARAM"
|| echo "")
-count=${#futurize_filtered}
-if [ "$count" != "0" ]; then
- echo "Some of the changes require futurize stage 1 changes."
- echo "The files with required changes:"
- echo "$futurize_filtered"
- echo "You can run futurize apache_beam to see the proposed changes."
- exit 1
-fi
-echo "No future changes needed"
-
echo "Checking unittest.main for module ${MODULE}:"
TESTS_MISSING_MAIN=$(find ${MODULE} | grep '\.py$' | xargs grep -l '^import
unittest$' | xargs grep -L unittest.main)
if [ -n "${TESTS_MISSING_MAIN}" ]; then
diff --git a/sdks/python/run_pylint_2to3.sh b/sdks/python/run_pylint_2to3.sh
new file mode 100755
index 00000000000..9a074368dba
--- /dev/null
+++ b/sdks/python/run_pylint_2to3.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This script will run pylint with the --py3k parameter to check for python
+# 3 compatibility. This script can run on a list of modules provided as
+# command line arguments.
+#
+# The exit-code of the script indicates success or a failure.
+
+set -o errexit
+set -o pipefail
+
+DEFAULT_MODULE=apache_beam
+
+usage(){ echo "Usage: $0 [MODULE|--help]
+# The default MODULE is $DEFAULT_MODULE"; }
+
+MODULES=${DEFAULT_MODULE}
+while [[ $# -gt 0 ]] ; do
+ key="$1"
+ case ${key} in
+ --help) usage; exit 1;;
+ *)
+ if [ ${MODULES} = ${DEFAULT_MODULE} ] ; then
+ MODULES=()
+ fi
+ MODULES+=("$1")
+ shift;;
+ esac
+done
+
+FUTURIZE_EXCLUDED=(
+ "typehints.py"
+ "pb2"
+ "trivial_infernce.py"
+)
+FUTURIZE_GREP_PARAM=$( IFS='|'; echo "${FUTURIZE_EXCLUDED[*]}" )
+echo "Checking for files requiring stage 1 refactoring from futurize"
+futurize_results=$(futurize -j 8 --stage1 apache_beam 2>&1 |grep Refactored)
+futurize_filtered=$(echo "$futurize_results" |grep -v "$FUTURIZE_GREP_PARAM" \
+ || echo "")
+count=${#futurize_filtered}
+if [ "$count" != "0" ]; then
+ echo "Some of the changes require futurize stage 1 changes."
+ echo "The files with required changes:"
+ echo "$futurize_filtered"
+ echo "You can run futurize apache_beam to see the proposed changes."
+ exit 1
+fi
+echo "No future changes needed"
+
+# Following generated files are excluded from lint checks.
+EXCLUDED_GENERATED_FILES=(
+"apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py"
+"apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_messages.py"
+"apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_client.py"
+"apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_messages.py"
+"apache_beam/io/gcp/internal/clients/storage/storage_v1_client.py"
+"apache_beam/io/gcp/internal/clients/storage/storage_v1_messages.py"
+"apache_beam/coders/proto2_coder_test_messages_pb2.py"
+apache_beam/portability/api/*pb2*.py
+)
+
+FILES_TO_IGNORE=""
+for file in "${EXCLUDED_GENERATED_FILES[@]}"; do
+ if test -z "$FILES_TO_IGNORE"
+ then FILES_TO_IGNORE="$(basename $file)"
+ else FILES_TO_IGNORE="$FILES_TO_IGNORE, $(basename $file)"
+ fi
+done
+echo "Skipping lint for generated files: $FILES_TO_IGNORE"
+
+echo "Running pylint --py3k for modules $( printf "%s " "${MODULES[@]}" ):"
+pylint -j8 $( printf "%s " "${MODULES[@]}" ) \
+ --ignore-patterns="$FILES_TO_IGNORE" --py3k
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 364148355c2..35e705583f6 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -66,7 +66,7 @@ def get_version():
)
-REQUIRED_CYTHON_VERSION = '0.26.1'
+REQUIRED_CYTHON_VERSION = '0.28.1'
try:
_CYTHON_VERSION = get_distribution('cython').version
if StrictVersion(_CYTHON_VERSION) < StrictVersion(REQUIRED_CYTHON_VERSION):
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 0d49cbc958a..caa899de2db 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -17,7 +17,7 @@
[tox]
# new environments will be excluded by default unless explicitly added to
envlist.
-envlist = py27,py27-{gcp,cython,lint},py3-lint,docs
+envlist = py27,py27-{gcp,cython,lint,lint3},py3-lint,docs
toxworkdir = {toxinidir}/target/.tox
[pycodestyle]
@@ -34,7 +34,8 @@ whitelist_externals =
find
time
deps =
- cython: cython==0.26.1
+ cython: cython==0.28.1
+ future==0.16.0
# These 2 magic command overrides are required for Jenkins builds.
# Otherwise we get "OSError: [Errno 2] No such file or directory" errors.
@@ -88,6 +89,22 @@ commands =
pip --version
time {toxinidir}/run_pylint.sh
+[testenv:py27-lint3]
+# Checks for py2/3 compatibility issues
+deps =
+ pycodestyle==2.3.1
+ pylint==1.7.2
+ future==0.16.0
+ isort==4.2.15
+ flake8==3.5.0
+modules =
+ apache_beam/coders
+commands =
+ python --version
+ pip --version
+ time {toxinidir}/run_pylint_2to3.sh {[testenv:py27-lint3]modules}
+
+
[testenv:py3-lint]
deps =
pycodestyle==2.3.1
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 100665)
Time Spent: 18h 20m (was: 18h 10m)
> Futurize and fix python 2 compatibility for coders package
> ----------------------------------------------------------
>
> Key: BEAM-3981
> URL: https://issues.apache.org/jira/browse/BEAM-3981
> Project: Beam
> Issue Type: Sub-task
> Components: sdk-py-core
> Reporter: Robbe
> Assignee: Robbe
> Priority: Major
> Time Spent: 18h 20m
> Remaining Estimate: 0h
>
> Run automatic conversion with futurize tool on coders subpackage and fix
> python 2 compatibility. This prepares the subpackage for python 3 support.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)