[ https://issues.apache.org/jira/browse/BEAM-3981?focusedWorklogId=100665&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-100665 ]

ASF GitHub Bot logged work on BEAM-3981:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 10/May/18 16:09
            Start Date: 10/May/18 16:09
    Worklog Time Spent: 10m 
      Work Description: aaltay closed pull request #5053: [BEAM-3981] Futurize coders subpackage
URL: https://github.com/apache/beam/pull/5053
 
 
   

This is a PR merged from a forked repository. Because GitHub hides the
original diff of a foreign (forked) pull request once it is merged, the diff
is reproduced below for the sake of provenance:

diff --git a/sdks/python/apache_beam/coders/__init__.py b/sdks/python/apache_beam/coders/__init__.py
index acca89f70f4..3192494ebbf 100644
--- a/sdks/python/apache_beam/coders/__init__.py
+++ b/sdks/python/apache_beam/coders/__init__.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+from __future__ import absolute_import
 
 from apache_beam.coders.coders import *
 from apache_beam.coders.typecoders import registry
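
For context, a minimal sketch of what the __future__ import added above changes
(illustrative only, not part of the PR; it assumes the code lives inside the
apache_beam/coders package):

    from __future__ import absolute_import

    # With absolute_import in effect, a bare "import coders" on Python 2 no
    # longer resolves to the sibling apache_beam/coders/coders.py; imports must
    # be spelled out, and the same spelling works on Python 3.
    from apache_beam.coders import coders   # absolute import
    from . import typecoders                 # explicit relative import
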
diff --git a/sdks/python/apache_beam/coders/coder_impl.pxd b/sdks/python/apache_beam/coders/coder_impl.pxd
index 98dd508556a..8a85d085448 100644
--- a/sdks/python/apache_beam/coders/coder_impl.pxd
+++ b/sdks/python/apache_beam/coders/coder_impl.pxd
@@ -70,11 +70,11 @@ cdef class DeterministicFastPrimitivesCoderImpl(CoderImpl):
 
 cdef object NoneType
 cdef char UNKNOWN_TYPE, NONE_TYPE, INT_TYPE, FLOAT_TYPE, BOOL_TYPE
-cdef char STR_TYPE, UNICODE_TYPE, LIST_TYPE, TUPLE_TYPE, DICT_TYPE, SET_TYPE
+cdef char BYTES_TYPE, UNICODE_TYPE, LIST_TYPE, TUPLE_TYPE, DICT_TYPE, SET_TYPE
 
 cdef class FastPrimitivesCoderImpl(StreamCoderImpl):
   cdef CoderImpl fallback_coder_impl
-  @cython.locals(unicode_value=unicode, dict_value=dict)
+  @cython.locals(dict_value=dict)
   cpdef encode_to_stream(self, value, OutputStream stream, bint nested)
 
 
diff --git a/sdks/python/apache_beam/coders/coder_impl.py b/sdks/python/apache_beam/coders/coder_impl.py
index cc7ed87c3ad..2fcd0f22a31 100644
--- a/sdks/python/apache_beam/coders/coder_impl.py
+++ b/sdks/python/apache_beam/coders/coder_impl.py
@@ -24,13 +24,17 @@
 This module may be optionally compiled with Cython, using the corresponding
 coder_impl.pxd file for type hints.
 
+Py2/3 porting: Native range is used on both python versions instead of
+future.builtins.range to avoid performance regression in Cython compiled code.
+
 For internal use only; no backwards-compatibility guarantees.
 """
 from __future__ import absolute_import
+from __future__ import division
 
-from types import NoneType
-
-import six
+import sys
+from builtins import chr
+from builtins import object
 
 from apache_beam.coders import observable
 from apache_beam.utils import windowed_value
@@ -54,10 +58,12 @@
   from .slow_stream import get_varint_size
 # pylint: enable=wrong-import-order, wrong-import-position, ungrouped-imports
 
-try:
-  long        # Python 2
-except NameError:
-  long = int  # Python 3
+try:                # Python 2
+  long              # pylint: disable=long-builtin
+  unicode           # pylint: disable=unicode-builtin
+except NameError:   # Python 3
+  long = int
+  unicode = str
 
 
 class CoderImpl(object):
@@ -199,7 +205,7 @@ def __init__(self, coder, step_label):
     self._step_label = step_label
 
   def _check_safe(self, value):
-    if isinstance(value, (str, six.text_type, long, int, float)):
+    if isinstance(value, (bytes, unicode, long, int, float)):
       pass
     elif value is None:
       pass
@@ -253,7 +259,7 @@ def decode(self, encoded):
 NONE_TYPE = 0
 INT_TYPE = 1
 FLOAT_TYPE = 2
-STR_TYPE = 3
+BYTES_TYPE = 3
 UNICODE_TYPE = 4
 BOOL_TYPE = 9
 LIST_TYPE = 5
@@ -279,7 +285,7 @@ def get_estimated_size_and_observables(self, value, nested=False):
 
   def encode_to_stream(self, value, stream, nested):
     t = type(value)
-    if t is NoneType:
+    if value is None:
       stream.write_byte(NONE_TYPE)
     elif t is int:
       stream.write_byte(INT_TYPE)
@@ -287,13 +293,13 @@ def encode_to_stream(self, value, stream, nested):
     elif t is float:
       stream.write_byte(FLOAT_TYPE)
       stream.write_bigendian_double(value)
-    elif t is str:
-      stream.write_byte(STR_TYPE)
+    elif t is bytes:
+      stream.write_byte(BYTES_TYPE)
       stream.write(value, nested)
-    elif t is six.text_type:
-      unicode_value = value  # for typing
+    elif t is unicode:
+      text_value = value  # for typing
       stream.write_byte(UNICODE_TYPE)
-      stream.write(unicode_value.encode('utf-8'), nested)
+      stream.write(text_value.encode('utf-8'), nested)
     elif t is list or t is tuple or t is set:
       stream.write_byte(
           LIST_TYPE if t is list else TUPLE_TYPE if t is tuple else SET_TYPE)
@@ -304,7 +310,13 @@ def encode_to_stream(self, value, stream, nested):
       dict_value = value  # for typing
       stream.write_byte(DICT_TYPE)
       stream.write_var_int64(len(dict_value))
-      for k, v in dict_value.iteritems():
+      # Use iteritems() on Python 2 instead of future.builtins.iteritems to
+      # avoid performance regression in Cython compiled code.
+      if sys.version_info[0] == 2:
+        items = dict_value.iteritems()  # pylint: disable=dict-iter-method
+      else:
+        items = dict_value.items()
+      for k, v in items:
         self.encode_to_stream(k, stream, True)
         self.encode_to_stream(v, stream, True)
     elif t is bool:
@@ -322,7 +334,7 @@ def decode_from_stream(self, stream, nested):
       return stream.read_var_int64()
     elif t == FLOAT_TYPE:
       return stream.read_bigendian_double()
-    elif t == STR_TYPE:
+    elif t == BYTES_TYPE:
       return stream.read_all(nested)
     elif t == UNICODE_TYPE:
       return stream.read_all(nested).decode('utf-8')
@@ -394,8 +406,9 @@ def _from_normal_time(self, value):
 
   def encode_to_stream(self, value, out, nested):
     span_micros = value.end.micros - value.start.micros
-    out.write_bigendian_uint64(self._from_normal_time(value.end.micros / 1000))
-    out.write_var_int64(span_micros / 1000)
+    out.write_bigendian_uint64(
+        self._from_normal_time(value.end.micros // 1000))
+    out.write_var_int64(span_micros // 1000)
 
   def decode_from_stream(self, in_, nested):
     end_millis = self._to_normal_time(in_.read_bigendian_uint64())
@@ -409,7 +422,7 @@ def estimate_size(self, value, nested=False):
     # An IntervalWindow is context-insensitive, with a timestamp (8 bytes)
     # and a varint timespam.
     span = value.end.micros - value.start.micros
-    return 8 + get_varint_size(span / 1000)
+    return 8 + get_varint_size(span // 1000)
 
 
 class TimestampCoderImpl(StreamCoderImpl):
@@ -427,7 +440,7 @@ def estimate_size(self, unused_value, nested=False):
     return 8
 
 
-small_ints = [chr(_) for _ in range(128)]
+small_ints = [chr(_).encode('latin-1') for _ in range(128)]
 
 
 class VarIntCoderImpl(StreamCoderImpl):
@@ -474,7 +487,7 @@ def decode_from_stream(self, stream, nested):
     return self._value
 
   def encode(self, value):
-    b = ''  # avoid byte vs str vs unicode error
+    b = b''  # avoid byte vs str vs unicode error
     return b
 
   def decode(self, encoded):
@@ -783,7 +796,7 @@ def encode_to_stream(self, value, out, nested):
         # TODO(BEAM-1524): Clean this up once we have a BEAM wide consensus on
         # precision of timestamps.
         self._from_normal_time(
-            restore_sign * (abs(wv.timestamp_micros) / 1000)))
+            restore_sign * (abs(wv.timestamp_micros) // 1000)))
     self._windows_coder.encode_to_stream(wv.windows, out, True)
     # Default PaneInfo encoded byte representing NO_FIRING.
     self._pane_info_coder.encode_to_stream(wv.pane_info, out, True)
@@ -797,9 +810,9 @@ def decode_from_stream(self, in_stream, nested):
     # were indeed MIN/MAX timestamps.
     # TODO(BEAM-1524): Clean this up once we have a BEAM wide consensus on
     # precision of timestamps.
-    if timestamp == -(abs(MIN_TIMESTAMP.micros) / 1000):
+    if timestamp == -(abs(MIN_TIMESTAMP.micros) // 1000):
       timestamp = MIN_TIMESTAMP.micros
-    elif timestamp == (MAX_TIMESTAMP.micros / 1000):
+    elif timestamp == (MAX_TIMESTAMP.micros // 1000):
       timestamp = MAX_TIMESTAMP.micros
     else:
       timestamp *= 1000
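
Condensing the idioms introduced in coder_impl.py above into one illustrative
sketch (not the PR code; encode_value and iter_items are hypothetical helpers):

    from __future__ import absolute_import
    from __future__ import division

    import sys

    try:                # Python 2: these builtins exist
      long              # pylint: disable=long-builtin
      unicode           # pylint: disable=unicode-builtin
    except NameError:   # Python 3: alias them to the merged types
      long = int
      unicode = str

    BYTES_TYPE, UNICODE_TYPE = 3, 4

    def encode_value(value):
      # Byte strings keep their own tag; text is tagged separately and written
      # as UTF-8, matching FastPrimitivesCoderImpl above.
      if type(value) is bytes:
        return (BYTES_TYPE, value)
      if type(value) is unicode:
        return (UNICODE_TYPE, value.encode('utf-8'))
      raise TypeError(type(value))

    def iter_items(d):
      # iteritems() avoids materializing a list on Python 2; items() is already
      # a view on Python 3.
      if sys.version_info[0] == 2:
        return d.iteritems()  # pylint: disable=dict-iter-method
      return d.items()

    # With true division enabled by the __future__ import, / on two ints yields
    # a float, so the millisecond math above switches to floor division.
    span_micros = 27000000
    assert span_micros // 1000 == 27000
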
diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py
index d1ebe6b15da..b6aa40d6c82 100644
--- a/sdks/python/apache_beam/coders/coders.py
+++ b/sdks/python/apache_beam/coders/coders.py
@@ -22,7 +22,7 @@
 from __future__ import absolute_import
 
 import base64
-import cPickle as pickle
+from builtins import object
 
 import google.protobuf
 from google.protobuf import wrappers_pb2
@@ -33,6 +33,12 @@
 from apache_beam.portability.api import beam_runner_api_pb2
 from apache_beam.utils import proto_utils
 
+# This is for py2/3 compatibility. cPickle was renamed pickle in python 3.
+try:
+  import cPickle as pickle # Python 2
+except ImportError:
+  import pickle # Python 3
+
 # pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports
 try:
   from .stream import get_varint_size
@@ -210,11 +216,15 @@ def as_cloud_object(self):
   def __repr__(self):
     return self.__class__.__name__
 
+  # pylint: disable=protected-access
   def __eq__(self, other):
-    # pylint: disable=protected-access
     return (self.__class__ == other.__class__
             and self._dict_without_impl() == other._dict_without_impl())
-    # pylint: enable=protected-access
+
+  def __hash__(self):
+    return hash((self.__class__,) +
+                tuple(sorted(self._dict_without_impl().items())))
+  # pylint: enable=protected-access
 
   _known_urns = {}
 
@@ -312,7 +322,7 @@ class ToStringCoder(Coder):
 
   def encode(self, value):
     try:               # Python 2
-      if isinstance(value, unicode):
+      if isinstance(value, unicode):   # pylint: disable=unicode-builtin
         return value.encode('utf-8')
     except NameError:  # Python 3
       pass
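
The __hash__ addition above (and the similar additions in the test files
below) follows from a Python 3 rule worth spelling out; a minimal illustration
with assumed class names, not taken from the PR:

    # In Python 3, a class that defines __eq__ without __hash__ gets
    # __hash__ = None, so its instances become unhashable and can no longer be
    # dict keys or set members. Python 2 silently kept the inherited identity
    # hash.
    class EqOnly(object):
      def __eq__(self, other):
        return isinstance(other, EqOnly)

    class EqAndHash(object):
      def __eq__(self, other):
        return isinstance(other, EqAndHash)

      def __hash__(self):
        return hash(type(self))

    # hash(EqOnly())    -> TypeError on Python 3: unhashable type: 'EqOnly'
    # hash(EqAndHash()) -> works on both versions
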
diff --git a/sdks/python/apache_beam/coders/coders_test.py b/sdks/python/apache_beam/coders/coders_test.py
index 705de8920d5..a6c456f619f 100644
--- a/sdks/python/apache_beam/coders/coders_test.py
+++ b/sdks/python/apache_beam/coders/coders_test.py
@@ -14,11 +14,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
+from __future__ import absolute_import
 
 import base64
 import logging
 import unittest
+from builtins import object
 
 from apache_beam.coders import proto2_coder_test_messages_pb2 as test_message
 from apache_beam.coders import coders
@@ -47,17 +48,11 @@ def test_equality(self):
 class CodersTest(unittest.TestCase):
 
   def test_str_utf8_coder(self):
-    real_coder = coders_registry.get_coder(str)
-    expected_coder = coders.BytesCoder()
-    self.assertEqual(
-        real_coder.encode('abc'), expected_coder.encode('abc'))
-    self.assertEqual('abc', real_coder.decode(real_coder.encode('abc')))
-
     real_coder = coders_registry.get_coder(bytes)
     expected_coder = coders.BytesCoder()
     self.assertEqual(
-        real_coder.encode('abc'), expected_coder.encode('abc'))
-    self.assertEqual('abc', real_coder.decode(real_coder.encode('abc')))
+        real_coder.encode(b'abc'), expected_coder.encode(b'abc'))
+    self.assertEqual(b'abc', real_coder.decode(real_coder.encode(b'abc')))
 
 
 # The test proto message file was generated by running the following:
@@ -99,6 +94,9 @@ def __eq__(self, other):
       return True
     return False
 
+  def __hash__(self):
+    return hash(type(self))
+
 
 class FallbackCoderTest(unittest.TestCase):
 
diff --git a/sdks/python/apache_beam/coders/coders_test_common.py b/sdks/python/apache_beam/coders/coders_test_common.py
index 0ea7da2b6ad..0b8b4c20fde 100644
--- a/sdks/python/apache_beam/coders/coders_test_common.py
+++ b/sdks/python/apache_beam/coders/coders_test_common.py
@@ -21,6 +21,7 @@
 import logging
 import math
 import unittest
+from builtins import range
 
 import dill
 
@@ -103,7 +104,7 @@ def test_custom_coder(self):
 
     self.check_coder(CustomCoder(), 1, -10, 5)
     self.check_coder(coders.TupleCoder((CustomCoder(), coders.BytesCoder())),
-                     (1, 'a'), (-10, 'b'), (5, 'c'))
+                     (1, b'a'), (-10, b'b'), (5, b'c'))
 
   def test_pickle_coder(self):
     self.check_coder(coders.PickleCoder(), 'a', 1, 1.5, (1, 2, 3))
@@ -129,7 +130,7 @@ def test_dill_coder(self):
 
   def test_fast_primitives_coder(self):
     coder = coders.FastPrimitivesCoder(coders.SingletonCoder(len))
-    self.check_coder(coder, None, 1, -1, 1.5, 'str\0str', u'unicode\0\u0101')
+    self.check_coder(coder, None, 1, -1, 1.5, b'str\0str', u'unicode\0\u0101')
     self.check_coder(coder, (), (1, 2, 3))
     self.check_coder(coder, [], [1, 2, 3])
     self.check_coder(coder, dict(), {'a': 'b'}, {0: dict(), 1: len})
@@ -139,7 +140,7 @@ def test_fast_primitives_coder(self):
     self.check_coder(coders.TupleCoder((coder,)), ('a',), (1,))
 
   def test_bytes_coder(self):
-    self.check_coder(coders.BytesCoder(), 'a', '\0', 'z' * 1000)
+    self.check_coder(coders.BytesCoder(), b'a', b'\0', b'z' * 1000)
 
   def test_varint_coder(self):
     # Small ints.
@@ -190,7 +191,7 @@ def test_timestamp_coder(self):
                      timestamp.Timestamp(micros=1234567890123456789))
     self.check_coder(
         coders.TupleCoder((coders.TimestampCoder(), coders.BytesCoder())),
-        (timestamp.Timestamp.of(27), 'abc'))
+        (timestamp.Timestamp.of(27), b'abc'))
 
   def test_tuple_coder(self):
     kv_coder = coders.TupleCoder((coders.VarIntCoder(), coders.BytesCoder()))
@@ -206,14 +207,14 @@ def test_tuple_coder(self):
         kv_coder.as_cloud_object())
     # Test binary representation
     self.assertEqual(
-        '\x04abc',
-        kv_coder.encode((4, 'abc')))
+        b'\x04abc',
+        kv_coder.encode((4, b'abc')))
     # Test unnested
     self.check_coder(
         kv_coder,
-        (1, 'a'),
-        (-2, 'a' * 100),
-        (300, 'abc\0' * 5))
+        (1, b'a'),
+        (-2, b'a' * 100),
+        (300, b'abc\0' * 5))
     # Test nested
     self.check_coder(
         coders.TupleCoder(
@@ -322,12 +323,12 @@ def test_windowed_value_coder(self):
         },
         coder.as_cloud_object())
     # Test binary representation
-    self.assertEqual('\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01',
+    self.assertEqual(b'\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01',
                      coder.encode(window.GlobalWindows.windowed_value(1)))
 
     # Test decoding large timestamp
     self.assertEqual(
-        coder.decode('\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00'),
+        coder.decode(b'\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00'),
         windowed_value.create(0, MIN_TIMESTAMP.micros, (GlobalWindow(),)))
 
     # Test unnested
@@ -364,7 +365,7 @@ def test_proto_coder(self):
     proto_coder = coders.ProtoCoder(ma.__class__)
     self.check_coder(proto_coder, ma)
     self.check_coder(coders.TupleCoder((proto_coder, coders.BytesCoder())),
-                     (ma, 'a'), (mb, 'b'))
+                     (ma, b'a'), (mb, b'b'))
 
   def test_global_window_coder(self):
     coder = coders.GlobalWindowCoder()
@@ -391,16 +392,16 @@ def test_length_prefix_coder(self):
         },
         coder.as_cloud_object())
     # Test binary representation
-    self.assertEqual('\x00', coder.encode(''))
-    self.assertEqual('\x01a', coder.encode('a'))
-    self.assertEqual('\x02bc', coder.encode('bc'))
-    self.assertEqual('\xff\x7f' + 'z' * 16383, coder.encode('z' * 16383))
+    self.assertEqual(b'\x00', coder.encode(b''))
+    self.assertEqual(b'\x01a', coder.encode(b'a'))
+    self.assertEqual(b'\x02bc', coder.encode(b'bc'))
+    self.assertEqual(b'\xff\x7f' + b'z' * 16383, coder.encode(b'z' * 16383))
     # Test unnested
-    self.check_coder(coder, '', 'a', 'bc', 'def')
+    self.check_coder(coder, b'', b'a', b'bc', b'def')
     # Test nested
     self.check_coder(coders.TupleCoder((coder, coder)),
-                     ('', 'a'),
-                     ('bc', 'def'))
+                     (b'', b'a'),
+                     (b'bc', b'def'))
 
   def test_nested_observables(self):
     class FakeObservableIterator(observable.ObservableMixin):
diff --git a/sdks/python/apache_beam/coders/fast_coders_test.py b/sdks/python/apache_beam/coders/fast_coders_test.py
index a13334a2c26..eb4077344f6 100644
--- a/sdks/python/apache_beam/coders/fast_coders_test.py
+++ b/sdks/python/apache_beam/coders/fast_coders_test.py
@@ -16,6 +16,7 @@
 #
 
 """Unit tests for compiled implementation of coder impls."""
+from __future__ import absolute_import
 
 import logging
 import unittest
diff --git a/sdks/python/apache_beam/coders/observable.py b/sdks/python/apache_beam/coders/observable.py
index fc952cf4e55..3d0a7fc10bf 100644
--- a/sdks/python/apache_beam/coders/observable.py
+++ b/sdks/python/apache_beam/coders/observable.py
@@ -20,6 +20,9 @@
 
 For internal use only; no backwards-compatibility guarantees.
 """
+from __future__ import absolute_import
+
+from builtins import object
 
 
 class ObservableMixin(object):
diff --git a/sdks/python/apache_beam/coders/observable_test.py b/sdks/python/apache_beam/coders/observable_test.py
index 09ca3041c29..ce32bf05ef2 100644
--- a/sdks/python/apache_beam/coders/observable_test.py
+++ b/sdks/python/apache_beam/coders/observable_test.py
@@ -16,6 +16,7 @@
 #
 
 """Tests for the Observable mixin class."""
+from __future__ import absolute_import
 
 import logging
 import unittest
diff --git a/sdks/python/apache_beam/coders/slow_coders_test.py b/sdks/python/apache_beam/coders/slow_coders_test.py
index 97aa39ca094..b543b56dd26 100644
--- a/sdks/python/apache_beam/coders/slow_coders_test.py
+++ b/sdks/python/apache_beam/coders/slow_coders_test.py
@@ -16,6 +16,7 @@
 #
 
 """Unit tests for uncompiled implementation of coder impls."""
+from __future__ import absolute_import
 
 import logging
 import unittest
diff --git a/sdks/python/apache_beam/coders/slow_stream.py b/sdks/python/apache_beam/coders/slow_stream.py
index 1ab55d90f98..da27a49883a 100644
--- a/sdks/python/apache_beam/coders/slow_stream.py
+++ b/sdks/python/apache_beam/coders/slow_stream.py
@@ -19,8 +19,11 @@
 
 For internal use only; no backwards-compatibility guarantees.
 """
+from __future__ import absolute_import
 
 import struct
+from builtins import chr
+from builtins import object
 
 
 class OutputStream(object):
@@ -32,13 +35,13 @@ def __init__(self):
     self.data = []
 
   def write(self, b, nested=False):
-    assert isinstance(b, str)
+    assert isinstance(b, bytes)
     if nested:
       self.write_var_int64(len(b))
     self.data.append(b)
 
   def write_byte(self, val):
-    self.data.append(chr(val))
+    self.data.append(chr(val).encode('latin-1'))
 
   def write_var_int64(self, v):
     if v < 0:
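
A short sketch of why latin-1 is the right codec in write_byte() above
(byte_for is a hypothetical helper, not PR code; the builtins import mirrors
the one added to slow_stream.py):

    from builtins import chr  # future's chr returns text on Python 2 as well

    # chr(val) is a one-character text string, but the output stream collects
    # bytes. latin-1 maps code points 0-255 one-to-one onto byte values, so the
    # encoded result equals Python 2's native chr(val).
    def byte_for(val):
      assert 0 <= val <= 255
      return chr(val).encode('latin-1')

    assert byte_for(0x00) == b'\x00'
    assert byte_for(0x7f) == b'\x7f'
    assert byte_for(0xc8) == b'\xc8'
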
diff --git a/sdks/python/apache_beam/coders/standard_coders_test.py b/sdks/python/apache_beam/coders/standard_coders_test.py
index b595cfd5922..f704c490c97 100644
--- a/sdks/python/apache_beam/coders/standard_coders_test.py
+++ b/sdks/python/apache_beam/coders/standard_coders_test.py
@@ -17,6 +17,7 @@
 
 """Unit tests for coders that must be consistent across all Beam SDKs.
 """
+from __future__ import absolute_import
 from __future__ import print_function
 
 import json
@@ -24,6 +25,7 @@
 import os.path
 import sys
 import unittest
+from builtins import map
 
 import yaml
 
@@ -74,7 +76,7 @@ class StandardCodersTest(unittest.TestCase):
           lambda x: IntervalWindow(
               start=Timestamp(micros=(x['end'] - x['span']) * 1000),
               end=Timestamp(micros=x['end'] * 1000)),
-      'beam:coder:iterable:v1': lambda x, parser: map(parser, x),
+      'beam:coder:iterable:v1': lambda x, parser: list(map(parser, x)),
       'beam:coder:global_window:v1': lambda x: window.GlobalWindow(),
       'beam:coder:windowed_value:v1':
           lambda x, value_parser, window_parser: windowed_value.create(
diff --git a/sdks/python/apache_beam/coders/stream_test.py b/sdks/python/apache_beam/coders/stream_test.py
index 15bc5eb9ba9..641fefad45d 100644
--- a/sdks/python/apache_beam/coders/stream_test.py
+++ b/sdks/python/apache_beam/coders/stream_test.py
@@ -16,10 +16,13 @@
 #
 
 """Tests for the stream implementations."""
+from __future__ import absolute_import
+from __future__ import division
 
 import logging
 import math
 import unittest
+from builtins import range
 
 from apache_beam.coders import slow_stream
 
diff --git a/sdks/python/apache_beam/coders/typecoders.py b/sdks/python/apache_beam/coders/typecoders.py
index 355c6230f92..e4efa2c7ffd 100644
--- a/sdks/python/apache_beam/coders/typecoders.py
+++ b/sdks/python/apache_beam/coders/typecoders.py
@@ -63,12 +63,18 @@ def MakeXyzs(v):
 
 See apache_beam.typehints.decorators module for more details.
 """
+from __future__ import absolute_import
 
-import six
+from builtins import object
 
 from apache_beam.coders import coders
 from apache_beam.typehints import typehints
 
+try:
+  unicode           # pylint: disable=unicode-builtin
+except NameError:
+  unicode = str
+
 __all__ = ['registry']
 
 
@@ -84,9 +90,8 @@ def register_standard_coders(self, fallback_coder):
     """Register coders for all basic and composite types."""
     self._register_coder_internal(int, coders.VarIntCoder)
     self._register_coder_internal(float, coders.FloatCoder)
-    self._register_coder_internal(str, coders.BytesCoder)
     self._register_coder_internal(bytes, coders.BytesCoder)
-    self._register_coder_internal(six.text_type, coders.StrUtf8Coder)
+    self._register_coder_internal(unicode, coders.StrUtf8Coder)
     self._register_coder_internal(typehints.TupleConstraint, coders.TupleCoder)
     # Default fallback coders applied in that order until the first matching
     # coder found.
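
The net effect of the registration change above, condensed into a simplified
mapping (illustrative only; the real registry stores coder classes, not
strings):

    try:
      unicode          # Python 2  # pylint: disable=unicode-builtin
    except NameError:
      unicode = str    # Python 3

    # Keys are the cross-version types: Python 2's native str *is* bytes, so it
    # still maps to BytesCoder, while text maps to StrUtf8Coder on both
    # versions.
    simplified_registry = {
      bytes: 'BytesCoder',
      unicode: 'StrUtf8Coder',
    }

    assert simplified_registry[bytes] == 'BytesCoder'
    assert simplified_registry[unicode] == 'StrUtf8Coder'
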
diff --git a/sdks/python/apache_beam/coders/typecoders_test.py b/sdks/python/apache_beam/coders/typecoders_test.py
index 2b6aa7a5129..b64cb653e56 100644
--- a/sdks/python/apache_beam/coders/typecoders_test.py
+++ b/sdks/python/apache_beam/coders/typecoders_test.py
@@ -16,8 +16,10 @@
 #
 
 """Unit tests for the typecoders module."""
+from __future__ import absolute_import
 
 import unittest
+from builtins import object
 
 from apache_beam.coders import coders
 from apache_beam.coders import typecoders
@@ -33,6 +35,9 @@ def __init__(self, n):
   def __eq__(self, other):
     return self.number == other.number
 
+  def __hash__(self):
+    return self.number
+
 
 class CustomCoder(coders.Coder):
 
@@ -75,7 +80,7 @@ def test_get_coder_with_composite_custom_coder(self):
 
   def test_get_coder_with_standard_coder(self):
     self.assertEqual(coders.BytesCoder,
-                     typecoders.registry.get_coder(str).__class__)
+                     typecoders.registry.get_coder(bytes).__class__)
 
   def test_fallbackcoder(self):
     coder = typecoders.registry.get_coder(typehints.Any)
@@ -100,22 +105,16 @@ def test_standard_int_coder(self):
                      real_coder.decode(real_coder.encode(0x040404040404)))
 
   def test_standard_str_coder(self):
-    real_coder = typecoders.registry.get_coder(str)
-    expected_coder = coders.BytesCoder()
-    self.assertEqual(
-        real_coder.encode('abc'), expected_coder.encode('abc'))
-    self.assertEqual('abc', real_coder.decode(real_coder.encode('abc')))
-
     real_coder = typecoders.registry.get_coder(bytes)
     expected_coder = coders.BytesCoder()
     self.assertEqual(
-        real_coder.encode('abc'), expected_coder.encode('abc'))
-    self.assertEqual('abc', real_coder.decode(real_coder.encode('abc')))
+        real_coder.encode(b'abc'), expected_coder.encode(b'abc'))
+    self.assertEqual(b'abc', real_coder.decode(real_coder.encode(b'abc')))
 
   def test_iterable_coder(self):
-    real_coder = typecoders.registry.get_coder(typehints.Iterable[str])
+    real_coder = typecoders.registry.get_coder(typehints.Iterable[bytes])
     expected_coder = coders.IterableCoder(coders.BytesCoder())
-    values = ['abc', 'xyz']
+    values = [b'abc', b'xyz']
     self.assertEqual(expected_coder, real_coder)
     self.assertEqual(real_coder.encode(values), expected_coder.encode(values))
 
diff --git a/sdks/python/generate_pydoc.sh b/sdks/python/generate_pydoc.sh
index ae8d043d376..68b7006a1b6 100755
--- a/sdks/python/generate_pydoc.sh
+++ b/sdks/python/generate_pydoc.sh
@@ -121,6 +121,9 @@ ignore_identifiers = [
   # Ignore broken built-in type references
   'tuple',
 
+  # Ignore future.builtin type references
+  'future.types.newobject.newobject',
+
   # Ignore private classes
   'apache_beam.coders.coders._PickleCoderBase',
   'apache_beam.coders.coders.FastCoder',
diff --git a/sdks/python/run_pylint.sh b/sdks/python/run_pylint.sh
index 7150fa6f50e..4e7dc9d8218 100755
--- a/sdks/python/run_pylint.sh
+++ b/sdks/python/run_pylint.sh
@@ -89,25 +89,6 @@ done
 isort ${MODULE} -p apache_beam --line-width 120 --check-only --order-by-type \
     --combine-star --force-single-line-imports --diff --recursive ${SKIP_PARAM}
 
-FUTURIZE_EXCLUDED=(
-  "typehints.py"
-  "pb2"
-  "trivial_infernce.py"
-)
-FUTURIZE_GREP_PARAM=$( IFS='|'; echo "${ids[*]}" )
-echo "Checking for files requiring stage 1 refactoring from futurize"
-futurize_results=$(futurize -j 8 --stage1 apache_beam 2>&1 |grep Refactored)
-futurize_filtered=$(echo "$futurize_results" |grep -v "$FUTURIZE_GREP_PARAM" || echo "")
-count=${#futurize_filtered}
-if [ "$count" != "0" ]; then
-  echo "Some of the changes require futurize stage 1 changes."
-  echo "The files with required changes:"
-  echo "$futurize_filtered"
-  echo "You can run futurize apache_beam to see the proposed changes."
-  exit 1
-fi
-echo "No future changes needed"
-
 echo "Checking unittest.main for module ${MODULE}:"
 TESTS_MISSING_MAIN=$(find ${MODULE} | grep '\.py$' | xargs grep -l '^import unittest$' | xargs grep -L unittest.main)
 if [ -n "${TESTS_MISSING_MAIN}" ]; then
diff --git a/sdks/python/run_pylint_2to3.sh b/sdks/python/run_pylint_2to3.sh
new file mode 100755
index 00000000000..9a074368dba
--- /dev/null
+++ b/sdks/python/run_pylint_2to3.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+
+# This script will run pylint with the --py3k parameter to check for python
+# 3 compatibility. This script can run on a list of modules provided as
+# command line arguments.
+#
+# The exit-code of the script indicates success or a failure.
+
+set -o errexit
+set -o pipefail
+
+DEFAULT_MODULE=apache_beam
+
+usage(){ echo "Usage: $0 [MODULE|--help]
+# The default MODULE is $DEFAULT_MODULE"; }
+
+MODULES=${DEFAULT_MODULE}
+while [[ $# -gt 0 ]] ; do
+  key="$1"
+  case ${key} in
+    --help) usage; exit 1;;
+       *)
+       if [ ${MODULES} = ${DEFAULT_MODULE} ] ; then
+         MODULES=()
+       fi
+       MODULES+=("$1")
+       shift;;
+  esac
+done
+
+FUTURIZE_EXCLUDED=(
+  "typehints.py"
+  "pb2"
+  "trivial_infernce.py"
+)
+FUTURIZE_GREP_PARAM=$( IFS='|'; echo "${ids[*]}" )
+echo "Checking for files requiring stage 1 refactoring from futurize"
+futurize_results=$(futurize -j 8 --stage1 apache_beam 2>&1 |grep Refactored)
+futurize_filtered=$(echo "$futurize_results" |grep -v "$FUTURIZE_GREP_PARAM" \
+  || echo "")
+count=${#futurize_filtered}
+if [ "$count" != "0" ]; then
+  echo "Some of the changes require futurize stage 1 changes."
+  echo "The files with required changes:"
+  echo "$futurize_filtered"
+  echo "You can run futurize apache_beam to see the proposed changes."
+  exit 1
+fi
+echo "No future changes needed"
+
+# Following generated files are excluded from lint checks.
+EXCLUDED_GENERATED_FILES=(
+"apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py"
+"apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_messages.py"
+"apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_client.py"
+"apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_messages.py"
+"apache_beam/io/gcp/internal/clients/storage/storage_v1_client.py"
+"apache_beam/io/gcp/internal/clients/storage/storage_v1_messages.py"
+"apache_beam/coders/proto2_coder_test_messages_pb2.py"
+apache_beam/portability/api/*pb2*.py
+)
+
+FILES_TO_IGNORE=""
+for file in "${EXCLUDED_GENERATED_FILES[@]}"; do
+  if test -z "$FILES_TO_IGNORE"
+    then FILES_TO_IGNORE="$(basename $file)"
+    else FILES_TO_IGNORE="$FILES_TO_IGNORE, $(basename $file)"
+  fi
+done
+echo "Skipping lint for generated files: $FILES_TO_IGNORE"
+
+echo "Running pylint --py3k for modules $( printf "%s " "${MODULES[@]}" ):"
+pylint -j8 $( printf "%s " "${MODULES[@]}" ) \
+  --ignore-patterns="$FILES_TO_IGNORE" --py3k
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 364148355c2..35e705583f6 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -66,7 +66,7 @@ def get_version():
   )
 
 
-REQUIRED_CYTHON_VERSION = '0.26.1'
+REQUIRED_CYTHON_VERSION = '0.28.1'
 try:
   _CYTHON_VERSION = get_distribution('cython').version
   if StrictVersion(_CYTHON_VERSION) < StrictVersion(REQUIRED_CYTHON_VERSION):
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 0d49cbc958a..caa899de2db 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -17,7 +17,7 @@
 
 [tox]
 # new environments will be excluded by default unless explicitly added to envlist.
-envlist = py27,py27-{gcp,cython,lint},py3-lint,docs
+envlist = py27,py27-{gcp,cython,lint,lint3},py3-lint,docs
 toxworkdir = {toxinidir}/target/.tox
 
 [pycodestyle]
@@ -34,7 +34,8 @@ whitelist_externals =
   find
   time
 deps =
-  cython: cython==0.26.1
+  cython: cython==0.28.1
+  future==0.16.0
 
 # These 2 magic command overrides are required for Jenkins builds.
 # Otherwise we get "OSError: [Errno 2] No such file or directory" errors.
@@ -88,6 +89,22 @@ commands =
   pip --version
   time {toxinidir}/run_pylint.sh
 
+[testenv:py27-lint3]
+# Checks for py2/3 compatibility issues
+deps =
+  pycodestyle==2.3.1
+  pylint==1.7.2
+  future==0.16.0
+  isort==4.2.15
+  flake8==3.5.0
+modules =
+  apache_beam/coders
+commands =
+  python --version
+  pip --version
+  time {toxinidir}/run_pylint_2to3.sh {[testenv:py27-lint3]modules}
+
+
 [testenv:py3-lint]
 deps =
   pycodestyle==2.3.1


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 100665)
    Time Spent: 18h 20m  (was: 18h 10m)

> Futurize and fix python 2 compatibility for coders package
> ----------------------------------------------------------
>
>                 Key: BEAM-3981
>                 URL: https://issues.apache.org/jira/browse/BEAM-3981
>             Project: Beam
>          Issue Type: Sub-task
>          Components: sdk-py-core
>            Reporter: Robbe
>            Assignee: Robbe
>            Priority: Major
>          Time Spent: 18h 20m
>  Remaining Estimate: 0h
>
> Run automatic conversion with the futurize tool on the coders subpackage and
> fix Python 2 compatibility. This prepares the subpackage for Python 3 support.
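
As exercised by the updated tests in the diff, byte strings are the canonical
input for the bytes-oriented coders on both Python versions; a minimal usage
sketch (not from the PR):

    from apache_beam.coders import coders

    # Mirrors the updated test_bytes_coder / test_str_utf8_coder above: byte
    # strings round-trip through BytesCoder on Python 2 and Python 3 alike.
    coder = coders.BytesCoder()
    assert coder.decode(coder.encode(b'abc')) == b'abc'
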



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
