[ 
https://issues.apache.org/jira/browse/BEAM-3761?focusedWorklogId=111773&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-111773
 ]

ASF GitHub Bot logged work on BEAM-3761:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 14/Jun/18 04:56
            Start Date: 14/Jun/18 04:56
    Worklog Time Spent: 10m 
      Work Description: stale[bot] closed pull request #4774: [BEAM-3761]Fix 
Python 3 cmp usage
URL: https://github.com/apache/beam/pull/4774
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py 
b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
index 87d798bebe3..7a58fe3e212 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
@@ -90,7 +90,7 @@ def compare_path(p1, p2):
   3. If no `id` is defined for both paths, then their `names` are compared.
   """
 
-  result = cmp(p1.kind, p2.kind)
+  result = (p1.kind > p2.kind) - (p1.kind < p2.kind)
   if result != 0:
     return result
 
@@ -98,12 +98,12 @@ def compare_path(p1, p2):
     if not p2.HasField('id'):
       return -1
 
-    return cmp(p1.id, p2.id)
+    return (p1.id > p2.id) - (p1.id < p2.id)
 
   if p2.HasField('id'):
     return 1
 
-  return cmp(p1.name, p2.name)
+  return (p1.name > p2.name) - (p1.name < p2.name)
 
 
 def get_datastore(project):
diff --git a/sdks/python/apache_beam/io/vcfio_test.py 
b/sdks/python/apache_beam/io/vcfio_test.py
index 029515fe341..a750dc74711 100644
--- a/sdks/python/apache_beam/io/vcfio_test.py
+++ b/sdks/python/apache_beam/io/vcfio_test.py
@@ -70,15 +70,6 @@ def get_full_dir():
   return os.path.join(os.path.dirname(__file__), '..', 'testing', 'data', 
'vcf')
 
 
-# Helper method for comparing variants.
-def _variant_comparator(v1, v2):
-  if v1.reference_name == v2.reference_name:
-    if v1.start == v2.start:
-      return cmp(v1.end, v2.end)
-    return cmp(v1.start, v2.start)
-  return cmp(v1.reference_name, v2.reference_name)
-
-
 # Helper method for verifying equal count on PCollection.
 def _count_equals_to(expected_count):
   def _count_equal(actual_list):
diff --git a/sdks/python/apache_beam/testing/test_stream.py 
b/sdks/python/apache_beam/testing/test_stream.py
index 8a63e7bd056..db4cc66b1d0 100644
--- a/sdks/python/apache_beam/testing/test_stream.py
+++ b/sdks/python/apache_beam/testing/test_stream.py
@@ -21,7 +21,7 @@
 """
 
 from abc import ABCMeta
-from abc import abstractmethod
+from functools import total_ordering
 
 from apache_beam import coders
 from apache_beam import core
@@ -46,44 +46,65 @@ class Event(object):
 
   __metaclass__ = ABCMeta
 
-  def __cmp__(self, other):
-    if type(self) is not type(other):
-      return cmp(type(self), type(other))
-    return self._typed_cmp(other)
-
-  @abstractmethod
-  def _typed_cmp(self, other):
-    raise NotImplementedError
-
 
+@total_ordering
 class ElementEvent(Event):
   """Element-producing test stream event."""
 
   def __init__(self, timestamped_values):
     self.timestamped_values = timestamped_values
 
-  def _typed_cmp(self, other):
-    return cmp(self.timestamped_values, other.timestamped_values)
+  def __eq__(self, other):
+    return (type(self) is type(other) and
+            self.timestamped_values == other.timestamped_values)
+
+  def __ne__(self, other):
+    return not self.__eq__(other)
+
+  def __lt__(self, other):
+    if type(self) is not type(other):
+      return type(self) < type(other)
+    return self.timestamped_values < other.timestamped_values
 
 
+@total_ordering
 class WatermarkEvent(Event):
   """Watermark-advancing test stream event."""
 
   def __init__(self, new_watermark):
     self.new_watermark = timestamp.Timestamp.of(new_watermark)
 
-  def _typed_cmp(self, other):
-    return cmp(self.new_watermark, other.new_watermark)
+  def __eq__(self, other):
+    return (type(self) is type(other) and
+            self.new_watermark == other.new_watermark)
 
+  def __ne__(self, other):
+    return not self.__eq__(other)
 
+  def __lt__(self, other):
+    if type(self) is not type(other):
+      return type(self) < type(other)
+    return self.new_watermark < other.new_watermark
+
+
+@total_ordering
 class ProcessingTimeEvent(Event):
   """Processing time-advancing test stream event."""
 
   def __init__(self, advance_by):
     self.advance_by = timestamp.Duration.of(advance_by)
 
-  def _typed_cmp(self, other):
-    return cmp(self.advance_by, other.advance_by)
+  def __eq__(self, other):
+    return (type(self) is type(other) and
+            self.advance_by == other.advance_by)
+
+  def __ne__(self, other):
+    return not self.__eq__(other)
+
+  def __lt__(self, other):
+    if type(self) is not type(other):
+      return type(self) < type(other)
+    return self.advance_by < other.advance_by
 
 
 class TestStream(PTransform):
diff --git a/sdks/python/apache_beam/transforms/window.py 
b/sdks/python/apache_beam/transforms/window.py
index c250e8c6d36..2a0c5236d93 100644
--- a/sdks/python/apache_beam/transforms/window.py
+++ b/sdks/python/apache_beam/transforms/window.py
@@ -50,6 +50,7 @@
 from __future__ import absolute_import
 
 import abc
+from functools import total_ordering
 
 from google.protobuf import duration_pb2
 from google.protobuf import timestamp_pb2
@@ -177,6 +178,7 @@ def get_transformed_output_time(self, window, 
input_timestamp):  # pylint: disab
   urns.RunnerApiFn.register_pickle_urn(python_urns.PICKLED_WINDOWFN)
 
 
+@total_ordering
 class BoundedWindow(object):
   """A window for timestamps in range (-infinity, end).
 
@@ -190,15 +192,18 @@ def __init__(self, end):
   def max_timestamp(self):
     return self.end.predecessor()
 
-  def __cmp__(self, other):
-    # Order first by endpoint, then arbitrarily.
-    return cmp(self.end, other.end) or cmp(hash(self), hash(other))
-
   def __eq__(self, other):
-    raise NotImplementedError
+    return type(self) is type(other) and self.end == other.end
 
-  def __hash__(self):
-    return hash(self.end)
+  def __ne__(self, other):
+    return not self.__eq__(other)
+
+  def __lt__(self, other):
+    if type(self) is not type(other):
+      raise TypeError("Can not compare type %s and %s" %
+                      (type(self), type(other)))
+    # Order first by endpoint, then arbitrarily.
+    return (self.end, hash(self)) < (other.end, hash(other))
 
   def __repr__(self):
     return '[?, %s)' % float(self.end)
@@ -220,7 +225,9 @@ def __hash__(self):
     return hash((self.start, self.end))
 
   def __eq__(self, other):
-    return self.start == other.start and self.end == other.end
+    return (type(self) == type(other) and
+            self.start == other.start and
+            self.end == other.end)
 
   def __repr__(self):
     return '[%s, %s)' % (float(self.start), float(self.end))
@@ -233,6 +240,7 @@ def union(self, other):
         min(self.start, other.start), max(self.end, other.end))
 
 
+@total_ordering
 class TimestampedValue(object):
   """A timestamped value having a value and a timestamp.
 
@@ -245,10 +253,14 @@ def __init__(self, value, timestamp):
     self.value = value
     self.timestamp = Timestamp.of(timestamp)
 
-  def __cmp__(self, other):
+  def __eq__(self, other):
+    return self.value == other.value and self.timestamp == other.timestamp
+
+  def __lt__(self, other):
     if type(self) is not type(other):
-      return cmp(type(self), type(other))
-    return cmp((self.value, self.timestamp), (other.value, other.timestamp))
+      raise TypeError("Can not compare type %s and %s" %
+                      (type(self), type(other)))
+    return (self.value, self.timestamp) < (other.value, other.timestamp)
 
 
 class GlobalWindow(BoundedWindow):
diff --git a/sdks/python/apache_beam/utils/timestamp.py 
b/sdks/python/apache_beam/utils/timestamp.py
index 7c41c3002f6..91d018ffde9 100644
--- a/sdks/python/apache_beam/utils/timestamp.py
+++ b/sdks/python/apache_beam/utils/timestamp.py
@@ -20,15 +20,16 @@
 For internal use only; no backwards-compatibility guarantees.
 """
 
-from __future__ import absolute_import
-from __future__ import division
+from __future__ import absolute_import, division
 
 import datetime
 import re
+from functools import total_ordering
 
 import pytz
 
 
+@total_ordering
 class Timestamp(object):
   """Represents a Unix second timestamp with microsecond granularity.
 
@@ -142,11 +143,23 @@ def __int__(self):
     # Note that the returned value may have lost precision.
     return self.micros // 1000000
 
-  def __cmp__(self, other):
+  def __eq__(self, other):
     # Allow comparisons between Duration and Timestamp values.
-    if not isinstance(other, Duration):
+    if not isinstance(other, (Timestamp, Duration)):
       other = Timestamp.of(other)
-    return cmp(self.micros, other.micros)
+    return self.micros == other.micros
+
+  def __ne__(self, other):
+    # Allow comparisons between Duration and Timestamp values.
+    if not isinstance(other, (Timestamp, Duration)):
+      other = Timestamp.of(other)
+    return self.micros != other.micros
+
+  def __lt__(self, other):
+    # Allow comparisons between Duration and Timestamp values.
+    if not isinstance(other, (Timestamp, Duration)):
+      other = Timestamp.of(other)
+    return self.micros < other.micros
 
   def __hash__(self):
     return hash(self.micros)
@@ -171,6 +184,7 @@ def __mod__(self, other):
 MAX_TIMESTAMP = Timestamp(micros=0x7fffffffffffffff)
 
 
+@total_ordering
 class Duration(object):
   """Represents a second duration with microsecond granularity.
 
@@ -220,11 +234,23 @@ def __float__(self):
     # Note that the returned value may have lost precision.
     return self.micros / 1000000
 
-  def __cmp__(self, other):
+  def __eq__(self, other):
+    # Allow comparisons between Duration and Timestamp values.
+    if not isinstance(other, (Timestamp, Duration)):
+      other = Duration.of(other)
+    return self.micros == other.micros
+
+  def __ne__(self, other):
+    # Allow comparisons between Duration and Timestamp values.
+    if not isinstance(other, (Timestamp, Duration)):
+      other = Duration.of(other)
+    return self.micros != other.micros
+
+  def __lt__(self, other):
     # Allow comparisons between Duration and Timestamp values.
-    if not isinstance(other, Timestamp):
+    if not isinstance(other, (Timestamp, Duration)):
       other = Duration.of(other)
-    return cmp(self.micros, other.micros)
+    return self.micros < other.micros
 
   def __hash__(self):
     return hash(self.micros)
diff --git a/sdks/python/apache_beam/utils/windowed_value.py 
b/sdks/python/apache_beam/utils/windowed_value.py
index 1b3228b2e6e..a586acf96fc 100644
--- a/sdks/python/apache_beam/utils/windowed_value.py
+++ b/sdks/python/apache_beam/utils/windowed_value.py
@@ -193,7 +193,7 @@ def __cmp__(left, right):  # pylint: 
disable=no-self-argument
     on unequal values (always returning 1).
     """
     if type(left) is not type(right):
-      return cmp(type(left), type(right))
+      return 1
 
     # TODO(robertwb): Avoid the type checks?
     # Returns False (0) if equal, and True (1) if not.


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 111773)
    Time Spent: 7h 50m  (was: 7h 40m)

> Fix Python 3 cmp function
> -------------------------
>
>                 Key: BEAM-3761
>                 URL: https://issues.apache.org/jira/browse/BEAM-3761
>             Project: Beam
>          Issue Type: Improvement
>          Components: sdk-py-core
>            Reporter: holdenk
>            Priority: Major
>          Time Spent: 7h 50m
>  Remaining Estimate: 0h
>
> Various functions don't exist in Python 3 that did in python 2. This Jira is 
> to fix the use of cmp (which often will involve rewriting __cmp__ as well).
>  
> Note: there are existing PRs for basestring and unicode ( 
> [https://github.com/apache/beam/pull/4697|https://github.com/apache/beam/pull/4697,]
>  , [https://github.com/apache/beam/pull/4730] )
>  
> Note once all of the missing names/functions are fixed we can enable F821 in 
> falke8 python 3.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to