[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=131066=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-131066 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 03/Aug/18 20:47 Start Date: 03/Aug/18 20:47 Worklog Time Spent: 10m Work Description: aaltay closed pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/sdks/python/apache_beam/pipeline_test.py b/sdks/python/apache_beam/pipeline_test.py index 72791fc262b..9d4e79a9a9a 100644 --- a/sdks/python/apache_beam/pipeline_test.py +++ b/sdks/python/apache_beam/pipeline_test.py @@ -520,11 +520,11 @@ def test_dir(self): options = Breakfast() self.assertEquals( set(['from_dictionary', 'get_all_options', 'slices', 'style', - 'view_as', 'display_data']), + 'view_as', 'display_data', 'next']), set([attr for attr in dir(options) if not attr.startswith('_')])) self.assertEquals( set(['from_dictionary', 'get_all_options', 'style', 'view_as', - 'display_data']), + 'display_data', 'next']), set([attr for attr in dir(options.view_as(Eggs)) if not attr.startswith('_')])) diff --git a/sdks/python/apache_beam/transforms/__init__.py b/sdks/python/apache_beam/transforms/__init__.py index 3c04b370cfe..a207009f871 100644 --- a/sdks/python/apache_beam/transforms/__init__.py +++ b/sdks/python/apache_beam/transforms/__init__.py @@ -18,6 +18,8 @@ """PTransform and descendants.""" # pylint: disable=wildcard-import +from __future__ import absolute_import + from apache_beam.transforms import combiners from apache_beam.transforms.core import * from apache_beam.transforms.ptransform import * diff --git a/sdks/python/apache_beam/transforms/combiners.py b/sdks/python/apache_beam/transforms/combiners.py index 9b0c0e81e35..8db0fe5e14f 100644 --- a/sdks/python/apache_beam/transforms/combiners.py +++ b/sdks/python/apache_beam/transforms/combiners.py @@ -18,9 +18,12 @@ """A library of basic combiner PTransform subclasses.""" from __future__ import absolute_import +from __future__ import division import operator import random +from builtins import object +from builtins import zip from past.builtins import long diff --git a/sdks/python/apache_beam/transforms/combiners_test.py b/sdks/python/apache_beam/transforms/combiners_test.py index f372e881024..a768231ec6e 100644 --- a/sdks/python/apache_beam/transforms/combiners_test.py +++ b/sdks/python/apache_beam/transforms/combiners_test.py @@ -16,12 +16,15 @@ # """Unit tests for our libraries of combine PTransforms.""" +from __future__ import absolute_import +from __future__ import division import itertools import random import unittest import hamcrest as hc +from future.builtins import range import apache_beam as beam import apache_beam.transforms.combiners as combine @@ -286,7 +289,7 @@ def match(actual): def matcher(): def match(actual): equal_to([1])([len(actual)]) -equal_to(pairs)(actual[0].iteritems()) +equal_to(pairs)(actual[0].items()) return match assert_that(result, matcher()) pipeline.run() diff --git a/sdks/python/apache_beam/transforms/core.py b/sdks/python/apache_beam/transforms/core.py index bbd78342a7f..fa867e5231d 100644 --- a/sdks/python/apache_beam/transforms/core.py +++ b/sdks/python/apache_beam/transforms/core.py @@ -21,12 +21,15 @@ import copy import inspect -import itertools import random import re import types +from builtins import map +from builtins import object +from builtins import range -from six import string_types +from future.builtins import filter +from past.builtins import unicode from apache_beam import coders from apache_beam import pvalue @@ -82,7 +85,6 @@ 'Impulse', ] - # Type variables T = typehints.TypeVariable('T') K = typehints.TypeVariable('K') @@ -291,6 +293,9 @@ def __eq__(self, other): return self.param_id == other.param_id return False + def __hash__(self): +return hash(self.param_id) + def __repr__(self): return self.param_id @@ -698,7 +703,7 @@ def merge_accumulators(self, accumulators, *args, **kwargs): class ReiterableNonEmptyAccumulators(object): def __iter__(self): -return itertools.ifilter(filter_fn, accumulators) +return filter(filter_fn, accumulators) # It's (weakly) assumed that self._fn is associative. return self._fn(ReiterableNonEmptyAccumulators(), *args, **kwargs) @@ -902,7
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=131065=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-131065 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 03/Aug/18 20:47 Start Date: 03/Aug/18 20:47 Worklog Time Spent: 10m Work Description: aaltay commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-410372004 Thank you @Fematich and all the reviewers! This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 131065) Time Spent: 16h (was: 15h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 16h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130779=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130779 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 03/Aug/18 09:37 Start Date: 03/Aug/18 09:37 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207493020 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -1712,6 +1718,10 @@ def __eq__(self, other): and self.timestamp_combiner == other.timestamp_combiner) return False + def __hash__(self): +return hash((type(self), self.windowfn, self.accumulation_mode, Review comment: You are right, `self._default` indeed is defined based on values of other object attributes included in hash. Objects of this class are used in this dictionary: https://github.com/apache/beam/blob/b8499d9ce9d00bb78049abedc929a8d5d59fdfbd/sdks/python/apache_beam/runners/pipeline_context.py#L78. Current implementation makes sense to me. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130779) Time Spent: 15h 50m (was: 15h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 15h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130737=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130737 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 03/Aug/18 06:24 Start Date: 03/Aug/18 06:24 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207450985 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -1712,6 +1718,10 @@ def __eq__(self, other): and self.timestamp_combiner == other.timestamp_combiner) return False + def __hash__(self): +return hash((type(self), self.windowfn, self.accumulation_mode, Review comment: Unfortunately removing `__hash__` here, results in `TypeError: unhashable type: 'Windowing'` for ` test_top_prefixes`. However, looking at self._default, it seems OK to remove it from the hash, since it's actually a check on the other class variables. Therefore I think `__eq__` and `__hash__` are still in sync. I removed `type(self)` as well. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130737) Time Spent: 15h 40m (was: 15.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 15h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130030=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130030 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 02/Aug/18 00:08 Start Date: 02/Aug/18 00:08 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207065425 ## File path: sdks/python/apache_beam/transforms/trigger.py ## @@ -484,6 +494,9 @@ def __repr__(self): def __eq__(self, other): return type(self) == type(other) and self.underlying == other.underlying + def __hash__(self): +return hash((type(self), self.underlying)) Review comment: Let's remove `type(self)` from the tuple. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130030) Time Spent: 15h 10m (was: 15h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 15h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130025=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130025 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 02/Aug/18 00:08 Start Date: 02/Aug/18 00:08 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207065473 ## File path: sdks/python/apache_beam/transforms/trigger.py ## @@ -526,6 +537,9 @@ def __repr__(self): def __eq__(self, other): return type(self) == type(other) and self.triggers == other.triggers + def __hash__(self): +return hash((type(self), self.triggers)) Review comment: Let's remove `type(self)` from the tuple. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130025) Time Spent: 14h 50m (was: 14h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 14h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130033=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130033 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 02/Aug/18 00:08 Start Date: 02/Aug/18 00:08 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207068499 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -246,10 +263,33 @@ def __init__(self, value, timestamp): self.value = value self.timestamp = Timestamp.of(timestamp) - def __cmp__(self, other): -if type(self) is not type(other): - return cmp(type(self), type(other)) -return cmp((self.value, self.timestamp), (other.value, other.timestamp)) + def __eq__(self, other): Review comment: I checked performance of `windowed_value`, `interval_window`, `timestamped_value`, `bounded_window` in dictionaries and ordered lists, with and without this PR. For the most part, performance is not changed or improved. `@total_ordering` does not significantly affect it. Only concern is using `hash(type(self))` when evaluating hashes of objects may be unnecessary in most cases, and slightly decreases the performance here: https://github.com/apache/beam/pull/5729/files#diff-d7dfd884622fb59806ba9276cf3bd8fbR242. So I left some more comments to simplify hash functions. The change above was also the trigger for test flakiness, although ultimately the test was at fault. Without PR: ``` wv_with_one_window: dict, 1 element(s) : per element median time cost: 4.71699e-06 sec, relative std: 5.93% wv_with_multiple_windows: dict, 1 element(s): per element median time cost: 4.02698e-05 sec, relative std: 0.60% interval_window: dict, 1 element(s) : per element median time cost: 1.5276e-06 sec, relative std: 1.78% timestamped_value: dict, 1 element(s) : per element median time cost: 1.39499e-07 sec, relative std: 7.44% interval_window: sorting., 1 element(s) : per element median time cost: 4.04392e-05 sec, relative std: 0.63% timestamped_value: sorting., 1 element(s) : per element median time cost: 1.80363e-05 sec, relative std: 1.35% bounded_window: sorting., 1 element(s) : per element median time cost: 4.06633e-05 sec, relative std: 1.26% ``` With PR (including the change suggested in last iteration). ``` wv_with_one_window: dict, 1 element(s) : per element median time cost: 5.047e-06 sec, relative std: 2.16% wv_with_multiple_windows: dict, 1 element(s): per element median time cost: 4.0575e-05 sec, relative std: 2.20% interval_window: dict, 1 element(s) : per element median time cost: 1.53821e-06 sec, relative std: 2.43% timestamped_value: dict, 1 element(s) : per element median time cost: 1.27995e-06 sec, relative std: 6.11% interval_window: sorting., 1 element(s) : per element median time cost: 1.83087e-05 sec, relative std: 1.28% timestamped_value: sorting., 1 element(s) : per element median time cost: 8.4375e-06 sec, relative std: 2.62% bounded_window: sorting., 1 element(s) : per element median time cost: 1.80462e-05 sec, relative std: 3.56% ``` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130033) Time Spent: 15.5h (was: 15h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 15.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130028=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130028 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 02/Aug/18 00:08 Start Date: 02/Aug/18 00:08 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207065504 ## File path: sdks/python/apache_beam/transforms/trigger.py ## @@ -620,6 +634,9 @@ def __repr__(self): def __eq__(self, other): return type(self) == type(other) and self.triggers == other.triggers + def __hash__(self): +return hash((type(self), self.triggers)) Review comment: Let's remove `type(self)` from the tuple. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130028) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 15h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130024=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130024 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 02/Aug/18 00:08 Start Date: 02/Aug/18 00:08 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207065689 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -246,10 +273,23 @@ def __init__(self, value, timestamp): self.value = value self.timestamp = Timestamp.of(timestamp) - def __cmp__(self, other): -if type(self) is not type(other): - return cmp(type(self), type(other)) -return cmp((self.value, self.timestamp), (other.value, other.timestamp)) + def __eq__(self, other): +return (type(self) == type(other) +and self.value == other.value +and self.timestamp == other.timestamp) + + def __hash__(self): +return hash((type(self), self.value, self.timestamp)) Review comment: Let's remove `type(self)` from the tuple. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130024) Time Spent: 14h 40m (was: 14.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 14h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130032=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130032 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 02/Aug/18 00:08 Start Date: 02/Aug/18 00:08 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207065995 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -474,6 +526,12 @@ def __eq__(self, other): if type(self) == type(other) == Sessions: return self.gap_size == other.gap_size + def __ne__(self, other): +return not self == other + + def __hash__(self): +return hash((type(self), self.gap_size)) Review comment: Let's remove `type(self)` from the tuple. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130032) Time Spent: 15h 20m (was: 15h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 15h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130029=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130029 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 02/Aug/18 00:08 Start Date: 02/Aug/18 00:08 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207065825 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -348,6 +391,9 @@ def __eq__(self, other): if type(self) == type(other) == FixedWindows: return self.size == other.size and self.offset == other.offset + def __hash__(self): +return hash((type(self), self.size, self.offset)) Review comment: Let's remove `type(self)` from the tuple. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130029) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 15h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130022=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130022 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 02/Aug/18 00:08 Start Date: 02/Aug/18 00:08 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207064664 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -291,6 +293,9 @@ def __eq__(self, other): return self.param_id == other.param_id return False + def __hash__(self): +return hash((type(self), self.param_id)) Review comment: let's simplify this to `hash(self.param_id)`. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130022) Time Spent: 14h 20m (was: 14h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 14h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130023=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130023 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 02/Aug/18 00:08 Start Date: 02/Aug/18 00:08 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207064203 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -218,10 +239,15 @@ def __init__(self, start, end): self.start = Timestamp.of(start) def __hash__(self): -return hash((self.start, self.end)) +return hash((self.start, self.end, type(self))) Review comment: Let's remove `type(self)` from the tuple. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130023) Time Spent: 14.5h (was: 14h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 14.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130031=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130031 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 02/Aug/18 00:08 Start Date: 02/Aug/18 00:08 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207065911 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -407,6 +453,12 @@ def __eq__(self, other): and self.offset == other.offset and self.period == other.period) + def __ne__(self, other): +return not self == other + + def __hash__(self): +return hash((type(self), self.offset, self.period)) Review comment: Let's remove `type(self)` from the tuple. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130031) Time Spent: 15h 20m (was: 15h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 15h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130027=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130027 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 02/Aug/18 00:08 Start Date: 02/Aug/18 00:08 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207065328 ## File path: sdks/python/apache_beam/transforms/trigger.py ## @@ -446,6 +453,9 @@ def __repr__(self): def __eq__(self, other): return type(self) == type(other) and self.count == other.count + def __hash__(self): +return hash((type(self), self.count)) Review comment: Let's simplify this to `return hash(self.count)`. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130027) Time Spent: 15h (was: 14h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 15h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=130026=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130026 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 02/Aug/18 00:08 Start Date: 02/Aug/18 00:08 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207065158 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -1712,6 +1718,10 @@ def __eq__(self, other): and self.timestamp_combiner == other.timestamp_combiner) return False + def __hash__(self): +return hash((type(self), self.windowfn, self.accumulation_mode, Review comment: Since this was not defined, most likely this will be dead code, and current implementation may break the contract with `__eq__` since it's not taking `self._default` into account, let's make it `__hash__ = None`. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130026) Time Spent: 15h (was: 14h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 15h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=129945=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129945 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 01/Aug/18 20:59 Start Date: 01/Aug/18 20:59 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r207028792 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -246,10 +273,23 @@ def __init__(self, value, timestamp): self.value = value self.timestamp = Timestamp.of(timestamp) - def __cmp__(self, other): -if type(self) is not type(other): - return cmp(type(self), type(other)) -return cmp((self.value, self.timestamp), (other.value, other.timestamp)) + def __eq__(self, other): +return (type(self) == type(other) +and self.value == other.value +and self.timestamp == other.timestamp) + + def __hash__(self): +return hash((type(self), self.value, self.timestamp)) + + def __ne__(self, other): +return not self == other + + def __lt__(self, other): Review comment: Since types are not comparable in Python 3, how about we change the implementation to: ``` def __lt__(self, other): if type(self) != type(other): return type(self).__name__ < type(other).__name__ if self.value != other.value: return self.value < other.value return self.timestamp < other.timestamp ``` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 129945) Time Spent: 14h 10m (was: 14h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 14h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=129944=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129944 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 01/Aug/18 20:59 Start Date: 01/Aug/18 20:59 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r206991261 ## File path: sdks/python/apache_beam/transforms/cy_combiners.py ## @@ -162,7 +167,7 @@ def extract_output(self): self.sum %= 2**64 if self.sum >= INT64_MAX: self.sum -= 2**64 -return self.sum / self.count if self.count else _NAN +return self.sum // self.count if self.count else _NAN Review comment: Please also make the change in line 266. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 129944) Time Spent: 14h (was: 13h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 14h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=129921=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129921 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 01/Aug/18 19:47 Start Date: 01/Aug/18 19:47 Worklog Time Spent: 10m Work Description: Fematich commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-409698800 Run Python Dataflow ValidatesRunner This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 129921) Time Spent: 13h 50m (was: 13h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 13h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=129509=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129509 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 31/Jul/18 21:12 Start Date: 31/Jul/18 21:12 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r206685323 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -246,10 +263,33 @@ def __init__(self, value, timestamp): self.value = value self.timestamp = Timestamp.of(timestamp) - def __cmp__(self, other): -if type(self) is not type(other): - return cmp(type(self), type(other)) -return cmp((self.value, self.timestamp), (other.value, other.timestamp)) + def __eq__(self, other): Review comment: Yes, I just retested with the full implementation which seems to work again. However, will be good to test the @total_ordering after your PR has been merged :-). This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 129509) Time Spent: 13h 40m (was: 13.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 13h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=129100=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129100 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 31/Jul/18 03:25 Start Date: 31/Jul/18 03:25 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r206385144 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -246,10 +263,33 @@ def __init__(self, value, timestamp): self.value = value self.timestamp = Timestamp.of(timestamp) - def __cmp__(self, other): -if type(self) is not type(other): - return cmp(type(self), type(other)) -return cmp((self.value, self.timestamp), (other.value, other.timestamp)) + def __eq__(self, other): Review comment: With this PR, the test becomes flaky, or in other words passes sometimes. It may still flake if we implement all ops manually - did you try running the test multiple times when all ops are implemented? I don't understand yet what change in behavior triggers this (we should find out), but I think we need to fix the test regardless of this: https://github.com/apache/beam/pull/6104. Performance-wise, last week I used https://github.com/apache/beam/compare/master...tvalentyn:transforms_microbenchmark to compare different options, and did not notice a significant difference when using @total_ordering (we could double check), so I favored the decorator to reduce the boilerplate. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 129100) Time Spent: 13.5h (was: 13h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 13.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=129099=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129099 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 31/Jul/18 03:25 Start Date: 31/Jul/18 03:25 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r206385144 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -246,10 +263,33 @@ def __init__(self, value, timestamp): self.value = value self.timestamp = Timestamp.of(timestamp) - def __cmp__(self, other): -if type(self) is not type(other): - return cmp(type(self), type(other)) -return cmp((self.value, self.timestamp), (other.value, other.timestamp)) + def __eq__(self, other): Review comment: With this PR, the test becomes flaky, or in other words passes sometimes. It may still flake if we implement all ops manually - did you try running it multiple times? I don't understand yet what change in behavior triggers this (we should find out), but I think we need to fix the test regardless of this: https://github.com/apache/beam/pull/6104. Performance-wise, last week I used https://github.com/apache/beam/compare/master...tvalentyn:transforms_microbenchmark to compare different options, and did not notice a significant difference when using @total_ordering (we could double check), so I favored the decorator to reduce the boilerplate. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 129099) Time Spent: 13h 20m (was: 13h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 13h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128853=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128853 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 30/Jul/18 18:09 Start Date: 30/Jul/18 18:09 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205946917 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -246,10 +263,33 @@ def __init__(self, value, timestamp): self.value = value self.timestamp = Timestamp.of(timestamp) - def __cmp__(self, other): -if type(self) is not type(other): - return cmp(type(self), type(other)) -return cmp((self.value, self.timestamp), (other.value, other.timestamp)) + def __eq__(self, other): Review comment: Using `total_ordering` results in unexpected behavior. Concretely the test `test_reshuffle_windows_unchanged` fails. I have tried to locate the exact cause by implementing all OPs (with the `total_ordering` decorator in place) and subsequently leaving out the OPs one by one: 1) adding the `total_ordering` decorator itself doesn't introduce issues 2) only use `total_ordering` to fill in `__lt__` works, using other combinations always fail. 3) I am currently testing the OPs by manually using the conversion rules defined by [total_ordering](https://github.com/python/cpython/blob/master/Lib/functools.py) to see if I can locate the exact problem: - `__ge__` can be removed and works with functools - `__gt__` work from manual copy functools from lt, but not with functools - `__le__` doesn't work from manual copy functools from lt, however replacing `and self==other` by `and not (self != other)` works. @tvalentyn: Given [the note](https://docs.python.org/3/library/functools.html#functools.total_ordering) on performance impact of the total_ordering decorator, it might make sense to implement all OPs instead of using the decorator? That works already, in the meantime I will continue the testing (step3) to see if I can give more info. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128853) Time Spent: 13h 10m (was: 13h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 13h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128455=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128455 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 28/Jul/18 16:09 Start Date: 28/Jul/18 16:09 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205946917 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -246,10 +263,33 @@ def __init__(self, value, timestamp): self.value = value self.timestamp = Timestamp.of(timestamp) - def __cmp__(self, other): -if type(self) is not type(other): - return cmp(type(self), type(other)) -return cmp((self.value, self.timestamp), (other.value, other.timestamp)) + def __eq__(self, other): Review comment: Using `total_ordering` results in unexpected behavior. Concretely the test `test_reshuffle_windows_unchanged` fails. I have tried to locate the exact cause by implementing all OPs (with the `total_ordering` decorator in place) and subsequently leaving out the OPs one by one: 1) adding the `total_ordering` decorator itself doesn't introduce issues 2) only use `total_ordering` to fill in `__lt__` works, using other combinations always fail. 3) I am currently testing the OPs by manually using the conversion rules defined by [total_ordering](https://github.com/python/cpython/blob/master/Lib/functools.py) to see if I can locate the exact problem. @tvalentyn: Given [the note](https://docs.python.org/3/library/functools.html#functools.total_ordering) on performance impact of the total_ordering decorator, it might make sense to implement all OPs instead of using the decorator? That works already, in the meantime I will continue the testing (step3) to see if I can give more info. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128455) Time Spent: 13h (was: 12h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 13h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128320=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128320 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 22:34 Start Date: 27/Jul/18 22:34 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205914987 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -348,6 +388,9 @@ def __eq__(self, other): if type(self) == type(other) == FixedWindows: return self.size == other.size and self.offset == other.offset + def __hash__(self): +return hash((type(self), self.size, self.offset)) + def __ne__(self, other): Review comment: Thanks! I have just pushed the commit with these changes. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128320) Time Spent: 12h 50m (was: 12h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 12h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128318=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128318 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 22:32 Start Date: 27/Jul/18 22:32 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205914607 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -348,6 +388,9 @@ def __eq__(self, other): if type(self) == type(other) == FixedWindows: return self.size == other.size and self.offset == other.offset + def __hash__(self): +return hash((type(self), self.size, self.offset)) + def __ne__(self, other): Review comment: Agree with you, looking at the test it seems that we are doing the right thing. I think there will not be performance impact here, but I'll do one final A/B test with and without the PR to be safe once we finalize it. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128318) Time Spent: 12h 40m (was: 12.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 12h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128317=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128317 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 22:25 Start Date: 27/Jul/18 22:25 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205913757 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -348,6 +388,9 @@ def __eq__(self, other): if type(self) == type(other) == FixedWindows: return self.size == other.size and self.offset == other.offset + def __hash__(self): +return hash((type(self), self.size, self.offset)) + def __ne__(self, other): Review comment: Adding ``` def __ne__(self, other): return not self == other ``` for `IntervalWindow` causes `test_global_window` to fail on the last assertion (comparing IntervalWindow (max-range) to GlobalWindow). To resolve this failed assertion I need to add type in `__eq__`: ``` def __hash__(self): return hash((self.start, self.end, type(self))) def __eq__(self, other): return (self.start == other.start and self.end == other.end) and type(self) == type(other)) def __ne__(self, other): return not self == other ``` I think this makes sense and will be necessary for Python3 compatibility, however I'm not sure if this will have performance impact here? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128317) Time Spent: 12.5h (was: 12h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 12.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128254=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128254 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 18:46 Start Date: 27/Jul/18 18:46 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205866972 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -191,15 +192,31 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): -# Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) - def __eq__(self, other): raise NotImplementedError + def __ne__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) != 0 + + def __lt__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) < 0 + + def __le__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) <= 0 + + def __gt__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) > 0 + + def __ge__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) >= 0 + def __hash__(self): -return hash(self.end) +raise NotImplementedError Review comment: OK thanks, and interesting! So the `NotImplementedError` is only relevant for the consistency of the `__eq__` but doesn't have an impact for child classes. Good to know :-)! This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128254) Time Spent: 12h 20m (was: 12h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 12h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128198=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128198 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 16:26 Start Date: 27/Jul/18 16:26 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205829071 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -191,15 +192,31 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): -# Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) - def __eq__(self, other): raise NotImplementedError + def __ne__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) != 0 + + def __lt__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) < 0 + + def __le__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) <= 0 + + def __gt__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) > 0 + + def __ge__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) >= 0 + def __hash__(self): -return hash(self.end) +raise NotImplementedError Review comment: I see, that makes sense, thanks, I think it's a good idea to keep returning `NotImplementedError` as you suggested, since we don't implement `__eq__`. Note that if a class that overrides `__eq__()` needs to retain the implementation of `__hash__()` from a parent class, the interpreter must be told this explicitly by setting `__hash__ = .__hash__`, see: https://docs.python.org/3/reference/datamodel.html#object.__hash__ . This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128198) Time Spent: 12h 10m (was: 12h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 12h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128135=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128135 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 10:31 Start Date: 27/Jul/18 10:31 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205732823 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -191,15 +192,31 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): -# Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) - def __eq__(self, other): raise NotImplementedError + def __ne__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) != 0 + + def __lt__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) < 0 + + def __le__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) <= 0 + + def __gt__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) > 0 + + def __ge__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) >= 0 + def __hash__(self): -return hash(self.end) +raise NotImplementedError Review comment: I removed the implementation to match the `__eq__` behavior which also raises `NotImplemented`. This to enforce the child classes to implement `__hash__` and to make it impossible for child classes like `GlobalWindow` and `IntervalWindow` objects to have the same hash. Does this make sense, or should I add the `__hash__` method back? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128135) Time Spent: 12h (was: 11h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 12h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128134=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128134 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 10:31 Start Date: 27/Jul/18 10:31 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205732823 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -191,15 +192,31 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): -# Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) - def __eq__(self, other): raise NotImplementedError + def __ne__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) != 0 + + def __lt__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) < 0 + + def __le__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) <= 0 + + def __gt__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) > 0 + + def __ge__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) >= 0 + def __hash__(self): -return hash(self.end) +raise NotImplementedError Review comment: I removed the implementation to match the `__eq__` behavior which also raises `NotImplemented`. This to enforce the child classes to implement `__hash__` and to make it impossible for child classes like `GlobalWindow` and `IntervalWindow` objects to have the same hash. Does this make sense, or should I add the __hash__ method back? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128134) Time Spent: 11h 50m (was: 11h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 11h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128075=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128075 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 06:32 Start Date: 27/Jul/18 06:32 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205659315 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -191,15 +192,31 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): -# Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) - def __eq__(self, other): raise NotImplementedError + def __ne__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) != 0 + + def __lt__(self, other): Review comment: It seems to be a little faster if we don't pull in `cmp`. How about we implement the rich comparisons as follows: ``` # Order first by endpoint, then arbitrarily. <-- Let's mention this comment once. def __op__(self, other): if self.end != other.end: return self.end $op_symbol other.end return hash(self) $op_symbo hash(other) ``` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128075) Time Spent: 11h 40m (was: 11.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 11h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128074=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128074 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 06:31 Start Date: 27/Jul/18 06:31 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205659315 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -191,15 +192,31 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): -# Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) - def __eq__(self, other): raise NotImplementedError + def __ne__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) != 0 + + def __lt__(self, other): Review comment: It seems to be a little faster if we don't pull in `cmp`. How about we implement the rich comparisons as follows: ``` # Order first by endpoint, then arbitrarily. <-- Let's mention this comment once. def __op__(self, other): if self.end != other.end: return self.end $op_symbol other.end return hash(self) $op_symbo hash(other) ``` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128074) Time Spent: 11.5h (was: 11h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 11.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128073=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128073 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 06:24 Start Date: 27/Jul/18 06:24 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205678298 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -246,10 +263,33 @@ def __init__(self, value, timestamp): self.value = value self.timestamp = Timestamp.of(timestamp) - def __cmp__(self, other): -if type(self) is not type(other): - return cmp(type(self), type(other)) -return cmp((self.value, self.timestamp), (other.value, other.timestamp)) + def __eq__(self, other): Review comment: I suggest: 1. Use `@total_ordering`. 2. Implement `__ne__` as `return not self == other` 3. Implement `__lt__` without `cmp` and `tuple`s, which performs slightly better: ``` def __lt__(self, other): if type(self) != type(other): return type(self) < type(other) if self.value != other.value: return self.value < other.value return self.timestamp < other.timestamp ``` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128073) Time Spent: 11h 20m (was: 11h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 11h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128071=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128071 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 06:21 Start Date: 27/Jul/18 06:21 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r20561 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -191,15 +192,31 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): -# Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) - def __eq__(self, other): raise NotImplementedError + def __ne__(self, other): Review comment: We can also remove `cmp` here: ``` def __ne__(self, other): return self.end != other.end or hash(self) != hash(other) ``` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128071) Time Spent: 11h 10m (was: 11h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 11h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128070=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128070 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 06:15 Start Date: 27/Jul/18 06:15 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205659315 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -191,15 +192,31 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): -# Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) - def __eq__(self, other): raise NotImplementedError + def __ne__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) != 0 + + def __lt__(self, other): Review comment: It seems to be a little faster if we don't pull in `cmp`. How about we implement the rich comparisons as follows: ``` def __op__(self, other): if self.end != other.end: return self.end $op_symbol other.end return hash(self) $op_symbo hash(other) ``` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128070) Time Spent: 11h (was: 10h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 11h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128066=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128066 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 06:10 Start Date: 27/Jul/18 06:10 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205651979 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -348,6 +388,9 @@ def __eq__(self, other): if type(self) == type(other) == FixedWindows: return self.size == other.size and self.offset == other.offset + def __hash__(self): +return hash((type(self), self.size, self.offset)) + def __ne__(self, other): Review comment: Looks like classes `IntervalWindow`, `GlobalWindow`, `SlidingWindows`, and `Sessions` define `__eq__`, but don't define `__ne__`. Let's add: ``` def __ne__(self, other): return not self == other ``` since this would be the default implementation of `__ne__` in Python 3. Curious, why the conversion tool does not add something similar. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128066) Time Spent: 10h 50m (was: 10h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 10h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128065=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128065 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 06:09 Start Date: 27/Jul/18 06:09 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205651979 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -348,6 +388,9 @@ def __eq__(self, other): if type(self) == type(other) == FixedWindows: return self.size == other.size and self.offset == other.offset + def __hash__(self): +return hash((type(self), self.size, self.offset)) + def __ne__(self, other): Review comment: Looks like classes `IntervalWindow`, `GlobalWindow`, `SlidingWindows`, and `Sessions` define `__eq__`, but don't define `__ne__`. Let's add: ``` def __ne__(self, other): return not self == other ``` Since this would be the default implementation of `__ne__` in Python 3. Curious, why the conversion tool does not add something similar. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128065) Time Spent: 10h 40m (was: 10.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 10h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128064=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128064 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 06:09 Start Date: 27/Jul/18 06:09 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205659315 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -191,15 +192,31 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): -# Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) - def __eq__(self, other): raise NotImplementedError + def __ne__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) != 0 + + def __lt__(self, other): Review comment: It seems to be a little faster if we don't pull in `cmp`. How about we implement the rich comparisons as follows: ``` def __op__(self, other): if self.end != other.end: return self.end $op other.end return hash(self) $op hash(other) ``` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128064) Time Spent: 10.5h (was: 10h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 10.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=128063=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-128063 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 27/Jul/18 06:09 Start Date: 27/Jul/18 06:09 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r205649914 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -191,15 +192,31 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): -# Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) - def __eq__(self, other): raise NotImplementedError + def __ne__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) != 0 + + def __lt__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) < 0 + + def __le__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) <= 0 + + def __gt__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) > 0 + + def __ge__(self, other): +return (cmp(self.end, other.end) +or cmp(hash(self), hash(other))) >= 0 + def __hash__(self): -return hash(self.end) +raise NotImplementedError Review comment: What's the reason to change the original behavior of `__hash__`? Seems like we should revert this change since it makes the objects of this class unhashable. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 128063) Time Spent: 10.5h (was: 10h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 10.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=126964=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-126964 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 24/Jul/18 23:48 Start Date: 24/Jul/18 23:48 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r204945776 ## File path: sdks/python/apache_beam/transforms/util.py ## @@ -273,7 +279,7 @@ def div_keys(kv1_kv2): pairs = sorted(zip(sorted_data[::2], sorted_data[1::2]), key=div_keys) # Keep the top 1/3 most different pairs, average the top 2/3 most similar. -threshold = 2 * len(pairs) / 3 +threshold = 2 * len(pairs) // 3 Review comment: Thanks for your patience with this investigation. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 126964) Time Spent: 10h 20m (was: 10h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 10h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=126958=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-126958 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 24/Jul/18 23:43 Start Date: 24/Jul/18 23:43 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r204945047 ## File path: sdks/python/apache_beam/transforms/util.py ## @@ -273,7 +279,7 @@ def div_keys(kv1_kv2): pairs = sorted(zip(sorted_data[::2], sorted_data[1::2]), key=div_keys) # Keep the top 1/3 most different pairs, average the top 2/3 most similar. -threshold = 2 * len(pairs) / 3 +threshold = 2 * len(pairs) // 3 Review comment: Perfect! I will update the PR, thx! This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 126958) Time Spent: 10h 10m (was: 10h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 10h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=126956=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-126956 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 24/Jul/18 23:40 Start Date: 24/Jul/18 23:40 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r204944489 ## File path: sdks/python/apache_beam/transforms/util.py ## @@ -273,7 +279,7 @@ def div_keys(kv1_kv2): pairs = sorted(zip(sorted_data[::2], sorted_data[1::2]), key=div_keys) # Keep the top 1/3 most different pairs, average the top 2/3 most similar. -threshold = 2 * len(pairs) / 3 +threshold = 2 * len(pairs) // 3 Review comment: I have confirmed that this change brings performance back to the same ballpark. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 126956) Time Spent: 10h (was: 9h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 10h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=126955=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-126955 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 24/Jul/18 23:34 Start Date: 24/Jul/18 23:34 Worklog Time Spent: 10m Work Description: tvalentyn edited a comment on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-407585250 @Fematich I'm taking a look at c270644. I don't believe `@total_ordering` was an issue, but I'll see if the change makes a difference, I also started working on a microbenchmark but stopped pursuing that direction once I saw that window.py changes were not the main offender. I'll take a look at your microbenchmark as well. Since we now know how to make a have Py3-compatible version of this change that performs comparably well, the rest of performance testing won't take much time. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 126955) Time Spent: 9h 50m (was: 9h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 9h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=126954=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-126954 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 24/Jul/18 23:34 Start Date: 24/Jul/18 23:34 Worklog Time Spent: 10m Work Description: tvalentyn commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-407585250 I'm taking a look at c270644. I don't believe `@total_ordering` was an issue, but I'll see if the change makes a difference, I also started working on a microbenchmark but stopped pursuing that direction once I saw that window.py changes were not the main offender. I'll take a look at your microbenchmark as well. Since we now know how to make a have Py3-compatible version of this change that performs comparably well, the rest of performance testing won't take much time. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 126954) Time Spent: 9h 40m (was: 9.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 9h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=126950=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-126950 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 24/Jul/18 23:13 Start Date: 24/Jul/18 23:13 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r204940139 ## File path: sdks/python/apache_beam/transforms/util.py ## @@ -273,7 +279,7 @@ def div_keys(kv1_kv2): pairs = sorted(zip(sorted_data[::2], sorted_data[1::2]), key=div_keys) # Keep the top 1/3 most different pairs, average the top 2/3 most similar. -threshold = 2 * len(pairs) / 3 +threshold = 2 * len(pairs) // 3 Review comment: Let's use past.utils.division.old_div in line 280 as an exception, and add a TODO(BEAM-4858) comment to clean this up. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 126950) Time Spent: 9.5h (was: 9h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 9.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=126946=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-126946 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 24/Jul/18 23:04 Start Date: 24/Jul/18 23:04 Worklog Time Spent: 10m Work Description: tvalentyn commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-407579682 With latest round of experiments, we finally got to the bottom of this performance regression, see: https://issues.apache.org/jira/browse/BEAM-4858. I will also put some details inline in util.py. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 126946) Time Spent: 9h 20m (was: 9h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 9h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=126933=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-126933 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 24/Jul/18 22:28 Start Date: 24/Jul/18 22:28 Worklog Time Spent: 10m Work Description: Fematich commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-407572571 @tvalentyn is there anything I can help with? Are you planning to benchmark the changes in https://github.com/apache/beam/commit/c2706447a6a602614c2f9bf36db1a666fa938819? Or do you first want to add more microbenchmarks? I am working on a microbenchmark for TimestampedValue #BEAM-4855 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 126933) Time Spent: 9h 10m (was: 9h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 9h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=126089=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-126089 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 23/Jul/18 15:39 Start Date: 23/Jul/18 15:39 Worklog Time Spent: 10m Work Description: tvalentyn commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-407102486 I did not benchmark changes in c270644 yet, but doing bisection on previous version of the PRs shows that the largest contributor to regression is the line `from __future__ import division` in `util.py`. I still don't understand the reason why it causes the regression. One place where we may have missed to modify division is https://github.com/Fematich/beam/blob/c2706447a6a602614c2f9bf36db1a666fa938819/sdks/python/apache_beam/transforms/util.py#L277 but changing division there to integer division does not seem to help. Looking further. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 126089) Time Spent: 8h 50m (was: 8h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 8h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=126090=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-126090 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 23/Jul/18 15:39 Start Date: 23/Jul/18 15:39 Worklog Time Spent: 10m Work Description: tvalentyn edited a comment on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-407102486 I did not benchmark changes in c270644 yet, but doing bisection on previous version of the PR shows that the largest contributor to regression is the line `from __future__ import division` in `util.py`. I still don't understand the reason why it causes the regression. One place where we may have missed to modify division is https://github.com/Fematich/beam/blob/c2706447a6a602614c2f9bf36db1a666fa938819/sdks/python/apache_beam/transforms/util.py#L277 but changing division there to integer division does not seem to help. Looking further. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 126090) Time Spent: 9h (was: 8h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 9h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=125556=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-125556 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 20/Jul/18 16:28 Start Date: 20/Jul/18 16:28 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r204099457 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -234,6 +252,7 @@ def union(self, other): min(self.start, other.start), max(self.end, other.end)) +@total_ordering Review comment: I have pushed a new commit that should speed up the compare functions for `BoundedWindow` and `TimestampedValue` objects: - removed the total_ordering decorator - removed the custom cmp-method in `BoundedWindow` - removed the if-statement in `TimestampedValue` using the short-circuit behavior of `or` All of these changes had a positive impact on a small test I used. A microbenchmark in the apache beam code, would really be useful indeed :-)! This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 125556) Time Spent: 8h 40m (was: 8.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 8h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=12=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-12 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 20/Jul/18 16:27 Start Date: 20/Jul/18 16:27 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r204099457 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -234,6 +252,7 @@ def union(self, other): min(self.start, other.start), max(self.end, other.end)) +@total_ordering Review comment: I have pushed a new commit that should speed up the compare functions for `BoundedWindow` and `TimestampedValue` objects: - removed the total_ordering decorator - removed the custom cmp_method - removed the if-statement in `TimestampedValue` using the short-circuit behavior of `or` All of these changes had a positive impact on a small test I used. A microbenchmark in the apache beam code, would really be useful indeed :-)! This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 12) Time Spent: 8.5h (was: 8h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 8.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=125537=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-125537 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 20/Jul/18 15:39 Start Date: 20/Jul/18 15:39 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r204085546 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -234,6 +252,7 @@ def union(self, other): min(self.start, other.start), max(self.end, other.end)) +@total_ordering Review comment: We have confirmed so far by bisection that the slowdown is caused by some changes in `util.py` and/or `window.py`, and there are additional benchmark runs in flight to narrow this down further. It is very likely that the slowdown is caused by time it takes to compare objects of some of the classes defined in `window.py` due to changes in implementation `cmp` or `hash` functions . I also plan to confirm it with a microbenchmark similar to https://github.com/apache/beam/compare/master...tvalentyn:utils_futurization_benchmark?expand=1#diff-de123c6d83f9809a6f0d95be5a7d1826. That could help us to get performance metrics for different implementations without running a slow benchmark suite. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 125537) Time Spent: 8h 20m (was: 8h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 8h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=125536=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-125536 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 20/Jul/18 15:38 Start Date: 20/Jul/18 15:38 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r204085546 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -234,6 +252,7 @@ def union(self, other): min(self.start, other.start), max(self.end, other.end)) +@total_ordering Review comment: We have confirmed so far by bisection that the slowdown is caused by some changes in `util.py` and/or `window.py`, and there are additional benchmark runs in flight to narrow this down further. It is very likely that the slowdown is caused by time it takes to compare objects of some of the classes defined in `window.py` due to changes in implementation `cmp` or `hash` functions . I also plan to confirm it with a microbenchmark similar to https://github.com/apache/beam/compare/master...tvalentyn:utils_futurization_benchmark?expand=1#diff-de123c6d83f9809a6f0d95be5a7d1826 , that could help us to get performance metrics for different implementations without running a slow benchmark suite. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 125536) Time Spent: 8h 10m (was: 8h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 8h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=125535=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-125535 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 20/Jul/18 15:38 Start Date: 20/Jul/18 15:38 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r204085546 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -234,6 +252,7 @@ def union(self, other): min(self.start, other.start), max(self.end, other.end)) +@total_ordering Review comment: We have confirmed so far by bisection that the slowdown is caused by some changes in `util.py` and/or `window.py`, and there are additional benchmark runs in flight to narrow this down further. It is very likely that the slowdown is caused by time it takes to compare objects of some of the classes defined in `window.py` due to `cmp` or `hash` functions . I also plan to confirm it with a microbenchmark similar to https://github.com/apache/beam/compare/master...tvalentyn:utils_futurization_benchmark?expand=1#diff-de123c6d83f9809a6f0d95be5a7d1826 , that could help us to get performance metrics for different implementations without running a slow benchmark suite. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 125535) Time Spent: 8h (was: 7h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 8h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=125494=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-125494 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 20/Jul/18 13:55 Start Date: 20/Jul/18 13:55 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r204052352 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -191,13 +192,30 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): + def cmp(self, other): Review comment: Adds some extra delay This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 125494) Time Spent: 7h 50m (was: 7h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 7h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=125483=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-125483 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 20/Jul/18 13:36 Start Date: 20/Jul/18 13:36 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r204046090 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -234,6 +252,7 @@ def union(self, other): min(self.start, other.start), max(self.end, other.end)) +@total_ordering Review comment: I think it is, working on a version without @total_ordering and the use of the comp function call now! This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 125483) Time Spent: 7h 40m (was: 7.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 7h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=125399=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-125399 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 20/Jul/18 08:53 Start Date: 20/Jul/18 08:53 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r203978611 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -1906,7 +1920,7 @@ def __init__(self, value): value: An object of values for the PCollection """ super(Create, self).__init__() -if isinstance(value, string_types): +if isinstance(value, (unicode, str, bytes)): Review comment: See https://github.com/apache/beam/pull/5729#discussion_r199660012. Bytes in Python3 also shouldn't be allowed since we don't want to support creation of a PCollection of single bytes. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 125399) Time Spent: 7.5h (was: 7h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 7.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=122563=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122563 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 13/Jul/18 00:20 Start Date: 13/Jul/18 00:20 Worklog Time Spent: 10m Work Description: superbobry commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r202203698 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -191,13 +192,30 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): + def cmp(self, other): Review comment: Consider using `@total_ordering` as well. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 122563) Time Spent: 7h 20m (was: 7h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 7h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=122561=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122561 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 13/Jul/18 00:20 Start Date: 13/Jul/18 00:20 Worklog Time Spent: 10m Work Description: superbobry commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r202202700 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -62,6 +64,11 @@ from apache_beam.typehints.typehints import is_consistent_with from apache_beam.utils import urns +try: + from itertools import ifilter as filter Review comment: How about ```python from future.builtins import filter ``` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 122561) Time Spent: 7h (was: 6h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 7h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=122565=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122565 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 13/Jul/18 00:20 Start Date: 13/Jul/18 00:20 Worklog Time Spent: 10m Work Description: superbobry commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r202203442 ## File path: sdks/python/apache_beam/transforms/trigger.py ## @@ -43,6 +48,10 @@ from apache_beam.utils.timestamp import TIME_GRANULARITY # AfterCount is experimental. No backwards compatibility guarantees. +try: Review comment: I think this could be rewritten using `future.moves.itertools`. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 122565) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 7h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=122564=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122564 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 13/Jul/18 00:20 Start Date: 13/Jul/18 00:20 Worklog Time Spent: 10m Work Description: superbobry commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r202203844 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -234,6 +252,7 @@ def union(self, other): min(self.start, other.start), max(self.end, other.end)) +@total_ordering Review comment: I wonder if the slowdown is due to the indirection introduced by `@total_ordering`? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 122564) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 7h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=122562=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122562 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 13/Jul/18 00:20 Start Date: 13/Jul/18 00:20 Worklog Time Spent: 10m Work Description: superbobry commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r202202981 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -1906,7 +1920,7 @@ def __init__(self, value): value: An object of values for the PCollection """ super(Create, self).__init__() -if isinstance(value, string_types): +if isinstance(value, (unicode, str, bytes)): Review comment: No need to check for both `str` and `bytes` since Python 2.7 defines `bytes == str` and on Python 3.X `unicode == str`. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 122562) Time Spent: 7h 10m (was: 7h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 7h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=122560=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122560 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 13/Jul/18 00:20 Start Date: 13/Jul/18 00:20 Worklog Time Spent: 10m Work Description: superbobry commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r202202816 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -1792,7 +1806,7 @@ def __init__(self, windowfn, **kwargs): accumulation_mode = kwargs.pop('accumulation_mode', None) timestamp_combiner = kwargs.pop('timestamp_combiner', None) if kwargs: - raise ValueError('Unexpected keyword arguments: %s' % kwargs.keys()) + raise ValueError('Unexpected keyword arguments: %s' % list(kwargs.keys())) Review comment: Just `list(kwargs)` will work too. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 122560) Time Spent: 6h 50m (was: 6h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 6h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=122465=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122465 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 12/Jul/18 18:38 Start Date: 12/Jul/18 18:38 Worklog Time Spent: 10m Work Description: charlesccychen commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-404609861 Unfortunately, the ifilter change here (https://github.com/apache/beam/pull/5729#discussion_r201973087) doesn't fix the regression. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 122465) Time Spent: 6h 40m (was: 6.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 6h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=122442=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122442 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 12/Jul/18 17:27 Start Date: 12/Jul/18 17:27 Worklog Time Spent: 10m Work Description: charlesccychen commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r202116070 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -698,7 +703,7 @@ def merge_accumulators(self, accumulators, *args, **kwargs): class ReiterableNonEmptyAccumulators(object): def __iter__(self): -return itertools.ifilter(filter_fn, accumulators) +return filter(filter_fn, accumulators) Review comment: Thank you. Let me test the pipeline with this change. Unfortunately it's not easy to export the benchmark data. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 122442) Time Spent: 6.5h (was: 6h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 6.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=122260=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122260 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 12/Jul/18 09:44 Start Date: 12/Jul/18 09:44 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r201973579 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -698,7 +703,7 @@ def merge_accumulators(self, accumulators, *args, **kwargs): class ReiterableNonEmptyAccumulators(object): def __iter__(self): -return itertools.ifilter(filter_fn, accumulators) +return filter(filter_fn, accumulators) Review comment: @charlesccychen and @tvalentyn: is there more detailed info on the benchmarks? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 122260) Time Spent: 6h 20m (was: 6h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 6h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=122259=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122259 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 12/Jul/18 09:43 Start Date: 12/Jul/18 09:43 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r201973087 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -698,7 +703,7 @@ def merge_accumulators(self, accumulators, *args, **kwargs): class ReiterableNonEmptyAccumulators(object): def __iter__(self): -return itertools.ifilter(filter_fn, accumulators) +return filter(filter_fn, accumulators) Review comment: I think this might be a potential source for the performance loss --> I'll update this to use ifilter on PY2 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 122259) Time Spent: 6h 10m (was: 6h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 6h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=122234=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122234 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 12/Jul/18 08:30 Start Date: 12/Jul/18 08:30 Worklog Time Spent: 10m Work Description: charlesccychen commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-404433323 Unfortunately, this change is seen to produce a ~15% regression in internal Dataflow benchmarks. We have to investigate this regression before merging. CC: @tvalentyn This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 122234) Time Spent: 6h (was: 5h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 6h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=121566=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121566 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 10/Jul/18 20:29 Start Date: 10/Jul/18 20:29 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r201483568 ## File path: sdks/python/apache_beam/transforms/util.py ## @@ -19,11 +19,17 @@ """ from __future__ import absolute_import +from __future__ import division Review comment: treshold variable is now an int and Python PostCommit succeeds This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121566) Time Spent: 5h 50m (was: 5h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 5h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=121392=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121392 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 10/Jul/18 12:37 Start Date: 10/Jul/18 12:37 Worklog Time Spent: 10m Work Description: Fematich commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-403806603 Run Python PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121392) Time Spent: 5h 40m (was: 5.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 5h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=121391=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121391 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 10/Jul/18 12:37 Start Date: 10/Jul/18 12:37 Worklog Time Spent: 10m Work Description: Fematich removed a comment on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-403760574 Run Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121391) Time Spent: 5.5h (was: 5h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 5.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=121390=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121390 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 10/Jul/18 12:36 Start Date: 10/Jul/18 12:36 Worklog Time Spent: 10m Work Description: Fematich removed a comment on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-403745790 Run Python PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121390) Time Spent: 5h 20m (was: 5h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 5h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=121317=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121317 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 10/Jul/18 09:30 Start Date: 10/Jul/18 09:30 Worklog Time Spent: 10m Work Description: Fematich commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-403760574 Run Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121317) Time Spent: 5h 10m (was: 5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 5h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=121293=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121293 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 10/Jul/18 08:39 Start Date: 10/Jul/18 08:39 Worklog Time Spent: 10m Work Description: Fematich commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-403745790 Run Python PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121293) Time Spent: 5h (was: 4h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=121292=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121292 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 10/Jul/18 08:39 Start Date: 10/Jul/18 08:39 Worklog Time Spent: 10m Work Description: Fematich removed a comment on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-403733209 Run Python PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121292) Time Spent: 4h 50m (was: 4h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 4h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=121277=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121277 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 10/Jul/18 08:11 Start Date: 10/Jul/18 08:11 Worklog Time Spent: 10m Work Description: Fematich edited a comment on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-403733209 Run Python PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121277) Time Spent: 4h 40m (was: 4.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 4h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=121276=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121276 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 10/Jul/18 08:11 Start Date: 10/Jul/18 08:11 Worklog Time Spent: 10m Work Description: Fematich edited a comment on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-403733209 t This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121276) Time Spent: 4.5h (was: 4h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 4.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=121271=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121271 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 10/Jul/18 07:51 Start Date: 10/Jul/18 07:51 Worklog Time Spent: 10m Work Description: Fematich commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-403733209 Run Python PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121271) Time Spent: 4h 20m (was: 4h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 4h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120944=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120944 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 09/Jul/18 18:07 Start Date: 09/Jul/18 18:07 Worklog Time Spent: 10m Work Description: charlesccychen commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r201096467 ## File path: sdks/python/apache_beam/transforms/util.py ## @@ -19,11 +19,17 @@ """ from __future__ import absolute_import +from __future__ import division Review comment: This change requires further changes in this file. In `_BatchSizeEstimator._thin_data` on line 285, we need to explicitly cast to int here; otherwise, we get the following error: ``` Traceback (most recent call last): File "/usr/local/lib/python2.7/dist-packages/dataflow_worker/batchworker.py", line 642, in do_work work_executor.execute() File "/usr/local/lib/python2.7/dist-packages/dataflow_worker/executor.py", line 156, in execute op.start() File "dataflow_worker/native_operations.py", line 38, in dataflow_worker.native_operations.NativeReadOperation.start def start(self): File "dataflow_worker/native_operations.py", line 39, in dataflow_worker.native_operations.NativeReadOperation.start with self.scoped_start_state: File "dataflow_worker/native_operations.py", line 44, in dataflow_worker.native_operations.NativeReadOperation.start with self.spec.source.reader() as reader: File "dataflow_worker/native_operations.py", line 54, in dataflow_worker.native_operations.NativeReadOperation.start self.output(windowed_value) File "apache_beam/runners/worker/operations.py", line 175, in apache_beam.runners.worker.operations.Operation.output cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value) File "apache_beam/runners/worker/operations.py", line 85, in apache_beam.runners.worker.operations.ConsumerSet.receive cython.cast(Operation, consumer).process(windowed_value) File "apache_beam/runners/worker/operations.py", line 403, in apache_beam.runners.worker.operations.DoOperation.process with self.scoped_process_state: File "apache_beam/runners/worker/operations.py", line 404, in apache_beam.runners.worker.operations.DoOperation.process self.dofn_receiver.receive(o) File "apache_beam/runners/common.py", line 569, in apache_beam.runners.common.DoFnRunner.receive self.process(windowed_value) File "apache_beam/runners/common.py", line 577, in apache_beam.runners.common.DoFnRunner.process self._reraise_augmented(exn) File "apache_beam/runners/common.py", line 602, in apache_beam.runners.common.DoFnRunner._reraise_augmented raise File "apache_beam/runners/common.py", line 575, in apache_beam.runners.common.DoFnRunner.process self.do_fn_invoker.invoke_process(windowed_value) File "apache_beam/runners/common.py", line 352, in apache_beam.runners.common.SimpleInvoker.invoke_process output_processor.process_outputs( File "apache_beam/runners/common.py", line 673, in apache_beam.runners.common._OutputProcessor.process_outputs self.main_receivers.receive(windowed_value) File "apache_beam/runners/worker/operations.py", line 85, in apache_beam.runners.worker.operations.ConsumerSet.receive cython.cast(Operation, consumer).process(windowed_value) File "apache_beam/runners/worker/operations.py", line 403, in apache_beam.runners.worker.operations.DoOperation.process with self.scoped_process_state: File "apache_beam/runners/worker/operations.py", line 404, in apache_beam.runners.worker.operations.DoOperation.process self.dofn_receiver.receive(o) File "apache_beam/runners/common.py", line 569, in apache_beam.runners.common.DoFnRunner.receive self.process(windowed_value) File "apache_beam/runners/common.py", line 577, in apache_beam.runners.common.DoFnRunner.process self._reraise_augmented(exn) File "apache_beam/runners/common.py", line 618, in apache_beam.runners.common.DoFnRunner._reraise_augmented six.reraise(type(new_exn), new_exn, original_traceback) File "apache_beam/runners/common.py", line 575, in apache_beam.runners.common.DoFnRunner.process self.do_fn_invoker.invoke_process(windowed_value) File "apache_beam/runners/common.py", line 352, in apache_beam.runners.common.SimpleInvoker.invoke_process output_processor.process_outputs( File "apache_beam/runners/common.py", line 651, in apache_beam.runners.common._OutputProcessor.process_outputs for result in results: File
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120917=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120917 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 09/Jul/18 17:48 Start Date: 09/Jul/18 17:48 Worklog Time Spent: 10m Work Description: charlesccychen commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-403563306 Run Python PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120917) Time Spent: 4h (was: 3h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 4h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120808=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120808 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 09/Jul/18 15:27 Start Date: 09/Jul/18 15:27 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r201045007 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -190,12 +192,29 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): + def cmp(self, other): # Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) +end_cmp = (self.end > other.end) - (self.end < other.end) +hash_cmp = (hash(self) > hash(other)) - (hash(self) < hash(other)) +return end_cmp or hash_cmp def __eq__(self, other): -raise NotImplementedError +return self.cmp(other) == 0 Review comment: Now equivalent again to the original code. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120808) Time Spent: 3h 50m (was: 3h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 3h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120661=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120661 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 09/Jul/18 08:04 Start Date: 09/Jul/18 08:04 Worklog Time Spent: 10m Work Description: charlesccychen commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200912860 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -245,10 +265,19 @@ def __init__(self, value, timestamp): self.value = value self.timestamp = Timestamp.of(timestamp) - def __cmp__(self, other): + def __eq__(self, other): +type_eq = type(self) == type(other) +value_eq = self.value == other.value +timestamp_eq = self.timestamp == other.timestamp +return type_eq and value_eq and timestamp_eq Review comment: This will not work correctly, as the previous code relied on the short-circuiting behavior of "and". Accessing `other.value` will not work if the type is not as we expected. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120661) Time Spent: 3h 40m (was: 3.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 3h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120660=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120660 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 09/Jul/18 08:04 Start Date: 09/Jul/18 08:04 Worklog Time Spent: 10m Work Description: charlesccychen commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200912362 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -190,12 +192,29 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): + def cmp(self, other): # Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) +end_cmp = (self.end > other.end) - (self.end < other.end) +hash_cmp = (hash(self) > hash(other)) - (hash(self) < hash(other)) +return end_cmp or hash_cmp def __eq__(self, other): -raise NotImplementedError +return self.cmp(other) == 0 Review comment: It doesn't look like this takes care of the case where other is not of type `BoundedWindow`. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120660) Time Spent: 3h 40m (was: 3.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 3h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120530=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120530 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 08/Jul/18 22:27 Start Date: 08/Jul/18 22:27 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200856328 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -1855,7 +1865,7 @@ def __init__(self, value): value: An object of values for the PCollection """ super(Create, self).__init__() -if isinstance(value, string_types): +if isinstance(value, (unicode, str)): Review comment: I have added bytes in the list! Strings in Python2 didn't have a `__iter__()` method, the for loop functionality was provided by the `__getitem__()` method. That's the reason I suggested to use the `hasattr(values,'__iter__')` attribute check. However, in Python3 `__iter__()` is available for Strings as well, so the check wouldn't work for it. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120530) Time Spent: 3.5h (was: 3h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 3.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120528=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120528 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 08/Jul/18 22:15 Start Date: 08/Jul/18 22:15 Worklog Time Spent: 10m Work Description: Fematich commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-403321457 Yes, sorry for that! Should now be cleaned up :-). This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120528) Time Spent: 3h 20m (was: 3h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 3h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120514=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120514 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 08/Jul/18 21:28 Start Date: 08/Jul/18 21:28 Worklog Time Spent: 10m Work Description: charlesccychen commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200854567 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -1855,7 +1865,7 @@ def __init__(self, value): value: An object of values for the PCollection """ super(Create, self).__init__() -if isinstance(value, string_types): +if isinstance(value, (unicode, str)): Review comment: The intention of this line is to prohibit string types from being returned where we expect an iterable of items. Strings technically are iterable (and return individual characters), so we want to prevent them from being returned accidentally (e.g., a user may intend to return a single string, but we don't want to interpret it as its individual characters). In both Python 2 and Python 3, string types are iterable, so I think we should add `bytes` to this list of "blacklisted" return types. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120514) Time Spent: 3h 10m (was: 3h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 3h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120513=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120513 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 08/Jul/18 21:26 Start Date: 08/Jul/18 21:26 Worklog Time Spent: 10m Work Description: charlesccychen commented on issue #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#issuecomment-403318437 Oops, it looks like there is something wrong with the commit history--a bunch of Java changes are being pulled in. Can you rebase to master, cherrypick and / or squash everything into one commit? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120513) Time Spent: 3h (was: 2h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 3h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120228=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120228 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 07/Jul/18 11:48 Start Date: 07/Jul/18 11:48 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200810287 ## File path: sdks/python/apache_beam/transforms/ptransform.py ## @@ -622,7 +626,7 @@ def __init__(self, fn, *args, **kwargs): super(PTransformWithSideInputs, self).__init__() if (any([isinstance(v, pvalue.PCollection) for v in args]) or -any([isinstance(v, pvalue.PCollection) for v in kwargs.itervalues()])): +any([isinstance(v, pvalue.PCollection) for v in itervalues(kwargs)])): Review comment: This is a bit less optimal in Python2, but since it's only in the __init__, I'll change it to `.values()` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120228) Time Spent: 2h 50m (was: 2h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 2h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120226=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120226 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 07/Jul/18 11:44 Start Date: 07/Jul/18 11:44 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200810188 ## File path: sdks/python/apache_beam/transforms/display.py ## @@ -141,7 +147,7 @@ def create_from_options(cls, pipeline_options): items = {k: (v if DisplayDataItem._get_value_type(v) is not None else str(v)) - for k, v in pipeline_options.display_data().items()} + for k, v in iteritems(pipeline_options.display_data())} Review comment: I added for efficiency in Python2, but this will actually not be called that often, so I'll revert it. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120226) Time Spent: 2h 40m (was: 2.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 2h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120223=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120223 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 07/Jul/18 11:38 Start Date: 07/Jul/18 11:38 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200810075 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -1855,7 +1865,7 @@ def __init__(self, value): value: An object of values for the PCollection """ super(Create, self).__init__() -if isinstance(value, string_types): +if isinstance(value, (unicode, str)): Review comment: Based on the requirements of value (possible to, I think we should actually check if value is an iterable: `hasattr(values,'__iter__')` which also fails for string_types This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120223) Time Spent: 2h 20m (was: 2h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 2h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120224=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120224 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 07/Jul/18 11:38 Start Date: 07/Jul/18 11:38 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200810075 ## File path: sdks/python/apache_beam/transforms/core.py ## @@ -1855,7 +1865,7 @@ def __init__(self, value): value: An object of values for the PCollection """ super(Create, self).__init__() -if isinstance(value, string_types): +if isinstance(value, (unicode, str)): Review comment: Based on the requirements of value, I think we should actually check if value is an iterable: `hasattr(values,'__iter__')` which also fails for string_types This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120224) Time Spent: 2.5h (was: 2h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 2.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120217=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120217 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 07/Jul/18 10:47 Start Date: 07/Jul/18 10:47 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200809163 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -190,15 +192,41 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): + def cmp(self, other): # Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) +end_cmp = (self.end > other.end) - (self.end < other.end) +hash_cmp = (hash(self) > hash(other)) - (hash(self) < hash(other)) +return end_cmp or hash_cmp def __eq__(self, other): -raise NotImplementedError +return self.cmp(other) == 0 Review comment: By only implementing 2 methods (as in the example of TimestampedValue), `test_sessions_after_all (apache_beam.transforms.trigger_test.TriggerTest)` didn't run This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120217) Time Spent: 2h 10m (was: 2h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 2h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120216=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120216 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 07/Jul/18 10:46 Start Date: 07/Jul/18 10:46 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200809163 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -190,15 +192,41 @@ def __init__(self, end): def max_timestamp(self): return self.end.predecessor() - def __cmp__(self, other): + def cmp(self, other): # Order first by endpoint, then arbitrarily. -return cmp(self.end, other.end) or cmp(hash(self), hash(other)) +end_cmp = (self.end > other.end) - (self.end < other.end) +hash_cmp = (hash(self) > hash(other)) - (hash(self) < hash(other)) +return end_cmp or hash_cmp def __eq__(self, other): -raise NotImplementedError +return self.cmp(other) == 0 Review comment: By only implementing 2 methods (as in the example of TimestampedValue), test_sessions_after_all (apache_beam.transforms.trigger_test.TriggerTest) didn't run This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120216) Time Spent: 2h (was: 1h 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 2h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120213=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120213 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 07/Jul/18 10:35 Start Date: 07/Jul/18 10:35 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200808916 ## File path: sdks/python/apache_beam/transforms/window.py ## @@ -245,10 +274,17 @@ def __init__(self, value, timestamp): self.value = value self.timestamp = Timestamp.of(timestamp) - def __cmp__(self, other): + def __eq__(self, other): +return (type(self) == type(other)) and (self.value == other.value) and \ + (self.timestamp == other.timestamp) + + def __hash__(self): +return hash((type(self), self.value, self.timestamp)) + + def __lt__(self, other): if type(self) is not type(other): - return cmp(type(self), type(other)) -return cmp((self.value, self.timestamp), (other.value, other.timestamp)) + return type(self) < type(other) Review comment: Indeed! Using the hash of type should resolve this issue. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120213) Time Spent: 1h 50m (was: 1h 40m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 1h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=120210=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120210 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 07/Jul/18 10:04 Start Date: 07/Jul/18 10:04 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200808310 ## File path: sdks/python/apache_beam/transforms/trigger.py ## @@ -972,10 +988,13 @@ def __eq__(self, other): if isinstance(other, collections.Iterable): return all( a == b - for a, b in itertools.izip_longest(self, other, fillvalue=object())) + for a, b in zip_longest(self, other, fillvalue=object())) else: return NotImplemented + def __hash__(self): +return hash(self) Review comment: Exactly thx! Fixed now with: `return hash(tuple(self))` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 120210) Time Spent: 1h 40m (was: 1.5h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 1h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=119028=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119028 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 04/Jul/18 10:45 Start Date: 04/Jul/18 10:45 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200085113 ## File path: sdks/python/apache_beam/pipeline_test.py ## @@ -520,11 +520,11 @@ def test_dir(self): options = Breakfast() self.assertEquals( set(['from_dictionary', 'get_all_options', 'slices', 'style', - 'view_as', 'display_data']), + 'view_as', 'display_data', 'next']), Review comment: This is because of the import `from builtins import object` in apache_beam/transforms/display.py. This import adds an alias: `next = __next__` for Python2 and Python3 compatibility. PipelineOptions (the tested class in this test) inherits from HasDisplayData class defined in the display.py module. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 119028) Time Spent: 1.5h (was: 1h 20m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 1.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=119025=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119025 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 04/Jul/18 10:42 Start Date: 04/Jul/18 10:42 Worklog Time Spent: 10m Work Description: Fematich commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r200085113 ## File path: sdks/python/apache_beam/pipeline_test.py ## @@ -520,11 +520,11 @@ def test_dir(self): options = Breakfast() self.assertEquals( set(['from_dictionary', 'get_all_options', 'slices', 'style', - 'view_as', 'display_data']), + 'view_as', 'display_data', 'next']), Review comment: This is because of the import `from builtins import object` in apache_beam/transforms/display.py. This import adds an alias: `next = __next__` to make Python2 and Python3 object classes compatible PipelineOptions (the tested class in this test) inherits from HasDisplayData class defined in the display.py module. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 119025) Time Spent: 1h 20m (was: 1h 10m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 1h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=118586=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-118586 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 03/Jul/18 06:00 Start Date: 03/Jul/18 06:00 Worklog Time Spent: 10m Work Description: superbobry commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r199691154 ## File path: sdks/python/apache_beam/transforms/create_source.py ## @@ -57,15 +64,15 @@ def split(self, desired_bundle_size, start_position=None, start_position = 0 if stop_position is None: stop_position = len(self._serialized_values) - avg_size_per_value = self._total_size / len(self._serialized_values) + avg_size_per_value = self._total_size // len(self._serialized_values) num_values_per_split = max( - int(desired_bundle_size / avg_size_per_value), 1) + int(desired_bundle_size // avg_size_per_value), 1) Review comment: Good point. I didn't realise that `avg_size_per_value` could be a float. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 118586) Time Spent: 1h 10m (was: 1h) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 1h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4006) Futurize and fix python 2 compatibility for transforms subpackage
[ https://issues.apache.org/jira/browse/BEAM-4006?focusedWorklogId=118545=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-118545 ] ASF GitHub Bot logged work on BEAM-4006: Author: ASF GitHub Bot Created on: 03/Jul/18 01:21 Start Date: 03/Jul/18 01:21 Worklog Time Spent: 10m Work Description: charlesccychen commented on a change in pull request #5729: [BEAM-4006] Futurize transforms subpackage URL: https://github.com/apache/beam/pull/5729#discussion_r199660651 ## File path: sdks/python/apache_beam/transforms/sideinputs_test.py ## @@ -194,7 +197,7 @@ def match(actual): [[actual_elem, actual_list, actual_dict]] = actual equal_to([expected_elem])([actual_elem]) equal_to(expected_list)(actual_list) -equal_to(expected_pairs)(actual_dict.iteritems()) +equal_to(expected_pairs)(iteritems(actual_dict)) Review comment: Can we just use `.items()`? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 118545) Time Spent: 1h (was: 50m) > Futurize and fix python 2 compatibility for transforms subpackage > - > > Key: BEAM-4006 > URL: https://issues.apache.org/jira/browse/BEAM-4006 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robbe >Assignee: Matthias Feys >Priority: Major > Time Spent: 1h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)