[
https://issues.apache.org/jira/browse/BEAM-10921?focusedWorklogId=508671&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-508671
]
ASF GitHub Bot logged work on BEAM-10921:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 07/Nov/20 00:41
Start Date: 07/Nov/20 00:41
Worklog Time Spent: 10m
Work Description: davidyan74 commented on a change in pull request #13277:
URL: https://github.com/apache/beam/pull/13277#discussion_r519069697
##########
File path: sdks/python/apache_beam/runners/interactive/derivation_tree_test.py
##########
@@ -0,0 +1,181 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import absolute_import
+
+import unittest
+
+import apache_beam as beam
+from apache_beam.runners.interactive.derivation_tree import DerivationTree
+
+
+class DerivationTreeTest(unittest.TestCase):
+ def test_getting_unknown_pid_returns_none(self):
+ dt = DerivationTree()
+
+ p = beam.Pipeline()
+
+ self.assertIsNone(dt.get_pipeline(str(id(p))))
+
+ def test_getting_unknown_pipeline_returns_none(self):
+ dt = DerivationTree()
+
+ p = beam.Pipeline()
+
+ self.assertIsNone(dt.get_user_pipeline(p))
+
+ def test_no_parent_returns_none(self):
+ dt = DerivationTree()
+
+ user = beam.Pipeline()
+ derived = beam.Pipeline()
+ orphan = beam.Pipeline()
+
+ dt.add_derived_pipeline(user, derived)
+
+ self.assertIsNone(dt.get_user_pipeline(orphan))
+
+ def test_get_user_pipeline_is_same(self):
+ dt = DerivationTree()
+
+ p = beam.Pipeline()
+ dt.add_user_pipeline(p)
+
+ self.assertIs(dt.get_user_pipeline(p), p)
+
+ def test_can_add_derived(self):
+ dt = DerivationTree()
+
+ user = beam.Pipeline()
+ derived = beam.Pipeline()
+
+ dt.add_derived_pipeline(user, derived)
+
+ self.assertIs(dt.get_user_pipeline(derived), user)
+
+ def test_can_add_multiple_derived(self):
+ """Tests that there can be many user pipelines with many derived
+ pipelines.
+ """
+ dt = DerivationTree()
+
+ # Add the first set of user and derived pipelines.
+ user1 = beam.Pipeline()
+ derived11 = beam.Pipeline()
+ derived12 = beam.Pipeline()
+
+ dt.add_derived_pipeline(user1, derived11)
+ dt.add_derived_pipeline(user1, derived12)
+
+ # Add the second set of user and derived pipelines.
+ user2 = beam.Pipeline()
+ derived21 = beam.Pipeline()
+ derived22 = beam.Pipeline()
+
+ dt.add_derived_pipeline(user2, derived21)
+ dt.add_derived_pipeline(user2, derived22)
+
+ # Assert that the user pipelines are correct.
+ self.assertIs(dt.get_user_pipeline(derived11), user1)
+ self.assertIs(dt.get_user_pipeline(derived12), user1)
+ self.assertIs(dt.get_user_pipeline(derived21), user2)
+ self.assertIs(dt.get_user_pipeline(derived22), user2)
+
+ def test_cannot_have_multiple_parents(self):
+ dt = DerivationTree()
+
+ user1 = beam.Pipeline()
+ user2 = beam.Pipeline()
+ derived = beam.Pipeline()
+
+ dt.add_derived_pipeline(user1, derived)
+ dt.add_derived_pipeline(user2, derived)
+
+ self.assertIs(dt.get_user_pipeline(derived), user1)
+
+ def test_adding_derived_with_derived_gets_user_pipeline(self):
+ """Tests that one can correctly add a derived pipeline from a derived
+ pipeline and still get the correct user pipeline.
+ """
+ dt = DerivationTree()
+
+ user = beam.Pipeline()
+ derived1 = beam.Pipeline()
+ derived2 = beam.Pipeline()
+
+ # Add the first derived pipeline to the user pipeline.
+ dt.add_derived_pipeline(user, derived1)
+
+ # Add the second derived pipeline to the first derived pipeline. This should
+ # get the user pipeline of the first and add the second to it.
+ dt.add_derived_pipeline(derived1, derived2)
Review comment:
What is the use case of this? I was wondering whether we could just
implement this using a two-way map (i.e. map from user pipeline to multiple
derived pipelines and from derived pipeline to user pipeline).
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 508671)
Time Spent: 3h (was: 2h 50m)
> Interactive Runner Python unit tests are flaky on Windows
> ---------------------------------------------------------
>
> Key: BEAM-10921
> URL: https://issues.apache.org/jira/browse/BEAM-10921
> Project: Beam
> Issue Type: Bug
> Components: sdk-py-core
> Reporter: Brian Hulette
> Assignee: Ning Kang
> Priority: P1
> Labels: currently-failing, flake
> Fix For: Not applicable
>
> Time Spent: 3h
> Remaining Estimate: 0h
>
> Over the past few days, Python unit tests have been failing frequently. The
> errors always seem to occur when cleaning up the interactive environment:
> {code}
> ...........
> [100%]
> ================================== FAILURES ===================================
> _ PipelineInstrumentTest.test_able_to_cache_intermediate_unbounded_source_pcollection _
> [gw2] win32 -- Python 3.5.4
> d:\a\beam\beam\sdks\python\target\.tox\py35-win\scripts\python.exe
> self =
> <apache_beam.runners.interactive.pipeline_instrument_test.PipelineInstrumentTest
> testMethod=test_able_to_cache_intermediate_unbounded_source_pcollection>
> def setUp(self):
> > ie.new_env()
> apache_beam\runners\interactive\pipeline_instrument_test.py:46:
> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> _
> apache_beam\runners\interactive\interactive_environment.py:117: in new_env
> _interactive_beam_env.cleanup()
> apache_beam\runners\interactive\interactive_environment.py:273: in cleanup
> cache_manager.cleanup()
> apache_beam\runners\interactive\caching\streaming_cache.py:391: in cleanup
> shutil.rmtree(self._cache_dir)
> c:\hostedtoolcache\windows\python\3.5.4\x64\lib\shutil.py:494: in rmtree
> return _rmtree_unsafe(path, onerror)
> c:\hostedtoolcache\windows\python\3.5.4\x64\lib\shutil.py:384: in
> _rmtree_unsafe
> _rmtree_unsafe(fullname, onerror)
> c:\hostedtoolcache\windows\python\3.5.4\x64\lib\shutil.py:389: in
> _rmtree_unsafe
> onerror(os.unlink, fullname, sys.exc_info())
> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> _
> path =
> 'D:\\a\\beam\\beam\\sdks\\python\\target\\.tox\\py35-win\\tmp\\it-8vh2z7pi2021914046928\\full'
> onerror = <function rmtree.<locals>.onerror at 0x000001D6C3E5C7B8>
> def _rmtree_unsafe(path, onerror):
> try:
> if os.path.islink(path):
> # symlinks to directories are forbidden, see bug #1669
> raise OSError("Cannot call rmtree on a symbolic link")
> except OSError:
> onerror(os.path.islink, path, sys.exc_info())
> # can't continue even if onerror hook returns
> return
> names = []
> try:
> names = os.listdir(path)
> except OSError:
> onerror(os.listdir, path, sys.exc_info())
> for name in names:
> fullname = os.path.join(path, name)
> try:
> mode = os.lstat(fullname).st_mode
> except OSError:
> mode = 0
> if stat.S_ISDIR(mode):
> _rmtree_unsafe(fullname, onerror)
> else:
> try:
> > os.unlink(fullname)
> E PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'D:\\a\\beam\\beam\\sdks\\python\\target\\.tox\\py35-win\\tmp\\it-8vh2z7pi2021914046928\\full\\ac8879590f-2021876280456-2021876278608-2021914046928'
> c:\hostedtoolcache\windows\python\3.5.4\x64\lib\shutil.py:387: PermissionError
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)