[ 
https://issues.apache.org/jira/browse/BEAM-5626?focusedWorklogId=152439&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-152439
 ]

ASF GitHub Bot logged work on BEAM-5626:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 08/Oct/18 22:11
            Start Date: 08/Oct/18 22:11
    Worklog Time Spent: 10m 
      Work Description: charlesccychen closed pull request #6587: [BEAM-5626] 
Fix hadoop filesystem test for py3.
URL: https://github.com/apache/beam/pull/6587
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/sdks/python/apache_beam/io/hadoopfilesystem_test.py 
b/sdks/python/apache_beam/io/hadoopfilesystem_test.py
index a943a12bb4d..8421c43e629 100644
--- a/sdks/python/apache_beam/io/hadoopfilesystem_test.py
+++ b/sdks/python/apache_beam/io/hadoopfilesystem_test.py
@@ -22,6 +22,7 @@
 import io
 import logging
 import posixpath
+import sys
 import unittest
 from builtins import object
 
@@ -153,7 +154,7 @@ def delete(self, path, recursive=True):
 
     _ = self.status(path)
 
-    for filepath in self.files.keys():  # pylint: 
disable=consider-iterating-dictionary
+    for filepath in list(self.files):
       if filepath.startswith(path):
         del self.files[filepath]
 
@@ -197,6 +198,12 @@ def checksum(self, path):
 
 class HadoopFileSystemTest(unittest.TestCase):
 
+  @classmethod
+  def setUpClass(cls):
+    # Method has been renamed in Python 3
+    if sys.version_info[0] < 3:
+      cls.assertCountEqual = cls.assertItemsEqual
+
   def setUp(self):
     self._fake_hdfs = FakeHdfs()
     hdfs.hdfs.InsecureClient = (
@@ -258,7 +265,7 @@ def test_match_file(self):
     returned_files = [f.path
                       for match_result in result
                       for f in match_result.metadata_list]
-    self.assertItemsEqual(expected_files, returned_files)
+    self.assertCountEqual(expected_files, returned_files)
 
   def test_match_file_with_limits(self):
     expected_files = [self.fs.join(self.tmpdir, filename)
@@ -296,7 +303,7 @@ def test_match_directory(self):
     # structure, so listing without a '/' will return no results.
     result = self.fs.match([self.tmpdir + '/'])[0]
     files = [f.path for f in result.metadata_list]
-    self.assertItemsEqual(files, expected_files)
+    self.assertCountEqual(files, expected_files)
 
   def test_match_directory_trailing_slash(self):
     expected_files = [self.fs.join(self.tmpdir, filename)
@@ -304,7 +311,7 @@ def test_match_directory_trailing_slash(self):
 
     result = self.fs.match([self.tmpdir + '/'])[0]
     files = [f.path for f in result.metadata_list]
-    self.assertItemsEqual(files, expected_files)
+    self.assertCountEqual(files, expected_files)
 
   def test_create_success(self):
     url = self.fs.join(self.tmpdir, 'new_file')
@@ -322,7 +329,7 @@ def test_create_write_read_compressed(self):
     path = self.fs._parse_url(url)
     expected_file = FakeFile(path, 'wb')
     self.assertEqual(self._fake_hdfs.files[path], expected_file)
-    data = 'abc' * 10
+    data = b'abc' * 10
     handle.write(data)
     # Compressed data != original data
     self.assertNotEquals(data, self._fake_hdfs.files[path].getvalue())
@@ -336,7 +343,7 @@ def test_create_write_read_compressed(self):
   def test_open(self):
     url = self.fs.join(self.tmpdir, 'old_file1')
     handle = self.fs.open(url)
-    expected_data = ''
+    expected_data = b''
     data = handle.read()
     self.assertEqual(data, expected_data)
 
@@ -356,7 +363,7 @@ def test_copy_file(self):
     url2 = self.fs.join(self.tmpdir, 'new_file2')
     url3 = self.fs.join(self.tmpdir, 'new_file3')
     with self.fs.create(url1) as f1:
-      f1.write('Hello')
+      f1.write(b'Hello')
     self.fs.copy([url1, url1], [url2, url3])
     self.assertTrue(self._cmpfiles(url1, url2))
     self.assertTrue(self._cmpfiles(url1, url3))
@@ -365,9 +372,9 @@ def test_copy_file_overwrite_error(self):
     url1 = self.fs.join(self.tmpdir, 'new_file1')
     url2 = self.fs.join(self.tmpdir, 'new_file2')
     with self.fs.create(url1) as f1:
-      f1.write('Hello')
+      f1.write(b'Hello')
     with self.fs.create(url2) as f2:
-      f2.write('nope')
+      f2.write(b'nope')
     with self.assertRaisesRegexp(
         BeamIOError, r'already exists.*%s' % posixpath.basename(url2)):
       self.fs.copy([url1], [url2])
@@ -378,7 +385,7 @@ def test_copy_file_error(self):
     url3 = self.fs.join(self.tmpdir, 'new_file3')
     url4 = self.fs.join(self.tmpdir, 'new_file4')
     with self.fs.create(url3) as f:
-      f.write('Hello')
+      f.write(b'Hello')
     with self.assertRaisesRegexp(
         BeamIOError, r'^Copy operation failed .*%s.*%s.* not found' % (
             url1, url2)):
@@ -397,7 +404,7 @@ def test_copy_directory(self):
     url1 = self.fs.join(url_t1_inner, 'f1')
     url2 = self.fs.join(url_t2_inner, 'f1')
     with self.fs.create(url1) as f:
-      f.write('Hello')
+      f.write(b'Hello')
 
     self.fs.copy([url_t1], [url_t2])
     self.assertTrue(self._cmpfiles(url1, url2))
@@ -419,9 +426,9 @@ def test_copy_directory_overwrite_error(self):
     url3_inner = self.fs.join(url_t2_inner, 'f3')
     for url in [url1, url1_inner, url3_inner]:
       with self.fs.create(url) as f:
-        f.write('Hello')
+        f.write(b'Hello')
     with self.fs.create(url2) as f:
-      f.write('nope')
+      f.write(b'nope')
 
     with self.assertRaisesRegexp(BeamIOError, r'already exists'):
       self.fs.copy([url_t1], [url_t2])
@@ -430,7 +437,7 @@ def test_rename_file(self):
     url1 = self.fs.join(self.tmpdir, 'f1')
     url2 = self.fs.join(self.tmpdir, 'f2')
     with self.fs.create(url1) as f:
-      f.write('Hello')
+      f.write(b'Hello')
 
     self.fs.rename([url1], [url2])
     self.assertFalse(self.fs.exists(url1))
@@ -442,7 +449,7 @@ def test_rename_file_error(self):
     url3 = self.fs.join(self.tmpdir, 'f3')
     url4 = self.fs.join(self.tmpdir, 'f4')
     with self.fs.create(url3) as f:
-      f.write('Hello')
+      f.write(b'Hello')
 
     with self.assertRaisesRegexp(
         BeamIOError, r'^Rename operation failed .*%s.*%s' % (url1, url2)):
@@ -457,7 +464,7 @@ def test_rename_directory(self):
     url1 = self.fs.join(url_t1, 'f1')
     url2 = self.fs.join(url_t2, 'f1')
     with self.fs.create(url1) as f:
-      f.write('Hello')
+      f.write(b'Hello')
 
     self.fs.rename([url_t1], [url_t2])
     self.assertFalse(self.fs.exists(url_t1))
@@ -474,13 +481,13 @@ def test_exists(self):
   def test_size(self):
     url = self.fs.join(self.tmpdir, 'f1')
     with self.fs.create(url) as f:
-      f.write('Hello')
+      f.write(b'Hello')
     self.assertEqual(5, self.fs.size(url))
 
   def test_checksum(self):
     url = self.fs.join(self.tmpdir, 'f1')
     with self.fs.create(url) as f:
-      f.write('Hello')
+      f.write(b'Hello')
     self.assertEqual('fake_algo-5-checksum_byte_sequence',
                      self.fs.checksum(url))
 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 152439)
    Time Spent: 4h  (was: 3h 50m)

> Several IO tests fail in Python 3 with RuntimeError('dictionary changed size 
> during iteration',)}
> -------------------------------------------------------------------------------------------------
>
>                 Key: BEAM-5626
>                 URL: https://issues.apache.org/jira/browse/BEAM-5626
>             Project: Beam
>          Issue Type: Sub-task
>          Components: sdk-py-core
>            Reporter: Valentyn Tymofieiev
>            Assignee: Ruoyun Huang
>            Priority: Major
>          Time Spent: 4h
>  Remaining Estimate: 0h
>
>  ERROR: test_delete_dir 
> (apache_beam.io.hadoopfilesystem_test.HadoopFileSystemTest)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File 
> "/usr/local/google/home/valentyn/projects/beam/clean_head/beam/sdks/python/apache_beam/io/hadoopfilesystem_test.py",
>  line 506, in test_delete_dir
>      self.fs.delete([url_t1])
>    File 
> "/usr/local/google/home/valentyn/projects/beam/clean_head/beam/sdks/python/apache_beam/io/hadoopfilesystem.py",
>  line 370, in delete
>      raise BeamIOError("Delete operation failed", exceptions)
>  apache_beam.io.filesystem.BeamIOError: Delete operation failed with 
> exceptions {'hdfs://test_dir/new_dir1': RuntimeError('dictionary changed size 
> during iteration',       )}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to