This is an automated email from the ASF dual-hosted git repository.
jrmccluskey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new c69e8a18601 remove list_prefix deprecated method (#37587)
c69e8a18601 is described below
commit c69e8a186015fda7934f8f370e35c5fa02d5ea34
Author: Derrick Williams <[email protected]>
AuthorDate: Tue Feb 17 09:39:28 2026 -0500
remove list_prefix deprecated method (#37587)
---
sdks/python/apache_beam/io/aws/s3io.py | 28 ++---------------
sdks/python/apache_beam/io/aws/s3io_test.py | 36 +++++++++++++---------
sdks/python/apache_beam/io/azure/blobstorageio.py | 26 ++--------------
.../io/external/xlang_bigqueryio_it_test.py | 2 +-
sdks/python/apache_beam/io/gcp/gcsio.py | 26 ++--------------
sdks/python/apache_beam/io/gcp/gcsio_test.py | 5 ++-
.../runners/interactive/cache_manager.py | 2 +-
7 files changed, 33 insertions(+), 92 deletions(-)
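Callers that still use the removed ``list_prefix`` should switch to ``list_files``, which yields (file_name, size) tuples lazily instead of returning a dict of file name -> size. A minimal migration sketch against GcsIO, using a placeholder gs:// path (the same pattern applies to S3IO and BlobStorageIO):

    from apache_beam.io.gcp import gcsio

    gcs = gcsio.GcsIO()

    # Before (removed): sizes = gcs.list_prefix('gs://my-bucket/prefix/')
    # After: iterate the generator directly...
    for file_name, size in gcs.list_files('gs://my-bucket/prefix/'):
        print(file_name, size)

    # ...or wrap it in dict() to recover the old dict-shaped result,
    # as the updated tests below do:
    sizes = dict(gcs.list_files('gs://my-bucket/prefix/'))

With with_metadata=True, each yielded value pairs the file name with a (size, timestamp) tuple, matching the values of the old with_metadata dict.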
diff --git a/sdks/python/apache_beam/io/aws/s3io.py b/sdks/python/apache_beam/io/aws/s3io.py
index 887bb4c7baa..534e63eab51 100644
--- a/sdks/python/apache_beam/io/aws/s3io.py
+++ b/sdks/python/apache_beam/io/aws/s3io.py
@@ -33,7 +33,6 @@ from apache_beam.io.filesystemio import DownloaderStream
from apache_beam.io.filesystemio import Uploader
from apache_beam.io.filesystemio import UploaderStream
from apache_beam.utils import retry
-from apache_beam.utils.annotations import deprecated
try:
# pylint: disable=wrong-import-order, wrong-import-position
@@ -100,27 +99,6 @@ class S3IO(object):
else:
raise ValueError('Invalid file open mode: %s.' % mode)
- @deprecated(since='2.45.0', current='list_files')
- def list_prefix(self, path, with_metadata=False):
- """Lists files matching the prefix.
-
- ``list_prefix`` has been deprecated. Use `list_files` instead, which returns
- a generator of file information instead of a dict.
-
- Args:
- path: S3 file path pattern in the form s3://<bucket>/[name].
- with_metadata: Experimental. Specify whether returns file metadata.
-
- Returns:
- If ``with_metadata`` is False: dict of file name -> size; if
- ``with_metadata`` is True: dict of file name -> tuple(size, timestamp).
- """
- file_info = {}
- for file_metadata in self.list_files(path, with_metadata):
- file_info[file_metadata[0]] = file_metadata[1]
-
- return file_info
-
def list_files(self, path, with_metadata=False):
"""Lists files matching the prefix.
@@ -186,7 +164,7 @@ class S3IO(object):
break
logging.log(
- # do not spam logs when list_prefix is likely used to check empty folder
+ # do not spam logs when list_files is likely used to check empty folder
logging.INFO if counter > 0 else logging.DEBUG,
"Finished listing %s files in %s seconds.",
counter,
@@ -288,7 +266,7 @@ class S3IO(object):
assert dest.endswith('/')
results = []
- for entry in self.list_prefix(src):
+ for entry, _ in self.list_files(src):
rel_path = entry[len(src):]
try:
self.copy(entry, dest + rel_path)
@@ -436,7 +414,7 @@ class S3IO(object):
"""
assert root.endswith('/')
- paths = self.list_prefix(root)
+ paths = [p for p, _ in self.list_files(root)]
return self.delete_files(paths)
def size(self, path):
diff --git a/sdks/python/apache_beam/io/aws/s3io_test.py
b/sdks/python/apache_beam/io/aws/s3io_test.py
index ffab9572707..09df3da7674 100644
--- a/sdks/python/apache_beam/io/aws/s3io_test.py
+++ b/sdks/python/apache_beam/io/aws/s3io_test.py
@@ -170,14 +170,17 @@ class TestS3IO(unittest.TestCase):
file_size = 1024
self._insert_random_file(self.client, src_file_name, file_size)
- self.assertTrue(src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ self.assertTrue(
+ src_file_name in dict(self.aws.list_files(self.TEST_DATA_PATH)))
self.assertFalse(
- dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ dest_file_name in dict(self.aws.list_files(self.TEST_DATA_PATH)))
self.aws.copy(src_file_name, dest_file_name)
- self.assertTrue(src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
- self.assertTrue(dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ self.assertTrue(
+ src_file_name in dict(self.aws.list_files(self.TEST_DATA_PATH)))
+ self.assertTrue(
+ dest_file_name in dict(self.aws.list_files(self.TEST_DATA_PATH)))
# Clean up
self.aws.delete_files([src_file_name, dest_file_name])
@@ -290,9 +293,9 @@ class TestS3IO(unittest.TestCase):
dest_file_name = dest_dir_name + path
self._insert_random_file(self.client, src_file_name, file_size)
self.assertTrue(
- src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ src_file_name in dict(self.aws.list_files(self.TEST_DATA_PATH)))
self.assertFalse(
- dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ dest_file_name in dict(self.aws.list_files(self.TEST_DATA_PATH)))
results = self.aws.copy_tree(src_dir_name, dest_dir_name)
@@ -303,9 +306,9 @@ class TestS3IO(unittest.TestCase):
self.assertIsNone(err)
self.assertTrue(
- src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ src_file_name in dict(self.aws.list_files(self.TEST_DATA_PATH)))
self.assertTrue(
- dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ dest_file_name in dict(self.aws.list_files(self.TEST_DATA_PATH)))
# Clean up
for path in paths:
@@ -320,14 +323,17 @@ class TestS3IO(unittest.TestCase):
self._insert_random_file(self.client, src_file_name, file_size)
- self.assertTrue(src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ self.assertTrue(
+ src_file_name in dict(self.aws.list_files(self.TEST_DATA_PATH)))
self.assertFalse(
- dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ dest_file_name in dict(self.aws.list_files(self.TEST_DATA_PATH)))
self.aws.rename(src_file_name, dest_file_name)
- self.assertFalse(src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
- self.assertTrue(dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ self.assertFalse(
+ src_file_name in dict(self.aws.list_files(self.TEST_DATA_PATH)))
+ self.assertTrue(
+ dest_file_name in dict(self.aws.list_files(self.TEST_DATA_PATH)))
# Clean up
self.aws.delete_files([src_file_name, dest_file_name])
@@ -439,7 +445,7 @@ class TestS3IO(unittest.TestCase):
# Create the file and check that it was created
self._insert_random_file(self.aws.client, file_name, file_size)
- files = self.aws.list_prefix(self.TEST_DATA_PATH)
+ files = dict(self.aws.list_files(self.TEST_DATA_PATH))
self.assertTrue(file_name in files)
# Delete the file and check that it was deleted
@@ -748,7 +754,7 @@ class TestS3IO(unittest.TestCase):
# Clean up
self.aws.delete(file_name)
- def test_list_prefix(self):
+ def test_list_files(self):
objects = [
('jerry/pigpen/phil', 5),
@@ -785,7 +791,7 @@ class TestS3IO(unittest.TestCase):
expected_file_names = [(self.TEST_DATA_PATH + object_name, size)
for (object_name, size) in expected_object_names]
self.assertEqual(
- set(self.aws.list_prefix(file_pattern).items()),
+ set(dict(self.aws.list_files(file_pattern)).items()),
set(expected_file_names))
# Clean up
diff --git a/sdks/python/apache_beam/io/azure/blobstorageio.py b/sdks/python/apache_beam/io/azure/blobstorageio.py
index 9b0f595e102..bc017ff7b21 100644
--- a/sdks/python/apache_beam/io/azure/blobstorageio.py
+++ b/sdks/python/apache_beam/io/azure/blobstorageio.py
@@ -35,7 +35,6 @@ from apache_beam.io.filesystemio import Uploader
from apache_beam.io.filesystemio import UploaderStream
from apache_beam.options.pipeline_options import AzureOptions
from apache_beam.utils import retry
-from apache_beam.utils.annotations import deprecated
_LOGGER = logging.getLogger(__name__)
@@ -207,7 +206,7 @@ class BlobStorageIO(object):
assert dest.endswith('/')
results = []
- for entry in self.list_prefix(src):
+ for entry, _ in self.list_files(src):
rel_path = entry[len(src):]
try:
self.copy(entry, dest + rel_path)
@@ -504,7 +503,7 @@ class BlobStorageIO(object):
assert root.endswith('/')
# Get the blob under the root directory.
- paths_to_delete = self.list_prefix(root)
+ paths_to_delete = [p for p, _ in self.list_files(root)]
return self.delete_files(paths_to_delete)
@@ -577,25 +576,6 @@ class BlobStorageIO(object):
return results
- @deprecated(since='2.45.0', current='list_files')
- def list_prefix(self, path, with_metadata=False):
- """Lists files matching the prefix.
-
- Args:
- path: Azure Blob Storage file path pattern in the form
- azfs://<storage-account>/<container>/[name].
- with_metadata: Experimental. Specify whether returns file metadata.
-
- Returns:
- If ``with_metadata`` is False: dict of file name -> size; if
- ``with_metadata`` is True: dict of file name -> tuple(size, timestamp).
- """
- file_info = {}
- for file_metadata in self.list_files(path, with_metadata):
- file_info[file_metadata[0]] = file_metadata[1]
-
- return file_info
-
def list_files(self, path, with_metadata=False):
"""Lists files matching the prefix.
@@ -644,7 +624,7 @@ class BlobStorageIO(object):
yield file_name, item.size
logging.log(
- # do not spam logs when list_prefix is likely used to check empty folder
+ # do not spam logs when list_files is likely used to check empty folder
logging.INFO if counter > 0 else logging.DEBUG,
"Finished listing %s files in %s seconds.",
counter,
diff --git a/sdks/python/apache_beam/io/external/xlang_bigqueryio_it_test.py b/sdks/python/apache_beam/io/external/xlang_bigqueryio_it_test.py
index d659d57aad9..bc012bd7be9 100644
--- a/sdks/python/apache_beam/io/external/xlang_bigqueryio_it_test.py
+++ b/sdks/python/apache_beam/io/external/xlang_bigqueryio_it_test.py
@@ -186,7 +186,7 @@ class BigQueryXlangStorageWriteIT(unittest.TestCase):
# List objects in the bucket with the constructed prefix
try:
- objects = gcs_io.list_prefix(f"gs://{bucket_name}/{search_prefix}")
+ objects = gcs_io.list_files(f"gs://{bucket_name}/{search_prefix}")
object_count = len(list(objects))
if object_count < expected_count:
diff --git a/sdks/python/apache_beam/io/gcp/gcsio.py
b/sdks/python/apache_beam/io/gcp/gcsio.py
index 3b5898ed79f..da5c20aa0e7 100644
--- a/sdks/python/apache_beam/io/gcp/gcsio.py
+++ b/sdks/python/apache_beam/io/gcp/gcsio.py
@@ -49,7 +49,6 @@ from apache_beam.io.gcp import gcsio_retry
from apache_beam.metrics.metric import Metrics
from apache_beam.options.pipeline_options import GoogleCloudOptions
from apache_beam.options.pipeline_options import PipelineOptions
-from apache_beam.utils.annotations import deprecated
__all__ = ['GcsIO', 'create_storage_client']
@@ -459,7 +458,7 @@ class GcsIO(object):
"""
assert src.endswith('/')
assert dest.endswith('/')
- for entry in self.list_prefix(src):
+ for entry, _ in self.list_files(src):
rel_path = entry[len(src):]
self.copy(entry, dest + rel_path)
@@ -564,27 +563,6 @@ class GcsIO(object):
else:
raise NotFound('Object %s not found', path)
- @deprecated(since='2.45.0', current='list_files')
- def list_prefix(self, path, with_metadata=False):
- """Lists files matching the prefix.
-
- ``list_prefix`` has been deprecated. Use `list_files` instead, which returns
- a generator of file information instead of a dict.
-
- Args:
- path: GCS file path pattern in the form gs://<bucket>/[name].
- with_metadata: Experimental. Specify whether returns file metadata.
-
- Returns:
- If ``with_metadata`` is False: dict of file name -> size; if
- ``with_metadata`` is True: dict of file name -> tuple(size, timestamp).
- """
- file_info = {}
- for file_metadata in self.list_files(path, with_metadata):
- file_info[file_metadata[0]] = file_metadata[1]
-
- return file_info
-
def list_files(self, path, with_metadata=False):
"""Lists files matching the prefix.
@@ -627,7 +605,7 @@ class GcsIO(object):
yield file_name, item.size
_LOGGER.log(
- # do not spam logs when list_prefix is likely used to check empty folder
+ # do not spam logs when list_files is likely used to check empty folder
logging.INFO if counter > 0 else logging.DEBUG,
"Finished listing %s files in %s seconds.",
counter,
diff --git a/sdks/python/apache_beam/io/gcp/gcsio_test.py b/sdks/python/apache_beam/io/gcp/gcsio_test.py
index d2b873f566c..ec4ccbf1cf5 100644
--- a/sdks/python/apache_beam/io/gcp/gcsio_test.py
+++ b/sdks/python/apache_beam/io/gcp/gcsio_test.py
@@ -683,7 +683,7 @@ class TestGCSIO(unittest.TestCase):
self.gcs.open(file_name, 'w')
writer.assert_called()
- def test_list_prefix(self):
+ def test_list_files(self):
bucket_name = 'gcsio-test'
objects = [
('cow/cat/fish', 2),
@@ -716,8 +716,7 @@ class TestGCSIO(unittest.TestCase):
expected_file_names = [('gs://%s/%s' % (bucket_name, object_name), size)
for (object_name, size) in expected_object_names]
self.assertEqual(
- set(self.gcs.list_prefix(file_pattern).items()),
- set(expected_file_names))
+ set(self.gcs.list_files(file_pattern)), set(expected_file_names))
def test_downloader_fail_non_existent_object(self):
file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
diff --git a/sdks/python/apache_beam/runners/interactive/cache_manager.py b/sdks/python/apache_beam/runners/interactive/cache_manager.py
index e725f3f82ac..0dc79d4001a 100644
--- a/sdks/python/apache_beam/runners/interactive/cache_manager.py
+++ b/sdks/python/apache_beam/runners/interactive/cache_manager.py
@@ -200,7 +200,7 @@ class FileBasedCacheManager(CacheManager):
if 'gs://' in matched_path[0]:
from apache_beam.io.gcp import gcsio
return sum(
- sum(gcsio.GcsIO().list_prefix(path).values())
+ sum(s for _, s in gcsio.GcsIO().list_files(path))
for path in matched_path)
return sum(os.path.getsize(path) for path in matched_path)
return 0