This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 0f48a5af08 [python] file_scanner should use with_shard instead of new interfaces
0f48a5af08 is described below
commit 0f48a5af080090da69ddd6b1d8a98859ecdf6739
Author: JingsongLi <[email protected]>
AuthorDate: Wed Mar 11 14:26:15 2026 +0800
[python] file_scanner should use with_shard instead of new interfaces
---
paimon-python/pypaimon/read/scanner/file_scanner.py | 17 +----------------
1 file changed, 1 insertion(+), 16 deletions(-)
diff --git a/paimon-python/pypaimon/read/scanner/file_scanner.py b/paimon-python/pypaimon/read/scanner/file_scanner.py
index d5e543b2fc..3345ae6810 100755
--- a/paimon-python/pypaimon/read/scanner/file_scanner.py
+++ b/paimon-python/pypaimon/read/scanner/file_scanner.py
@@ -168,10 +168,7 @@ class FileScanner:
manifest_scanner: Callable[[], List[ManifestFileMeta]],
predicate: Optional[Predicate] = None,
limit: Optional[int] = None,
- vector_search: Optional['VectorSearch'] = None,
- shard_index: Optional[int] = None,
- shard_count: Optional[int] = None,
- bucket_filter: Optional[Callable[[int], bool]] = None
+ vector_search: Optional['VectorSearch'] = None
):
from pypaimon.table.file_store_table import FileStoreTable
@@ -182,11 +179,6 @@ class FileScanner:
self.limit = limit
self.vector_search = vector_search
- # Bucket-level sharding for parallel consumption
- self._shard_index = shard_index
- self._shard_count = shard_count
- self._bucket_filter = bucket_filter
-
self.snapshot_manager = SnapshotManager(table)
self.manifest_list_manager = ManifestListManager(table)
self.manifest_file_manager = ManifestFileManager(table)
@@ -422,13 +414,6 @@ class FileScanner:
return False
if self.partition_key_predicate and not self.partition_key_predicate.test(entry.partition):
return False
- # Apply bucket-level sharding for parallel consumption
- if self._shard_index is not None and self._shard_count is not None:
- if entry.bucket % self._shard_count != self._shard_index:
- return False
- elif self._bucket_filter is not None:
- if not self._bucket_filter(entry.bucket):
- return False
# Get SimpleStatsEvolution for this schema
evolution = self.simple_stats_evolutions.get_or_create(entry.file.schema_id)