This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 0f48a5af08 [python] file_scanner should use with_shard instead of new interfaces
0f48a5af08 is described below

commit 0f48a5af080090da69ddd6b1d8a98859ecdf6739
Author: JingsongLi <[email protected]>
AuthorDate: Wed Mar 11 14:26:15 2026 +0800

    [python] file_scanner should use with_shard instead of new interfaces
---
 paimon-python/pypaimon/read/scanner/file_scanner.py | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/paimon-python/pypaimon/read/scanner/file_scanner.py b/paimon-python/pypaimon/read/scanner/file_scanner.py
index d5e543b2fc..3345ae6810 100755
--- a/paimon-python/pypaimon/read/scanner/file_scanner.py
+++ b/paimon-python/pypaimon/read/scanner/file_scanner.py
@@ -168,10 +168,7 @@ class FileScanner:
         manifest_scanner: Callable[[], List[ManifestFileMeta]],
         predicate: Optional[Predicate] = None,
         limit: Optional[int] = None,
-        vector_search: Optional['VectorSearch'] = None,
-        shard_index: Optional[int] = None,
-        shard_count: Optional[int] = None,
-        bucket_filter: Optional[Callable[[int], bool]] = None
+        vector_search: Optional['VectorSearch'] = None
     ):
         from pypaimon.table.file_store_table import FileStoreTable
 
@@ -182,11 +179,6 @@ class FileScanner:
         self.limit = limit
         self.vector_search = vector_search
 
-        # Bucket-level sharding for parallel consumption
-        self._shard_index = shard_index
-        self._shard_count = shard_count
-        self._bucket_filter = bucket_filter
-
         self.snapshot_manager = SnapshotManager(table)
         self.manifest_list_manager = ManifestListManager(table)
         self.manifest_file_manager = ManifestFileManager(table)
@@ -422,13 +414,6 @@ class FileScanner:
             return False
         if self.partition_key_predicate and not self.partition_key_predicate.test(entry.partition):
             return False
-        # Apply bucket-level sharding for parallel consumption
-        if self._shard_index is not None and self._shard_count is not None:
-            if entry.bucket % self._shard_count != self._shard_index:
-                return False
-        elif self._bucket_filter is not None:
-            if not self._bucket_filter(entry.bucket):
-                return False
         # Get SimpleStatsEvolution for this schema
         evolution = self.simple_stats_evolutions.get_or_create(entry.file.schema_id)
 

Reply via email to