This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch pyiceberg-0.9.x
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git

commit e5d1bb87ba0a358cc2abdf430f48fd033549564e
Author: koenvo <[email protected]>
AuthorDate: Thu Apr 10 00:35:34 2025 +0200

    Temporary fix for filtering on empty batches (#1901)
    
    Potential fix for https://github.com/apache/iceberg-python/issues/1804
    
    Might want to write a test, but not sure yet how to reproduce without
    using glue.
    
    Closes https://github.com/apache/iceberg-python/issues/1804
---
 pyiceberg/io/pyarrow.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 839b16ad..90309546 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1421,11 +1421,15 @@ def _task_to_record_batches(
 
             # Apply the user filter
             if pyarrow_filter is not None:
-                current_batch = current_batch.filter(pyarrow_filter)
+                # Temporary fix until PyArrow 21 is released ( https://github.com/apache/arrow/pull/46057 )
+                table = pa.Table.from_batches([current_batch])
+                table = table.filter(pyarrow_filter)
                 # skip empty batches
-                if current_batch.num_rows == 0:
+                if table.num_rows == 0:
                     continue
 
+                current_batch = table.combine_chunks().to_batches()[0]
+
             result_batch = _to_requested_schema(
                 projected_schema,
                 file_project_schema,

Reply via email to