This is an automated email from the ASF dual-hosted git repository. fokko pushed a commit to branch pyiceberg-0.9.x in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
commit e5d1bb87ba0a358cc2abdf430f48fd033549564e
Author: koenvo <[email protected]>
AuthorDate: Thu Apr 10 00:35:34 2025 +0200

    Temporary fix for filtering on empty batches (#1901)

    Potential fix for https://github.com/apache/iceberg-python/issues/1804

    Might want to write a test, but not sure yet how to reproduce without using glue.

    Closes https://github.com/apache/iceberg-python/issues/1804
---
 pyiceberg/io/pyarrow.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 839b16ad..90309546 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1421,11 +1421,15 @@ def _task_to_record_batches(
 
             # Apply the user filter
             if pyarrow_filter is not None:
-                current_batch = current_batch.filter(pyarrow_filter)
+                # Temporary fix until PyArrow 21 is released ( https://github.com/apache/arrow/pull/46057 )
+                table = pa.Table.from_batches([current_batch])
+                table = table.filter(pyarrow_filter)
                 # skip empty batches
-                if current_batch.num_rows == 0:
+                if table.num_rows == 0:
                     continue
 
+                current_batch = table.combine_chunks().to_batches()[0]
+
             result_batch = _to_requested_schema(
                 projected_schema,
                 file_project_schema,
