This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 12e3353  Do not set Parquet version (#377)
12e3353 is described below

commit 12e3353262d15bcccfb0742fd4667d3369c94d2b
Author: Jonas Haag <[email protected]>
AuthorDate: Tue Feb 6 16:44:30 2024 +0100

    Do not set Parquet version (#377)
    
    * Do not set Parquet version
    
    * Fix filesizes
    
    ---------
    
    Co-authored-by: Fokko Driesprong <[email protected]>
---
 pyiceberg/io/pyarrow.py          |  2 +-
 tests/integration/test_writes.py | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 9726451..91d8452 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1734,7 +1734,7 @@ def write_file(table: Table, tasks: Iterator[WriteTask]) -> Iterator[DataFile]:
 
     fo = table.io.new_output(file_path)
     with fo.create(overwrite=True) as fos:
-        with pq.ParquetWriter(fos, schema=file_schema, version="1.0", **parquet_writer_kwargs) as writer:
+        with pq.ParquetWriter(fos, schema=file_schema, **parquet_writer_kwargs) as writer:
             writer.write_table(task.df)
 
     data_file = DataFile(
diff --git a/tests/integration/test_writes.py b/tests/integration/test_writes.py
index a65d98f..c08916b 100644
--- a/tests/integration/test_writes.py
+++ b/tests/integration/test_writes.py
@@ -357,39 +357,39 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
 
     assert summaries[0] == {
         'added-data-files': '1',
-        'added-files-size': '5437',
+        'added-files-size': '5459',
         'added-records': '3',
         'total-data-files': '1',
         'total-delete-files': '0',
         'total-equality-deletes': '0',
-        'total-files-size': '5437',
+        'total-files-size': '5459',
         'total-position-deletes': '0',
         'total-records': '3',
     }
 
     assert summaries[1] == {
         'added-data-files': '1',
-        'added-files-size': '5437',
+        'added-files-size': '5459',
         'added-records': '3',
         'total-data-files': '2',
         'total-delete-files': '0',
         'total-equality-deletes': '0',
-        'total-files-size': '10874',
+        'total-files-size': '10918',
         'total-position-deletes': '0',
         'total-records': '6',
     }
 
     assert summaries[2] == {
         'added-data-files': '1',
-        'added-files-size': '5437',
+        'added-files-size': '5459',
         'added-records': '3',
         'deleted-data-files': '2',
         'deleted-records': '6',
-        'removed-files-size': '10874',
+        'removed-files-size': '10918',
         'total-data-files': '1',
         'total-delete-files': '0',
         'total-equality-deletes': '0',
-        'total-files-size': '5437',
+        'total-files-size': '5459',
         'total-position-deletes': '0',
         'total-records': '3',
     }
@@ -555,12 +555,12 @@ def test_summaries_with_only_nulls(
 
     assert summaries[1] == {
         'added-data-files': '1',
-        'added-files-size': '4217',
+        'added-files-size': '4239',
         'added-records': '2',
         'total-data-files': '1',
         'total-delete-files': '0',
         'total-equality-deletes': '0',
-        'total-files-size': '4217',
+        'total-files-size': '4239',
         'total-position-deletes': '0',
         'total-records': '2',
     }

Reply via email to