This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 12e3353 Do not set Parquet version (#377)
12e3353 is described below
commit 12e3353262d15bcccfb0742fd4667d3369c94d2b
Author: Jonas Haag <[email protected]>
AuthorDate: Tue Feb 6 16:44:30 2024 +0100
Do not set Parquet version (#377)
* Do not set Parquet version
* Fix filesizes
---------
Co-authored-by: Fokko Driesprong <[email protected]>
---
pyiceberg/io/pyarrow.py | 2 +-
tests/integration/test_writes.py | 18 +++++++++---------
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 9726451..91d8452 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1734,7 +1734,7 @@ def write_file(table: Table, tasks: Iterator[WriteTask]) -> Iterator[DataFile]:
fo = table.io.new_output(file_path)
with fo.create(overwrite=True) as fos:
- with pq.ParquetWriter(fos, schema=file_schema, version="1.0", **parquet_writer_kwargs) as writer:
+ with pq.ParquetWriter(fos, schema=file_schema, **parquet_writer_kwargs) as writer:
writer.write_table(task.df)
data_file = DataFile(
diff --git a/tests/integration/test_writes.py b/tests/integration/test_writes.py
index a65d98f..c08916b 100644
--- a/tests/integration/test_writes.py
+++ b/tests/integration/test_writes.py
@@ -357,39 +357,39 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
assert summaries[0] == {
'added-data-files': '1',
- 'added-files-size': '5437',
+ 'added-files-size': '5459',
'added-records': '3',
'total-data-files': '1',
'total-delete-files': '0',
'total-equality-deletes': '0',
- 'total-files-size': '5437',
+ 'total-files-size': '5459',
'total-position-deletes': '0',
'total-records': '3',
}
assert summaries[1] == {
'added-data-files': '1',
- 'added-files-size': '5437',
+ 'added-files-size': '5459',
'added-records': '3',
'total-data-files': '2',
'total-delete-files': '0',
'total-equality-deletes': '0',
- 'total-files-size': '10874',
+ 'total-files-size': '10918',
'total-position-deletes': '0',
'total-records': '6',
}
assert summaries[2] == {
'added-data-files': '1',
- 'added-files-size': '5437',
+ 'added-files-size': '5459',
'added-records': '3',
'deleted-data-files': '2',
'deleted-records': '6',
- 'removed-files-size': '10874',
+ 'removed-files-size': '10918',
'total-data-files': '1',
'total-delete-files': '0',
'total-equality-deletes': '0',
- 'total-files-size': '5437',
+ 'total-files-size': '5459',
'total-position-deletes': '0',
'total-records': '3',
}
@@ -555,12 +555,12 @@ def test_summaries_with_only_nulls(
assert summaries[1] == {
'added-data-files': '1',
- 'added-files-size': '4217',
+ 'added-files-size': '4239',
'added-records': '2',
'total-data-files': '1',
'total-delete-files': '0',
'total-equality-deletes': '0',
- 'total-files-size': '4217',
+ 'total-files-size': '4239',
'total-position-deletes': '0',
'total-records': '2',
}