This is an automated email from the ASF dual-hosted git repository.
alenka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new b2e8c33c86 GH-41699: [Python][Parquet] Implement to_dict method on
SortingColumn (#41704)
b2e8c33c86 is described below
commit b2e8c33c86c819b167a1cbca834da3c9047a9350
Author: Tai Le Manh <[email protected]>
AuthorDate: Tue May 21 15:23:38 2024 +0700
GH-41699: [Python][Parquet] Implement to_dict method on SortingColumn
(#41704)
### Rationale for this change
Resolves #41699.
### What changes are included in this PR?
Add a `to_dict` method to `SortingColumn` and a test case for it.
### Are these changes tested?
Yes
### Are there any user-facing changes?
No
* GitHub Issue: #41699
Authored-by: Tai Le Manh <[email protected]>
Signed-off-by: AlenkaF <[email protected]>
---
python/pyarrow/_parquet.pyx | 16 ++++++++++++++++
python/pyarrow/tests/parquet/test_metadata.py | 22 +++++++++++++++-------
2 files changed, 31 insertions(+), 7 deletions(-)
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 7bc68a288a..f7724b9b1f 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -705,6 +705,22 @@ cdef class SortingColumn:
"""Whether null values appear before valid values (bool)."""
return self.nulls_first
+ def to_dict(self):
+ """
+ Get dictionary representation of the SortingColumn.
+
+ Returns
+ -------
+ dict
+ Dictionary with a key for each attribute of this class.
+ """
+ d = dict(
+ column_index=self.column_index,
+ descending=self.descending,
+ nulls_first=self.nulls_first
+ )
+ return d
+
cdef class RowGroupMetaData(_Weakrefable):
"""Metadata for a single row group."""
diff --git a/python/pyarrow/tests/parquet/test_metadata.py
b/python/pyarrow/tests/parquet/test_metadata.py
index bf186bd923..1eb0598b5c 100644
--- a/python/pyarrow/tests/parquet/test_metadata.py
+++ b/python/pyarrow/tests/parquet/test_metadata.py
@@ -303,14 +303,18 @@ def test_parquet_write_disable_statistics(tempdir):
def test_parquet_sorting_column():
sorting_col = pq.SortingColumn(10)
- assert sorting_col.column_index == 10
- assert sorting_col.descending is False
- assert sorting_col.nulls_first is False
+ assert sorting_col.to_dict() == {
+ 'column_index': 10,
+ 'descending': False,
+ 'nulls_first': False
+ }
sorting_col = pq.SortingColumn(0, descending=True, nulls_first=True)
- assert sorting_col.column_index == 0
- assert sorting_col.descending is True
- assert sorting_col.nulls_first is True
+ assert sorting_col.to_dict() == {
+ 'column_index': 0,
+ 'descending': True,
+ 'nulls_first': True
+ }
schema = pa.schema([('a', pa.int64()), ('b', pa.int64())])
sorting_cols = (
@@ -381,9 +385,13 @@ def test_parquet_file_sorting_columns():
# Can retrieve sorting columns from metadata
metadata = pq.read_metadata(reader)
- assert metadata.num_row_groups == 1
assert sorting_columns == metadata.row_group(0).sorting_columns
+ metadata_dict = metadata.to_dict()
+ assert metadata_dict.get('num_columns') == 2
+ assert metadata_dict.get('num_rows') == 3
+ assert metadata_dict.get('num_row_groups') == 1
+
def test_field_id_metadata():
# ARROW-7080