This is an automated email from the ASF dual-hosted git repository.

alenka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new b2e8c33c86 GH-41699: [Python][Parquet] Implement to_dict method on SortingColumn (#41704)
b2e8c33c86 is described below

commit b2e8c33c86c819b167a1cbca834da3c9047a9350
Author: Tai Le Manh <[email protected]>
AuthorDate: Tue May 21 15:23:38 2024 +0700

    GH-41699: [Python][Parquet] Implement to_dict method on SortingColumn (#41704)
    
    
    
    ### Rationale for this change
    Resolves #41699.
    
    ### What changes are included in this PR?
    Add a `to_dict` method to `SortingColumn`, plus a test case covering it.
    
    ### Are these changes tested?
    Yes
    
    ### Are there any user-facing changes?
    No
    
    * GitHub Issue: #41699
    
    Authored-by: Tai Le Manh <[email protected]>
    Signed-off-by: AlenkaF <[email protected]>
---
 python/pyarrow/_parquet.pyx                   | 16 ++++++++++++++++
 python/pyarrow/tests/parquet/test_metadata.py | 22 +++++++++++++++-------
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 7bc68a288a..f7724b9b1f 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -705,6 +705,22 @@ cdef class SortingColumn:
         """Whether null values appear before valid values (bool)."""
         return self.nulls_first
 
+    def to_dict(self):
+        """
+        Get dictionary representation of the SortingColumn.
+
+        Returns
+        -------
+        dict
+            Dictionary with a key for each attribute of this class.
+        """
+        d = dict(
+            column_index=self.column_index,
+            descending=self.descending,
+            nulls_first=self.nulls_first
+        )
+        return d
+
 
 cdef class RowGroupMetaData(_Weakrefable):
     """Metadata for a single row group."""
diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py
index bf186bd923..1eb0598b5c 100644
--- a/python/pyarrow/tests/parquet/test_metadata.py
+++ b/python/pyarrow/tests/parquet/test_metadata.py
@@ -303,14 +303,18 @@ def test_parquet_write_disable_statistics(tempdir):
 
 def test_parquet_sorting_column():
     sorting_col = pq.SortingColumn(10)
-    assert sorting_col.column_index == 10
-    assert sorting_col.descending is False
-    assert sorting_col.nulls_first is False
+    assert sorting_col.to_dict() == {
+        'column_index': 10,
+        'descending': False,
+        'nulls_first': False
+    }
 
     sorting_col = pq.SortingColumn(0, descending=True, nulls_first=True)
-    assert sorting_col.column_index == 0
-    assert sorting_col.descending is True
-    assert sorting_col.nulls_first is True
+    assert sorting_col.to_dict() == {
+        'column_index': 0,
+        'descending': True,
+        'nulls_first': True
+    }
 
     schema = pa.schema([('a', pa.int64()), ('b', pa.int64())])
     sorting_cols = (
@@ -381,9 +385,13 @@ def test_parquet_file_sorting_columns():
 
     # Can retrieve sorting columns from metadata
     metadata = pq.read_metadata(reader)
-    assert metadata.num_row_groups == 1
     assert sorting_columns == metadata.row_group(0).sorting_columns
 
+    metadata_dict = metadata.to_dict()
+    assert metadata_dict.get('num_columns') == 2
+    assert metadata_dict.get('num_rows') == 3
+    assert metadata_dict.get('num_row_groups') == 1
+
 
 def test_field_id_metadata():
     # ARROW-7080

Reply via email to