marcin-krystianc commented on issue #27644:
URL: https://github.com/apache/arrow/issues/27644#issuecomment-2135060818

   Hi, I've tried this with a recent pyarrow release (v16.1.0), and I think it
is still broken: `distinct_count` is still `None` in the written statistics.
   My test code:
   ```
   import unittest
   import tempfile
   
   import pyarrow.parquet as pq
   import pyarrow as pa
   import os
   
   def get_table():
       """Build the two-column table used by the reproduction.

       Column "c0" holds the floats 1.0 and 2.0; column "c1" holds the
       integers 1 and 2.
       """
       names = ["c0", "c1"]
       columns = [[1.0, 2.0], [1, 2]]

       # Create a PyArrow Table from the Arrays
       return pa.Table.from_arrays(columns, names=names)
   
   class TestStatistics(unittest.TestCase):
   
       def test_inmemory_index_data(self):
           with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as 
tmpdirname:
               path = os.path.join(tmpdirname, "my.parquet")
               table = get_table()
   
               pq.write_table(table, path, write_statistics=True)
   
               pr = pq.ParquetReader()
               pr.open(path)
               float_column = pr.metadata.row_group(0).column(0)
               int_column = pr.metadata.row_group(0).column(1)
               
               print (pa.__version__)
               print (int_column.statistics)
               print (float_column.statistics)
               self.assertEqual(int_column.physical_type, 'INT64')
               self.assertEqual(int_column.statistics.has_min_max, True)
               self.assertEqual(int_column.statistics.has_distinct_count, True)
               
               self.assertEqual(float_column.physical_type, 'DOUBLE')
               self.assertEqual(float_column.statistics.has_min_max, True)
               self.assertEqual(float_column.statistics.has_distinct_count, 
True)
              
   # Run the test case when this snippet is executed as a script.
   if __name__ == '__main__':
       unittest.main()
   ```
   
   Output:
   ```
   16.1.0
   <pyarrow._parquet.Statistics object at 0x7ff97eedd530>
     has_min_max: True
     min: 1
     max: 2
     null_count: 0
     distinct_count: None
     num_values: 2
     physical_type: INT64
     logical_type: None
     converted_type (legacy): NONE
   <pyarrow._parquet.Statistics object at 0x7ff97fa3b920>
     has_min_max: True
     min: 1.0
     max: 2.0
     null_count: 0
     distinct_count: None
     num_values: 2
     physical_type: DOUBLE
     logical_type: None
     converted_type (legacy): NONE
   ```
   
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to