marcin-krystianc commented on issue #27644:
URL: https://github.com/apache/arrow/issues/27644#issuecomment-2135060818
Hi, I've tried this with a recent pyarrow release (v16.1.0), and I think it
is still broken.
My test code:
```
import unittest
import tempfile
import pyarrow.parquet as pq
import pyarrow as pa
import os
def get_table():
    """Return a two-column table: "c0" holds floats, "c1" holds integers."""
    pa_arrays = [[1.0, 2.0], [1, 2]]
    column_names = ["c0", "c1"]
    # Create a PyArrow Table from the Arrays
    return pa.Table.from_arrays(pa_arrays, names=column_names)


class TestStatistics(unittest.TestCase):
    """Write a small table to Parquet and inspect its column statistics."""

    def test_inmemory_index_data(self):
        """Check type, min/max and distinct-count flags for both columns."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdirname:
            path = os.path.join(tmpdirname, "my.parquet")
            table = get_table()
            pq.write_table(table, path, write_statistics=True)

            # Re-open the file and read the metadata of the only row group.
            pr = pq.ParquetReader()
            pr.open(path)
            float_column = pr.metadata.row_group(0).column(0)
            int_column = pr.metadata.row_group(0).column(1)

            # Printed so the report shows the version and the raw statistics.
            print(pa.__version__)
            print(int_column.statistics)
            print(float_column.statistics)

            self.assertEqual(int_column.physical_type, 'INT64')
            self.assertEqual(int_column.statistics.has_min_max, True)
            self.assertEqual(int_column.statistics.has_distinct_count, True)

            self.assertEqual(float_column.physical_type, 'DOUBLE')
            self.assertEqual(float_column.statistics.has_min_max, True)
            self.assertEqual(float_column.statistics.has_distinct_count, True)


# Run the test case when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
```
Output:
```
16.1.0
<pyarrow._parquet.Statistics object at 0x7ff97eedd530>
has_min_max: True
min: 1
max: 2
null_count: 0
distinct_count: None
num_values: 2
physical_type: INT64
logical_type: None
converted_type (legacy): NONE
<pyarrow._parquet.Statistics object at 0x7ff97fa3b920>
has_min_max: True
min: 1.0
max: 2.0
null_count: 0
distinct_count: None
num_values: 2
physical_type: DOUBLE
logical_type: None
converted_type (legacy): NONE
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]