paleolimbot commented on code in PR #45459:
URL: https://github.com/apache/arrow/pull/45459#discussion_r2059543564
##########
python/pyarrow/_parquet.pyx:
##########
@@ -319,6 +319,136 @@ cdef _box_flba(ParquetFLBA val, uint32_t len):
return cp.PyBytes_FromStringAndSize(<char*> val.ptr, <Py_ssize_t> len)
+cdef class GeoStatistics(_Weakrefable):
+ """Statistics for columns with geospatial data types (experimental)"""
+
+ def __init__(self):
+ raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly")
+
+ def __cinit__(self):
+ pass
+
+ def __repr__(self):
+ return f"""{object.__repr__(self)}
+ geospatial_types: {self.geospatial_types}
+ xmin: {self.xmin}, xmax: {self.xmax}
+ ymin: {self.ymin}, ymax: {self.ymax}
+ zmin: {self.zmin}, zmax: {self.zmax}
+ mmin: {self.mmin}, mmax: {self.mmax}"""
+
+ def to_dict(self):
+ out = {
+ "geospatial_types": self.geospatial_types,
+ "xmin": self.xmin,
+ "xmax": self.xmax,
+ "ymin": self.ymin,
+ "ymax": self.ymax,
+ "zmin": self.zmin,
+ "zmax": self.zmax,
+ "mmin": self.mmin,
+ "mmax": self.mmax
+ }
+
+ return out
+
+ @property
+ def geospatial_types(self):
+ cdef optional[vector[int32_t]] maybe_geometry_types = \
+ self.statistics.get().geometry_types()
+ if not maybe_geometry_types.has_value():
+ return None
+
+ return list(maybe_geometry_types.value())
+
+ @property
+ def lower_bound(self):
+ return [self.statistics.get().lower_bound()[i] for i in range(4)]
+
+ @property
+ def upper_bound(self):
+ return [self.statistics.get().upper_bound()[i] for i in range(4)]
+
+ @property
+ def dimension_empty(self):
+ return [self.statistics.get().dimension_empty()[i] for i in range(4)]
+
+ @property
+ def dimension_valid(self):
+ return [self.statistics.get().dimension_valid()[i] for i in range(4)]
+
+ @property
+ def has_x(self):
+ return self.dimension_valid[0] and not self.dimension_empty[0]
+
+ @property
+ def has_y(self):
+ return self.dimension_valid[1] and not self.dimension_empty[1]
+
+ @property
+ def has_z(self):
+ return self.dimension_valid[2] and not self.dimension_empty[2]
+
+ @property
+ def has_m(self):
+ return self.dimension_valid[3] and not self.dimension_empty[3]
+
+ @property
+ def xmin(self):
+ if self.has_x:
+ return self.lower_bound[0]
+ else:
+ return None
Review Comment:
I also updated `has_x` to do something similar!
##########
python/pyarrow/_parquet.pyx:
##########
@@ -450,6 +590,24 @@ cdef class ColumnChunkMetaData(_Weakrefable):
statistics.init(self.metadata.statistics(), self)
return statistics
+ @property
+ def is_geo_stats_set(self):
+ """Whether or not geometry statistics are present in metadata (bool)."""
+ return self.metadata.is_geo_stats_set()
+
+ @property
+ def geo_statistics(self):
+ """Statistics for column chunk (:class:`GeoStatistics`)."""
+ if not self.metadata.is_geo_stats_set():
+ return None
+
+ if not self.metadata.geo_statistics().get().is_valid():
+ return None
+
+ cdef GeoStatistics geo_statistics = GeoStatistics.__new__(GeoStatistics)
+ geo_statistics.init(self.metadata.geo_statistics(), self)
+ return geo_statistics
Review Comment:
Works great! Thanks!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]