paleolimbot commented on code in PR #240:
URL: https://github.com/apache/parquet-format/pull/240#discussion_r1643008285


##########
src/main/thrift/parquet.thrift:
##########
@@ -237,6 +237,78 @@ struct SizeStatistics {
    3: optional list<i64> definition_level_histogram;
 }
 
+/**
+ * Interpretation for edges of GEOMETRY logical type, i.e. whether the edge
+ * between points represents a straight Cartesian line or the shortest line on
+ * the sphere.
+ */
+enum Edges {
+  PLANAR = 0;
+  SPHERICAL = 1;
+}
+
+/**
+ * Custom WKB-encoded geometry data to be used in geometry statistics.
+ * The geometry may be a polygon to encode an s2 or h3 covering to provide
+ * vendor-agnostic coverings, or an envelope of geometries when a bounding
+ * box cannot be built (e.g. a geometry has spherical edges, or if an edge
+ * of geographic coordinates crosses the antimeridian).
+ */
+struct Geometry {
+  /** Bytes of a WKB-encoded geometry */
+  1: required binary geometry;
+  /**
+   * Edges of the geometry if it is a polygon. It may be different to the
+   * edges attribute from the GEOMETRY logical type.
+   */
+  2: optional Edges edges;
+}
+
+/**
+ * Bounding box of geometries in the representation of min/max value pair of
+ * coordinates from each axis. Values of Z and M are omitted for 2D geometries.
+ */
+struct BoundingBox {
+  1: required double xmin;
+  2: required double xmax;
+  3: required double ymin;
+  4: required double ymax;
+  5: optional double zmin;
+  6: optional double zmax;
+  7: optional double mmin;
+  8: optional double mmax;
+}
+
+struct Covering {
+  optional BoundingBox bbox    // A bounding box of geometries if it can be built.
+  optional Geometry covering   // A covering polygon of geometries if bbox is unavailable.
+}
+
+/** Statistics specific to GEOMETRY logical type */
+struct GeometryStatistics {
+  /** Covering of geometries */
+  1: optional Covering covering;
+
+  /**
+   * The geometry types of all geometries, or an empty array if they are not

Review Comment:
   An integer might make more sense if text is not appropriate (text is nice 
for JSON metadata but perhaps not for Parquet implementations). Since this is a 
WKB type, the integer used to encode the geometry type and dimensions is very 
easy to extract: 
https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary
 .
   
   The integers used in WKB (the ISO variety, not EWKB) have the property that 
`type_id / 1000` (integer division) is either 0, 1, 2, or 3 -- corresponding to 
XY, XYZ, XYM, and XYZM dimensions, respectively -- and `type_id % 1000` is the 
`geometry_type`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to