wgtmac commented on code in PR #240:
URL: https://github.com/apache/parquet-format/pull/240#discussion_r1642999052


##########
src/main/thrift/parquet.thrift:
##########
@@ -237,6 +237,78 @@ struct SizeStatistics {
    3: optional list<i64> definition_level_histogram;
 }
 
+/**
+ * Interpretation for edges of GEOMETRY logical type, i.e. whether the edge
+ * between points represents a straight cartesian line or the shortest line on
+ * the sphere.
+ */
+enum Edges {
+  PLANAR = 0;
+  SPHERICAL = 1;
+}
+
+/**
+ * A custom WKB-encoded geometry data to be used in geometry statistics.
+ * The geometry may be a polygon to encode an s2 or h3 covering to provide
+ * vendor-agnostic coverings, or an envelope of geometries when a bounding
+ * box cannot be built (e.g. a geometry has spherical edges, or if an edge
+ * of geographic coordinates crosses the antimeridian).
+ */
+struct Geometry {
+  /** Bytes of a WKB-encoded geometry */
+  1: required binary geometry;
+  /**
+   * Edges of the geometry if it is a polygon. It may be different to the
+   * edges attribute from the GEOMETRY logical type.
+   */
+  2: optional Edges edges;
+}
+
+/**
+ * Bounding box of geometries in the representation of min/max value pair of
+ * coordinates from each axis. Values of Z and M are omitted for 2D geometries.
+ */
+struct BoundingBox {
+  1: required double xmin;
+  2: required double xmax;
+  3: required double ymin;
+  4: required double ymax;
+  5: optional double zmin;
+  6: optional double zmax;
+  7: optional double mmin;
+  8: optional double mmax;
+}
+
+struct Covering {
+  optional BoundingBox bbox    // A bounding box of geometries if it can be built.
+  optional Geometry covering   // A covering polygon of geometries if bbox is unavailable.
+}
+
+/** Statistics specific to GEOMETRY logical type */
+struct GeometryStatistics {
+  /** Covering of geometries */
+  1: optional Covering covering;
+
+  /**
+   * The geometry types of all geometries, or an empty array if they are not

Review Comment:
   In GeoParquet, `geometry_types` is a list of strings. Should we use an enum
instead? However, we would then need to define items for different dimensions,
e.g. "POINT" and "POINT Z".



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to