rdblue commented on code in PR #240:
URL: https://github.com/apache/parquet-format/pull/240#discussion_r1768971787


##########
src/main/thrift/parquet.thrift:
##########
@@ -237,6 +237,135 @@ struct SizeStatistics {
    3: optional list<i64> definition_level_histogram;
 }
 
+/**
+ * Physical type and encoding for the geometry type.
+ */
+enum GeometryEncoding {
+  /**
+   * Allowed for physical type: BYTE_ARRAY.
+   *
+   * Well-known binary (WKB) representations of geometries.
+   *
+   * To be clear, we follow the same rule of WKB and coordinate axis order from
+   * GeoParquet [1][2]. Geometries SHOULD be encoded as ISO WKB [3][4]
+   * supporting XY, XYZ, XYM, XYZM and the standard geometry types
+   * (Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon,
+   * and GeometryCollection). Coordinate order is always (x, y) where x is
+   * easting or longitude and y is northing or latitude. This ordering explicitly
+   * overrides the axis order as specified in the CRS following the GeoPackage
+   * specification [5].
+   *
+   * This is the preferred encoding for maximum portability. It also supports
+   * GeometryStatistics to be set in the column chunk and page index.
+   *
+   * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92
+   * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155
+   * [3] https://portal.ogc.org/files/?artifact_id=18241
+   * [4] https://www.iso.org/standard/60343.html
+   * [5] https://www.geopackage.org/spec130/#gpb_spec
+   */
+  WKB = 0;
+}
+
+/**
+ * Interpretation for edges of elements of a GEOMETRY logical type. In other
+ * words, whether a point between two vertices should be interpolated in
+ * its XY dimensions as if it were a Cartesian line connecting the two
+ * vertices (planar) or the shortest spherical arc between the longitude
+ * and latitude represented by the two vertices (spherical). This value
+ * applies to all non-point geometry objects and is independent of the
+ * coordinate reference system.
+ *
+ * Because most systems currently assume planar edges and do not support
+ * spherical edges, planar should be used as the default value.
+ */
+enum EdgeInterpolation {
+  PLANAR = 0;
+  SPHERICAL = 1;
+}
+
+/**
+ * A custom binary-encoded polygon or multi-polygon to represent a covering of
+ * geometries. For example, it may be a bounding box or an envelope of geometries
+ * when a bounding box cannot be built (e.g. a geometry has spherical edges, or if
+ * an edge of geographic coordinates crosses the antimeridian). It may be
+ * extended in future versions to provide vendor-agnostic coverings like
+ * vectors of cells on a discrete global grid (e.g., S2 or H3 cells).
+ */
+struct Covering {
+  /**
+   * A type of covering. Currently accepted values: "WKB".
+   */
+  1: required string kind;
+  /**
+   * A payload specific to kind. Below are the supported values:
+   * - WKB: well-known binary of a POLYGON or MULTIPOLYGON that completely
+   *   covers the contents. This will be interpreted according to the same CRS
+   *   and edges defined by the logical type.
+   */
+  2: required binary value;
+}
+
+/**
+ * Bounding box of geometries in the representation of min/max value pair of
+ * coordinates from each axis. Values of Z and M are omitted for 2D geometries.
+ * Filter pushdown on geometries using this is only safe for planar spatial
+ * filters.
+ */
+struct BoundingBox {
+  1: required double xmin;
+  2: required double xmax;
+  3: required double ymin;
+  4: required double ymax;
+  5: optional double zmin;
+  6: optional double zmax;
+  7: optional double mmin;
+  8: optional double mmax;
+}
+
+/** Statistics specific to GEOMETRY logical type */
+struct GeometryStatistics {
+  /** A bounding box of geometries */
+  1: optional BoundingBox bbox;
+
+  /**
+   * A list of coverings of geometries.
+   * Note that it is allowed to have more than one covering of the same kind,
+   * and an implementation is free to use any of them. It is recommended to
+   * have at most one covering for each kind.
+   */
+  2: optional list<Covering> coverings;
+
+  /**
+   * The geometry types of all geometries, or an empty array if they are not
+   * known. This is borrowed from `geometry_types` column metadata of GeoParquet [1]
+   * except that values in the list are WKB (ISO variant) integer codes [2]. The
+   * table below shows the most common geometry types and their codes:
+   *
+   * | Type               | XY   | XYZ  | XYM  | XYZM |
+   * | :----------------- | :--- | :--- | :--- | :--- |
+   * | Point              | 0001 | 1001 | 2001 | 3001 |
+   * | LineString         | 0002 | 1002 | 2002 | 3002 |
+   * | Polygon            | 0003 | 1003 | 2003 | 3003 |
+   * | MultiPoint         | 0004 | 1004 | 2004 | 3004 |
+   * | MultiLineString    | 0005 | 1005 | 2005 | 3005 |
+   * | MultiPolygon       | 0006 | 1006 | 2006 | 3006 |
+   * | GeometryCollection | 0007 | 1007 | 2007 | 3007 |
+   *
+   * In addition, the following rules are used:
+   * - A list of multiple values indicates that multiple geometry types are
+   *   present (e.g. `[0003, 0006]`).
+   * - An empty array explicitly signals that the geometry types are not known.
+   * - The geometry types in the list must be unique (e.g. `[0001, 0001]`
+   *   is not valid).
+   *
+   * Please refer to links below for more detail:
+   * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L159
+   * [2] https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary
+   */
+  3: optional list<i32> geometry_types;

Review Comment:
   What is this used for? Is it for some type of pushdown?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to