szehon-ho commented on code in PR #240:
URL: https://github.com/apache/parquet-format/pull/240#discussion_r1597375301


##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
 struct BsonType {
 }
 
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial 
Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
+  POINT = 0;
+  LINESTRING = 1;
+  POLYGON = 2;
+  MULTIPOINT = 3;
+  MULTILINESTRING = 4;
+  MULTIPOLYGON = 5;
+  GEOMETRY_COLLECTION = 6;
+}
+
+/**
+ * Interpretation for edges, i.e. whether the edge between points
+ * represent a straight cartesian line or the shortest line on the sphere
+ */
+enum Edges {
+  PLANAR = 0;
+  // SPHERICAL = 1; // not supported yet
+}
+
+/**
+ * Well-Known Binary. This is a well-known and popular binary representation 
regulated
+ * by the Open Geospatial Consortium (OGC). 
+ */
+struct WKB {}
+/**
+ * Encoding for geospatial data.
+ */
+union GeospatialEncoding {
+  1: WKB WKB
+}
+
+/**
+ * Geometry logical type annotation
+ *
+ * Allowed for physical types: BINARY (added in 2.11.0)
+ */
+struct GeometryType {
+  /**
+   * The subtype of the geometry.
+   * If set, all values in the column must be of the same subtype.
+   * If not set, the column may contain values of any subtype.
+   */
+  1: optional GeometrySubType subtype;
+  /**
+   * The dimension of the geometry.
+   * For now only 2D geometry is supported and the value must be 2 if set.
+   */
+  2: optional byte dimension;
+  /**
+   * Coordinate Reference System, i.e. mapping of how coordinates refer to
+   * precise locations on earth.
+   * For now only OGC:CRS84 is supported.
+   */
+  3: optional string crs;
+  4: required Edges edges;

Review Comment:
   If we support Edges=spherical, it looks like we need to support 'orientation' 
in order to correctly interpret polygons.  See:  
https://docs.google.com/document/d/1iVFbrRNEzZl8tDcZC81GFt01QJkLJsI9E2NBOt21IRI/edit?disco=AAABL-z6zbI



##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
 struct BsonType {
 }
 
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial 
Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
+  POINT = 0;
+  LINESTRING = 1;
+  POLYGON = 2;
+  MULTIPOINT = 3;
+  MULTILINESTRING = 4;
+  MULTIPOLYGON = 5;
+  GEOMETRY_COLLECTION = 6;
+}
+
+/**
+ * Interpretation for edges, i.e. whether the edge between points
+ * represent a straight cartesian line or the shortest line on the sphere
+ */
+enum Edges {
+  PLANAR = 0;
+  // SPHERICAL = 1; // not supported yet
+}
+
+/**
+ * Well-Known Binary. This is a well-known and popular binary representation 
regulated
+ * by the Open Geospatial Consortium (OGC). 
+ */
+struct WKB {}
+/**
+ * Encoding for geospatial data.
+ */
+union GeospatialEncoding {
+  1: WKB WKB

Review Comment:
   I'm new to Parquet, but why is this not an enum as well?



##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
 struct BsonType {
 }
 
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial 
Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
+  POINT = 0;
+  LINESTRING = 1;
+  POLYGON = 2;
+  MULTIPOINT = 3;
+  MULTILINESTRING = 4;
+  MULTIPOLYGON = 5;
+  GEOMETRY_COLLECTION = 6;
+}
+
+/**
+ * Interpretation for edges, i.e. whether the edge between points
+ * represent a straight cartesian line or the shortest line on the sphere
+ */
+enum Edges {
+  PLANAR = 0;
+  // SPHERICAL = 1; // not supported yet
+}
+
+/**
+ * Well-Known Binary. This is a well-known and popular binary representation 
regulated
+ * by the Open Geospatial Consortium (OGC). 
+ */
+struct WKB {}
+/**
+ * Encoding for geospatial data.
+ */
+union GeospatialEncoding {
+  1: WKB WKB
+}
+
+/**
+ * Geometry logical type annotation
+ *
+ * Allowed for physical types: BINARY (added in 2.11.0)

Review Comment:
   In the proposal 
https://docs.google.com/document/d/1iVFbrRNEzZl8tDcZC81GFt01QJkLJsI9E2NBOt21IRI/edit#heading=h.kierb15tccn8
 we explored more advanced encodings (using group).  Are we still considering 
that? If so, I'm wondering how we will support those.



##########
src/main/thrift/parquet.thrift:
##########
@@ -270,8 +270,11 @@ struct Statistics {
     * may set min_value="B", max_value="C". Such more compact values must 
still be
     * valid values within the column's logical type.
     *
-    * Values are encoded using PLAIN encoding, except that variable-length byte
-    * arrays do not include a length prefix.
+    * Values are encoded using PLAIN encoding, except that:
+    * 1) variable-length byte arrays do not include a length prefix.
+    * 2) geometry logical type with BoundingBoxOrder uses max_value/min_value 
pair

Review Comment:
   This is great to have as part of statistics, but I'm trying to wrap my head 
around how writer implementations in the WKB case can get min_value/max_value; 
I'm not sure they have the bounding box readily available unless they deserialize.



##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
 struct BsonType {
 }
 
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial 
Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
+  POINT = 0;
+  LINESTRING = 1;
+  POLYGON = 2;
+  MULTIPOINT = 3;
+  MULTILINESTRING = 4;
+  MULTIPOLYGON = 5;
+  GEOMETRY_COLLECTION = 6;
+}
+
+/**
+ * Interpretation for edges, i.e. whether the edge between points
+ * represent a straight cartesian line or the shortest line on the sphere
+ */
+enum Edges {
+  PLANAR = 0;
+  // SPHERICAL = 1; // not supported yet
+}
+
+/**
+ * Well-Known Binary. This is a well-known and popular binary representation 
regulated
+ * by the Open Geospatial Consortium (OGC). 
+ */
+struct WKB {}
+/**
+ * Encoding for geospatial data.
+ */
+union GeospatialEncoding {
+  1: WKB WKB
+}
+
+/**
+ * Geometry logical type annotation
+ *
+ * Allowed for physical types: BINARY (added in 2.11.0)
+ */
+struct GeometryType {
+  /**
+   * The subtype of the geometry.
+   * If set, all values in the column must be of the same subtype.
+   * If not set, the column may contain values of any subtype.
+   */
+  1: optional GeometrySubType subtype;
+  /**
+   * The dimension of the geometry.
+   * For now only 2D geometry is supported and the value must be 2 if set.
+   */
+  2: optional byte dimension;

Review Comment:
   Not sure if it's too ambitious, as the bounding box would not work for 3D.



##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
 struct BsonType {
 }
 
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial 
Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {

Review Comment:
   I feel this is not very useful to specify on the type, since any geospatial 
encoding can support any subtype?  



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to