paleolimbot commented on code in PR #240:
URL: https://github.com/apache/parquet-format/pull/240#discussion_r1601585275


##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
 struct BsonType {
 }
 
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
+  POINT = 0;
+  LINESTRING = 1;
+  POLYGON = 2;
+  MULTIPOINT = 3;
+  MULTILINESTRING = 4;
+  MULTIPOLYGON = 5;
+  GEOMETRY_COLLECTION = 6;
+}
+
+/**
+ * Interpretation for edges, i.e. whether the edge between points
+ * represent a straight cartesian line or the shortest line on the sphere
+ */
+enum Edges {
+  PLANAR = 0;
+  // SPHERICAL = 1; // not supported yet
+}
+
+/**
+ * Well-Known Binary. This is a well-known and popular binary representation regulated
+ * by the Open Geospatial Consortium (OGC). 
+ */
+struct WKB {}
+/**
+ * Encoding for geospatial data.
+ */
+union GeospatialEncoding {
+  1: WKB WKB
+}
+
+/**
+ * Geometry logical type annotation
+ *
+ * Allowed for physical types: BINARY (added in 2.11.0)
+ */
+struct GeometryType {
+  /**
+   * The subtype of the geometry.
+   * If set, all values in the column must be of the same subtype.
+   * If not set, the column may contain values of any subtype.
+   */
+  1: optional GeometrySubType subtype;
+  /**
+   * The dimension of the geometry.
+   * For now only 2D geometry is supported and the value must be 2 if set.
+   */
+  2: optional byte dimension;
+  /**
+   * Coordinate Reference System, i.e. mapping of how coordinates refer to
+   * precise locations on earth.
+   * For now only OGC:CRS84 is supported.
+   */
+  3: optional string crs;
+  4: required Edges edges;

Review Comment:
   > in order to correctly interpret polygons
   
   Technically that only applies to polygons that cover more than half the 
earth. As long as your polygons fit within a hemisphere (as most polygons do), 
you don't need to know if they are oriented (although it is faster to import 
them if you do know this in advance).



##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
 struct BsonType {
 }
 
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
+  POINT = 0;
+  LINESTRING = 1;
+  POLYGON = 2;
+  MULTIPOINT = 3;
+  MULTILINESTRING = 4;
+  MULTIPOLYGON = 5;
+  GEOMETRY_COLLECTION = 6;

Review Comment:
   These enum values should probably follow WKB (i.e., `GEOMETRY = 0`,
   `POINT = 1`, etc.)



##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
 struct BsonType {
 }
 
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
+  POINT = 0;
+  LINESTRING = 1;
+  POLYGON = 2;
+  MULTIPOINT = 3;
+  MULTILINESTRING = 4;
+  MULTIPOLYGON = 5;
+  GEOMETRY_COLLECTION = 6;
+}
+
+/**
+ * Interpretation for edges, i.e. whether the edge between points
+ * represent a straight cartesian line or the shortest line on the sphere
+ */
+enum Edges {
+  PLANAR = 0;
+  // SPHERICAL = 1; // not supported yet
+}
+
+/**
+ * Well-Known Binary. This is a well-known and popular binary representation regulated
+ * by the Open Geospatial Consortium (OGC). 
+ */
+struct WKB {}
+/**
+ * Encoding for geospatial data.
+ */
+union GeospatialEncoding {
+  1: WKB WKB
+}
+
+/**
+ * Geometry logical type annotation
+ *
+ * Allowed for physical types: BINARY (added in 2.11.0)
+ */
+struct GeometryType {
+  /**
+   * The subtype of the geometry.
+   * If set, all values in the column must be of the same subtype.
+   * If not set, the column may contain values of any subtype.
+   */
+  1: optional GeometrySubType subtype;
+  /**
+   * The dimension of the geometry.
+   * For now only 2D geometry is supported and the value must be 2 if set.
+   */
+  2: optional byte dimension;

Review Comment:
   `dimension` (in the singular) typically refers to the point (0), line (1),
   or polygon (2)-ness of a feature. I would recommend using an enum here to
   cover all the coordinate-dimension options (xy, xyz, xym, xyzm).



##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
 struct BsonType {
 }
 
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
+  POINT = 0;
+  LINESTRING = 1;
+  POLYGON = 2;
+  MULTIPOINT = 3;
+  MULTILINESTRING = 4;
+  MULTIPOLYGON = 5;
+  GEOMETRY_COLLECTION = 6;
+}
+
+/**
+ * Interpretation for edges, i.e. whether the edge between points
+ * represent a straight cartesian line or the shortest line on the sphere
+ */
+enum Edges {
+  PLANAR = 0;
+  // SPHERICAL = 1; // not supported yet
+}
+
+/**
+ * Well-Known Binary. This is a well-known and popular binary representation regulated
+ * by the Open Geospatial Consortium (OGC). 
+ */
+struct WKB {}
+/**
+ * Encoding for geospatial data.
+ */
+union GeospatialEncoding {
+  1: WKB WKB
+}
+
+/**
+ * Geometry logical type annotation
+ *
+ * Allowed for physical types: BINARY (added in 2.11.0)

Review Comment:
   There are definitely other ways to represent geometry (see 
https://github.com/opengeospatial/geoparquet/pull/189 for the single-encoding 
types we just added to GeoParquet)



##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
 struct BsonType {
 }
 
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
+  POINT = 0;
+  LINESTRING = 1;
+  POLYGON = 2;
+  MULTIPOINT = 3;
+  MULTILINESTRING = 4;
+  MULTIPOLYGON = 5;
+  GEOMETRY_COLLECTION = 6;
+}
+
+/**
+ * Interpretation for edges, i.e. whether the edge between points
+ * represent a straight cartesian line or the shortest line on the sphere
+ */
+enum Edges {
+  PLANAR = 0;
+  // SPHERICAL = 1; // not supported yet

Review Comment:
   This should definitely be supported (both Snowflake and BigQuery export 
edges like this, have geometry types, and can export Parquet I think)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to