szehon-ho commented on code in PR #240:
URL: https://github.com/apache/parquet-format/pull/240#discussion_r1597375301
##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
struct BsonType {
}
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial
Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
+ POINT = 0;
+ LINESTRING = 1;
+ POLYGON = 2;
+ MULTIPOINT = 3;
+ MULTILINESTRING = 4;
+ MULTIPOLYGON = 5;
+ GEOMETRY_COLLECTION = 6;
+}
+
+/**
+ * Interpretation for edges, i.e. whether the edge between points
+ * represent a straight cartesian line or the shortest line on the sphere
+ */
+enum Edges {
+ PLANAR = 0;
+ // SPHERICAL = 1; // not supported yet
+}
+
+/**
+ * Well-Known Binary. This is a well-known and popular binary representation
regulated
+ * by the Open Geospatial Consortium (OGC).
+ */
+struct WKB {}
+/**
+ * Encoding for geospatial data.
+ */
+union GeospatialEncoding {
+ 1: WKB WKB
+}
+
+/**
+ * Geometry logical type annotation
+ *
+ * Allowed for physical types: BINARY (added in 2.11.0)
+ */
+struct GeometryType {
+ /**
+ * The subtype of the geometry.
+ * If set, all values in the column must be of the same subtype.
+ * If not set, the column may contain values of any subtype.
+ */
+ 1: optional GeometrySubType subtype;
+ /**
+ * The dimension of the geometry.
+ * For now only 2D geometry is supported and the value must be 2 if set.
+ */
+ 2: optional byte dimension;
+ /**
+ * Coordinate Reference System, i.e. mapping of how coordinates refer to
+ * precise locations on earth.
+ * For now only OGC:CRS84 is supported.
+ */
+ 3: optional string crs;
+ 4: required Edges edges;
Review Comment:
If we support Edges=spherical, looks like we need to support 'orientation'
in order to correctly interpret polygons. See:
https://docs.google.com/document/d/1iVFbrRNEzZl8tDcZC81GFt01QJkLJsI9E2NBOt21IRI/edit?disco=AAABL-z6zbI
##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
struct BsonType {
}
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial
Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
+ POINT = 0;
+ LINESTRING = 1;
+ POLYGON = 2;
+ MULTIPOINT = 3;
+ MULTILINESTRING = 4;
+ MULTIPOLYGON = 5;
+ GEOMETRY_COLLECTION = 6;
+}
+
+/**
+ * Interpretation for edges, i.e. whether the edge between points
+ * represent a straight cartesian line or the shortest line on the sphere
+ */
+enum Edges {
+ PLANAR = 0;
+ // SPHERICAL = 1; // not supported yet
+}
+
+/**
+ * Well-Known Binary. This is a well-known and popular binary representation
regulated
+ * by the Open Geospatial Consortium (OGC).
+ */
+struct WKB {}
+/**
+ * Encoding for geospatial data.
+ */
+union GeospatialEncoding {
+ 1: WKB WKB
Review Comment:
I'm new to Parquet, but why is this not an enum as well?
##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
struct BsonType {
}
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial
Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
+ POINT = 0;
+ LINESTRING = 1;
+ POLYGON = 2;
+ MULTIPOINT = 3;
+ MULTILINESTRING = 4;
+ MULTIPOLYGON = 5;
+ GEOMETRY_COLLECTION = 6;
+}
+
+/**
+ * Interpretation for edges, i.e. whether the edge between points
+ * represent a straight cartesian line or the shortest line on the sphere
+ */
+enum Edges {
+ PLANAR = 0;
+ // SPHERICAL = 1; // not supported yet
+}
+
+/**
+ * Well-Known Binary. This is a well-known and popular binary representation
regulated
+ * by the Open Geospatial Consortium (OGC).
+ */
+struct WKB {}
+/**
+ * Encoding for geospatial data.
+ */
+union GeospatialEncoding {
+ 1: WKB WKB
+}
+
+/**
+ * Geometry logical type annotation
+ *
+ * Allowed for physical types: BINARY (added in 2.11.0)
Review Comment:
In the proposal
https://docs.google.com/document/d/1iVFbrRNEzZl8tDcZC81GFt01QJkLJsI9E2NBOt21IRI/edit#heading=h.kierb15tccn8
we explored more advanced encodings (using group). Are we still considering
those, and if so, how will we support them?
##########
src/main/thrift/parquet.thrift:
##########
@@ -270,8 +270,11 @@ struct Statistics {
* may set min_value="B", max_value="C". Such more compact values must
still be
* valid values within the column's logical type.
*
- * Values are encoded using PLAIN encoding, except that variable-length byte
- * arrays do not include a length prefix.
+ * Values are encoded using PLAIN encoding, except that:
+ * 1) variable-length byte arrays do not include a length prefix.
+ * 2) geometry logical type with BoundingBoxOrder uses max_value/min_value
pair
Review Comment:
This is great to have as part of statistics, but I'm trying to wrap my head
around how writer implementations in the WKB case can get min_value/max_value;
I'm not sure they have the bounding box easily unless they deserialize.
##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
struct BsonType {
}
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial
Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
+ POINT = 0;
+ LINESTRING = 1;
+ POLYGON = 2;
+ MULTIPOINT = 3;
+ MULTILINESTRING = 4;
+ MULTIPOLYGON = 5;
+ GEOMETRY_COLLECTION = 6;
+}
+
+/**
+ * Interpretation for edges, i.e. whether the edge between points
+ * represent a straight cartesian line or the shortest line on the sphere
+ */
+enum Edges {
+ PLANAR = 0;
+ // SPHERICAL = 1; // not supported yet
+}
+
+/**
+ * Well-Known Binary. This is a well-known and popular binary representation
regulated
+ * by the Open Geospatial Consortium (OGC).
+ */
+struct WKB {}
+/**
+ * Encoding for geospatial data.
+ */
+union GeospatialEncoding {
+ 1: WKB WKB
+}
+
+/**
+ * Geometry logical type annotation
+ *
+ * Allowed for physical types: BINARY (added in 2.11.0)
+ */
+struct GeometryType {
+ /**
+ * The subtype of the geometry.
+ * If set, all values in the column must be of the same subtype.
+ * If not set, the column may contain values of any subtype.
+ */
+ 1: optional GeometrySubType subtype;
+ /**
+ * The dimension of the geometry.
+ * For now only 2D geometry is supported and the value must be 2 if set.
+ */
+ 2: optional byte dimension;
Review Comment:
Not sure if it's too ambitious, as the bounding box would not work for 3D.
##########
src/main/thrift/parquet.thrift:
##########
@@ -373,6 +376,69 @@ struct JsonType {
struct BsonType {
}
+/**
+ * A geometry can be any of the following subtypes.
+ * The list of geospatial subtypes is taken from the OGC (Open Geospatial
Consortium)
+ * SFA (Simple Feature Access) Part 1- Common Architecture.
+ */
+enum GeometrySubType {
Review Comment:
I feel this is not very useful to specify on the type, as any geospatial
encoding can support any subtype?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]