wgtmac commented on code in PR #43196:
URL: https://github.com/apache/arrow/pull/43196#discussion_r1720790638
##########
cpp/src/parquet/types.cc:
##########
@@ -463,6 +463,31 @@ std::shared_ptr<const LogicalType> LogicalType::FromThrift(
return UUIDLogicalType::Make();
} else if (type.__isset.FLOAT16) {
return Float16LogicalType::Make();
+ } else if (type.__isset.GEOMETRY) {
+ std::string crs;
+ if (type.GEOMETRY.__isset.crs) {
+ crs = type.GEOMETRY.crs;
+ }
+
+ LogicalType::GeometryEdges::edges edges =
LogicalType::GeometryEdges::UNKNOWN;
+ if (type.GEOMETRY.edges == format::Edges::PLANAR) {
+ edges = LogicalType::GeometryEdges::PLANAR;
+ } else if (type.GEOMETRY.edges == format::Edges::SPHERICAL) {
+ edges = LogicalType::GeometryEdges::SPHERICAL;
+ }
+
+ LogicalType::GeometryEncoding::geometry_encoding encoding =
+ LogicalType::GeometryEncoding::UNKNOWN;
+ if (type.GEOMETRY.encoding == format::GeometryEncoding::WKB) {
+ encoding = LogicalType::GeometryEncoding::WKB;
+ }
+
+ std::string metadata;
+ if (type.GEOMETRY.__isset.crs) {
Review Comment:
```suggestion
if (type.GEOMETRY.__isset.metadata) {
```
##########
cpp/src/parquet/statistics.h:
##########
@@ -24,6 +24,7 @@
#include <string>
#include <utility>
+#include "parquet/geometry_util.h"
Review Comment:
Could we use a forward declaration here instead of including this header? This is a public header.
##########
cpp/src/parquet/types.h:
##########
@@ -166,6 +167,14 @@ class PARQUET_EXPORT LogicalType {
enum unit { UNKNOWN = 0, MILLIS = 1, MICROS, NANOS };
};
+ struct GeometryEncoding {
Review Comment:
Should we use `enum class GeometryEncoding` instead?
##########
cpp/src/parquet/types.cc:
##########
@@ -463,6 +463,31 @@ std::shared_ptr<const LogicalType> LogicalType::FromThrift(
return UUIDLogicalType::Make();
} else if (type.__isset.FLOAT16) {
return Float16LogicalType::Make();
+ } else if (type.__isset.GEOMETRY) {
+ std::string crs;
+ if (type.GEOMETRY.__isset.crs) {
+ crs = type.GEOMETRY.crs;
+ }
+
+ LogicalType::GeometryEdges::edges edges =
LogicalType::GeometryEdges::UNKNOWN;
+ if (type.GEOMETRY.edges == format::Edges::PLANAR) {
+ edges = LogicalType::GeometryEdges::PLANAR;
+ } else if (type.GEOMETRY.edges == format::Edges::SPHERICAL) {
+ edges = LogicalType::GeometryEdges::SPHERICAL;
+ }
+
+ LogicalType::GeometryEncoding::geometry_encoding encoding =
+ LogicalType::GeometryEncoding::UNKNOWN;
+ if (type.GEOMETRY.encoding == format::GeometryEncoding::WKB) {
+ encoding = LogicalType::GeometryEncoding::WKB;
+ }
+
+ std::string metadata;
Review Comment:
The `metadata` field has been changed from a string to a binary type, as
suggested by @emkornfield, to be more flexible (e.g. encoding):
https://github.com/apache/parquet-format/pull/240#discussion_r1639285008.
I'm not sure whether this added complexity makes the implementation more difficult.
##########
cpp/src/parquet/geometry_util.h:
##########
@@ -0,0 +1,603 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <limits>
+#include <string>
+#include <unordered_set>
+
+#include "arrow/util/endian.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/ubsan.h"
+#include "parquet/exception.h"
+
+namespace parquet::geometry {
+
+constexpr double kInf = std::numeric_limits<double>::infinity();
+
+struct Dimensions {
+ enum dimensions { XY = 0, XYZ = 1, XYM = 2, XYZM = 3 };
+
+ static dimensions FromWKB(uint32_t wkb_geometry_type) {
+ switch (wkb_geometry_type / 1000) {
+ case 0:
+ return XY;
+ case 1:
+ return XYZ;
+ case 2:
+ return XYM;
+ case 3:
+ return XYZM;
+ default:
+ throw ParquetException("Invalid wkb_geometry_type: ",
wkb_geometry_type);
+ }
+ }
+
+ template <dimensions dims>
+ constexpr static uint32_t size();
+
+ template <>
+ constexpr uint32_t size<XY>() {
+ return 2;
+ }
+
+ template <>
+ constexpr uint32_t size<XYZ>() {
+ return 3;
+ }
+
+ template <>
+ constexpr uint32_t size<XYM>() {
+ return 3;
+ }
+
+ template <>
+ constexpr uint32_t size<XYZM>() {
+ return 4;
+ }
+
+ static uint32_t size(dimensions dims) {
+ switch (dims) {
+ case XY:
+ return size<XY>();
+ case XYZ:
+ return size<XYZ>();
+ case XYM:
+ return size<XYM>();
+ case XYZM:
+ return size<XYZM>();
+ default:
+ return 0;
+ }
+ }
+
+ // Where to look in a coordinate with this dimension
+ // for the X, Y, Z, and M dimensions, respectively.
+ static std::array<int, 4> ToXYZM(dimensions dims) {
+ switch (dims) {
+ case XY:
+ return {0, 1, -1, -1};
+ case XYZ:
+ return {0, 1, 2, -1};
+ case XYM:
+ return {0, 1, -1, 2};
+ case XYZM:
+ return {0, 1, 2, 3};
+ default:
+ return {-1, -1, -1, -1};
+ }
+ }
+
+ static std::string ToString(dimensions dims) {
+ switch (dims) {
+ case XY:
+ return "XY";
+ case XYZ:
+ return "XYZ";
+ case XYM:
+ return "XYM";
+ case XYZM:
+ return "XYZM";
+ default:
+ return "";
+ }
+ }
+};
+
+struct GeometryType {
+ enum geometry_type {
+ POINT = 1,
+ LINESTRING = 2,
+ POLYGON = 3,
+ MULTIPOINT = 4,
+ MULTILINESTRING = 5,
+ MULTIPOLYGON = 6,
+ GEOMETRYCOLLECTION = 7
+ };
+
+ static geometry_type FromWKB(uint32_t wkb_geometry_type) {
+ switch (wkb_geometry_type % 1000) {
+ case 1:
+ return POINT;
+ case 2:
+ return LINESTRING;
+ case 3:
+ return POLYGON;
+ case 4:
+ return MULTIPOINT;
+ case 5:
+ return MULTILINESTRING;
+ case 6:
+ return MULTIPOLYGON;
+ case 7:
+ return GEOMETRYCOLLECTION;
+ default:
+ throw ParquetException("Invalid wkb_geometry_type: ",
wkb_geometry_type);
+ }
+ }
+
+ static std::string ToString(geometry_type geometry_type) {
+ switch (geometry_type) {
+ case POINT:
+ return "POINT";
+ case LINESTRING:
+ return "LINESTRING";
+ case POLYGON:
+ return "POLYGON";
+ case MULTIPOINT:
+ return "MULTIPOINT";
+ case MULTILINESTRING:
+ return "MULTILINESTRING";
+ case MULTIPOLYGON:
+ return "MULTIPOLYGON";
+ case GEOMETRYCOLLECTION:
+ return "GEOMETRYCOLLECTION";
+ default:
+ return "";
+ }
+ }
+};
+
+class WKBBuffer {
Review Comment:
Should we move these into arrow/util instead of keeping them Parquet-only?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]