This is an automated email from the ASF dual-hosted git repository.
maplefu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 692c8b7a59 GH-46306: [C++][Parquet] Should use LoadEnumSafe for geo enum (#46307)
692c8b7a59 is described below
commit 692c8b7a597cf561c4788a7332d2681ed2002dd9
Author: mwish <[email protected]>
AuthorDate: Tue May 6 14:35:46 2025 +0800
GH-46306: [C++][Parquet] Should use LoadEnumSafe for geo enum (#46307)
### Rationale for this change
OSS-Fuzz reported an error caused by loading an unknown (out-of-range) value of the geo `EdgeInterpolationAlgorithm` enum.
### What changes are included in this PR?
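Add a `LoadEnumSafe` overload for the geo `EdgeInterpolationAlgorithm` enum and use it when loading the GEOGRAPHY logical type from Thrift.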
### Are these changes tested?
Covered by existing tests.
### Are there any user-facing changes?
No.
* GitHub Issue: #46306
Authored-by: mwish <[email protected]>
Signed-off-by: mwish <[email protected]>
---
cpp/src/parquet/thrift_internal.h | 47 +++++++++++++++++++++++----------------
cpp/src/parquet/types.cc | 2 +-
2 files changed, 29 insertions(+), 20 deletions(-)
diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h
index ea50ab7420..ae44be32d1 100644
--- a/cpp/src/parquet/thrift_internal.h
+++ b/cpp/src/parquet/thrift_internal.h
@@ -106,6 +106,25 @@ static inline BoundaryOrder::type FromThriftUnsafe(format::BoundaryOrder::type t
return static_cast<BoundaryOrder::type>(type);
}
+static inline GeometryLogicalType::EdgeInterpolationAlgorithm FromThriftUnsafe(
+ format::EdgeInterpolationAlgorithm::type type) {
+ switch (type) {
+ case format::EdgeInterpolationAlgorithm::SPHERICAL:
+ return GeometryLogicalType::EdgeInterpolationAlgorithm::SPHERICAL;
+ case format::EdgeInterpolationAlgorithm::VINCENTY:
+ return GeometryLogicalType::EdgeInterpolationAlgorithm::VINCENTY;
+ case format::EdgeInterpolationAlgorithm::THOMAS:
+ return GeometryLogicalType::EdgeInterpolationAlgorithm::THOMAS;
+ case format::EdgeInterpolationAlgorithm::ANDOYER:
+ return GeometryLogicalType::EdgeInterpolationAlgorithm::ANDOYER;
+ case format::EdgeInterpolationAlgorithm::KARNEY:
+ return GeometryLogicalType::EdgeInterpolationAlgorithm::KARNEY;
+ default:
+ ARROW_DCHECK(false) << "Cannot reach here";
+ return GeometryLogicalType::EdgeInterpolationAlgorithm::UNKNOWN;
+ }
+}
+
namespace internal {
template <typename T>
@@ -221,6 +240,15 @@ inline typename Compression::type LoadEnumSafe(const format::CompressionCodec::t
return FromThriftUnsafe(*in);
}
+inline typename LogicalType::EdgeInterpolationAlgorithm LoadEnumSafe(
+ const format::EdgeInterpolationAlgorithm::type* in) {
+ if (ARROW_PREDICT_FALSE(*in < format::EdgeInterpolationAlgorithm::SPHERICAL ||
+ *in > format::EdgeInterpolationAlgorithm::KARNEY)) {
+ return LogicalType::EdgeInterpolationAlgorithm::UNKNOWN;
+ }
+ return FromThriftUnsafe(*in);
+}
+
// Safe non-enum converters
static inline AadMetadata FromThrift(format::AesGcmV1 aesGcmV1) {
@@ -281,25 +309,6 @@ static inline format::EdgeInterpolationAlgorithm::type ToThrift(
}
}
-static inline LogicalType::EdgeInterpolationAlgorithm FromThrift(
- const format::EdgeInterpolationAlgorithm::type algorithm) {
- switch (algorithm) {
- case format::EdgeInterpolationAlgorithm::SPHERICAL:
- return LogicalType::EdgeInterpolationAlgorithm::SPHERICAL;
- case format::EdgeInterpolationAlgorithm::VINCENTY:
- return LogicalType::EdgeInterpolationAlgorithm::VINCENTY;
- case format::EdgeInterpolationAlgorithm::THOMAS:
- return LogicalType::EdgeInterpolationAlgorithm::THOMAS;
- case format::EdgeInterpolationAlgorithm::ANDOYER:
- return LogicalType::EdgeInterpolationAlgorithm::ANDOYER;
- case format::EdgeInterpolationAlgorithm::KARNEY:
- return LogicalType::EdgeInterpolationAlgorithm::KARNEY;
- default:
- throw ParquetException("Unknown value for geometry algorithm: ",
- static_cast<int>(algorithm));
- }
-}
-
static inline EncryptionAlgorithm FromThrift(format::EncryptionAlgorithm encryption) {
EncryptionAlgorithm encryption_algorithm;
diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc
index 09f8a7d390..1387c222a7 100644
--- a/cpp/src/parquet/types.cc
+++ b/cpp/src/parquet/types.cc
@@ -501,7 +501,7 @@ std::shared_ptr<const LogicalType> LogicalType::FromThrift(
if (!type.GEOGRAPHY.__isset.algorithm) {
algorithm = LogicalType::EdgeInterpolationAlgorithm::SPHERICAL;
} else {
- algorithm = ::parquet::FromThrift(type.GEOGRAPHY.algorithm);
+ algorithm = LoadEnumSafe(&type.GEOGRAPHY.algorithm);
}
return GeographyLogicalType::Make(std::move(crs), algorithm);