This is an automated email from the ASF dual-hosted git repository.

maplefu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 692c8b7a59 GH-46306: [C++][Parquet] Should use LoadEnumSafe for geo enum (#46307)
692c8b7a59 is described below

commit 692c8b7a597cf561c4788a7332d2681ed2002dd9
Author: mwish <[email protected]>
AuthorDate: Tue May 6 14:35:46 2025 +0800

    GH-46306: [C++][Parquet] Should use LoadEnumSafe for geo enum (#46307)
    
    ### Rationale for this change
    
    OSS-Fuzz reported an error when loading an unknown value of the geo enum (`EdgeInterpolationAlgorithm`).
    
    ### What changes are included in this PR?
    
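    Add a `LoadEnumSafe` overload for the geo enum (`EdgeInterpolationAlgorithm`) and use it in `LogicalType::FromThrift`, so that out-of-range values map to `UNKNOWN` instead of throwing.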
    
    ### Are these changes tested?
    
    Covered by existing tests.
    
    ### Are there any user-facing changes?
    
    no
    
    * GitHub Issue: #46306
    
    Authored-by: mwish <[email protected]>
    Signed-off-by: mwish <[email protected]>
---
 cpp/src/parquet/thrift_internal.h | 47 +++++++++++++++++++++++----------------
 cpp/src/parquet/types.cc          |  2 +-
 2 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h
index ea50ab7420..ae44be32d1 100644
--- a/cpp/src/parquet/thrift_internal.h
+++ b/cpp/src/parquet/thrift_internal.h
@@ -106,6 +106,25 @@ static inline BoundaryOrder::type FromThriftUnsafe(format::BoundaryOrder::type t
   return static_cast<BoundaryOrder::type>(type);
 }
 
+static inline GeometryLogicalType::EdgeInterpolationAlgorithm FromThriftUnsafe(
+    format::EdgeInterpolationAlgorithm::type type) {
+  switch (type) {
+    case format::EdgeInterpolationAlgorithm::SPHERICAL:
+      return GeometryLogicalType::EdgeInterpolationAlgorithm::SPHERICAL;
+    case format::EdgeInterpolationAlgorithm::VINCENTY:
+      return GeometryLogicalType::EdgeInterpolationAlgorithm::VINCENTY;
+    case format::EdgeInterpolationAlgorithm::THOMAS:
+      return GeometryLogicalType::EdgeInterpolationAlgorithm::THOMAS;
+    case format::EdgeInterpolationAlgorithm::ANDOYER:
+      return GeometryLogicalType::EdgeInterpolationAlgorithm::ANDOYER;
+    case format::EdgeInterpolationAlgorithm::KARNEY:
+      return GeometryLogicalType::EdgeInterpolationAlgorithm::KARNEY;
+    default:
+      ARROW_DCHECK(false) << "Cannot reach here";
+      return GeometryLogicalType::EdgeInterpolationAlgorithm::UNKNOWN;
+  }
+}
+
 namespace internal {
 
 template <typename T>
@@ -221,6 +240,15 @@ inline typename Compression::type LoadEnumSafe(const format::CompressionCodec::t
   return FromThriftUnsafe(*in);
 }
 
+inline typename LogicalType::EdgeInterpolationAlgorithm LoadEnumSafe(
+    const format::EdgeInterpolationAlgorithm::type* in) {
+  if (ARROW_PREDICT_FALSE(*in < format::EdgeInterpolationAlgorithm::SPHERICAL ||
+                          *in > format::EdgeInterpolationAlgorithm::KARNEY)) {
+    return LogicalType::EdgeInterpolationAlgorithm::UNKNOWN;
+  }
+  return FromThriftUnsafe(*in);
+}
+
 // Safe non-enum converters
 
 static inline AadMetadata FromThrift(format::AesGcmV1 aesGcmV1) {
@@ -281,25 +309,6 @@ static inline format::EdgeInterpolationAlgorithm::type ToThrift(
   }
 }
 
-static inline LogicalType::EdgeInterpolationAlgorithm FromThrift(
-    const format::EdgeInterpolationAlgorithm::type algorithm) {
-  switch (algorithm) {
-    case format::EdgeInterpolationAlgorithm::SPHERICAL:
-      return LogicalType::EdgeInterpolationAlgorithm::SPHERICAL;
-    case format::EdgeInterpolationAlgorithm::VINCENTY:
-      return LogicalType::EdgeInterpolationAlgorithm::VINCENTY;
-    case format::EdgeInterpolationAlgorithm::THOMAS:
-      return LogicalType::EdgeInterpolationAlgorithm::THOMAS;
-    case format::EdgeInterpolationAlgorithm::ANDOYER:
-      return LogicalType::EdgeInterpolationAlgorithm::ANDOYER;
-    case format::EdgeInterpolationAlgorithm::KARNEY:
-      return LogicalType::EdgeInterpolationAlgorithm::KARNEY;
-    default:
-      throw ParquetException("Unknown value for geometry algorithm: ",
-                             static_cast<int>(algorithm));
-  }
-}
-
 static inline EncryptionAlgorithm FromThrift(format::EncryptionAlgorithm encryption) {
   EncryptionAlgorithm encryption_algorithm;
 
diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc
index 09f8a7d390..1387c222a7 100644
--- a/cpp/src/parquet/types.cc
+++ b/cpp/src/parquet/types.cc
@@ -501,7 +501,7 @@ std::shared_ptr<const LogicalType> LogicalType::FromThrift(
     if (!type.GEOGRAPHY.__isset.algorithm) {
       algorithm = LogicalType::EdgeInterpolationAlgorithm::SPHERICAL;
     } else {
-      algorithm = ::parquet::FromThrift(type.GEOGRAPHY.algorithm);
+      algorithm = LoadEnumSafe(&type.GEOGRAPHY.algorithm);
     }
 
     return GeographyLogicalType::Make(std::move(crs), algorithm);

Reply via email to