This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 19fb7bbfae GH-46724: [C++][Parquet] OSSFuzz: Prevent from Bad-cast in 
handling statistics (#46725)
19fb7bbfae is described below

commit 19fb7bbfae61a52faa21934516310fedc8aaefd2
Author: mwish <[email protected]>
AuthorDate: Tue Jun 24 18:39:30 2025 +0800

    GH-46724: [C++][Parquet] OSSFuzz: Prevent from Bad-cast in handling 
statistics (#46725)
    
    ### Rationale for this change
    
    See issue #46724: OSS-Fuzz reported a bad cast when handling column statistics.
    
    ### What changes are included in this PR?
    
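    `MakeColumnStats` now loads the `ColumnMetaData` type with `LoadEnumSafe` and
    checks that it matches the `ColumnDescriptor` physical type before dispatching
    on it; a mismatch throws a `ParquetException` instead of leading to a bad cast.
    `ApplicationVersion::HasCorrectStatistics` now takes its `EncodedStatistics`
    argument by const reference.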
    
    ### Are these changes tested?
    
    no
    
    ### Are there any user-facing changes?
    
    no
    * GitHub Issue: #46724
    
    Authored-by: mwish <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/parquet/metadata.cc | 10 ++++++++--
 cpp/src/parquet/metadata.h  |  2 +-
 testing                     |  2 +-
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc
index 81567ba75b..97e502f46b 100644
--- a/cpp/src/parquet/metadata.cc
+++ b/cpp/src/parquet/metadata.cc
@@ -122,7 +122,13 @@ static std::shared_ptr<geospatial::GeoStatistics> 
MakeColumnGeometryStats(
 
 std::shared_ptr<Statistics> MakeColumnStats(const format::ColumnMetaData& 
meta_data,
                                             const ColumnDescriptor* descr) {
-  switch (static_cast<Type::type>(meta_data.type)) {
+  auto metadata_type = LoadEnumSafe(&meta_data.type);
+  if (descr->physical_type() != metadata_type) {
+    throw ParquetException(
+        "ColumnMetaData type does not match ColumnDescriptor physical type: " +
+        TypeToString(metadata_type) + " vs. " + 
TypeToString(descr->physical_type()));
+  }
+  switch (metadata_type) {
     case Type::BOOLEAN:
       return MakeTypedColumnStats<BooleanType>(meta_data, descr);
     case Type::INT32:
@@ -1541,7 +1547,7 @@ bool ApplicationVersion::VersionEq(const 
ApplicationVersion& other_version) cons
 // 
parquet-mr/parquet-column/src/main/java/org/apache/parquet/CorruptStatistics.java
 // PARQUET-686 has more discussion on statistics
 bool ApplicationVersion::HasCorrectStatistics(Type::type col_type,
-                                              EncodedStatistics& statistics,
+                                              const EncodedStatistics& 
statistics,
                                               SortOrder::type sort_order) 
const {
   // parquet-cpp version 1.3.0 and parquet-mr 1.10.0 onwards stats are computed
   // correctly for all types
diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h
index f7b7bf9aac..3380adbf56 100644
--- a/cpp/src/parquet/metadata.h
+++ b/cpp/src/parquet/metadata.h
@@ -72,7 +72,7 @@ class PARQUET_EXPORT ApplicationVersion {
   bool VersionEq(const ApplicationVersion& other_version) const;
 
   // Checks if the Version has the correct statistics for a given column
-  bool HasCorrectStatistics(Type::type primitive, EncodedStatistics& 
statistics,
+  bool HasCorrectStatistics(Type::type primitive, const EncodedStatistics& 
statistics,
                             SortOrder::type sort_order = SortOrder::SIGNED) 
const;
 };
 
diff --git a/testing b/testing
index d2a1371230..ab28408180 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit d2a13712303498963395318a4eb42872e66aead7
+Subproject commit ab28408180afb1e46621732dc6cbf0f1c44ed444

Reply via email to