This is an automated email from the ASF dual-hosted git repository.

bkietz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 84c15da199 GH-38738: [C++] Check variadic buffer counts in bounds 
(#38740)
84c15da199 is described below

commit 84c15da1997559c37841dc16f9e2c70c643dd9d2
Author: Benjamin Kietzman <[email protected]>
AuthorDate: Mon Nov 27 12:10:08 2023 -0500

    GH-38738: [C++] Check variadic buffer counts in bounds (#38740)
    
    
    
    ### Rationale for this change
    
    Invalid variadic buffer counts can cause allocating storage for variadic 
buffers to fail.
    
    ### What changes are included in this PR?
    
    Check variadic buffer counts are valid before they are used as an allocator 
argument.
    
    ### Are these changes tested?
    
    They pass with the fuzzer testcase.
    
    ### Are there any user-facing changes?
    
    No
    
    * Closes: #38738
    
    Lead-authored-by: Benjamin Kietzman <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Benjamin Kietzman <[email protected]>
---
 cpp/CMakePresets.json                  | 15 +++++++++++++++
 cpp/src/arrow/ipc/reader.cc            | 13 +++++++++----
 docs/source/developers/cpp/fuzzing.rst | 24 ++++++++++++++++--------
 3 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json
index f6324c1c0a..a15b204c39 100644
--- a/cpp/CMakePresets.json
+++ b/cpp/CMakePresets.json
@@ -430,6 +430,21 @@
       ],
       "displayName": "Benchmarking build with with everything enabled",
       "cacheVariables": {}
+    },
+    {
+      "name": "fuzzing",
+      "inherits": "base",
+      "displayName": "Debug build with IPC and Parquet fuzzing targets",
+      "cacheVariables": {
+        "CMAKE_BUILD_TYPE": "Debug",
+        "CMAKE_C_COMPILER": "clang",
+        "CMAKE_CXX_COMPILER": "clang++",
+        "ARROW_USE_ASAN": "ON",
+        "ARROW_USE_UBSAN": "ON",
+        "ARROW_IPC": "ON",
+        "ARROW_PARQUET": "ON",
+        "ARROW_FUZZING": "ON"
+      }
     }
   ]
 }
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 2ea2a4bd12..d272c78560 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -254,7 +254,12 @@ class ArrayLoader {
     if (i >= static_cast<int>(variadic_counts->size())) {
       return Status::IOError("variadic_count_index out of range.");
     }
-    return static_cast<size_t>(variadic_counts->Get(i));
+    int64_t count = variadic_counts->Get(i);
+    if (count < 0 || count > std::numeric_limits<int32_t>::max()) {
+      return Status::IOError(
+          "variadic_count must be representable as a positive int32_t, got ", 
count, ".");
+    }
+    return static_cast<size_t>(count);
   }
 
   Status GetFieldMetadata(int field_index, ArrayData* out) {
@@ -388,10 +393,10 @@ class ArrayLoader {
     RETURN_NOT_OK(LoadCommon(type.id()));
     RETURN_NOT_OK(GetBuffer(buffer_index_++, &out_->buffers[1]));
 
-    ARROW_ASSIGN_OR_RAISE(auto character_buffer_count,
+    ARROW_ASSIGN_OR_RAISE(auto data_buffer_count,
                           GetVariadicCount(variadic_count_index_++));
-    out_->buffers.resize(character_buffer_count + 2);
-    for (size_t i = 0; i < character_buffer_count; ++i) {
+    out_->buffers.resize(data_buffer_count + 2);
+    for (size_t i = 0; i < data_buffer_count; ++i) {
       RETURN_NOT_OK(GetBuffer(buffer_index_++, &out_->buffers[i + 2]));
     }
     return Status::OK();
diff --git a/docs/source/developers/cpp/fuzzing.rst 
b/docs/source/developers/cpp/fuzzing.rst
index bd7b303d4a..851d58fb56 100644
--- a/docs/source/developers/cpp/fuzzing.rst
+++ b/docs/source/developers/cpp/fuzzing.rst
@@ -36,9 +36,9 @@ areas ingesting potentially invalid or malicious data.
 Fuzz Targets and Utilities
 ==========================
 
-By passing the ``-DARROW_FUZZING=ON`` CMake option, you will build
-the fuzz targets corresponding to the aforementioned Arrow features, as well
-as additional related utilities.
+By passing the ``-DARROW_FUZZING=ON`` CMake option (or equivalently, using
+the ``fuzzing`` preset), you will build the fuzz targets corresponding to
+the aforementioned Arrow features, as well as additional related utilities.
 
 Generating the seed corpus
 --------------------------
@@ -85,11 +85,7 @@ various sanitizer checks enabled.
 
 .. code-block::
 
-   $ cmake .. -GNinja \
-       -DCMAKE_BUILD_TYPE=Debug \
-       -DARROW_USE_ASAN=on \
-       -DARROW_USE_UBSAN=on \
-       -DARROW_FUZZING=on
+   $ cmake .. --preset=fuzzing
 
 Then, assuming you have downloaded the crashing data file (let's call it
 ``testcase-arrow-ipc-file-fuzz-123465``), you can reproduce the crash
@@ -101,3 +97,15 @@ by running the affected fuzz target on that file:
 
 (you may want to run that command under a debugger so as to inspect the
 program state more closely)
+
+Using conda
+-----------
+
+The fuzzing executables must be compiled with clang and linked to libraries
+which provide a fuzzing runtime. If you are using conda to provide your
+dependencies, you may need to install these before building the fuzz targets:
+
+.. code-block::
+
+   $ conda install clang clangxx compiler-rt
+   $ cmake .. --preset=fuzzing

Reply via email to