This is an automated email from the ASF dual-hosted git repository.

bkietz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 0ef7351986 GH-41407: [C++] Use static method to fill scalar scratch 
space to prevent ub (#41421)
0ef7351986 is described below

commit 0ef7351986ee8b967e210d0f9c7a9c8e4d4038fd
Author: Rossi Sun <[email protected]>
AuthorDate: Wed May 1 02:01:39 2024 +0800

    GH-41407: [C++] Use static method to fill scalar scratch space to prevent 
ub (#41421)
    
    
    
    ### Rationale for this change
    
    In #40237, I introduced scalar scratch space filling in concrete scalar 
sub-class constructor, in which there is a static down-casting of `this` to 
sub-class pointer. Though this is common in CRTP, it happens in base cast 
constructor. And this is reported in #41407 to be UB by UBSAN's "vptr" 
sanitizing.
    
    I'm not a language lawyer to tell if this is a true/false-positive. So I 
proposed two approaches:
    1. The easy way: add suppression in [1], like we already did for 
`shared_ptr`. But apparently this won't be feasible if this is a true-positive 
(need some language lawyer's help to confirm).
    2. The hard way: totally avoid this so-to-speak UB but may introduce more 
boilerplate code. This PR is the hard way.
    
    [1] https://github.com/apache/arrow/blob/main/r/tools/ubsan.supp
    
    ### What changes are included in this PR?
    
    Make `FillScratchSpace` static.
    
    ### Are these changes tested?
    
    The existing UT should cover it well.
    
    ### Are there any user-facing changes?
    
    None.
    
    * GitHub Issue: #41407
    
    Lead-authored-by: Ruoxi Sun <[email protected]>
    Co-authored-by: Rossi Sun <[email protected]>
    Co-authored-by: Benjamin Kietzman <[email protected]>
    Signed-off-by: Benjamin Kietzman <[email protected]>
---
 cpp/src/arrow/scalar.cc |  73 +++++++++++++++++--------------
 cpp/src/arrow/scalar.h  | 112 +++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 130 insertions(+), 55 deletions(-)

diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc
index 8e8d390366..7d8084e17c 100644
--- a/cpp/src/arrow/scalar.cc
+++ b/cpp/src/arrow/scalar.cc
@@ -563,15 +563,17 @@ Status Scalar::ValidateFull() const {
 BaseBinaryScalar::BaseBinaryScalar(std::string s, std::shared_ptr<DataType> 
type)
     : BaseBinaryScalar(Buffer::FromString(std::move(s)), std::move(type)) {}
 
-void BinaryScalar::FillScratchSpace() {
+void BinaryScalar::FillScratchSpace(uint8_t* scratch_space,
+                                    const std::shared_ptr<Buffer>& value) {
   FillScalarScratchSpace(
-      scratch_space_,
+      scratch_space,
       {int32_t(0), value ? static_cast<int32_t>(value->size()) : int32_t(0)});
 }
 
-void BinaryViewScalar::FillScratchSpace() {
+void BinaryViewScalar::FillScratchSpace(uint8_t* scratch_space,
+                                        const std::shared_ptr<Buffer>& value) {
   static_assert(sizeof(BinaryViewType::c_type) <= 
internal::kScalarScratchSpaceSize);
-  auto* view = new (&scratch_space_) BinaryViewType::c_type;
+  auto* view = new (scratch_space) BinaryViewType::c_type;
   if (value) {
     *view = util::ToBinaryView(std::string_view{*value}, 0, 0);
   } else {
@@ -579,9 +581,10 @@ void BinaryViewScalar::FillScratchSpace() {
   }
 }
 
-void LargeBinaryScalar::FillScratchSpace() {
+void LargeBinaryScalar::FillScratchSpace(uint8_t* scratch_space,
+                                         const std::shared_ptr<Buffer>& value) 
{
   FillScalarScratchSpace(
-      scratch_space_,
+      scratch_space,
       {int64_t(0), value ? static_cast<int64_t>(value->size()) : int64_t(0)});
 }
 
@@ -612,36 +615,40 @@ BaseListScalar::BaseListScalar(std::shared_ptr<Array> 
value,
 }
 
 ListScalar::ListScalar(std::shared_ptr<Array> value, bool is_valid)
-    : BaseListScalar(value, list(value->type()), is_valid) {}
+    : ListScalar(value, list(value->type()), is_valid) {}
 
-void ListScalar::FillScratchSpace() {
+void ListScalar::FillScratchSpace(uint8_t* scratch_space,
+                                  const std::shared_ptr<Array>& value) {
   FillScalarScratchSpace(
-      scratch_space_,
+      scratch_space,
       {int32_t(0), value ? static_cast<int32_t>(value->length()) : 
int32_t(0)});
 }
 
 LargeListScalar::LargeListScalar(std::shared_ptr<Array> value, bool is_valid)
-    : BaseListScalar(value, large_list(value->type()), is_valid) {}
+    : LargeListScalar(value, large_list(value->type()), is_valid) {}
 
-void LargeListScalar::FillScratchSpace() {
-  FillScalarScratchSpace(scratch_space_,
+void LargeListScalar::FillScratchSpace(uint8_t* scratch_space,
+                                       const std::shared_ptr<Array>& value) {
+  FillScalarScratchSpace(scratch_space,
                          {int64_t(0), value ? value->length() : int64_t(0)});
 }
 
 ListViewScalar::ListViewScalar(std::shared_ptr<Array> value, bool is_valid)
-    : BaseListScalar(value, list_view(value->type()), is_valid) {}
+    : ListViewScalar(value, list_view(value->type()), is_valid) {}
 
-void ListViewScalar::FillScratchSpace() {
+void ListViewScalar::FillScratchSpace(uint8_t* scratch_space,
+                                      const std::shared_ptr<Array>& value) {
   FillScalarScratchSpace(
-      scratch_space_,
+      scratch_space,
       {int32_t(0), value ? static_cast<int32_t>(value->length()) : 
int32_t(0)});
 }
 
 LargeListViewScalar::LargeListViewScalar(std::shared_ptr<Array> value, bool 
is_valid)
-    : BaseListScalar(value, large_list_view(value->type()), is_valid) {}
+    : LargeListViewScalar(value, large_list_view(value->type()), is_valid) {}
 
-void LargeListViewScalar::FillScratchSpace() {
-  FillScalarScratchSpace(scratch_space_,
+void LargeListViewScalar::FillScratchSpace(uint8_t* scratch_space,
+                                           const std::shared_ptr<Array>& 
value) {
+  FillScalarScratchSpace(scratch_space,
                          {int64_t(0), value ? value->length() : int64_t(0)});
 }
 
@@ -652,11 +659,12 @@ inline std::shared_ptr<DataType> MakeMapType(const 
std::shared_ptr<DataType>& pa
 }
 
 MapScalar::MapScalar(std::shared_ptr<Array> value, bool is_valid)
-    : BaseListScalar(value, MakeMapType(value->type()), is_valid) {}
+    : MapScalar(value, MakeMapType(value->type()), is_valid) {}
 
-void MapScalar::FillScratchSpace() {
+void MapScalar::FillScratchSpace(uint8_t* scratch_space,
+                                 const std::shared_ptr<Array>& value) {
   FillScalarScratchSpace(
-      scratch_space_,
+      scratch_space,
       {int32_t(0), value ? static_cast<int32_t>(value->length()) : 
int32_t(0)});
 }
 
@@ -705,7 +713,9 @@ Result<std::shared_ptr<Scalar>> 
StructScalar::field(FieldRef ref) const {
 
 RunEndEncodedScalar::RunEndEncodedScalar(std::shared_ptr<Scalar> value,
                                          std::shared_ptr<DataType> type)
-    : Scalar{std::move(type), value->is_valid}, value{std::move(value)} {
+    : Scalar{std::move(type), value->is_valid},
+      ArraySpanFillFromScalarScratchSpace(*this->type),
+      value{std::move(value)} {
   ARROW_CHECK_EQ(this->type->id(), Type::RUN_END_ENCODED);
 }
 
@@ -716,18 +726,18 @@ RunEndEncodedScalar::RunEndEncodedScalar(const 
std::shared_ptr<DataType>& type)
 
 RunEndEncodedScalar::~RunEndEncodedScalar() = default;
 
-void RunEndEncodedScalar::FillScratchSpace() {
-  auto run_end = run_end_type()->id();
+void RunEndEncodedScalar::FillScratchSpace(uint8_t* scratch_space, const 
DataType& type) {
+  Type::type run_end = checked_cast<const 
RunEndEncodedType&>(type).run_end_type()->id();
   switch (run_end) {
     case Type::INT16:
-      FillScalarScratchSpace(scratch_space_, {int16_t(1)});
+      FillScalarScratchSpace(scratch_space, {int16_t(1)});
       break;
     case Type::INT32:
-      FillScalarScratchSpace(scratch_space_, {int32_t(1)});
+      FillScalarScratchSpace(scratch_space, {int32_t(1)});
       break;
     default:
       DCHECK_EQ(run_end, Type::INT64);
-      FillScalarScratchSpace(scratch_space_, {int64_t(1)});
+      FillScalarScratchSpace(scratch_space, {int64_t(1)});
   }
 }
 
@@ -806,6 +816,7 @@ Result<TimestampScalar> 
TimestampScalar::FromISO8601(std::string_view iso8601,
 SparseUnionScalar::SparseUnionScalar(ValueType value, int8_t type_code,
                                      std::shared_ptr<DataType> type)
     : UnionScalar(std::move(type), type_code, /*is_valid=*/true),
+      ArraySpanFillFromScalarScratchSpace(type_code),
       value(std::move(value)) {
   const auto child_ids = checked_cast<const 
SparseUnionType&>(*this->type).child_ids();
   if (type_code >= 0 && static_cast<size_t>(type_code) < child_ids.size() &&
@@ -833,13 +844,13 @@ std::shared_ptr<Scalar> 
SparseUnionScalar::FromValue(std::shared_ptr<Scalar> val
   return std::make_shared<SparseUnionScalar>(field_values, type_code, 
std::move(type));
 }
 
-void SparseUnionScalar::FillScratchSpace() {
-  auto* union_scratch_space = 
reinterpret_cast<UnionScratchSpace*>(&scratch_space_);
+void SparseUnionScalar::FillScratchSpace(uint8_t* scratch_space, int8_t 
type_code) {
+  auto* union_scratch_space = 
reinterpret_cast<UnionScratchSpace*>(scratch_space);
   union_scratch_space->type_code = type_code;
 }
 
-void DenseUnionScalar::FillScratchSpace() {
-  auto* union_scratch_space = 
reinterpret_cast<UnionScratchSpace*>(&scratch_space_);
+void DenseUnionScalar::FillScratchSpace(uint8_t* scratch_space, int8_t 
type_code) {
+  auto* union_scratch_space = 
reinterpret_cast<UnionScratchSpace*>(scratch_space);
   union_scratch_space->type_code = type_code;
   FillScalarScratchSpace(union_scratch_space->offsets, {int32_t(0), 
int32_t(1)});
 }
diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h
index a7ee6a417d..982a4c5113 100644
--- a/cpp/src/arrow/scalar.h
+++ b/cpp/src/arrow/scalar.h
@@ -141,7 +141,12 @@ struct ARROW_EXPORT ArraySpanFillFromScalarScratchSpace {
   alignas(int64_t) mutable uint8_t scratch_space_[kScalarScratchSpaceSize];
 
  private:
-  ArraySpanFillFromScalarScratchSpace() { 
static_cast<Impl*>(this)->FillScratchSpace(); }
+  template <typename... Args>
+  explicit ArraySpanFillFromScalarScratchSpace(Args&&... args) {
+    Impl::FillScratchSpace(scratch_space_, std::forward<Args>(args)...);
+  }
+
+  ArraySpanFillFromScalarScratchSpace() = delete;
 
   friend Impl;
 };
@@ -278,20 +283,32 @@ struct ARROW_EXPORT BaseBinaryScalar : public 
internal::PrimitiveScalarBase {
 struct ARROW_EXPORT BinaryScalar
     : public BaseBinaryScalar,
       private internal::ArraySpanFillFromScalarScratchSpace<BinaryScalar> {
-  using BaseBinaryScalar::BaseBinaryScalar;
   using TypeClass = BinaryType;
   using ArraySpanFillFromScalarScratchSpace =
       internal::ArraySpanFillFromScalarScratchSpace<BinaryScalar>;
 
+  explicit BinaryScalar(std::shared_ptr<DataType> type)
+      : BaseBinaryScalar(std::move(type)),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
+
+  BinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
+      : BaseBinaryScalar(std::move(value), std::move(type)),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
+
+  BinaryScalar(std::string s, std::shared_ptr<DataType> type)
+      : BaseBinaryScalar(std::move(s), std::move(type)),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
+
   explicit BinaryScalar(std::shared_ptr<Buffer> value)
       : BinaryScalar(std::move(value), binary()) {}
 
-  explicit BinaryScalar(std::string s) : BaseBinaryScalar(std::move(s), 
binary()) {}
+  explicit BinaryScalar(std::string s) : BinaryScalar(std::move(s), binary()) 
{}
 
   BinaryScalar() : BinaryScalar(binary()) {}
 
  private:
-  void FillScratchSpace();
+  static void FillScratchSpace(uint8_t* scratch_space,
+                               const std::shared_ptr<Buffer>& value);
 
   friend ArraySpan;
   friend ArraySpanFillFromScalarScratchSpace;
@@ -312,23 +329,35 @@ struct ARROW_EXPORT StringScalar : public BinaryScalar {
 struct ARROW_EXPORT BinaryViewScalar
     : public BaseBinaryScalar,
       private internal::ArraySpanFillFromScalarScratchSpace<BinaryViewScalar> {
-  using BaseBinaryScalar::BaseBinaryScalar;
   using TypeClass = BinaryViewType;
   using ArraySpanFillFromScalarScratchSpace =
       internal::ArraySpanFillFromScalarScratchSpace<BinaryViewScalar>;
 
+  explicit BinaryViewScalar(std::shared_ptr<DataType> type)
+      : BaseBinaryScalar(std::move(type)),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
+
+  BinaryViewScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> 
type)
+      : BaseBinaryScalar(std::move(value), std::move(type)),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
+
+  BinaryViewScalar(std::string s, std::shared_ptr<DataType> type)
+      : BaseBinaryScalar(std::move(s), std::move(type)),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
+
   explicit BinaryViewScalar(std::shared_ptr<Buffer> value)
       : BinaryViewScalar(std::move(value), binary_view()) {}
 
   explicit BinaryViewScalar(std::string s)
-      : BaseBinaryScalar(std::move(s), binary_view()) {}
+      : BinaryViewScalar(std::move(s), binary_view()) {}
 
   BinaryViewScalar() : BinaryViewScalar(binary_view()) {}
 
   std::string_view view() const override { return 
std::string_view(*this->value); }
 
  private:
-  void FillScratchSpace();
+  static void FillScratchSpace(uint8_t* scratch_space,
+                               const std::shared_ptr<Buffer>& value);
 
   friend ArraySpan;
   friend ArraySpanFillFromScalarScratchSpace;
@@ -350,24 +379,33 @@ struct ARROW_EXPORT StringViewScalar : public 
BinaryViewScalar {
 struct ARROW_EXPORT LargeBinaryScalar
     : public BaseBinaryScalar,
       private internal::ArraySpanFillFromScalarScratchSpace<LargeBinaryScalar> 
{
-  using BaseBinaryScalar::BaseBinaryScalar;
   using TypeClass = LargeBinaryType;
   using ArraySpanFillFromScalarScratchSpace =
       internal::ArraySpanFillFromScalarScratchSpace<LargeBinaryScalar>;
 
+  explicit LargeBinaryScalar(std::shared_ptr<DataType> type)
+      : BaseBinaryScalar(std::move(type)),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
+
   LargeBinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> 
type)
-      : BaseBinaryScalar(std::move(value), std::move(type)) {}
+      : BaseBinaryScalar(std::move(value), std::move(type)),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
+
+  LargeBinaryScalar(std::string s, std::shared_ptr<DataType> type)
+      : BaseBinaryScalar(std::move(s), std::move(type)),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
 
   explicit LargeBinaryScalar(std::shared_ptr<Buffer> value)
       : LargeBinaryScalar(std::move(value), large_binary()) {}
 
   explicit LargeBinaryScalar(std::string s)
-      : BaseBinaryScalar(std::move(s), large_binary()) {}
+      : LargeBinaryScalar(std::move(s), large_binary()) {}
 
   LargeBinaryScalar() : LargeBinaryScalar(large_binary()) {}
 
  private:
-  void FillScratchSpace();
+  static void FillScratchSpace(uint8_t* scratch_space,
+                               const std::shared_ptr<Buffer>& value);
 
   friend ArraySpan;
   friend ArraySpanFillFromScalarScratchSpace;
@@ -550,14 +588,19 @@ struct ARROW_EXPORT ListScalar
     : public BaseListScalar,
       private internal::ArraySpanFillFromScalarScratchSpace<ListScalar> {
   using TypeClass = ListType;
-  using BaseListScalar::BaseListScalar;
   using ArraySpanFillFromScalarScratchSpace =
       internal::ArraySpanFillFromScalarScratchSpace<ListScalar>;
 
+  ListScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type,
+             bool is_valid = true)
+      : BaseListScalar(std::move(value), std::move(type), is_valid),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
+
   explicit ListScalar(std::shared_ptr<Array> value, bool is_valid = true);
 
  private:
-  void FillScratchSpace();
+  static void FillScratchSpace(uint8_t* scratch_space,
+                               const std::shared_ptr<Array>& value);
 
   friend ArraySpan;
   friend ArraySpanFillFromScalarScratchSpace;
@@ -567,14 +610,19 @@ struct ARROW_EXPORT LargeListScalar
     : public BaseListScalar,
       private internal::ArraySpanFillFromScalarScratchSpace<LargeListScalar> {
   using TypeClass = LargeListType;
-  using BaseListScalar::BaseListScalar;
   using ArraySpanFillFromScalarScratchSpace =
       internal::ArraySpanFillFromScalarScratchSpace<LargeListScalar>;
 
+  LargeListScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type,
+                  bool is_valid = true)
+      : BaseListScalar(std::move(value), std::move(type), is_valid),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
+
   explicit LargeListScalar(std::shared_ptr<Array> value, bool is_valid = true);
 
  private:
-  void FillScratchSpace();
+  static void FillScratchSpace(uint8_t* scratch_space,
+                               const std::shared_ptr<Array>& value);
 
   friend ArraySpan;
   friend ArraySpanFillFromScalarScratchSpace;
@@ -584,14 +632,19 @@ struct ARROW_EXPORT ListViewScalar
     : public BaseListScalar,
       private internal::ArraySpanFillFromScalarScratchSpace<ListViewScalar> {
   using TypeClass = ListViewType;
-  using BaseListScalar::BaseListScalar;
   using ArraySpanFillFromScalarScratchSpace =
       internal::ArraySpanFillFromScalarScratchSpace<ListViewScalar>;
 
+  ListViewScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type,
+                 bool is_valid = true)
+      : BaseListScalar(std::move(value), std::move(type), is_valid),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
+
   explicit ListViewScalar(std::shared_ptr<Array> value, bool is_valid = true);
 
  private:
-  void FillScratchSpace();
+  static void FillScratchSpace(uint8_t* scratch_space,
+                               const std::shared_ptr<Array>& value);
 
   friend ArraySpan;
   friend ArraySpanFillFromScalarScratchSpace;
@@ -601,14 +654,19 @@ struct ARROW_EXPORT LargeListViewScalar
     : public BaseListScalar,
       private 
internal::ArraySpanFillFromScalarScratchSpace<LargeListViewScalar> {
   using TypeClass = LargeListViewType;
-  using BaseListScalar::BaseListScalar;
   using ArraySpanFillFromScalarScratchSpace =
       internal::ArraySpanFillFromScalarScratchSpace<LargeListViewScalar>;
 
+  LargeListViewScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> 
type,
+                      bool is_valid = true)
+      : BaseListScalar(std::move(value), std::move(type), is_valid),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
+
   explicit LargeListViewScalar(std::shared_ptr<Array> value, bool is_valid = 
true);
 
  private:
-  void FillScratchSpace();
+  static void FillScratchSpace(uint8_t* scratch_space,
+                               const std::shared_ptr<Array>& value);
 
   friend ArraySpan;
   friend ArraySpanFillFromScalarScratchSpace;
@@ -618,14 +676,19 @@ struct ARROW_EXPORT MapScalar
     : public BaseListScalar,
       private internal::ArraySpanFillFromScalarScratchSpace<MapScalar> {
   using TypeClass = MapType;
-  using BaseListScalar::BaseListScalar;
   using ArraySpanFillFromScalarScratchSpace =
       internal::ArraySpanFillFromScalarScratchSpace<MapScalar>;
 
+  MapScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type,
+            bool is_valid = true)
+      : BaseListScalar(std::move(value), std::move(type), is_valid),
+        ArraySpanFillFromScalarScratchSpace(this->value) {}
+
   explicit MapScalar(std::shared_ptr<Array> value, bool is_valid = true);
 
  private:
-  void FillScratchSpace();
+  static void FillScratchSpace(uint8_t* scratch_space,
+                               const std::shared_ptr<Array>& value);
 
   friend ArraySpan;
   friend ArraySpanFillFromScalarScratchSpace;
@@ -707,7 +770,7 @@ struct ARROW_EXPORT SparseUnionScalar
                                            std::shared_ptr<DataType> type);
 
  private:
-  void FillScratchSpace();
+  static void FillScratchSpace(uint8_t* scratch_space, int8_t type_code);
 
   friend ArraySpan;
   friend ArraySpanFillFromScalarScratchSpace;
@@ -733,10 +796,11 @@ struct ARROW_EXPORT DenseUnionScalar
 
   DenseUnionScalar(ValueType value, int8_t type_code, 
std::shared_ptr<DataType> type)
       : UnionScalar(std::move(type), type_code, value->is_valid),
+        ArraySpanFillFromScalarScratchSpace(type_code),
         value(std::move(value)) {}
 
  private:
-  void FillScratchSpace();
+  static void FillScratchSpace(uint8_t* scratch_space, int8_t type_code);
 
   friend ArraySpan;
   friend ArraySpanFillFromScalarScratchSpace;
@@ -772,7 +836,7 @@ struct ARROW_EXPORT RunEndEncodedScalar
  private:
   const TypeClass& ree_type() const { return 
internal::checked_cast<TypeClass&>(*type); }
 
-  void FillScratchSpace();
+  static void FillScratchSpace(uint8_t* scratch_space, const DataType& type);
 
   friend ArraySpan;
   friend ArraySpanFillFromScalarScratchSpace;

Reply via email to