This is an automated email from the ASF dual-hosted git repository.
bkietz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 0ef7351986 GH-41407: [C++] Use static method to fill scalar scratch
space to prevent ub (#41421)
0ef7351986 is described below
commit 0ef7351986ee8b967e210d0f9c7a9c8e4d4038fd
Author: Rossi Sun <[email protected]>
AuthorDate: Wed May 1 02:01:39 2024 +0800
GH-41407: [C++] Use static method to fill scalar scratch space to prevent
ub (#41421)
### Rationale for this change
In #40237, I introduced scalar scratch space filling in concrete scalar
sub-class constructor, in which there is a static down-casting of `this` to
sub-class pointer. Though this is common in CRTP, it happens in base cast
constructor. And this is reported in #41407 to be UB by UBSAN's "vptr"
sanitizing.
I'm not a language lawyer to tell if this is a true/false-positive. So I
proposed two approaches:
1. The easy way: add suppression in [1], like we already did for
`shared_ptr`. But apparently this won't be feasible if this is a true-positive
(need some language lawyer's help to confirm).
2. The hard way: totally avoid this so-to-speak UB but may introduce more
boilerplate code. This PR is the hard way.
[1] https://github.com/apache/arrow/blob/main/r/tools/ubsan.supp
### What changes are included in this PR?
Make `FillScratchSpace` static.
### Are these changes tested?
The existing UT should cover it well.
### Are there any user-facing changes?
None.
* GitHub Issue: #41407
Lead-authored-by: Ruoxi Sun <[email protected]>
Co-authored-by: Rossi Sun <[email protected]>
Co-authored-by: Benjamin Kietzman <[email protected]>
Signed-off-by: Benjamin Kietzman <[email protected]>
---
cpp/src/arrow/scalar.cc | 73 +++++++++++++++++--------------
cpp/src/arrow/scalar.h | 112 +++++++++++++++++++++++++++++++++++++-----------
2 files changed, 130 insertions(+), 55 deletions(-)
diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc
index 8e8d390366..7d8084e17c 100644
--- a/cpp/src/arrow/scalar.cc
+++ b/cpp/src/arrow/scalar.cc
@@ -563,15 +563,17 @@ Status Scalar::ValidateFull() const {
BaseBinaryScalar::BaseBinaryScalar(std::string s, std::shared_ptr<DataType>
type)
: BaseBinaryScalar(Buffer::FromString(std::move(s)), std::move(type)) {}
-void BinaryScalar::FillScratchSpace() {
+void BinaryScalar::FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Buffer>& value) {
FillScalarScratchSpace(
- scratch_space_,
+ scratch_space,
{int32_t(0), value ? static_cast<int32_t>(value->size()) : int32_t(0)});
}
-void BinaryViewScalar::FillScratchSpace() {
+void BinaryViewScalar::FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Buffer>& value) {
static_assert(sizeof(BinaryViewType::c_type) <=
internal::kScalarScratchSpaceSize);
- auto* view = new (&scratch_space_) BinaryViewType::c_type;
+ auto* view = new (scratch_space) BinaryViewType::c_type;
if (value) {
*view = util::ToBinaryView(std::string_view{*value}, 0, 0);
} else {
@@ -579,9 +581,10 @@ void BinaryViewScalar::FillScratchSpace() {
}
}
-void LargeBinaryScalar::FillScratchSpace() {
+void LargeBinaryScalar::FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Buffer>& value)
{
FillScalarScratchSpace(
- scratch_space_,
+ scratch_space,
{int64_t(0), value ? static_cast<int64_t>(value->size()) : int64_t(0)});
}
@@ -612,36 +615,40 @@ BaseListScalar::BaseListScalar(std::shared_ptr<Array>
value,
}
ListScalar::ListScalar(std::shared_ptr<Array> value, bool is_valid)
- : BaseListScalar(value, list(value->type()), is_valid) {}
+ : ListScalar(value, list(value->type()), is_valid) {}
-void ListScalar::FillScratchSpace() {
+void ListScalar::FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Array>& value) {
FillScalarScratchSpace(
- scratch_space_,
+ scratch_space,
{int32_t(0), value ? static_cast<int32_t>(value->length()) :
int32_t(0)});
}
LargeListScalar::LargeListScalar(std::shared_ptr<Array> value, bool is_valid)
- : BaseListScalar(value, large_list(value->type()), is_valid) {}
+ : LargeListScalar(value, large_list(value->type()), is_valid) {}
-void LargeListScalar::FillScratchSpace() {
- FillScalarScratchSpace(scratch_space_,
+void LargeListScalar::FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Array>& value) {
+ FillScalarScratchSpace(scratch_space,
{int64_t(0), value ? value->length() : int64_t(0)});
}
ListViewScalar::ListViewScalar(std::shared_ptr<Array> value, bool is_valid)
- : BaseListScalar(value, list_view(value->type()), is_valid) {}
+ : ListViewScalar(value, list_view(value->type()), is_valid) {}
-void ListViewScalar::FillScratchSpace() {
+void ListViewScalar::FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Array>& value) {
FillScalarScratchSpace(
- scratch_space_,
+ scratch_space,
{int32_t(0), value ? static_cast<int32_t>(value->length()) :
int32_t(0)});
}
LargeListViewScalar::LargeListViewScalar(std::shared_ptr<Array> value, bool
is_valid)
- : BaseListScalar(value, large_list_view(value->type()), is_valid) {}
+ : LargeListViewScalar(value, large_list_view(value->type()), is_valid) {}
-void LargeListViewScalar::FillScratchSpace() {
- FillScalarScratchSpace(scratch_space_,
+void LargeListViewScalar::FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Array>&
value) {
+ FillScalarScratchSpace(scratch_space,
{int64_t(0), value ? value->length() : int64_t(0)});
}
@@ -652,11 +659,12 @@ inline std::shared_ptr<DataType> MakeMapType(const
std::shared_ptr<DataType>& pa
}
MapScalar::MapScalar(std::shared_ptr<Array> value, bool is_valid)
- : BaseListScalar(value, MakeMapType(value->type()), is_valid) {}
+ : MapScalar(value, MakeMapType(value->type()), is_valid) {}
-void MapScalar::FillScratchSpace() {
+void MapScalar::FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Array>& value) {
FillScalarScratchSpace(
- scratch_space_,
+ scratch_space,
{int32_t(0), value ? static_cast<int32_t>(value->length()) :
int32_t(0)});
}
@@ -705,7 +713,9 @@ Result<std::shared_ptr<Scalar>>
StructScalar::field(FieldRef ref) const {
RunEndEncodedScalar::RunEndEncodedScalar(std::shared_ptr<Scalar> value,
std::shared_ptr<DataType> type)
- : Scalar{std::move(type), value->is_valid}, value{std::move(value)} {
+ : Scalar{std::move(type), value->is_valid},
+ ArraySpanFillFromScalarScratchSpace(*this->type),
+ value{std::move(value)} {
ARROW_CHECK_EQ(this->type->id(), Type::RUN_END_ENCODED);
}
@@ -716,18 +726,18 @@ RunEndEncodedScalar::RunEndEncodedScalar(const
std::shared_ptr<DataType>& type)
RunEndEncodedScalar::~RunEndEncodedScalar() = default;
-void RunEndEncodedScalar::FillScratchSpace() {
- auto run_end = run_end_type()->id();
+void RunEndEncodedScalar::FillScratchSpace(uint8_t* scratch_space, const
DataType& type) {
+ Type::type run_end = checked_cast<const
RunEndEncodedType&>(type).run_end_type()->id();
switch (run_end) {
case Type::INT16:
- FillScalarScratchSpace(scratch_space_, {int16_t(1)});
+ FillScalarScratchSpace(scratch_space, {int16_t(1)});
break;
case Type::INT32:
- FillScalarScratchSpace(scratch_space_, {int32_t(1)});
+ FillScalarScratchSpace(scratch_space, {int32_t(1)});
break;
default:
DCHECK_EQ(run_end, Type::INT64);
- FillScalarScratchSpace(scratch_space_, {int64_t(1)});
+ FillScalarScratchSpace(scratch_space, {int64_t(1)});
}
}
@@ -806,6 +816,7 @@ Result<TimestampScalar>
TimestampScalar::FromISO8601(std::string_view iso8601,
SparseUnionScalar::SparseUnionScalar(ValueType value, int8_t type_code,
std::shared_ptr<DataType> type)
: UnionScalar(std::move(type), type_code, /*is_valid=*/true),
+ ArraySpanFillFromScalarScratchSpace(type_code),
value(std::move(value)) {
const auto child_ids = checked_cast<const
SparseUnionType&>(*this->type).child_ids();
if (type_code >= 0 && static_cast<size_t>(type_code) < child_ids.size() &&
@@ -833,13 +844,13 @@ std::shared_ptr<Scalar>
SparseUnionScalar::FromValue(std::shared_ptr<Scalar> val
return std::make_shared<SparseUnionScalar>(field_values, type_code,
std::move(type));
}
-void SparseUnionScalar::FillScratchSpace() {
- auto* union_scratch_space =
reinterpret_cast<UnionScratchSpace*>(&scratch_space_);
+void SparseUnionScalar::FillScratchSpace(uint8_t* scratch_space, int8_t
type_code) {
+ auto* union_scratch_space =
reinterpret_cast<UnionScratchSpace*>(scratch_space);
union_scratch_space->type_code = type_code;
}
-void DenseUnionScalar::FillScratchSpace() {
- auto* union_scratch_space =
reinterpret_cast<UnionScratchSpace*>(&scratch_space_);
+void DenseUnionScalar::FillScratchSpace(uint8_t* scratch_space, int8_t
type_code) {
+ auto* union_scratch_space =
reinterpret_cast<UnionScratchSpace*>(scratch_space);
union_scratch_space->type_code = type_code;
FillScalarScratchSpace(union_scratch_space->offsets, {int32_t(0),
int32_t(1)});
}
diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h
index a7ee6a417d..982a4c5113 100644
--- a/cpp/src/arrow/scalar.h
+++ b/cpp/src/arrow/scalar.h
@@ -141,7 +141,12 @@ struct ARROW_EXPORT ArraySpanFillFromScalarScratchSpace {
alignas(int64_t) mutable uint8_t scratch_space_[kScalarScratchSpaceSize];
private:
- ArraySpanFillFromScalarScratchSpace() {
static_cast<Impl*>(this)->FillScratchSpace(); }
+ template <typename... Args>
+ explicit ArraySpanFillFromScalarScratchSpace(Args&&... args) {
+ Impl::FillScratchSpace(scratch_space_, std::forward<Args>(args)...);
+ }
+
+ ArraySpanFillFromScalarScratchSpace() = delete;
friend Impl;
};
@@ -278,20 +283,32 @@ struct ARROW_EXPORT BaseBinaryScalar : public
internal::PrimitiveScalarBase {
struct ARROW_EXPORT BinaryScalar
: public BaseBinaryScalar,
private internal::ArraySpanFillFromScalarScratchSpace<BinaryScalar> {
- using BaseBinaryScalar::BaseBinaryScalar;
using TypeClass = BinaryType;
using ArraySpanFillFromScalarScratchSpace =
internal::ArraySpanFillFromScalarScratchSpace<BinaryScalar>;
+ explicit BinaryScalar(std::shared_ptr<DataType> type)
+ : BaseBinaryScalar(std::move(type)),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
+
+ BinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
+ : BaseBinaryScalar(std::move(value), std::move(type)),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
+
+ BinaryScalar(std::string s, std::shared_ptr<DataType> type)
+ : BaseBinaryScalar(std::move(s), std::move(type)),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
+
explicit BinaryScalar(std::shared_ptr<Buffer> value)
: BinaryScalar(std::move(value), binary()) {}
- explicit BinaryScalar(std::string s) : BaseBinaryScalar(std::move(s),
binary()) {}
+ explicit BinaryScalar(std::string s) : BinaryScalar(std::move(s), binary())
{}
BinaryScalar() : BinaryScalar(binary()) {}
private:
- void FillScratchSpace();
+ static void FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Buffer>& value);
friend ArraySpan;
friend ArraySpanFillFromScalarScratchSpace;
@@ -312,23 +329,35 @@ struct ARROW_EXPORT StringScalar : public BinaryScalar {
struct ARROW_EXPORT BinaryViewScalar
: public BaseBinaryScalar,
private internal::ArraySpanFillFromScalarScratchSpace<BinaryViewScalar> {
- using BaseBinaryScalar::BaseBinaryScalar;
using TypeClass = BinaryViewType;
using ArraySpanFillFromScalarScratchSpace =
internal::ArraySpanFillFromScalarScratchSpace<BinaryViewScalar>;
+ explicit BinaryViewScalar(std::shared_ptr<DataType> type)
+ : BaseBinaryScalar(std::move(type)),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
+
+ BinaryViewScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType>
type)
+ : BaseBinaryScalar(std::move(value), std::move(type)),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
+
+ BinaryViewScalar(std::string s, std::shared_ptr<DataType> type)
+ : BaseBinaryScalar(std::move(s), std::move(type)),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
+
explicit BinaryViewScalar(std::shared_ptr<Buffer> value)
: BinaryViewScalar(std::move(value), binary_view()) {}
explicit BinaryViewScalar(std::string s)
- : BaseBinaryScalar(std::move(s), binary_view()) {}
+ : BinaryViewScalar(std::move(s), binary_view()) {}
BinaryViewScalar() : BinaryViewScalar(binary_view()) {}
std::string_view view() const override { return
std::string_view(*this->value); }
private:
- void FillScratchSpace();
+ static void FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Buffer>& value);
friend ArraySpan;
friend ArraySpanFillFromScalarScratchSpace;
@@ -350,24 +379,33 @@ struct ARROW_EXPORT StringViewScalar : public
BinaryViewScalar {
struct ARROW_EXPORT LargeBinaryScalar
: public BaseBinaryScalar,
private internal::ArraySpanFillFromScalarScratchSpace<LargeBinaryScalar>
{
- using BaseBinaryScalar::BaseBinaryScalar;
using TypeClass = LargeBinaryType;
using ArraySpanFillFromScalarScratchSpace =
internal::ArraySpanFillFromScalarScratchSpace<LargeBinaryScalar>;
+ explicit LargeBinaryScalar(std::shared_ptr<DataType> type)
+ : BaseBinaryScalar(std::move(type)),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
+
LargeBinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType>
type)
- : BaseBinaryScalar(std::move(value), std::move(type)) {}
+ : BaseBinaryScalar(std::move(value), std::move(type)),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
+
+ LargeBinaryScalar(std::string s, std::shared_ptr<DataType> type)
+ : BaseBinaryScalar(std::move(s), std::move(type)),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
explicit LargeBinaryScalar(std::shared_ptr<Buffer> value)
: LargeBinaryScalar(std::move(value), large_binary()) {}
explicit LargeBinaryScalar(std::string s)
- : BaseBinaryScalar(std::move(s), large_binary()) {}
+ : LargeBinaryScalar(std::move(s), large_binary()) {}
LargeBinaryScalar() : LargeBinaryScalar(large_binary()) {}
private:
- void FillScratchSpace();
+ static void FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Buffer>& value);
friend ArraySpan;
friend ArraySpanFillFromScalarScratchSpace;
@@ -550,14 +588,19 @@ struct ARROW_EXPORT ListScalar
: public BaseListScalar,
private internal::ArraySpanFillFromScalarScratchSpace<ListScalar> {
using TypeClass = ListType;
- using BaseListScalar::BaseListScalar;
using ArraySpanFillFromScalarScratchSpace =
internal::ArraySpanFillFromScalarScratchSpace<ListScalar>;
+ ListScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type,
+ bool is_valid = true)
+ : BaseListScalar(std::move(value), std::move(type), is_valid),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
+
explicit ListScalar(std::shared_ptr<Array> value, bool is_valid = true);
private:
- void FillScratchSpace();
+ static void FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Array>& value);
friend ArraySpan;
friend ArraySpanFillFromScalarScratchSpace;
@@ -567,14 +610,19 @@ struct ARROW_EXPORT LargeListScalar
: public BaseListScalar,
private internal::ArraySpanFillFromScalarScratchSpace<LargeListScalar> {
using TypeClass = LargeListType;
- using BaseListScalar::BaseListScalar;
using ArraySpanFillFromScalarScratchSpace =
internal::ArraySpanFillFromScalarScratchSpace<LargeListScalar>;
+ LargeListScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type,
+ bool is_valid = true)
+ : BaseListScalar(std::move(value), std::move(type), is_valid),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
+
explicit LargeListScalar(std::shared_ptr<Array> value, bool is_valid = true);
private:
- void FillScratchSpace();
+ static void FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Array>& value);
friend ArraySpan;
friend ArraySpanFillFromScalarScratchSpace;
@@ -584,14 +632,19 @@ struct ARROW_EXPORT ListViewScalar
: public BaseListScalar,
private internal::ArraySpanFillFromScalarScratchSpace<ListViewScalar> {
using TypeClass = ListViewType;
- using BaseListScalar::BaseListScalar;
using ArraySpanFillFromScalarScratchSpace =
internal::ArraySpanFillFromScalarScratchSpace<ListViewScalar>;
+ ListViewScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type,
+ bool is_valid = true)
+ : BaseListScalar(std::move(value), std::move(type), is_valid),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
+
explicit ListViewScalar(std::shared_ptr<Array> value, bool is_valid = true);
private:
- void FillScratchSpace();
+ static void FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Array>& value);
friend ArraySpan;
friend ArraySpanFillFromScalarScratchSpace;
@@ -601,14 +654,19 @@ struct ARROW_EXPORT LargeListViewScalar
: public BaseListScalar,
private
internal::ArraySpanFillFromScalarScratchSpace<LargeListViewScalar> {
using TypeClass = LargeListViewType;
- using BaseListScalar::BaseListScalar;
using ArraySpanFillFromScalarScratchSpace =
internal::ArraySpanFillFromScalarScratchSpace<LargeListViewScalar>;
+ LargeListViewScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType>
type,
+ bool is_valid = true)
+ : BaseListScalar(std::move(value), std::move(type), is_valid),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
+
explicit LargeListViewScalar(std::shared_ptr<Array> value, bool is_valid =
true);
private:
- void FillScratchSpace();
+ static void FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Array>& value);
friend ArraySpan;
friend ArraySpanFillFromScalarScratchSpace;
@@ -618,14 +676,19 @@ struct ARROW_EXPORT MapScalar
: public BaseListScalar,
private internal::ArraySpanFillFromScalarScratchSpace<MapScalar> {
using TypeClass = MapType;
- using BaseListScalar::BaseListScalar;
using ArraySpanFillFromScalarScratchSpace =
internal::ArraySpanFillFromScalarScratchSpace<MapScalar>;
+ MapScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type,
+ bool is_valid = true)
+ : BaseListScalar(std::move(value), std::move(type), is_valid),
+ ArraySpanFillFromScalarScratchSpace(this->value) {}
+
explicit MapScalar(std::shared_ptr<Array> value, bool is_valid = true);
private:
- void FillScratchSpace();
+ static void FillScratchSpace(uint8_t* scratch_space,
+ const std::shared_ptr<Array>& value);
friend ArraySpan;
friend ArraySpanFillFromScalarScratchSpace;
@@ -707,7 +770,7 @@ struct ARROW_EXPORT SparseUnionScalar
std::shared_ptr<DataType> type);
private:
- void FillScratchSpace();
+ static void FillScratchSpace(uint8_t* scratch_space, int8_t type_code);
friend ArraySpan;
friend ArraySpanFillFromScalarScratchSpace;
@@ -733,10 +796,11 @@ struct ARROW_EXPORT DenseUnionScalar
DenseUnionScalar(ValueType value, int8_t type_code,
std::shared_ptr<DataType> type)
: UnionScalar(std::move(type), type_code, value->is_valid),
+ ArraySpanFillFromScalarScratchSpace(type_code),
value(std::move(value)) {}
private:
- void FillScratchSpace();
+ static void FillScratchSpace(uint8_t* scratch_space, int8_t type_code);
friend ArraySpan;
friend ArraySpanFillFromScalarScratchSpace;
@@ -772,7 +836,7 @@ struct ARROW_EXPORT RunEndEncodedScalar
private:
const TypeClass& ree_type() const { return
internal::checked_cast<TypeClass&>(*type); }
- void FillScratchSpace();
+ static void FillScratchSpace(uint8_t* scratch_space, const DataType& type);
friend ArraySpan;
friend ArraySpanFillFromScalarScratchSpace;