js8544 commented on code in PR #38504:
URL: https://github.com/apache/arrow/pull/38504#discussion_r1385967985
##########
cpp/src/arrow/array/util.cc:
##########
@@ -669,11 +671,18 @@ class RepeatedArrayFactory {
enable_if_base_binary<T, Status> Visit(const T&) {
const std::shared_ptr<Buffer>& value = scalar<T>().value;
std::shared_ptr<Buffer> values_buffer, offsets_buffer;
- RETURN_NOT_OK(CreateBufferOf(value->data(), value->size(),
&values_buffer));
auto size = static_cast<typename T::offset_type>(value->size());
+
+ int64_t total_size;
Review Comment:
Should this be `typename T::offset_type`? If `total_size` is fixed to be
int64_t, it always calls the int64 version of `MultiplyWithOverflow`. But
`StringArray` uses int32_t for offsets, so a total size that overflows int32_t
but still fits in int64_t passes the check undetected (a false "no overflow"
result).
For example:
```cpp
auto scalar = std::make_shared<StringScalar>("aa");
int64_t length = static_cast<int32_t>(std::numeric_limits<int32_t>::max()) /
2 + 1;
auto array_result = MakeArrayFromScalar(*scalar, length);
```
`array_result` will be `ok`, but when I print the array it says `Invalid
array: Negative offsets in binary array`.
##########
cpp/src/arrow/array/array_test.cc:
##########
@@ -685,6 +685,19 @@ TEST_F(TestArray, TestMakeArrayFromScalarSliced) {
}
}
+TEST_F(TestArray, TestMakeArrayFromScalarOverflow) {
+ auto scalar = std::make_shared<StringScalar>("aa");
+
+ // Use a length that will cause an overflow when multiplied by the size of
the string
+ int64_t length = static_cast<int64_t>(std::numeric_limits<int64_t>::max()) /
2 + 1;
Review Comment:
We should probably test `StringArray` with
`std::numeric_limits<int32_t>::max()` and `LargeStringArray` with
`std::numeric_limits<int64_t>::max()`.
##########
cpp/src/arrow/array/array_test.cc:
##########
@@ -685,6 +685,19 @@ TEST_F(TestArray, TestMakeArrayFromScalarSliced) {
}
}
+TEST_F(TestArray, TestMakeArrayFromScalarOverflow) {
+ auto scalar = std::make_shared<StringScalar>("aa");
+
+ // Use a length that will cause an overflow when multiplied by the size of
the string
+ int64_t length = static_cast<int64_t>(std::numeric_limits<int64_t>::max()) /
2 + 1;
+ auto array_result = MakeArrayFromScalar(*scalar, length);
+
+ std::string err_msg = "offset overflow in repeated array construction";
+ ASSERT_FALSE(array_result.ok());
+ ASSERT_EQ(array_result.status().code(), StatusCode::Invalid);
+ ASSERT_EQ(array_result.status().message().substr(0, err_msg.length()),
err_msg);
Review Comment:
You can use `EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
::testing::HasSubstr(err_msg), array_result);` to simplify this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]