js8544 commented on code in PR #38504:
URL: https://github.com/apache/arrow/pull/38504#discussion_r1385967985


##########
cpp/src/arrow/array/util.cc:
##########
@@ -669,11 +671,18 @@ class RepeatedArrayFactory {
   enable_if_base_binary<T, Status> Visit(const T&) {
     const std::shared_ptr<Buffer>& value = scalar<T>().value;
     std::shared_ptr<Buffer> values_buffer, offsets_buffer;
-    RETURN_NOT_OK(CreateBufferOf(value->data(), value->size(), 
&values_buffer));
     auto size = static_cast<typename T::offset_type>(value->size());
+
+    int64_t total_size;

Review Comment:
   Should this be `typename T::offset_type`? If `total_size` is fixed to be 
`int64_t`, it always calls the int64 version of `MultiplyWithOverflow`. But 
`StringArray` uses `int32_t` for offsets, so a total size that fits in int64 
but overflows int32 passes the check undetected.
   For example:
   ```cpp
   auto scalar = std::make_shared<StringScalar>("aa");
   int64_t length = static_cast<int32_t>(std::numeric_limits<int32_t>::max()) / 
2 + 1;
   auto array_result = MakeArrayFromScalar(*scalar, length);
   ```
   `array_result` will be `ok`, but printing the array reports `Invalid 
array: Negative offsets in binary array`.



##########
cpp/src/arrow/array/array_test.cc:
##########
@@ -685,6 +685,19 @@ TEST_F(TestArray, TestMakeArrayFromScalarSliced) {
   }
 }
 
+TEST_F(TestArray, TestMakeArrayFromScalarOverflow) {
+  auto scalar = std::make_shared<StringScalar>("aa");
+
+  // Use a length that will cause an overflow when multiplied by the size of 
the string
+  int64_t length = static_cast<int64_t>(std::numeric_limits<int64_t>::max()) / 
2 + 1;

Review Comment:
   We should probably test `StringArray` with 
`std::numeric_limits<int32_t>::max()` and `LargeStringArray` with 
`std::numeric_limits<int64_t>::max()`.



##########
cpp/src/arrow/array/array_test.cc:
##########
@@ -685,6 +685,19 @@ TEST_F(TestArray, TestMakeArrayFromScalarSliced) {
   }
 }
 
+TEST_F(TestArray, TestMakeArrayFromScalarOverflow) {
+  auto scalar = std::make_shared<StringScalar>("aa");
+
+  // Use a length that will cause an overflow when multiplied by the size of 
the string
+  int64_t length = static_cast<int64_t>(std::numeric_limits<int64_t>::max()) / 
2 + 1;
+  auto array_result = MakeArrayFromScalar(*scalar, length);
+
+  std::string err_msg = "offset overflow in repeated array construction";
+  ASSERT_FALSE(array_result.ok());
+  ASSERT_EQ(array_result.status().code(), StatusCode::Invalid);
+  ASSERT_EQ(array_result.status().message().substr(0, err_msg.length()), 
err_msg);

Review Comment:
   You can use `EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, 
::testing::HasSubstr(err_msg), array_result);` to simplify this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to