js8544 commented on code in PR #38504:
URL: https://github.com/apache/arrow/pull/38504#discussion_r1389013727


##########
cpp/src/arrow/array/util.cc:
##########
@@ -669,11 +671,29 @@ class RepeatedArrayFactory {
   enable_if_base_binary<T, Status> Visit(const T&) {
     const std::shared_ptr<Buffer>& value = scalar<T>().value;
     std::shared_ptr<Buffer> values_buffer, offsets_buffer;
-    RETURN_NOT_OK(CreateBufferOf(value->data(), value->size(), 
&values_buffer));
     auto size = static_cast<typename T::offset_type>(value->size());
+
+    typename T::offset_type length_casted;
+    if (length_ < 0) {
+      return Status::Invalid("length cannot be negative: " + 
std::to_string(length_));

Review Comment:
   I think the negativity check can be moved to the top level 
`MakeArrayFromScalar` since it applies to all types.



##########
cpp/src/arrow/array/array_test.cc:
##########
@@ -685,6 +685,50 @@ TEST_F(TestArray, TestMakeArrayFromScalarSliced) {
   }
 }
 
+TEST_F(TestArray, TestMakeArrayFromScalarStringOverflow) {
+  auto scalar = std::make_shared<StringScalar>("aa");
+
+  // Use a length that will cause an overflow when multiplied by the size of 
the string
+  int64_t length = static_cast<int32_t>(std::numeric_limits<int32_t>::max()) / 
2 + 1;
+  auto array_result = MakeArrayFromScalar(*scalar, length);
+
+  std::string err_msg = "offset overflow in repeated array construction";
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr(err_msg), 
array_result);

Review Comment:
   For each case, can you also add a case where it "just" works to avoid false 
positives. E.g. "aa" and `int32::max/2`



##########
cpp/src/arrow/array/util.cc:
##########
@@ -669,11 +671,29 @@ class RepeatedArrayFactory {
   enable_if_base_binary<T, Status> Visit(const T&) {
     const std::shared_ptr<Buffer>& value = scalar<T>().value;
     std::shared_ptr<Buffer> values_buffer, offsets_buffer;
-    RETURN_NOT_OK(CreateBufferOf(value->data(), value->size(), 
&values_buffer));
     auto size = static_cast<typename T::offset_type>(value->size());
+
+    typename T::offset_type length_casted;
+    if (length_ < 0) {
+      return Status::Invalid("length cannot be negative: " + 
std::to_string(length_));
+    } else if (length_ > std::numeric_limits<typename T::offset_type>::max()) {
+      return Status::Invalid(
+          "length exceeds the maximum value of offset_type: " + 
std::to_string(length_) +
+          " is greater than " +
+          std::to_string(std::numeric_limits<typename T::offset_type>::max()));

Review Comment:
   I have no problem having this check here.
   BTW You can just use `Status::Invalid("some message ", length_, " some 
message ", number)` without using `to_string` and `+`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to