Anakin100100 commented on code in PR #49375:
URL: https://github.com/apache/arrow/pull/49375#discussion_r2867211177
##########
cpp/src/arrow/compute/kernels/scalar_if_else_test.cc:
##########
@@ -608,6 +609,115 @@ TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinaryRand) {
CheckIfElseOutput(cond, left, right, expected_data);
}
+std::shared_ptr<Array> MakeOversizedBuffer(int32_t data_length) {
+ auto offsets = Buffer::FromVector(std::vector<int32_t>{0, data_length});
+ auto values = Buffer::FromString("x");
+ return MakeArray(
+ ArrayData::Make(binary(), 1, {nullptr, std::move(offsets),
std::move(values)}));
+}
+
+Result<std::shared_ptr<Array>> MakeAllocatedVarBinaryArray(
+ const std::shared_ptr<DataType>& type, int64_t data_length) {
+ ARROW_ASSIGN_OR_RAISE(auto values, AllocateBuffer(data_length));
+ if (type->id() == Type::STRING || type->id() == Type::BINARY) {
+ if (data_length > std::numeric_limits<int32_t>::max()) {
+ return Status::Invalid("data_length exceeds int32 offset range");
+ }
+ auto offsets =
+ Buffer::FromVector(std::vector<int32_t>{0,
static_cast<int32_t>(data_length)});
+ return MakeArray(
+ ArrayData::Make(type, 1, {nullptr, std::move(offsets),
std::move(values)}));
+ }
+ if (type->id() != Type::LARGE_STRING && type->id() != Type::LARGE_BINARY) {
+ return Status::TypeError("unsupported var-binary type for helper: ",
*type);
+ }
+ auto offsets = Buffer::FromVector(std::vector<int64_t>{0, data_length});
+ return MakeArray(
+ ArrayData::Make(type, 1, {nullptr, std::move(offsets),
std::move(values)}));
+}
+
+TEST_F(TestIfElseKernel, IfElseStringAAAAllocatedDataCapacityError) {
+ constexpr int64_t kPerSide =
+ static_cast<int64_t>(std::numeric_limits<int32_t>::max() / 2) + 4096;
+
+ auto string_type = TypeTraits<StringType>::type_singleton();
+ auto cond = ArrayFromJSON(boolean(), "[true]");
+ ASSERT_OK_AND_ASSIGN(auto left, MakeAllocatedVarBinaryArray(string_type,
kPerSide));
+ ASSERT_OK_AND_ASSIGN(auto right, MakeAllocatedVarBinaryArray(string_type,
kPerSide));
+
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ CapacityError,
+ ::testing::HasSubstr("Result may exceed offset capacity for this type"),
+ CallFunction("if_else", {cond, left, right}));
+}
+
+TEST_F(TestIfElseKernel, IfElseLargeStringAAADoesNotCapacityOverflow) {
+ constexpr int64_t kPerSide =
+ static_cast<int64_t>(std::numeric_limits<int32_t>::max() / 2) + 4096;
+
+ auto large_string_type = TypeTraits<LargeStringType>::type_singleton();
+ auto cond = ArrayFromJSON(boolean(), "[true]");
+ ASSERT_OK_AND_ASSIGN(auto left,
+ MakeAllocatedVarBinaryArray(large_string_type,
kPerSide));
+ ASSERT_OK_AND_ASSIGN(auto right,
+ MakeAllocatedVarBinaryArray(large_string_type,
kPerSide));
+
+ auto maybe_out = CallFunction("if_else", {cond, left, right});
+ ASSERT_FALSE(maybe_out.status().IsCapacityError()) << maybe_out.status();
+}
+
+TEST_F(TestIfElseKernel, IfElseBinaryCapacityErrorAAA) {
Review Comment:
If we checked that if the sum of results could overflow then checking that
if they are both at capacity also triggers the check isn't really needed
##########
cpp/src/arrow/compute/kernels/scalar_if_else_test.cc:
##########
@@ -608,6 +609,115 @@ TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinaryRand) {
CheckIfElseOutput(cond, left, right, expected_data);
}
+std::shared_ptr<Array> MakeOversizedBuffer(int32_t data_length) {
+ auto offsets = Buffer::FromVector(std::vector<int32_t>{0, data_length});
+ auto values = Buffer::FromString("x");
+ return MakeArray(
+ ArrayData::Make(binary(), 1, {nullptr, std::move(offsets),
std::move(values)}));
+}
+
+Result<std::shared_ptr<Array>> MakeAllocatedVarBinaryArray(
+ const std::shared_ptr<DataType>& type, int64_t data_length) {
+ ARROW_ASSIGN_OR_RAISE(auto values, AllocateBuffer(data_length));
+ if (type->id() == Type::STRING || type->id() == Type::BINARY) {
+ if (data_length > std::numeric_limits<int32_t>::max()) {
+ return Status::Invalid("data_length exceeds int32 offset range");
+ }
+ auto offsets =
+ Buffer::FromVector(std::vector<int32_t>{0,
static_cast<int32_t>(data_length)});
+ return MakeArray(
+ ArrayData::Make(type, 1, {nullptr, std::move(offsets),
std::move(values)}));
+ }
+ if (type->id() != Type::LARGE_STRING && type->id() != Type::LARGE_BINARY) {
+ return Status::TypeError("unsupported var-binary type for helper: ",
*type);
+ }
+ auto offsets = Buffer::FromVector(std::vector<int64_t>{0, data_length});
+ return MakeArray(
+ ArrayData::Make(type, 1, {nullptr, std::move(offsets),
std::move(values)}));
+}
+
+TEST_F(TestIfElseKernel, IfElseStringAAAAllocatedDataCapacityError) {
+ constexpr int64_t kPerSide =
+ static_cast<int64_t>(std::numeric_limits<int32_t>::max() / 2) + 4096;
+
+ auto string_type = TypeTraits<StringType>::type_singleton();
+ auto cond = ArrayFromJSON(boolean(), "[true]");
+ ASSERT_OK_AND_ASSIGN(auto left, MakeAllocatedVarBinaryArray(string_type,
kPerSide));
+ ASSERT_OK_AND_ASSIGN(auto right, MakeAllocatedVarBinaryArray(string_type,
kPerSide));
+
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ CapacityError,
+ ::testing::HasSubstr("Result may exceed offset capacity for this type"),
+ CallFunction("if_else", {cond, left, right}));
+}
+
+TEST_F(TestIfElseKernel, IfElseLargeStringAAADoesNotCapacityOverflow) {
Review Comment:
ok
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]