felipecrv commented on code in PR #43190:
URL: https://github.com/apache/arrow/pull/43190#discussion_r1681254541


##########
cpp/src/arrow/array/concatenate_test.cc:
##########
@@ -661,14 +663,104 @@ TEST_F(ConcatenateTest, ExtensionType) {
   });
 }
 
+std::shared_ptr<DataType> LargeVersionOfType(const std::shared_ptr<DataType>& 
type) {
+  switch (type->id()) {
+    case Type::BINARY:
+      return large_binary();
+    case Type::STRING:
+      return large_utf8();
+    case Type::LIST:
+      return large_list(static_cast<const ListType&>(*type).value_type());
+    case Type::LIST_VIEW:
+      return large_list_view(static_cast<const 
ListViewType&>(*type).value_type());
+    case Type::LARGE_BINARY:
+    case Type::LARGE_STRING:
+    case Type::LARGE_LIST:
+    case Type::LARGE_LIST_VIEW:
+      return type;
+    default:
+      Unreachable();
+      break;
+  }
+}
+
+std::shared_ptr<DataType> fixed_size_list_of_1(std::shared_ptr<DataType> type) 
{
+  return fixed_size_list(std::move(type), 1);
+}
+
 TEST_F(ConcatenateTest, OffsetOverflow) {
-  auto fake_long = ArrayFromJSON(utf8(), "[\"\"]");
-  fake_long->data()->GetMutableValues<int32_t>(1)[1] =
+  using TypeFactory = std::shared_ptr<DataType> (*)(std::shared_ptr<DataType>);
+  static const std::vector<TypeFactory> kNestedTypeFactories = {
+      list, large_list, list_view, large_list_view, fixed_size_list_of_1,
+  };
+
+  auto* pool = default_memory_pool();
+  std::shared_ptr<DataType> suggested_cast;
+  for (auto& ty : {binary(), utf8()}) {
+    auto large_ty = LargeVersionOfType(ty);
+
+    auto fake_long = ArrayFromJSON(ty, "[\"\"]");
+    fake_long->data()->GetMutableValues<int32_t>(1)[1] =
+        std::numeric_limits<int32_t>::max();
+    // XXX: since the data fake_long claims to own isn't there, this would

Review Comment:
   I just moved this XXX comment. I think it was written by you :)



##########
cpp/src/arrow/array/concatenate.cc:
##########
@@ -337,20 +382,40 @@ class ConcatenateImpl {
     return ConcatenateBuffers(buffers, pool_).Value(&out_->buffers[1]);
   }
 
-  Status Visit(const BinaryType&) {
+  Status Visit(const BinaryType& input_type) {
     std::vector<Range> value_ranges;
     ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t)));
-    RETURN_NOT_OK(ConcatenateOffsets<int32_t>(index_buffers, pool_, 
&out_->buffers[1],
-                                              &value_ranges));
+    ARROW_ASSIGN_OR_RAISE(
+        auto outcome, ConcatenateOffsets<int32_t>(index_buffers, pool_, 
&out_->buffers[1],
+                                                  &value_ranges));
+    switch (outcome) {
+      case OffsetBufferOpOutcome::kOk:
+        break;
+      case OffsetBufferOpOutcome::kOffsetOverflow:
+        switch (input_type.id()) {
+          case Type::BINARY:
+            suggested_cast_ = large_binary();
+            break;
+          case Type::STRING:
+            suggested_cast_ = large_utf8();
+            break;
+          default:
+            DCHECK(false) << "unexpected type id from BinaryType: " << 
input_type;
+            break;

Review Comment:
   Yes, but this string would go into the compiled binary unnecessarily. The 
only consequence of reaching `default` here is not suggesting a cast which is 
fine. But debug assert can tell an Arrow C++ developer that a value isn't being 
handled.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to