bkietz commented on a change in pull request #10606:
URL: https://github.com/apache/arrow/pull/10606#discussion_r665454642



##########
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##########
@@ -1668,6 +1668,82 @@ struct ListImpl : public Selection<ListImpl<Type>, Type> 
{
   }
 };
 
+struct DenseUnionImpl : public Selection<DenseUnionImpl, DenseUnionType> {
+  using Base = Selection<DenseUnionImpl, DenseUnionType>;
+  LIFT_BASE_MEMBERS();
+
+  Int32Builder value_offset_builder;
+  Int8Builder child_id_builder;
+  std::vector<int8_t> type_codes;
+  std::vector<std::shared_ptr<Int32Builder>> child_indices_builders;
+
+  DenseUnionImpl(KernelContext* ctx, const ExecBatch& batch, int64_t 
output_length,
+                 Datum* out)
+      : Base(ctx, batch, output_length, out),
+        value_offset_builder(ctx->memory_pool()),
+        child_id_builder(ctx->memory_pool()) {
+    DenseUnionArray typed_values(this->values);
+    type_codes = typed_values.union_type()->type_codes();
+    child_indices_builders.reserve(type_codes.size());
+    for (size_t i = 0; i < type_codes.size(); i++) {
+      child_indices_builders.push_back(
+          std::make_shared<Int32Builder>(ctx->memory_pool()));
+    }

Review comment:
       ```suggestion
           value_offset_builder_(ctx->memory_pool()),
           child_id_builder_(ctx->memory_pool()),
           type_codes_(checked_cast<const 
UnionType&>(*this->values->type).type_codes()),
           child_indices_builders_(type_codes_.size()) {
       for (auto& child_indices_builder : child_indices_builders_) {
         child_indices_builder = Int32Builder(ctx->memory_pool());
       }
   ```
   
   Class `ArrayBuilder` doesn't currently allow assignment like this, but it
should. Please also add `ARROW_DEFAULT_MOVE_AND_ASSIGN(ArrayBuilder);` to class
`ArrayBuilder`.

##########
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##########
@@ -1668,6 +1668,82 @@ struct ListImpl : public Selection<ListImpl<Type>, Type> 
{
   }
 };
 
+struct DenseUnionImpl : public Selection<DenseUnionImpl, DenseUnionType> {
+  using Base = Selection<DenseUnionImpl, DenseUnionType>;
+  LIFT_BASE_MEMBERS();
+
+  Int32Builder value_offset_builder;
+  Int8Builder child_id_builder;
+  std::vector<int8_t> type_codes;
+  std::vector<std::shared_ptr<Int32Builder>> child_indices_builders;
+
+  DenseUnionImpl(KernelContext* ctx, const ExecBatch& batch, int64_t 
output_length,
+                 Datum* out)
+      : Base(ctx, batch, output_length, out),
+        value_offset_builder(ctx->memory_pool()),
+        child_id_builder(ctx->memory_pool()) {
+    DenseUnionArray typed_values(this->values);
+    type_codes = typed_values.union_type()->type_codes();
+    child_indices_builders.reserve(type_codes.size());
+    for (size_t i = 0; i < type_codes.size(); i++) {
+      child_indices_builders.push_back(
+          std::make_shared<Int32Builder>(ctx->memory_pool()));
+    }
+  }
+
+  template <typename Adapter>
+  Status GenerateOutput() {
+    DenseUnionArray typed_values(this->values);
+    Adapter adapter(this);
+    RETURN_NOT_OK(adapter.Generate(
+        [&](int64_t index) {
+          int8_t child_id = typed_values.child_id(index);
+          child_id_builder.UnsafeAppend(type_codes[child_id]);
+          int32_t value_offset = typed_values.value_offset(index);
+          
value_offset_builder.UnsafeAppend(child_indices_builders[child_id]->length());
+          RETURN_NOT_OK(child_indices_builders[child_id]->Reserve(1));
+          child_indices_builders[child_id]->UnsafeAppend(value_offset);
+          return Status::OK();
+        },
+        [&]() {
+          int8_t child_id = 0;
+          child_id_builder.UnsafeAppend(type_codes[child_id]);
+          
value_offset_builder.UnsafeAppend(child_indices_builders[child_id]->length());
+          RETURN_NOT_OK(child_indices_builders[child_id]->Reserve(1));
+          child_indices_builders[child_id]->UnsafeAppendNull();
+          return Status::OK();
+        }));
+    return Status::OK();
+  }
+
+  Status Init() override {
+    RETURN_NOT_OK(child_id_builder.Reserve(output_length));
+    RETURN_NOT_OK(value_offset_builder.Reserve(output_length));
+    return Status::OK();
+  }
+
+  Status Finish() override {
+    std::shared_ptr<Array> child_ids;
+    std::shared_ptr<Array> value_offsets;
+    RETURN_NOT_OK(child_id_builder.Finish(&child_ids));
+    RETURN_NOT_OK(value_offset_builder.Finish(&value_offsets));
+
+    DenseUnionArray typed_values(this->values);
+    auto num_fields = typed_values.num_fields();
+    BufferVector buffers = {nullptr, checked_cast<const 
Int8Array&>(*child_ids).values(),
+                            checked_cast<const 
Int32Array&>(*value_offsets).values()};
+    *out = ArrayData(typed_values.type(), child_ids->length(), 
std::move(buffers), 0);
+    for (auto i = 0; i < num_fields; i++) {
+      std::shared_ptr<Int32Array> child_indices_array;
+      RETURN_NOT_OK(child_indices_builders[i]->Finish(&child_indices_array));

Review comment:
       Nit: prefer the `Result`-returning overloads
   ```suggestion
         ARROW_ASSIGN_OR_RAISE(auto child_indices_array,
                               child_indices_builders[i]->Finish());
   ```

##########
File path: cpp/src/arrow/compute/kernels/vector_selection_test.cc
##########
@@ -1281,33 +1280,32 @@ TEST_F(TestTakeKernelWithStruct, TakeStruct) {
 
 class TestTakeKernelWithUnion : public TestTakeKernelTyped<UnionType> {};
 
-// TODO: Restore Union take functionality
-TEST_F(TestTakeKernelWithUnion, DISABLED_TakeUnion) {
-  for (auto union_ : UnionTypeFactories()) {
-    auto union_type = union_({field("a", int32()), field("b", utf8())}, {2, 
5});
-    auto union_json = R"([
+// TODO: Restore Sparse Union take functionality

Review comment:
       Are you going to address this in this PR? If not, please file a
follow-up JIRA and either remove this comment or include that JIRA's number here.

##########
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##########
@@ -1668,6 +1668,82 @@ struct ListImpl : public Selection<ListImpl<Type>, Type> 
{
   }
 };
 
+struct DenseUnionImpl : public Selection<DenseUnionImpl, DenseUnionType> {
+  using Base = Selection<DenseUnionImpl, DenseUnionType>;
+  LIFT_BASE_MEMBERS();
+
+  Int32Builder value_offset_builder;
+  Int8Builder child_id_builder;

Review comment:
       It's worth noting that `ArrayBuilder` always produces a null bitmap,
which is not necessary for the buffers these builders are used to produce.
See also: `TypedBufferBuilder`

##########
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##########
@@ -1668,6 +1668,82 @@ struct ListImpl : public Selection<ListImpl<Type>, Type> 
{
   }
 };
 
+struct DenseUnionImpl : public Selection<DenseUnionImpl, DenseUnionType> {
+  using Base = Selection<DenseUnionImpl, DenseUnionType>;
+  LIFT_BASE_MEMBERS();
+
+  Int32Builder value_offset_builder;
+  Int8Builder child_id_builder;
+  std::vector<int8_t> type_codes;
+  std::vector<std::shared_ptr<Int32Builder>> child_indices_builders;

Review comment:
       Nit: this class is large enough to warrant naming members clearly
   ```suggestion
     Int32Builder value_offset_builder_;
     Int8Builder child_id_builder_;
     std::vector<int8_t> type_codes_;
     std::vector<Int32Builder> child_indices_builders_;
   ```

##########
File path: cpp/src/arrow/compute/kernels/vector_selection_test.cc
##########
@@ -1281,33 +1280,32 @@ TEST_F(TestTakeKernelWithStruct, TakeStruct) {
 
 class TestTakeKernelWithUnion : public TestTakeKernelTyped<UnionType> {};
 
-// TODO: Restore Union take functionality
-TEST_F(TestTakeKernelWithUnion, DISABLED_TakeUnion) {
-  for (auto union_ : UnionTypeFactories()) {
-    auto union_type = union_({field("a", int32()), field("b", utf8())}, {2, 
5});
-    auto union_json = R"([
+// TODO: Restore Sparse Union take functionality
+TEST_F(TestTakeKernelWithUnion, TakeUnion) {
+  auto union_type = dense_union({field("a", int32()), field("b", utf8())}, {2, 
5});
+  auto union_json = R"([
       null,
       [2, 222],
       [5, "hello"],
       [5, "eh"],
       null,
       [2, 111]

Review comment:
       Since unions don't support top-level nulls, please be explicit about
the type_id associated with a null, and test with nulls in each child array.
   ```suggestion
         [2, null],
         [2, 222],
         [5, "hello"],
         [5, "eh"],
         [2, null],
         [2, 111],
         [5, null]
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to