jvanstraten commented on a change in pull request #12116:
URL: https://github.com/apache/arrow/pull/12116#discussion_r802860477



##########
File path: cpp/src/arrow/array/util.cc
##########
@@ -534,6 +533,151 @@ class NullArrayFactory {
   std::shared_ptr<Buffer> buffer_;
 };
 
+// mutable version of ImmutableNullArrayFactory
+class NullArrayFactory {
+ private:
+  Result<std::shared_ptr<Buffer>> CreateZeroByteBuffer(size_t 
scalar_size_bytes) const {
+    ARROW_ASSIGN_OR_RAISE(auto buffer,
+                          AllocateBuffer(length_ * scalar_size_bytes, pool_));
+    std::memset(buffer->mutable_data(), 0, buffer->size());
+    return std::shared_ptr<Buffer>(std::move(buffer));
+  }
+
+  Result<std::shared_ptr<Buffer>> CreateZeroOffsetBuffer(size_t 
index_size_bytes) const {
+    ARROW_ASSIGN_OR_RAISE(auto buffer,
+                          AllocateBuffer((length_ + 1) * index_size_bytes, 
pool_));
+    std::memset(buffer->mutable_data(), 0, buffer->size());
+    return std::shared_ptr<Buffer>(std::move(buffer));
+  }
+
+  Result<std::shared_ptr<Buffer>> CreateZeroBitBuffer(size_t scalar_size_bits) 
const {
+    ARROW_ASSIGN_OR_RAISE(auto buffer,
+                          AllocateBuffer((length_ * scalar_size_bits + 7) / 8, 
pool_));
+    std::memset(buffer->mutable_data(), 0, buffer->size());
+    return std::shared_ptr<Buffer>(std::move(buffer));
+  }
+
+  static Result<std::shared_ptr<Buffer>> CreateEmptyBuffer() { return 
AllocateBuffer(0); }
+
+ public:
+  NullArrayFactory(MemoryPool* pool, const std::shared_ptr<DataType>& type,
+                   int64_t length)
+      : pool_(pool), type_(type), length_(length) {}
+
+  Result<std::shared_ptr<ArrayData>> Create() {
+    std::vector<std::shared_ptr<ArrayData>> child_data(type_->num_fields());
+    ARROW_ASSIGN_OR_RAISE(auto validity, CreateZeroBitBuffer(1));
+    out_ = ArrayData::Make(type_, length_, {validity}, child_data, length_, 0);
+    RETURN_NOT_OK(VisitTypeInline(*type_, this));
+    return out_;
+  }
+
+  Status Visit(const NullType&) {
+    out_->buffers.resize(1, nullptr);
+    return Status::OK();
+  }
+
+  Status Visit(const FixedWidthType& type) {
+    out_->buffers.resize(2);
+    // values
+    ARROW_ASSIGN_OR_RAISE(out_->buffers[1], 
CreateZeroBitBuffer(type.bit_width()));
+    return Status::OK();
+  }
+
+  template <typename T>
+  enable_if_base_binary<T, Status> Visit(const T&) {
+    out_->buffers.resize(3);
+    // offsets
+    ARROW_ASSIGN_OR_RAISE(out_->buffers[1],
+                          CreateZeroOffsetBuffer(sizeof(typename 
T::offset_type)));
+    // values
+    ARROW_ASSIGN_OR_RAISE(out_->buffers[2], CreateEmptyBuffer());
+    return Status::OK();
+  }
+
+  template <typename T>
+  enable_if_var_size_list<T, Status> Visit(const T& type) {
+    out_->buffers.resize(2);
+    // offsets
+    ARROW_ASSIGN_OR_RAISE(out_->buffers[1],
+                          CreateZeroOffsetBuffer(sizeof(typename 
T::offset_type)));
+    // values
+    ARROW_ASSIGN_OR_RAISE(out_->child_data[0], CreateChild(type, 0, 
/*length=*/0));
+    return Status::OK();
+  }
+
+  Status Visit(const FixedSizeListType& type) {
+    ARROW_ASSIGN_OR_RAISE(out_->child_data[0],
+                          CreateChild(type, 0, length_ * type.list_size()));
+    return Status::OK();
+  }
+
+  Status Visit(const StructType& type) {
+    for (int i = 0; i < type_->num_fields(); ++i) {
+      ARROW_ASSIGN_OR_RAISE(out_->child_data[i], CreateChild(type, i, 
length_));
+    }
+    return Status::OK();
+  }
+
+  Status Visit(const UnionType& type) {
+    out_->buffers.resize(2);
+
+    // First buffer is always null
+    out_->buffers[0] = nullptr;
+
+    // type ID buffer
+    ARROW_ASSIGN_OR_RAISE(out_->buffers[1], AllocateBuffer(length_, pool_));
+    std::memset(out_->buffers[1]->mutable_data(), type.type_codes()[0], 
length_);
+
+    // For sparse unions, we now create children with the same length as the
+    // parent
+    int64_t child_length = length_;
+    if (type.mode() == UnionMode::DENSE) {
+      // For dense unions, we set the offsets to all zero and create children
+      // with length 1
+      out_->buffers.resize(3);
+      ARROW_ASSIGN_OR_RAISE(out_->buffers[2], 
CreateZeroByteBuffer(sizeof(int32_t)));
+
+      child_length = 1;
+    }
+    for (int i = 0; i < type_->num_fields(); ++i) {
+      ARROW_ASSIGN_OR_RAISE(out_->child_data[i], CreateChild(type, i, 
child_length));
+    }
+    return Status::OK();
+  }
+
+  Status Visit(const DictionaryType& type) {
+    out_->buffers.resize(2);
+    // dictionary indices
+    ARROW_ASSIGN_OR_RAISE(out_->buffers[1], 
CreateZeroBitBuffer(type.bit_width()));
+    // dictionary data
+    ARROW_ASSIGN_OR_RAISE(auto typed_null_dict, 
MakeArrayOfNull(type.value_type(), 0));
+    out_->dictionary = typed_null_dict->data();
+    return Status::OK();
+  }
+
+  Status Visit(const ExtensionType& type) {
+    out_->child_data.resize(type.storage_type()->num_fields());
+    RETURN_NOT_OK(VisitTypeInline(*type.storage_type(), this));
+    return Status::OK();
+  }
+
+  Status Visit(const DataType& type) {
+    return Status::NotImplemented("construction of all-null ", type);
+  }
+
+  Result<std::shared_ptr<ArrayData>> CreateChild(const DataType& type, int i,
+                                                 int64_t length) {
+    ImmutableNullArrayFactory child_factory(pool_, type.field(i)->type(), 
length);

Review comment:
       Yes it does, good catch. In fact, it looks like I completely forgot to 
refactor this part of the code for mutable vs immutable as the implied version. 
Fixed in b30526c and then refactored in 21507bf.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to