felipecrv commented on code in PR #44447:
URL: https://github.com/apache/arrow/pull/44447#discussion_r1873689442
##########
cpp/src/arrow/array/util.cc:
##########
@@ -915,6 +917,71 @@ Result<std::shared_ptr<Array>>
MakeEmptyArray(std::shared_ptr<DataType> type,
return builder->Finish();
}
+Result<std::shared_ptr<Array>> MakeMaskArray(const std::vector<int64_t>&
indices,
+ int64_t length, MemoryPool* pool)
{
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBitmap(length, pool));
+ bit_util::SetBitsTo(buffer->mutable_data(), 0, length, false);
+ for (int64_t index : indices) {
+ if (index < 0 || index >= length) {
+ return Status::IndexError("Index out of bounds: ", index);
+ }
+ bit_util::SetBit(buffer->mutable_data(), index);
+ }
+ return std::make_shared<BooleanArray>(length, buffer);
+}
+
+template <typename IndexType>
+Result<std::shared_ptr<Array>> MakeMaskArrayImpl(
+ const std::shared_ptr<NumericArray<IndexType>>& indices, int64_t length,
+ MemoryPool* pool) {
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBitmap(length, pool));
+ bit_util::SetBitsTo(buffer->mutable_data(), 0, length, false);
+ for (int64_t i = 0; i < indices->length(); ++i) {
+ int64_t index = indices->Value(i);
Review Comment:
The value could be null and nulls must be skipped.
##########
cpp/src/arrow/array/util.cc:
##########
@@ -915,6 +917,71 @@ Result<std::shared_ptr<Array>>
MakeEmptyArray(std::shared_ptr<DataType> type,
return builder->Finish();
}
+Result<std::shared_ptr<Array>> MakeMaskArray(const std::vector<int64_t>&
indices,
+ int64_t length, MemoryPool* pool)
{
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBitmap(length, pool));
+ bit_util::SetBitsTo(buffer->mutable_data(), 0, length, false);
+ for (int64_t index : indices) {
+ if (index < 0 || index >= length) {
+ return Status::IndexError("Index out of bounds: ", index);
+ }
+ bit_util::SetBit(buffer->mutable_data(), index);
+ }
+ return std::make_shared<BooleanArray>(length, buffer);
+}
+
+template <typename IndexType>
+Result<std::shared_ptr<Array>> MakeMaskArrayImpl(
+ const std::shared_ptr<NumericArray<IndexType>>& indices, int64_t length,
+ MemoryPool* pool) {
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBitmap(length, pool));
+ bit_util::SetBitsTo(buffer->mutable_data(), 0, length, false);
+ for (int64_t i = 0; i < indices->length(); ++i) {
+ int64_t index = indices->Value(i);
+ if (index < 0 || index >= length) {
+ return Status::IndexError("Index out of bounds: ", index);
+ }
+ bit_util::SetBit(buffer->mutable_data(), index);
+ }
+ return std::make_shared<BooleanArray>(length, buffer);
+}
+
+Result<std::shared_ptr<Array>> MakeMaskArray(const std::shared_ptr<Array>&
indices,
+ int64_t length, MemoryPool* pool)
{
+ if (indices->null_count() > 0) {
+ return Status::Invalid("Indices array must not contain null values");
+ }
Review Comment:
Hmm. If it takes an Arrow array of indices, it should be able to handle
nulls. The loop can be specialized based on the result of
`indices->MayHaveNulls()` so the common case doesn't have to check the validity
bitmap on every iteration.
##########
cpp/src/arrow/array/util.cc:
##########
@@ -915,6 +917,71 @@ Result<std::shared_ptr<Array>>
MakeEmptyArray(std::shared_ptr<DataType> type,
return builder->Finish();
}
+Result<std::shared_ptr<Array>> MakeMaskArray(const std::vector<int64_t>&
indices,
+ int64_t length, MemoryPool* pool)
{
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBitmap(length, pool));
+ bit_util::SetBitsTo(buffer->mutable_data(), 0, length, false);
Review Comment:
```suggestion
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateEmptyBitmap(length, pool));
```
##########
cpp/src/arrow/array/util.h:
##########
@@ -69,6 +69,33 @@ ARROW_EXPORT
Result<std::shared_ptr<Array>> MakeEmptyArray(std::shared_ptr<DataType> type,
MemoryPool* pool =
default_memory_pool());
+/// \brief Create an Array representing a boolean mask
+///
+/// The mask will have all elements set to false except for those
+/// indices specified in the indices vector.
+///
+/// \param[in] indices Which indices in the mask should be set to true
+/// \param[in] length The total length of the mask
+/// \param[in] pool the memory pool to allocate memory from
+/// \return the resulting Array
+ARROW_EXPORT
+Result<std::shared_ptr<Array>> MakeMaskArray(const std::vector<int64_t>&
indices,
Review Comment:
As this function doesn't need to take ownership of `indices`, it can accept
a `span<int64_t>` instead of a `std::vector<int64_t> &` so that either a vector
or any other contiguous buffer of int64_t can be passed.
##########
cpp/src/arrow/array/util.cc:
##########
@@ -915,6 +917,71 @@ Result<std::shared_ptr<Array>>
MakeEmptyArray(std::shared_ptr<DataType> type,
return builder->Finish();
}
+Result<std::shared_ptr<Array>> MakeMaskArray(const std::vector<int64_t>&
indices,
+ int64_t length, MemoryPool* pool)
{
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBitmap(length, pool));
+ bit_util::SetBitsTo(buffer->mutable_data(), 0, length, false);
+ for (int64_t index : indices) {
+ if (index < 0 || index >= length) {
+ return Status::IndexError("Index out of bounds: ", index);
+ }
+ bit_util::SetBit(buffer->mutable_data(), index);
+ }
+ return std::make_shared<BooleanArray>(length, buffer);
+}
+
+template <typename IndexType>
+Result<std::shared_ptr<Array>> MakeMaskArrayImpl(
+ const std::shared_ptr<NumericArray<IndexType>>& indices, int64_t length,
+ MemoryPool* pool) {
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBitmap(length, pool));
+ bit_util::SetBitsTo(buffer->mutable_data(), 0, length, false);
Review Comment:
```suggestion
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateEmptyBitmap(length, pool));
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]