This is an automated email from the ASF dual-hosted git repository.
felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new fe1f7c0de4 GH-41955: [C++] Follow up of adding null_bitmap to
MapArray::FromArrays (#41956)
fe1f7c0de4 is described below
commit fe1f7c0de4557784b20d3936f22d2efaed01a9d0
Author: Alenka Frim <[email protected]>
AuthorDate: Thu Jun 13 19:06:28 2024 +0200
GH-41955: [C++] Follow up of adding null_bitmap to MapArray::FromArrays
(#41956)
### Rationale for this change
There have been some new comments regarding the work done in
https://github.com/apache/arrow/pull/41757.
### What changes are included in this PR?
This PR addresses the comments from
https://github.com/apache/arrow/pull/41757#pullrequestreview-2094287563
### Are these changes tested?
Yes. Existing tests should pass.
### Are there any user-facing changes?
No.
* GitHub Issue: #41955
Lead-authored-by: AlenkaF <[email protected]>
Co-authored-by: Alenka Frim <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
cpp/src/arrow/array/array_list_test.cc | 20 +++++++++++++----
cpp/src/arrow/array/array_nested.cc | 41 ++++++++++++++++++----------------
cpp/src/arrow/array/array_nested.h | 6 ++---
python/pyarrow/tests/test_array.py | 14 ++++++++++++
4 files changed, 55 insertions(+), 26 deletions(-)
diff --git a/cpp/src/arrow/array/array_list_test.cc
b/cpp/src/arrow/array/array_list_test.cc
index 063b68706b..3d18d5f967 100644
--- a/cpp/src/arrow/array/array_list_test.cc
+++ b/cpp/src/arrow/array/array_list_test.cc
@@ -1369,14 +1369,26 @@ TEST_F(TestMapArray, FromArrays) {
ASSERT_RAISES(Invalid,
MapArray::FromArrays(offsets1, keys_with_null, tmp_items,
pool_));
- // With null_bitmap
- ASSERT_OK_AND_ASSIGN(auto map7, MapArray::FromArrays(offsets1, keys, items,
pool_,
-
offsets3->data()->buffers[0]));
+ // With null_bitmap and null_count=1
+ auto null_bitmap_1 = ArrayFromJSON(boolean(), "[1, 0,
1]")->data()->buffers[1];
+ ASSERT_OK_AND_ASSIGN(auto map7,
+ MapArray::FromArrays(offsets1, keys, items, pool_,
null_bitmap_1));
ASSERT_OK(map7->Validate());
MapArray expected7(map_type, length, offsets1->data()->buffers[1], keys,
items,
- offsets3->data()->buffers[0], 1);
+ null_bitmap_1, 1);
+ ASSERT_EQ(map7->null_count(), 1);
AssertArraysEqual(expected7, *map7);
+ // With null_bitmap and null_count=2
+ auto null_bitmap_2 = ArrayFromJSON(boolean(), "[0, 1,
0]")->data()->buffers[1];
+ ASSERT_OK_AND_ASSIGN(auto map8,
+ MapArray::FromArrays(offsets1, keys, items, pool_,
null_bitmap_2));
+ ASSERT_OK(map8->Validate());
+ MapArray expected8(map_type, length, offsets1->data()->buffers[1], keys,
items,
+ null_bitmap_2, 2);
+ ASSERT_EQ(map8->null_count(), 2);
+ AssertArraysEqual(expected8, *map8);
+
// Null bitmap and offset with null
ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets3, keys, items, pool_,
offsets3->data()->buffers[0]));
diff --git a/cpp/src/arrow/array/array_nested.cc
b/cpp/src/arrow/array/array_nested.cc
index 2f6bca3d57..47c0fd3582 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -807,7 +807,7 @@ MapArray::MapArray(const std::shared_ptr<DataType>& type,
int64_t length,
Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
- MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap) {
+ MemoryPool* pool, std::shared_ptr<Buffer> null_bitmap) {
using offset_type = typename MapType::offset_type;
using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
@@ -836,7 +836,7 @@ Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
return Status::NotImplemented("Null bitmap with offsets slice not
supported.");
}
- if (offsets->null_count() > 0) {
+ if (offsets->data()->MayHaveNulls()) {
ARROW_ASSIGN_OR_RAISE(auto buffers,
CleanListOffsets<MapType>(NULLPTR, *offsets, pool));
return std::make_shared<MapArray>(type, offsets->length() - 1,
std::move(buffers),
@@ -847,30 +847,32 @@ Result<std::shared_ptr<Array>>
MapArray::FromArraysInternal(
const auto& typed_offsets = checked_cast<const OffsetArrayType&>(*offsets);
BufferVector buffers;
- int64_t null_count;
- if (null_bitmap != nullptr) {
- buffers = BufferVector({std::move(null_bitmap), typed_offsets.values()});
- null_count = null_bitmap->size();
- } else {
- buffers = BufferVector({null_bitmap, typed_offsets.values()});
- null_count = 0;
+ buffers.resize(2);
+ int64_t null_count = 0;
+ if (null_bitmap) {
+ buffers[0] = std::move(null_bitmap);
+ null_count = kUnknownNullCount;
}
+ buffers[1] = typed_offsets.values();
return std::make_shared<MapArray>(type, offsets->length() - 1,
std::move(buffers), keys,
items, /*null_count=*/null_count,
offsets->offset());
}
-Result<std::shared_ptr<Array>> MapArray::FromArrays(
- const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& keys,
- const std::shared_ptr<Array>& items, MemoryPool* pool,
- const std::shared_ptr<Buffer>& null_bitmap) {
+Result<std::shared_ptr<Array>> MapArray::FromArrays(const
std::shared_ptr<Array>& offsets,
+ const
std::shared_ptr<Array>& keys,
+ const
std::shared_ptr<Array>& items,
+ MemoryPool* pool,
+ std::shared_ptr<Buffer>
null_bitmap) {
return FromArraysInternal(std::make_shared<MapType>(keys->type(),
items->type()),
- offsets, keys, items, pool, null_bitmap);
+ offsets, keys, items, pool,
std::move(null_bitmap));
}
-Result<std::shared_ptr<Array>> MapArray::FromArrays(
- std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
- const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
- MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap) {
+Result<std::shared_ptr<Array>> MapArray::FromArrays(std::shared_ptr<DataType>
type,
+ const
std::shared_ptr<Array>& offsets,
+ const
std::shared_ptr<Array>& keys,
+ const
std::shared_ptr<Array>& items,
+ MemoryPool* pool,
+ std::shared_ptr<Buffer>
null_bitmap) {
if (type->id() != Type::MAP) {
return Status::TypeError("Expected map type, got ", type->ToString());
}
@@ -881,7 +883,8 @@ Result<std::shared_ptr<Array>> MapArray::FromArrays(
if (!map_type.item_type()->Equals(items->type())) {
return Status::TypeError("Mismatching map items type");
}
- return FromArraysInternal(std::move(type), offsets, keys, items, pool,
null_bitmap);
+ return FromArraysInternal(std::move(type), offsets, keys, items, pool,
+ std::move(null_bitmap));
}
Status MapArray::ValidateChildData(
diff --git a/cpp/src/arrow/array/array_nested.h
b/cpp/src/arrow/array/array_nested.h
index f96b6bd3b1..a6d4977839 100644
--- a/cpp/src/arrow/array/array_nested.h
+++ b/cpp/src/arrow/array/array_nested.h
@@ -537,13 +537,13 @@ class ARROW_EXPORT MapArray : public ListArray {
static Result<std::shared_ptr<Array>> FromArrays(
const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>&
keys,
const std::shared_ptr<Array>& items, MemoryPool* pool =
default_memory_pool(),
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);
+ std::shared_ptr<Buffer> null_bitmap = NULLPTR);
static Result<std::shared_ptr<Array>> FromArrays(
std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
MemoryPool* pool = default_memory_pool(),
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);
+ std::shared_ptr<Buffer> null_bitmap = NULLPTR);
const MapType* map_type() const { return map_type_; }
@@ -563,7 +563,7 @@ class ARROW_EXPORT MapArray : public ListArray {
static Result<std::shared_ptr<Array>> FromArraysInternal(
std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
- MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);
+ MemoryPool* pool, std::shared_ptr<Buffer> null_bitmap = NULLPTR);
private:
const MapType* map_type_;
diff --git a/python/pyarrow/tests/test_array.py
b/python/pyarrow/tests/test_array.py
index 1032ab9add..becf00ead8 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1097,6 +1097,7 @@ def test_map_from_arrays():
items.type),
mask=pa.array([False, True, False], type=pa.bool_())
)
+ assert result.null_count == 1
assert result.equals(expected)
# pass in null bitmap without the type
@@ -1106,6 +1107,19 @@ def test_map_from_arrays():
)
assert result.equals(expected)
+ # pass in null bitmap with two nulls
+ offsets = [0, None, None, 6]
+ pyentries = [None, None, pypairs[2:]]
+
+ result = pa.MapArray.from_arrays([0, 2, 2, 6], keys, items, pa.map_(
+ keys.type,
+ items.type),
+ mask=pa.array([True, True, False], type=pa.bool_())
+ )
+ expected = pa.array(pyentries, type=pa.map_(pa.binary(), pa.int32()))
+ assert result.null_count == 2
+ assert result.equals(expected)
+
# error if null bitmap and offsets with nulls passed
msg1 = 'Ambiguous to specify both validity map and offsets with nulls'
with pytest.raises(pa.ArrowInvalid, match=msg1):