This is an automated email from the ASF dual-hosted git repository.
felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new e4baf6be21 GH-38553 : [C++] Replace null_count with MayHaveNulls in
ListArrayFromArray and MapArray (#41957)
e4baf6be21 is described below
commit e4baf6be2167eb6ccbda90275304336f49998eac
Author: Alenka Frim <[email protected]>
AuthorDate: Wed Jun 5 01:41:30 2024 +0200
GH-38553 : [C++] Replace null_count with MayHaveNulls in ListArrayFromArray
and MapArray (#41957)
### Rationale for this change
Offsets could have `null_count() == -1` (`kUnknownNullCount`), meaning that
the offsets might contain nulls that are not accounted for, which can produce
failures (https://github.com/apache/arrow/issues/38553) when working with
`ListArray` or `MapArray`. `null_count()` should be replaced with
`MayHaveNulls()`.
### What changes are included in this PR?
`null_count` is replaced with `MayHaveNulls` in `ListArrayFromArray`,
`MapArray::FromArraysInternal` and `MapArray::ValidateChildData`. Some tests
had to be updated.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No.
* GitHub Issue: #38553
Authored-by: AlenkaF <[email protected]>
Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
cpp/src/arrow/array/array_list_test.cc | 2 +-
cpp/src/arrow/array/array_nested.cc | 8 ++++----
python/pyarrow/tests/test_array.py | 14 +++++++++++++-
3 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/cpp/src/arrow/array/array_list_test.cc b/cpp/src/arrow/array/array_list_test.cc
index 55f91dc341..063b68706b 100644
--- a/cpp/src/arrow/array/array_list_test.cc
+++ b/cpp/src/arrow/array/array_list_test.cc
@@ -1383,7 +1383,7 @@ TEST_F(TestMapArray, FromArrays) {
// Null bitmap and offset with offset
ASSERT_RAISES(NotImplemented,
- MapArray::FromArrays(offsets3->Slice(2), keys, items, pool_,
+ MapArray::FromArrays(offsets1->Slice(2), keys, items, pool_,
offsets3->data()->buffers[0]));
}
diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc
index bb5c6bf018..2f6bca3d57 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -115,7 +115,7 @@ Result<std::shared_ptr<typename TypeTraits<TYPE>::ArrayType>> ListArrayFromArray
return Status::TypeError("List offsets must be ",
OffsetArrowType::type_name());
}
- if (null_bitmap != nullptr && offsets.null_count() > 0) {
+ if (null_bitmap != nullptr && offsets.data()->MayHaveNulls()) {
return Status::Invalid(
"Ambiguous to specify both validity map and offsets with nulls");
}
@@ -827,7 +827,7 @@ Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
return Status::Invalid("Map key and item arrays must be equal length");
}
- if (null_bitmap != nullptr && offsets->null_count() > 0) {
+ if (null_bitmap != nullptr && offsets->data()->MayHaveNulls()) {
return Status::Invalid(
"Ambiguous to specify both validity map and offsets with nulls");
}
@@ -893,13 +893,13 @@ Status MapArray::ValidateChildData(
if (pair_data->type->id() != Type::STRUCT) {
return Status::Invalid("Map array child array should have struct type");
}
- if (pair_data->null_count != 0) {
+ if (pair_data->MayHaveNulls()) {
return Status::Invalid("Map array child array should have no nulls");
}
if (pair_data->child_data.size() != 2) {
return Status::Invalid("Map array child array should have two fields");
}
- if (pair_data->child_data[0]->null_count != 0) {
+ if (pair_data->child_data[0]->MayHaveNulls()) {
return Status::Invalid("Map array keys array should have no nulls");
}
return Status::OK();
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 49a00517fc..88394c77e4 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1013,6 +1013,18 @@ def test_list_array_types_from_arrays_fail(list_array_type, list_type_factory):
arr_slice.offsets, arr_slice.values, mask=arr_slice.is_null())
+def test_map_cast():
+ # GH-38553
+ t = pa.map_(pa.int64(), pa.int64())
+ arr = pa.array([{1: 2}], type=t)
+ result = arr.cast(pa.map_(pa.int32(), pa.int64()))
+
+ t_expected = pa.map_(pa.int32(), pa.int64())
+ expected = pa.array([{1: 2}], type=t_expected)
+
+ assert result.equals(expected)
+
+
def test_map_labelled():
# ARROW-13735
t = pa.map_(pa.field("name", "string", nullable=False), "int64")
@@ -1105,7 +1117,7 @@ def test_map_from_arrays():
# error if null bitmap passed to sliced offset
msg2 = 'Null bitmap with offsets slice not supported.'
- offsets = pa.array(offsets, pa.int32())
+ offsets = pa.array([0, 2, 2, 6], pa.int32())
with pytest.raises(pa.ArrowNotImplementedError, match=msg2):
pa.MapArray.from_arrays(offsets.slice(2), keys, items, pa.map_(
keys.type,