This is an automated email from the ASF dual-hosted git repository.
alenka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new e21952f969 GH-40750: [C++][Python] Map child Array constructed from
keys and items shouldn't have offset (#40871)
e21952f969 is described below
commit e21952f969cd9d0906a86898f561088606447359
Author: Alenka Frim <[email protected]>
AuthorDate: Wed May 8 13:47:21 2024 +0200
GH-40750: [C++][Python] Map child Array constructed from keys and items
shouldn't have offset (#40871)
### Rationale for this change
When a `MapArray` is constructed from `keys` and `items` arrays, the offset of
the list offsets is passed down to the struct child array, which is not correct.
### What changes are included in this PR?
This PR fixes this issue by no longer passing the offset to the struct child array built from `keys` and `items`.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
There shouldn't be any.
* GitHub Issue: #40750
Authored-by: AlenkaF <[email protected]>
Signed-off-by: AlenkaF <[email protected]>
---
cpp/src/arrow/array/array_list_test.cc | 16 +++++++++++++++-
cpp/src/arrow/array/array_nested.cc | 2 +-
python/pyarrow/tests/test_array.py | 24 ++++++++++++++++++++++++
3 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/cpp/src/arrow/array/array_list_test.cc
b/cpp/src/arrow/array/array_list_test.cc
index 18afcc90d7..e79ce6fe17 100644
--- a/cpp/src/arrow/array/array_list_test.cc
+++ b/cpp/src/arrow/array/array_list_test.cc
@@ -1287,7 +1287,7 @@ TEST_F(TestMapArray, ValidateErrorNullKey) {
}
TEST_F(TestMapArray, FromArrays) {
- std::shared_ptr<Array> offsets1, offsets2, offsets3, offsets4, keys, items;
+ std::shared_ptr<Array> offsets1, offsets2, offsets3, offsets4, offsets5,
keys, items;
std::vector<bool> offsets_is_valid3 = {true, false, true, true};
std::vector<bool> offsets_is_valid4 = {true, true, false, true};
@@ -1342,6 +1342,20 @@ TEST_F(TestMapArray, FromArrays) {
// Zero-length offsets
ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets1->Slice(0, 0), keys,
items, pool_));
+ // Offseted offsets
+ ASSERT_OK_AND_ASSIGN(auto map5,
+ MapArray::FromArrays(offsets1->Slice(1), keys, items,
pool_));
+ ASSERT_OK(map5->Validate());
+
+ AssertArraysEqual(*expected1.Slice(1), *map5);
+
+ std::vector<MapType::offset_type> offset5_values = {2, 2, 6};
+ ArrayFromVector<OffsetType, offset_type>(offset5_values, &offsets5);
+ ASSERT_OK_AND_ASSIGN(auto map6, MapArray::FromArrays(offsets5, keys, items,
pool_));
+ ASSERT_OK(map6->Validate());
+
+ AssertArraysEqual(*map5, *map6);
+
// Offsets not the right type
ASSERT_RAISES(TypeError, MapArray::FromArrays(keys, offsets1, items, pool_));
diff --git a/cpp/src/arrow/array/array_nested.cc
b/cpp/src/arrow/array/array_nested.cc
index 24e0dfb708..1be771d822 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -790,7 +790,7 @@ MapArray::MapArray(const std::shared_ptr<DataType>& type,
int64_t length,
const std::shared_ptr<Array>& items, int64_t null_count,
int64_t offset) {
auto pair_data = ArrayData::Make(type->fields()[0]->type(),
keys->data()->length,
- {nullptr}, {keys->data(), items->data()},
0, offset);
+ {nullptr}, {keys->data(), items->data()},
0);
auto map_data =
ArrayData::Make(type, length, std::move(buffers), {pair_data},
null_count, offset);
SetData(map_data);
diff --git a/python/pyarrow/tests/test_array.py
b/python/pyarrow/tests/test_array.py
index 3754daeb9b..dbe29c5730 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1099,6 +1099,30 @@ def test_map_from_arrays():
with pytest.raises(ValueError):
pa.MapArray.from_arrays(offsets, keys_with_null, items)
+ # Check if offset in offsets > 0
+ offsets = pa.array(offsets, pa.int32())
+ result = pa.MapArray.from_arrays(offsets.slice(1), keys, items)
+ expected = pa.MapArray.from_arrays([1, 3, 5], keys, items)
+
+ assert result.equals(expected)
+ assert result.offset == 1
+ assert expected.offset == 0
+
+ offsets = pa.array([0, 0, 0, 0, 0, 0], pa.int32())
+ result = pa.MapArray.from_arrays(
+ offsets.slice(1),
+ pa.array([], pa.string()),
+ pa.array([], pa.string()),
+ )
+ expected = pa.MapArray.from_arrays(
+ [0, 0, 0, 0, 0],
+ pa.array([], pa.string()),
+ pa.array([], pa.string()),
+ )
+ assert result.equals(expected)
+ assert result.offset == 1
+ assert expected.offset == 0
+
def test_fixed_size_list_from_arrays():
values = pa.array(range(12), pa.int64())