This is an automated email from the ASF dual-hosted git repository.

alenka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new e21952f969 GH-40750: [C++][Python] Map child Array constructed from 
keys and items shouldn't have offset (#40871)
e21952f969 is described below

commit e21952f969cd9d0906a86898f561088606447359
Author: Alenka Frim <[email protected]>
AuthorDate: Wed May 8 13:47:21 2024 +0200

    GH-40750: [C++][Python] Map child Array constructed from keys and items 
shouldn't have offset (#40871)
    
    ### Rationale for this change
    
    When a `MapArray` is constructed from `keys` and `items` arrays, the offset of 
the list offsets is passed down to the struct child array, which is not correct.
    
    ### What changes are included in this PR?
    
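    This PR fixes this issue: the offset of the list offsets is no longer passed down when the struct child array is built from `keys` and `items`.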
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    There shouldn't be any.
    * GitHub Issue: #40750
    
    Authored-by: AlenkaF <[email protected]>
    Signed-off-by: AlenkaF <[email protected]>
---
 cpp/src/arrow/array/array_list_test.cc | 16 +++++++++++++++-
 cpp/src/arrow/array/array_nested.cc    |  2 +-
 python/pyarrow/tests/test_array.py     | 24 ++++++++++++++++++++++++
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/array/array_list_test.cc 
b/cpp/src/arrow/array/array_list_test.cc
index 18afcc90d7..e79ce6fe17 100644
--- a/cpp/src/arrow/array/array_list_test.cc
+++ b/cpp/src/arrow/array/array_list_test.cc
@@ -1287,7 +1287,7 @@ TEST_F(TestMapArray, ValidateErrorNullKey) {
 }
 
 TEST_F(TestMapArray, FromArrays) {
-  std::shared_ptr<Array> offsets1, offsets2, offsets3, offsets4, keys, items;
+  std::shared_ptr<Array> offsets1, offsets2, offsets3, offsets4, offsets5, 
keys, items;
 
   std::vector<bool> offsets_is_valid3 = {true, false, true, true};
   std::vector<bool> offsets_is_valid4 = {true, true, false, true};
@@ -1342,6 +1342,20 @@ TEST_F(TestMapArray, FromArrays) {
   // Zero-length offsets
   ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets1->Slice(0, 0), keys, 
items, pool_));
 
+  // Offseted offsets
+  ASSERT_OK_AND_ASSIGN(auto map5,
+                       MapArray::FromArrays(offsets1->Slice(1), keys, items, 
pool_));
+  ASSERT_OK(map5->Validate());
+
+  AssertArraysEqual(*expected1.Slice(1), *map5);
+
+  std::vector<MapType::offset_type> offset5_values = {2, 2, 6};
+  ArrayFromVector<OffsetType, offset_type>(offset5_values, &offsets5);
+  ASSERT_OK_AND_ASSIGN(auto map6, MapArray::FromArrays(offsets5, keys, items, 
pool_));
+  ASSERT_OK(map6->Validate());
+
+  AssertArraysEqual(*map5, *map6);
+
   // Offsets not the right type
   ASSERT_RAISES(TypeError, MapArray::FromArrays(keys, offsets1, items, pool_));
 
diff --git a/cpp/src/arrow/array/array_nested.cc 
b/cpp/src/arrow/array/array_nested.cc
index 24e0dfb708..1be771d822 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -790,7 +790,7 @@ MapArray::MapArray(const std::shared_ptr<DataType>& type, 
int64_t length,
                    const std::shared_ptr<Array>& items, int64_t null_count,
                    int64_t offset) {
   auto pair_data = ArrayData::Make(type->fields()[0]->type(), 
keys->data()->length,
-                                   {nullptr}, {keys->data(), items->data()}, 
0, offset);
+                                   {nullptr}, {keys->data(), items->data()}, 
0);
   auto map_data =
       ArrayData::Make(type, length, std::move(buffers), {pair_data}, 
null_count, offset);
   SetData(map_data);
diff --git a/python/pyarrow/tests/test_array.py 
b/python/pyarrow/tests/test_array.py
index 3754daeb9b..dbe29c5730 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1099,6 +1099,30 @@ def test_map_from_arrays():
     with pytest.raises(ValueError):
         pa.MapArray.from_arrays(offsets, keys_with_null, items)
 
+    # Check if offset in offsets > 0
+    offsets = pa.array(offsets, pa.int32())
+    result = pa.MapArray.from_arrays(offsets.slice(1), keys, items)
+    expected = pa.MapArray.from_arrays([1, 3, 5], keys, items)
+
+    assert result.equals(expected)
+    assert result.offset == 1
+    assert expected.offset == 0
+
+    offsets = pa.array([0, 0, 0, 0, 0, 0], pa.int32())
+    result = pa.MapArray.from_arrays(
+        offsets.slice(1),
+        pa.array([], pa.string()),
+        pa.array([], pa.string()),
+    )
+    expected = pa.MapArray.from_arrays(
+        [0, 0, 0, 0, 0],
+        pa.array([], pa.string()),
+        pa.array([], pa.string()),
+    )
+    assert result.equals(expected)
+    assert result.offset == 1
+    assert expected.offset == 0
+
 
 def test_fixed_size_list_from_arrays():
     values = pa.array(range(12), pa.int64())

Reply via email to