This is an automated email from the ASF dual-hosted git repository.

westonpace pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 65202d0178 GH-34619: [C++] Add extension array handling to ArraySpan 
conversion (#34684)
65202d0178 is described below

commit 65202d0178d74e84f5478e669470c9bc7d712c59
Author: Weston Pace <[email protected]>
AuthorDate: Fri Mar 24 06:13:14 2023 -0700

    GH-34619: [C++] Add extension array handling to ArraySpan conversion 
(#34684)
    
    ### Rationale for this change
    
    The take kernel would generate a segmentation fault when used on an 
extension array with a dictionary storage type.
    
    ### What changes are included in this PR?
    
    Conversion to/from ArraySpan now uses the storage type to determine if it 
needs to assign a dictionary
    
    ### Are these changes tested?
    
    I added unit tests that verify ArraySpan round trip for most types, 
including an extension dictionary type (and verified this test fails without 
the fix).
    
    ### Are there any user-facing changes?
    
    No
    * Closes: #34619
    
    Authored-by: Weston Pace <[email protected]>
    Signed-off-by: Weston Pace <[email protected]>
---
 cpp/src/arrow/array/array_test.cc | 16 ++++++++++++++++
 cpp/src/arrow/array/data.cc       | 19 ++++++++++++++-----
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/cpp/src/arrow/array/array_test.cc 
b/cpp/src/arrow/array/array_test.cc
index a434a75374..d18b16ba78 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -661,6 +661,13 @@ TEST_F(TestArray, TestMakeArrayFromMapScalar) {
   AssertAppendScalar(pool_, std::make_shared<MapScalar>(scalar));
 }
 
+void CheckSpanRoundTrip(const Array& array) {
+  ArraySpan span;
+  span.SetMembers(*array.data());
+  std::shared_ptr<Array> array2 = span.ToArray();
+  AssertArraysEqual(array, *array2);
+}
+
 TEST_F(TestArray, TestMakeEmptyArray) {
   FieldVector union_fields1({field("a", utf8()), field("b", int32())});
   FieldVector union_fields2({field("a", null()), field("b", 
list(large_utf8()))});
@@ -696,9 +703,18 @@ TEST_F(TestArray, TestMakeEmptyArray) {
     ASSERT_OK_AND_ASSIGN(auto array, MakeEmptyArray(type));
     ASSERT_OK(array->ValidateFull());
     ASSERT_EQ(array->length(), 0);
+    CheckSpanRoundTrip(*array);
   }
 }
 
+TEST_F(TestArray, ExtensionSpanRoundTrip) {
+  // Other types are checked in MakeEmptyArray but MakeEmptyArray doesn't
+  // work for extension types so we check that here
+  ASSERT_OK_AND_ASSIGN(auto array, MakeEmptyArray(dictionary(int8(), utf8())));
+  auto ext_array = ExtensionArray(dict_extension_type(), std::move(array));
+  CheckSpanRoundTrip(ext_array);
+}
+
 TEST_F(TestArray, TestAppendArraySlice) {
   auto scalars = GetScalars();
   ArraySpan span;
diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index b13022f9c3..e23facce63 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -157,6 +157,11 @@ void ArraySpan::SetMembers(const ArrayData& data) {
   }
 
   Type::type type_id = this->type->id();
+  if (type_id == Type::EXTENSION) {
+    const ExtensionType* ext_type = checked_cast<const 
ExtensionType*>(this->type);
+    type_id = ext_type->storage_type()->id();
+  }
+
   if (data.buffers[0] == nullptr && type_id != Type::NA &&
       type_id != Type::SPARSE_UNION && type_id != Type::DENSE_UNION) {
     // This should already be zero but we make for sure
@@ -168,7 +173,7 @@ void ArraySpan::SetMembers(const ArrayData& data) {
     this->buffers[i] = {};
   }
 
-  if (this->type->id() == Type::DICTIONARY) {
+  if (type_id == Type::DICTIONARY) {
     this->child_data.resize(1);
     this->child_data[0].SetMembers(*data.dictionary);
   } else {
@@ -412,16 +417,20 @@ std::shared_ptr<ArrayData> ArraySpan::ToArrayData() const 
{
     result->buffers.emplace_back(this->GetBuffer(i));
   }
 
-  if (this->type->id() == Type::NA) {
+  Type::type type_id = this->type->id();
+  if (type_id == Type::EXTENSION) {
+    const ExtensionType* ext_type = checked_cast<const 
ExtensionType*>(this->type);
+    type_id = ext_type->storage_type()->id();
+  }
+
+  if (type_id == Type::NA) {
     result->null_count = this->length;
   } else if (this->buffers[0].data == nullptr) {
     // No validity bitmap, so the null count is 0
     result->null_count = 0;
   }
 
-  // TODO(wesm): what about extension arrays?
-
-  if (this->type->id() == Type::DICTIONARY) {
+  if (type_id == Type::DICTIONARY) {
     result->dictionary = this->dictionary().ToArrayData();
   } else {
     // Emit children, too

Reply via email to