This is an automated email from the ASF dual-hosted git repository.

rok pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new d08d5e64fc GH-48470: [Python] Construct UuidArray from list of 
UuidScalars (#48746)
d08d5e64fc is described below

commit d08d5e64fcfd8759d3a7089eced3e9a2d7a17f20
Author: tadeja <[email protected]>
AuthorDate: Wed Mar 18 13:11:39 2026 +0100

    GH-48470: [Python] Construct UuidArray from list of UuidScalars (#48746)
    
    ### Rationale for this change
    Fixes #48470. The fix applies to all extension types, not just UUID.
    
    ### What changes are included in this PR?
    Extension scalars are unwrapped to their storage scalars before being
    appended to the array builders.
    
    ### Are these changes tested?
    Yes, new `test_array_from_extension_scalars` covers builtin (uuid, 
opaque) and custom extension types with int, timestamp and struct storage, 
as well as null handling, `ExtensionScalar.from_storage`, type inference 
without an explicit type, and mixing extension scalars with raw Python objects.
    
    ### Are there any user-facing changes?
    Users can now run an example like the one below and get the output shown 
instead of an `ArrowInvalid` error.
    This works for any extension type, not just UUID.
    ```python
    import pyarrow as pa
    pa.array([pa.scalar(b'1'*16, type=pa.uuid())], type=pa.uuid())
    ```
    
    ```
    <pyarrow.lib.UuidArray object at 0x128186970>
    [
      31313131313131313131313131313131
    ]
    ```
    * GitHub Issue: #48470
    
    Lead-authored-by: Tadeja Kadunc <[email protected]>
    Co-authored-by: tadeja <[email protected]>
    Co-authored-by: Rok Mihevc <[email protected]>
    Signed-off-by: Rok Mihevc <[email protected]>
---
 python/pyarrow/src/arrow/python/python_to_arrow.cc | 26 +++++++---
 python/pyarrow/tests/test_extension_type.py        | 55 ++++++++++++++++++++++
 2 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc 
b/python/pyarrow/src/arrow/python/python_to_arrow.cc
index c70510a480..e7ce54abcd 100644
--- a/python/pyarrow/src/arrow/python/python_to_arrow.cc
+++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc
@@ -584,6 +584,14 @@ class PyConverter : public Converter<PyObject*, 
PyConversionOptions> {
   }
 };
 
+// Helper function to unwrap extension scalar to its storage scalar
+const Scalar& GetStorageScalar(const Scalar& scalar) {
+  if (scalar.type->id() == Type::EXTENSION) {
+    return *checked_cast<const ExtensionScalar&>(scalar).value;
+  }
+  return scalar;
+}
+
 template <typename T, typename Enable = void>
 class PyPrimitiveConverter;
 
@@ -663,7 +671,8 @@ class PyPrimitiveConverter<
     } else if (arrow::py::is_scalar(value)) {
       ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> scalar,
                             arrow::py::unwrap_scalar(value));
-      ARROW_RETURN_NOT_OK(this->primitive_builder_->AppendScalar(*scalar));
+      ARROW_RETURN_NOT_OK(
+          this->primitive_builder_->AppendScalar(GetStorageScalar(*scalar)));
     } else {
       ARROW_ASSIGN_OR_RAISE(
           auto converted, PyValue::Convert(this->primitive_type_, 
this->options_, value));
@@ -684,7 +693,8 @@ class PyPrimitiveConverter<
     } else if (arrow::py::is_scalar(value)) {
       ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> scalar,
                             arrow::py::unwrap_scalar(value));
-      ARROW_RETURN_NOT_OK(this->primitive_builder_->AppendScalar(*scalar));
+      ARROW_RETURN_NOT_OK(
+          this->primitive_builder_->AppendScalar(GetStorageScalar(*scalar)));
     } else {
       ARROW_ASSIGN_OR_RAISE(
           auto converted, PyValue::Convert(this->primitive_type_, 
this->options_, value));
@@ -710,7 +720,8 @@ class PyPrimitiveConverter<T, enable_if_t<std::is_same<T, 
FixedSizeBinaryType>::
     } else if (arrow::py::is_scalar(value)) {
       ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> scalar,
                             arrow::py::unwrap_scalar(value));
-      ARROW_RETURN_NOT_OK(this->primitive_builder_->AppendScalar(*scalar));
+      ARROW_RETURN_NOT_OK(
+          this->primitive_builder_->AppendScalar(GetStorageScalar(*scalar)));
     } else {
       ARROW_RETURN_NOT_OK(
           PyValue::Convert(this->primitive_type_, this->options_, value, 
view_));
@@ -747,7 +758,8 @@ class PyPrimitiveConverter<
     } else if (arrow::py::is_scalar(value)) {
       ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> scalar,
                             arrow::py::unwrap_scalar(value));
-      ARROW_RETURN_NOT_OK(this->primitive_builder_->AppendScalar(*scalar));
+      ARROW_RETURN_NOT_OK(
+          this->primitive_builder_->AppendScalar(GetStorageScalar(*scalar)));
     } else {
       ARROW_RETURN_NOT_OK(
           PyValue::Convert(this->primitive_type_, this->options_, value, 
view_));
@@ -791,7 +803,7 @@ class PyDictionaryConverter<U, enable_if_has_c_type<U>>
     } else if (arrow::py::is_scalar(value)) {
       ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> scalar,
                             arrow::py::unwrap_scalar(value));
-      return this->value_builder_->AppendScalar(*scalar, 1);
+      return this->value_builder_->AppendScalar(GetStorageScalar(*scalar), 1);
     } else {
       ARROW_ASSIGN_OR_RAISE(auto converted,
                             PyValue::Convert(this->value_type_, 
this->options_, value));
@@ -810,7 +822,7 @@ class PyDictionaryConverter<U, enable_if_has_string_view<U>>
     } else if (arrow::py::is_scalar(value)) {
       ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> scalar,
                             arrow::py::unwrap_scalar(value));
-      return this->value_builder_->AppendScalar(*scalar, 1);
+      return this->value_builder_->AppendScalar(GetStorageScalar(*scalar), 1);
     } else {
       ARROW_RETURN_NOT_OK(
           PyValue::Convert(this->value_type_, this->options_, value, view_));
@@ -983,7 +995,7 @@ class PyStructConverter : public 
StructConverter<PyConverter, PyConverterTrait>
     } else if (arrow::py::is_scalar(value)) {
       ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> scalar,
                             arrow::py::unwrap_scalar(value));
-      return this->struct_builder_->AppendScalar(*scalar);
+      return this->struct_builder_->AppendScalar(GetStorageScalar(*scalar));
     }
     switch (input_kind_) {
       case InputKind::DICT:
diff --git a/python/pyarrow/tests/test_extension_type.py 
b/python/pyarrow/tests/test_extension_type.py
index 66fcfc0556..465b556876 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -16,6 +16,7 @@
 # under the License.
 
 import contextlib
+import datetime
 import os
 import shutil
 import subprocess
@@ -1486,6 +1487,60 @@ def test_uuid_bytes_property_raises():
         pa.scalar(bad)
 
 
+def test_array_from_extension_scalars():
+    # One case per C++ converter: FixedSizeBinary, Binary/String
+    builtin_cases = [
+        (pa.uuid(), [b"0123456789abcdef"]),
+        (pa.opaque(pa.binary(), "t", "v"), [b"x", b"y"]),
+    ]
+    for ext_type, values in builtin_cases:
+        scalars = [pa.scalar(v, type=ext_type) for v in values]
+        result = pa.array(scalars, type=ext_type)
+        assert result.equals(pa.array(values, type=ext_type))
+
+    # One case per C++ converter: Numeric, Timestamp/Duration, Struct
+    custom_cases = [
+        (IntegerType(), [100, 200]),
+        (AnnotatedType(pa.timestamp("us"), "ts"),
+         [datetime.datetime(2023, 1, 1)]),
+        (MyStructType(), [{"left": 1, "right": 2}]),
+    ]
+    for ext_type, values in custom_cases:
+        with registered_extension_type(ext_type):
+            scalars = [pa.scalar(v, type=ext_type) for v in values]
+            result = pa.array(scalars, type=ext_type)
+            assert result.equals(pa.array(values, type=ext_type))
+
+    # Null handling
+    uuid_type = pa.uuid()
+    scalars = [pa.scalar(b"0123456789abcdef", type=uuid_type),
+               pa.scalar(None, type=uuid_type)]
+    result = pa.array(scalars, type=uuid_type)
+    assert result[0].is_valid and not result[1].is_valid
+
+    # ExtensionScalar.from_storage path
+    scalars = [
+        pa.ExtensionScalar.from_storage(uuid_type, b"0123456789abcdef"),
+        pa.ExtensionScalar.from_storage(uuid_type, None),
+    ]
+    result = pa.array(scalars, type=uuid_type)
+    expected = pa.array([b"0123456789abcdef", None], type=uuid_type)
+    assert result.equals(expected)
+
+    # Type inference without explicit type
+    u = uuid4()
+    scalars = [pa.scalar(u, type=pa.uuid()), None]
+    result = pa.array(scalars)
+    assert result.type == pa.uuid()
+    assert result[0].as_py() == u
+    assert not result[1].is_valid
+
+    # Mixed extension scalars and raw Python objects
+    u1, u2 = uuid4(), uuid4()
+    result = pa.array([pa.scalar(u1, type=pa.uuid()), u2], type=pa.uuid())
+    assert result.equals(pa.array([u1, u2], type=pa.uuid()))
+
+
 def test_tensor_type():
     tensor_type = pa.fixed_shape_tensor(pa.int8(), [2, 3])
     assert tensor_type.extension_name == "arrow.fixed_shape_tensor"

Reply via email to