rok commented on code in PR #48746:
URL: https://github.com/apache/arrow/pull/48746#discussion_r2891547824
##########
python/pyarrow/src/arrow/python/python_to_arrow.cc:
##########
@@ -584,6 +584,14 @@ class PyConverter : public Converter<PyObject*,
PyConversionOptions> {
}
};
+// Helper function to unwrap extension scalar to its storage scalar
+const Scalar& GetStorageScalar(const Scalar& scalar) {
+ if (scalar.type->id() == Type::EXTENSION) {
+ return *checked_cast<const ExtensionScalar&>(scalar).value;
+ }
+ return scalar;
+}
+
Review Comment:
How about using `Result` here?
```suggestion
// Helper function to unwrap extension scalar to its storage scalar
Result<const Scalar*> GetStorageScalar(const Scalar& scalar) {
if (scalar.type->id() != Type::EXTENSION) {
return &scalar;
}
const auto& extension_scalar = checked_cast<const
ExtensionScalar&>(scalar);
return extension_scalar.value.get();
}
```
##########
python/pyarrow/tests/test_extension_type.py:
##########
@@ -1486,6 +1486,71 @@ def bytes(self):
pa.scalar(bad)
+def test_array_from_extension_scalars():
+ import datetime
+ from decimal import Decimal
+
+ builtin_cases = [
+ (pa.uuid(), [b"0123456789abcdef"]),
+ (pa.bool8(), [0, 1]),
+ (pa.json_(pa.string()), ['{"a":1}', '{"b":2}']),
+ (pa.opaque(pa.binary(), "t", "v"), [b"x", b"y"]),
+ ]
+ for ext_type, values in builtin_cases:
+ scalars = [pa.scalar(v, type=ext_type) for v in values]
+ result = pa.array(scalars, type=ext_type)
+ expected = pa.array(values, type=ext_type)
+ assert result.equals(expected)
+
+ # Custom extension types requiring registration
+ custom_cases = [
+ (TinyIntType(), [1, 2]),
+ (IntegerType(), [100, 200]),
+ (LabelType(), ["a", "b"]),
+ (MyStructType(), [{"left": 1, "right": 2}]),
+ (AnnotatedType(pa.timestamp("us"), "ts"),
+ [datetime.datetime(2023, 1, 1)]),
+ (AnnotatedType(pa.duration("s"), "dur"),
+ [datetime.timedelta(seconds=100)]),
+ (AnnotatedType(pa.date32(), "date"),
+ [datetime.date(2023, 1, 1)]),
+ (AnnotatedType(pa.float64(), "f"), [1.5, 2.5]),
+ (AnnotatedType(pa.bool_(), "b"), [True, False]),
+ (AnnotatedType(pa.binary(), "bin"), [b"x", b"y"]),
+ (AnnotatedType(pa.decimal128(10, 2), "dec"),
+ [Decimal("1.50"), Decimal("2.75")]),
+ (AnnotatedType(pa.large_string(), "lstr"), ["hello", "world"]),
+ (AnnotatedType(pa.large_binary(), "lbin"), [b"ab", b"cd"]),
+ ]
+ for ext_type, values in custom_cases:
+ with registered_extension_type(ext_type):
+ scalars = [pa.scalar(v, type=ext_type) for v in values]
+ result = pa.array(scalars, type=ext_type)
+ expected = pa.array(values, type=ext_type)
+ assert result.equals(expected)
+
+ # Null handling
+ uuid_type = pa.uuid()
+ scalars = [pa.scalar(b"0123456789abcdef", type=uuid_type),
+ pa.scalar(None, type=uuid_type)]
+ result = pa.array(scalars, type=uuid_type)
+ assert result[0].is_valid and not result[1].is_valid
+
Review Comment:
Maybe test `pa.ExtensionScalar.from_storage` path too?
```suggestion
scalars = [
pa.ExtensionScalar.from_storage(uuid_type, b"0123456789abcdef"),
pa.ExtensionScalar.from_storage(uuid_type, None),
]
result = pa.array(scalars, type=uuid_type)
assert result.to_pylist() == [UUID(bytes=b"0123456789abcdef"), None]
```
##########
python/pyarrow/tests/test_extension_type.py:
##########
@@ -1486,6 +1486,71 @@ def bytes(self):
pa.scalar(bad)
Review Comment:
Should such a test pass or fail?
```suggestion
def test_array_from_mismatched_extension_scalar_type():
scalar = pa.scalar(b"1" * 16, type=ExampleUuidType())
with pytest.raises(TypeError):
pa.array([scalar], type=ExampleUuidType2())
```
##########
python/pyarrow/src/arrow/python/python_to_arrow.cc:
##########
@@ -663,7 +671,8 @@ class PyPrimitiveConverter<
} else if (arrow::py::is_scalar(value)) {
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> scalar,
arrow::py::unwrap_scalar(value));
- ARROW_RETURN_NOT_OK(this->primitive_builder_->AppendScalar(*scalar));
+ ARROW_RETURN_NOT_OK(
+ this->primitive_builder_->AppendScalar(GetStorageScalar(*scalar)));
Review Comment:
If we change to `Result` we'll also need:
```suggestion
ARROW_ASSIGN_OR_RAISE(const Scalar* storage_scalar,
GetStorageScalar(*scalar));
ARROW_RETURN_NOT_OK(
this->primitive_builder_->AppendScalar(*storage_scalar));
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]