This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 62770b602a [Variant]: Implement `DataType::Dictionary` support for
`cast_to_variant` kernel (#8173)
62770b602a is described below
commit 62770b602a7ed4ade22f593300f9f193b568fe16
Author: Liam Bao <[email protected]>
AuthorDate: Wed Aug 20 14:57:48 2025 -0400
[Variant]: Implement `DataType::Dictionary` support for `cast_to_variant`
kernel (#8173)
# Which issue does this PR close?
- Closes #8062
# Rationale for this change
# What changes are included in this PR?
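Implement `DataType::Dictionary` support in `cast_to_variant`: the dictionary's
values array is cast to variants once, and each key is then resolved against
that result. A null key, or a key that points to a null value, produces a null.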
# Are these changes tested?
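Yes. Two unit tests are added: `test_cast_to_variant_dictionary` and
`test_cast_to_variant_dictionary_with_nulls`.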
# Are there any user-facing changes?
Yes. `cast_to_variant` now accepts `DataType::Dictionary` input arrays.
Co-authored-by: Andrew Lamb <[email protected]>
---
parquet-variant-compute/src/cast_to_variant.rs | 70 +++++++++++++++++++++++---
1 file changed, 64 insertions(+), 6 deletions(-)
diff --git a/parquet-variant-compute/src/cast_to_variant.rs
b/parquet-variant-compute/src/cast_to_variant.rs
index 7eeb4da632..cdafb64b32 100644
--- a/parquet-variant-compute/src/cast_to_variant.rs
+++ b/parquet-variant-compute/src/cast_to_variant.rs
@@ -502,6 +502,27 @@ pub fn cast_to_variant(input: &dyn Array) ->
Result<VariantArray, ArrowError> {
builder
);
}
+ DataType::Dictionary(_, _) => {
+ let dict_array = input.as_any_dictionary();
+ let values_variant_array =
cast_to_variant(dict_array.values().as_ref())?;
+ let normalized_keys = dict_array.normalized_keys();
+ let keys = dict_array.keys();
+
+ for (i, key_idx) in normalized_keys.iter().enumerate() {
+ if keys.is_null(i) {
+ builder.append_null();
+ continue;
+ }
+
+ if values_variant_array.is_null(*key_idx) {
+ builder.append_null();
+ continue;
+ }
+
+ let value = values_variant_array.value(*key_idx);
+ builder.append_variant(value);
+ }
+ }
dt => {
return Err(ArrowError::CastError(format!(
"Unsupported data type for casting to Variant: {dt:?}",
@@ -520,12 +541,12 @@ mod tests {
use super::*;
use arrow::array::{
ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array,
Decimal128Array,
- Decimal256Array, Decimal32Array, Decimal64Array,
FixedSizeBinaryBuilder, Float16Array,
- Float32Array, Float64Array, GenericByteBuilder,
GenericByteViewBuilder, Int16Array,
- Int32Array, Int64Array, Int8Array, IntervalYearMonthArray,
LargeStringArray, NullArray,
- StringArray, StringViewArray, StructArray, Time32MillisecondArray,
Time32SecondArray,
- Time64MicrosecondArray, Time64NanosecondArray, UInt16Array,
UInt32Array, UInt64Array,
- UInt8Array,
+ Decimal256Array, Decimal32Array, Decimal64Array, DictionaryArray,
FixedSizeBinaryBuilder,
+ Float16Array, Float32Array, Float64Array, GenericByteBuilder,
GenericByteViewBuilder,
+ Int16Array, Int32Array, Int64Array, Int8Array, IntervalYearMonthArray,
LargeStringArray,
+ NullArray, StringArray, StringViewArray, StructArray,
Time32MillisecondArray,
+ Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
UInt16Array, UInt32Array,
+ UInt64Array, UInt8Array,
};
use arrow::buffer::NullBuffer;
use arrow_schema::{Field, Fields};
@@ -1826,6 +1847,43 @@ mod tests {
);
}
+ #[test]
+ fn test_cast_to_variant_dictionary() {
+ let values = StringArray::from(vec!["apple", "banana", "cherry",
"date"]);
+ let keys = Int32Array::from(vec![Some(0), Some(1), None, Some(2),
Some(0), Some(3)]);
+ let dict_array = DictionaryArray::<Int32Type>::try_new(keys,
Arc::new(values)).unwrap();
+
+ run_test(
+ Arc::new(dict_array),
+ vec![
+ Some(Variant::from("apple")),
+ Some(Variant::from("banana")),
+ None,
+ Some(Variant::from("cherry")),
+ Some(Variant::from("apple")),
+ Some(Variant::from("date")),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_cast_to_variant_dictionary_with_nulls() {
+ // Test dictionary with null values in the values array
+ let values = StringArray::from(vec![Some("a"), None, Some("c")]);
+ let keys = Int8Array::from(vec![Some(0), Some(1), Some(2), Some(0)]);
+ let dict_array = DictionaryArray::<Int8Type>::try_new(keys,
Arc::new(values)).unwrap();
+
+ run_test(
+ Arc::new(dict_array),
+ vec![
+ Some(Variant::from("a")),
+ None, // key 1 points to null value
+ Some(Variant::from("c")),
+ Some(Variant::from("a")),
+ ],
+ );
+ }
+
/// Converts the given `Array` to a `VariantArray` and tests the conversion
/// against the expected values. It also tests the handling of nulls by
/// setting one element to null and verifying the output.