vegarsti commented on code in PR #8589:
URL: https://github.com/apache/arrow-rs/pull/8589#discussion_r2462609061
##########
arrow-cast/src/cast/mod.rs:
##########
@@ -11415,4 +11446,434 @@ mod tests {
"Invalid argument error: -1.0 is too small to store in a Decimal32
of precision 1. Min is -0.9"
);
}
+
+ #[test]
+ fn test_run_end_encoded_to_primitive() {
+ // Create a RunEndEncoded array: [1, 1, 2, 2, 2, 3]
+ let run_ends = Int32Array::from(vec![2, 5, 6]);
+ let values = Int32Array::from(vec![1, 2, 3]);
+ let run_array = RunArray::<Int32Type>::try_new(&run_ends,
&values).unwrap();
+ let array_ref = Arc::new(run_array) as ArrayRef;
+ // Cast to Int64
+ let cast_result = cast(&array_ref, &DataType::Int64).unwrap();
+ // Verify the result is a flat Int64Array holding the expanded values
+ let result_run_array =
cast_result.as_any().downcast_ref::<Int64Array>().unwrap();
+ assert_eq!(
+ result_run_array.values(),
+ &[1i64, 1i64, 2i64, 2i64, 2i64, 3i64]
+ );
+ }
+
+ #[test]
+ fn test_run_end_encoded_to_string() {
+ let run_ends = Int32Array::from(vec![2, 3, 5]);
+ let values = Int32Array::from(vec![10, 20, 30]);
+ let run_array = RunArray::<Int32Type>::try_new(&run_ends,
&values).unwrap();
+ let array_ref = Arc::new(run_array) as ArrayRef;
+
+ // Cast to String
+ let cast_result = cast(&array_ref, &DataType::Utf8).unwrap();
+
+ // Verify the result is a flat StringArray (the runs are expanded)
+ let result_array =
cast_result.as_any().downcast_ref::<StringArray>().unwrap();
+ // Check that values are correct
+ assert_eq!(result_array.value(0), "10");
+ assert_eq!(result_array.value(1), "10");
+ assert_eq!(result_array.value(2), "20");
+ }
+
+ #[test]
+ fn test_primitive_to_run_end_encoded() {
+ // Create an Int32 array with repeated values: [1, 1, 2, 2, 2, 3]
+ let source_array = Int32Array::from(vec![1, 1, 2, 2, 2, 3]);
+ let array_ref = Arc::new(source_array) as ArrayRef;
+
+ // Cast to RunEndEncoded<Int32, Int32>
+ let target_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int32, false)),
+ Arc::new(Field::new("values", DataType::Int32, true)),
+ );
+ let cast_result = cast(&array_ref, &target_type).unwrap();
+
+ // Verify the result is a RunArray
+ let result_run_array = cast_result
+ .as_any()
+ .downcast_ref::<RunArray<Int32Type>>()
+ .unwrap();
+
+ // Check run structure: runs should end at positions [2, 5, 6]
+ assert_eq!(result_run_array.run_ends().values(), &[2, 5, 6]);
+
+ // Check values: should be [1, 2, 3]
+ let values_array =
result_run_array.values().as_primitive::<Int32Type>();
+ assert_eq!(values_array.values(), &[1, 2, 3]);
+ }
+
+ #[test]
+ fn test_primitive_to_run_end_encoded_with_nulls() {
+ let source_array = Int32Array::from(vec![
+ Some(1),
+ Some(1),
+ None,
+ None,
+ Some(2),
+ Some(2),
+ Some(3),
+ Some(3),
+ None,
+ None,
+ Some(4),
+ Some(4),
+ Some(5),
+ Some(5),
+ None,
+ None,
+ ]);
+ let array_ref = Arc::new(source_array) as ArrayRef;
+ let target_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int32, false)),
+ Arc::new(Field::new("values", DataType::Int32, true)),
+ );
+ let cast_result = cast(&array_ref, &target_type).unwrap();
+ let result_run_array = cast_result
+ .as_any()
+ .downcast_ref::<RunArray<Int32Type>>()
+ .unwrap();
+ assert_eq!(
+ result_run_array.run_ends().values(),
+ &[2, 4, 6, 8, 10, 12, 14, 16]
+ );
+ assert_eq!(
+ result_run_array
+ .values()
+ .as_primitive::<Int32Type>()
+ .values(),
+ &[1, 0, 2, 3, 0, 4, 5, 0]
+ );
+ assert_eq!(result_run_array.values().null_count(), 3);
+ }
+
+ #[test]
+ fn test_primitive_to_run_end_encoded_with_nulls_consecutive() {
+ let source_array = Int64Array::from(vec![
+ Some(1),
+ Some(1),
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ Some(4),
+ Some(20),
+ Some(500),
+ Some(500),
+ None,
+ None,
+ ]);
+ let array_ref = Arc::new(source_array) as ArrayRef;
+ let target_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int16, false)),
+ Arc::new(Field::new("values", DataType::Int64, true)),
+ );
+ let cast_result = cast(&array_ref, &target_type).unwrap();
+ let result_run_array = cast_result
+ .as_any()
+ .downcast_ref::<RunArray<Int16Type>>()
+ .unwrap();
+ assert_eq!(
+ result_run_array.run_ends().values(),
+ &[2, 10, 11, 12, 14, 16]
+ );
+ assert_eq!(
+ result_run_array
+ .values()
+ .as_primitive::<Int64Type>()
+ .values(),
+ &[1, 0, 4, 20, 500, 0]
+ );
+ assert_eq!(result_run_array.values().null_count(), 2);
+ }
+
+ #[test]
+ fn test_string_to_run_end_encoded() {
+ // Create a String array with repeated values: ["a", "a", "b", "c", "c"]
+ let source_array = StringArray::from(vec!["a", "a", "b", "c", "c"]);
+ let array_ref = Arc::new(source_array) as ArrayRef;
+
+ // Cast to RunEndEncoded<Int32, String>
+ let target_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int32, false)),
+ Arc::new(Field::new("values", DataType::Utf8, true)),
+ );
+ let cast_result = cast(&array_ref, &target_type).unwrap();
+
+ // Verify the result is a RunArray
+ let result_run_array = cast_result
+ .as_any()
+ .downcast_ref::<RunArray<Int32Type>>()
+ .unwrap();
+
+ // Check run structure: runs should end at positions [2, 3, 5]
+ assert_eq!(result_run_array.run_ends().values(), &[2, 3, 5]);
+
+ // Check values: should be ["a", "b", "c"]
+ let values_array = result_run_array.values().as_string::<i32>();
+ assert_eq!(values_array.value(0), "a");
+ assert_eq!(values_array.value(1), "b");
+ assert_eq!(values_array.value(2), "c");
+ }
+
+ #[test]
+ fn test_cast_with_type_conversion() {
+ // Create an Int32 array: [1, 1, 2, 2, 3]
+ let source_array = Int32Array::from(vec![1, 1, 2, 2, 3]);
+ let array_ref = Arc::new(source_array) as ArrayRef;
+
+ // Cast to RunEndEncoded<Int32, String> (values get converted to strings)
+ let target_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int32, false)),
+ Arc::new(Field::new("values", DataType::Utf8, true)),
+ );
+ let cast_result = cast(&array_ref, &target_type).unwrap();
+
+ // Verify the result is a RunArray with String values
+ let result_run_array = cast_result
+ .as_any()
+ .downcast_ref::<RunArray<Int32Type>>()
+ .unwrap();
+
+ // Check that values were converted to strings
+ assert_eq!(result_run_array.values().data_type(), &DataType::Utf8);
+
+ // Check run structure: runs should end at positions [2, 4, 5]
+ assert_eq!(result_run_array.run_ends().values(), &[2, 4, 5]);
+
+ // Check values: should be ["1", "2", "3"]
+ let values_array = result_run_array.values().as_string::<i32>();
+ assert_eq!(values_array.value(0), "1");
+ assert_eq!(values_array.value(1), "2");
+ assert_eq!(values_array.value(2), "3");
+ }
+
+ #[test]
+ fn test_empty_array_to_run_end_encoded() {
+ // Create an empty Int32 array
+ let source_array = Int32Array::from(Vec::<i32>::new());
+ let array_ref = Arc::new(source_array) as ArrayRef;
+
+ // Cast to RunEndEncoded<Int32, Int32>
+ let target_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int32, false)),
+ Arc::new(Field::new("values", DataType::Int32, true)),
+ );
+ let cast_result = cast(&array_ref, &target_type).unwrap();
+
+ // Verify the result is an empty RunArray
+ let result_run_array = cast_result
+ .as_any()
+ .downcast_ref::<RunArray<Int32Type>>()
+ .unwrap();
+
+ // Check that both run_ends and values are empty
+ assert_eq!(result_run_array.run_ends().len(), 0);
+ assert_eq!(result_run_array.values().len(), 0);
+ }
+
+ #[test]
+ fn test_run_end_encoded_with_nulls() {
+ // Create a RunEndEncoded array with nulls: [1, 1, null, 2, 2]
+ let run_ends = Int32Array::from(vec![2, 3, 5]);
+ let values = Int32Array::from(vec![Some(1), None, Some(2)]);
+ let run_array = RunArray::<Int32Type>::try_new(&run_ends,
&values).unwrap();
+ let array_ref = Arc::new(run_array) as ArrayRef;
+
+ // Cast to String
+ let cast_result = cast(&array_ref, &DataType::Utf8).unwrap();
+
+ // Verify the result preserves nulls
+ let result_run_array =
cast_result.as_any().downcast_ref::<StringArray>().unwrap();
+ assert_eq!(result_run_array.value(0), "1");
+ assert!(result_run_array.is_null(2));
+ assert_eq!(result_run_array.value(4), "2");
+ }
+
+ #[test]
+ fn test_different_index_types() {
+ // Test with Int16 index type
+ let source_array = Int32Array::from(vec![1, 1, 2, 3, 3]);
+ let array_ref = Arc::new(source_array) as ArrayRef;
+
+ let target_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int16, false)),
+ Arc::new(Field::new("values", DataType::Int32, true)),
+ );
+ let cast_result = cast(&array_ref, &target_type).unwrap();
+ assert_eq!(cast_result.data_type(), &target_type);
+
+ // Test with Int64 index type
+ let target_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int64, false)),
+ Arc::new(Field::new("values", DataType::Int32, true)),
+ );
+ let cast_result = cast(&array_ref, &target_type).unwrap();
+ assert_eq!(cast_result.data_type(), &target_type);
+ }
+
+ #[test]
+ fn test_unsupported_cast_to_run_end_encoded() {
+ // Create a Struct array - complex nested type that might not be supported
+ let field = Field::new("item", DataType::Int32, false);
+ let struct_array = StructArray::from(vec![(
+ Arc::new(field),
+ Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef,
+ )]);
+ let array_ref = Arc::new(struct_array) as ArrayRef;
+
+ // This should fail because:
+ // 1. The target type is not RunEndEncoded
+ // 2. The target type is not supported for casting from StructArray
+ let cast_result = cast(&array_ref, &DataType::FixedSizeBinary(10));
+
+ // Expect this to fail
+ assert!(cast_result.is_err());
+ }
+
+ /// Test casting RunEndEncoded<Int64, String> to RunEndEncoded<Int16, String> should fail
+ #[test]
+ fn test_cast_run_end_encoded_int64_to_int16_should_fail() {
Review Comment:
That is true! In that case, the cast fails with `Invalid argument error:
Found null values in run_ends array. The run_ends array should not have null
values.` (I added a test for that.)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]