This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new f5138fcb10 feat: add to concat different data types error message the
data types (#7166)
f5138fcb10 is described below
commit f5138fcb10040e2428f2dd540177f5f224d0281e
Author: Raz Luvaton <[email protected]>
AuthorDate: Sat Mar 8 11:36:37 2025 +0200
feat: add to concat different data types error message the data types
(#7166)
* feat: add to concat different data types error message the data types
* improve the test, fix it, and list the names in order of appearance
* simplify
* limit the error message to at most 10 unique data types, and list the
data types in the same order as the arrays for easier debugging
* add tests for not printing all the data types
---
arrow-select/src/concat.rs | 116 ++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 110 insertions(+), 6 deletions(-)
diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs
index 050f4ae96a..09a151dd16 100644
--- a/arrow-select/src/concat.rs
+++ b/arrow-select/src/concat.rs
@@ -37,7 +37,7 @@ use arrow_array::*;
use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer,
OffsetBuffer};
use arrow_data::transform::{Capacities, MutableArrayData};
use arrow_schema::{ArrowError, DataType, FieldRef, SchemaRef};
-use std::sync::Arc;
+use std::{collections::HashSet, sync::Arc};
fn binary_capacity<T: ByteArrayType>(arrays: &[&dyn Array]) -> Capacities {
let mut item_capacity = 0;
@@ -223,9 +223,35 @@ pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef,
ArrowError> {
let d = arrays[0].data_type();
if arrays.iter().skip(1).any(|array| array.data_type() != d) {
- return Err(ArrowError::InvalidArgumentError(
- "It is not possible to concatenate arrays of different data
types.".to_string(),
- ));
+ // Create error message with up to 10 unique data types in the order
they appear
+ let error_message = {
+ // 10 max unique data types to print and another 1 to know if
there are more
+ let mut unique_data_types = HashSet::with_capacity(11);
+
+ let mut error_message =
+ format!("It is not possible to concatenate arrays of different
data types ({d}");
+ unique_data_types.insert(d);
+
+ for array in arrays {
+ let is_unique = unique_data_types.insert(array.data_type());
+
+ if unique_data_types.len() == 11 {
+ error_message.push_str(", ...");
+ break;
+ }
+
+ if is_unique {
+ error_message.push_str(", ");
+ error_message.push_str(&array.data_type().to_string());
+ }
+ }
+
+ error_message.push_str(").");
+
+ error_message
+ };
+
+ return Err(ArrowError::InvalidArgumentError(error_message));
}
match d {
@@ -340,9 +366,87 @@ mod tests {
fn test_concat_incompatible_datatypes() {
let re = concat(&[
&PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
+ // 2 string to make sure we only mention unique types
&StringArray::from(vec![Some("hello"), Some("bar"),
Some("world")]),
+ &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
+ // Another type to make sure we are showing all the incompatible
types
+ &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
]);
- assert!(re.is_err());
+
+ assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is
not possible to concatenate arrays of different data types (Int64, Utf8,
Int32).");
+ }
+
+ #[test]
+ fn test_concat_10_incompatible_datatypes_should_include_all_of_them() {
+ let re = concat(&[
+ &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
+ // 2 string to make sure we only mention unique types
+ &StringArray::from(vec![Some("hello"), Some("bar"),
Some("world")]),
+ &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
+ // Another type to make sure we are showing all the incompatible
types
+ &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
+ &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
+ &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
+ &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
+ &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
+ &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
+ // Non unique
+ &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
+ &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
+ &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0),
None]),
+ ]);
+
+ assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is
not possible to concatenate arrays of different data types (Int64, Utf8, Int32,
Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32).");
+ }
+
+ #[test]
+ fn test_concat_11_incompatible_datatypes_should_only_include_10() {
+ let re = concat(&[
+ &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
+ // 2 string to make sure we only mention unique types
+ &StringArray::from(vec![Some("hello"), Some("bar"),
Some("world")]),
+ &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
+ // Another type to make sure we are showing all the incompatible
types
+ &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
+ &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
+ &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
+ &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
+ &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
+ &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
+ // Non unique
+ &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
+ &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
+ &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0),
None]),
+ &PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0),
None]),
+ ]);
+
+ assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is
not possible to concatenate arrays of different data types (Int64, Utf8, Int32,
Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...).");
+ }
+
+ #[test]
+ fn test_concat_13_incompatible_datatypes_should_not_include_all_of_them() {
+ let re = concat(&[
+ &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
+ // 2 string to make sure we only mention unique types
+ &StringArray::from(vec![Some("hello"), Some("bar"),
Some("world")]),
+ &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
+ // Another type to make sure we are showing all the incompatible
types
+ &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
+ &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
+ &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
+ &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
+ &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
+ &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
+ // Non unique
+ &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
+ &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
+ &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0),
None]),
+ &PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0),
None]),
+ &PrimitiveArray::<Float16Type>::new_null(3),
+ &BooleanArray::from(vec![Some(true), Some(false), None]),
+ ]);
+
+ assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is
not possible to concatenate arrays of different data types (Int64, Utf8, Int32,
Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...).");
}
#[test]
@@ -924,7 +1028,7 @@ mod tests {
.unwrap();
let error = concat_batches(&schema1, [&batch1, &batch2]).unwrap_err();
- assert_eq!(error.to_string(), "Invalid argument error: It is not
possible to concatenate arrays of different data types.");
+ assert_eq!(error.to_string(), "Invalid argument error: It is not
possible to concatenate arrays of different data types (Int32, Utf8).");
}
#[test]