alamb commented on a change in pull request #9682:
URL: https://github.com/apache/arrow/pull/9682#discussion_r593082721
##########
File path: rust/arrow/src/compute/kernels/cast.rs
##########
@@ -262,98 +275,126 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) ->
Result<ArrayRef> {
return Ok(array.clone());
}
match (from_type, to_type) {
- (Struct(_), _) => Err(ArrowError::ComputeError(
+ (Struct(_), _) => Err(ArrowError::CastError(
Review comment:
+1 for using a separate error variant for `CastError`
##########
File path: rust/arrow/src/compute/kernels/cast.rs
##########
@@ -849,7 +1137,11 @@ const EPOCH_DAYS_FROM_CE: i32 = 719_163;
/// We do not perform this check on primitive data types as we only use this
/// function internally, where it is guaranteed to be infallible.
#[allow(clippy::unnecessary_wraps)]
-fn cast_array_data<TO>(array: &ArrayRef, to_type: DataType) -> Result<ArrayRef>
+fn cast_array_data<TO>(
+ array: &ArrayRef,
+ to_type: DataType,
+ _cast_options: &CastOptions,
Review comment:
Given the comments say this function should be infallable it probably
doesn't need the `cast_options` parameter
##########
File path: rust/arrow/src/compute/kernels/cast.rs
##########
@@ -939,133 +1244,184 @@ where
from.as_any()
.downcast_ref::<GenericStringArray<Offset>>()
.unwrap(),
- )))
+ cast_options,
+ )?))
}
fn string_to_numeric_cast<T, Offset: StringOffsetSizeTrait>(
from: &GenericStringArray<Offset>,
-) -> PrimitiveArray<T>
+ cast_options: &CastOptions,
+) -> Result<PrimitiveArray<T>>
where
T: ArrowNumericType,
<T as ArrowPrimitiveType>::Native: lexical_core::FromLexical,
{
- let iter = (0..from.len()).map(|i| {
- if from.is_null(i) {
- None
- } else {
- lexical_core::parse(from.value(i).as_bytes()).ok()
- }
- });
+ let vec = (0..from.len())
+ .map(|i| {
+ if from.is_null(i) {
+ Ok(None)
+ } else {
+ let result = lexical_core::parse(from.value(i).as_bytes());
+ if cast_options.safe {
+ Ok(result.ok())
+ } else {
+ Some(result.map_err(|_| {
+ ArrowError::CastError(
Review comment:
I would suggest a more specific message here and in the other places
that `CastError` is raised. When I used postgres (years ago) it didn't included
these details in its error messages, which was heavily frustrating
Something like
```
ArrowError::CastError(format!("Cannot cast string '{}' to numeric type {}",
from.value(i), T::DATA_TYPE))
```
##########
File path: rust/arrow/src/compute/kernels/cast.rs
##########
@@ -1772,7 +2193,7 @@ mod tests {
fn test_cast_date32_to_date64() {
let a = Date32Array::from(vec![10000, 17890]);
let array = Arc::new(a) as ArrayRef;
- let b = cast(&array, &DataType::Date64).unwrap();
+ let b = cast(&array, &DataType::Date64,
&DEFAULT_CAST_OPTIONS).unwrap();
Review comment:
In the tests I would recommend using explicit cast options with the
variant being tested rather than `DEFAULT_CAST_OPTIONS` so it is clear what is
being tested -- the default behavior or the behavior of unsafe casts
##########
File path: rust/datafusion/src/physical_plan/expressions/cast.rs
##########
@@ -217,7 +237,38 @@ mod tests {
fn invalid_cast() {
// Ensure a useful error happens at plan time if invalid casts are used
let schema = Schema::new(vec![Field::new("a", DataType::Int32,
false)]);
- let result = cast(col("a"), &schema, DataType::LargeBinary);
+
+ let result = cast(
+ col("a"),
+ &schema,
+ DataType::LargeBinary,
+ arrow::compute::DEFAULT_CAST_OPTIONS,
+ );
result.expect_err("expected Invalid CAST");
}
+
+ #[test]
+ fn invalid_cast_error() -> Result<()> {
+ // Ensure a useful error happens at plan time if invalid casts are used
Review comment:
🎉
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]