alamb commented on issue #7977: URL: https://github.com/apache/arrow-datafusion/issues/7977#issuecomment-1787934834
Here is an example of what a new API might look like. ### Existing API (BTW, this took me a non-trivial amount of time to get the types to line up and compile correctly.) ```rust //! Implementation of `to_timestamp` function that //! overrides the built in version in DataFusion because the semantics changed //! upstream: <https://github.com/apache/arrow-datafusion/pull/7844> /// Implementation of to_timestamp pub(crate) static TO_TIMESTAMP_UDF: Lazy<Arc<ScalarUDF>> = Lazy::new(|| { Arc::new(ScalarUDF::new( "to_timestamp", &Signature::uniform( 1, vec![ DataType::Int64, DataType::Timestamp(TimeUnit::Nanosecond, None), DataType::Timestamp(TimeUnit::Microsecond, None), DataType::Timestamp(TimeUnit::Millisecond, None), DataType::Timestamp(TimeUnit::Second, None), DataType::Utf8, ], Volatility::Immutable, ), &TO_TIMESTAMP_RETURN_TYPE, &TO_TIMESTAMP_IMPL, )) }); static TO_TIMESTAMP_RETURN_TYPE: Lazy<ReturnTypeFunction> = Lazy::new(|| { let func = |_arg_types: &[DataType]| Ok(Arc::new(DataType::Timestamp(TimeUnit::Nanosecond, None))); Arc::new(func) }); static TO_TIMESTAMP_IMPL: Lazy<ScalarFunctionImplementation> = Lazy::new(|| { let func = |args: &[ColumnarValue]| { if args.len() != 1 { return internal_err!("to_timestamp expected 1 argument, got {}", args.len()); } match args[0].data_type() { // call through to arrow cast kernel DataType::Int64 | DataType::Timestamp(_, _) => cast_column( &args[0], &DataType::Timestamp(TimeUnit::Nanosecond, None), None, ), DataType::Utf8 => datetime_expressions::to_timestamp_nanos(args), dt => internal_err!("to_timestamp does not support argument type '{dt}'"), } }; Arc::new(func) }); ``` Here is what such a function could look like as a trait, which I think is much more approachable to new users (in addition to being much more easily separated out, as @2010YOUY01 mentions above): ```rust /// Implementation of to_timestamp struct ToTimestamp {}; impl ScalarFunction for ToTimestamp { fn name(&self) -> &str { "to_timestamp" } fn signature(&self) -> 
Signature { Signature::uniform( 1, vec![ DataType::Int64, DataType::Timestamp(TimeUnit::Nanosecond, None), DataType::Timestamp(TimeUnit::Microsecond, None), DataType::Timestamp(TimeUnit::Millisecond, None), DataType::Timestamp(TimeUnit::Second, None), DataType::Utf8, ] } fn volatility(&self) -> Volatility { Volatility::Immutable, } fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> { Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) } fn evaluate(&self, args: &[ColumnarValue]) -> Result<Vec<ColumnarValue>>) { if args.len() != 1 { return internal_err!("to_timestamp expected 1 argument, got {}", args.len()); } match args[0].data_type() { // call through to arrow cast kernel DataType::Int64 | DataType::Timestamp(_, _) => cast_column( &args[0], &DataType::Timestamp(TimeUnit::Nanosecond, None), None, ), DataType::Utf8 => datetime_expressions::to_timestamp_nanos(args), dt => internal_err!("to_timestamp does not support argument type '{dt}'"), } } } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
