Omega359 commented on code in PR #9181:
URL: https://github.com/apache/arrow-datafusion/pull/9181#discussion_r1486434185
##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -502,6 +503,176 @@ pub fn make_current_time(
move |_arg| Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(nano)))
}
+/// Returns a string representation of a date, time, timestamp or duration based
+/// on a Chrono pattern.
+///
+/// The syntax for the patterns can be found at
+/// <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>
+///
+/// # Examples
+///
+/// ```
+/// # use chrono::prelude::*;
+/// # use datafusion::prelude::*;
+/// # use datafusion::error::Result;
+/// # use datafusion_common::ScalarValue::TimestampNanosecond;
+/// # use std::sync::Arc;
+/// # use arrow_array::{Date32Array, RecordBatch, StringArray};
+/// # use arrow_schema::{DataType, Field, Schema};
+/// # #[tokio::main]
+/// # async fn main() -> Result<()> {
+/// let schema = Arc::new(Schema::new(vec![
+///     Field::new("values", DataType::Date32, false),
+///     Field::new("patterns", DataType::Utf8, false),
+/// ]));
+///
+/// let batch = RecordBatch::try_new(
+///     schema,
+///     vec![
+///         Arc::new(Date32Array::from(vec![
+///             18506,
+///             18507,
+///             18508,
+///             18509,
+///         ])),
+///         Arc::new(StringArray::from(vec![
+///             "%Y-%m-%d",
+///             "%Y:%m:%d",
+///             "%Y%m%d",
+///             "%d-%m-%Y",
+///         ])),
+///     ],
+/// )?;
+///
+/// let ctx = SessionContext::new();
+/// ctx.register_batch("t", batch)?;
+/// let df = ctx.table("t").await?;
+///
+/// // use the to_char function to convert col 'values'
+/// // to strings using the patterns in col 'patterns'
+/// let df = df.with_column(
+///     "date_str",
+///     to_char(col("values"), col("patterns"))
+/// )?;
+/// // Note that providing a scalar value for the pattern
+/// // is more performant
+/// let df = df.with_column(
+///     "date_str2",
+///     to_char(col("values"), lit("%d-%m-%Y"))
+/// )?;
+/// // literals can be used as well with dataframe calls
+/// let timestamp = "2026-07-08T09:10:11"
+///     .parse::<NaiveDateTime>()
+///     .unwrap()
+///     .with_nanosecond(56789)
+///     .unwrap()
+///     .timestamp_nanos_opt()
+///     .unwrap();
+/// let df = df.with_column(
+///     "timestamp_str",
+///     to_char(lit(TimestampNanosecond(Some(timestamp), None)), lit("%d-%m-%Y %H:%M:%S"))
+/// )?;
+///
+/// df.show().await?;
+///
+/// # Ok(())
+/// # }
+/// ```
+pub fn to_char(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    if args.len() != 2 {
+        return exec_err!("to_char function requires 2 arguments, got {}", args.len());
+    }
+
+    let is_scalar = args
Review Comment:
Great suggestion. After implementing this, the benchmark results improved:
```
❯ cargo criterion --bench to_char
Finished bench [optimized] target(s) in 0.20s
Gnuplot not found, using plotters backend
to_char_array_array_1000
                        time:   [247.58 µs 250.45 µs 253.54 µs]
                        change: [-1.5593% +0.3878% +2.3496%] (p = 0.70 > 0.05)
                        No change in performance detected.
to_char_array_scalar_1000
                        time:   [210.05 µs 211.33 µs 212.68 µs]
                        change: [-1.4277% +0.2494% +1.9011%] (p = 0.77 > 0.05)
                        No change in performance detected.
to_char_scalar_scalar_1000
                        time:   [565.92 ns 571.56 ns 577.38 ns]
                        change: [-2.7551% -0.4663% +1.7012%] (p = 0.69 > 0.05)
                        No change in performance detected.
```
I'll be pushing these changes shortly.
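For anyone wanting to reproduce these numbers locally, here is a minimal sketch of a criterion bench along these lines. The module paths, the 1000-row Date32 input, and the single `to_char_array_scalar_1000` case shown are illustrative assumptions, not the exact bench file in this PR:

```rust
// Hypothetical benches/to_char.rs sketch; import paths and data shapes are assumptions.
use std::sync::Arc;

use arrow_array::Date32Array;
use criterion::{criterion_group, criterion_main, Criterion};
use datafusion_common::ScalarValue;
use datafusion_expr::ColumnarValue;
use datafusion_physical_expr::datetime_expressions::to_char;

fn criterion_benchmark(c: &mut Criterion) {
    // 1000 arbitrary Date32 values (days since the Unix epoch).
    let dates = ColumnarValue::Array(Arc::new(Date32Array::from(
        (0..1000).collect::<Vec<i32>>(),
    )));
    // A scalar pattern, which exercises the cheaper array/scalar path.
    let pattern =
        ColumnarValue::Scalar(ScalarValue::Utf8(Some("%Y-%m-%d".to_string())));

    c.bench_function("to_char_array_scalar_1000", |b| {
        b.iter(|| to_char(&[dates.clone(), pattern.clone()]).unwrap())
    });
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
```

Run with `cargo criterion --bench to_char` as above.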