Omega359 commented on code in PR #9181:
URL: https://github.com/apache/arrow-datafusion/pull/9181#discussion_r1486298071


##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -502,6 +503,176 @@ pub fn make_current_time(
     move |_arg| Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(nano)))
 }
 
+/// Returns a string representation of a date, time, timestamp or duration based
+/// on a Chrono pattern.
+///
+/// The syntax for the patterns can be found at
+/// <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>
+///
+/// # Examples
+///
+/// ```
+/// # use chrono::prelude::*;
+/// # use datafusion::prelude::*;
+/// # use datafusion::error::Result;
+/// # use datafusion_common::ScalarValue::TimestampNanosecond;
+/// # use std::sync::Arc;
+/// # use arrow_array::{Date32Array, RecordBatch, StringArray};
+/// # use arrow_schema::{DataType, Field, Schema};
+/// # #[tokio::main]
+/// # async fn main() -> Result<()> {
+/// let schema = Arc::new(Schema::new(vec![
+///     Field::new("values", DataType::Date32, false),
+///     Field::new("patterns", DataType::Utf8, false),
+/// ]));
+///
+/// let batch = RecordBatch::try_new(
+///     schema,
+///     vec![
+///         Arc::new(Date32Array::from(vec![
+///             18506,
+///             18507,
+///             18508,
+///             18509,
+///         ])),
+///         Arc::new(StringArray::from(vec![
+///             "%Y-%m-%d",
+///             "%Y:%m:%d",
+///             "%Y%m%d",
+///             "%d-%m-%Y",
+///         ])),
+///     ],
+/// )?;
+///
+/// let ctx = SessionContext::new();
+/// ctx.register_batch("t", batch)?;
+/// let df = ctx.table("t").await?;
+///
+/// // use the to_char function to convert col 'values',
+/// // to strings using patterns in col 'patterns'
+/// let df = df.with_column(
+///     "date_str",
+///     to_char(col("values"), col("patterns"))
+/// )?;
+/// // Note that providing a scalar value for the pattern
+/// // is more performant
+/// let df = df.with_column(
+///     "date_str2",
+///     to_char(col("values"), lit("%d-%m-%Y"))
+/// )?;
+/// // literals can be used as well with dataframe calls
+/// let timestamp = "2026-07-08T09:10:11"
+///     .parse::<NaiveDateTime>()
+///     .unwrap()
+///     .with_nanosecond(56789)
+///     .unwrap()
+///     .timestamp_nanos_opt()
+///     .unwrap();
+/// let df = df.with_column(
+///     "timestamp_str",
+///     to_char(lit(TimestampNanosecond(Some(timestamp), None)), lit("%d-%m-%Y %H:%M:%S"))
+/// )?;
+///
+/// df.show().await?;
+///
+/// # Ok(())
+/// # }
+/// ```
+pub fn to_char(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    if args.len() != 2 {
+        return exec_err!("to_char function requires 2 arguments, got {}", args.len());
+    }
+
+    let is_scalar = args
+        .iter()
+        .fold(Option::<usize>::None, |acc, arg| match arg {
+            ColumnarValue::Scalar(_) => acc,
+            ColumnarValue::Array(a) => Some(a.len()),
+        })
+        .is_none();
+
+    let args = ColumnarValue::values_to_arrays(args)?;
+    if is_scalar {
+        _to_char_scalar(&args)
+    } else {
+        _to_char_array(&args)
+    }
+}
+
+fn _build_format_options<'a>(
+    data_type: &DataType,
+    format: &'a str,
+) -> Result<FormatOptions<'a>, Result<ColumnarValue>> {
+    let format_options = match data_type {
+        DataType::Date32 => FormatOptions::new().with_date_format(Some(format)),
+        DataType::Date64 => FormatOptions::new().with_datetime_format(Some(format)),
+        DataType::Time32(_) => FormatOptions::new().with_time_format(Some(format)),
+        DataType::Time64(_) => FormatOptions::new().with_time_format(Some(format)),
+        DataType::Timestamp(_, _) => FormatOptions::new()
+            .with_timestamp_format(Some(format))
+            .with_timestamp_tz_format(Some(format)),
+        DataType::Duration(_) => FormatOptions::new().with_duration_format(
+            if "ISO8601".eq_ignore_ascii_case(format) {
+                DurationFormat::ISO8601
+            } else {
+                DurationFormat::Pretty
+            },
+        ),
+        other => {
+            return Err(exec_err!(
+                "to_char only supports date, time, timestamp and duration data 
types, received {other:?}"
+            ));
+        }
+    };
+    Ok(format_options)
+}
+
+fn _to_char_scalar(args: &[ArrayRef]) -> Result<ColumnarValue> {
+    if &DataType::Utf8 != args[1].data_type() {
+        return exec_err!(
+            "Format for `to_char` must be non-null Utf8, received {:?}",
+            args[1].data_type()
+        );
+    }
+
+    let format = args[1].as_string::<i32>().value(0);
+    let format_options = match _build_format_options(args[0].data_type(), format) {
+        Ok(value) => value,
+        Err(value) => return value,
+    };
+
+    let formatter = ArrayFormatter::try_new(args[0].as_ref(), &format_options)?;
+    let formatted = (0..args[0].len())
+        .map(|i| formatter.value(i).to_string())
+        .collect::<Vec<_>>();
+
+    Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
+        formatted.first().unwrap().to_string(),
+    ))))
+}
+
+fn _to_char_array(args: &[ArrayRef]) -> Result<ColumnarValue> {
+    let mut results: Vec<String> = vec![];
+    let format_array = args[1].as_string::<i32>();
+    let data_type = args[0].data_type();
+
+    for idx in 0..args[0].len() {
+        let format = format_array.value(idx);
+        let format_options = match _build_format_options(data_type, format) {
+            Ok(value) => value,
+            Err(value) => return value,
+        };
+        // this isn't ideal but this can't use ValueFormatter as it isn't independent

Review Comment:
   Oh, I just realized I convert to an array in both cases. I'll investigate further.
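
   For context, here's a minimal sketch (not part of the PR, and assuming `ColumnarValue::values_to_arrays` keeps its current behaviour of expanding scalar arguments into one-row arrays) of what I mean by "convert to an array in both cases": even when every argument is a scalar, the code above materializes arrays, formats them with `ArrayFormatter`, and then re-wraps row 0 as a scalar.

   ```rust
   use datafusion_common::{Result, ScalarValue};
   use datafusion_expr::ColumnarValue;

   // Not the PR's code - just illustrating the behaviour described above:
   // values_to_arrays expands each scalar argument into a single-row array,
   // so the "scalar" path still formats through ArrayFormatter and takes row 0.
   fn scalar_still_becomes_array() -> Result<()> {
       let scalar = ColumnarValue::Scalar(ScalarValue::Date32(Some(18506)));
       let arrays = ColumnarValue::values_to_arrays(&[scalar])?;
       assert_eq!(arrays.len(), 1);
       assert_eq!(arrays[0].len(), 1); // the scalar became a one-element array
       Ok(())
   }
   ```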


