alamb commented on code in PR #9181:
URL: https://github.com/apache/arrow-datafusion/pull/9181#discussion_r1486219769
##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -502,6 +503,176 @@ pub fn make_current_time(
move |_arg| Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(nano)))
}
+/// Returns a string representation of a date, time, timestamp or duration
based
+/// on a Chrono pattern.
+///
+/// The syntax for the patterns can be found at
+/// <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>
+///
+/// # Examples
+///
+/// ```
+/// # use chrono::prelude::*;
+/// # use datafusion::prelude::*;
+/// # use datafusion::error::Result;
+/// # use datafusion_common::ScalarValue::TimestampNanosecond;
+/// # use std::sync::Arc;
+/// # use arrow_array::{Date32Array, RecordBatch, StringArray};
+/// # use arrow_schema::{DataType, Field, Schema};
+/// # #[tokio::main]
+/// # async fn main() -> Result<()> {
+/// let schema = Arc::new(Schema::new(vec![
+/// Field::new("values", DataType::Date32, false),
+/// Field::new("patterns", DataType::Utf8, false),
+/// ]));
+///
+/// let batch = RecordBatch::try_new(
+/// schema,
+/// vec![
+/// Arc::new(Date32Array::from(vec![
+/// 18506,
+/// 18507,
+/// 18508,
+/// 18509,
+/// ])),
+/// Arc::new(StringArray::from(vec![
+/// "%Y-%m-%d",
+/// "%Y:%m:%d",
+/// "%Y%m%d",
+/// "%d-%m-%Y",
+/// ])),
+/// ],
+/// )?;
+///
+/// let ctx = SessionContext::new();
+/// ctx.register_batch("t", batch)?;
+/// let df = ctx.table("t").await?;
+///
+/// // use the to_char function to convert col 'values',
+/// // to strings using patterns in col 'patterns'
+/// let df = df.with_column(
+/// "date_str",
+/// to_char(col("values"), col("patterns"))
+/// )?;
+/// // Note that providing a scalar value for the pattern
+/// // is more performant
+/// let df = df.with_column(
+/// "date_str2",
+/// to_char(col("values"), lit("%d-%m-%Y"))
+/// )?;
+/// // literals can be used as well with dataframe calls
+/// let timestamp = "2026-07-08T09:10:11"
+/// .parse::<NaiveDateTime>()
+/// .unwrap()
+/// .with_nanosecond(56789)
+/// .unwrap()
+/// .timestamp_nanos_opt()
+/// .unwrap();
+/// let df = df.with_column(
+/// "timestamp_str",
+/// to_char(lit(TimestampNanosecond(Some(timestamp), None)), lit("%d-%m-%Y
%H:%M:%S"))
+/// )?;
+///
+/// df.show().await?;
+///
+/// # Ok(())
+/// # }
+/// ```
+pub fn to_char(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ if args.len() != 2 {
+ return exec_err!("to_char function requires 2 arguments, got {}",
args.len());
+ }
+
+ let is_scalar = args
+ .iter()
+ .fold(Option::<usize>::None, |acc, arg| match arg {
+ ColumnarValue::Scalar(_) => acc,
+ ColumnarValue::Array(a) => Some(a.len()),
+ })
+ .is_none();
+
+ let args = ColumnarValue::values_to_arrays(args)?;
+ if is_scalar {
+ _to_char_scalar(&args)
+ } else {
+ _to_char_array(&args)
+ }
+}
+
+fn _build_format_options<'a>(
+ data_type: &DataType,
+ format: &'a str,
+) -> Result<FormatOptions<'a>, Result<ColumnarValue>> {
+ let format_options = match data_type {
+ DataType::Date32 =>
FormatOptions::new().with_date_format(Some(format)),
+ DataType::Date64 =>
FormatOptions::new().with_datetime_format(Some(format)),
+ DataType::Time32(_) =>
FormatOptions::new().with_time_format(Some(format)),
+ DataType::Time64(_) =>
FormatOptions::new().with_time_format(Some(format)),
+ DataType::Timestamp(_, _) => FormatOptions::new()
+ .with_timestamp_format(Some(format))
+ .with_timestamp_tz_format(Some(format)),
+ DataType::Duration(_) => FormatOptions::new().with_duration_format(
+ if "ISO8601".eq_ignore_ascii_case(format) {
+ DurationFormat::ISO8601
+ } else {
+ DurationFormat::Pretty
+ },
+ ),
+ other => {
+ return Err(exec_err!(
+ "to_char only supports date, time, timestamp and duration data
types, received {other:?}"
+ ));
+ }
+ };
+ Ok(format_options)
+}
+
+fn _to_char_scalar(args: &[ArrayRef]) -> Result<ColumnarValue> {
Review Comment:
```suggestion
/// Special version when arg[1] is a scalar
fn _to_char_scalar(args: &[ArrayRef]) -> Result<ColumnarValue> {
```
##########
datafusion/sqllogictest/test_files/timestamps.slt:
##########
@@ -2565,3 +2565,114 @@ select make_date(2024, 1, null);
query error DataFusion error: Arrow error: Cast error: Cannot cast string ''
to value of Int32 type
select make_date(2024, 1, '');
+
+
+##########
+## to_char tests
+##########
+
+statement ok
+create table formats (
+ dates date,
+ times time,
+ timestamps timestamp,
+ date_format varchar,
+ time_format varchar,
+ timestamp_format varchar)
+as values
+ ('2000-01-01'::date, '23:45:01'::time, '2024-01-01 06:00:00'::timestamp,
'%d:%m:%Y', '%H-%M-%S', '%d:%m:%Y %H-%M-%S'),
+ ('2003-04-05'::date, '04:56:32'::time, '2025-01-01 23:59:58'::timestamp,
'%d:%m:%Y', '%H::%M::%S', '%d:%m:%Y %H-%M-%S');
+
+
+query T
+select to_char(dates, date_format) from formats;
+----
+01:01:2000
+05:04:2003
+
+query T
+select date_format(dates, date_format) from formats;
+----
+01:01:2000
+05:04:2003
+
+query T
+select to_char(times, time_format) from formats;
+----
+23-45-01
+04::56::32
+
+query T
+select to_char(timestamps, date_format) from formats;
+----
+01:01:2024
+01:01:2025
+
+query T
+select to_char(timestamps, timestamp_format) from formats;
+----
+01:01:2024 06-00-00
+01:01:2025 23-59-58
+
+query T
+select to_char('2000-02-03'::date, '%Y:%d:%m');
+----
+2000:03:02
+
+query T
+select to_char(arrow_cast(12345::int, 'Time32(Second)'), '%H-%M-%S')
+----
+03-25-45
+
+query T
+select to_char(arrow_cast(12344567::int, 'Time32(Millisecond)'), '%H-%M-%S %f')
+----
+03-25-44 567000000
+
+query T
+select to_char(arrow_cast(12344567000, 'Time64(Microsecond)'), '%H-%M-%S %f')
+----
+03-25-44 567000000
+
+query T
+select to_char(arrow_cast(12344567890000, 'Time64(Nanosecond)'), '%H-%M-%S %f')
+----
+03-25-44 567890000
+
+query T
+select to_char(arrow_cast(TIMESTAMP '2023-08-03 14:38:50Z', 'Timestamp(Second,
None)'), '%d-%m-%Y %H-%M-%S')
+----
+03-08-2023 14-38-50
+
+query T
+select to_char(arrow_cast(123456, 'Duration(Second)'), 'pretty');
+----
+1 days 10 hours 17 mins 36 secs
+
+query T
+select to_char(arrow_cast(123456, 'Duration(Second)'), 'iso8601');
+----
+P1DT37056S
+
+query T
+select to_char(arrow_cast(123456, 'Duration(Second)'), null);
+----
+1 days 10 hours 17 mins 36 secs
+
+# this panics with an unhandled internal error in arrow-rs code
Review Comment:
Can you possibly file a ticket upstream in arrow about this (it would be
nice to have a real error rather than panic)
If not I can do so as well
##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -502,6 +503,176 @@ pub fn make_current_time(
move |_arg| Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(nano)))
}
+/// Returns a string representation of a date, time, timestamp or duration
based
+/// on a Chrono pattern.
+///
+/// The syntax for the patterns can be found at
+/// <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>
+///
+/// # Examples
+///
+/// ```
+/// # use chrono::prelude::*;
+/// # use datafusion::prelude::*;
+/// # use datafusion::error::Result;
+/// # use datafusion_common::ScalarValue::TimestampNanosecond;
+/// # use std::sync::Arc;
+/// # use arrow_array::{Date32Array, RecordBatch, StringArray};
+/// # use arrow_schema::{DataType, Field, Schema};
+/// # #[tokio::main]
+/// # async fn main() -> Result<()> {
+/// let schema = Arc::new(Schema::new(vec![
+/// Field::new("values", DataType::Date32, false),
+/// Field::new("patterns", DataType::Utf8, false),
+/// ]));
+///
+/// let batch = RecordBatch::try_new(
+/// schema,
+/// vec![
+/// Arc::new(Date32Array::from(vec![
+/// 18506,
+/// 18507,
+/// 18508,
+/// 18509,
+/// ])),
+/// Arc::new(StringArray::from(vec![
+/// "%Y-%m-%d",
+/// "%Y:%m:%d",
+/// "%Y%m%d",
+/// "%d-%m-%Y",
+/// ])),
+/// ],
+/// )?;
+///
+/// let ctx = SessionContext::new();
+/// ctx.register_batch("t", batch)?;
+/// let df = ctx.table("t").await?;
+///
+/// // use the to_char function to convert col 'values',
+/// // to strings using patterns in col 'patterns'
+/// let df = df.with_column(
+/// "date_str",
+/// to_char(col("values"), col("patterns"))
+/// )?;
+/// // Note that providing a scalar value for the pattern
+/// // is more performant
+/// let df = df.with_column(
+/// "date_str2",
+/// to_char(col("values"), lit("%d-%m-%Y"))
+/// )?;
+/// // literals can be used as well with dataframe calls
+/// let timestamp = "2026-07-08T09:10:11"
+/// .parse::<NaiveDateTime>()
+/// .unwrap()
+/// .with_nanosecond(56789)
+/// .unwrap()
+/// .timestamp_nanos_opt()
+/// .unwrap();
+/// let df = df.with_column(
+/// "timestamp_str",
+/// to_char(lit(TimestampNanosecond(Some(timestamp), None)), lit("%d-%m-%Y
%H:%M:%S"))
+/// )?;
+///
+/// df.show().await?;
+///
+/// # Ok(())
+/// # }
+/// ```
+pub fn to_char(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ if args.len() != 2 {
+ return exec_err!("to_char function requires 2 arguments, got {}",
args.len());
+ }
+
+ let is_scalar = args
+ .iter()
+ .fold(Option::<usize>::None, |acc, arg| match arg {
+ ColumnarValue::Scalar(_) => acc,
+ ColumnarValue::Array(a) => Some(a.len()),
+ })
+ .is_none();
+
+ let args = ColumnarValue::values_to_arrays(args)?;
+ if is_scalar {
+ _to_char_scalar(&args)
+ } else {
+ _to_char_array(&args)
+ }
+}
+
+fn _build_format_options<'a>(
+ data_type: &DataType,
+ format: &'a str,
+) -> Result<FormatOptions<'a>, Result<ColumnarValue>> {
+ let format_options = match data_type {
+ DataType::Date32 =>
FormatOptions::new().with_date_format(Some(format)),
+ DataType::Date64 =>
FormatOptions::new().with_datetime_format(Some(format)),
+ DataType::Time32(_) =>
FormatOptions::new().with_time_format(Some(format)),
+ DataType::Time64(_) =>
FormatOptions::new().with_time_format(Some(format)),
+ DataType::Timestamp(_, _) => FormatOptions::new()
+ .with_timestamp_format(Some(format))
+ .with_timestamp_tz_format(Some(format)),
+ DataType::Duration(_) => FormatOptions::new().with_duration_format(
+ if "ISO8601".eq_ignore_ascii_case(format) {
+ DurationFormat::ISO8601
+ } else {
+ DurationFormat::Pretty
+ },
+ ),
+ other => {
+ return Err(exec_err!(
+ "to_char only supports date, time, timestamp and duration data
types, received {other:?}"
+ ));
+ }
+ };
+ Ok(format_options)
+}
+
+fn _to_char_scalar(args: &[ArrayRef]) -> Result<ColumnarValue> {
+ if &DataType::Utf8 != args[1].data_type() {
+ return exec_err!(
+ "Format for `to_char` must be non-null Utf8, received {:?}",
+ args[1].data_type()
+ );
+ }
+
+ let format = args[1].as_string::<i32>().value(0);
+ let format_options = match _build_format_options(args[0].data_type(),
format) {
+ Ok(value) => value,
+ Err(value) => return value,
+ };
+
+ let formatter = ArrayFormatter::try_new(args[0].as_ref(),
&format_options)?;
+ let formatted = (0..args[0].len())
+ .map(|i| formatter.value(i).to_string())
+ .collect::<Vec<_>>();
+
+ Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
+ formatted.first().unwrap().to_string(),
+ ))))
+}
+
+fn _to_char_array(args: &[ArrayRef]) -> Result<ColumnarValue> {
+ let mut results: Vec<String> = vec![];
+ let format_array = args[1].as_string::<i32>();
+ let data_type = args[0].data_type();
+
+ for idx in 0..args[0].len() {
+ let format = format_array.value(idx);
+ let format_options = match _build_format_options(data_type, format) {
+ Ok(value) => value,
+ Err(value) => return value,
+ };
+ // this isn't ideal but this can't use ValueFormatter as it isn't
independent
Review Comment:
One thing we do in regex code is to have a local cache (e.g. `HashMap<String,
ArrayFormatter>`) to avoid recreating the formatter for each row, but we could
do this as a follow-on
##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -502,6 +503,176 @@ pub fn make_current_time(
move |_arg| Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(nano)))
}
+/// Returns a string representation of a date, time, timestamp or duration
based
+/// on a Chrono pattern.
+///
+/// The syntax for the patterns can be found at
+/// <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>
+///
+/// # Examples
+///
+/// ```
+/// # use chrono::prelude::*;
+/// # use datafusion::prelude::*;
+/// # use datafusion::error::Result;
+/// # use datafusion_common::ScalarValue::TimestampNanosecond;
+/// # use std::sync::Arc;
+/// # use arrow_array::{Date32Array, RecordBatch, StringArray};
+/// # use arrow_schema::{DataType, Field, Schema};
+/// # #[tokio::main]
+/// # async fn main() -> Result<()> {
+/// let schema = Arc::new(Schema::new(vec![
+/// Field::new("values", DataType::Date32, false),
+/// Field::new("patterns", DataType::Utf8, false),
+/// ]));
+///
+/// let batch = RecordBatch::try_new(
+/// schema,
+/// vec![
+/// Arc::new(Date32Array::from(vec![
+/// 18506,
+/// 18507,
+/// 18508,
+/// 18509,
+/// ])),
+/// Arc::new(StringArray::from(vec![
+/// "%Y-%m-%d",
+/// "%Y:%m:%d",
+/// "%Y%m%d",
+/// "%d-%m-%Y",
+/// ])),
+/// ],
+/// )?;
+///
+/// let ctx = SessionContext::new();
+/// ctx.register_batch("t", batch)?;
+/// let df = ctx.table("t").await?;
+///
+/// // use the to_char function to convert col 'values',
+/// // to strings using patterns in col 'patterns'
+/// let df = df.with_column(
+/// "date_str",
+/// to_char(col("values"), col("patterns"))
+/// )?;
+/// // Note that providing a scalar value for the pattern
+/// // is more performant
+/// let df = df.with_column(
+/// "date_str2",
+/// to_char(col("values"), lit("%d-%m-%Y"))
+/// )?;
+/// // literals can be used as well with dataframe calls
+/// let timestamp = "2026-07-08T09:10:11"
+/// .parse::<NaiveDateTime>()
+/// .unwrap()
+/// .with_nanosecond(56789)
+/// .unwrap()
+/// .timestamp_nanos_opt()
+/// .unwrap();
+/// let df = df.with_column(
+/// "timestamp_str",
+/// to_char(lit(TimestampNanosecond(Some(timestamp), None)), lit("%d-%m-%Y
%H:%M:%S"))
+/// )?;
+///
+/// df.show().await?;
+///
+/// # Ok(())
+/// # }
+/// ```
+pub fn to_char(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ if args.len() != 2 {
+ return exec_err!("to_char function requires 2 arguments, got {}",
args.len());
+ }
+
+ let is_scalar = args
Review Comment:
Doesn't this code look for any argument that is a scalar, not just the
format (second arg?)
I think the intent would be clearer (and possibly the code would also be
faster) if you handled the scalar cast explicitly. Something like this
(untested) perhaps:
```rust
match &args[1] {
    // null format
    ColumnarValue::Scalar(ScalarValue::Utf8(None)) => {
        Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
    }
    // constant format
    ColumnarValue::Scalar(ScalarValue::Utf8(Some(format))) => {
        // invoke to_char_scalar with the known string, without converting to
        // an array
        _to_char_scalar(&args[0], format)
    }
    // array
    ...
}
```
##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -502,6 +503,176 @@ pub fn make_current_time(
move |_arg| Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(nano)))
}
+/// Returns a string representation of a date, time, timestamp or duration
based
+/// on a Chrono pattern.
+///
+/// The syntax for the patterns can be found at
+/// <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>
+///
+/// # Examples
+///
+/// ```
+/// # use chrono::prelude::*;
+/// # use datafusion::prelude::*;
+/// # use datafusion::error::Result;
+/// # use datafusion_common::ScalarValue::TimestampNanosecond;
+/// # use std::sync::Arc;
+/// # use arrow_array::{Date32Array, RecordBatch, StringArray};
+/// # use arrow_schema::{DataType, Field, Schema};
+/// # #[tokio::main]
+/// # async fn main() -> Result<()> {
+/// let schema = Arc::new(Schema::new(vec![
+/// Field::new("values", DataType::Date32, false),
+/// Field::new("patterns", DataType::Utf8, false),
+/// ]));
+///
+/// let batch = RecordBatch::try_new(
+/// schema,
+/// vec![
+/// Arc::new(Date32Array::from(vec![
+/// 18506,
+/// 18507,
+/// 18508,
+/// 18509,
+/// ])),
+/// Arc::new(StringArray::from(vec![
+/// "%Y-%m-%d",
+/// "%Y:%m:%d",
+/// "%Y%m%d",
+/// "%d-%m-%Y",
+/// ])),
+/// ],
+/// )?;
+///
+/// let ctx = SessionContext::new();
+/// ctx.register_batch("t", batch)?;
+/// let df = ctx.table("t").await?;
+///
+/// // use the to_char function to convert col 'values',
+/// // to strings using patterns in col 'patterns'
+/// let df = df.with_column(
+/// "date_str",
+/// to_char(col("values"), col("patterns"))
+/// )?;
+/// // Note that providing a scalar value for the pattern
+/// // is more performant
+/// let df = df.with_column(
+/// "date_str2",
+/// to_char(col("values"), lit("%d-%m-%Y"))
+/// )?;
+/// // literals can be used as well with dataframe calls
Review Comment:
This is cool 👍
##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -2820,4 +2993,247 @@ mod tests {
"Arrow error: Cast error: Can't cast value 4294967295 to type
Int32"
);
}
+
+ #[test]
+ fn test_to_char() {
+ let date = "2020-01-02T03:04:05"
+ .parse::<NaiveDateTime>()
+ .unwrap()
+ .with_nanosecond(12345)
+ .unwrap();
+ let date2 = "2026-07-08T09:10:11"
+ .parse::<NaiveDateTime>()
+ .unwrap()
+ .with_nanosecond(56789)
+ .unwrap();
+
+ let scalar_data = vec![
+ (
+ ScalarValue::Date32(Some(18506)),
+ ScalarValue::Utf8(Some("%Y::%m::%d".to_string())),
+ "2020::09::01".to_string(),
+ ),
+ (
+ ScalarValue::Date64(Some(date.timestamp_millis())),
+ ScalarValue::Utf8(Some("%Y::%m::%d".to_string())),
+ "2020::01::02".to_string(),
+ ),
+ (
+ ScalarValue::Time32Second(Some(31851)),
+ ScalarValue::Utf8(Some("%H-%M-%S".to_string())),
+ "08-50-51".to_string(),
+ ),
+ (
+ ScalarValue::Time32Millisecond(Some(18506000)),
+ ScalarValue::Utf8(Some("%H-%M-%S".to_string())),
+ "05-08-26".to_string(),
+ ),
+ (
+ ScalarValue::Time64Microsecond(Some(12344567000)),
+ ScalarValue::Utf8(Some("%H-%M-%S %f".to_string())),
+ "03-25-44 567000000".to_string(),
+ ),
+ (
+ ScalarValue::Time64Nanosecond(Some(12344567890000)),
+ ScalarValue::Utf8(Some("%H-%M-%S %f".to_string())),
+ "03-25-44 567890000".to_string(),
+ ),
+ (
+ ScalarValue::TimestampSecond(Some(date.timestamp()), None),
+ ScalarValue::Utf8(Some("%Y::%m::%d %S::%M::%H".to_string())),
+ "2020::01::02 05::04::03".to_string(),
+ ),
+ (
+
ScalarValue::TimestampMillisecond(Some(date.timestamp_millis()), None),
+ ScalarValue::Utf8(Some("%Y::%m::%d %S::%M::%H".to_string())),
+ "2020::01::02 05::04::03".to_string(),
+ ),
+ (
+
ScalarValue::TimestampMicrosecond(Some(date.timestamp_micros()), None),
+ ScalarValue::Utf8(Some("%Y::%m::%d %S::%M::%H
%f".to_string())),
+ "2020::01::02 05::04::03 000012000".to_string(),
+ ),
+ (
+ ScalarValue::TimestampNanosecond(
+ Some(date.timestamp_nanos_opt().unwrap()),
+ None,
+ ),
+ ScalarValue::Utf8(Some("%Y::%m::%d %S::%M::%H
%f".to_string())),
+ "2020::01::02 05::04::03 000012345".to_string(),
+ ),
+ ];
+
+ for (value, format, expected) in scalar_data {
+ let result =
+ to_char(&[ColumnarValue::Scalar(value),
ColumnarValue::Scalar(format)])
+ .expect("that to_char parsed values without error");
+
+ if let ColumnarValue::Scalar(ScalarValue::Utf8(date)) = result {
+ assert_eq!(expected, date.unwrap());
+ } else {
+ panic!("Expected a scalar value")
+ }
+ }
+
+ let array_scalar_data = vec![
+ (
+ Arc::new(Date32Array::from(vec![18506, 18507])) as ArrayRef,
+ ScalarValue::Utf8(Some("%Y::%m::%d".to_string())),
+ StringArray::from(vec!["2020::09::01", "2020::09::02"]),
+ ),
+ (
+ Arc::new(Date64Array::from(vec![
+ date.timestamp_millis(),
+ date2.timestamp_millis(),
+ ])) as ArrayRef,
+ ScalarValue::Utf8(Some("%Y::%m::%d".to_string())),
+ StringArray::from(vec!["2020::01::02", "2026::07::08"]),
+ ),
+ ];
+
+ let array_array_data = vec![
+ (
+ Arc::new(Date32Array::from(vec![18506, 18507])) as ArrayRef,
+ StringArray::from(vec!["%Y::%m::%d", "%d::%m::%Y"]),
+ StringArray::from(vec!["2020::09::01", "02::09::2020"]),
+ ),
+ (
+ Arc::new(Date64Array::from(vec![
+ date.timestamp_millis(),
+ date2.timestamp_millis(),
+ ])) as ArrayRef,
+ StringArray::from(vec!["%Y::%m::%d", "%d::%m::%Y"]),
+ StringArray::from(vec!["2020::01::02", "08::07::2026"]),
+ ),
+ (
+ Arc::new(Time32MillisecondArray::from(vec![1850600, 1860700]))
+ as ArrayRef,
+ StringArray::from(vec!["%H:%M:%S", "%H::%M::%S"]),
+ StringArray::from(vec!["00:30:50", "00::31::00"]),
+ ),
+ (
+ Arc::new(Time32SecondArray::from(vec![18506, 18507])) as
ArrayRef,
+ StringArray::from(vec!["%H:%M:%S", "%H::%M::%S"]),
+ StringArray::from(vec!["05:08:26", "05::08::27"]),
+ ),
+ (
+ Arc::new(Time64MicrosecondArray::from(vec![12344567000,
22244567000]))
+ as ArrayRef,
+ StringArray::from(vec!["%H:%M:%S", "%H::%M::%S"]),
+ StringArray::from(vec!["03:25:44", "06::10::44"]),
+ ),
+ (
+ Arc::new(Time64NanosecondArray::from(vec![
+ 1234456789000,
+ 2224456789000,
+ ])) as ArrayRef,
+ StringArray::from(vec!["%H:%M:%S", "%H::%M::%S"]),
+ StringArray::from(vec!["00:20:34", "00::37::04"]),
+ ),
+ (
+ Arc::new(TimestampSecondArray::from(vec![
+ date.timestamp(),
+ date2.timestamp(),
+ ])) as ArrayRef,
+ StringArray::from(vec!["%Y::%m::%d %S::%M::%H", "%d::%m::%Y
%S-%M-%H"]),
+ StringArray::from(vec![
+ "2020::01::02 05::04::03",
+ "08::07::2026 11-10-09",
+ ]),
+ ),
+ (
+ Arc::new(TimestampMillisecondArray::from(vec![
+ date.timestamp_millis(),
+ date2.timestamp_millis(),
+ ])) as ArrayRef,
+ StringArray::from(vec![
+ "%Y::%m::%d %S::%M::%H %f",
+ "%d::%m::%Y %S-%M-%H %f",
+ ]),
+ StringArray::from(vec![
+ "2020::01::02 05::04::03 000000000",
+ "08::07::2026 11-10-09 000000000",
+ ]),
+ ),
+ (
+ Arc::new(TimestampMicrosecondArray::from(vec![
+ date.timestamp_micros(),
+ date2.timestamp_micros(),
+ ])) as ArrayRef,
+ StringArray::from(vec![
+ "%Y::%m::%d %S::%M::%H %f",
+ "%d::%m::%Y %S-%M-%H %f",
+ ]),
+ StringArray::from(vec![
+ "2020::01::02 05::04::03 000012000",
+ "08::07::2026 11-10-09 000056000",
+ ]),
+ ),
+ (
+ Arc::new(TimestampNanosecondArray::from(vec![
+ date.timestamp_nanos_opt().unwrap(),
+ date2.timestamp_nanos_opt().unwrap(),
+ ])) as ArrayRef,
+ StringArray::from(vec![
+ "%Y::%m::%d %S::%M::%H %f",
+ "%d::%m::%Y %S-%M-%H %f",
+ ]),
+ StringArray::from(vec![
+ "2020::01::02 05::04::03 000012345",
+ "08::07::2026 11-10-09 000056789",
+ ]),
+ ),
+ ];
+
+ for (value, format, expected) in array_scalar_data {
+ let result = to_char(&[
Review Comment:
These are good test cases 👍
I think we should add tests with:
1. The dates as a scalar, and the format as an array
2. Two scalar arguments
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]