This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 227d1f85ed improve null handling for to_char (#9689)
227d1f85ed is described below
commit 227d1f85ed77e55534e8c9b9052f3781676a7d2d
Author: Kunal Kundu <[email protected]>
AuthorDate: Sun Mar 24 23:41:21 2024 +0530
improve null handling for to_char (#9689)
* improve null handling for to_char
* early return from to_char for null format
* remove invalid comment, update example
* rename column for consistency across platforms for tests
* return None instead of empty string from to_char
* use arrow:new_null_array for fast init
---
datafusion-examples/examples/to_char.rs | 19 ++++++++++++++
datafusion/functions/src/datetime/to_char.rs | 32 ++++++++++++++++++-----
datafusion/sqllogictest/test_files/timestamps.slt | 14 +++++++---
3 files changed, 55 insertions(+), 10 deletions(-)
diff --git a/datafusion-examples/examples/to_char.rs
b/datafusion-examples/examples/to_char.rs
index ef616d72cc..f8ed68b46f 100644
--- a/datafusion-examples/examples/to_char.rs
+++ b/datafusion-examples/examples/to_char.rs
@@ -193,5 +193,24 @@ async fn main() -> Result<()> {
&result
);
+ // output format is null
+
+ let result = ctx
+ .sql("SELECT to_char(arrow_cast(123456, 'Duration(Second)'), null) as
result")
+ .await?
+ .collect()
+ .await?;
+
+ assert_batches_eq!(
+ &[
+ "+--------+",
+ "| result |",
+ "+--------+",
+ "| |",
+ "+--------+",
+ ],
+ &result
+ );
+
Ok(())
}
diff --git a/datafusion/functions/src/datetime/to_char.rs
b/datafusion/functions/src/datetime/to_char.rs
index 3ca098b1f9..ef5c45a5ad 100644
--- a/datafusion/functions/src/datetime/to_char.rs
+++ b/datafusion/functions/src/datetime/to_char.rs
@@ -19,7 +19,7 @@ use std::any::Any;
use std::sync::Arc;
use arrow::array::cast::AsArray;
-use arrow::array::{Array, ArrayRef, StringArray};
+use arrow::array::{new_null_array, Array, ArrayRef, StringArray};
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::{
Date32, Date64, Duration, Time32, Time64, Timestamp, Utf8,
@@ -109,7 +109,6 @@ impl ScalarUDFImpl for ToCharFunc {
}
match &args[1] {
- // null format, use default formats
ColumnarValue::Scalar(ScalarValue::Utf8(None))
| ColumnarValue::Scalar(ScalarValue::Null) => {
_to_char_scalar(args[0].clone(), None)
@@ -175,6 +174,18 @@ fn _to_char_scalar(
let data_type = &expression.data_type();
let is_scalar_expression = matches!(&expression, ColumnarValue::Scalar(_));
let array = expression.into_array(1)?;
+
+ if format.is_none() {
+ if is_scalar_expression {
+ return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)));
+ } else {
+ return Ok(ColumnarValue::Array(new_null_array(
+ &DataType::Utf8,
+ array.len(),
+ )));
+ }
+ }
+
let format_options = match _build_format_options(data_type, format) {
Ok(value) => value,
Err(value) => return value,
@@ -202,7 +213,7 @@ fn _to_char_scalar(
fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
let arrays = ColumnarValue::values_to_arrays(args)?;
- let mut results: Vec<String> = vec![];
+ let mut results: Vec<Option<String>> = vec![];
let format_array = arrays[1].as_string::<i32>();
let data_type = arrays[0].data_type();
@@ -212,6 +223,10 @@ fn _to_char_array(args: &[ColumnarValue]) ->
Result<ColumnarValue> {
} else {
Some(format_array.value(idx))
};
+ if format.is_none() {
+ results.push(None);
+ continue;
+ }
let format_options = match _build_format_options(data_type, format) {
Ok(value) => value,
Err(value) => return value,
@@ -221,7 +236,7 @@ fn _to_char_array(args: &[ColumnarValue]) ->
Result<ColumnarValue> {
let formatter = ArrayFormatter::try_new(arrays[0].as_ref(),
&format_options)?;
let result = formatter.value(idx).try_to_string();
match result {
- Ok(value) => results.push(value),
+ Ok(value) => results.push(Some(value)),
Err(e) => return exec_err!("{}", e),
}
}
@@ -230,9 +245,12 @@ fn _to_char_array(args: &[ColumnarValue]) ->
Result<ColumnarValue> {
ColumnarValue::Array(_) =>
Ok(ColumnarValue::Array(Arc::new(StringArray::from(
results,
)) as ArrayRef)),
- ColumnarValue::Scalar(_) =>
Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
- results.first().unwrap().to_string(),
- )))),
+ ColumnarValue::Scalar(_) => match results.first().unwrap() {
+ Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
+ value.to_string(),
+ )))),
+ None => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
+ },
}
}
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt
b/datafusion/sqllogictest/test_files/timestamps.slt
index f718bbf14c..f0e04b522a 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -2661,7 +2661,7 @@ PT123456S
query T
select to_char(arrow_cast(123456, 'Duration(Second)'), null);
----
-PT123456S
+NULL
query error DataFusion error: Execution error: Cast error: Format error
SELECT to_char(timestamps, '%X%K') from formats;
@@ -2672,14 +2672,22 @@ SELECT to_char('2000-02-03'::date, '%X%K');
query T
SELECT to_char(timestamps, null) from formats;
----
-2024-01-01T06:00:00Z
-2025-01-01T23:59:58Z
+NULL
+NULL
query T
SELECT to_char(null, '%d-%m-%Y');
----
(empty)
+query T
+SELECT to_char(column1, column2)
+FROM
+(VALUES ('2024-01-01 06:00:00'::timestamp, null), ('2025-01-01
23:59:58'::timestamp, '%d:%m:%Y %H-%M-%S'));
+----
+NULL
+01:01:2025 23-59-58
+
statement ok
drop table formats;