This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 227d1f85ed improve null handling for to_char (#9689)
227d1f85ed is described below

commit 227d1f85ed77e55534e8c9b9052f3781676a7d2d
Author: Kunal Kundu <[email protected]>
AuthorDate: Sun Mar 24 23:41:21 2024 +0530

    improve null handling for to_char (#9689)
    
    * improve null handling for to_char
    
    * early return from to_char for null format
    
    * remove invalid comment, update example
    
    * rename column for consistency across platforms for tests
    
    * return None instead of empty string from to_char
    
    * use arrow:new_null_array for fast init
---
 datafusion-examples/examples/to_char.rs           | 19 ++++++++++++++
 datafusion/functions/src/datetime/to_char.rs      | 32 ++++++++++++++++++-----
 datafusion/sqllogictest/test_files/timestamps.slt | 14 +++++++---
 3 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/datafusion-examples/examples/to_char.rs 
b/datafusion-examples/examples/to_char.rs
index ef616d72cc..f8ed68b46f 100644
--- a/datafusion-examples/examples/to_char.rs
+++ b/datafusion-examples/examples/to_char.rs
@@ -193,5 +193,24 @@ async fn main() -> Result<()> {
         &result
     );
 
+    // output format is null
+
+    let result = ctx
+        .sql("SELECT to_char(arrow_cast(123456, 'Duration(Second)'), null) as 
result")
+        .await?
+        .collect()
+        .await?;
+
+    assert_batches_eq!(
+        &[
+            "+--------+",
+            "| result |",
+            "+--------+",
+            "|        |",
+            "+--------+",
+        ],
+        &result
+    );
+
     Ok(())
 }
diff --git a/datafusion/functions/src/datetime/to_char.rs 
b/datafusion/functions/src/datetime/to_char.rs
index 3ca098b1f9..ef5c45a5ad 100644
--- a/datafusion/functions/src/datetime/to_char.rs
+++ b/datafusion/functions/src/datetime/to_char.rs
@@ -19,7 +19,7 @@ use std::any::Any;
 use std::sync::Arc;
 
 use arrow::array::cast::AsArray;
-use arrow::array::{Array, ArrayRef, StringArray};
+use arrow::array::{new_null_array, Array, ArrayRef, StringArray};
 use arrow::datatypes::DataType;
 use arrow::datatypes::DataType::{
     Date32, Date64, Duration, Time32, Time64, Timestamp, Utf8,
@@ -109,7 +109,6 @@ impl ScalarUDFImpl for ToCharFunc {
         }
 
         match &args[1] {
-            // null format, use default formats
             ColumnarValue::Scalar(ScalarValue::Utf8(None))
             | ColumnarValue::Scalar(ScalarValue::Null) => {
                 _to_char_scalar(args[0].clone(), None)
@@ -175,6 +174,18 @@ fn _to_char_scalar(
     let data_type = &expression.data_type();
     let is_scalar_expression = matches!(&expression, ColumnarValue::Scalar(_));
     let array = expression.into_array(1)?;
+
+    if format.is_none() {
+        if is_scalar_expression {
+            return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)));
+        } else {
+            return Ok(ColumnarValue::Array(new_null_array(
+                &DataType::Utf8,
+                array.len(),
+            )));
+        }
+    }
+
     let format_options = match _build_format_options(data_type, format) {
         Ok(value) => value,
         Err(value) => return value,
@@ -202,7 +213,7 @@ fn _to_char_scalar(
 
 fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
     let arrays = ColumnarValue::values_to_arrays(args)?;
-    let mut results: Vec<String> = vec![];
+    let mut results: Vec<Option<String>> = vec![];
     let format_array = arrays[1].as_string::<i32>();
     let data_type = arrays[0].data_type();
 
@@ -212,6 +223,10 @@ fn _to_char_array(args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
         } else {
             Some(format_array.value(idx))
         };
+        if format.is_none() {
+            results.push(None);
+            continue;
+        }
         let format_options = match _build_format_options(data_type, format) {
             Ok(value) => value,
             Err(value) => return value,
@@ -221,7 +236,7 @@ fn _to_char_array(args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
         let formatter = ArrayFormatter::try_new(arrays[0].as_ref(), 
&format_options)?;
         let result = formatter.value(idx).try_to_string();
         match result {
-            Ok(value) => results.push(value),
+            Ok(value) => results.push(Some(value)),
             Err(e) => return exec_err!("{}", e),
         }
     }
@@ -230,9 +245,12 @@ fn _to_char_array(args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
         ColumnarValue::Array(_) => 
Ok(ColumnarValue::Array(Arc::new(StringArray::from(
             results,
         )) as ArrayRef)),
-        ColumnarValue::Scalar(_) => 
Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
-            results.first().unwrap().to_string(),
-        )))),
+        ColumnarValue::Scalar(_) => match results.first().unwrap() {
+            Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
+                value.to_string(),
+            )))),
+            None => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
+        },
     }
 }
 
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt 
b/datafusion/sqllogictest/test_files/timestamps.slt
index f718bbf14c..f0e04b522a 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -2661,7 +2661,7 @@ PT123456S
 query T
 select to_char(arrow_cast(123456, 'Duration(Second)'), null);
 ----
-PT123456S
+NULL
 
 query error DataFusion error: Execution error: Cast error: Format error
 SELECT to_char(timestamps, '%X%K') from formats;
@@ -2672,14 +2672,22 @@ SELECT to_char('2000-02-03'::date, '%X%K');
 query T
 SELECT to_char(timestamps, null) from formats;
 ----
-2024-01-01T06:00:00Z
-2025-01-01T23:59:58Z
+NULL
+NULL
 
 query T
 SELECT to_char(null, '%d-%m-%Y');
 ----
 (empty)
 
+query T
+SELECT to_char(column1, column2)
+FROM
+(VALUES ('2024-01-01 06:00:00'::timestamp, null), ('2025-01-01 
23:59:58'::timestamp, '%d:%m:%Y %H-%M-%S'));
+----
+NULL
+01:01:2025 23-59-58
+
 statement ok
 drop table formats;
 

Reply via email to