This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 1d01b7d7b fix: support scalar processing for `space` function (#3408)
1d01b7d7b is described below

commit 1d01b7d7bc2acf6d4d549e5fd83006f53596f459
Author: Kazantsev Maksim <[email protected]>
AuthorDate: Sat Feb 7 06:44:53 2026 +0400

    fix: support scalar processing for `space` function (#3408)
---
 native/spark-expr/src/string_funcs/string_space.rs | 32 ++++++++++++++++++----
 .../sql-tests/expressions/string/string_space.sql  |  6 ++--
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/native/spark-expr/src/string_funcs/string_space.rs b/native/spark-expr/src/string_funcs/string_space.rs
index 4ab536279..78d94208d 100644
--- a/native/spark-expr/src/string_funcs/string_space.rs
+++ b/native/spark-expr/src/string_funcs/string_space.rs
@@ -21,7 +21,7 @@ use arrow::array::{
 };
 use arrow::buffer::MutableBuffer;
 use arrow::datatypes::{DataType, Int32Type};
-use datafusion::common::{exec_err, internal_datafusion_err, DataFusionError, Result};
+use datafusion::common::{exec_err, internal_datafusion_err, DataFusionError, Result, ScalarValue};
 use datafusion::logical_expr::{
     ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
 };
@@ -86,15 +86,17 @@ impl ScalarUDFImpl for SparkStringSpace {
 pub fn spark_string_space(args: &[ColumnarValue; 1]) -> Result<ColumnarValue> {
     match args {
         [ColumnarValue::Array(array)] => {
-            let result = string_space(&array)?;
-
+            let result = string_space_array(&array)?;
             Ok(ColumnarValue::Array(result))
         }
-        _ => exec_err!("StringSpace(scalar) should be fold in Spark JVM side."),
+        [ColumnarValue::Scalar(scalar)] => {
+            let result = string_space_scalar(scalar)?;
+            Ok(ColumnarValue::Scalar(result))
+        }
     }
 }
 
-fn string_space(length: &dyn Array) -> std::result::Result<ArrayRef, DataFusionError> {
+fn string_space_array(length: &dyn Array) -> std::result::Result<ArrayRef, DataFusionError> {
     match length.data_type() {
         DataType::Int32 => {
             let array = length.as_any().downcast_ref::<Int32Array>().unwrap();
@@ -102,7 +104,7 @@ fn string_space(length: &dyn Array) -> std::result::Result<ArrayRef, DataFusionE
         }
         DataType::Dictionary(_, _) => {
             let dict = as_dictionary_array::<Int32Type>(length);
-            let values = string_space(dict.values())?;
+            let values = string_space_array(dict.values())?;
             let result = DictionaryArray::try_new(dict.keys().clone(), values)?;
             Ok(Arc::new(result))
         }
@@ -110,6 +112,24 @@ fn string_space(length: &dyn Array) -> std::result::Result<ArrayRef, DataFusionE
     }
 }
 
+fn string_space_scalar(scalar: &ScalarValue) -> Result<ScalarValue> {
+    match scalar {
+        ScalarValue::Int32(value) => {
+            let result = value.map(|v| {
+                if v <= 0 {
+                    String::new()
+                } else {
+                    " ".repeat(v as usize)
+                }
+            });
+            Ok(ScalarValue::Utf8(result))
+        }
+        other => {
+            exec_err!("Unsupported data type {other:?} for function `space`")
+        }
+    }
+}
+
 fn generic_string_space<OffsetSize: OffsetSizeTrait>(length: &Int32Array) -> ArrayRef {
     let array_len = length.len();
     let mut offsets = MutableBuffer::new((array_len + 1) * std::mem::size_of::<OffsetSize>());
diff --git a/spark/src/test/resources/sql-tests/expressions/string/string_space.sql b/spark/src/test/resources/sql-tests/expressions/string/string_space.sql
index ec24bfb97..4b826fbe3 100644
--- a/spark/src/test/resources/sql-tests/expressions/string/string_space.sql
+++ b/spark/src/test/resources/sql-tests/expressions/string/string_space.sql
@@ -26,11 +26,9 @@ INSERT INTO test_space VALUES (0), (1), (5), (NULL), (-1)
 query
 SELECT concat('[', space(n), ']') FROM test_space WHERE n >= 0 OR n IS NULL
 
--- Comet bug: space(-1) causes native crash "failed to round upto multiple of 64"
--- https://github.com/apache/datafusion-comet/issues/3326
-query ignore(https://github.com/apache/datafusion-comet/issues/3326)
+query
 SELECT concat('[', space(n), ']') FROM test_space WHERE n < 0
 
 -- literal arguments
-query ignore(https://github.com/apache/datafusion-comet/issues/3337)
+query
 SELECT concat('[', space(5), ']'), concat('[', space(0), ']'), space(-1), space(NULL)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to