This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 1d01b7d7b fix: support scalar processing for `space` function (#3408)
1d01b7d7b is described below
commit 1d01b7d7bc2acf6d4d549e5fd83006f53596f459
Author: Kazantsev Maksim <[email protected]>
AuthorDate: Sat Feb 7 06:44:53 2026 +0400
fix: support scalar processing for `space` function (#3408)
---
native/spark-expr/src/string_funcs/string_space.rs | 32 ++++++++++++++++++----
.../sql-tests/expressions/string/string_space.sql | 6 ++--
2 files changed, 28 insertions(+), 10 deletions(-)
diff --git a/native/spark-expr/src/string_funcs/string_space.rs b/native/spark-expr/src/string_funcs/string_space.rs
index 4ab536279..78d94208d 100644
--- a/native/spark-expr/src/string_funcs/string_space.rs
+++ b/native/spark-expr/src/string_funcs/string_space.rs
@@ -21,7 +21,7 @@ use arrow::array::{
};
use arrow::buffer::MutableBuffer;
use arrow::datatypes::{DataType, Int32Type};
-use datafusion::common::{exec_err, internal_datafusion_err, DataFusionError,
Result};
+use datafusion::common::{exec_err, internal_datafusion_err, DataFusionError,
Result, ScalarValue};
use datafusion::logical_expr::{
ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
};
@@ -86,15 +86,17 @@ impl ScalarUDFImpl for SparkStringSpace {
pub fn spark_string_space(args: &[ColumnarValue; 1]) -> Result<ColumnarValue> {
match args {
[ColumnarValue::Array(array)] => {
- let result = string_space(&array)?;
-
+ let result = string_space_array(&array)?;
Ok(ColumnarValue::Array(result))
}
- _ => exec_err!("StringSpace(scalar) should be fold in Spark JVM
side."),
+ [ColumnarValue::Scalar(scalar)] => {
+ let result = string_space_scalar(scalar)?;
+ Ok(ColumnarValue::Scalar(result))
+ }
}
}
-fn string_space(length: &dyn Array) -> std::result::Result<ArrayRef,
DataFusionError> {
+fn string_space_array(length: &dyn Array) -> std::result::Result<ArrayRef,
DataFusionError> {
match length.data_type() {
DataType::Int32 => {
let array = length.as_any().downcast_ref::<Int32Array>().unwrap();
@@ -102,7 +104,7 @@ fn string_space(length: &dyn Array) -> std::result::Result<ArrayRef, DataFusionE
}
DataType::Dictionary(_, _) => {
let dict = as_dictionary_array::<Int32Type>(length);
- let values = string_space(dict.values())?;
+ let values = string_space_array(dict.values())?;
let result = DictionaryArray::try_new(dict.keys().clone(),
values)?;
Ok(Arc::new(result))
}
@@ -110,6 +112,24 @@ fn string_space(length: &dyn Array) -> std::result::Result<ArrayRef, DataFusionE
}
}
+fn string_space_scalar(scalar: &ScalarValue) -> Result<ScalarValue> {
+ match scalar {
+ ScalarValue::Int32(value) => {
+ let result = value.map(|v| {
+ if v <= 0 {
+ String::new()
+ } else {
+ " ".repeat(v as usize)
+ }
+ });
+ Ok(ScalarValue::Utf8(result))
+ }
+ other => {
+ exec_err!("Unsupported data type {other:?} for function `space`")
+ }
+ }
+}
+
fn generic_string_space<OffsetSize: OffsetSizeTrait>(length: &Int32Array) ->
ArrayRef {
let array_len = length.len();
let mut offsets = MutableBuffer::new((array_len + 1) *
std::mem::size_of::<OffsetSize>());
diff --git a/spark/src/test/resources/sql-tests/expressions/string/string_space.sql b/spark/src/test/resources/sql-tests/expressions/string/string_space.sql
index ec24bfb97..4b826fbe3 100644
--- a/spark/src/test/resources/sql-tests/expressions/string/string_space.sql
+++ b/spark/src/test/resources/sql-tests/expressions/string/string_space.sql
@@ -26,11 +26,9 @@ INSERT INTO test_space VALUES (0), (1), (5), (NULL), (-1)
query
SELECT concat('[', space(n), ']') FROM test_space WHERE n >= 0 OR n IS NULL
--- Comet bug: space(-1) causes native crash "failed to round upto multiple of
64"
--- https://github.com/apache/datafusion-comet/issues/3326
-query ignore(https://github.com/apache/datafusion-comet/issues/3326)
+query
SELECT concat('[', space(n), ']') FROM test_space WHERE n < 0
-- literal arguments
-query ignore(https://github.com/apache/datafusion-comet/issues/3337)
+query
SELECT concat('[', space(5), ']'), concat('[', space(0), ']'), space(-1),
space(NULL)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]