alamb commented on code in PR #20635:
URL: https://github.com/apache/datafusion/pull/20635#discussion_r2886511442
##########
datafusion/functions/src/datetime/to_char.rs:
##########
@@ -194,144 +188,114 @@ fn build_format_options<'a>(
},
),
other => {
- return Err(exec_err!(
+ return exec_err!(
"to_char only supports date, time, timestamp and duration data
types, received {other:?}"
- ));
+ );
}
};
Ok(format_options)
}
-/// Special version when arg\[1] is a scalar
-fn to_char_scalar(
- expression: &ColumnarValue,
- format: Option<&str>,
-) -> Result<ColumnarValue> {
- // it's possible that the expression is a scalar however because
- // of the implementation in arrow-rs we need to convert it to an array
+/// Formats `expression` using a constant `format` string.
+fn to_char_scalar(expression: &ColumnarValue, format: &str) ->
Result<ColumnarValue> {
+ // ArrayFormatter requires an array, so scalar expressions must be
+ // converted to a 1-element array first.
let data_type = &expression.data_type();
let is_scalar_expression = matches!(&expression, ColumnarValue::Scalar(_));
- let array = expression.clone().into_array(1)?;
+ let array = expression.to_array(1)?;
- if format.is_none() {
- return if is_scalar_expression {
- Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
- } else {
- Ok(ColumnarValue::Array(new_null_array(&Utf8, array.len())))
- };
- }
+ let format_options = build_format_options(data_type, format)?;
+ let formatter = ArrayFormatter::try_new(array.as_ref(), &format_options)?;
- let format_options = match build_format_options(data_type, format) {
- Ok(value) => value,
- Err(value) => return value,
- };
+ // Pad the preallocated capacity a bit because format specifiers often
+ // expand the string (e.g., %Y -> "2026")
+ let fmt_len = format.len() + 10;
+ let mut builder = StringBuilder::with_capacity(array.len(), array.len() *
fmt_len);
- let formatter = ArrayFormatter::try_new(array.as_ref(), &format_options)?;
- let formatted: Result<Vec<Option<String>>, ArrowError> = (0..array.len())
- .map(|i| {
- if array.is_null(i) {
- Ok(None)
- } else {
- formatter.value(i).try_to_string().map(Some)
- }
- })
- .collect();
-
- if let Ok(formatted) = formatted {
- if is_scalar_expression {
- Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
- formatted.first().unwrap().clone(),
- )))
+ for i in 0..array.len() {
+ if array.is_null(i) {
+ builder.append_null();
} else {
- Ok(ColumnarValue::Array(
- Arc::new(StringArray::from(formatted)) as ArrayRef
- ))
- }
- } else {
- // if the data type was a Date32, formatting could have failed because
the format string
- // contained datetime specifiers, so we'll retry by casting the date
array as a timestamp array
- if data_type == &Date32 {
- return to_char_scalar(&expression.cast_to(&Date64, None)?, format);
+ // Write directly into the builder's internal buffer, then
+ // commit the value with append_value("").
+ match formatter.value(i).write(&mut builder) {
Review Comment:
👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]