Jefffrey commented on code in PR #20635:
URL: https://github.com/apache/datafusion/pull/20635#discussion_r2878298833
##########
datafusion/functions/src/datetime/to_char.rs:
##########
@@ -221,117 +216,95 @@ fn to_char_scalar(
};
}
- let format_options = match build_format_options(data_type, format) {
- Ok(value) => value,
- Err(value) => return value,
- };
-
+ let format_options = build_format_options(data_type, format)?;
let formatter = ArrayFormatter::try_new(array.as_ref(), &format_options)?;
- let formatted: Result<Vec<Option<String>>, ArrowError> = (0..array.len())
- .map(|i| {
- if array.is_null(i) {
- Ok(None)
- } else {
- formatter.value(i).try_to_string().map(Some)
- }
- })
- .collect();
-
- if let Ok(formatted) = formatted {
- if is_scalar_expression {
- Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
- formatted.first().unwrap().clone(),
- )))
+
+ let fmt_len = format.map_or(20, |f| f.len() + 10);
+ let mut builder = StringBuilder::with_capacity(array.len(), array.len() *
fmt_len);
+
+ for i in 0..array.len() {
+ if array.is_null(i) {
+ builder.append_null();
} else {
- Ok(ColumnarValue::Array(
- Arc::new(StringArray::from(formatted)) as ArrayRef
- ))
- }
- } else {
- // if the data type was a Date32, formatting could have failed because
the format string
- // contained datetime specifiers, so we'll retry by casting the date
array as a timestamp array
- if data_type == &Date32 {
- return to_char_scalar(&expression.cast_to(&Date64, None)?, format);
+ // Write directly into the builder's internal buffer, then
+ // commit the value with append_value("").
+ match formatter.value(i).write(&mut builder) {
+ Ok(()) => builder.append_value(""),
+ // Arrow's Date32 formatter only handles date specifiers
+ // (%Y, %m, %d, ...). Format strings with time specifiers
+ // (%H, %M, %S, ...) cause it to fail. When this happens,
+ // we retry by casting to Date64, whose datetime formatter
+ // handles both date and time specifiers (with zero for
+ // the time components).
+ Err(_) if data_type == &Date32 => {
+ return to_char_scalar(&expression.cast_to(&Date64, None)?,
format);
+ }
+ Err(e) => return Err(e.into()),
+ }
}
+ }
- exec_err!("{}", formatted.unwrap_err())
+ let result = builder.finish();
+ if is_scalar_expression {
+ let val = result.is_valid(0).then(|| result.value(0).to_string());
+ Ok(ColumnarValue::Scalar(ScalarValue::Utf8(val)))
+ } else {
+ Ok(ColumnarValue::Array(Arc::new(result) as ArrayRef))
}
}
fn to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
let arrays = ColumnarValue::values_to_arrays(args)?;
- let mut results: Vec<Option<String>> = vec![];
+ let data_array = &arrays[0];
let format_array = arrays[1].as_string::<i32>();
- let data_type = arrays[0].data_type();
+ let data_type = data_array.data_type();
- for idx in 0..arrays[0].len() {
- let format = if format_array.is_null(idx) {
- None
- } else {
- Some(format_array.value(idx))
- };
- if format.is_none() {
- results.push(None);
+ let fmt_len = 30;
Review Comment:
Would be good to provide some rationale for `30` here (even if its just
"arbitrary")
##########
datafusion/functions/src/datetime/to_char.rs:
##########
@@ -221,117 +216,95 @@ fn to_char_scalar(
};
}
- let format_options = match build_format_options(data_type, format) {
- Ok(value) => value,
- Err(value) => return value,
- };
-
+ let format_options = build_format_options(data_type, format)?;
let formatter = ArrayFormatter::try_new(array.as_ref(), &format_options)?;
- let formatted: Result<Vec<Option<String>>, ArrowError> = (0..array.len())
- .map(|i| {
- if array.is_null(i) {
- Ok(None)
- } else {
- formatter.value(i).try_to_string().map(Some)
- }
- })
- .collect();
-
- if let Ok(formatted) = formatted {
- if is_scalar_expression {
- Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
- formatted.first().unwrap().clone(),
- )))
+
+ let fmt_len = format.map_or(20, |f| f.len() + 10);
Review Comment:
We already know `format` is not `None` at this point (maybe we should
destructure it in the above check?)
Also is there a rationale for the `+ 10`?
##########
datafusion/functions/src/datetime/to_char.rs:
##########
@@ -221,117 +216,95 @@ fn to_char_scalar(
};
}
Review Comment:
If we're refactoring we could also simplify this to always return scalar
null here (no reason to return a null array since that's going to be less
efficient)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]