shivaaang commented on code in PR #20624:
URL: https://github.com/apache/datafusion/pull/20624#discussion_r2873384675
##########
datafusion/functions/src/unicode/translate.rs:
##########
@@ -93,7 +94,11 @@ impl ScalarUDFImpl for TranslateFunc {
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- utf8_to_str_type(&arg_types[0], "translate")
+ if arg_types[0] == DataType::Utf8View {
+ Ok(DataType::Utf8View)
+ } else {
+ utf8_to_str_type(&arg_types[0], "translate")
+ }
Review Comment:
Good point, simplified.
##########
datafusion/functions/src/unicode/translate.rs:
##########
@@ -116,33 +121,42 @@ impl ScalarUDFImpl for TranslateFunc {
let ascii_table = build_ascii_translate_table(from_str, to_str);
let string_array =
args.args[0].to_array_of_size(args.number_rows)?;
+ let len = string_array.len();
let result = match string_array.data_type() {
DataType::Utf8View => {
let arr = string_array.as_string_view();
- translate_with_map::<i32, _>(
+ let builder = StringViewBuilder::with_capacity(len);
+ translate_with_map(
arr,
&from_map,
&to_graphemes,
ascii_table.as_ref(),
+ builder,
)
}
DataType::Utf8 => {
let arr = string_array.as_string::<i32>();
- translate_with_map::<i32, _>(
+ let builder =
+ GenericStringBuilder::<i32>::with_capacity(len, len * 4);
Review Comment:
Updated to use `arr.value_data().len()` instead of `len * 4` as the capacity estimate at all call sites.
##########
datafusion/functions/src/unicode/translate.rs:
##########
@@ -172,41 +186,83 @@ fn try_as_scalar_str(cv: &ColumnarValue) -> Option<&str> {
}
fn invoke_translate(args: &[ArrayRef]) -> Result<ArrayRef> {
+ let len = args[0].len();
match args[0].data_type() {
DataType::Utf8View => {
let string_array = args[0].as_string_view();
let from_array = args[1].as_string::<i32>();
let to_array = args[2].as_string::<i32>();
- translate::<i32, _, _>(string_array, from_array, to_array)
+ let builder = StringViewBuilder::with_capacity(len);
+ translate(string_array, from_array, to_array, builder)
}
DataType::Utf8 => {
let string_array = args[0].as_string::<i32>();
let from_array = args[1].as_string::<i32>();
let to_array = args[2].as_string::<i32>();
- translate::<i32, _, _>(string_array, from_array, to_array)
+ let builder = GenericStringBuilder::<i32>::with_capacity(len, len * 4);
+ translate(string_array, from_array, to_array, builder)
}
DataType::LargeUtf8 => {
let string_array = args[0].as_string::<i64>();
let from_array = args[1].as_string::<i32>();
let to_array = args[2].as_string::<i32>();
- translate::<i64, _, _>(string_array, from_array, to_array)
+ let builder = GenericStringBuilder::<i64>::with_capacity(len, len * 4);
+ translate(string_array, from_array, to_array, builder)
}
other => {
exec_err!("Unsupported data type {other:?} for function translate")
}
}
}
+/// Helper trait to abstract over different string builder types so `translate`
+/// and `translate_with_map` can produce the correct output array type.
+trait TranslateOutput {
Review Comment:
Agreed, replaced with Arrow's `StringLikeArrayBuilder`.
##########
datafusion/functions/src/unicode/translate.rs:
##########
@@ -172,41 +186,83 @@ fn try_as_scalar_str(cv: &ColumnarValue) -> Option<&str> {
}
fn invoke_translate(args: &[ArrayRef]) -> Result<ArrayRef> {
+ let len = args[0].len();
match args[0].data_type() {
DataType::Utf8View => {
let string_array = args[0].as_string_view();
let from_array = args[1].as_string::<i32>();
let to_array = args[2].as_string::<i32>();
- translate::<i32, _, _>(string_array, from_array, to_array)
+ let builder = StringViewBuilder::with_capacity(len);
+ translate(string_array, from_array, to_array, builder)
}
DataType::Utf8 => {
let string_array = args[0].as_string::<i32>();
let from_array = args[1].as_string::<i32>();
let to_array = args[2].as_string::<i32>();
- translate::<i32, _, _>(string_array, from_array, to_array)
+ let builder = GenericStringBuilder::<i32>::with_capacity(len, len * 4);
+ translate(string_array, from_array, to_array, builder)
}
DataType::LargeUtf8 => {
let string_array = args[0].as_string::<i64>();
let from_array = args[1].as_string::<i32>();
let to_array = args[2].as_string::<i32>();
- translate::<i64, _, _>(string_array, from_array, to_array)
+ let builder = GenericStringBuilder::<i64>::with_capacity(len, len * 4);
+ translate(string_array, from_array, to_array, builder)
}
other => {
exec_err!("Unsupported data type {other:?} for function translate")
}
}
}
+/// Helper trait to abstract over different string builder types so `translate`
+/// and `translate_with_map` can produce the correct output array type.
+trait TranslateOutput {
Review Comment:
Thanks for the pointer; switched to `StringLikeArrayBuilder`, which works well here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]