Jefffrey commented on code in PR #20305:
URL: https://github.com/apache/datafusion/pull/20305#discussion_r2822774611
##########
datafusion/functions/src/unicode/translate.rs:
##########
@@ -99,6 +100,65 @@ impl ScalarUDFImpl for TranslateFunc {
&self,
args: datafusion_expr::ScalarFunctionArgs,
) -> Result<ColumnarValue> {
+ // When from and to are scalars, pre-build the translation map once
+ if let (Some(from_str), Some(to_str)) = (
+ try_as_scalar_str(&args.args[1]),
+ try_as_scalar_str(&args.args[2]),
+ ) {
+ let from_graphemes: Vec<&str> = from_str.graphemes(true).collect();
Review Comment:
We probably don't need to collect `from_graphemes` into a `Vec`; can we just
iterate directly on the graphemes?
##########
datafusion/functions/src/unicode/translate.rs:
##########
@@ -46,10 +46,10 @@ use datafusion_macros::user_doc;
+--------------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
- argument(name = "chars", description = "Characters to translate."),
+ argument(name = "from", description = "The characters to be replaced."),
argument(
- name = "translation",
- description = "Translation characters. Translation characters replace
only characters at the same position in the **chars** string."
+ name = "to",
+ description = "The characters to replace them with. Each character in
**from** that is found in **str** is replaced by the character at the same
index in **to**. Any characters in **from** that don't have a corresponding
character in **to** are removed."
Review Comment:
Nice doc update; it might be worth adding a note about the behaviour for
duplicate chars in `from`?
##########
datafusion/functions/src/unicode/translate.rs:
##########
@@ -99,6 +100,65 @@ impl ScalarUDFImpl for TranslateFunc {
&self,
args: datafusion_expr::ScalarFunctionArgs,
) -> Result<ColumnarValue> {
+ // When from and to are scalars, pre-build the translation map once
+ if let (Some(from_str), Some(to_str)) = (
+ try_as_scalar_str(&args.args[1]),
+ try_as_scalar_str(&args.args[2]),
+ ) {
+ let from_graphemes: Vec<&str> = from_str.graphemes(true).collect();
+ let to_graphemes: Vec<&str> = to_str.graphemes(true).collect();
+
+ let mut from_map: HashMap<&str, usize> = HashMap::new();
+ for (index, c) in from_graphemes.iter().enumerate() {
+ // Ignore characters that already exist in from_map
+ from_map.entry(*c).or_insert(index);
+ }
+
+ let ascii_table = build_ascii_translate_table(from_str, to_str);
+
+ let string_array = match &args.args[0] {
+ ColumnarValue::Array(arr) => Arc::clone(arr),
+ ColumnarValue::Scalar(s) =>
s.to_array_of_size(args.number_rows)?,
+ };
Review Comment:
```suggestion
let string_array =
args.args[0].to_array_of_size(args.number_rows)?;
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]