alamb commented on code in PR #12093: URL: https://github.com/apache/datafusion/pull/12093#discussion_r1725294671
########## datafusion/functions/src/string/split_part.rs: ########## @@ -105,13 +108,70 @@ impl ScalarUDFImpl for SplitPartFunc { } } -macro_rules! process_split_part { - ($string_array: expr, $delimiter_array: expr, $n_array: expr) => {{ - let result = $string_array - .iter() - .zip($delimiter_array.iter()) - .zip($n_array.iter()) - .map(|((string, delimiter), n)| match (string, delimiter, n) { +/// Splits string at occurrences of delimiter and returns the n'th field (counting from one). +/// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def' +pub fn split_part<StringArrayLen: OffsetSizeTrait, DelimiterArrayLen: OffsetSizeTrait>( Review Comment: 😍 love the function rather than macro ########## datafusion/functions/src/string/split_part.rs: ########## @@ -105,13 +108,70 @@ impl ScalarUDFImpl for SplitPartFunc { } } -macro_rules! process_split_part { - ($string_array: expr, $delimiter_array: expr, $n_array: expr) => {{ - let result = $string_array - .iter() - .zip($delimiter_array.iter()) - .zip($n_array.iter()) - .map(|((string, delimiter), n)| match (string, delimiter, n) { +/// Splits string at occurrences of delimiter and returns the n'th field (counting from one). +/// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def' +pub fn split_part<StringArrayLen: OffsetSizeTrait, DelimiterArrayLen: OffsetSizeTrait>( + args: &[ArrayRef], +) -> Result<ArrayRef> { + let n_array = as_int64_array(&args[2])?; + + match (args[0].data_type(), args[1].data_type()) { + (DataType::Utf8View, DataType::Utf8View) => { + split_part_impl::<&StringViewArray, &StringViewArray, StringArrayLen>( + args[0].as_string_view(), + args[1].as_string_view(), + n_array, + ) + } + (_, DataType::Utf8View) => split_part_impl::< Review Comment: I almost wonder if it would be simpler to call `spit_part_impl` directly from `invoke` This extra level of indirection does save some repetition, but it might be clearer if all the possible combination of argument types were simply specified directly ########## datafusion/functions/src/string/split_part.rs: ########## @@ -125,58 +185,19 @@ macro_rules! process_split_part { } as usize; if index < len { - Ok(Some(split_string[index])) + builder.write_str(split_string[index])?; + builder.append_value(""); Review Comment: Is there a reason not to just call `append_value(split_string[index])` here? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org