wiedld commented on code in PR #6849:
URL: https://github.com/apache/arrow-rs/pull/6849#discussion_r1898557423
##########
arrow-string/src/regexp.rs:
##########
@@ -333,42 +353,74 @@ fn get_scalar_pattern_flag<'a, OffsetSize: OffsetSizeTrait>(
}
}
+fn get_scalar_pattern_flag_utf8view<'a>(
+ regex_array: &'a dyn Array,
+ flag_array: Option<&'a dyn Array>,
+) -> (Option<&'a str>, Option<&'a str>) {
+ let regex = regex_array.as_string_view();
+ let regex = regex.is_valid(0).then(|| regex.value(0));
+
+ if let Some(flag_array) = flag_array {
+ let flag = flag_array.as_string_view();
+ (regex, flag.is_valid(0).then(|| flag.value(0)))
+ } else {
+ (regex, None)
+ }
+}
+
+macro_rules! process_regexp_match {
+ ($array:expr, $regex:expr, $list_builder:expr) => {
+ $array
+ .iter()
+ .map(|value| {
+ match value {
+ // Required for Postgres compatibility:
+ // SELECT regexp_match('foobarbequebaz', ''); = {""}
+ Some(_) if $regex.as_str().is_empty() => {
+ $list_builder.values().append_value("");
+ $list_builder.append(true);
+ }
+ Some(value) => match $regex.captures(value) {
+ Some(caps) => {
+ let mut iter = caps.iter();
+ if caps.len() > 1 {
+ iter.next();
+ }
+ for m in iter.flatten() {
+ $list_builder.values().append_value(m.as_str());
+ }
+ $list_builder.append(true);
+ }
+ None => $list_builder.append(false),
+ },
+ None => $list_builder.append(false),
+ }
+ Ok(())
+ })
+ .collect::<Result<Vec<()>, ArrowError>>()?
+ };
+}
+
fn regexp_scalar_match<OffsetSize: OffsetSizeTrait>(
array: &GenericStringArray<OffsetSize>,
regex: &Regex,
) -> Result<ArrayRef, ArrowError> {
let builder: GenericStringBuilder<OffsetSize> =
GenericStringBuilder::with_capacity(0, 0);
let mut list_builder = ListBuilder::new(builder);
- array
- .iter()
- .map(|value| {
- match value {
- // Required for Postgres compatibility:
- // SELECT regexp_match('foobarbequebaz', ''); = {""}
- Some(_) if regex.as_str() == "" => {
- list_builder.values().append_value("");
- list_builder.append(true);
- }
- Some(value) => match regex.captures(value) {
- Some(caps) => {
- let mut iter = caps.iter();
- if caps.len() > 1 {
- iter.next();
- }
- for m in iter.flatten() {
- list_builder.values().append_value(m.as_str());
- }
+ process_regexp_match!(array, regex, list_builder);
- list_builder.append(true);
- }
- None => list_builder.append(false),
- },
- _ => list_builder.append(false),
- }
- Ok(())
- })
- .collect::<Result<Vec<()>, ArrowError>>()?;
+ Ok(Arc::new(list_builder.finish()))
+}
+
+fn regexp_scalar_match_utf8view(
+ array: &StringViewArray,
+ regex: &Regex,
+) -> Result<ArrayRef, ArrowError> {
+ let builder = StringViewBuilder::with_capacity(0);
+ let mut list_builder = ListBuilder::new(builder);
+
+ process_regexp_match!(array, regex, list_builder);
Review Comment:
I can do that in a follow-up PR. Thank you for the input, @alamb.
@tlm365 -- if the
[additional test](https://github.com/apache/arrow-rs/pull/6849#discussion_r1893499529)
gets added, then I can approve.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]