wiedld commented on code in PR #6849:
URL: https://github.com/apache/arrow-rs/pull/6849#discussion_r1900218093
##########
arrow-string/src/regexp.rs:
##########
@@ -333,42 +353,74 @@ fn get_scalar_pattern_flag<'a, OffsetSize: OffsetSizeTrait>(
}
}
+fn get_scalar_pattern_flag_utf8view<'a>(
+ regex_array: &'a dyn Array,
+ flag_array: Option<&'a dyn Array>,
+) -> (Option<&'a str>, Option<&'a str>) {
+ let regex = regex_array.as_string_view();
+ let regex = regex.is_valid(0).then(|| regex.value(0));
+
+ if let Some(flag_array) = flag_array {
+ let flag = flag_array.as_string_view();
+ (regex, flag.is_valid(0).then(|| flag.value(0)))
+ } else {
+ (regex, None)
+ }
+}
+
+macro_rules! process_regexp_match {
+ ($array:expr, $regex:expr, $list_builder:expr) => {
+ $array
+ .iter()
+ .map(|value| {
+ match value {
+ // Required for Postgres compatibility:
+ // SELECT regexp_match('foobarbequebaz', ''); = {""}
+ Some(_) if $regex.as_str().is_empty() => {
+ $list_builder.values().append_value("");
+ $list_builder.append(true);
+ }
+ Some(value) => match $regex.captures(value) {
+ Some(caps) => {
+ let mut iter = caps.iter();
+ if caps.len() > 1 {
+ iter.next();
+ }
+ for m in iter.flatten() {
+ $list_builder.values().append_value(m.as_str());
+ }
+ $list_builder.append(true);
+ }
+ None => $list_builder.append(false),
+ },
+ None => $list_builder.append(false),
+ }
+ Ok(())
+ })
+ .collect::<Result<Vec<()>, ArrowError>>()?
+ };
+}
+
fn regexp_scalar_match<OffsetSize: OffsetSizeTrait>(
array: &GenericStringArray<OffsetSize>,
regex: &Regex,
) -> Result<ArrayRef, ArrowError> {
let builder: GenericStringBuilder<OffsetSize> =
GenericStringBuilder::with_capacity(0, 0);
let mut list_builder = ListBuilder::new(builder);
- array
- .iter()
- .map(|value| {
- match value {
- // Required for Postgres compatibility:
- // SELECT regexp_match('foobarbequebaz', ''); = {""}
- Some(_) if regex.as_str() == "" => {
- list_builder.values().append_value("");
- list_builder.append(true);
- }
- Some(value) => match regex.captures(value) {
- Some(caps) => {
- let mut iter = caps.iter();
- if caps.len() > 1 {
- iter.next();
- }
- for m in iter.flatten() {
- list_builder.values().append_value(m.as_str());
- }
+ process_regexp_match!(array, regex, list_builder);
- list_builder.append(true);
- }
- None => list_builder.append(false),
- },
- _ => list_builder.append(false),
- }
- Ok(())
- })
- .collect::<Result<Vec<()>, ArrowError>>()?;
+ Ok(Arc::new(list_builder.finish()))
+}
+
+fn regexp_scalar_match_utf8view(
+ array: &StringViewArray,
+ regex: &Regex,
+) -> Result<ArrayRef, ArrowError> {
+ let builder = StringViewBuilder::with_capacity(0);
+ let mut list_builder = ListBuilder::new(builder);
+
+ process_regexp_match!(array, regex, list_builder);
Review Comment:
@alamb @tlm365 -- Here is a proposed [trait ValuesBuilder:
ArrayBuilder](https://github.com/apache/arrow-rs/pull/6927) that provides a
common interface for building a list of values.
Note that there are several other common sets of builder functionality
(such as building lists of lists), but I didn't address those because (a) I
wanted to keep the PR small, and (b) I knew only of this immediate use case
(although there are likely others).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]