This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 5cc4e9f fix pattern handling in regexp_match function (#1065)
5cc4e9f is described below
commit 5cc4e9f53fab29e81ea7c98baac8ce277a0cb54a
Author: QP Hou <[email protected]>
AuthorDate: Tue Oct 5 12:40:46 2021 -0700
fix pattern handling in regexp_match function (#1065)
---
datafusion/src/physical_plan/regex_expressions.rs | 70 +++++++++++++++++++++--
1 file changed, 66 insertions(+), 4 deletions(-)
diff --git a/datafusion/src/physical_plan/regex_expressions.rs b/datafusion/src/physical_plan/regex_expressions.rs
index 69b27ff..4a10d0d 100644
--- a/datafusion/src/physical_plan/regex_expressions.rs
+++ b/datafusion/src/physical_plan/regex_expressions.rs
@@ -47,10 +47,17 @@ macro_rules! downcast_string_arg {
/// extract a specific group from a string column, using a regular expression
pub fn regexp_match<T: StringOffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
match args.len() {
- 2 => compute::regexp_match(downcast_string_arg!(args[0], "string", T),
downcast_string_arg!(args[1], "pattern", T), None)
- .map_err(DataFusionError::ArrowError),
- 3 => compute::regexp_match(downcast_string_arg!(args[0], "string", T),
downcast_string_arg!(args[1], "pattern", T),
Some(downcast_string_arg!(args[1], "flags", T)))
- .map_err(DataFusionError::ArrowError),
+ 2 => {
+ let values = downcast_string_arg!(args[0], "string", T);
+ let regex = downcast_string_arg!(args[1], "pattern", T);
+ compute::regexp_match(values, regex,
None).map_err(DataFusionError::ArrowError)
+ }
+ 3 => {
+ let values = downcast_string_arg!(args[0], "string", T);
+ let regex = downcast_string_arg!(args[1], "pattern", T);
+ let flags = Some(downcast_string_arg!(args[2], "flags", T));
+ compute::regexp_match(values, regex,
flags).map_err(DataFusionError::ArrowError)
+ }
other => Err(DataFusionError::Internal(format!(
"regexp_match was called with {} arguments. It requires at least 2
and at most 3.",
other
@@ -170,3 +177,58 @@ pub fn regexp_replace<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<Arr
))),
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use arrow::array::*;
+
+ #[test]
+ fn test_case_sensitive_regexp_match() {
+ let values = StringArray::from(vec!["abc"; 5]);
+ let patterns =
+ StringArray::from(vec!["^(a)", "^(A)", "(b|d)", "(B|D)",
"^(b|c)"]);
+
+ let elem_builder: GenericStringBuilder<i32> =
GenericStringBuilder::new(0);
+ let mut expected_builder = ListBuilder::new(elem_builder);
+ expected_builder.values().append_value("a").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.append(false).unwrap();
+ expected_builder.values().append_value("b").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.append(false).unwrap();
+ expected_builder.append(false).unwrap();
+ let expected = expected_builder.finish();
+
+ let re = regexp_match::<i32>(&[Arc::new(values),
Arc::new(patterns)]).unwrap();
+
+ assert_eq!(re.as_ref(), &expected);
+ }
+
+ #[test]
+ fn test_case_insensitive_regexp_match() {
+ let values = StringArray::from(vec!["abc"; 5]);
+ let patterns =
+ StringArray::from(vec!["^(a)", "^(A)", "(b|d)", "(B|D)",
"^(b|c)"]);
+ let flags = StringArray::from(vec!["i"; 5]);
+
+ let elem_builder: GenericStringBuilder<i32> =
GenericStringBuilder::new(0);
+ let mut expected_builder = ListBuilder::new(elem_builder);
+ expected_builder.values().append_value("a").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.values().append_value("a").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.values().append_value("b").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.values().append_value("b").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.append(false).unwrap();
+ let expected = expected_builder.finish();
+
+ let re =
+ regexp_match::<i32>(&[Arc::new(values), Arc::new(patterns),
Arc::new(flags)])
+ .unwrap();
+
+ assert_eq!(re.as_ref(), &expected);
+ }
+}