alamb commented on a change in pull request #1065:
URL: https://github.com/apache/arrow-datafusion/pull/1065#discussion_r722595501
##########
File path: datafusion/src/physical_plan/regex_expressions.rs
##########
@@ -47,10 +47,17 @@ macro_rules! downcast_string_arg {
/// extract a specific group from a string column, using a regular expression
pub fn regexp_match<T: StringOffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
match args.len() {
- 2 => compute::regexp_match(downcast_string_arg!(args[0], "string", T),
downcast_string_arg!(args[1], "pattern", T), None)
- .map_err(DataFusionError::ArrowError),
- 3 => compute::regexp_match(downcast_string_arg!(args[0], "string", T),
downcast_string_arg!(args[1], "pattern", T),
Some(downcast_string_arg!(args[1], "flags", T)))
- .map_err(DataFusionError::ArrowError),
+ 2 => {
+ let values = downcast_string_arg!(args[0], "string", T);
+ let regex = downcast_string_arg!(args[1], "pattern", T);
+ compute::regexp_match(values, regex,
None).map_err(DataFusionError::ArrowError)
Review comment:
For anyone else who may be looking, the documentation for the arrow
`regexp_match` compute kernel called here is at:
https://docs.rs/arrow/5.5.0/arrow/compute/kernels/regexp/fn.regexp_match.html
##########
File path: datafusion/src/physical_plan/regex_expressions.rs
##########
@@ -170,3 +177,58 @@ pub fn regexp_replace<T: StringOffsetSizeTrait>(args:
&[ArrayRef]) -> Result<Arr
))),
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use arrow::array::*;
+
+ #[test]
+ fn test_case_sensitive_regexp_match() {
+ let values = StringArray::from(vec!["abc"; 5]);
+ let patterns =
+ StringArray::from(vec!["^(a)", "^(A)", "(b|d)", "(B|D)",
"^(b|c)"]);
+
+ let elem_builder: GenericStringBuilder<i32> =
GenericStringBuilder::new(0);
+ let mut expected_builder = ListBuilder::new(elem_builder);
+ expected_builder.values().append_value("a").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.append(false).unwrap();
+ expected_builder.values().append_value("b").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.append(false).unwrap();
+ expected_builder.append(false).unwrap();
+ let expected = expected_builder.finish();
+
+ let re = regexp_match::<i32>(&[Arc::new(values),
Arc::new(patterns)]).unwrap();
+
+ assert_eq!(re.as_ref(), &expected);
+ }
+
+ #[test]
+ fn test_case_insensitive_regexp_match() {
+ let values = StringArray::from(vec!["abc"; 5]);
+ let patterns =
+ StringArray::from(vec!["^(a)", "^(A)", "(b|d)", "(B|D)",
"^(b|c)"]);
+ let flags = StringArray::from(vec!["i"; 5]);
+
+ let elem_builder: GenericStringBuilder<i32> =
GenericStringBuilder::new(0);
+ let mut expected_builder = ListBuilder::new(elem_builder);
+ expected_builder.values().append_value("a").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.values().append_value("a").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.values().append_value("b").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.values().append_value("b").unwrap();
Review comment:
👍
##########
File path: datafusion/src/physical_plan/regex_expressions.rs
##########
@@ -170,3 +177,58 @@ pub fn regexp_replace<T: StringOffsetSizeTrait>(args:
&[ArrayRef]) -> Result<Arr
))),
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use arrow::array::*;
+
+ #[test]
+ fn test_case_sensitive_regexp_match() {
+ let values = StringArray::from(vec!["abc"; 5]);
+ let patterns =
+ StringArray::from(vec!["^(a)", "^(A)", "(b|d)", "(B|D)",
"^(b|c)"]);
+
+ let elem_builder: GenericStringBuilder<i32> =
GenericStringBuilder::new(0);
+ let mut expected_builder = ListBuilder::new(elem_builder);
+ expected_builder.values().append_value("a").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.append(false).unwrap();
+ expected_builder.values().append_value("b").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.append(false).unwrap();
+ expected_builder.append(false).unwrap();
+ let expected = expected_builder.finish();
+
+ let re = regexp_match::<i32>(&[Arc::new(values),
Arc::new(patterns)]).unwrap();
+
+ assert_eq!(re.as_ref(), &expected);
+ }
+
+ #[test]
+ fn test_case_insensitive_regexp_match() {
+ let values = StringArray::from(vec!["abc"; 5]);
+ let patterns =
+ StringArray::from(vec!["^(a)", "^(A)", "(b|d)", "(B|D)",
"^(b|c)"]);
+ let flags = StringArray::from(vec!["i"; 5]);
+
+ let elem_builder: GenericStringBuilder<i32> =
GenericStringBuilder::new(0);
+ let mut expected_builder = ListBuilder::new(elem_builder);
+ expected_builder.values().append_value("a").unwrap();
+ expected_builder.append(true).unwrap();
+ expected_builder.values().append_value("a").unwrap();
Review comment:
👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]