Jefffrey commented on code in PR #20354:
URL: https://github.com/apache/datafusion/pull/20354#discussion_r2822558042
##########
datafusion/functions/src/regex/regexplike.rs:
##########
@@ -131,28 +132,39 @@ impl ScalarUDFImpl for RegexpLikeFunc {
) -> Result<ColumnarValue> {
let args = &args.args;
- let len = args
+ if args
.iter()
- .fold(Option::<usize>::None, |acc, arg| match arg {
- ColumnarValue::Scalar(_) => acc,
- ColumnarValue::Array(a) => Some(a.len()),
- });
-
- let is_scalar = len.is_none();
- let inferred_length = len.unwrap_or(1);
- let args = args
- .iter()
- .map(|arg| arg.to_array(inferred_length))
- .collect::<Result<Vec<_>>>()?;
-
- let result = regexp_like(&args);
- if is_scalar {
- // If all inputs are scalar, keeps output as scalar
- let result = result.and_then(|arr|
ScalarValue::try_from_array(&arr, 0));
- result.map(ColumnarValue::Scalar)
- } else {
- result.map(ColumnarValue::Array)
+ .all(|arg| matches!(arg, ColumnarValue::Scalar(_)))
+ {
+ return regexp_like_scalar(args);
Review Comment:
We should fold this into the match below
##########
datafusion/functions/src/regex/regexplike.rs:
##########
@@ -314,6 +336,121 @@ pub fn regexp_like(args: &[ArrayRef]) -> Result<ArrayRef>
{
}
}
+fn scalar_string(value: &ScalarValue) -> Result<Option<&str>> {
+ match value {
+ ScalarValue::Utf8(v) | ScalarValue::LargeUtf8(v) |
ScalarValue::Utf8View(v) => {
+ Ok(v.as_deref())
+ }
+ ScalarValue::Null => Ok(None),
+ _ => internal_err!(
+ "Unsupported data type {:?} for function `regexp_like`",
+ value.data_type()
+ ),
+ }
+}
+
+fn regexp_like_array_scalar(
+ values: &ArrayRef,
+ pattern: Option<&str>,
+ flags: Option<&str>,
+) -> Result<ArrayRef> {
+ use DataType::*;
+
+ if pattern.is_none() {
+ return Ok(Arc::new(BooleanArray::new_null(values.len())));
+ }
+
+ let pattern = pattern.unwrap();
+ let array = match values.data_type() {
+ Utf8 => {
+ let array = values.as_string::<i32>();
+ regexp::regexp_is_match_scalar(array, pattern, flags)?
+ }
+ Utf8View => {
+ let array = values.as_string_view();
+ regexp::regexp_is_match_scalar(array, pattern, flags)?
+ }
+ LargeUtf8 => {
+ let array = values.as_string::<i64>();
+ regexp::regexp_is_match_scalar(array, pattern, flags)?
+ }
+ other => {
+ return internal_err!(
+ "Unsupported data type {other:?} for function `regexp_like`"
+ );
+ }
+ };
+
+ Ok(Arc::new(array))
+}
+
+fn regexp_like_scalar(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ let flags = if args.len() == 3 {
+ match &args[2] {
+ ColumnarValue::Scalar(v) => scalar_string(v)?,
+ _ => {
+ return internal_err!(
+ "Unexpected non-scalar argument for function `regexp_like`"
+ );
+ }
+ }
+ } else {
+ None
+ };
+
+ if flags == Some("g") {
+ return plan_err!("regexp_like() does not support the \"global\"
option");
+ }
Review Comment:
We should call this fix out in the PR title/body now, and preferably add a
test for it
##########
datafusion/functions/src/regex/regexplike.rs:
##########
@@ -314,6 +329,105 @@ pub fn regexp_like(args: &[ArrayRef]) -> Result<ArrayRef>
{
}
}
+fn scalar_string(value: &ScalarValue) -> Result<Option<&str>> {
+ match value {
+ ScalarValue::Utf8(v) | ScalarValue::LargeUtf8(v) |
ScalarValue::Utf8View(v) => {
+ Ok(v.as_deref())
+ }
+ ScalarValue::Null => Ok(None),
+ _ => internal_err!(
+ "Unsupported data type {:?} for function `regexp_like`",
+ value.data_type()
+ ),
+ }
+}
+
+fn regexp_like_array_scalar(
+ values: &ArrayRef,
+ pattern: Option<&str>,
+ flags: Option<&str>,
+) -> Result<ArrayRef> {
+ use DataType::*;
+
+ if pattern.is_none() {
+ return Ok(Arc::new(BooleanArray::new_null(values.len())));
+ }
+
+ let pattern = pattern.unwrap();
+ let array = match values.data_type() {
+ Utf8 => {
+ let array = values.as_string::<i32>();
+ regexp::regexp_is_match_scalar(array, pattern, flags)?
+ }
+ Utf8View => {
+ let array = values.as_string_view();
+ regexp::regexp_is_match_scalar(array, pattern, flags)?
+ }
+ LargeUtf8 => {
+ let array = values.as_string::<i64>();
+ regexp::regexp_is_match_scalar(array, pattern, flags)?
+ }
+ other => {
+ return internal_err!(
+ "Unsupported data type {other:?} for function `regexp_like`"
+ );
+ }
+ };
+
+ Ok(Arc::new(array))
+}
+
+fn regexp_like_scalar(args: &[ColumnarValue]) -> Result<ColumnarValue> {
Review Comment:
Pass in explicit args instead of a slice; to handle optional 3rd argument
use `Option`
##########
datafusion/functions/src/regex/regexplike.rs:
##########
@@ -314,6 +329,105 @@ pub fn regexp_like(args: &[ArrayRef]) -> Result<ArrayRef>
{
}
}
+fn scalar_string(value: &ScalarValue) -> Result<Option<&str>> {
Review Comment:
I'm fairly sure we already have a function to do this, `try_as_str` I
believe? Check the APIs available on `ScalarValue`
##########
datafusion/functions/src/regex/regexplike.rs:
##########
@@ -131,28 +132,39 @@ impl ScalarUDFImpl for RegexpLikeFunc {
) -> Result<ColumnarValue> {
let args = &args.args;
- let len = args
+ if args
.iter()
- .fold(Option::<usize>::None, |acc, arg| match arg {
- ColumnarValue::Scalar(_) => acc,
- ColumnarValue::Array(a) => Some(a.len()),
- });
-
- let is_scalar = len.is_none();
- let inferred_length = len.unwrap_or(1);
- let args = args
- .iter()
- .map(|arg| arg.to_array(inferred_length))
- .collect::<Result<Vec<_>>>()?;
-
- let result = regexp_like(&args);
- if is_scalar {
- // If all inputs are scalar, keeps output as scalar
- let result = result.and_then(|arr|
ScalarValue::try_from_array(&arr, 0));
- result.map(ColumnarValue::Scalar)
- } else {
- result.map(ColumnarValue::Array)
+ .all(|arg| matches!(arg, ColumnarValue::Scalar(_)))
+ {
+ return regexp_like_scalar(args);
+ }
+
+ match args.as_slice() {
+ [ColumnarValue::Array(values), ColumnarValue::Scalar(pattern)] => {
+ let pattern = scalar_string(pattern)?;
+ let array = regexp_like_array_scalar(values, pattern, None)?;
+ return Ok(ColumnarValue::Array(array));
+ }
+ [
+ ColumnarValue::Array(values),
+ ColumnarValue::Scalar(pattern),
+ ColumnarValue::Scalar(flags),
+ ] => {
+ let flags = scalar_string(flags)?;
+ if flags.is_some_and(|flagz| flagz.contains('g')) {
+ return plan_err!(
+ "regexp_like() does not support the \"global\" option"
+ );
+ }
+ let pattern = scalar_string(pattern)?;
+ let array = regexp_like_array_scalar(values, pattern, flags)?;
+ return Ok(ColumnarValue::Array(array));
+ }
+ _ => {}
}
+
+ let args = ColumnarValue::values_to_arrays(args)?;
+ regexp_like(&args).map(ColumnarValue::Array)
Review Comment:
Fold this into the catch all arm above, so we don't need explicit `return`
statements for the other arms
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]