gerashegalov commented on code in PR #39383:
URL: https://github.com/apache/spark/pull/39383#discussion_r1061843543
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala:
##########
@@ -634,7 +634,12 @@ case class RegExpReplace(subject: Expression, regexp:
Expression, rep: Expressio
if (!p.equals(lastRegex)) {
// regex value changed
lastRegex = p.asInstanceOf[UTF8String].clone()
- pattern = Pattern.compile(lastRegex.toString)
+ try {
+ pattern = Pattern.compile(lastRegex.toString)
+ } catch {
+ case e: PatternSyntaxException =>
+ throw QueryExecutionErrors.invalidPatternError(prettyName,
e.getPattern)
Review Comment:
it would be useful to have another version of this method passing `e` as the
cause to SparkRuntimeException because it contains useful diagnostics for the
user:
```
java.util.regex.PatternSyntaxException: Unclosed counted closure near index 8
[a\d]{0, 2}
```
##########
sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala:
##########
@@ -663,4 +664,18 @@ class StringFunctionsSuite extends QueryTest with
SharedSparkSession {
start = 7,
stop = 47))
}
+
+ test("SPARK-41780: INVALID_PARAMETER_VALUE - invalid parameters `regexp` in
regexp_replace") {
+ checkError(
+ exception = intercept[SparkRuntimeException] {
+ sql("select regexp_replace('', '[a\\\\d]{0, 2}', 'x')").collect()
+ },
+ errorClass = "INVALID_PARAMETER_VALUE",
+ parameters = Map(
+ "parameter" -> "regexp",
+ "functionName" -> "`regexp_replace`",
+ "expected" -> "'[a\\\\d]{0, 2}'"
Review Comment:
Looks like the parameter name is misnamed in INVALID_PARAMETER_VALUE
https://github.com/apache/spark/blob/f352f103ed512806abb3f642571a0c595b8b0509/core/src/main/resources/error/error-classes.json#L792
It should probably called something like `actual` or `invalid` , it's
anything but `expected`
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala:
##########
@@ -634,7 +634,12 @@ case class RegExpReplace(subject: Expression, regexp:
Expression, rep: Expressio
if (!p.equals(lastRegex)) {
// regex value changed
lastRegex = p.asInstanceOf[UTF8String].clone()
- pattern = Pattern.compile(lastRegex.toString)
+ try {
+ pattern = Pattern.compile(lastRegex.toString)
+ } catch {
+ case e: PatternSyntaxException =>
+ throw QueryExecutionErrors.invalidPatternError(prettyName,
e.getPattern)
Review Comment:
What about other instances of `Pattern.compile` in this class such as
`regexp_extract`?
also `rlike` does not hide PatternSyntaxException but does not use
QueryExecutionErrors.invalidPatternError
```
scala> sql("select rlike('', '[a\\\\d]{0, 2}')").collect()
java.util.regex.PatternSyntaxException: Unclosed counted closure near index 8
[a\d]{0, 2}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]