gerashegalov commented on code in PR #39383:
URL: https://github.com/apache/spark/pull/39383#discussion_r1061843543


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala:
##########
@@ -634,7 +634,12 @@ case class RegExpReplace(subject: Expression, regexp: 
Expression, rep: Expressio
     if (!p.equals(lastRegex)) {
       // regex value changed
       lastRegex = p.asInstanceOf[UTF8String].clone()
-      pattern = Pattern.compile(lastRegex.toString)
+      try {
+        pattern = Pattern.compile(lastRegex.toString)
+      } catch {
+        case e: PatternSyntaxException =>
+          throw QueryExecutionErrors.invalidPatternError(prettyName, 
e.getPattern)

Review Comment:
   it would be useful to have another version of this method passing `e` as the 
cause to SparkRuntimeException because it contains useful diagnostics for the 
user:
   ```
   java.util.regex.PatternSyntaxException: Unclosed counted closure near index 8
   [a\d]{0, 2}
   ```
   



##########
sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala:
##########
@@ -663,4 +664,18 @@ class StringFunctionsSuite extends QueryTest with 
SharedSparkSession {
         start = 7,
         stop = 47))
   }
+
+  test("SPARK-41780: INVALID_PARAMETER_VALUE - invalid parameters `regexp` in 
regexp_replace") {
+    checkError(
+      exception = intercept[SparkRuntimeException] {
+        sql("select regexp_replace('', '[a\\\\d]{0, 2}', 'x')").collect()
+      },
+      errorClass = "INVALID_PARAMETER_VALUE",
+      parameters = Map(
+        "parameter" -> "regexp",
+        "functionName" -> "`regexp_replace`",
+        "expected" -> "'[a\\\\d]{0, 2}'"

Review Comment:
   Looks like the parameter name is misnamed in INVALID_PARAMETER_VALUE 
https://github.com/apache/spark/blob/f352f103ed512806abb3f642571a0c595b8b0509/core/src/main/resources/error/error-classes.json#L792
 
   
   It should probably called something like `actual` or `invalid` , it's 
anything but `expected` 



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala:
##########
@@ -634,7 +634,12 @@ case class RegExpReplace(subject: Expression, regexp: 
Expression, rep: Expressio
     if (!p.equals(lastRegex)) {
       // regex value changed
       lastRegex = p.asInstanceOf[UTF8String].clone()
-      pattern = Pattern.compile(lastRegex.toString)
+      try {
+        pattern = Pattern.compile(lastRegex.toString)
+      } catch {
+        case e: PatternSyntaxException =>
+          throw QueryExecutionErrors.invalidPatternError(prettyName, 
e.getPattern)

Review Comment:
   What about other instances of `Pattern.compile` in this class  such as 
`regexp_extract`?
   
   also `rlike` does not hide PatternSyntaxException but does not use  
QueryExecutionErrors.invalidPatternError
   ```
   scala> sql("select rlike('', '[a\\\\d]{0, 2}')").collect()
   java.util.regex.PatternSyntaxException: Unclosed counted closure near index 8
   [a\d]{0, 2}
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to