This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new e43db6cc887 [SPARK-42843][SQL] Update the error class _LEGACY_ERROR_TEMP_2007 to REGEX_GROUP_INDEX e43db6cc887 is described below commit e43db6cc88795547242d005259fdb9b6d992a774 Author: Liang Yan <ckgppl_...@sina.cn> AuthorDate: Thu May 4 09:54:12 2023 +0300 [SPARK-42843][SQL] Update the error class _LEGACY_ERROR_TEMP_2007 to REGEX_GROUP_INDEX ### What changes were proposed in this pull request? Update the error class _LEGACY_ERROR_TEMP_2007 to REGEX_GROUP_INDEX. ### Why are the changes needed? Fix JIRA issue [SPARK-42843](https://issues.apache.org/jira/browse/SPARK-42843). The original error class name is just a number; update it to a descriptive name. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests already cover it. Closes #40955 from liang3zy22/spark42843. Authored-by: Liang Yan <ckgppl_...@sina.cn> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 5 ++ .../catalyst/expressions/regexpExpressions.scala | 15 +++-- .../spark/sql/errors/QueryExecutionErrors.scala | 10 ++- .../expressions/RegexpExpressionsSuite.scala | 72 ++++++++++++++++++---- .../sql-tests/results/regexp-functions.sql.out | 72 ++++++++++++++-------- 5 files changed, 129 insertions(+), 45 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 7718b8b9127..1d5ee78aa8c 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1009,6 +1009,11 @@ "<value>." ] }, + "REGEX_GROUP_INDEX" : { + "message" : [ + "Expects group index between 0 and <groupCount>, but got <groupIndex>." + ] + }, "ZERO_INDEX" : { "message" : [ "expects %1$, %2$ and so on, but got %0$." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 29510bc3852..2025a554998 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -746,12 +746,12 @@ object RegExpReplace { } object RegExpExtractBase { - def checkGroupIndex(groupCount: Int, groupIndex: Int): Unit = { + def checkGroupIndex(prettyName: String, groupCount: Int, groupIndex: Int): Unit = { if (groupIndex < 0) { throw QueryExecutionErrors.regexGroupIndexLessThanZeroError } else if (groupCount < groupIndex) { throw QueryExecutionErrors.regexGroupIndexExceedGroupCountError( - groupCount, groupIndex) + prettyName, groupCount, groupIndex) } } } @@ -857,7 +857,7 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio if (m.find) { val mr: MatchResult = m.toMatchResult val index = r.asInstanceOf[Int] - RegExpExtractBase.checkGroupIndex(mr.groupCount, index) + RegExpExtractBase.checkGroupIndex(prettyName, mr.groupCount, index) val group = mr.group(index) if (group == null) { // Pattern matched, but it's an optional group UTF8String.EMPTY_UTF8 @@ -887,7 +887,7 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio ${initLastMatcherCode(ctx, subject, regexp, matcher)} if ($matcher.find()) { java.util.regex.MatchResult $matchResult = $matcher.toMatchResult(); - $classNameRegExpExtractBase.checkGroupIndex($matchResult.groupCount(), $idx); + $classNameRegExpExtractBase.checkGroupIndex("$prettyName", $matchResult.groupCount(), $idx); if ($matchResult.group($idx) == null) { ${ev.value} = UTF8String.EMPTY_UTF8; } else { @@ -950,7 +950,7 @@ case class RegExpExtractAll(subject: Expression, regexp: Expression, idx: Expres while(m.find) { val mr: MatchResult = 
m.toMatchResult val index = r.asInstanceOf[Int] - RegExpExtractBase.checkGroupIndex(mr.groupCount, index) + RegExpExtractBase.checkGroupIndex(prettyName, mr.groupCount, index) val group = mr.group(index) if (group == null) { // Pattern matched, but it's an optional group matchResults += UTF8String.EMPTY_UTF8 @@ -982,7 +982,10 @@ case class RegExpExtractAll(subject: Expression, regexp: Expression, idx: Expres | java.util.ArrayList $matchResults = new java.util.ArrayList<UTF8String>(); | while ($matcher.find()) { | java.util.regex.MatchResult $matchResult = $matcher.toMatchResult(); - | $classNameRegExpExtractBase.checkGroupIndex($matchResult.groupCount(), $idx); + | $classNameRegExpExtractBase.checkGroupIndex( + | "$prettyName", + | $matchResult.groupCount(), + | $idx); | if ($matchResult.group($idx) == null) { | $matchResults.add(UTF8String.EMPTY_UTF8); | } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 1990cf40b2d..e7d310c25c2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -364,10 +364,14 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { } def regexGroupIndexExceedGroupCountError( - groupCount: Int, groupIndex: Int): SparkIllegalArgumentException = { - new SparkIllegalArgumentException( - errorClass = "_LEGACY_ERROR_TEMP_2007", + funcName: String, + groupCount: Int, + groupIndex: Int): RuntimeException = { + new SparkRuntimeException( + errorClass = "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", messageParameters = Map( + "parameter" -> toSQLId("idx"), + "functionName" -> toSQLId(funcName), "groupCount" -> groupCount.toString(), "groupIndex" -> groupIndex.toString())) } diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index af051a1a9bc..ace0c7959a1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -391,12 +391,36 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val row11 = create_row("100-200", "(\\d+)-(\\d+)", -1) val row12 = create_row("100-200", "\\d+", -1) - checkExceptionInExpression[IllegalArgumentException]( - expr, row8, "Regex group count is 2, but the specified group index is 3") - checkExceptionInExpression[IllegalArgumentException]( - expr, row9, "Regex group count is 1, but the specified group index is 2") - checkExceptionInExpression[IllegalArgumentException]( - expr, row10, "Regex group count is 0, but the specified group index is 1") + checkErrorInExpression[SparkRuntimeException]( + expr, + row8, + "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + Map("parameter" -> "`idx`", + "functionName" -> "`regexp_extract`", + "groupCount" -> "2", + "groupIndex" -> "3" + ) + ) + checkErrorInExpression[SparkRuntimeException]( + expr, + row9, + "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + Map("parameter" -> "`idx`", + "functionName" -> "`regexp_extract`", + "groupCount" -> "1", + "groupIndex" -> "2" + ) + ) + checkErrorInExpression[SparkRuntimeException]( + expr, + row10, + "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + Map("parameter" -> "`idx`", + "functionName" -> "`regexp_extract`", + "groupCount" -> "0", + "groupIndex" -> "1" + ) + ) checkExceptionInExpression[IllegalArgumentException]( expr, row11, "The specified group index cannot be less than zero") checkExceptionInExpression[IllegalArgumentException]( @@ -445,12 +469,36 @@ class RegexpExpressionsSuite extends 
SparkFunSuite with ExpressionEvalHelper { val row12 = create_row("100-200,300-400,500-600", "(\\d+)-(\\d+)", -1) val row13 = create_row("100-200,300-400,500-600", "\\d+", -1) - checkExceptionInExpression[IllegalArgumentException]( - expr, row9, "Regex group count is 2, but the specified group index is 3") - checkExceptionInExpression[IllegalArgumentException]( - expr, row10, "Regex group count is 1, but the specified group index is 2") - checkExceptionInExpression[IllegalArgumentException]( - expr, row11, "Regex group count is 0, but the specified group index is 1") + checkErrorInExpression[SparkRuntimeException]( + expr, + row9, + "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + Map("parameter" -> "`idx`", + "functionName" -> "`regexp_extract_all`", + "groupCount" -> "2", + "groupIndex" -> "3" + ) + ) + checkErrorInExpression[SparkRuntimeException]( + expr, + row10, + "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + Map("parameter" -> "`idx`", + "functionName"-> "`regexp_extract_all`", + "groupCount" -> "1", + "groupIndex" -> "2" + ) + ) + checkErrorInExpression[SparkRuntimeException]( + expr, + row11, + "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + Map("parameter" -> "`idx`", + "functionName" -> "`regexp_extract_all`", + "groupCount" -> "0", + "groupIndex" -> "1" + ) + ) checkExceptionInExpression[IllegalArgumentException]( expr, row12, "The specified group index cannot be less than zero") checkExceptionInExpression[IllegalArgumentException]( diff --git a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out index b31dd8f57c1..c46b6590f9e 100644 --- a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out @@ -4,12 +4,15 @@ SELECT regexp_extract('1a 2b 14m', '\\d+') -- !query schema struct<> -- !query output -org.apache.spark.SparkIllegalArgumentException 
+org.apache.spark.SparkRuntimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2007", + "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + "sqlState" : "22023", "messageParameters" : { + "functionName" : "`regexp_extract`", "groupCount" : "0", - "groupIndex" : "1" + "groupIndex" : "1", + "parameter" : "`idx`" } } @@ -27,12 +30,15 @@ SELECT regexp_extract('1a 2b 14m', '\\d+', 1) -- !query schema struct<> -- !query output -org.apache.spark.SparkIllegalArgumentException +org.apache.spark.SparkRuntimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2007", + "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + "sqlState" : "22023", "messageParameters" : { + "functionName" : "`regexp_extract`", "groupCount" : "0", - "groupIndex" : "1" + "groupIndex" : "1", + "parameter" : "`idx`" } } @@ -42,12 +48,15 @@ SELECT regexp_extract('1a 2b 14m', '\\d+', 2) -- !query schema struct<> -- !query output -org.apache.spark.SparkIllegalArgumentException +org.apache.spark.SparkRuntimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2007", + "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + "sqlState" : "22023", "messageParameters" : { + "functionName" : "`regexp_extract`", "groupCount" : "0", - "groupIndex" : "2" + "groupIndex" : "2", + "parameter" : "`idx`" } } @@ -116,12 +125,15 @@ SELECT regexp_extract('1a 2b 14m', '(\\d+)([a-z]+)', 3) -- !query schema struct<> -- !query output -org.apache.spark.SparkIllegalArgumentException +org.apache.spark.SparkRuntimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2007", + "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + "sqlState" : "22023", "messageParameters" : { + "functionName" : "`regexp_extract`", "groupCount" : "2", - "groupIndex" : "3" + "groupIndex" : "3", + "parameter" : "`idx`" } } @@ -175,12 +187,15 @@ SELECT regexp_extract_all('1a 2b 14m', '\\d+') -- !query schema struct<> -- !query output -org.apache.spark.SparkIllegalArgumentException +org.apache.spark.SparkRuntimeException { - 
"errorClass" : "_LEGACY_ERROR_TEMP_2007", + "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + "sqlState" : "22023", "messageParameters" : { + "functionName" : "`regexp_extract_all`", "groupCount" : "0", - "groupIndex" : "1" + "groupIndex" : "1", + "parameter" : "`idx`" } } @@ -198,12 +213,15 @@ SELECT regexp_extract_all('1a 2b 14m', '\\d+', 1) -- !query schema struct<> -- !query output -org.apache.spark.SparkIllegalArgumentException +org.apache.spark.SparkRuntimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2007", + "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + "sqlState" : "22023", "messageParameters" : { + "functionName" : "`regexp_extract_all`", "groupCount" : "0", - "groupIndex" : "1" + "groupIndex" : "1", + "parameter" : "`idx`" } } @@ -213,12 +231,15 @@ SELECT regexp_extract_all('1a 2b 14m', '\\d+', 2) -- !query schema struct<> -- !query output -org.apache.spark.SparkIllegalArgumentException +org.apache.spark.SparkRuntimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2007", + "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + "sqlState" : "22023", "messageParameters" : { + "functionName" : "`regexp_extract_all`", "groupCount" : "0", - "groupIndex" : "2" + "groupIndex" : "2", + "parameter" : "`idx`" } } @@ -287,12 +308,15 @@ SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)', 3) -- !query schema struct<> -- !query output -org.apache.spark.SparkIllegalArgumentException +org.apache.spark.SparkRuntimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2007", + "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX", + "sqlState" : "22023", "messageParameters" : { + "functionName" : "`regexp_extract_all`", "groupCount" : "2", - "groupIndex" : "3" + "groupIndex" : "3", + "parameter" : "`idx`" } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org