This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new e43db6cc887 [SPARK-42843][SQL] Update the error class
_LEGACY_ERROR_TEMP_2007 to REGEX_GROUP_INDEX
e43db6cc887 is described below
commit e43db6cc88795547242d005259fdb9b6d992a774
Author: Liang Yan <[email protected]>
AuthorDate: Thu May 4 09:54:12 2023 +0300
[SPARK-42843][SQL] Update the error class _LEGACY_ERROR_TEMP_2007 to
REGEX_GROUP_INDEX
### What changes were proposed in this pull request?
Update the error class _LEGACY_ERROR_TEMP_2007 to REGEX_GROUP_INDEX (used as INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX).
### Why are the changes needed?
Fix JIRA issue
[SPARK-42843](https://issues.apache.org/jira/browse/SPARK-42843). The original
error class name was just a number; update it to a descriptive name.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing tests already cover it; their expected results were updated to match the new error class.
Closes #40955 from liang3zy22/spark42843.
Authored-by: Liang Yan <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
core/src/main/resources/error/error-classes.json | 5 ++
.../catalyst/expressions/regexpExpressions.scala | 15 +++--
.../spark/sql/errors/QueryExecutionErrors.scala | 10 ++-
.../expressions/RegexpExpressionsSuite.scala | 72 ++++++++++++++++++----
.../sql-tests/results/regexp-functions.sql.out | 72 ++++++++++++++--------
5 files changed, 129 insertions(+), 45 deletions(-)
diff --git a/core/src/main/resources/error/error-classes.json
b/core/src/main/resources/error/error-classes.json
index 7718b8b9127..1d5ee78aa8c 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -1009,6 +1009,11 @@
"<value>."
]
},
+ "REGEX_GROUP_INDEX" : {
+ "message" : [
+ "Expects group index between 0 and <groupCount>, but got
<groupIndex>."
+ ]
+ },
"ZERO_INDEX" : {
"message" : [
"expects %1$, %2$ and so on, but got %0$."
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 29510bc3852..2025a554998 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -746,12 +746,12 @@ object RegExpReplace {
}
object RegExpExtractBase {
- def checkGroupIndex(groupCount: Int, groupIndex: Int): Unit = {
+ def checkGroupIndex(prettyName: String, groupCount: Int, groupIndex: Int):
Unit = {
if (groupIndex < 0) {
throw QueryExecutionErrors.regexGroupIndexLessThanZeroError
} else if (groupCount < groupIndex) {
throw QueryExecutionErrors.regexGroupIndexExceedGroupCountError(
- groupCount, groupIndex)
+ prettyName, groupCount, groupIndex)
}
}
}
@@ -857,7 +857,7 @@ case class RegExpExtract(subject: Expression, regexp:
Expression, idx: Expressio
if (m.find) {
val mr: MatchResult = m.toMatchResult
val index = r.asInstanceOf[Int]
- RegExpExtractBase.checkGroupIndex(mr.groupCount, index)
+ RegExpExtractBase.checkGroupIndex(prettyName, mr.groupCount, index)
val group = mr.group(index)
if (group == null) { // Pattern matched, but it's an optional group
UTF8String.EMPTY_UTF8
@@ -887,7 +887,7 @@ case class RegExpExtract(subject: Expression, regexp:
Expression, idx: Expressio
${initLastMatcherCode(ctx, subject, regexp, matcher)}
if ($matcher.find()) {
java.util.regex.MatchResult $matchResult = $matcher.toMatchResult();
- $classNameRegExpExtractBase.checkGroupIndex($matchResult.groupCount(),
$idx);
+ $classNameRegExpExtractBase.checkGroupIndex("$prettyName",
$matchResult.groupCount(), $idx);
if ($matchResult.group($idx) == null) {
${ev.value} = UTF8String.EMPTY_UTF8;
} else {
@@ -950,7 +950,7 @@ case class RegExpExtractAll(subject: Expression, regexp:
Expression, idx: Expres
while(m.find) {
val mr: MatchResult = m.toMatchResult
val index = r.asInstanceOf[Int]
- RegExpExtractBase.checkGroupIndex(mr.groupCount, index)
+ RegExpExtractBase.checkGroupIndex(prettyName, mr.groupCount, index)
val group = mr.group(index)
if (group == null) { // Pattern matched, but it's an optional group
matchResults += UTF8String.EMPTY_UTF8
@@ -982,7 +982,10 @@ case class RegExpExtractAll(subject: Expression, regexp:
Expression, idx: Expres
| java.util.ArrayList $matchResults = new
java.util.ArrayList<UTF8String>();
| while ($matcher.find()) {
| java.util.regex.MatchResult $matchResult =
$matcher.toMatchResult();
- |
$classNameRegExpExtractBase.checkGroupIndex($matchResult.groupCount(), $idx);
+ | $classNameRegExpExtractBase.checkGroupIndex(
+ | "$prettyName",
+ | $matchResult.groupCount(),
+ | $idx);
| if ($matchResult.group($idx) == null) {
| $matchResults.add(UTF8String.EMPTY_UTF8);
| } else {
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 1990cf40b2d..e7d310c25c2 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -364,10 +364,14 @@ private[sql] object QueryExecutionErrors extends
QueryErrorsBase {
}
def regexGroupIndexExceedGroupCountError(
- groupCount: Int, groupIndex: Int): SparkIllegalArgumentException = {
- new SparkIllegalArgumentException(
- errorClass = "_LEGACY_ERROR_TEMP_2007",
+ funcName: String,
+ groupCount: Int,
+ groupIndex: Int): RuntimeException = {
+ new SparkRuntimeException(
+ errorClass = "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
messageParameters = Map(
+ "parameter" -> toSQLId("idx"),
+ "functionName" -> toSQLId(funcName),
"groupCount" -> groupCount.toString(),
"groupIndex" -> groupIndex.toString()))
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
index af051a1a9bc..ace0c7959a1 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
@@ -391,12 +391,36 @@ class RegexpExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
val row11 = create_row("100-200", "(\\d+)-(\\d+)", -1)
val row12 = create_row("100-200", "\\d+", -1)
- checkExceptionInExpression[IllegalArgumentException](
- expr, row8, "Regex group count is 2, but the specified group index is 3")
- checkExceptionInExpression[IllegalArgumentException](
- expr, row9, "Regex group count is 1, but the specified group index is 2")
- checkExceptionInExpression[IllegalArgumentException](
- expr, row10, "Regex group count is 0, but the specified group index is
1")
+ checkErrorInExpression[SparkRuntimeException](
+ expr,
+ row8,
+ "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ Map("parameter" -> "`idx`",
+ "functionName" -> "`regexp_extract`",
+ "groupCount" -> "2",
+ "groupIndex" -> "3"
+ )
+ )
+ checkErrorInExpression[SparkRuntimeException](
+ expr,
+ row9,
+ "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ Map("parameter" -> "`idx`",
+ "functionName" -> "`regexp_extract`",
+ "groupCount" -> "1",
+ "groupIndex" -> "2"
+ )
+ )
+ checkErrorInExpression[SparkRuntimeException](
+ expr,
+ row10,
+ "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ Map("parameter" -> "`idx`",
+ "functionName" -> "`regexp_extract`",
+ "groupCount" -> "0",
+ "groupIndex" -> "1"
+ )
+ )
checkExceptionInExpression[IllegalArgumentException](
expr, row11, "The specified group index cannot be less than zero")
checkExceptionInExpression[IllegalArgumentException](
@@ -445,12 +469,36 @@ class RegexpExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
val row12 = create_row("100-200,300-400,500-600", "(\\d+)-(\\d+)", -1)
val row13 = create_row("100-200,300-400,500-600", "\\d+", -1)
- checkExceptionInExpression[IllegalArgumentException](
- expr, row9, "Regex group count is 2, but the specified group index is 3")
- checkExceptionInExpression[IllegalArgumentException](
- expr, row10, "Regex group count is 1, but the specified group index is
2")
- checkExceptionInExpression[IllegalArgumentException](
- expr, row11, "Regex group count is 0, but the specified group index is
1")
+ checkErrorInExpression[SparkRuntimeException](
+ expr,
+ row9,
+ "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ Map("parameter" -> "`idx`",
+ "functionName" -> "`regexp_extract_all`",
+ "groupCount" -> "2",
+ "groupIndex" -> "3"
+ )
+ )
+ checkErrorInExpression[SparkRuntimeException](
+ expr,
+ row10,
+ "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ Map("parameter" -> "`idx`",
+ "functionName"-> "`regexp_extract_all`",
+ "groupCount" -> "1",
+ "groupIndex" -> "2"
+ )
+ )
+ checkErrorInExpression[SparkRuntimeException](
+ expr,
+ row11,
+ "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ Map("parameter" -> "`idx`",
+ "functionName" -> "`regexp_extract_all`",
+ "groupCount" -> "0",
+ "groupIndex" -> "1"
+ )
+ )
checkExceptionInExpression[IllegalArgumentException](
expr, row12, "The specified group index cannot be less than zero")
checkExceptionInExpression[IllegalArgumentException](
diff --git
a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out
b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out
index b31dd8f57c1..c46b6590f9e 100644
--- a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out
@@ -4,12 +4,15 @@ SELECT regexp_extract('1a 2b 14m', '\\d+')
-- !query schema
struct<>
-- !query output
-org.apache.spark.SparkIllegalArgumentException
+org.apache.spark.SparkRuntimeException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2007",
+ "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ "sqlState" : "22023",
"messageParameters" : {
+ "functionName" : "`regexp_extract`",
"groupCount" : "0",
- "groupIndex" : "1"
+ "groupIndex" : "1",
+ "parameter" : "`idx`"
}
}
@@ -27,12 +30,15 @@ SELECT regexp_extract('1a 2b 14m', '\\d+', 1)
-- !query schema
struct<>
-- !query output
-org.apache.spark.SparkIllegalArgumentException
+org.apache.spark.SparkRuntimeException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2007",
+ "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ "sqlState" : "22023",
"messageParameters" : {
+ "functionName" : "`regexp_extract`",
"groupCount" : "0",
- "groupIndex" : "1"
+ "groupIndex" : "1",
+ "parameter" : "`idx`"
}
}
@@ -42,12 +48,15 @@ SELECT regexp_extract('1a 2b 14m', '\\d+', 2)
-- !query schema
struct<>
-- !query output
-org.apache.spark.SparkIllegalArgumentException
+org.apache.spark.SparkRuntimeException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2007",
+ "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ "sqlState" : "22023",
"messageParameters" : {
+ "functionName" : "`regexp_extract`",
"groupCount" : "0",
- "groupIndex" : "2"
+ "groupIndex" : "2",
+ "parameter" : "`idx`"
}
}
@@ -116,12 +125,15 @@ SELECT regexp_extract('1a 2b 14m', '(\\d+)([a-z]+)', 3)
-- !query schema
struct<>
-- !query output
-org.apache.spark.SparkIllegalArgumentException
+org.apache.spark.SparkRuntimeException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2007",
+ "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ "sqlState" : "22023",
"messageParameters" : {
+ "functionName" : "`regexp_extract`",
"groupCount" : "2",
- "groupIndex" : "3"
+ "groupIndex" : "3",
+ "parameter" : "`idx`"
}
}
@@ -175,12 +187,15 @@ SELECT regexp_extract_all('1a 2b 14m', '\\d+')
-- !query schema
struct<>
-- !query output
-org.apache.spark.SparkIllegalArgumentException
+org.apache.spark.SparkRuntimeException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2007",
+ "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ "sqlState" : "22023",
"messageParameters" : {
+ "functionName" : "`regexp_extract_all`",
"groupCount" : "0",
- "groupIndex" : "1"
+ "groupIndex" : "1",
+ "parameter" : "`idx`"
}
}
@@ -198,12 +213,15 @@ SELECT regexp_extract_all('1a 2b 14m', '\\d+', 1)
-- !query schema
struct<>
-- !query output
-org.apache.spark.SparkIllegalArgumentException
+org.apache.spark.SparkRuntimeException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2007",
+ "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ "sqlState" : "22023",
"messageParameters" : {
+ "functionName" : "`regexp_extract_all`",
"groupCount" : "0",
- "groupIndex" : "1"
+ "groupIndex" : "1",
+ "parameter" : "`idx`"
}
}
@@ -213,12 +231,15 @@ SELECT regexp_extract_all('1a 2b 14m', '\\d+', 2)
-- !query schema
struct<>
-- !query output
-org.apache.spark.SparkIllegalArgumentException
+org.apache.spark.SparkRuntimeException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2007",
+ "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ "sqlState" : "22023",
"messageParameters" : {
+ "functionName" : "`regexp_extract_all`",
"groupCount" : "0",
- "groupIndex" : "2"
+ "groupIndex" : "2",
+ "parameter" : "`idx`"
}
}
@@ -287,12 +308,15 @@ SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)',
3)
-- !query schema
struct<>
-- !query output
-org.apache.spark.SparkIllegalArgumentException
+org.apache.spark.SparkRuntimeException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2007",
+ "errorClass" : "INVALID_PARAMETER_VALUE.REGEX_GROUP_INDEX",
+ "sqlState" : "22023",
"messageParameters" : {
+ "functionName" : "`regexp_extract_all`",
"groupCount" : "2",
- "groupIndex" : "3"
+ "groupIndex" : "3",
+ "parameter" : "`idx`"
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]