[spark] branch master updated: [SPARK-41302][SQL] Assign name to _LEGACY_ERROR_TEMP_1185
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c49415412e3 [SPARK-41302][SQL] Assign name to _LEGACY_ERROR_TEMP_1185 c49415412e3 is described below commit c49415412e3027a171c2691df97fe8757f26a0aa Author: narek_karapetian AuthorDate: Sat Feb 4 15:18:53 2023 +0300 [SPARK-41302][SQL] Assign name to _LEGACY_ERROR_TEMP_1185 ### What changes were proposed in this pull request? This PR proposes to assign name to `_LEGACY_ERROR_TEMP_1185` -> `IDENTIFIER_TOO_MANY_NAME_PARTS` ### Why are the changes needed? We should assign proper name to LEGACY_ERROR_TEMP* ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Additional test cases were added Closes #39723 from NarekDW/SPARK-41302. Lead-authored-by: narek_karapetian Co-authored-by: Narek Karapetian Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 11 +++ .../sql/connector/catalog/CatalogV2Implicits.scala | 20 + .../spark/sql/errors/QueryCompilationErrors.scala | 9 ++ .../sql/errors/QueryCompilationErrorsSuite.scala | 35 ++ .../datasources/v2/V2SessionCatalogSuite.scala | 18 +++ .../org/apache/spark/sql/jdbc/JDBCV2Suite.scala| 19 6 files changed, 89 insertions(+), 23 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 67de6c6a29d..7ecd924ea8d 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -575,6 +575,12 @@ ], "sqlState" : "42805" }, + "IDENTIFIER_TOO_MANY_NAME_PARTS" : { +"message" : [ + " is not a valid identifier as it has more than 2 name parts." +], +"sqlState" : "42601" + }, "INCOMPARABLE_PIVOT_COLUMN" : { "message" : [ "Invalid pivot column . Pivot columns must be comparable." 
@@ -2851,11 +2857,6 @@ "Catalog does not support ." ] }, - "_LEGACY_ERROR_TEMP_1185" : { -"message" : [ - " is not a valid as it has more than 2 name parts." -] - }, "_LEGACY_ERROR_TEMP_1186" : { "message" : [ "Multi-part identifier cannot be empty." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index d9f15d84d89..0c9282f9675 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -130,22 +130,20 @@ private[sql] object CatalogV2Implicits { } } +def original: String = ident.namespace() :+ ident.name() mkString "." + def asMultipartIdentifier: Seq[String] = ident.namespace :+ ident.name def asTableIdentifier: TableIdentifier = ident.namespace match { case ns if ns.isEmpty => TableIdentifier(ident.name) case Array(dbName) => TableIdentifier(ident.name, Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - quoted, "TableIdentifier") + case _ => throw QueryCompilationErrors.identifierTooManyNamePartsError(original) } def asFunctionIdentifier: FunctionIdentifier = ident.namespace() match { case ns if ns.isEmpty => FunctionIdentifier(ident.name()) case Array(dbName) => FunctionIdentifier(ident.name(), Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - quoted, "FunctionIdentifier") + case _ => throw QueryCompilationErrors.identifierTooManyNamePartsError(original) } } @@ -159,20 +157,18 @@ private[sql] object CatalogV2Implicits { def asTableIdentifier: TableIdentifier = parts match { case Seq(tblName) => TableIdentifier(tblName) case Seq(dbName, tblName) => TableIdentifier(tblName, Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - 
quoted, "TableIdentifier") + case _ => throw QueryCompilationErrors.identifierTooManyNamePartsError(original) } def asFunctionIdentifier: FunctionIdentifier = parts match { case Seq(funcName) => FunctionIdentifier(funcName) case Seq(dbName, funcName) => FunctionIdentifier(funcName, Some(dbName
[spark] branch branch-3.4 updated: [SPARK-41302][SQL] Assign name to _LEGACY_ERROR_TEMP_1185
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 8aaa655da37 [SPARK-41302][SQL] Assign name to _LEGACY_ERROR_TEMP_1185 8aaa655da37 is described below commit 8aaa655da37616c004ab12de6983bcc368212d43 Author: narek_karapetian AuthorDate: Sat Feb 4 15:18:53 2023 +0300 [SPARK-41302][SQL] Assign name to _LEGACY_ERROR_TEMP_1185 ### What changes were proposed in this pull request? This PR proposes to assign name to `_LEGACY_ERROR_TEMP_1185` -> `IDENTIFIER_TOO_MANY_NAME_PARTS` ### Why are the changes needed? We should assign proper name to LEGACY_ERROR_TEMP* ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Additional test cases were added Closes #39723 from NarekDW/SPARK-41302. Lead-authored-by: narek_karapetian Co-authored-by: Narek Karapetian Signed-off-by: Max Gekk (cherry picked from commit c49415412e3027a171c2691df97fe8757f26a0aa) Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 11 +++ .../sql/connector/catalog/CatalogV2Implicits.scala | 20 + .../spark/sql/errors/QueryCompilationErrors.scala | 9 ++ .../sql/errors/QueryCompilationErrorsSuite.scala | 35 ++ .../datasources/v2/V2SessionCatalogSuite.scala | 18 +++ .../org/apache/spark/sql/jdbc/JDBCV2Suite.scala| 19 6 files changed, 89 insertions(+), 23 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 069f10423a5..42de98ccb87 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -575,6 +575,12 @@ ], "sqlState" : "42805" }, + "IDENTIFIER_TOO_MANY_NAME_PARTS" : { +"message" : [ + " is not a valid identifier as it has more than 2 name parts." 
+], +"sqlState" : "42601" + }, "INCOMPARABLE_PIVOT_COLUMN" : { "message" : [ "Invalid pivot column . Pivot columns must be comparable." @@ -2851,11 +2857,6 @@ "Catalog does not support ." ] }, - "_LEGACY_ERROR_TEMP_1185" : { -"message" : [ - " is not a valid as it has more than 2 name parts." -] - }, "_LEGACY_ERROR_TEMP_1186" : { "message" : [ "Multi-part identifier cannot be empty." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index d9f15d84d89..0c9282f9675 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -130,22 +130,20 @@ private[sql] object CatalogV2Implicits { } } +def original: String = ident.namespace() :+ ident.name() mkString "." + def asMultipartIdentifier: Seq[String] = ident.namespace :+ ident.name def asTableIdentifier: TableIdentifier = ident.namespace match { case ns if ns.isEmpty => TableIdentifier(ident.name) case Array(dbName) => TableIdentifier(ident.name, Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - quoted, "TableIdentifier") + case _ => throw QueryCompilationErrors.identifierTooManyNamePartsError(original) } def asFunctionIdentifier: FunctionIdentifier = ident.namespace() match { case ns if ns.isEmpty => FunctionIdentifier(ident.name()) case Array(dbName) => FunctionIdentifier(ident.name(), Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - quoted, "FunctionIdentifier") + case _ => throw QueryCompilationErrors.identifierTooManyNamePartsError(original) } } @@ -159,20 +157,18 @@ private[sql] object CatalogV2Implicits { def asTableIdentifier: TableIdentifier = parts match { case Seq(tblName) => TableIdentifier(tblName) case Seq(dbName, 
tblName) => TableIdentifier(tblName, Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - quoted, "TableIdentifier") + case _ => throw QueryCompilationErrors.identifierTooManyNamePartsError(original) } def asFunctionIdentifier: FunctionIdentifier = parts match { case Seq(funcName)
[spark] branch branch-3.4 updated: [SPARK-42238][SQL] Introduce new error class: `INCOMPATIBLE_JOIN_TYPES`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 5b40e8feb56 [SPARK-42238][SQL] Introduce new error class: `INCOMPATIBLE_JOIN_TYPES` 5b40e8feb56 is described below commit 5b40e8feb56a3a595bee03e0c5c096266f5c3c63 Author: itholic AuthorDate: Sat Feb 4 13:15:25 2023 +0300 [SPARK-42238][SQL] Introduce new error class: `INCOMPATIBLE_JOIN_TYPES` ### What changes were proposed in this pull request? This PR proposes to introduce new error class `INCOMPATIBLE_JOIN_TYPES` to improve the error message for incompatible join type usage. ### Why are the changes needed? The existing error classes `LATERAL_NATURAL_JOIN` and `NATURAL_CROSS_JOIN` are not logically belong under `UNSUPPORTED_FEATURE`, and their error message is not very clear to understand for end-users. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated UTs. Closes #39805 from itholic/NATURAL_CROSS_JOIN. Authored-by: itholic Signed-off-by: Max Gekk (cherry picked from commit d9c0e8754d1c24ee49f9ee13efa60a5e78b18172) Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 16 ++-- .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 8 ++-- .../apache/spark/sql/errors/QueryParsingErrors.scala | 18 -- .../spark/sql/catalyst/parser/PlanParserSuite.scala| 5 +++-- .../resources/sql-tests/results/join-lateral.sql.out | 8 ++-- .../spark/sql/errors/QueryParsingErrorsSuite.scala | 10 ++ 6 files changed, 35 insertions(+), 30 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 8bcfc527eaa..069f10423a5 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -586,6 +586,12 @@ "Detected an incompatible DataSourceRegister. 
Please remove the incompatible library from classpath or upgrade it. Error: " ] }, + "INCOMPATIBLE_JOIN_TYPES" : { +"message" : [ + "The join types and are incompatible." +], +"sqlState" : "42613" + }, "INCOMPATIBLE_VIEW_SCHEMA_CHANGE" : { "message" : [ "The SQL query of view has an incompatible schema change and column cannot be resolved. Expected columns named but got .", @@ -1559,11 +1565,6 @@ "JOIN USING with LATERAL correlation." ] }, - "LATERAL_NATURAL_JOIN" : { -"message" : [ - "NATURAL join with LATERAL correlation." -] - }, "LITERAL_TYPE" : { "message" : [ "Literal for '' of ." @@ -1579,11 +1580,6 @@ "The target JDBC server hosting table does not support ALTER TABLE with multiple actions. Split the ALTER TABLE up into individual actions to avoid this error." ] }, - "NATURAL_CROSS_JOIN" : { -"message" : [ - "NATURAL CROSS JOIN." -] - }, "ORC_TYPE_CAST" : { "message" : [ "Unable to convert of Orc to data type ." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index d2a1cb1eb16..dfc6e21d4a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1331,10 +1331,14 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit throw new IllegalStateException(s"Unimplemented joinCriteria: $c") case None if ctx.NATURAL != null => if (ctx.LATERAL != null) { -throw QueryParsingErrors.lateralJoinWithNaturalJoinUnsupportedError(ctx) +throw QueryParsingErrors.incompatibleJoinTypesError( + joinType1 = ctx.LATERAL.toString, joinType2 = ctx.NATURAL.toString, ctx = ctx +) } if (baseJoinType == Cross) { -throw QueryParsingErrors.naturalCrossJoinUnsupportedError(ctx) +throw QueryParsingErrors.incompatibleJoinTypesError( + joinType1 = ctx.NATURAL.toString, joinType2 = baseJoinType.toString, ctx = 
ctx +) } (NaturalJoin(baseJoinType), None) case None => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/
[spark] branch master updated: [SPARK-42238][SQL] Introduce new error class: `INCOMPATIBLE_JOIN_TYPES`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new d9c0e8754d1 [SPARK-42238][SQL] Introduce new error class: `INCOMPATIBLE_JOIN_TYPES` d9c0e8754d1 is described below commit d9c0e8754d1c24ee49f9ee13efa60a5e78b18172 Author: itholic AuthorDate: Sat Feb 4 13:15:25 2023 +0300 [SPARK-42238][SQL] Introduce new error class: `INCOMPATIBLE_JOIN_TYPES` ### What changes were proposed in this pull request? This PR proposes to introduce new error class `INCOMPATIBLE_JOIN_TYPES` to improve the error message for incompatible join type usage. ### Why are the changes needed? The existing error classes `LATERAL_NATURAL_JOIN` and `NATURAL_CROSS_JOIN` are not logically belong under `UNSUPPORTED_FEATURE`, and their error message is not very clear to understand for end-users. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated UTs. Closes #39805 from itholic/NATURAL_CROSS_JOIN. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 16 ++-- .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 8 ++-- .../apache/spark/sql/errors/QueryParsingErrors.scala | 18 -- .../spark/sql/catalyst/parser/PlanParserSuite.scala| 5 +++-- .../resources/sql-tests/results/join-lateral.sql.out | 8 ++-- .../spark/sql/errors/QueryParsingErrorsSuite.scala | 10 ++ 6 files changed, 35 insertions(+), 30 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index d6a95b22b50..67de6c6a29d 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -586,6 +586,12 @@ "Detected an incompatible DataSourceRegister. Please remove the incompatible library from classpath or upgrade it. 
Error: " ] }, + "INCOMPATIBLE_JOIN_TYPES" : { +"message" : [ + "The join types and are incompatible." +], +"sqlState" : "42613" + }, "INCOMPATIBLE_VIEW_SCHEMA_CHANGE" : { "message" : [ "The SQL query of view has an incompatible schema change and column cannot be resolved. Expected columns named but got .", @@ -1559,11 +1565,6 @@ "JOIN USING with LATERAL correlation." ] }, - "LATERAL_NATURAL_JOIN" : { -"message" : [ - "NATURAL join with LATERAL correlation." -] - }, "LITERAL_TYPE" : { "message" : [ "Literal for '' of ." @@ -1579,11 +1580,6 @@ "The target JDBC server hosting table does not support ALTER TABLE with multiple actions. Split the ALTER TABLE up into individual actions to avoid this error." ] }, - "NATURAL_CROSS_JOIN" : { -"message" : [ - "NATURAL CROSS JOIN." -] - }, "ORC_TYPE_CAST" : { "message" : [ "Unable to convert of Orc to data type ." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index d2a1cb1eb16..dfc6e21d4a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1331,10 +1331,14 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit throw new IllegalStateException(s"Unimplemented joinCriteria: $c") case None if ctx.NATURAL != null => if (ctx.LATERAL != null) { -throw QueryParsingErrors.lateralJoinWithNaturalJoinUnsupportedError(ctx) +throw QueryParsingErrors.incompatibleJoinTypesError( + joinType1 = ctx.LATERAL.toString, joinType2 = ctx.NATURAL.toString, ctx = ctx +) } if (baseJoinType == Cross) { -throw QueryParsingErrors.naturalCrossJoinUnsupportedError(ctx) +throw QueryParsingErrors.incompatibleJoinTypesError( + joinType1 = ctx.NATURAL.toString, joinType2 = baseJoinType.toString, ctx = ctx +) } (NaturalJoin(baseJoinType), None) case None => diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.sc
[spark] branch master updated: [SPARK-42297][SQL] Assign name to _LEGACY_ERROR_TEMP_2412
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 69229a5dc8b [SPARK-42297][SQL] Assign name to _LEGACY_ERROR_TEMP_2412 69229a5dc8b is described below commit 69229a5dc8b7614352ea57cc88e93b1154115760 Author: itholic AuthorDate: Sat Feb 4 11:59:52 2023 +0300 [SPARK-42297][SQL] Assign name to _LEGACY_ERROR_TEMP_2412 ### What changes were proposed in this pull request? This PR proposes to assign name to _LEGACY_ERROR_TEMP_2412, "UNSUPPORTED_EXPR_FOR_WINDOW". ### Why are the changes needed? We should assign proper name to _LEGACY_ERROR_TEMP_* ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*"` Closes #39869 from itholic/LEGACY_2412. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 11 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 4 ++-- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 5 +++-- .../sql/errors/QueryExecutionErrorsSuite.scala | 25 ++ 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 030c65e2056..d6a95b22b50 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1483,6 +1483,12 @@ "Invalid expressions: []" ] }, + "UNSUPPORTED_EXPR_FOR_WINDOW" : { +"message" : [ + "Expression not supported within a window function." +], +"sqlState" : "42P20" + }, "UNSUPPORTED_FEATURE" : { "message" : [ "The feature is not supported:" @@ -5172,11 +5178,6 @@ "Cannot specify order by or frame for ''." ] }, - "_LEGACY_ERROR_TEMP_2412" : { -"message" : [ - "Expression '' not supported within a window function." 
-] - }, "_LEGACY_ERROR_TEMP_2413" : { "message" : [ "Input argument to must be a constant." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 57852bd950d..cca54a8742d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -320,8 +320,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB case f: PythonUDF if PythonUDF.isWindowPandasUDF(f) => // OK case other => other.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2412", - messageParameters = Map("sqlExpr" -> other.toString)) + errorClass = "UNSUPPORTED_EXPR_FOR_WINDOW", + messageParameters = Map("sqlExpr" -> toSQLExpr(other))) } case s: SubqueryExpression => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 56bb8b0ccc2..71d3deb36c2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -172,7 +172,7 @@ class AnalysisErrorSuite extends AnalysisTest { "inputType" -> "\"DATE\"", "requiredType" -> "\"INT\"")) - errorTest( + errorClassTest( "invalid window function", testRelation2.select( WindowExpression( @@ -181,7 +181,8 @@ class AnalysisErrorSuite extends AnalysisTest { UnresolvedAttribute("a") :: Nil, SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, UnspecifiedFrame)).as("window")), -"not supported within a window function" :: Nil) +errorClass = "UNSUPPORTED_EXPR_FOR_WINDOW", +messageParameters = Map("sqlExpr" -> "\"0\"")) errorTest( "distinct aggregate function in window", diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryE
[spark] branch branch-3.4 updated: [SPARK-42297][SQL] Assign name to _LEGACY_ERROR_TEMP_2412
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 0f99b20164a [SPARK-42297][SQL] Assign name to _LEGACY_ERROR_TEMP_2412 0f99b20164a is described below commit 0f99b20164a8b0301a842ae9df1cd1ebd9da0ba7 Author: itholic AuthorDate: Sat Feb 4 11:59:52 2023 +0300 [SPARK-42297][SQL] Assign name to _LEGACY_ERROR_TEMP_2412 ### What changes were proposed in this pull request? This PR proposes to assign name to _LEGACY_ERROR_TEMP_2412, "UNSUPPORTED_EXPR_FOR_WINDOW". ### Why are the changes needed? We should assign proper name to _LEGACY_ERROR_TEMP_* ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*"` Closes #39869 from itholic/LEGACY_2412. Authored-by: itholic Signed-off-by: Max Gekk (cherry picked from commit 69229a5dc8b7614352ea57cc88e93b1154115760) Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 11 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 4 ++-- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 5 +++-- .../sql/errors/QueryExecutionErrorsSuite.scala | 25 ++ 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 7cd70bda8bb..8bcfc527eaa 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1483,6 +1483,12 @@ "Invalid expressions: []" ] }, + "UNSUPPORTED_EXPR_FOR_WINDOW" : { +"message" : [ + "Expression not supported within a window function." +], +"sqlState" : "42P20" + }, "UNSUPPORTED_FEATURE" : { "message" : [ "The feature is not supported:" @@ -5172,11 +5178,6 @@ "Cannot specify order by or frame for ''." 
] }, - "_LEGACY_ERROR_TEMP_2412" : { -"message" : [ - "Expression '' not supported within a window function." -] - }, "_LEGACY_ERROR_TEMP_2413" : { "message" : [ "Input argument to must be a constant." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 57852bd950d..cca54a8742d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -320,8 +320,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB case f: PythonUDF if PythonUDF.isWindowPandasUDF(f) => // OK case other => other.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2412", - messageParameters = Map("sqlExpr" -> other.toString)) + errorClass = "UNSUPPORTED_EXPR_FOR_WINDOW", + messageParameters = Map("sqlExpr" -> toSQLExpr(other))) } case s: SubqueryExpression => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 56bb8b0ccc2..71d3deb36c2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -172,7 +172,7 @@ class AnalysisErrorSuite extends AnalysisTest { "inputType" -> "\"DATE\"", "requiredType" -> "\"INT\"")) - errorTest( + errorClassTest( "invalid window function", testRelation2.select( WindowExpression( @@ -181,7 +181,8 @@ class AnalysisErrorSuite extends AnalysisTest { UnresolvedAttribute("a") :: Nil, SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, UnspecifiedFrame)).as("window")), -"not supported within a window function" :: Nil) +errorClass = "UNSUPPORTED_EXPR_FOR_WINDOW", 
+messageParameters = Map("sqlExpr" -> "\"0\"")) errorTest( "distinct aggregate function in window", diff --git a/sql/core/src/t
[spark] branch branch-3.4 updated: [SPARK-41985][SQL][FOLLOWUP] Remove alias in GROUP BY only when the expr is resolved
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 0106e17fca6 [SPARK-41985][SQL][FOLLOWUP] Remove alias in GROUP BY only when the expr is resolved 0106e17fca6 is described below commit 0106e17fca6f1c51f737af67097fd28d89329b20 Author: Wenchen Fan AuthorDate: Fri Feb 3 15:40:33 2023 +0300 [SPARK-41985][SQL][FOLLOWUP] Remove alias in GROUP BY only when the expr is resolved ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/39508 to fix a regression. We should not remove aliases from grouping expressions if they are not resolved, as the alias may be necessary for resolution, such as `CreateNamedStruct`. ### Why are the changes needed? fix a regression ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new test Closes #39867 from cloud-fan/column. 
Lead-authored-by: Wenchen Fan Co-authored-by: Wenchen Fan Signed-off-by: Max Gekk (cherry picked from commit 02b39f0b880a2ecf63167355d9644e91c98588a8) Signed-off-by: Max Gekk --- .../sql/catalyst/analysis/ResolveReferencesInAggregate.scala | 8 +++- sql/core/src/test/resources/sql-tests/inputs/group-by.sql | 3 +++ .../src/test/resources/sql-tests/results/group-by.sql.out | 11 +++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala index 4af2ecc91ab..1a9ed4ce16e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala @@ -96,7 +96,13 @@ object ResolveReferencesInAggregate extends SQLConfHelper // can't find the grouping expressions via `semanticEquals` and the analysis will fail. // Example rules: ResolveGroupingAnalytics (See SPARK-31670 for more details) and // ResolveLateralColumnAliasReference. - groupingExpressions = resolvedGroupExprs.map(trimAliases), + groupingExpressions = resolvedGroupExprs.map { e => +// Only trim the alias if the expression is resolved, as the alias may be needed to resolve +// the expression, such as `NamePlaceHolder` in `CreateNamedStruct`. +// Note: this rule will be invoked even if the Aggregate is fully resolved. So alias in +// GROUP BY will be removed eventually, by following iterations. 
+if (e.resolved) trimAliases(e) else e + }, aggregateExpressions = resolvedAggExprsWithOuter) } diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 1615c43cc7e..c812403ba2c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -34,6 +34,9 @@ SELECT a + b, COUNT(b) FROM testData GROUP BY a + b; SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1; SELECT a + 1 + 1, COUNT(b) FROM testData GROUP BY a + 1; +-- struct() in group by +SELECT count(1) FROM testData GROUP BY struct(a + 0.1 AS aa); + -- Aggregate with nulls. SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), COUNT(a) FROM testData; diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 0402039fafa..6e7592d6978 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -145,6 +145,17 @@ struct<((a + 1) + 1):int,count(b):bigint> NULL 1 +-- !query +SELECT count(1) FROM testData GROUP BY struct(a + 0.1 AS aa) +-- !query schema +struct +-- !query output +2 +2 +2 +3 + + -- !query SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), COUNT(a) FROM testData - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41985][SQL][FOLLOWUP] Remove alias in GROUP BY only when the expr is resolved
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 02b39f0b880 [SPARK-41985][SQL][FOLLOWUP] Remove alias in GROUP BY only when the expr is resolved 02b39f0b880 is described below commit 02b39f0b880a2ecf63167355d9644e91c98588a8 Author: Wenchen Fan AuthorDate: Fri Feb 3 15:40:33 2023 +0300 [SPARK-41985][SQL][FOLLOWUP] Remove alias in GROUP BY only when the expr is resolved ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/39508 to fix a regression. We should not remove aliases from grouping expressions if they are not resolved, as the alias may be necessary for resolution, such as `CreateNamedStruct`. ### Why are the changes needed? fix a regression ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new test Closes #39867 from cloud-fan/column. 
Lead-authored-by: Wenchen Fan Co-authored-by: Wenchen Fan Signed-off-by: Max Gekk --- .../sql/catalyst/analysis/ResolveReferencesInAggregate.scala | 8 +++- sql/core/src/test/resources/sql-tests/inputs/group-by.sql | 3 +++ .../src/test/resources/sql-tests/results/group-by.sql.out | 11 +++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala index 4af2ecc91ab..1a9ed4ce16e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala @@ -96,7 +96,13 @@ object ResolveReferencesInAggregate extends SQLConfHelper // can't find the grouping expressions via `semanticEquals` and the analysis will fail. // Example rules: ResolveGroupingAnalytics (See SPARK-31670 for more details) and // ResolveLateralColumnAliasReference. - groupingExpressions = resolvedGroupExprs.map(trimAliases), + groupingExpressions = resolvedGroupExprs.map { e => +// Only trim the alias if the expression is resolved, as the alias may be needed to resolve +// the expression, such as `NamePlaceHolder` in `CreateNamedStruct`. +// Note: this rule will be invoked even if the Aggregate is fully resolved. So alias in +// GROUP BY will be removed eventually, by following iterations. 
+if (e.resolved) trimAliases(e) else e + }, aggregateExpressions = resolvedAggExprsWithOuter) } diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 1615c43cc7e..c812403ba2c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -34,6 +34,9 @@ SELECT a + b, COUNT(b) FROM testData GROUP BY a + b; SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1; SELECT a + 1 + 1, COUNT(b) FROM testData GROUP BY a + 1; +-- struct() in group by +SELECT count(1) FROM testData GROUP BY struct(a + 0.1 AS aa); + -- Aggregate with nulls. SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), COUNT(a) FROM testData; diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 0402039fafa..6e7592d6978 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -145,6 +145,17 @@ struct<((a + 1) + 1):int,count(b):bigint> NULL 1 +-- !query +SELECT count(1) FROM testData GROUP BY struct(a + 0.1 AS aa) +-- !query schema +struct +-- !query output +2 +2 +2 +3 + + -- !query SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), COUNT(a) FROM testData - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.4 updated: [SPARK-42234][SQL] Rename error class: `UNSUPPORTED_FEATURE.REPEATED_PIVOT`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 5ca47b63328 [SPARK-42234][SQL] Rename error class: `UNSUPPORTED_FEATURE.REPEATED_PIVOT` 5ca47b63328 is described below commit 5ca47b63328faf97c0b09af67f51814274c5f9bc Author: itholic AuthorDate: Fri Feb 3 15:17:00 2023 +0300 [SPARK-42234][SQL] Rename error class: `UNSUPPORTED_FEATURE.REPEATED_PIVOT` ### What changes were proposed in this pull request? This PR proposes to rename `UNSUPPORTED_FEATURE.REPEATED_PIVOT` to `REPEATED_CLAUSE`. ### Why are the changes needed? `REPEATED_PIVOT` is actually not an `UNSUPPORTED_FEATURE`, and there must be other cases we should cover in more generic way ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated UTs. Closes #39795 from itholic/REPEATED_PIVOT. Lead-authored-by: itholic Co-authored-by: Haejoon Lee <44108233+itho...@users.noreply.github.com> Signed-off-by: Max Gekk (cherry picked from commit a916a059100a53583fb987b47ffde5745627fdb8) Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 11 ++- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala| 6 +++--- .../scala/org/apache/spark/sql/RelationalGroupedDataset.scala | 4 +++- .../apache/spark/sql/errors/QueryExecutionErrorsSuite.scala | 6 +++--- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 0a929d5f48e..7cd70bda8bb 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1208,6 +1208,12 @@ ], "sqlState" : "42K03" }, + "REPEATED_CLAUSE" : { +"message" : [ + "The clause may be used at most once per operation." 
+], +"sqlState" : "42614" + }, "ROUTINE_ALREADY_EXISTS" : { "message" : [ "Cannot create the function because it already exists.", @@ -1597,11 +1603,6 @@ "Python UDF in the ON clause of a JOIN. In case of an INNNER JOIN consider rewriting to a CROSS JOIN with a WHERE clause." ] }, - "REPEATED_PIVOT" : { -"message" : [ - "Repeated PIVOT operation." -] - }, "SET_NAMESPACE_PROPERTY" : { "message" : [ " is a reserved namespace property, ." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index c64c26e510b..b3bd7b727bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2602,10 +2602,10 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { cause = null) } - def repeatedPivotsUnsupportedError(): Throwable = { + def repeatedPivotsUnsupportedError(clause: String, operation: String): Throwable = { new SparkUnsupportedOperationException( - errorClass = "UNSUPPORTED_FEATURE.REPEATED_PIVOT", - messageParameters = Map.empty[String, String]) + errorClass = "REPEATED_CLAUSE", + messageParameters = Map("clause" -> clause, "operation" -> operation)) } def pivotNotAfterGroupByUnsupportedError(): Throwable = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index 61517de0dfa..b168bbc4b42 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -483,7 +483,9 @@ class RelationalGroupedDataset protected[sql]( groupingExprs, RelationalGroupedDataset.PivotType(pivotColumn.expr, valueExprs)) case _: RelationalGroupedDataset.PivotType => -throw 
QueryExecutionErrors.repeatedPivotsUnsupportedError() +throw QueryExecutionErrors.repeatedPivotsUnsupportedError( + clause = "PIVOT", operation = "SUBQUERY" +) case _ => throw QueryExecutionErrors.pivotNotAfterGroupByUnsupportedError() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/Que
[spark] branch master updated: [SPARK-42234][SQL] Rename error class: `UNSUPPORTED_FEATURE.REPEATED_PIVOT`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new a916a059100 [SPARK-42234][SQL] Rename error class: `UNSUPPORTED_FEATURE.REPEATED_PIVOT` a916a059100 is described below commit a916a059100a53583fb987b47ffde5745627fdb8 Author: itholic AuthorDate: Fri Feb 3 15:17:00 2023 +0300 [SPARK-42234][SQL] Rename error class: `UNSUPPORTED_FEATURE.REPEATED_PIVOT` ### What changes were proposed in this pull request? This PR proposes to rename `UNSUPPORTED_FEATURE.REPEATED_PIVOT` to `REPEATED_CLAUSE`. ### Why are the changes needed? `REPEATED_PIVOT` is actually not an `UNSUPPORTED_FEATURE`, and there must be other cases we should cover in more generic way ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated UTs. Closes #39795 from itholic/REPEATED_PIVOT. Lead-authored-by: itholic Co-authored-by: Haejoon Lee <44108233+itho...@users.noreply.github.com> Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 11 ++- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala| 6 +++--- .../scala/org/apache/spark/sql/RelationalGroupedDataset.scala | 4 +++- .../apache/spark/sql/errors/QueryExecutionErrorsSuite.scala | 6 +++--- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 178eda8ce11..030c65e2056 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1208,6 +1208,12 @@ ], "sqlState" : "42K03" }, + "REPEATED_CLAUSE" : { +"message" : [ + "The clause may be used at most once per operation." 
+], +"sqlState" : "42614" + }, "ROUTINE_ALREADY_EXISTS" : { "message" : [ "Cannot create the function because it already exists.", @@ -1597,11 +1603,6 @@ "Python UDF in the ON clause of a JOIN. In case of an INNNER JOIN consider rewriting to a CROSS JOIN with a WHERE clause." ] }, - "REPEATED_PIVOT" : { -"message" : [ - "Repeated PIVOT operation." -] - }, "SET_NAMESPACE_PROPERTY" : { "message" : [ " is a reserved namespace property, ." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index c64c26e510b..b3bd7b727bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2602,10 +2602,10 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { cause = null) } - def repeatedPivotsUnsupportedError(): Throwable = { + def repeatedPivotsUnsupportedError(clause: String, operation: String): Throwable = { new SparkUnsupportedOperationException( - errorClass = "UNSUPPORTED_FEATURE.REPEATED_PIVOT", - messageParameters = Map.empty[String, String]) + errorClass = "REPEATED_CLAUSE", + messageParameters = Map("clause" -> clause, "operation" -> operation)) } def pivotNotAfterGroupByUnsupportedError(): Throwable = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index 61517de0dfa..b168bbc4b42 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -483,7 +483,9 @@ class RelationalGroupedDataset protected[sql]( groupingExprs, RelationalGroupedDataset.PivotType(pivotColumn.expr, valueExprs)) case _: RelationalGroupedDataset.PivotType => -throw 
QueryExecutionErrors.repeatedPivotsUnsupportedError() +throw QueryExecutionErrors.repeatedPivotsUnsupportedError( + clause = "PIVOT", operation = "SUBQUERY" +) case _ => throw QueryExecutionErrors.pivotNotAfterGroupByUnsupportedError() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index c679e4f707f..5d4b8e0b0c4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sq
[spark] branch master updated: [MINOR][SQL] Enhance data type check error message
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new a3c6b6b0232 [MINOR][SQL] Enhance data type check error message a3c6b6b0232 is described below commit a3c6b6b0232ddd3f09a2ed4b5e6a2f6e538c6733 Author: Yuming Wang AuthorDate: Fri Feb 3 10:36:44 2023 +0300 [MINOR][SQL] Enhance data type check error message ### What changes were proposed in this pull request? This PR adds the expression to data type check error message. Before This PR: ``` requirement failed: All input types must be the same except nullable, containsNull, valueContainsNull flags. The input types found are DecimalType(30,2) DecimalType(35,2). ``` After this PR: ``` requirement failed: All input types must be the same except nullable, containsNull, valueContainsNull flags. The expression is: CASE WHEN upper(TAX_STATE#472) IN (UK,EU) THEN broundQTY#507 * ITEM_PRICE#506) + coalesce(ITEM_SALES_TAX_AMT#510, 0.00)) / QTY#507), 2) ELSE cast(ITEM_PRICE#506 as decimal(35,2)) END. The input types found are DecimalType(30,2) DecimalType(35,2). ``` ### Why are the changes needed? It is difficult to find out which expression has this issue when there are lots of expressions. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual testing. Closes #39851 from wangyum/Expression. 
Authored-by: Yuming Wang Signed-off-by: Max Gekk --- .../scala/org/apache/spark/sql/catalyst/expressions/Expression.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index de0e90285f5..7d5169ca8ef 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -1298,8 +1298,9 @@ trait ComplexTypeMergingExpression extends Expression { "The collection of input data types must not be empty.") require( TypeCoercion.haveSameType(inputTypesForMerging), - "All input types must be the same except nullable, containsNull, valueContainsNull flags." + -s" The input types found are\n\t${inputTypesForMerging.mkString("\n\t")}") + "All input types must be the same except nullable, containsNull, valueContainsNull flags. " + +s"The expression is: $this. " + +s"The input types found are\n\t${inputTypesForMerging.mkString("\n\t")}.") } private lazy val internalDataType: DataType = { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.4 updated: [SPARK-42232][SQL] Rename error class: `UNSUPPORTED_FEATURE.JDBC_TRANSACTION`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new b3ebb69e387 [SPARK-42232][SQL] Rename error class: `UNSUPPORTED_FEATURE.JDBC_TRANSACTION` b3ebb69e387 is described below commit b3ebb69e3874dc9a30c8b74e3ab6b673fa378e73 Author: itholic AuthorDate: Thu Feb 2 12:16:35 2023 +0300 [SPARK-42232][SQL] Rename error class: `UNSUPPORTED_FEATURE.JDBC_TRANSACTION` ### What changes were proposed in this pull request? This PR proposes to rename error class `UNSUPPORTED_FEATURE.JDBC_TRANSACTION` into `UNSUPPORTED_FEATURE.MULTI_ACTION_ALTER` ### Why are the changes needed? To provide precious and better error message to end-users. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Fixed UTs. Closes #39799 from itholic/JDBC_TRANSACTION. Authored-by: itholic Signed-off-by: Max Gekk (cherry picked from commit 1cae312a74a2e6e2d82e87c09c208380be1a09fb) Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 +- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 6 +++--- .../spark/sql/execution/datasources/jdbc/JdbcUtils.scala | 2 +- .../apache/spark/sql/errors/QueryExecutionErrorsSuite.scala| 8 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index e1598a4bc7b..92a81e78fd9 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1522,11 +1522,6 @@ "INSERT INTO with IF NOT EXISTS in the PARTITION spec." ] }, - "JDBC_TRANSACTION" : { -"message" : [ - "The target JDBC server does not support transactions and can only support ALTER TABLE with a single action." 
-] - }, "LATERAL_COLUMN_ALIAS_IN_AGGREGATE_FUNC" : { "message" : [ "Referencing a lateral column alias in the aggregate function ." @@ -1567,6 +1562,11 @@ "Multiple bucket TRANSFORMs." ] }, + "MULTI_ACTION_ALTER" : { +"message" : [ + "The target JDBC server hosting table does not support ALTER TABLE with multiple actions. Split the ALTER TABLE up into individual actions to avoid this error." +] + }, "NATURAL_CROSS_JOIN" : { "message" : [ "NATURAL CROSS JOIN." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 82429ae1141..c64c26e510b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1039,10 +1039,10 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { messageParameters = Map("n" -> n.toString(), "jdbcNumPartitions" -> jdbcNumPartitions)) } - def transactionUnsupportedByJdbcServerError(): Throwable = { + def multiActionAlterError(tableName: String): Throwable = { new SparkSQLFeatureNotSupportedException( - errorClass = "UNSUPPORTED_FEATURE.JDBC_TRANSACTION", - messageParameters = Map.empty[String, String]) + errorClass = "UNSUPPORTED_FEATURE.MULTI_ACTION_ALTER", + messageParameters = Map("tableName" -> tableName)) } def dataTypeUnsupportedYetError(dataType: DataType): SparkUnsupportedOperationException = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 76599c53db9..4b0d461e237 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -947,7 +947,7 @@ object JdbcUtils extends Logging with 
SQLConfHelper { metaData.getDatabaseMajorVersion)(0)) } else { if (!metaData.supportsTransactions) { -throw QueryExecutionErrors.transactionUnsupportedByJdbcServerError() +throw QueryExecutionErrors.multiActionAlterError(tableName) } else { conn.setAutoCommit(false) val statement = conn.createStatement diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErr
[spark] branch master updated: [SPARK-42232][SQL] Rename error class: `UNSUPPORTED_FEATURE.JDBC_TRANSACTION`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 1cae312a74a [SPARK-42232][SQL] Rename error class: `UNSUPPORTED_FEATURE.JDBC_TRANSACTION` 1cae312a74a is described below commit 1cae312a74a2e6e2d82e87c09c208380be1a09fb Author: itholic AuthorDate: Thu Feb 2 12:16:35 2023 +0300 [SPARK-42232][SQL] Rename error class: `UNSUPPORTED_FEATURE.JDBC_TRANSACTION` ### What changes were proposed in this pull request? This PR proposes to rename error class `UNSUPPORTED_FEATURE.JDBC_TRANSACTION` into `UNSUPPORTED_FEATURE.MULTI_ACTION_ALTER` ### Why are the changes needed? To provide precious and better error message to end-users. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Fixed UTs. Closes #39799 from itholic/JDBC_TRANSACTION. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 +- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 6 +++--- .../spark/sql/execution/datasources/jdbc/JdbcUtils.scala | 2 +- .../apache/spark/sql/errors/QueryExecutionErrorsSuite.scala| 8 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 14ab0c59c30..178eda8ce11 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1522,11 +1522,6 @@ "INSERT INTO with IF NOT EXISTS in the PARTITION spec." ] }, - "JDBC_TRANSACTION" : { -"message" : [ - "The target JDBC server does not support transactions and can only support ALTER TABLE with a single action." -] - }, "LATERAL_COLUMN_ALIAS_IN_AGGREGATE_FUNC" : { "message" : [ "Referencing a lateral column alias in the aggregate function ." @@ -1567,6 +1562,11 @@ "Multiple bucket TRANSFORMs." 
] }, + "MULTI_ACTION_ALTER" : { +"message" : [ + "The target JDBC server hosting table does not support ALTER TABLE with multiple actions. Split the ALTER TABLE up into individual actions to avoid this error." +] + }, "NATURAL_CROSS_JOIN" : { "message" : [ "NATURAL CROSS JOIN." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 82429ae1141..c64c26e510b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1039,10 +1039,10 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { messageParameters = Map("n" -> n.toString(), "jdbcNumPartitions" -> jdbcNumPartitions)) } - def transactionUnsupportedByJdbcServerError(): Throwable = { + def multiActionAlterError(tableName: String): Throwable = { new SparkSQLFeatureNotSupportedException( - errorClass = "UNSUPPORTED_FEATURE.JDBC_TRANSACTION", - messageParameters = Map.empty[String, String]) + errorClass = "UNSUPPORTED_FEATURE.MULTI_ACTION_ALTER", + messageParameters = Map("tableName" -> tableName)) } def dataTypeUnsupportedYetError(dataType: DataType): SparkUnsupportedOperationException = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 76599c53db9..4b0d461e237 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -947,7 +947,7 @@ object JdbcUtils extends Logging with SQLConfHelper { metaData.getDatabaseMajorVersion)(0)) } else { if (!metaData.supportsTransactions) { -throw QueryExecutionErrors.transactionUnsupportedByJdbcServerError() +throw 
QueryExecutionErrors.multiActionAlterError(tableName) } else { conn.setAutoCommit(false) val statement = conn.createStatement diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 70311a2f7b8..c
[spark] branch branch-3.4 updated: [3.4][SPARK-41490][SQL] Assign name to _LEGACY_ERROR_TEMP_2441
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 8dae1245e32 [3.4][SPARK-41490][SQL] Assign name to _LEGACY_ERROR_TEMP_2441 8dae1245e32 is described below commit 8dae1245e32f9e51c8178e03ab3a1c20c856a621 Author: itholic AuthorDate: Thu Feb 2 10:13:18 2023 +0300 [3.4][SPARK-41490][SQL] Assign name to _LEGACY_ERROR_TEMP_2441 ### What changes were proposed in this pull request? backport for https://github.com/apache/spark/pull/39700 ### Why are the changes needed? We should want to include PRs related to error classes into Spark 3.4. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The CI should pass. Closes #39830 from itholic/41490-3.4. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 12 ++-- .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 5 ++--- .../spark/sql/catalyst/analysis/ResolveSubquerySuite.scala | 6 +- .../sql-tests/results/postgreSQL/window_part3.sql.out| 10 -- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 9038968e7f7..e1598a4bc7b 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1471,6 +1471,12 @@ }, "sqlState" : "0A000" }, + "UNSUPPORTED_EXPR_FOR_OPERATOR" : { +"message" : [ + "A query operator contains one or more unsupported expressions. Consider to rewrite it to avoid window functions, aggregate functions, and generator functions in the WHERE clause.", + "Invalid expressions: []" +] + }, "UNSUPPORTED_FEATURE" : { "message" : [ "The feature is not supported:" @@ -5296,12 +5302,6 @@ "in operator ." 
] }, - "_LEGACY_ERROR_TEMP_2441" : { -"message" : [ - "The query operator `` contains one or more unsupported expression types Aggregate, Window or Generate.", - "Invalid expressions: []." -] - }, "_LEGACY_ERROR_TEMP_2443" : { "message" : [ "Multiple definitions of observed metrics named '': ." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 5c00e1ec9f5..57852bd950d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -731,11 +731,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB case other if PlanHelper.specialExpressionsInUnsupportedOperator(other).nonEmpty => val invalidExprSqls = - PlanHelper.specialExpressionsInUnsupportedOperator(other).map(_.sql) + PlanHelper.specialExpressionsInUnsupportedOperator(other).map(toSQLExpr) other.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2441", + errorClass = "UNSUPPORTED_EXPR_FOR_OPERATOR", messageParameters = Map( -"operator" -> other.nodeName, "invalidExprSqls" -> invalidExprSqls.mkString(", "))) // This should not happen, resolved Project or Aggregate should restore or resolve diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala index 7b99153acf9..67265fe6f3b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala @@ -187,7 +187,11 @@ class ResolveSubquerySuite extends AnalysisTest { test("lateral join with unsupported expressions") { val plan = lateralJoin(t1, t0.select(($"a" + $"b").as("c")), 
condition = Some(sum($"a") === sum($"c"))) -assertAnalysisError(plan, Seq("Invalid expressions: [sum(a), sum(c)]")) +assertAnalysisErrorClass( + plan, + expectedErrorClass = "UNSUPPORTED_EXPR_FOR_OPERATOR", + expectedMessageParameters = Map("invalidExprSqls" -> "\"
[spark] branch branch-3.4 updated (a38400413c4 -> 03b2a7da77e)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git from a38400413c4 [SPARK-42217][SQL] Support implicit lateral column alias in queries with Window add 03b2a7da77e [3.4][SPARK-41489][SQL] Assign name to _LEGACY_ERROR_TEMP_2415 No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 10 +- .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 7 --- .../spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 5 +++-- .../apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 14 ++ .../optimizer/ReplaceNullWithFalseInPredicateSuite.scala | 11 +++ 5 files changed, 33 insertions(+), 14 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.4 updated: [3.4][SPARK-41488][SQL] Assign name to _LEGACY_ERROR_TEMP_1176 (and 1177)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new c421b519438 [3.4][SPARK-41488][SQL] Assign name to _LEGACY_ERROR_TEMP_1176 (and 1177) c421b519438 is described below commit c421b51943857a2a50ffbf7fac952b4c53ffae87 Author: itholic AuthorDate: Wed Feb 1 18:53:11 2023 +0300 [3.4][SPARK-41488][SQL] Assign name to _LEGACY_ERROR_TEMP_1176 (and 1177) ### What changes were proposed in this pull request? backport for https://github.com/apache/spark/pull/39705 ### Why are the changes needed? We should want to include PRs related to error classes into Spark 3.4. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The CI should pass. Closes #39833 from itholic/41488-3.4. Lead-authored-by: itholic Co-authored-by: Runyao Chen Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 40 ++--- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../spark/sql/catalyst/parser/AstBuilder.scala | 2 + .../spark/sql/errors/QueryCompilationErrors.scala | 11 +-- .../spark/sql/errors/QueryParsingErrors.scala | 21 + .../sql-tests/inputs/columnresolution-negative.sql | 14 +++ .../results/columnresolution-negative.sql.out | 100 + .../spark/sql/errors/QueryParsingErrorsSuite.scala | 72 +++ .../spark/sql/execution/SQLViewTestSuite.scala | 6 +- 9 files changed, 248 insertions(+), 20 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 1cad00ad417..89a8c2a 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -581,6 +581,35 @@ "Detected an incompatible DataSourceRegister. Please remove the incompatible library from classpath or upgrade it. 
Error: " ] }, + "INCOMPATIBLE_VIEW_SCHEMA_CHANGE" : { +"message" : [ + "The SQL query of view has an incompatible schema change and column cannot be resolved. Expected columns named but got .", + "Please try to re-create the view by running: ." +] + }, + "INCOMPLETE_TYPE_DEFINITION" : { +"message" : [ + "Incomplete complex type:" +], +"subClass" : { + "ARRAY" : { +"message" : [ + "The definition of \"ARRAY\" type is incomplete. You must provide an element type. For example: \"ARRAY\"." +] + }, + "MAP" : { +"message" : [ + "The definition of \"MAP\" type is incomplete. You must provide a key type and a value type. For example: \"MAP\"." +] + }, + "STRUCT" : { +"message" : [ + "The definition of \"STRUCT\" type is incomplete. You must provide at least one field type. For example: \"STRUCT\"." +] + } +}, +"sqlState" : "42K01" + }, "INCONSISTENT_BEHAVIOR_CROSS_VERSION" : { "message" : [ "You may get a different result due to the upgrading to" @@ -2773,17 +2802,6 @@ "Unsupported data type ." ] }, - "_LEGACY_ERROR_TEMP_1176" : { -"message" : [ - "The SQL query of view has an incompatible schema change and column cannot be resolved. Expected columns named but got .", - "Please try to re-create the view by running: ." -] - }, - "_LEGACY_ERROR_TEMP_1177" : { -"message" : [ - "The SQL query of view has an incompatible schema change and column cannot be resolved. Expected columns named but got ." -] - }, "_LEGACY_ERROR_TEMP_1178" : { "message" : [ "The number of partitions can't be specified with unspecified distribution. Invalid writer requirements detected." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 7a92c46577d..bf66afffdde 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2092,7 +2092,7 @@ class Analyzer(override val catalogManager: CatalogManager) val attrCandidates = getAttrCandidates() val matched = attrCandidates.filter(a => resolver(
[spark] branch branch-3.4 updated: [3.4][SPARK-42229][CORE] Migrate `SparkCoreErrors` into error classes
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new c4f750b1e22 [3.4][SPARK-42229][CORE] Migrate `SparkCoreErrors` into error classes c4f750b1e22 is described below commit c4f750b1e221d34c8aaf78d8ead8383c0bb8aeaa Author: itholic AuthorDate: Wed Feb 1 18:46:02 2023 +0300 [3.4][SPARK-42229][CORE] Migrate `SparkCoreErrors` into error classes ### What changes were proposed in this pull request? backport for https://github.com/apache/spark/pull/39791 ### Why are the changes needed? We should want to include PRs related to error classes into Spark 3.4. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The CI should pass. Closes #39832 from itholic/42229-3.4. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 222 ++ .../org/apache/spark/errors/SparkCoreErrors.scala | 260 +++-- 2 files changed, 415 insertions(+), 67 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index f0bbc26aae1..1cad00ad417 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -5288,5 +5288,227 @@ "message" : [ "Operation not allowed: only works on table with location provided: " ] + }, + "_LEGACY_ERROR_TEMP_3000" : { +"message" : [ + "Unexpected Py4J server ." +] + }, + "_LEGACY_ERROR_TEMP_3001" : { +"message" : [ + "EOFException occurred while reading the port number from 's stdout." 
+] + }, + "_LEGACY_ERROR_TEMP_3002" : { +"message" : [ + "Data of type is not supported" +] + }, + "_LEGACY_ERROR_TEMP_3003" : { +"message" : [ + "Could not compute split, block of RDD not found" +] + }, + "_LEGACY_ERROR_TEMP_3004" : { +"message" : [ + "Attempted to use after its blocks have been removed!" +] + }, + "_LEGACY_ERROR_TEMP_3005" : { +"message" : [ + "Histogram on either an empty RDD or RDD containing +/-infinity or NaN" +] + }, + "_LEGACY_ERROR_TEMP_3006" : { +"message" : [ + "empty RDD" +] + }, + "_LEGACY_ERROR_TEMP_3007" : { +"message" : [ + "Checkpoint block not found! Either the executor", + "that originally checkpointed this partition is no longer alive, or the original RDD is", + "unpersisted. If this problem persists, you may consider using `rdd.checkpoint()`", + "instead, which is slower than local checkpointing but more fault-tolerant." +] + }, + "_LEGACY_ERROR_TEMP_3008" : { +"message" : [ + "Cannot use map-side combining with array keys." +] + }, + "_LEGACY_ERROR_TEMP_3009" : { +"message" : [ + "HashPartitioner cannot partition array keys." +] + }, + "_LEGACY_ERROR_TEMP_3010" : { +"message" : [ + "reduceByKeyLocally() does not support array keys" +] + }, + "_LEGACY_ERROR_TEMP_3011" : { +"message" : [ + "This RDD lacks a SparkContext. It could happen in the following cases:", + "(1) RDD transformations and actions are NOT invoked by the driver, but inside of other transformations; for example, rdd1.map(x => rdd2.values.count() * x) is invalid because the values transformation and count action cannot be performed inside of the rdd1.map transformation. For more information, see SPARK-5063.", + "(2) When a Spark Streaming job recovers from checkpoint, this exception will be hit if a reference to an RDD not defined by the streaming job is used in DStream operations. For more information, See SPARK-13758." 
+] + }, + "_LEGACY_ERROR_TEMP_3012" : { +"message" : [ + "Cannot change storage level of an RDD after it was already assigned a level" +] + }, + "_LEGACY_ERROR_TEMP_3013" : { +"message" : [ + "Can only zip RDDs with same number of elements in each partition" +] + }, + "_LEGACY_ERROR_TEMP_3014" : { +"message" : [ + "empty collection" +] + }, + "_LEGACY_ERROR_TEMP_3015" : { +"message" : [ + "countByValueApprox() does not support arrays" +
[spark] branch branch-3.4 updated (f64f146b8c2 -> 5b84b679d8f)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git from f64f146b8c2 [SPARK-42278][SQL] DS V2 pushdown supports JDBC dialects compile `SortOrder` by themselves add 5b84b679d8f [3.4][SPARK-42239][SQL] Integrate `MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY` No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 5 - .../scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 2 +- sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-42236][SQL] Refine `NULLABLE_ARRAY_OR_MAP_ELEMENT`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 1cba3b98160 [SPARK-42236][SQL] Refine `NULLABLE_ARRAY_OR_MAP_ELEMENT` 1cba3b98160 is described below commit 1cba3b98160ad9d7cdf29e84ff0191598177835c Author: itholic AuthorDate: Tue Jan 31 19:35:57 2023 +0300 [SPARK-42236][SQL] Refine `NULLABLE_ARRAY_OR_MAP_ELEMENT` ### What changes were proposed in this pull request? This PR proposes to refine `NULLABLE_ARRAY_OR_MAP_ELEMENT` into main-sub classes structure. `NOT_NULL_CONSTRAINT_VIOLATION` - `ARRAY_ELEMENT` - `MAP_VALUE` ### Why are the changes needed? The name of error class is misleading, and we can make this more generic so that we reuse for various situation. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated & added UTs. Closes #39804 from itholic/NULLABLE_ARRAY_OR_MAP_ELEMENT. 
Authored-by: itholic Signed-off-by: Max Gekk --- .../spark/sql/protobuf/ProtobufDeserializer.scala | 5 +++-- core/src/main/resources/error/error-classes.json | 24 -- .../plans/logical/basicLogicalOperators.scala | 4 ++-- .../spark/sql/errors/QueryCompilationErrors.scala | 16 +-- .../apache/spark/sql/DataFrameToSchemaSuite.scala | 14 - 5 files changed, 50 insertions(+), 13 deletions(-) diff --git a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDeserializer.scala b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDeserializer.scala index 224e22c0f52..37278fab8a3 100644 --- a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDeserializer.scala +++ b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDeserializer.scala @@ -91,7 +91,8 @@ private[sql] class ProtobufDeserializer( val element = iterator.next() if (element == null) { if (!containsNull) { -throw QueryCompilationErrors.nullableArrayOrMapElementError(protoElementPath) +throw QueryCompilationErrors.notNullConstraintViolationArrayElementError( + protoElementPath) } else { elementUpdater.setNullAt(i) } @@ -129,7 +130,7 @@ private[sql] class ProtobufDeserializer( keyWriter(keyUpdater, i, field.getField(keyField)) if (field.getField(valueField) == null) { if (!valueContainsNull) { -throw QueryCompilationErrors.nullableArrayOrMapElementError(protoPath) +throw QueryCompilationErrors.notNullConstraintViolationMapValueError(protoPath) } else { valueUpdater.setNullAt(i) } diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 56d85ed866c..230b616800f 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1038,6 +1038,24 @@ "Operation is not allowed for because it is not a partitioned table." ] }, + "NOT_NULL_CONSTRAINT_VIOLATION" : { +"message" : [ + "Assigning a NULL is not allowed here." 
+], +"subClass" : { + "ARRAY_ELEMENT" : { +"message" : [ + "The array is defined to contain only elements that are NOT NULL." +] + }, + "MAP_VALUE" : { +"message" : [ + "The map is defined to contain only values that are NOT NULL." +] + } +}, +"sqlState" : "42000" + }, "NO_HANDLER_FOR_UDAF" : { "message" : [ "No handler for UDAF ''. Use sparkSession.udf.register(...) instead." @@ -1053,12 +1071,6 @@ "UDF class doesn't implement any UDF interface." ] }, - "NULLABLE_ARRAY_OR_MAP_ELEMENT" : { -"message" : [ - "Array or map at contains nullable element while it's required to be non-nullable." -], -"sqlState" : "42000" - }, "NULLABLE_COLUMN_OR_FIELD" : { "message" : [ "Column or field is nullable while it's required to be non-nullable." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index a8dfb8fbd84..74929bf5d79 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/s
[spark] branch branch-3.4 updated: [SPARK-42236][SQL] Refine `NULLABLE_ARRAY_OR_MAP_ELEMENT`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 94a6f2afa75 [SPARK-42236][SQL] Refine `NULLABLE_ARRAY_OR_MAP_ELEMENT` 94a6f2afa75 is described below commit 94a6f2afa758ad375980e92a876158e1900ff53b Author: itholic AuthorDate: Tue Jan 31 19:35:57 2023 +0300 [SPARK-42236][SQL] Refine `NULLABLE_ARRAY_OR_MAP_ELEMENT` ### What changes were proposed in this pull request? This PR proposes to refine `NULLABLE_ARRAY_OR_MAP_ELEMENT` into main-sub classes structure. `NOT_NULL_CONSTRAINT_VIOLATION` - `ARRAY_ELEMENT` - `MAP_VALUE` ### Why are the changes needed? The name of error class is misleading, and we can make this more generic so that we reuse for various situation. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated & added UTs. Closes #39804 from itholic/NULLABLE_ARRAY_OR_MAP_ELEMENT. 
Authored-by: itholic Signed-off-by: Max Gekk (cherry picked from commit 1cba3b98160ad9d7cdf29e84ff0191598177835c) Signed-off-by: Max Gekk --- .../spark/sql/protobuf/ProtobufDeserializer.scala | 5 +++-- core/src/main/resources/error/error-classes.json | 24 -- .../plans/logical/basicLogicalOperators.scala | 4 ++-- .../spark/sql/errors/QueryCompilationErrors.scala | 16 +-- .../apache/spark/sql/DataFrameToSchemaSuite.scala | 14 - 5 files changed, 50 insertions(+), 13 deletions(-) diff --git a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDeserializer.scala b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDeserializer.scala index 224e22c0f52..37278fab8a3 100644 --- a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDeserializer.scala +++ b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDeserializer.scala @@ -91,7 +91,8 @@ private[sql] class ProtobufDeserializer( val element = iterator.next() if (element == null) { if (!containsNull) { -throw QueryCompilationErrors.nullableArrayOrMapElementError(protoElementPath) +throw QueryCompilationErrors.notNullConstraintViolationArrayElementError( + protoElementPath) } else { elementUpdater.setNullAt(i) } @@ -129,7 +130,7 @@ private[sql] class ProtobufDeserializer( keyWriter(keyUpdater, i, field.getField(keyField)) if (field.getField(valueField) == null) { if (!valueContainsNull) { -throw QueryCompilationErrors.nullableArrayOrMapElementError(protoPath) +throw QueryCompilationErrors.notNullConstraintViolationMapValueError(protoPath) } else { valueUpdater.setNullAt(i) } diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index d881e48d604..b70f03b06a6 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1004,6 +1004,24 @@ "Operation is not allowed for because it is not a partitioned table." 
] }, + "NOT_NULL_CONSTRAINT_VIOLATION" : { +"message" : [ + "Assigning a NULL is not allowed here." +], +"subClass" : { + "ARRAY_ELEMENT" : { +"message" : [ + "The array is defined to contain only elements that are NOT NULL." +] + }, + "MAP_VALUE" : { +"message" : [ + "The map is defined to contain only values that are NOT NULL." +] + } +}, +"sqlState" : "42000" + }, "NO_HANDLER_FOR_UDAF" : { "message" : [ "No handler for UDAF ''. Use sparkSession.udf.register(...) instead." @@ -1019,12 +1037,6 @@ "UDF class doesn't implement any UDF interface." ] }, - "NULLABLE_ARRAY_OR_MAP_ELEMENT" : { -"message" : [ - "Array or map at contains nullable element while it's required to be non-nullable." -], -"sqlState" : "42000" - }, "NULLABLE_COLUMN_OR_FIELD" : { "message" : [ "Column or field is nullable while it's required to be non-nullable." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index a8dfb8fbd84..74929bf5d79 1
[spark] branch master updated: [SPARK-42229][CORE] Migrate `SparkCoreErrors` into error classes
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 11a75371705 [SPARK-42229][CORE] Migrate `SparkCoreErrors` into error classes 11a75371705 is described below commit 11a753717052283e0523dcc6d11e585fcd0e4e6b Author: itholic AuthorDate: Tue Jan 31 11:31:00 2023 +0300 [SPARK-42229][CORE] Migrate `SparkCoreErrors` into error classes ### What changes were proposed in this pull request? This PR proposes to migrate SparkCoreErrors into `_LEGACY_ERROR_TEMP_` error classes. ### Why are the changes needed? To leverage the error message framework for better error message handling. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually tested, the existing CI should pass. Closes #39791 from itholic/SPARK-42229. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 222 ++ .../org/apache/spark/errors/SparkCoreErrors.scala | 260 +++-- 2 files changed, 415 insertions(+), 67 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index e589ea22bf2..20adcfd75a5 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -5294,5 +5294,227 @@ "message" : [ "Operation not allowed: only works on table with location provided: " ] + }, + "_LEGACY_ERROR_TEMP_3000" : { +"message" : [ + "Unexpected Py4J server ." +] + }, + "_LEGACY_ERROR_TEMP_3001" : { +"message" : [ + "EOFException occurred while reading the port number from 's stdout." 
+] + }, + "_LEGACY_ERROR_TEMP_3002" : { +"message" : [ + "Data of type is not supported" +] + }, + "_LEGACY_ERROR_TEMP_3003" : { +"message" : [ + "Could not compute split, block of RDD not found" +] + }, + "_LEGACY_ERROR_TEMP_3004" : { +"message" : [ + "Attempted to use after its blocks have been removed!" +] + }, + "_LEGACY_ERROR_TEMP_3005" : { +"message" : [ + "Histogram on either an empty RDD or RDD containing +/-infinity or NaN" +] + }, + "_LEGACY_ERROR_TEMP_3006" : { +"message" : [ + "empty RDD" +] + }, + "_LEGACY_ERROR_TEMP_3007" : { +"message" : [ + "Checkpoint block not found! Either the executor", + "that originally checkpointed this partition is no longer alive, or the original RDD is", + "unpersisted. If this problem persists, you may consider using `rdd.checkpoint()`", + "instead, which is slower than local checkpointing but more fault-tolerant." +] + }, + "_LEGACY_ERROR_TEMP_3008" : { +"message" : [ + "Cannot use map-side combining with array keys." +] + }, + "_LEGACY_ERROR_TEMP_3009" : { +"message" : [ + "HashPartitioner cannot partition array keys." +] + }, + "_LEGACY_ERROR_TEMP_3010" : { +"message" : [ + "reduceByKeyLocally() does not support array keys" +] + }, + "_LEGACY_ERROR_TEMP_3011" : { +"message" : [ + "This RDD lacks a SparkContext. It could happen in the following cases:", + "(1) RDD transformations and actions are NOT invoked by the driver, but inside of other transformations; for example, rdd1.map(x => rdd2.values.count() * x) is invalid because the values transformation and count action cannot be performed inside of the rdd1.map transformation. For more information, see SPARK-5063.", + "(2) When a Spark Streaming job recovers from checkpoint, this exception will be hit if a reference to an RDD not defined by the streaming job is used in DStream operations. For more information, See SPARK-13758." 
+] + }, + "_LEGACY_ERROR_TEMP_3012" : { +"message" : [ + "Cannot change storage level of an RDD after it was already assigned a level" +] + }, + "_LEGACY_ERROR_TEMP_3013" : { +"message" : [ + "Can only zip RDDs with same number of elements in each partition" +] + }, + "_LEGACY_ERROR_TEMP_3014" : { +"message" : [ + "empty collection" +] + }, + "_LEGACY_ERROR_TEMP_3015" : { +"message" : [ + "countByVa
[spark] branch master updated: [SPARK-42239][SQL] Integrate `MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 3aa92fb37e2 [SPARK-42239][SQL] Integrate `MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY` 3aa92fb37e2 is described below commit 3aa92fb37e2b26b9736bfc726715e64f77a46dff Author: itholic AuthorDate: Mon Jan 30 22:56:57 2023 +0300 [SPARK-42239][SQL] Integrate `MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY` ### What changes were proposed in this pull request? This is raised from https://github.com/apache/spark/pull/39543/files#r1082159637, to integrate the `MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY` and `MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY_OUTPUT` ### Why are the changes needed? Logically, `MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY` and `MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY_OUTPUT` are exactly the same. We should deduplicate the error classes when possible. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated the existing UT. Closes #39806 from itholic/MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY1. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 5 - .../scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 2 +- sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index e6dcdf1bed8..172308a8713 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1703,11 +1703,6 @@ "Correlated scalar subqueries must be aggregated to return at most one row." ] }, - "MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY_OUTPUT" : { -"message" : [ - "The output of a correlated scalar subquery must be aggregated." 
-] - }, "NON_CORRELATED_COLUMNS_IN_GROUP_BY" : { "message" : [ "A GROUP BY clause in a scalar correlated subquery cannot contain non-correlated columns: ." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index bccced3dff6..bc7b031a738 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -868,7 +868,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB if (aggregates.isEmpty) { expr.failAnalysis( errorClass = "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY." + -"MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY_OUTPUT", +"MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY", messageParameters = Map.empty) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 6c204e40670..5d667bbdd8c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -558,7 +558,7 @@ class SubquerySuite extends QueryTest checkErrorMatchPVals( exception2, errorClass = "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY." + -"MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY_OUTPUT", +"MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY", parameters = Map.empty[String, String], sqlState = None, context = ExpectedContext( - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.4 updated: [SPARK-42233][SQL] Improve error message for `PIVOT_AFTER_GROUP_BY`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 6b156ac067b [SPARK-42233][SQL] Improve error message for `PIVOT_AFTER_GROUP_BY` 6b156ac067b is described below commit 6b156ac067b33f342beac5b8443f521ee44ba87f Author: itholic AuthorDate: Mon Jan 30 16:50:42 2023 +0300 [SPARK-42233][SQL] Improve error message for `PIVOT_AFTER_GROUP_BY` ### What changes were proposed in this pull request? This PR proposes to improve error message for `PIVOT_AFTER_GROUP_BY` to give users better error message. ### Why are the changes needed? The current error message only shows the cause, not a solution. We should provide proper solution as well in the error message. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The existing CI should pass. Closes #39793 from itholic/PIVOT_AFTER_GROUP_BY. Authored-by: itholic Signed-off-by: Max Gekk (cherry picked from commit c1bee1058667b631ed4e027ebb9791698023e9c9) Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 8975fe279c2..af5e17d56d4 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1523,7 +1523,7 @@ }, "PIVOT_AFTER_GROUP_BY" : { "message" : [ - "PIVOT clause following a GROUP BY clause." + "PIVOT clause following a GROUP BY clause. Consider pushing the GROUP BY into a subquery." ] }, "PIVOT_TYPE" : { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-42233][SQL] Improve error message for `PIVOT_AFTER_GROUP_BY`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c1bee105866 [SPARK-42233][SQL] Improve error message for `PIVOT_AFTER_GROUP_BY` c1bee105866 is described below commit c1bee1058667b631ed4e027ebb9791698023e9c9 Author: itholic AuthorDate: Mon Jan 30 16:50:42 2023 +0300 [SPARK-42233][SQL] Improve error message for `PIVOT_AFTER_GROUP_BY` ### What changes were proposed in this pull request? This PR proposes to improve error message for `PIVOT_AFTER_GROUP_BY` to give users better error message. ### Why are the changes needed? The current error message only shows the cause, not a solution. We should provide proper solution as well in the error message. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The existing CI should pass. Closes #39793 from itholic/PIVOT_AFTER_GROUP_BY. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 67e324db1dc..e6dcdf1bed8 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1557,7 +1557,7 @@ }, "PIVOT_AFTER_GROUP_BY" : { "message" : [ - "PIVOT clause following a GROUP BY clause." + "PIVOT clause following a GROUP BY clause. Consider pushing the GROUP BY into a subquery." ] }, "PIVOT_TYPE" : { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41490][SQL] Assign name to _LEGACY_ERROR_TEMP_2441
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 04517fc803e [SPARK-41490][SQL] Assign name to _LEGACY_ERROR_TEMP_2441 04517fc803e is described below commit 04517fc803e400388acd1d70dd0634125c78d91f Author: itholic AuthorDate: Mon Jan 30 16:22:24 2023 +0300 [SPARK-41490][SQL] Assign name to _LEGACY_ERROR_TEMP_2441 ### What changes were proposed in this pull request? This PR proposes to assign name to _LEGACY_ERROR_TEMP_2441, "UNSUPPORTED_EXPR_FOR_OPERATOR". ### Why are the changes needed? We should assign proper name to _LEGACY_ERROR_TEMP_* ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*` Closes #39700 from itholic/LEGACY_2441. Lead-authored-by: itholic Co-authored-by: Haejoon Lee <44108233+itho...@users.noreply.github.com> Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 12 ++-- .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 5 ++--- .../spark/sql/catalyst/analysis/ResolveSubquerySuite.scala | 6 +- .../sql-tests/results/postgreSQL/window_part3.sql.out| 10 -- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 653b1ebd013..67e324db1dc 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1459,6 +1459,12 @@ }, "sqlState" : "0A000" }, + "UNSUPPORTED_EXPR_FOR_OPERATOR" : { +"message" : [ + "A query operator contains one or more unsupported expressions. 
Consider to rewrite it to avoid window functions, aggregate functions, and generator functions in the WHERE clause.", + "Invalid expressions: []" +] + }, "UNSUPPORTED_FEATURE" : { "message" : [ "The feature is not supported:" @@ -5280,12 +5286,6 @@ "in operator ." ] }, - "_LEGACY_ERROR_TEMP_2441" : { -"message" : [ - "The query operator `` contains one or more unsupported expression types Aggregate, Window or Generate.", - "Invalid expressions: []." -] - }, "_LEGACY_ERROR_TEMP_2443" : { "message" : [ "Multiple definitions of observed metrics named '': ." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 276bf714a34..bccced3dff6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -724,11 +724,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB case other if PlanHelper.specialExpressionsInUnsupportedOperator(other).nonEmpty => val invalidExprSqls = - PlanHelper.specialExpressionsInUnsupportedOperator(other).map(_.sql) + PlanHelper.specialExpressionsInUnsupportedOperator(other).map(toSQLExpr) other.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2441", + errorClass = "UNSUPPORTED_EXPR_FOR_OPERATOR", messageParameters = Map( -"operator" -> other.nodeName, "invalidExprSqls" -> invalidExprSqls.mkString(", "))) // This should not happen, resolved Project or Aggregate should restore or resolve diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala index 7b99153acf9..67265fe6f3b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala @@ -187,7 +187,11 @@ class ResolveSubquerySuite extends AnalysisTest { test("lateral join with unsupported expressions") { val plan = lateralJoin(t1, t0.select(($"a" + $"b").as("c")), condition = Some(sum($"a") === sum($"c"))) -assertAnalysisError(plan, Seq("Invalid expressions: [sum(a), sum(c)]")) +assertAnalysisErrorClass( + plan, + e
[spark] branch master updated: [SPARK-41489][SQL] Assign name to _LEGACY_ERROR_TEMP_2415
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new d2fc1992058 [SPARK-41489][SQL] Assign name to _LEGACY_ERROR_TEMP_2415 d2fc1992058 is described below commit d2fc19920588f2f6c83c31a9519702f9416190fe Author: itholic AuthorDate: Sun Jan 29 08:45:14 2023 +0300 [SPARK-41489][SQL] Assign name to _LEGACY_ERROR_TEMP_2415 ### What changes were proposed in this pull request? This PR proposes to assign name to _LEGACY_ERROR_TEMP_2415, "DATATYPE_MISMATCH.FILTER_NOT_BOOLEAN". ### Why are the changes needed? We should assign proper name to _LEGACY_ERROR_TEMP_* ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*` Closes #39701 from itholic/LEGACY_2415. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 +- .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 7 --- .../spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 5 +++-- .../apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 14 ++ .../optimizer/ReplaceNullWithFalseInPredicateSuite.scala | 11 +++ 5 files changed, 33 insertions(+), 14 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index ae766de3e20..936f996f3a4 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -265,6 +265,11 @@ "Input to should all be the same type, but it's ." ] }, + "FILTER_NOT_BOOLEAN" : { +"message" : [ + "Filter expression of type is not a boolean." +] + }, "HASH_MAP_TYPE" : { "message" : [ "Input to the function cannot contain elements of the \"MAP\" type. 
In Spark, same maps may have different hashcode, thus hash expressions are prohibited on \"MAP\" elements. To restore previous behavior set \"spark.sql.legacy.allowHashOnMapType\" to \"true\"." @@ -5175,11 +5180,6 @@ "Event time must be defined on a window or a timestamp, but is of type ." ] }, - "_LEGACY_ERROR_TEMP_2415" : { -"message" : [ - "filter expression '' of type is not a boolean." -] - }, "_LEGACY_ERROR_TEMP_2416" : { "message" : [ "join condition '' of type is not a boolean." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index d5ef71adc4f..276bf714a34 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -355,10 +355,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB } case f: Filter if f.condition.dataType != BooleanType => f.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2415", + errorClass = "DATATYPE_MISMATCH.FILTER_NOT_BOOLEAN", messageParameters = Map( -"filter" -> f.condition.sql, -"type" -> f.condition.dataType.catalogString)) +"sqlExpr" -> f.expressions.map(toSQLExpr).mkString(","), +"filter" -> toSQLExpr(f.condition), +"type" -> toSQLType(f.condition.dataType))) case j @ Join(_, _, _, Some(condition), _) if condition.dataType != BooleanType => j.failAnalysis( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index faa8c1f4558..56bb8b0ccc2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -349,10 +349,11 @@ class 
AnalysisErrorSuite extends AnalysisTest { "UNRESOLVED_COLUMN.WITH_SUGGESTION", Map("objectName" -> "`b`", "proposal" -> "`a`, `c`, `a3`")) - errorTest( + errorClassTest( "non-boolean filters", testRelation.where(L
[spark] branch master updated: [SPARK-41931][SQL] Better error message for incomplete complex type definition
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 0ef7afe0dc3 [SPARK-41931][SQL] Better error message for incomplete complex type definition 0ef7afe0dc3 is described below commit 0ef7afe0dc3723b97b750c071a908f363e514a26 Author: Runyao Chen AuthorDate: Fri Jan 27 18:06:32 2023 +0300 [SPARK-41931][SQL] Better error message for incomplete complex type definition ### What changes were proposed in this pull request? This PR improves error messages for `ARRAY` / `MAP` / `STRUCT` types without element type specification. A new error class `INCOMPLETE_TYPE_DEFINITION` with subclasses (`ARRAY`, `MAP`, and `STRUCT`) is introduced. **Details** In the case where we do `CAST AS` or `CREATE` a complex type without specifying its element type, e.g. ``` CREATE TABLE t (col ARRAY) ``` `[UNSUPPORTED_DATATYPE] Unsupported data type "ARRAY"` error would be thrown, while we do support the `ARRAY` type and just require it to be typed. This PR proposes a better error message like ``` The definition of `ARRAY` type is incomplete. You must provide an element type. For example: `ARRAY`. ``` ### Why are the changes needed? The previous error message for incomplete complex types is confusing. A `UNSUPPORTED_DATATYPE` error would be thrown, while we do support complex types. We just require complex types to have their element types specified. We need a clear error message with an example in this case. ### Does this PR introduce _any_ user-facing change? Yes, this PR changes the error message which is user-facing. Error message before this PR: ``` spark-sql> SELECT CAST(array(1, 2, 3) AS ARRAY); [UNSUPPORTED_DATATYPE] Unsupported data type "ARRAY"(line 1, pos 30) ``` Error message after this PR: ``` [INCOMPLETE_TYPE_DEFINITION.ARRAY] Incomplete complex type: The definition of `ARRAY` type is incomplete. 
You must provide an element type. For example: `ARRAY`. ``` Similarly for MAP and STRUCT types. ### How was this patch tested? Added unit tests covering CAST and CREATE with ARRAY / STRUCT / MAP types and their nested combinations. Closes #39711 from RunyaoChen/better_error_msg_nested_type. Lead-authored-by: Runyao Chen Co-authored-by: RunyaoChen Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 23 +++ .../spark/sql/catalyst/parser/AstBuilder.scala | 2 + .../spark/sql/errors/QueryParsingErrors.scala | 21 +++ .../spark/sql/errors/QueryParsingErrorsSuite.scala | 72 ++ 4 files changed, 118 insertions(+) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index e6876751a22..ae766de3e20 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -592,6 +592,29 @@ "Detected an incompatible DataSourceRegister. Please remove the incompatible library from classpath or upgrade it. Error: " ] }, + "INCOMPLETE_TYPE_DEFINITION" : { +"message" : [ + "Incomplete complex type:" +], +"subClass" : { + "ARRAY" : { +"message" : [ + "The definition of \"ARRAY\" type is incomplete. You must provide an element type. For example: \"ARRAY\"." +] + }, + "MAP" : { +"message" : [ + "The definition of \"MAP\" type is incomplete. You must provide a key type and a value type. For example: \"MAP\"." +] + }, + "STRUCT" : { +"message" : [ + "The definition of \"STRUCT\" type is incomplete. You must provide at least one field type. For example: \"STRUCT\"." 
+] + } +}, +"sqlState" : "42K01" + }, "INCONSISTENT_BEHAVIOR_CROSS_VERSION" : { "message" : [ "You may get a different result due to the upgrading to" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c6e50f3f514..d2a1cb1eb16 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2889,6 +2889,8 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with S
[spark] branch master updated: [SPARK-42158][SQL] Integrate `_LEGACY_ERROR_TEMP_1003` into `FIELD_NOT_FOUND`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f373df8a757 [SPARK-42158][SQL] Integrate `_LEGACY_ERROR_TEMP_1003` into `FIELD_NOT_FOUND` f373df8a757 is described below commit f373df8a757e36ea84275c637087045d6cca3939 Author: itholic AuthorDate: Fri Jan 27 10:40:47 2023 +0300 [SPARK-42158][SQL] Integrate `_LEGACY_ERROR_TEMP_1003` into `FIELD_NOT_FOUND` ### What changes were proposed in this pull request? This PR proposes to integrate `_LEGACY_ERROR_TEMP_1003` into `FIELD_NOT_FOUND` ### Why are the changes needed? We should deduplicate the similar error classes into single error class by merging them. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Fixed existing UTs. Closes #39706 from itholic/LEGACY_1003. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 5 -- .../spark/sql/catalyst/analysis/Analyzer.scala | 3 +- .../spark/sql/errors/QueryCompilationErrors.scala | 8 ++- .../spark/sql/connector/AlterTableTests.scala | 18 +- .../connector/V2CommandsCaseSensitivitySuite.scala | 64 +++--- 5 files changed, 65 insertions(+), 33 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 5d2e184874a..e6876751a22 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -2031,11 +2031,6 @@ "Try moving this class out of its parent class." ] }, - "_LEGACY_ERROR_TEMP_1003" : { -"message" : [ - "Couldn't find the reference column for <after> at <parentName>." -] - }, "_LEGACY_ERROR_TEMP_1004" : { "message" : [ "Window specification <windowName> is not defined in the WINDOW clause." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index f0c22471afa..6f27c97ddf9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -4053,9 +4053,8 @@ class Analyzer(override val catalogManager: CatalogManager) case Some(colName) => ResolvedFieldPosition(ColumnPosition.after(colName)) case None => -val name = if (resolvedParentName.isEmpty) "root" else resolvedParentName.quoted throw QueryCompilationErrors.referenceColNotFoundForAlterTableChangesError( - after, name) + col.colName, allFields) } case _ => ResolvedFieldPosition(u.position) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index c415fb91c5d..1a8c42b599e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -295,10 +295,12 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { } def referenceColNotFoundForAlterTableChangesError( - after: TableChange.After, parentName: String): Throwable = { + fieldName: String, fields: Array[String]): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1003", - messageParameters = Map("after" -> after.toString, "parentName" -> parentName)) + errorClass = "FIELD_NOT_FOUND", + messageParameters = Map( +"fieldName" -> toSQLId(fieldName), +"fields" -> fields.mkString(", "))) } def windowSpecificationNotDefinedError(windowName: String): Throwable = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala index b69a0628f3e..2047212a4ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala @@ -160,7 +160,11 @@ trait AlterTableTests extends SharedSparkSession with QueryErrorsBase { val e1 = intercept[AnalysisException]( sql(s"ALTER TABLE $t ADD COLUMN c string AFTER non_exist")) - assert(e1.getMessage().contain
[spark] branch master updated: [SPARK-41948][SQL] Fix NPE for error classes: CANNOT_PARSE_JSON_FIELD
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new cc1674d66ef [SPARK-41948][SQL] Fix NPE for error classes: CANNOT_PARSE_JSON_FIELD cc1674d66ef is described below commit cc1674d66ef34f540aa7bd5c7e465605e264e040 Author: panbingkun AuthorDate: Mon Jan 23 15:15:59 2023 +0300 [SPARK-41948][SQL] Fix NPE for error classes: CANNOT_PARSE_JSON_FIELD ### What changes were proposed in this pull request? The pr aims to fix NPE for error classes: CANNOT_PARSE_JSON_FIELD. ### Why are the changes needed? 1. When I want to delete redundant 'toString()' in code block as follow https://user-images.githubusercontent.com/15246973/211269145-0f087bb1-dc93-480c-9f9d-afde5ac1c8de.png;> I found the UT("select from_json('[1, \"2\", 3]', 'array')") failed. Why can it succeed before deletion? `parse.getCurrentName.toString()` => null.toString() => throw NPE, but follow logical can cover it, https://github.com/apache/spark/blob/15a0f55246bee7b043bd6081f53744fbf74403eb/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala#L569-L573 But obviously this is not our original intention. After deletion, The IllegalArgumentException will be thrown. `parse.getCurrentName` => throw java.lang.IllegalArgumentException as follow: `Caused by: java.lang.IllegalArgumentException: Cannot resolve variable 'fieldName' (enableSubstitutionInVariables=false). at org.apache.commons.text.StringSubstitutor.substitute(StringSubstitutor.java:1532) at org.apache.commons.text.StringSubstitutor.substitute(StringSubstitutor.java:1389) at org.apache.commons.text.StringSubstitutor.replace(StringSubstitutor.java:893) at org.apache.spark.ErrorClassesJsonReader.getErrorMessage(ErrorClassesJSONReader.scala:51) ... 140 more ` Above code can't handle IllegalArgumentException, so the UT failed. 
So, we should consider the case where `parse.getCurrentName` is null. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Existed UT. Closes #39466 from panbingkun/SPARK-41948. Authored-by: panbingkun Signed-off-by: Max Gekk --- .../main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala | 4 ++-- sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 8128c460602..9c8c764cf92 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1443,8 +1443,8 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { new SparkRuntimeException( errorClass = "CANNOT_PARSE_JSON_FIELD", messageParameters = Map( -"fieldName" -> parser.getCurrentName.toString(), -"fieldValue" -> parser.getText.toString(), +"fieldName" -> toSQLValue(parser.getCurrentName, StringType), +"fieldValue" -> parser.getText, "jsonType" -> jsonType.toString(), "dataType" -> toSQLType(dataType))) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 6e16533eb30..57c54e88229 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -27,6 +27,7 @@ import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.spark.{SparkException, SparkRuntimeException} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Literal, StructsToJson} +import org.apache.spark.sql.catalyst.expressions.Cast._ import 
org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -785,7 +786,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { exception = ExceptionUtils.getRootCause(exception).asInstanceOf[SparkRuntimeException], errorClass = "CANNOT_PARSE_JSON_FIELD", parameters = Map( - "fieldName" -> "a", + "fieldName" -> toSQLValue("a", StringType), "fieldValue" -&
[spark] branch master updated: [SPARK-41575][SQL] Assign name to _LEGACY_ERROR_TEMP_2054
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new aaee89a12fd [SPARK-41575][SQL] Assign name to _LEGACY_ERROR_TEMP_2054 aaee89a12fd is described below commit aaee89a12fd9b8ca3c57fa4283a51ce229dd7b71 Author: itholic AuthorDate: Tue Jan 10 16:25:15 2023 +0300 [SPARK-41575][SQL] Assign name to _LEGACY_ERROR_TEMP_2054 ### What changes were proposed in this pull request? This PR proposes to assign name to _LEGACY_ERROR_TEMP_2054, "TASK_WRITE_FAILED". ### Why are the changes needed? We should assign proper name to _LEGACY_ERROR_TEMP_* ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*` Closes #39394 from itholic/LEGACY_2054. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 +-- .../spark/sql/errors/QueryExecutionErrors.scala| 6 +- .../execution/datasources/FileFormatWriter.scala | 2 +- .../apache/spark/sql/CharVarcharTestSuite.scala| 82 +++--- .../org/apache/spark/sql/sources/InsertSuite.scala | 16 +++-- .../spark/sql/HiveCharVarcharTestSuite.scala | 27 +++ 6 files changed, 104 insertions(+), 39 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index a3acb940585..edf46a0fe09 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1187,6 +1187,11 @@ ], "sqlState" : "42000" }, + "TASK_WRITE_FAILED" : { +"message" : [ + "Task failed while writing rows to <path>." 
+] + }, "TEMP_TABLE_OR_VIEW_ALREADY_EXISTS" : { "message" : [ "Cannot create the temporary view <relationName> because it already exists.", @@ -3728,11 +3733,6 @@ "buildReader is not supported for <format>" ] }, - "_LEGACY_ERROR_TEMP_2054" : { -"message" : [ - "Task failed while writing rows. <message>" -] - }, "_LEGACY_ERROR_TEMP_2055" : { "message" : [ "", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 17fc38812f8..9598933d941 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -782,10 +782,10 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { messageParameters = Map("format" -> format)) } - def taskFailedWhileWritingRowsError(cause: Throwable): Throwable = { + def taskFailedWhileWritingRowsError(path: String, cause: Throwable): Throwable = { new SparkException( - errorClass = "_LEGACY_ERROR_TEMP_2054", - messageParameters = Map("message" -> cause.getMessage), + errorClass = "TASK_WRITE_FAILED", + messageParameters = Map("path" -> path), cause = cause) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index 6285095c647..5c4d662c145 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -423,7 +423,7 @@ object FileFormatWriter extends Logging { // We throw the exception and let Executor throw ExceptionFailure to abort the job. 
throw new TaskOutputFileAlreadyExistException(f) case t: Throwable => -throw QueryExecutionErrors.taskFailedWhileWritingRowsError(t) +throw QueryExecutionErrors.taskFailedWhileWritingRowsError(description.path, t) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index 95c2e5085d9..c0ceebaa9a6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -178,26 +178,6 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { } } - test("char/varchar type values length check: partitioned columns of other types") { -Seq("CHAR(5)", "VARCHAR(5)").for
[spark] branch master updated: [SPARK-41947][CORE][DOCS] Update the contents of error class guidelines
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 786594734bd [SPARK-41947][CORE][DOCS] Update the contents of error class guidelines 786594734bd is described below commit 786594734bd79017ebd42eb117b62958afad07bb Author: itholic AuthorDate: Mon Jan 9 23:24:09 2023 +0300 [SPARK-41947][CORE][DOCS] Update the contents of error class guidelines ### What changes were proposed in this pull request? This PR proposes to update error class guidelines for `core/src/main/resources/error/README.md`. ### Why are the changes needed? Because some of contents are out of date, and no longer valid for current behavior. ### Does this PR introduce _any_ user-facing change? No. It fixed the developer guidelines for error class. ### How was this patch tested? The existing CI should pass. Closes #39464 from itholic/SPARK-41947. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/README.md | 16 +--- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/core/src/main/resources/error/README.md b/core/src/main/resources/error/README.md index 23e62cd25fb..8ea9e37c27f 100644 --- a/core/src/main/resources/error/README.md +++ b/core/src/main/resources/error/README.md @@ -8,9 +8,9 @@ and message parameters rather than an arbitrary error message. 1. Check if the error is an internal error. Internal errors are bugs in the code that we do not expect users to encounter; this does not include unsupported operations. If true, use the error class `INTERNAL_ERROR` and skip to step 4. -2. Check if an appropriate error class already exists in `error-class.json`. +2. Check if an appropriate error class already exists in `error-classes.json`. If true, use the error class and skip to step 4. -3. Add a new class to `error-class.json`; keep in mind the invariants below. +3. 
Add a new class to `error-classes.json`; keep in mind the invariants below. 4. Check if the exception type already extends `SparkThrowable`. If true, skip to step 6. 5. Mix `SparkThrowable` into the exception. @@ -24,10 +24,10 @@ Throw with arbitrary error message: ### After -`error-class.json` +`error-classes.json` "PROBLEM_BECAUSE": { - "message": ["Problem %s because %s"], + "message": ["Problem <problem> because <cause>"], "sqlState": "X" } @@ -35,16 +35,18 @@ Throw with arbitrary error message: class SparkTestException( errorClass: String, -messageParameters: Seq[String]) +messageParameters: Map[String, String]) extends TestException(SparkThrowableHelper.getMessage(errorClass, messageParameters)) with SparkThrowable { - def getErrorClass: String = errorClass + override def getMessageParameters: java.util.Map[String, String] = messageParameters.asJava + + override def getErrorClass: String = errorClass } Throw with error class and message parameters: -throw new SparkTestException("PROBLEM_BECAUSE", Seq("A", "B")) +throw new SparkTestException("PROBLEM_BECAUSE", Map("problem" -> "A", "cause" -> "B")) ## Access fields - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41780][SQL] Should throw INVALID_PARAMETER_VALUE.PATTERN when the parameters `regexp` is invalid
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 15a0f55246b [SPARK-41780][SQL] Should throw INVALID_PARAMETER_VALUE.PATTERN when the parameters `regexp` is invalid 15a0f55246b is described below commit 15a0f55246bee7b043bd6081f53744fbf74403eb Author: panbingkun AuthorDate: Mon Jan 9 11:37:54 2023 +0300 [SPARK-41780][SQL] Should throw INVALID_PARAMETER_VALUE.PATTERN when the parameters `regexp` is invalid ### What changes were proposed in this pull request? In the PR, I propose to throw error classes - `INVALID_PARAMETER_VALUE.PATTERN` when the parameters `regexp` in regexp_replace & regexp_extract & rlike is invalid. ### Why are the changes needed? Clear error prompt should improve user experience with Spark SQL. The original error prompt is: https://user-images.githubusercontent.com/15246973/210493673-c1de9927-9a18-4f9d-a94c-48735b6c5e5a.png;> Valid: [a\\d]{0,2} Invalid: [a\\d]{0, 2} ![image](https://user-images.githubusercontent.com/15246973/210494925-cb6c8043-de02-4c8e-9b40-225350422340.png) ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Add new UT. Pass GA. Closes #39383 from panbingkun/SPARK-41780. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../catalyst/expressions/regexpExpressions.scala | 20 ++-- .../expressions/RegexpExpressionsSuite.scala | 29 - .../apache/spark/sql/StringFunctionsSuite.scala| 38 ++ 3 files changed, 76 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index c86dcfb3b96..29510bc3852 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -57,7 +57,12 @@ abstract class StringRegexExpression extends BinaryExpression null } else { // Let it raise exception if couldn't compile the regex string -Pattern.compile(escape(str)) +try { + Pattern.compile(escape(str)) +} catch { + case e: PatternSyntaxException => +throw QueryExecutionErrors.invalidPatternError(prettyName, e.getPattern, e) +} } protected def pattern(str: String) = if (cache == null) compile(str) else cache @@ -634,7 +639,12 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio if (!p.equals(lastRegex)) { // regex value changed lastRegex = p.asInstanceOf[UTF8String].clone() - pattern = Pattern.compile(lastRegex.toString) + try { +pattern = Pattern.compile(lastRegex.toString) + } catch { +case e: PatternSyntaxException => + throw QueryExecutionErrors.invalidPatternError(prettyName, e.getPattern, e) + } } if (!r.equals(lastReplacementInUTF8)) { // replacement string changed @@ -688,7 +698,11 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio if (!$regexp.equals($termLastRegex)) { // regex value changed $termLastRegex = $regexp.clone(); -$termPattern = $classNamePattern.compile($termLastRegex.toString()); +try { + $termPattern = $classNamePattern.compile($termLastRegex.toString()); +} 
catch (java.util.regex.PatternSyntaxException e) { + throw QueryExecutionErrors.invalidPatternError("$prettyName", e.getPattern(), e); +} } if (!$rep.equals($termLastReplacementInUTF8)) { // replacement string changed diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 8b5e303849c..af051a1a9bc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -279,14 +279,27 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkLiteralRow("abc" rlike _, "^bc", false) checkLiteralRow("abc" rlike _, "^ab", true) checkLiteralRow("abc" rlike _, "^bc", false) - -intercept[java.util.regex.PatternSyntaxException] { - evaluateWithoutCodegen("ac" rlike "**") -} -intercept[java.util.regex.PatternSyntaxExcep
[spark] branch master updated: [SPARK-41581][SQL] Update `_LEGACY_ERROR_TEMP_1230` as `INTERNAL_ERROR`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 6b92cda04e6 [SPARK-41581][SQL] Update `_LEGACY_ERROR_TEMP_1230` as `INTERNAL_ERROR` 6b92cda04e6 is described below commit 6b92cda04e618f82711587d027fa20601e094418 Author: itholic AuthorDate: Mon Jan 9 10:41:49 2023 +0300 [SPARK-41581][SQL] Update `_LEGACY_ERROR_TEMP_1230` as `INTERNAL_ERROR` ### What changes were proposed in this pull request? This PR proposes to update `_LEGACY_ERROR_TEMP_1230`, as `INTERNAL_ERROR`. ### Why are the changes needed? We should assign proper name to _LEGACY_ERROR_TEMP_* ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*` Closes #39282 from itholic/LEGACY_1230. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json| 5 - .../apache/spark/sql/errors/QueryCompilationErrors.scala| 10 -- .../scala/org/apache/spark/sql/types/DecimalSuite.scala | 13 - 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 5409507c3c8..a3acb940585 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -2944,11 +2944,6 @@ " can only support precision up to ." ] }, - "_LEGACY_ERROR_TEMP_1230" : { -"message" : [ - "Negative scale is not allowed: <scale>. You can use <config>=true to enable legacy mode to allow it." -] - }, "_LEGACY_ERROR_TEMP_1231" : { "message" : [ "<key> is not a valid partition column in table <tblName>." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 2ced0b8ac7a..25005a1f609 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import org.apache.hadoop.fs.Path -import org.apache.spark.{SparkThrowable, SparkThrowableHelper} +import org.apache.spark.{SparkException, SparkThrowable, SparkThrowableHelper} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, FunctionAlreadyExistsException, NamespaceAlreadyExistsException, NoSuchFunctionException, NoSuchNamespaceException, NoSuchPartitionException, NoSuchTableException, ResolvedTable, Star, TableAlreadyExistsException, UnresolvedRegex} @@ -2242,11 +2242,9 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { } def negativeScaleNotAllowedError(scale: Int): Throwable = { -new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1230", - messageParameters = Map( -"scale" -> scale.toString, -"config" -> LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED.key)) +SparkException.internalError(s"Negative scale is not allowed: ${scale.toString}." 
+ + s" Set the config ${toSQLConf(LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED.key)}" + + " to \"true\" to allow it.") } def invalidPartitionColumnKeyInTableError(key: String, tblName: String): Throwable = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala index 73944d9dff9..465c25118fa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala @@ -19,8 +19,7 @@ package org.apache.spark.sql.types import org.scalatest.PrivateMethodTester -import org.apache.spark.{SparkArithmeticException, SparkFunSuite, SparkNumberFormatException} -import org.apache.spark.sql.AnalysisException +import org.apache.spark.{SparkArithmeticException, SparkException, SparkFunSuite, SparkNumberFormatException} import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.Decimal._ @@ -111,9 +110,13 @@ class DecimalSuite extends SparkFunSuite with PrivateMethodTester with SQLHelper test("SPARK-30252: Negative scale is not allowed by default&qu
[spark] branch master updated (514449b7cbf -> a641dc4954d)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 514449b7cbf [SPARK-41899][CONNECT][PYTHON] createDataFrame` should respect user provided DDL schema add a641dc4954d [SPARK-41889][SQL] Attach root cause to invalidPatternError & refactor error classes INVALID_PARAMETER_VALUE No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 24 - .../catalyst/expressions/regexpExpressions.scala | 5 ++--- .../spark/sql/errors/QueryCompilationErrors.scala | 7 +++--- .../spark/sql/errors/QueryExecutionErrors.scala| 25 -- .../expressions/RegexpExpressionsSuite.scala | 19 ++-- .../sql-tests/results/postgreSQL/text.sql.out | 5 ++--- .../sql-tests/results/regexp-functions.sql.out | 18 .../sql/errors/QueryCompilationErrorsSuite.scala | 9 .../sql/errors/QueryExecutionErrorsSuite.scala | 19 9 files changed, 78 insertions(+), 53 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41580][SQL] Assign name to _LEGACY_ERROR_TEMP_2137
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 4d3bc8f5b55 [SPARK-41580][SQL] Assign name to _LEGACY_ERROR_TEMP_2137 4d3bc8f5b55 is described below commit 4d3bc8f5b55969f7c954991239ff43f9faba1346 Author: itholic AuthorDate: Thu Jan 5 10:58:14 2023 +0500 [SPARK-41580][SQL] Assign name to _LEGACY_ERROR_TEMP_2137 ### What changes were proposed in this pull request? This PR proposes to assign name to _LEGACY_ERROR_TEMP_2137, "INVALID_JSON_ROOT_FIELD". ### Why are the changes needed? We should assign proper name to _LEGACY_ERROR_TEMP_* ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*` Closes #39305 from itholic/LEGACY_2137. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 +- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 2 +- .../spark/sql/execution/datasources/json/JsonSuite.scala | 14 +++--- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 12f4b0f9c37..29cafdcc1b6 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -760,6 +760,11 @@ "The identifier is invalid. Please, consider quoting it with back-quotes as ``." ] }, + "INVALID_JSON_ROOT_FIELD" : { +"message" : [ + "Cannot convert JSON root field to target Spark type." +] + }, "INVALID_JSON_SCHEMA_MAP_TYPE" : { "message" : [ "Input schema can only contain STRING as a key type for a MAP." 
@@ -4110,11 +4115,6 @@ "Failed to parse an empty string for data type " ] }, - "_LEGACY_ERROR_TEMP_2137" : { -"message" : [ - "Root converter returned null" -] - }, "_LEGACY_ERROR_TEMP_2138" : { "message" : [ "Cannot have circular references in bean class, but got the circular reference of class " diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 227e86994f5..0c92d56ed04 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1457,7 +1457,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { def rootConverterReturnNullError(): SparkRuntimeException = { new SparkRuntimeException( - errorClass = "_LEGACY_ERROR_TEMP_2137", + errorClass = "INVALID_JSON_ROOT_FIELD", messageParameters = Map.empty) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 0d2c98316e7..a4b7df9af42 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -25,11 +25,12 @@ import java.time.{Duration, Instant, LocalDate, LocalDateTime, Period, ZoneId} import java.util.Locale import com.fasterxml.jackson.core.JsonFactory +import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.hadoop.io.SequenceFile.CompressionType import org.apache.hadoop.io.compress.GzipCodec -import org.apache.spark.{SparkConf, SparkException, SparkUpgradeException, TestUtils} +import org.apache.spark.{SparkConf, SparkException, SparkRuntimeException, SparkUpgradeException, TestUtils} import 
org.apache.spark.rdd.RDD import org.apache.spark.sql.{functions => F, _} import org.apache.spark.sql.catalyst.json._ @@ -3192,10 +3193,17 @@ abstract class JsonSuite } test("SPARK-36379: proceed parsing with root nulls in permissive mode") { -assert(intercept[SparkException] { +val exception = intercept[SparkException] { spark.read.option("mode", "failfast") .schema("a string").json(Seq("""[{"a": "str"}, null]""").toDS).collect() -}.getMessage.contains("Malformed records are detected")) +} +assert(exception.getMessage.contains("Malformed records are d
[spark] branch master updated: [SPARK-41576][SQL] Assign name to _LEGACY_ERROR_TEMP_2051
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 76d7c857078 [SPARK-41576][SQL] Assign name to _LEGACY_ERROR_TEMP_2051 76d7c857078 is described below commit 76d7c8570788c773720c6e143e496647dfe9ebe0 Author: itholic AuthorDate: Thu Jan 5 10:47:46 2023 +0500 [SPARK-41576][SQL] Assign name to _LEGACY_ERROR_TEMP_2051 ### What changes were proposed in this pull request? This PR proposes to assign name to _LEGACY_ERROR_TEMP_2051, "DATA_SOURCE_NOT_FOUND". ### Why are the changes needed? We should assign proper name to _LEGACY_ERROR_TEMP_* ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*` Closes #39281 from itholic/LEGACY_2051. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 +- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 4 ++-- .../apache/spark/sql/execution/datasources/DataSource.scala | 2 +- .../org/apache/spark/sql/execution/command/DDLSuite.scala| 12 .../apache/spark/sql/sources/ResolvedDataSourceSuite.scala | 9 +++-- 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 120925f5254..12f4b0f9c37 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -441,6 +441,11 @@ ], "sqlState" : "42000" }, + "DATA_SOURCE_NOT_FOUND" : { +"message" : [ + "Failed to find the data source: . Please find packages at `https://spark.apache.org/third-party-projects.html`.; +] + }, "DATETIME_OVERFLOW" : { "message" : [ "Datetime operation overflow: ." 
@@ -3696,11 +3701,6 @@ "Expected exactly one path to be specified, but got: " ] }, - "_LEGACY_ERROR_TEMP_2051" : { -"message" : [ - "Failed to find data source: . Please find packages at https://spark.apache.org/third-party-projects.html; -] - }, "_LEGACY_ERROR_TEMP_2052" : { "message" : [ " was removed in Spark 2.0. Please check if your library is compatible with Spark 2.0" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 44a1972272f..227e86994f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -731,10 +731,10 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { messageParameters = Map("paths" -> allPaths.mkString(", "))) } - def failedToFindDataSourceError( + def dataSourceNotFoundError( provider: String, error: Throwable): SparkClassNotFoundException = { new SparkClassNotFoundException( - errorClass = "_LEGACY_ERROR_TEMP_2051", + errorClass = "DATA_SOURCE_NOT_FOUND", messageParameters = Map("provider" -> provider), cause = error) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index edbdd6bbc67..9bb5191dc01 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -643,7 +643,7 @@ object DataSource extends Logging { } else if (provider1.toLowerCase(Locale.ROOT) == "kafka") { throw QueryCompilationErrors.failedToFindKafkaDataSourceError(provider1) } else { - throw QueryExecutionErrors.failedToFindDataSourceError(provider1, error) + throw QueryExecutionErrors.dataSourceNotFoundError(provider1, error) } } } catch 
{ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 6cc37a41210..f5d17b142e2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -24,7 +24,7 @@ import
[spark] branch master updated: [SPARK-41573][SQL] Assign name to _LEGACY_ERROR_TEMP_2136
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f352f103ed5 [SPARK-41573][SQL] Assign name to _LEGACY_ERROR_TEMP_2136 f352f103ed5 is described below commit f352f103ed512806abb3f642571a0c595b8b0509 Author: itholic AuthorDate: Thu Jan 5 00:21:32 2023 +0500 [SPARK-41573][SQL] Assign name to _LEGACY_ERROR_TEMP_2136 ### What changes were proposed in this pull request? This PR proposes to assign name to _LEGACY_ERROR_TEMP_2136, "CANNOT_PARSE_JSON_FIELD". ### Why are the changes needed? We should assign proper name to _LEGACY_ERROR_TEMP_* ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*` Closes #39284 from itholic/LEGACY_2136. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json| 10 +- .../spark/sql/catalyst/json/JacksonParser.scala | 2 +- .../spark/sql/errors/QueryExecutionErrors.scala | 8 .../org/apache/spark/sql/JsonFunctionsSuite.scala | 21 ++--- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index a7b120ef427..120925f5254 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -75,6 +75,11 @@ ], "sqlState" : "42000" }, + "CANNOT_PARSE_JSON_FIELD" : { +"message" : [ + "Cannot parse the field name and the value of the JSON token type to target Spark data type " +] + }, "CANNOT_PARSE_PROTOBUF_DESCRIPTOR" : { "message" : [ "Error parsing file descriptor byte[] into Descriptor object" @@ -4105,11 +4110,6 @@ "Failed to parse an empty string for data type " ] }, - "_LEGACY_ERROR_TEMP_2136" : { -"message" : [ - "Failed to parse field name , field value , [] 
to target spark data type []." -] - }, "_LEGACY_ERROR_TEMP_2137" : { "message" : [ "Root converter returned null" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index ee21a1e2b76..3fe26e87499 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -430,7 +430,7 @@ class JacksonParser( case token => // We cannot parse this token based on the given data type. So, we throw a // RuntimeException and this exception will be caught by `parse` method. - throw QueryExecutionErrors.failToParseValueForDataTypeError(parser, token, dataType) + throw QueryExecutionErrors.cannotParseJSONFieldError(parser, token, dataType) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 3e234cfee2c..44a1972272f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1444,15 +1444,15 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { "dataType" -> dataType.catalogString)) } - def failToParseValueForDataTypeError(parser: JsonParser, token: JsonToken, dataType: DataType) + def cannotParseJSONFieldError(parser: JsonParser, jsonType: JsonToken, dataType: DataType) : SparkRuntimeException = { new SparkRuntimeException( - errorClass = "_LEGACY_ERROR_TEMP_2136", + errorClass = "CANNOT_PARSE_JSON_FIELD", messageParameters = Map( "fieldName" -> parser.getCurrentName.toString(), "fieldValue" -> parser.getText.toString(), -"token" -> token.toString(), -"dataType" -> dataType.toString())) +"jsonType" -> jsonType.toString(), +"dataType" -> 
toSQLType(dataType))) } def rootConverterReturnNullError(): SparkRuntimeException = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 399665c0de6..0f282336d58 100644 --- a/sql/core/src/test/scala/org/apa
[spark] branch master updated (b7a0fc4b7bd -> 4d6856e913c)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from b7a0fc4b7bd [SPARK-41658][CONNECT][TESTS] Enable doctests in pyspark.sql.connect.functions add 4d6856e913c [SPARK-41311][SQL][TESTS] Rewrite test RENAME_SRC_PATH_NOT_FOUND to trigger the error from user space No new revisions were added by this update. Summary of changes: .../sql/errors/QueryExecutionErrorsSuite.scala | 54 +- 1 file changed, 31 insertions(+), 23 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (2cf11cdb04f -> 973b8ffc828)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 2cf11cdb04f [SPARK-41854][PYTHON][BUILD] Automatic reformat/check python/setup.py add 973b8ffc828 [SPARK-41807][CORE] Remove non-existent error class: UNSUPPORTED_FEATURE.DISTRIBUTE_BY No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 5 - 1 file changed, 5 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41571][SQL] Assign name to _LEGACY_ERROR_TEMP_2310
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 470beda2231 [SPARK-41571][SQL] Assign name to _LEGACY_ERROR_TEMP_2310 470beda2231 is described below commit 470beda2231c89d9cbd609bcf1e83d84c80a7f06 Author: itholic AuthorDate: Mon Jan 2 11:53:27 2023 +0500 [SPARK-41571][SQL] Assign name to _LEGACY_ERROR_TEMP_2310 ### What changes were proposed in this pull request? This PR proposes to assign name to _LEGACY_ERROR_TEMP_2310, "WRITE_STREAM_NOT_ALLOWED". ### Why are the changes needed? We should assign proper name to _LEGACY_ERROR_TEMP_* ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*` Closes #39285 from itholic/LEGACY_2310. Authored-by: itholic Signed-off-by: Max Gekk --- R/pkg/tests/fulltests/test_streaming.R | 3 +-- core/src/main/resources/error/error-classes.json | 10 +- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 2 +- .../org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala | 8 +--- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/R/pkg/tests/fulltests/test_streaming.R b/R/pkg/tests/fulltests/test_streaming.R index cc84a985423..8804471e640 100644 --- a/R/pkg/tests/fulltests/test_streaming.R +++ b/R/pkg/tests/fulltests/test_streaming.R @@ -140,8 +140,7 @@ test_that("Non-streaming DataFrame", { expect_false(isStreaming(c)) expect_error(write.stream(c, "memory", queryName = "people", outputMode = "complete"), - paste0(".*(writeStream : analysis error - 'writeStream' can be called only on ", - "streaming Dataset/DataFrame).*")) + paste0("Error in writeStream : analysis error - \\[WRITE_STREAM_NOT_ALLOWED\\].*")) }) test_that("Unsupported operation", { diff --git a/core/src/main/resources/error/error-classes.json 
b/core/src/main/resources/error/error-classes.json index 4003fab0685..4687d04bf71 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1618,6 +1618,11 @@ ], "sqlState" : "42000" }, + "WRITE_STREAM_NOT_ALLOWED" : { +"message" : [ + "`writeStream` can be called only on streaming Dataset/DataFrame." +] + }, "WRONG_NUM_ARGS" : { "message" : [ "Invalid number of arguments for the function ." @@ -4907,11 +4912,6 @@ "cannot resolve in MERGE command given columns []" ] }, - "_LEGACY_ERROR_TEMP_2310" : { -"message" : [ - "'writeStream' can be called only on streaming Dataset/DataFrame" -] - }, "_LEGACY_ERROR_TEMP_2311" : { "message" : [ "'writeTo' can not be called on streaming Dataset/DataFrame" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 5f6512d4e4b..c8e2a48859d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -3875,7 +3875,7 @@ class Dataset[T] private[sql]( def writeStream: DataStreamWriter[T] = { if (!isStreaming) { logicalPlan.failAnalysis( -errorClass = "_LEGACY_ERROR_TEMP_2310", +errorClass = "WRITE_STREAM_NOT_ALLOWED", messageParameters = Map.empty) } new DataStreamWriter[T](this) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index 3f2414d2178..17a003dfe8f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -162,9 +162,11 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with .writeStream .start() } -Seq("'writeStream'", "only", "streaming Dataset/DataFrame").foreach { s => - 
assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT))) -} +checkError( + exception = e, + errorClass = "WRITE_STREAM_NOT_ALLOWED", + parameters = Map.empty +) } test("resolve default source") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41796][TESTS] Test the error class: UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_CORRELATED_REFERENCE_DATA_TYPE
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new b0751ed22b9 [SPARK-41796][TESTS] Test the error class: UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_CORRELATED_REFERENCE_DATA_TYPE b0751ed22b9 is described below commit b0751ed22b94a93a5a60a20b24a88ca77d67c694 Author: panbingkun AuthorDate: Sun Jan 1 21:45:56 2023 +0500 [SPARK-41796][TESTS] Test the error class: UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_CORRELATED_REFERENCE_DATA_TYPE ### What changes were proposed in this pull request? This PR aims to modify a test for the error class UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_CORRELATED_REFERENCE_DATA_TYPE in SubquerySuite. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Update existed UT. Closes #39320 from panbingkun/SPARK-41796. Authored-by: panbingkun Signed-off-by: Max Gekk --- .../scala/org/apache/spark/sql/SubquerySuite.scala | 20 ++-- 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 65dd911df31..3d4a629f7a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -2452,16 +2452,24 @@ class SubquerySuite extends QueryTest Row(2)) // Cannot use non-orderable data type in one row subquery that cannot be collapsed. 
-val error = intercept[AnalysisException] { + checkError( +exception = intercept[AnalysisException] { sql( -""" - |select ( +"""select ( | select concat(a, a) from | (select upper(x['a'] + rand()) as a) |) from v1 - |""".stripMargin).collect() -} -assert(error.getMessage.contains("Correlated column reference 'v1.x' cannot be map type")) + |""".stripMargin + ).collect() +}, +errorClass = "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY." + + "UNSUPPORTED_CORRELATED_REFERENCE_DATA_TYPE", +parameters = Map("expr" -> "v1.x", "dataType" -> "map"), +context = ExpectedContext( + fragment = "select upper(x['a'] + rand()) as a", + start = 39, + stop = 72) + ) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41578][SQL] Assign name to _LEGACY_ERROR_TEMP_2141
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 7823f84942a [SPARK-41578][SQL] Assign name to _LEGACY_ERROR_TEMP_2141 7823f84942a is described below commit 7823f84942acd1a1a6abc5c1f9045317795d00fb Author: itholic AuthorDate: Fri Dec 30 12:18:50 2022 +0500 [SPARK-41578][SQL] Assign name to _LEGACY_ERROR_TEMP_2141 ### What changes were proposed in this pull request? This PR proposes to assign name to _LEGACY_ERROR_TEMP_2141, "ENCODER_NOT_FOUND". ### Why are the changes needed? We should assign proper name to _LEGACY_ERROR_TEMP_* ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*` Closes #39279 from itholic/LEGACY_2141. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 11 ++- .../spark/sql/catalyst/ScalaReflection.scala | 2 +- .../spark/sql/errors/QueryExecutionErrors.scala| 8 +-- .../encoders/EncoderErrorMessageSuite.scala| 80 ++ .../catalyst/encoders/ExpressionEncoderSuite.scala | 13 ++-- 5 files changed, 52 insertions(+), 62 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 21b7c467b64..67398a30180 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -459,6 +459,11 @@ "The index 0 is invalid. An index shall be either < 0 or > 0 (the first element has index 1)." ] }, + "ENCODER_NOT_FOUND" : { +"message" : [ + "Not found an encoder of the type to Spark SQL internal representation. 
Consider to change the input type to one of supported at https://spark.apache.org/docs/latest/sql-ref-datatypes.html.; +] + }, "FAILED_EXECUTE_UDF" : { "message" : [ "Failed to execute user defined function (: () => )" @@ -4116,12 +4121,6 @@ "" ] }, - "_LEGACY_ERROR_TEMP_2141" : { -"message" : [ - "No Encoder found for ", - "" -] - }, "_LEGACY_ERROR_TEMP_2142" : { "message" : [ "Attributes for type is not supported" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 0a8a823216f..e02e42cea1a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -779,7 +779,7 @@ object ScalaReflection extends ScalaReflection { } ProductEncoder(ClassTag(getClassFromType(t)), params) case _ => -throw QueryExecutionErrors.cannotFindEncoderForTypeError(tpe.toString, path) +throw QueryExecutionErrors.cannotFindEncoderForTypeError(tpe.toString) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index cef4acafe07..3e234cfee2c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1483,13 +1483,11 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { "walkedTypePath" -> walkedTypePath.toString())) } - def cannotFindEncoderForTypeError( - tpe: String, walkedTypePath: WalkedTypePath): SparkUnsupportedOperationException = { + def cannotFindEncoderForTypeError(typeName: String): SparkUnsupportedOperationException = { new SparkUnsupportedOperationException( - errorClass = "_LEGACY_ERROR_TEMP_2141", + errorClass = "ENCODER_NOT_FOUND", messageParameters = Map( 
-"tpe" -> tpe, -"walkedTypePath" -> walkedTypePath.toString())) +"typeName" -> typeName)) } def attributesForTypeUnsupportedError(schema: Schema): SparkUnsupportedOperationException = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderErrorMessageSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderErrorMessageSuite.scala index 8c766ef8299..501dfa58305 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderErrorMessageSuite.scala +++ b/
[spark] branch master updated: [SPARK-41729][CORE][SQL] Rename `_LEGACY_ERROR_TEMP_0011` to `UNSUPPORTED_FEATURE.COMBINATION_QUERY_RESULT_CLAUSES`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new e5508443f66 [SPARK-41729][CORE][SQL] Rename `_LEGACY_ERROR_TEMP_0011` to `UNSUPPORTED_FEATURE.COMBINATION_QUERY_RESULT_CLAUSES` e5508443f66 is described below commit e5508443f66d92fe5106bcdf7f2a868164c62c9c Author: yangjie01 AuthorDate: Wed Dec 28 11:36:47 2022 +0500 [SPARK-41729][CORE][SQL] Rename `_LEGACY_ERROR_TEMP_0011` to `UNSUPPORTED_FEATURE.COMBINATION_QUERY_RESULT_CLAUSES` ### What changes were proposed in this pull request? In the PR, I propose to assign the name `UNSUPPORTED_FEATURE.COMBINATION_QUERY_RESULT_CLAUSES` to the error class `_LEGACY_ERROR_TEMP_0011`. ### Why are the changes needed? Proper names of error classes should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #39235 from LuciferYang/SPARK-41729. Authored-by: yangjie01 Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 +- .../scala/org/apache/spark/sql/errors/QueryParsingErrors.scala | 2 +- .../apache/spark/sql/catalyst/parser/ErrorParserSuite.scala| 2 +- .../org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala | 8 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 2f144251e5d..21b7c467b64 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1331,6 +1331,11 @@ "Catalog does not support ." ] }, + "COMBINATION_QUERY_RESULT_CLAUSES" : { +"message" : [ + "Combination of ORDER BY/SORT BY/DISTRIBUTE BY/CLUSTER BY." +] + }, "DESC_TABLE_COLUMN_PARTITION" : { "message" : [ "DESC TABLE COLUMN for a specific partition." 
@@ -1645,11 +1650,6 @@ "There must be at least one WHEN clause in a MERGE statement." ] }, - "_LEGACY_ERROR_TEMP_0011" : { -"message" : [ - "Combination of ORDER BY/SORT BY/DISTRIBUTE BY/CLUSTER BY is not supported." -] - }, "_LEGACY_ERROR_TEMP_0012" : { "message" : [ "DISTRIBUTE BY is not supported." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 773a79a3f3f..ef59dfa5517 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -78,7 +78,7 @@ private[sql] object QueryParsingErrors extends QueryErrorsBase { } def combinationQueryResultClausesUnsupportedError(ctx: QueryOrganizationContext): Throwable = { -new ParseException(errorClass = "_LEGACY_ERROR_TEMP_0011", ctx) +new ParseException(errorClass = "UNSUPPORTED_FEATURE.COMBINATION_QUERY_RESULT_CLAUSES", ctx) } def distributeByUnsupportedError(ctx: QueryOrganizationContext): Throwable = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala index a985992abba..7cf853b0812 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala @@ -34,7 +34,7 @@ class ErrorParserSuite extends AnalysisTest { test("semantic errors") { checkError( exception = parseException("select *\nfrom r\norder by q\ncluster by q"), - errorClass = "_LEGACY_ERROR_TEMP_0011", + errorClass = "UNSUPPORTED_FEATURE.COMBINATION_QUERY_RESULT_CLAUSES", parameters = Map.empty, context = ExpectedContext(fragment = "order by q\ncluster by q", start = 16, stop = 38)) } diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 035e6231178..c25f218fe1b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -376,7 +376,7 @@ class PlanParserSuite extends AnalysisTest { val sql1 = s"$baseSql order
[spark] branch master updated: [SPARK-41666][PYTHON] Support parameterized SQL by `sql()`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new a1c727f3867 [SPARK-41666][PYTHON] Support parameterized SQL by `sql()` a1c727f3867 is described below commit a1c727f386724156f680953fa34ec51bb35348a4 Author: Max Gekk AuthorDate: Fri Dec 23 12:30:30 2022 +0300 [SPARK-41666][PYTHON] Support parameterized SQL by `sql()` ### What changes were proposed in this pull request? In the PR, I propose to extend the `sql()` method in PySpark to support parameterized SQL queries, see https://github.com/apache/spark/pull/38864, and add new parameter - `args` of the type `Dict[str, str]`. This parameter maps named parameters that can occur in the input SQL query to SQL literals like 1, INTERVAL '1-1' YEAR TO MONTH, DATE'2022-12-22' (see [the doc ](https://spark.apache.org/docs/latest/sql-ref-literals.html)of supported literals). For example: ```python >>> spark.sql("SELECT * FROM range(10) WHERE id > :minId", args = {"minId" : "7"}) id 0 8 1 9 ``` Closes #39159 ### Why are the changes needed? To achieve feature parity with Scala/Java API, and provide PySpark users the same feature. ### Does this PR introduce _any_ user-facing change? No, it shouldn't. ### How was this patch tested? Checked the examples locally, and running the tests: ``` $ python/run-tests --modules=pyspark-sql --parallelism=1 ``` Closes #39183 from MaxGekk/parameterized-sql-pyspark-dict. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 6 +++--- .../source/migration_guide/pyspark_3.3_to_3.4.rst | 2 ++ python/pyspark/pandas/sql_formatter.py | 20 +-- python/pyspark/sql/session.py | 23 ++ 4 files changed, 42 insertions(+), 9 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index ff235e80dbb..95db9005d02 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -813,7 +813,7 @@ }, "INVALID_SQL_ARG" : { "message" : [ - "The argument of `sql()` is invalid. Consider to replace it by a SQL literal statement." + "The argument of `sql()` is invalid. Consider to replace it by a SQL literal." ] }, "INVALID_SQL_SYNTAX" : { @@ -1164,7 +1164,7 @@ }, "UNBOUND_SQL_PARAMETER" : { "message" : [ - "Found the unbound parameter: . Please, fix `args` and provide a mapping of the parameter to a SQL literal statement." + "Found the unbound parameter: . Please, fix `args` and provide a mapping of the parameter to a SQL literal." ] }, "UNCLOSED_BRACKETED_COMMENT" : { @@ -5225,4 +5225,4 @@ "grouping() can only be used with GroupingSets/Cube/Rollup" ] } -} \ No newline at end of file +} diff --git a/python/docs/source/migration_guide/pyspark_3.3_to_3.4.rst b/python/docs/source/migration_guide/pyspark_3.3_to_3.4.rst index b3baa8345aa..ca942c54979 100644 --- a/python/docs/source/migration_guide/pyspark_3.3_to_3.4.rst +++ b/python/docs/source/migration_guide/pyspark_3.3_to_3.4.rst @@ -39,3 +39,5 @@ Upgrading from PySpark 3.3 to 3.4 * In Spark 3.4, the ``Series.concat`` sort parameter will be respected to follow pandas 1.4 behaviors. * In Spark 3.4, the ``DataFrame.__setitem__`` will make a copy and replace pre-existing arrays, which will NOT be over-written to follow pandas 1.4 behaviors. 
+ +* In Spark 3.4, the ``SparkSession.sql`` and the Pandas on Spark API ``sql`` have got new parameter ``args`` which provides binding of named parameters to their SQL literals. diff --git a/python/pyspark/pandas/sql_formatter.py b/python/pyspark/pandas/sql_formatter.py index 45c615161d9..9103366c192 100644 --- a/python/pyspark/pandas/sql_formatter.py +++ b/python/pyspark/pandas/sql_formatter.py @@ -17,7 +17,7 @@ import os import string -from typing import Any, Optional, Union, List, Sequence, Mapping, Tuple +from typing import Any, Dict, Optional, Union, List, Sequence, Mapping, Tuple import uuid import warnings @@ -43,6 +43,7 @@ _CAPTURE_SCOPES = 3 def sql( query: str, index_col: Optional[Union[str, List[str]]] = None, +args: Dict[str, str] = {}, **kwargs: Any, ) -> DataFrame: """ @@ -57,6 +58,8 @@ def sql( * pandas Series * string +Also the method can bind named parameters to SQL literals from `args`. + Parameters -- query : s
[spark] branch master updated: [SPARK-41565][SQL] Add the error class `UNRESOLVED_ROUTINE`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new cd832e546fc [SPARK-41565][SQL] Add the error class `UNRESOLVED_ROUTINE` cd832e546fc is described below commit cd832e546fc58c522d4afa90fc781c3be2cc527e Author: Max Gekk AuthorDate: Wed Dec 21 16:33:02 2022 +0300 [SPARK-41565][SQL] Add the error class `UNRESOLVED_ROUTINE` ### What changes were proposed in this pull request? In the PR, I propose to remove the error classes `_LEGACY_ERROR_TEMP_1041`, `_LEGACY_ERROR_TEMP_1242` and `_LEGACY_ERROR_TEMP_1243`, and use new one `UNRESOLVED_ROUTINE` instead. Closes #38870 ### Why are the changes needed? To improve user experience with Spark SQL, and unify representation of error messages. ### Does this PR introduce _any_ user-facing change? Yes, the PR changes an user-facing error message. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "core/testOnly *SparkThrowableSuite" $ build/sbt "test:testOnly *SQLQuerySuite" $ build/sbt "test:testOnly *UDFSuite" $ build/sbt "test:testOnly *HiveUDFSuite" $ build/sbt "test:testOnly *HiveQuerySuite" $ build/sbt "test:testOnly *JDBCV2Suite" $ build/sbt "test:testOnly *DDLSuite" $ build/sbt "test:testOnly *DataSourceV2FunctionSuite" $ build/sbt "test:testOnly *LookupFunctionsSuite" $ PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" ``` Closes #39095 from MaxGekk/unresolved-routine-error-class. 
Lead-authored-by: Max Gekk Co-authored-by: Serge Rielau Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 21 --- .../spark/sql/catalyst/analysis/Analyzer.scala | 15 ++-- .../sql/catalyst/analysis/CheckAnalysis.scala | 7 ++-- .../sql/catalyst/analysis/FunctionRegistry.scala | 2 +- .../spark/sql/errors/QueryCompilationErrors.scala | 41 ++ .../catalyst/analysis/LookupFunctionsSuite.scala | 10 +++--- .../apache/spark/sql/internal/CatalogImpl.scala| 5 ++- .../double-quoted-identifiers-disabled.sql.out | 13 --- .../ansi/double-quoted-identifiers-enabled.sql.out | 26 -- .../sql-tests/results/ansi/interval.sql.out| 28 --- .../results/double-quoted-identifiers.sql.out | 13 --- .../sql-tests/results/inline-table.sql.out | 7 ++-- .../resources/sql-tests/results/interval.sql.out | 28 --- .../results/postgreSQL/window_part3.sql.out| 7 ++-- .../sql-tests/results/udf/udf-inline-table.sql.out | 7 ++-- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 14 ++-- .../test/scala/org/apache/spark/sql/UDFSuite.scala | 19 ++ .../sql/connector/DataSourceV2FunctionSuite.scala | 15 ++-- .../spark/sql/execution/command/DDLSuite.scala | 34 -- .../org/apache/spark/sql/jdbc/JDBCV2Suite.scala| 32 - .../spark/sql/hive/execution/HiveQuerySuite.scala | 16 - .../spark/sql/hive/execution/HiveUDFSuite.scala| 17 ++--- .../spark/sql/hive/execution/SQLQuerySuite.scala | 13 +-- 23 files changed, 244 insertions(+), 146 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index e6ae5678993..989df84ed53 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1266,6 +1266,12 @@ }, "sqlState" : "42000" }, + "UNRESOLVED_ROUTINE" : { +"message" : [ + "Cannot resolve function on search path ." 
+], +"sqlState" : "42000" + }, "UNSUPPORTED_DATATYPE" : { "message" : [ "Unsupported data type " @@ -2060,11 +2066,6 @@ "Gap duration expression used in session window must be CalendarIntervalType, but got ." ] }, - "_LEGACY_ERROR_TEMP_1041" : { -"message" : [ - "Undefined function ." -] - }, "_LEGACY_ERROR_TEMP_1045" : { "message" : [ "ALTER TABLE SET LOCATION does not support partition for v2 tables." @@ -2920,16 +2921,6 @@ "CREATE-TABLE-AS-SELECT cannot create table with location to a non-empty directory . To allow overwriting the existing non-empty directory, set '' to true." ] }, - "_LEGACY_ERROR_TEMP_1242" : { -"message" : [ - "Undefined function: . This function is neither a built-in/temporary function, nor
[spark] branch master updated: [SPARK-41568][SQL] Assign name to _LEGACY_ERROR_TEMP_1236
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 2440f699797 [SPARK-41568][SQL] Assign name to _LEGACY_ERROR_TEMP_1236 2440f699797 is described below commit 2440f6997978ca033579a311caea561140ef76d5 Author: panbingkun AuthorDate: Tue Dec 20 21:16:43 2022 +0300 [SPARK-41568][SQL] Assign name to _LEGACY_ERROR_TEMP_1236 ### What changes were proposed in this pull request? In the PR, I propose to assign the name `UNSUPPORTED_FEATURE.ANALYZE_VIEW` to the error class `_LEGACY_ERROR_TEMP_1236`. ### Why are the changes needed? Proper names of error classes should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Closes #39119 from panbingkun/LEGACY_ERROR_TEMP_1236. Lead-authored-by: panbingkun Co-authored-by: Maxim Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 - .../spark/sql/errors/QueryCompilationErrors.scala | 2 +- .../spark/sql/StatisticsCollectionSuite.scala | 24 +- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 30b0a5ce8f3..b5e846a8a89 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1309,6 +1309,11 @@ "The ANALYZE TABLE FOR COLUMNS command does not support the type of the column in the table ." ] }, + "ANALYZE_VIEW" : { +"message" : [ + "The ANALYZE TABLE command does not support views." +] + }, "CATALOG_OPERATION" : { "message" : [ "Catalog does not support ." @@ -2895,11 +2900,6 @@ "Partition spec is invalid. The spec () must match the partition spec () defined in table ''." 
] }, - "_LEGACY_ERROR_TEMP_1236" : { -"message" : [ - "ANALYZE TABLE is not supported on views." -] - }, "_LEGACY_ERROR_TEMP_1237" : { "message" : [ "The list of partition columns with values in partition specification for table '' in database '' is not a prefix of the list of partition columns defined in the table schema. Expected a prefix of [], but got []." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 2ddd0704565..b0cf8f6876c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -2302,7 +2302,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { def analyzeTableNotSupportedOnViewsError(): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1236", + errorClass = "UNSUPPORTED_FEATURE.ANALYZE_VIEW", messageParameters = Map.empty) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index dda1cc5b52b..2ab8bb25a8b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -63,22 +63,26 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared } test("analyzing views is not supported") { -def assertAnalyzeUnsupported(analyzeCommand: String): Unit = { - val err = intercept[AnalysisException] { -sql(analyzeCommand) - } - assert(err.message.contains("ANALYZE TABLE is not supported")) -} - val tableName = "tbl" withTable(tableName) { spark.range(10).write.saveAsTable(tableName) val viewName = "view" withView(viewName) { sql(s"CREATE VIEW $viewName AS SELECT * FROM $tableName") - 
-assertAnalyzeUnsupported(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") -assertAnalyzeUnsupported(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") +checkError( + exception = intercept[AnalysisException] { +sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") + }, + errorClass = "UNSUPPORTED_FEATURE.ANALYZE_VIEW", + parameters = Map.empty +) +ch
[spark] branch master updated: [SPARK-41582][CORE][SQL] Reuse `INVALID_TYPED_LITERAL` instead of `_LEGACY_ERROR_TEMP_0022`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 9840a0327a3 [SPARK-41582][CORE][SQL] Reuse `INVALID_TYPED_LITERAL` instead of `_LEGACY_ERROR_TEMP_0022` 9840a0327a3 is described below commit 9840a0327a3f242877759c97d2e7bbf8b4ac1072 Author: yangjie01 AuthorDate: Tue Dec 20 18:15:08 2022 +0300 [SPARK-41582][CORE][SQL] Reuse `INVALID_TYPED_LITERAL` instead of `_LEGACY_ERROR_TEMP_0022` ### What changes were proposed in this pull request? This pr aims reuse `INVALID_TYPED_LITERAL` instead of `_LEGACY_ERROR_TEMP_0022`. ### Why are the changes needed? Proper names of error classes to improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? Yes, the PR changes user-facing error message. ### How was this patch tested? Pass GitHub Actions Closes #39122 from LuciferYang/SPARK-41582. Authored-by: yangjie01 Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 5 - .../spark/sql/catalyst/parser/AstBuilder.scala | 130 ++--- .../spark/sql/errors/QueryParsingErrors.scala | 9 -- .../catalyst/parser/ExpressionParserSuite.scala| 8 +- .../sql-tests/results/ansi/literals.sql.out| 6 +- .../resources/sql-tests/results/literals.sql.out | 6 +- 6 files changed, 77 insertions(+), 87 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 68034a5221e..30b0a5ce8f3 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1663,11 +1663,6 @@ "Function trim doesn't support with type . Please use BOTH, LEADING or TRAILING as trim type." ] }, - "_LEGACY_ERROR_TEMP_0022" : { -"message" : [ - "." -] - }, "_LEGACY_ERROR_TEMP_0023" : { "message" : [ "Numeric literal does not fit in range [, ] for type ." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 545d5d97d88..ea752a420d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2379,76 +2379,72 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit specialTs.getOrElse(toLiteral(stringToTimestamp(_, zoneId), TimestampType)) } -try { - valueType match { -case "DATE" => - val zoneId = getZoneId(conf.sessionLocalTimeZone) - val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType)) - specialDate.getOrElse(toLiteral(stringToDate, DateType)) -case "TIMESTAMP_NTZ" => - convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone)) -.map(Literal(_, TimestampNTZType)) -.getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType)) -case "TIMESTAMP_LTZ" => - constructTimestampLTZLiteral(value) -case "TIMESTAMP" => - SQLConf.get.timestampType match { -case TimestampNTZType => - convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone)) -.map(Literal(_, TimestampNTZType)) -.getOrElse { - val containsTimeZonePart = - DateTimeUtils.parseTimestampString(UTF8String.fromString(value))._2.isDefined - // If the input string contains time zone part, return a timestamp with local time - // zone literal. 
- if (containsTimeZonePart) { -constructTimestampLTZLiteral(value) - } else { -toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType) - } +valueType match { + case "DATE" => +val zoneId = getZoneId(conf.sessionLocalTimeZone) +val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType)) +specialDate.getOrElse(toLiteral(stringToDate, DateType)) + case "TIMESTAMP_NTZ" => +convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone)) + .map(Literal(_, TimestampNTZType)) + .getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType)) + case "TIMESTAMP_LTZ" => +constructTimestampLTZLiteral(value) + case "TIMESTAMP" => +
[spark] branch branch-3.3 updated: [SPARK-41538][SQL] Metadata column should be appended at the end of project list
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new b23198ee6d7 [SPARK-41538][SQL] Metadata column should be appended at the end of project list b23198ee6d7 is described below commit b23198ee6d76cc0486ae810a1d37f0474b74c27c Author: Gengliang Wang AuthorDate: Fri Dec 16 10:43:17 2022 +0300 [SPARK-41538][SQL] Metadata column should be appended at the end of project list ### What changes were proposed in this pull request? For the following query: ``` CREATE TABLE table_1 ( a ARRAY, s STRUCT) USING parquet; CREATE VIEW view_1 (id) AS WITH source AS ( SELECT * FROM table_1 ), renamed AS ( SELECT s.id FROM source ) SELECT id FROM renamed; with foo AS ( SELECT 'a' as id ), bar AS ( SELECT 'a' as id ) SELECT 1 FROM foo FULL OUTER JOIN bar USING(id) FULL OUTER JOIN view_1 USING(id) WHERE foo.id IS NOT NULL ``` There will be the following error: ``` class org.apache.spark.sql.types.ArrayType cannot be cast to class org.apache.spark.sql.types.StructType (org.apache.spark.sql.types.ArrayType and org.apache.spark.sql.types.StructType are in unnamed module of loader 'app') java.lang.ClassCastException: class org.apache.spark.sql.types.ArrayType cannot be cast to class org.apache.spark.sql.types.StructType (org.apache.spark.sql.types.ArrayType and org.apache.spark.sql.types.StructType are in unnamed module of loader 'app') at org.apache.spark.sql.catalyst.expressions.GetStructField.childSchema$lzycompute(complexTypeExtractors.scala:108) at org.apache.spark.sql.catalyst.expressions.GetStructField.childSchema(complexTypeExtractors.scala:108) ``` This is caused by the inconsistent metadata column positions in the following two nodes: * Table relation: at the ending position * Project list: at the beginning position 
https://user-images.githubusercontent.com/1097932/207992343-438714bc-e1d1-46f7-9a79-84ab83dd299f.png;> When the InlineCTE rule executes, the metadata column in the project is wrongly combined with the table output. https://user-images.githubusercontent.com/1097932/207992431-f4cfc774-4cab-4728-b109-2ebff94e5fe2.png;> Thus the column `a ARRAY` is casted as `s STRUCT` and cause the error. This PR is to fix the issue by putting the Metadata column at the end of project list, so that it is consistent with the table relation. ### Why are the changes needed? Bug fix ### Does this PR introduce _any_ user-facing change? Yes, it fixes a bug in the analysis rule `AddMetadataColumns` ### How was this patch tested? New test case Closes #39081 from gengliangwang/fixMetadata. Authored-by: Gengliang Wang Signed-off-by: Max Gekk (cherry picked from commit 172f719fffa84a2528628e08627a02cf8d1fe8a8) Signed-off-by: Max Gekk --- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 39 ++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 0c68dd8839d..c6429077b07 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -970,7 +970,7 @@ class Analyzer(override val catalogManager: CatalogManager) case s: ExposesMetadataColumns => s.withMetadataColumns() case p: Project => val newProj = p.copy( - projectList = p.metadataOutput ++ p.projectList, + projectList = p.projectList ++ p.metadataOutput, child = addMetadataCol(p.child)) newProj.copyTagsFrom(p) newProj diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 5b42d05c237..66f9700e8ac 
100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4572,6 +4572,45 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark sql("SELECT /*+ hash(t2) */ * FROM t1 join t2 on c1 = c2") } } + + test("SPARK-41538: Metadata column should be appended at the end of project") { +val tableName = "table_1" +val viewName = "view_1" +withTable(tableName) { + withView(viewName) { +sql(
[spark] branch master updated (066870d938c -> 172f719fffa)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 066870d938c [SPARK-41518][SQL] Assign a name to the error class `_LEGACY_ERROR_TEMP_2422` add 172f719fffa [SPARK-41538][SQL] Metadata column should be appended at the end of project list No new revisions were added by this update. Summary of changes: .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 39 ++ 2 files changed, 40 insertions(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41518][SQL] Assign a name to the error class `_LEGACY_ERROR_TEMP_2422`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 066870d938c [SPARK-41518][SQL] Assign a name to the error class `_LEGACY_ERROR_TEMP_2422` 066870d938c is described below commit 066870d938cf7fb2f088c2a7f6a036de6fb5b7d2 Author: Max Gekk AuthorDate: Fri Dec 16 10:20:38 2022 +0300 [SPARK-41518][SQL] Assign a name to the error class `_LEGACY_ERROR_TEMP_2422` ### What changes were proposed in this pull request? In the PR, I propose to assign new name `MISSING_GROUP_BY` to the legacy error class `_LEGACY_ERROR_TEMP_2422`, improve its error message format, and regenerate the SQL golden files. ### Why are the changes needed? To improve user experience with Spark SQL, and unify representation of error messages. ### Does this PR introduce _any_ user-facing change? Yes, it changes an user-facing error message. ### How was this patch tested? By running the affected test suites: ``` $ PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" ``` Closes #39061 from MaxGekk/error-class-_LEGACY_ERROR_TEMP_2422. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 - .../sql/catalyst/analysis/CheckAnalysis.scala | 14 - .../sql-tests/results/group-by-filter.sql.out | 12 --- .../resources/sql-tests/results/group-by.sql.out | 24 -- .../results/postgreSQL/select_having.sql.out | 6 +- .../negative-cases/invalid-correlation.sql.out | 12 --- .../results/udaf/udaf-group-by-ordinal.sql.out | 6 +- .../sql-tests/results/udaf/udaf-group-by.sql.out | 12 --- .../udf/postgreSQL/udf-select_having.sql.out | 6 +- .../sql-tests/results/udf/udf-group-by.sql.out | 18 +--- 10 files changed, 37 insertions(+), 83 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index ab4a93798a7..7af794b9ef9 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -863,6 +863,11 @@ ], "sqlState" : "42000" }, + "MISSING_GROUP_BY" : { +"message" : [ + "The query does not include a GROUP BY clause. Add GROUP BY or turn it into the window functions using OVER clauses." +] + }, "MISSING_STATIC_PARTITION_COLUMN" : { "message" : [ "Unknown static partition column: <staticName>" @@ -5116,11 +5121,6 @@ "nondeterministic expression <sqlExpr> should not appear in the arguments of an aggregate function." ] }, - "_LEGACY_ERROR_TEMP_2422" : { -"message" : [ - "grouping expressions sequence is empty, and '<sqlExpr>' is not an aggregate function. Wrap '<aggExprs>' in windowing function(s) or wrap '<sqlExpr>' in first() (or first_value) if you don't care which value you get." -] - }, "_LEGACY_ERROR_TEMP_2423" : { "message" : [ "Correlated scalar subquery '<sqlExpr>' is neither present in the group by, nor in an aggregate function. Add it to group by using ordinal position or wrap it in first() (or first_value) if you don't care which value you get." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 11b2d6671c7..2c57c2b9bab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -402,16 +402,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB messageParameters = Map("sqlExpr" -> expr.sql)) } } - case e: Attribute if groupingExprs.isEmpty => -// Collect all [[AggregateExpressions]]s. -val aggExprs = aggregateExprs.filter(_.collect { - case a: AggregateExpression => a -}.nonEmpty) -e.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2422", - messageParameters = Map( -"sqlExpr" -> e.sql, -"aggExprs" -> aggExprs.map(_.sql).mkString("(", ", ", ")"))) + case _: Attribute if groupingExprs.isEmpty => +operator.failAnalysis( + errorClass = "MISSING_GROUP_BY&quo
[spark] branch master updated (e03f86d84bd -> 92440151c9e)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from e03f86d84bd [SPARK-41542][CONNECT][TESTS] Set parallelism as 1 for coverage report in Spark Connect add 92440151c9e [SPARK-41508][CORE][SQL] Rename `_LEGACY_ERROR_TEMP_1180` to `UNEXPECTED_INPUT_TYPE` and remove `_LEGACY_ERROR_TEMP_1179` No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 17 +++--- .../sql/catalyst/analysis/FunctionRegistry.scala | 12 +-- .../plans/logical/basicLogicalOperators.scala | 19 +++ .../spark/sql/errors/QueryCompilationErrors.scala | 27 +++ .../sql-tests/results/postgreSQL/int8.sql.out | 7 ++-- .../results/table-valued-functions.sql.out | 38 ++ 6 files changed, 49 insertions(+), 71 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (0bd8c856c74 -> cd117fbd402)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 0bd8c856c74 [SPARK-41465][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1235 add cd117fbd402 [SPARK-41350][SQL][FOLLOWUP] Allow simple name access of join hidden columns after alias No new revisions were added by this update. Summary of changes: .../catalyst/expressions/namedExpressions.scala| 7 +++- .../resources/sql-tests/inputs/natural-join.sql| 2 + .../test/resources/sql-tests/inputs/using-join.sql | 8 .../sql-tests/results/natural-join.sql.out | 10 + .../resources/sql-tests/results/using-join.sql.out | 44 ++ 5 files changed, 69 insertions(+), 2 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41465][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1235
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 0bd8c856c74 [SPARK-41465][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1235 0bd8c856c74 is described below commit 0bd8c856c748a73f0bb1fecdeae05bf6f2e4063e Author: panbingkun AuthorDate: Thu Dec 15 21:21:44 2022 +0300 [SPARK-41465][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1235 ### What changes were proposed in this pull request? In the PR, I propose to assign the name `ANALYZE_UNSUPPORTED_COLUMN_TYPE` to the error class `_LEGACY_ERROR_TEMP_1235`. ### Why are the changes needed? Proper names of error classes should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add new UT. Pass GA. Closes #39003 from panbingkun/LEGACY_ERROR_TEMP_1235. Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 +- .../spark/sql/errors/QueryCompilationErrors.scala | 8 .../apache/spark/sql/StatisticsCollectionSuite.scala | 9 + .../org/apache/spark/sql/hive/StatisticsSuite.scala | 19 +++ 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index b7bf07a0e48..a60a24d14c6 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1289,6 +1289,11 @@ "The ANALYZE TABLE FOR COLUMNS command can operate on temporary views that have been cached already. Consider to cache the view <viewName>." ] }, + "ANALYZE_UNSUPPORTED_COLUMN_TYPE" : { +"message" : [ + "The ANALYZE TABLE FOR COLUMNS command does not support the type <columnType> of the column <columnName> in the table <tableName>." +] + }, "CATALOG_OPERATION" : { "message" : [ "Catalog <catalogName> does not support <operation>." 
@@ -2892,11 +2897,6 @@ "Partition spec is invalid. The spec () must match the partition spec () defined in table ''." ] }, - "_LEGACY_ERROR_TEMP_1235" : { -"message" : [ - "Column in table is of type , and Spark does not support statistics collection on this column type." -] - }, "_LEGACY_ERROR_TEMP_1236" : { "message" : [ "ANALYZE TABLE is not supported on views." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index a5ff2084ca8..18ac6b7bcf8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -2298,11 +2298,11 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { tableIdent: TableIdentifier, dataType: DataType): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1235", + errorClass = "UNSUPPORTED_FEATURE.ANALYZE_UNSUPPORTED_COLUMN_TYPE", messageParameters = Map( -"name" -> name, -"tableIdent" -> tableIdent.toString, -"dataType" -> dataType.toString)) +"columnType" -> toSQLType(dataType), +"columnName" -> toSQLId(name), +"tableName" -> toSQLId(tableIdent.toString))) } def analyzeTableNotSupportedOnViewsError(): Throwable = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 95d9245c57d..dda1cc5b52b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -76,6 +76,7 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared val viewName = "view" withView(viewName) { sql(s"CREATE VIEW $viewName AS SELECT * FROM $tableName") + assertAnalyzeUnsupported(s"ANALYZE TABLE $viewName COMPUTE 
STATISTICS") assertAnalyzeUnsupported(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") } @@ -128,11 +129,11 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared exception = intercept[AnalysisException] { sql(s"ANALYZE TABLE $t
[spark] branch master updated (724bbfdce87 -> a09a2736866)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 724bbfdce87 Revert "[SPARK-41521][BUILD][K8S] Upgrade `kubernetes-client` to 6.3.0" add a09a2736866 [MINOR][SQL][TESTS] Fix Typos 'e1 -> e2' No new revisions were added by this update. Summary of changes: sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41271][SQL] Support parameterized SQL queries by `sql()`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 35fa5e6716e [SPARK-41271][SQL] Support parameterized SQL queries by `sql()` 35fa5e6716e is described below commit 35fa5e6716e59b004851b61f7fbfbdace15f46b7 Author: Max Gekk AuthorDate: Thu Dec 15 09:14:46 2022 +0300 [SPARK-41271][SQL] Support parameterized SQL queries by `sql()` ### What changes were proposed in this pull request? In the PR, I propose to extend SparkSession API and override the `sql` method by: ```scala def sql(sqlText: String, args: Map[String, String]): DataFrame ``` which accepts a map with: - keys are parameters names, - values are SQL literal values. And the first argument `sqlText` might have named parameters in the positions of constants like literal values. For example: ```scala spark.sql( sqlText = "SELECT * FROM tbl WHERE date > :startDate LIMIT :maxRows", args = Map( "startDate" -> "DATE'2022-12-01'", "maxRows" -> "100")) ``` The new `sql()` method parses the input SQL statement and provided parameter values, and replaces the named parameters by the literal values. And then it eagerly runs DDL/DML commands, but not for SELECT queries. Closes #38712 ### Why are the changes needed? 1. To improve user experience with Spark SQL via - Using Spark as remote service (microservice). - Write SQL code that will power reports, dashboards, charts and other data presentation solutions that need to account for criteria modifiable by users through an interface. - Build a generic integration layer based on the SQL API. The goal is to expose managed data to a wide application ecosystem with a microservice architecture. It is only natural in such a setup to ask for modular and reusable SQL code, that can be executed repeatedly with different parameter values. 2. 
To achieve feature parity with other systems that support named parameters: - Redshift: https://docs.aws.amazon.com/redshift/latest/mgmt/data-api.html#data-api-calling - BigQuery: https://cloud.google.com/bigquery/docs/parameterized-queries#api - MS DBSQL: https://learn.microsoft.com/en-us/azure/databricks/sql/user/queries/query-parameters ### Does this PR introduce _any_ user-facing change? No, this is an extension of the existing APIs. ### How was this patch tested? By running new tests: ``` $ build/sbt "core/testOnly *SparkThrowableSuite" $ build/sbt "test:testOnly *PlanParserSuite" $ build/sbt "test:testOnly *AnalysisSuite" $ build/sbt "test:testOnly *ParametersSuite" ``` Closes #38864 from MaxGekk/parameterized-sql-2. Lead-authored-by: Max Gekk Co-authored-by: Maxim Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 +++ .../spark/sql/catalyst/parser/SqlBaseParser.g4 | 1 + .../sql/catalyst/analysis/CheckAnalysis.scala | 5 ++ .../sql/catalyst/expressions/parameters.scala | 64 ++ .../spark/sql/catalyst/parser/AstBuilder.scala | 7 ++ .../spark/sql/catalyst/trees/TreePatterns.scala| 1 + .../sql/catalyst/analysis/AnalysisSuite.scala | 14 .../sql/catalyst/parser/PlanParserSuite.scala | 26 .../scala/org/apache/spark/sql/SparkSession.scala | 40 +-- .../org/apache/spark/sql/ParametersSuite.scala | 78 ++ .../org/apache/spark/sql/test/SQLTestUtils.scala | 2 +- .../benchmark/InsertIntoHiveTableBenchmark.scala | 4 +- .../ObjectHashAggregateExecBenchmark.scala | 4 +- 13 files changed, 246 insertions(+), 10 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index f66d6998e26..b7bf07a0e48 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -806,6 +806,11 @@ } } }, + "INVALID_SQL_ARG" : { +"message" : [ + "The argument of `sql()` is invalid. Consider to replace it by a SQL literal statement." 
+] + }, "INVALID_SQL_SYNTAX" : { "message" : [ "Invalid SQL syntax: " @@ -1147,6 +1152,11 @@ "Unable to convert SQL type to Protobuf type ." ] }, + "UNBOUND_SQL_PARAMETER" : { +"message" : [ + "Found the unbound parameter: . Please, fix `args` and provide a mapping of the parameter to a SQL literal statement." +] + }, "UNCLOSED_BRACKETED
[spark] branch master updated (4e8980e6ae9 -> 5b5083484cd)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 4e8980e6ae9 [SPARK-41409][CORE][SQL] Rename `_LEGACY_ERROR_TEMP_1043` to `WRONG_NUM_ARGS.WITHOUT_SUGGESTION` add 5b5083484cd [SPARK-41248][SQL] Add "spark.sql.json.enablePartialResults" to enable/disable JSON partial results No new revisions were added by this update. Summary of changes: .../spark/sql/catalyst/json/JacksonParser.scala| 10 +- .../org/apache/spark/sql/internal/SQLConf.scala| 11 ++ sql/core/benchmarks/JsonBenchmark-results.txt | 155 ++--- .../org/apache/spark/sql/JsonFunctionsSuite.scala | 67 +++-- .../sql/execution/datasources/json/JsonSuite.scala | 25 +++- 5 files changed, 158 insertions(+), 110 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41409][CORE][SQL] Rename `_LEGACY_ERROR_TEMP_1043` to `WRONG_NUM_ARGS.WITHOUT_SUGGESTION`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 4e8980e6ae9 [SPARK-41409][CORE][SQL] Rename `_LEGACY_ERROR_TEMP_1043` to `WRONG_NUM_ARGS.WITHOUT_SUGGESTION` 4e8980e6ae9 is described below commit 4e8980e6ae9a513bb4c990944841a9db073013ea Author: yangjie01 AuthorDate: Wed Dec 14 08:22:33 2022 +0300 [SPARK-41409][CORE][SQL] Rename `_LEGACY_ERROR_TEMP_1043` to `WRONG_NUM_ARGS.WITHOUT_SUGGESTION` ### What changes were proposed in this pull request? This pr introduces sub-classes of `WRONG_NUM_ARGS`: - WITHOUT_SUGGESTION - WITH_SUGGESTION then replace existing `WRONG_NUM_ARGS` to `WRONG_NUM_ARGS.WITH_SUGGESTION` and rename error class `_LEGACY_ERROR_TEMP_1043` to `WRONG_NUM_ARGS.WITHOUT_SUGGESTION` ### Why are the changes needed? Proper names of error classes to improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new test case Closes #38940 from LuciferYang/legacy-1043. 
Lead-authored-by: yangjie01 Co-authored-by: YangJie Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json| 21 ++--- .../spark/sql/errors/QueryCompilationErrors.scala | 8 .../resources/sql-tests/results/ansi/date.sql.out | 2 +- .../sql-tests/results/ansi/string-functions.sql.out | 4 ++-- .../results/ceil-floor-with-scale-param.sql.out | 4 ++-- .../sql-tests/results/csv-functions.sql.out | 2 +- .../test/resources/sql-tests/results/date.sql.out | 2 +- .../sql-tests/results/datetime-legacy.sql.out | 2 +- .../sql-tests/results/json-functions.sql.out| 8 .../results/sql-compatibility-functions.sql.out | 2 +- .../sql-tests/results/string-functions.sql.out | 4 ++-- .../results/table-valued-functions.sql.out | 2 +- .../sql-tests/results/timestamp-ntz.sql.out | 2 +- .../resources/sql-tests/results/udaf/udaf.sql.out | 2 +- .../sql-tests/results/udf/udf-udaf.sql.out | 2 +- .../apache/spark/sql/DataFrameFunctionsSuite.scala | 2 +- .../org/apache/spark/sql/DateFunctionsSuite.scala | 2 +- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 2 +- .../org/apache/spark/sql/StringFunctionsSuite.scala | 2 +- .../test/scala/org/apache/spark/sql/UDFSuite.scala | 11 ++- .../sql/errors/QueryCompilationErrorsSuite.scala| 13 + .../spark/sql/hive/execution/HiveUDAFSuite.scala| 2 +- 22 files changed, 57 insertions(+), 44 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index e1df3db4291..f66d6998e26 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1548,8 +1548,20 @@ }, "WRONG_NUM_ARGS" : { "message" : [ - "The requires parameters but the actual number is ." -] + "Invalid number of arguments for the function ." +], +"subClass" : { + "WITHOUT_SUGGESTION" : { +"message" : [ + "Please, refer to 'https://spark.apache.org/docs/latest/sql-ref-functions.html' for a fix." 
+] + }, + "WITH_SUGGESTION" : { +"message" : [ + "Consider to change the number of arguments because the function requires parameters but the actual number is ." +] + } +} }, "_LEGACY_ERROR_TEMP_0001" : { "message" : [ @@ -2018,11 +2030,6 @@ "Undefined function ." ] }, - "_LEGACY_ERROR_TEMP_1043" : { -"message" : [ - "Invalid arguments for function ." -] - }, "_LEGACY_ERROR_TEMP_1045" : { "message" : [ "ALTER TABLE SET LOCATION does not support partition for v2 tables." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index b329f6689d4..a5ff2084ca8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -640,7 +640,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { def invalidFunctionArgumentsError( name: String, expectedNum: String, actualNum: Int): Throwable = { new AnalysisException( - errorClass = "WRONG_NUM_ARGS", + errorClass = "WRONG_NUM_ARGS.WITH_SUGGESTION&
[spark] branch master updated: [SPARK-41062][SQL] Rename `UNSUPPORTED_CORRELATED_REFERENCE` to `CORRELATED_REFERENCE`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new e29ada0c13e [SPARK-41062][SQL] Rename `UNSUPPORTED_CORRELATED_REFERENCE` to `CORRELATED_REFERENCE` e29ada0c13e is described below commit e29ada0c13e71aaad0566ef67591a33d4c58fe2a Author: itholic AuthorDate: Tue Dec 13 21:48:11 2022 +0300 [SPARK-41062][SQL] Rename `UNSUPPORTED_CORRELATED_REFERENCE` to `CORRELATED_REFERENCE` ### What changes were proposed in this pull request? This PR proposes to rename `UNSUPPORTED_CORRELATED_REFERENCE` to `CORRELATED_REFERENCE`. Also, show `sqlExprs` rather than `treeNode`, which is more useful information to users. ### Why are the changes needed? The sub-error class name is duplicated with its main class, `UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY`. We should make all error class names clear and brief. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? ``` ./build/sbt “sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*” ``` Closes #38576 from itholic/SPARK-41062. 
Lead-authored-by: itholic Co-authored-by: Haejoon Lee <44108233+itho...@users.noreply.github.com> Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json| 10 +- .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 7 --- .../spark/sql/catalyst/analysis/ResolveSubquerySuite.scala | 13 - .../subquery/negative-cases/invalid-correlation.sql.out | 4 ++-- .../src/test/scala/org/apache/spark/sql/SubquerySuite.scala | 12 +--- 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 25362d5893f..e1df3db4291 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1471,6 +1471,11 @@ "A correlated outer name reference within a subquery expression body was not found in the enclosing query: " ] }, + "CORRELATED_REFERENCE" : { +"message" : [ + "Expressions referencing the outer query are not supported outside of WHERE/HAVING clauses: " +] + }, "LATERAL_JOIN_CONDITION_NON_DETERMINISTIC" : { "message" : [ "Lateral join condition cannot be non-deterministic: " @@ -1496,11 +1501,6 @@ "Non-deterministic lateral subqueries are not supported when joining with outer relations that produce more than one row" ] }, - "UNSUPPORTED_CORRELATED_REFERENCE" : { -"message" : [ - "Expressions referencing the outer query are not supported outside of WHERE/HAVING clauses" -] - }, "UNSUPPORTED_CORRELATED_REFERENCE_DATA_TYPE" : { "message" : [ "Correlated column reference '' cannot be type" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index e7e153a319d..5303364710c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ 
-1089,11 +1089,12 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB // 2. Expressions containing outer references on plan nodes other than allowed operators. def failOnInvalidOuterReference(p: LogicalPlan): Unit = { p.expressions.foreach(checkMixedReferencesInsideAggregateExpr) - if (!canHostOuter(p) && p.expressions.exists(containsOuter)) { + val exprs = stripOuterReferences(p.expressions.filter(expr => containsOuter(expr))) + if (!canHostOuter(p) && !exprs.isEmpty) { p.failAnalysis( errorClass = - "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_CORRELATED_REFERENCE", - messageParameters = Map("treeNode" -> planToString(p))) +"UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.CORRELATED_REFERENCE", + messageParameters = Map("sqlExprs" -> exprs.map(toSQLExpr).mkString(","))) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala index 577f663d8b1..7b99153acf9 100644 --- a
[spark] branch master updated (0e2d604fd33 -> 3809ccdca6e)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 0e2d604fd33 [SPARK-41406][SQL] Refactor error message for `NUM_COLUMNS_MISMATCH` to make it more generic add 3809ccdca6e [SPARK-41478][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1234 No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 10 +- .../spark/sql/errors/QueryCompilationErrors.scala | 4 ++-- .../spark/sql/StatisticsCollectionSuite.scala | 23 +- .../apache/spark/sql/execution/SQLViewSuite.scala | 11 +++ 4 files changed, 28 insertions(+), 20 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41406][SQL] Refactor error message for `NUM_COLUMNS_MISMATCH` to make it more generic
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 0e2d604fd33 [SPARK-41406][SQL] Refactor error message for `NUM_COLUMNS_MISMATCH` to make it more generic 0e2d604fd33 is described below commit 0e2d604fd33c8236cfa8ae243eeaec42d3176a06 Author: panbingkun AuthorDate: Tue Dec 13 14:02:36 2022 +0300 [SPARK-41406][SQL] Refactor error message for `NUM_COLUMNS_MISMATCH` to make it more generic ### What changes were proposed in this pull request? The pr aims to refactor error message for `NUM_COLUMNS_MISMATCH` to make it more generic. ### Why are the changes needed? The changes improve the error framework. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Update existed UT. Pass GA. Closes #38937 from panbingkun/SPARK-41406. Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 2 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 4 +- .../plans/logical/basicLogicalOperators.scala | 4 +- .../resources/sql-tests/results/except-all.sql.out | 6 +- .../sql-tests/results/intersect-all.sql.out| 6 +- .../native/widenSetOperationTypes.sql.out | 140 ++--- .../sql-tests/results/udf/udf-except-all.sql.out | 6 +- .../results/udf/udf-intersect-all.sql.out | 6 +- .../spark/sql/DataFrameSetOperationsSuite.scala| 9 +- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 22 +++- 10 files changed, 110 insertions(+), 95 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index e76328e970d..6faaf0af35f 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -943,7 +943,7 @@ }, "NUM_COLUMNS_MISMATCH" : { "message" : [ - " can only be performed on tables with the same number of 
columns, but the first table has columns and the table has columns." + " can only be performed on inputs with the same number of columns, but the first input has columns and the input has columns." ] }, "ORDER_BY_POS_OUT_OF_RANGE" : { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 12dac5c632a..be812adaaa1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -552,7 +552,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB errorClass = "NUM_COLUMNS_MISMATCH", messageParameters = Map( "operator" -> toSQLStmt(operator.nodeName), -"refNumColumns" -> ref.length.toString, +"firstNumColumns" -> ref.length.toString, "invalidOrdinalNum" -> ordinalNumber(ti + 1), "invalidNumColumns" -> child.output.length.toString)) } @@ -565,7 +565,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB e.failAnalysis( errorClass = "_LEGACY_ERROR_TEMP_2430", messageParameters = Map( - "operator" -> operator.nodeName, + "operator" -> toSQLStmt(operator.nodeName), "ci" -> ordinalNumber(ci), "ti" -> ordinalNumber(ti + 1), "dt1" -> dt1.catalogString, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 60586e4166c..878ad91c088 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -342,7 +342,7 @@ case class Intersect( right: LogicalPlan, isAll: Boolean) extends SetOperation(left, right) { - override def nodeName: 
String = getClass.getSimpleName + ( if ( isAll ) "All" else "" ) + override def nodeName: String = getClass.getSimpleName + ( if ( isAll ) " All" else "" ) final
[spark] branch master updated (af8dd411aa9 -> 9b69331602e)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from af8dd411aa9 [SPARK-33782][K8S][CORE] Place spark.files, spark.jars and spark.files under the current working directory on the driver in K8S cluster mode add 9b69331602e [SPARK-41481][CORE][SQL] Reuse `INVALID_TYPED_LITERAL` instead of `_LEGACY_ERROR_TEMP_0020` No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 5 -- .../spark/sql/catalyst/parser/AstBuilder.scala | 2 +- .../spark/sql/errors/QueryParsingErrors.scala | 7 --- .../catalyst/parser/ExpressionParserSuite.scala| 21 +--- .../sql-tests/results/ansi/interval.sql.out| 60 ++ .../resources/sql-tests/results/interval.sql.out | 60 ++ 6 files changed, 96 insertions(+), 59 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (5d52bb36d3b -> cd2f78657ce)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 5d52bb36d3b [SPARK-41486][SQL][TESTS] Upgrade `MySQL` docker image to 8.0.31 to support `ARM64` test add cd2f78657ce [SPARK-41463][SQL][TESTS] Ensure error class names contain only capital letters, numbers and underscores No new revisions were added by this update. Summary of changes: .../test/scala/org/apache/spark/SparkThrowableSuite.scala| 12 1 file changed, 12 insertions(+) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41443][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1061
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 92655db9fc6 [SPARK-41443][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1061 92655db9fc6 is described below commit 92655db9fc69410022052b6e662488285a322490 Author: panbingkun AuthorDate: Sat Dec 10 19:27:26 2022 +0300 [SPARK-41443][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1061 ### What changes were proposed in this pull request? In the PR, I propose to assign the name COLUMN_NOT_FOUND to the error class _LEGACY_ERROR_TEMP_1061. ### Why are the changes needed? Proper names of error classes should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add new UT. Pass GA. Closes #38972 from panbingkun/LEGACY_ERROR_TEMP_1061. Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 .../spark/sql/errors/QueryCompilationErrors.scala | 14 ++- .../catalyst/analysis/ResolveSessionCatalog.scala | 2 +- .../execution/command/AnalyzeColumnCommand.scala | 2 +- .../spark/sql/execution/command/tables.scala | 2 +- .../spark/sql/StatisticsCollectionSuite.scala | 29 -- .../execution/command/v1/DescribeTableSuite.scala | 28 +++-- .../apache/spark/sql/hive/StatisticsSuite.scala| 21 ++-- 8 files changed, 76 insertions(+), 32 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index a8738994e17..3f091f090fc 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -109,6 +109,11 @@ "The column already exists. Consider to choose another name or rename the existing column." ] }, + "COLUMN_NOT_FOUND" : { +"message" : [ + "The column cannot be found. 
Verify the spelling and correctness of the column name according to the SQL config ." +] + }, "CONCURRENT_QUERY" : { "message" : [ "Another instance of this query was just started by a concurrent session." @@ -2092,11 +2097,6 @@ " does not support nested column: ." ] }, - "_LEGACY_ERROR_TEMP_1061" : { -"message" : [ - "Column does not exist." -] - }, "_LEGACY_ERROR_TEMP_1065" : { "message" : [ "`` is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index ed08e33829e..b507045f8c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -795,12 +795,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { "column" -> quoted)) } - def columnDoesNotExistError(colName: String): Throwable = { -new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1061", - messageParameters = Map("colName" -> colName)) - } - def renameTempViewToExistingViewError(newName: String): Throwable = { new TableAlreadyExistsException(newName) } @@ -2281,6 +2275,14 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { messageParameters = Map("columnName" -> toSQLId(columnName))) } + def columnNotFoundError(colName: String): Throwable = { +new AnalysisException( + errorClass = "COLUMN_NOT_FOUND", + messageParameters = Map( +"colName" -> toSQLId(colName), +"caseSensitiveConfig" -> toSQLConf(SQLConf.CASE_SENSITIVE.key))) + } + def noSuchTableError(db: String, table: String): Throwable = { new NoSuchTableException(db = db, table = table) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 4afcf5b7514..7b2d5015840 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -155,7 +155,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) case Descri
[spark] branch master updated: [SPARK-41417][CORE][SQL] Rename `_LEGACY_ERROR_TEMP_0019` to `INVALID_TYPED_LITERAL`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 6972341b06e [SPARK-41417][CORE][SQL] Rename `_LEGACY_ERROR_TEMP_0019` to `INVALID_TYPED_LITERAL` 6972341b06e is described below commit 6972341b06eae40dda787306e2d1bde062501617 Author: yangjie01 AuthorDate: Sat Dec 10 09:50:08 2022 +0300 [SPARK-41417][CORE][SQL] Rename `_LEGACY_ERROR_TEMP_0019` to `INVALID_TYPED_LITERAL` ### What changes were proposed in this pull request? This pr aims rename `_LEGACY_ERROR_TEMP_0019` to `INVALID_TYPED_LITERAL` ### Why are the changes needed? Proper names of error classes to improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #38954 from LuciferYang/SPARK-41417. Authored-by: yangjie01 Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 11 +-- .../spark/sql/errors/QueryParsingErrors.scala | 7 +- .../catalyst/parser/ExpressionParserSuite.scala| 31 +--- .../resources/sql-tests/results/ansi/date.sql.out | 21 +++--- .../sql-tests/results/ansi/literals.sql.out| 14 ++-- .../sql-tests/results/ansi/timestamp.sql.out | 21 +++--- .../test/resources/sql-tests/results/date.sql.out | 21 +++--- .../sql-tests/results/datetime-legacy.sql.out | 42 ++- .../resources/sql-tests/results/literals.sql.out | 14 ++-- .../sql-tests/results/postgreSQL/date.sql.out | 84 -- .../resources/sql-tests/results/timestamp.sql.out | 21 +++--- .../results/timestampNTZ/timestamp-ansi.sql.out| 21 +++--- .../results/timestampNTZ/timestamp.sql.out | 21 +++--- 13 files changed, 192 insertions(+), 137 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 19ab5ada2b5..a8738994e17 100644 --- 
a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -813,6 +813,12 @@ } } }, + "INVALID_TYPED_LITERAL" : { +"message" : [ + "The value of the typed literal is invalid: ." +], +"sqlState" : "42000" + }, "INVALID_WHERE_CONDITION" : { "message" : [ "The WHERE condition contains invalid expressions: .", @@ -1599,11 +1605,6 @@ "Function trim doesn't support with type . Please use BOTH, LEADING or TRAILING as trim type." ] }, - "_LEGACY_ERROR_TEMP_0019" : { -"message" : [ - "Cannot parse the value: ." -] - }, "_LEGACY_ERROR_TEMP_0020" : { "message" : [ "Cannot parse the INTERVAL value: ." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 018e9a12e01..ad6f72986d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -211,8 +211,11 @@ private[sql] object QueryParsingErrors extends QueryErrorsBase { def cannotParseValueTypeError( valueType: String, value: String, ctx: TypeConstructorContext): Throwable = { new ParseException( - errorClass = "_LEGACY_ERROR_TEMP_0019", - messageParameters = Map("valueType" -> valueType, "value" -> value), + errorClass = "INVALID_TYPED_LITERAL", + messageParameters = Map( +"valueType" -> toSQLType(valueType), +"value" -> toSQLValue(value, StringType) + ), ctx) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 884e782736c..01c9907cb8c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -521,8 +521,12 @@ class 
ExpressionParserSuite extends AnalysisTest { Literal(Timestamp.valueOf("2016-03-11 20:54:00.000"))) checkError( exception = parseException("timestamP_LTZ '2016-33-11 20:54:00.000'"), -errorClass = "_LEGACY_ERROR_TEMP_0019", -parameters = Map("valueType" -> "TIMESTAMP_LTZ", "value" -> "2016-33-11 20:54:00.000"), +errorClass = "INV
[spark] branch master updated (fc3c0f1008d -> 928eab666da)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from fc3c0f1008d [SPARK-41450][BUILD] Fix shading in `core` module add 928eab666da [SPARK-41462][SQL] Date and timestamp type can up cast to TimestampNTZ No new revisions were added by this update. Summary of changes: .../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 3 +++ .../apache/spark/sql/catalyst/expressions/CastSuiteBase.scala| 9 + 2 files changed, 12 insertions(+) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41435][SQL] Change to call `invalidFunctionArgumentsError` for `curdate()` when `expressions` is not empty
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new d5e32757429 [SPARK-41435][SQL] Change to call `invalidFunctionArgumentsError` for `curdate()` when `expressions` is not empty d5e32757429 is described below commit d5e327574290e1da92d109081c500782d5a3bc21 Author: yangjie01 AuthorDate: Thu Dec 8 15:40:18 2022 +0300 [SPARK-41435][SQL] Change to call `invalidFunctionArgumentsError` for `curdate()` when `expressions` is not empty ### What changes were proposed in this pull request? This pr changes to call `invalidFunctionArgumentsError` instead of `invalidFunctionArgumentNumberError ` for `curdate()` when `expressions` is not empty, then `curdate()` will throw `AnalysisException` with error class `WRONG_NUM_ARGS` when input args are not empty. ### Why are the changes needed? `WRONG_NUM_ARGS` is a more appropriate error class ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new test case Closes #38960 from LuciferYang/curdate-err-msg. 
Authored-by: yangjie01 Signed-off-by: Max Gekk --- .../catalyst/expressions/datetimeExpressions.scala | 4 ++-- .../src/test/resources/sql-tests/inputs/date.sql | 1 + .../resources/sql-tests/results/ansi/date.sql.out | 23 ++ .../test/resources/sql-tests/results/date.sql.out | 23 ++ .../sql-tests/results/datetime-legacy.sql.out | 23 ++ .../org/apache/spark/sql/DateFunctionsSuite.scala | 13 6 files changed, 85 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index e8bad46e84a..3e89dfe39ce 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -171,8 +171,8 @@ object CurDateExpressionBuilder extends ExpressionBuilder { if (expressions.isEmpty) { CurrentDate() } else { - throw QueryCompilationErrors.invalidFunctionArgumentNumberError( -Seq.empty, funcName, expressions.length) + throw QueryCompilationErrors.invalidFunctionArgumentsError( +funcName, "0", expressions.length) } } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/date.sql b/sql/core/src/test/resources/sql-tests/inputs/date.sql index ab57c7c754c..163855069f0 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/date.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/date.sql @@ -19,6 +19,7 @@ select date'2021-4294967297-11'; select current_date = current_date; -- under ANSI mode, `current_date` can't be a function name. 
select current_date() = current_date(); +select curdate(1); -- conversions between date and unix_date (number of days from epoch) select DATE_FROM_UNIX_DATE(0), DATE_FROM_UNIX_DATE(1000), DATE_FROM_UNIX_DATE(null); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 9ddbaec4f99..d0f5b02c916 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -135,6 +135,29 @@ struct<(current_date() = current_date()):boolean> true +-- !query +select curdate(1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS", + "messageParameters" : { +"actualNum" : "1", +"expectedNum" : "0", +"functionName" : "`curdate`" + }, + "queryContext" : [ { +"objectType" : "", +"objectName" : "", +"startIndex" : 8, +"stopIndex" : 17, +"fragment" : "curdate(1)" + } ] +} + + -- !query select DATE_FROM_UNIX_DATE(0), DATE_FROM_UNIX_DATE(1000), DATE_FROM_UNIX_DATE(null) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/date.sql.out b/sql/core/src/test/resources/sql-tests/results/date.sql.out index 9e427adb052..434e3c7abd3 100644 --- a/sql/core/src/test/resources/sql-tests/results/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/date.sql.out @@ -121,6 +121,29 @@ struct<(current_date() = current_date()):boolean> true +-- !query +select curdate(1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ +
[spark] branch master updated: [SPARK-41390][SQL] Update the script used to generate `register` function in `UDFRegistration`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 11cebdbdd0e [SPARK-41390][SQL] Update the script used to generate `register` function in `UDFRegistration` 11cebdbdd0e is described below commit 11cebdbdd0e6d83cbde5f1cb5e4802a7dd5ada48 Author: yangjie01 AuthorDate: Mon Dec 5 23:11:23 2022 +0300 [SPARK-41390][SQL] Update the script used to generate `register` function in `UDFRegistration` ### What changes were proposed in this pull request? SPARK-35065 use `QueryCompilationErrors.invalidFunctionArgumentsError` instead of `throw new AnalysisException(...)` for `register` function in `UDFRegistration`, but the script used to generate `register` function has not been updated, so this pr update the script. ### Why are the changes needed? Update the script used to generate `register` function in `UDFRegistration` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually checked the results of the script. Closes #38916 from LuciferYang/register-func-script. Authored-by: yangjie01 Signed-off-by: Max Gekk --- sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index 99820336477..80550dc21d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -145,8 +145,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends | def builder(e: Seq[Expression]) = if (e.length == $x) { |finalUdf.createScalaUDF(e) | } else { -|throw new AnalysisException("Invalid number of arguments for function " + name + -| ". 
Expected: $x; Found: " + e.length) +|throw QueryCompilationErrors.invalidFunctionArgumentsError(name, "$x", e.length) | } | functionRegistry.createOrReplaceTempFunction(name, builder, "scala_udf") | finalUdf @@ -171,8 +170,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends | def builder(e: Seq[Expression]) = if (e.length == $i) { |ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) | } else { -|throw new AnalysisException("Invalid number of arguments for function " + name + -| ". Expected: $i; Found: " + e.length) +|throw QueryCompilationErrors.invalidFunctionArgumentsError(name, "$i", e.length) | } | functionRegistry.createOrReplaceTempFunction(name, builder, "java_udf") |}""".stripMargin) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41389][CORE][SQL] Reuse `WRONG_NUM_ARGS` instead of `_LEGACY_ERROR_TEMP_1044`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 1996a94b09f [SPARK-41389][CORE][SQL] Reuse `WRONG_NUM_ARGS` instead of `_LEGACY_ERROR_TEMP_1044` 1996a94b09f is described below commit 1996a94b09fe1f450eb33ddb23b16af090bc4d1b Author: yangjie01 AuthorDate: Mon Dec 5 18:04:51 2022 +0300 [SPARK-41389][CORE][SQL] Reuse `WRONG_NUM_ARGS` instead of `_LEGACY_ERROR_TEMP_1044` ### What changes were proposed in this pull request? This pr aims to reuse error class `WRONG_NUM_ARGS` instead of `_LEGACY_ERROR_TEMP_1044`. ### Why are the changes needed? Proper names of error classes to improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass Github Actions. Closes #38913 from LuciferYang/SPARK-41389. Authored-by: yangjie01 Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json| 5 - .../org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala | 5 +++-- .../scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala | 6 -- .../resources/sql-tests/results/sql-compatibility-functions.sql.out | 6 -- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 7d5c272a77f..19ab5ada2b5 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -2011,11 +2011,6 @@ "Invalid arguments for function ." ] }, - "_LEGACY_ERROR_TEMP_1044" : { -"message" : [ - "Function accepts only one argument." -] - }, "_LEGACY_ERROR_TEMP_1045" : { "message" : [ "ALTER TABLE SET LOCATION does not support partition for v2 tables." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 3817f00d09d..be16eaec6ac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -896,8 +896,9 @@ object FunctionRegistry { name: String, dataType: DataType): (String, (ExpressionInfo, FunctionBuilder)) = { val builder = (args: Seq[Expression]) => { - if (args.size != 1) { -throw QueryCompilationErrors.functionAcceptsOnlyOneArgumentError(name) + val argSize = args.size + if (argSize != 1) { +throw QueryCompilationErrors.invalidFunctionArgumentsError(name, "1", argSize) } Cast(args.head, dataType) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 2e20d7aec8d..ed08e33829e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -663,12 +663,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { } } - def functionAcceptsOnlyOneArgumentError(name: String): Throwable = { -new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1044", - messageParameters = Map("name" -> name)) - } - def alterV2TableSetLocationWithPartitionNotSupportedError(): Throwable = { new AnalysisException( errorClass = "_LEGACY_ERROR_TEMP_1045", diff --git a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out index e0d5874d058..319ac059385 100644 --- a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out @@ -94,9 +94,11 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_1044", + "errorClass" : "WRONG_NUM_ARGS", "messageParameters" : { -"name" : "string" +"actualNum" : "2", +"expectedNum" : "1", +"functionName" : "`string`" }, "queryContext" : [ { "objectType" : "", - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41373][SQL][ERROR] Rename CAST_WITH_FUN_SUGGESTION to CAST_WITH_FUNC_SUGGESTION
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 811921be3ba [SPARK-41373][SQL][ERROR] Rename CAST_WITH_FUN_SUGGESTION to CAST_WITH_FUNC_SUGGESTION 811921be3ba is described below commit 811921be3bacb2edb1d382257561429a0a604adb Author: Rui Wang AuthorDate: Sun Dec 4 00:44:11 2022 +0300 [SPARK-41373][SQL][ERROR] Rename CAST_WITH_FUN_SUGGESTION to CAST_WITH_FUNC_SUGGESTION ### What changes were proposed in this pull request? Rename CAST_WITH_FUN_SUGGESTION to CAST_WITH_FUNC_SUGGESTION. This is just `_FUN_SUGGESTION` could has other meaning. `CAST_WITH_FUNC_SUGGESTION` is more clear. I didn't choose to rename this it `CAST_WITH_SUGGESTION` because there is a `CAST_WITH_CONF_SUGGESTION` so we need to differentiate. ### Why are the changes needed? Better error message name. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? Existing UT. Closes #38892 from amaliujia/improve_error_message. Authored-by: Rui Wang Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 2 +- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 2 +- .../spark/sql/catalyst/expressions/CastSuiteBase.scala | 12 ++-- .../spark/sql/catalyst/expressions/CastWithAnsiOnSuite.scala | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 347b9a14862..7d5c272a77f 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -197,7 +197,7 @@ "If you have to cast to , you can set as ." ] }, - "CAST_WITH_FUN_SUGGESTION" : { + "CAST_WITH_FUNC_SUGGESTION" : { "message" : [ "cannot cast to .", "To convert values from to , you can use the functions instead." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index a302298d99c..23152adc0ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -419,7 +419,7 @@ object Cast extends QueryErrorsBase { fallbackConf: Option[(String, String)]): DataTypeMismatch = { def withFunSuggest(names: String*): DataTypeMismatch = { DataTypeMismatch( -errorSubClass = "CAST_WITH_FUN_SUGGESTION", +errorSubClass = "CAST_WITH_FUNC_SUGGESTION", messageParameters = Map( "srcType" -> toSQLType(from), "targetType" -> toSQLType(to), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index 6d972a8482a..68b3d5c8446 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -545,7 +545,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { protected def checkInvalidCastFromNumericType(to: DataType): Unit = { cast(1.toByte, to).checkInputDataTypes() == DataTypeMismatch( -errorSubClass = "CAST_WITH_FUN_SUGGESTION", +errorSubClass = "CAST_WITH_FUNC_SUGGESTION", messageParameters = Map( "srcType" -> toSQLType(Literal(1.toByte).dataType), "targetType" -> toSQLType(to), @@ -554,7 +554,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { ) cast(1.toShort, to).checkInputDataTypes() == DataTypeMismatch( -errorSubClass = "CAST_WITH_FUN_SUGGESTION", +errorSubClass = "CAST_WITH_FUNC_SUGGESTION", messageParameters = Map( "srcType" -> toSQLType(Literal(1.toShort).dataType), "targetType" -> toSQLType(to), @@ -563,7 +563,7 @@ 
abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { ) cast(1, to).checkInputDataTypes() == DataTypeMismatch( -errorSubClass = "CAST_WITH_FUN_SUGGESTION", +errorSubClass = "CAST_WITH_FUNC_SUGGESTION", messageParameters = Map( "srcType" -> toSQLType(Literal(
[spark] branch master updated (0f1c515179e -> 3fc8a902673)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 0f1c515179e [SPARK-41345][CONNECT] Add Hint to Connect Proto add 3fc8a902673 [SPARK-41348][SQL][TESTS] Refactor `UnsafeArrayWriterSuite` to check error class No new revisions were added by this update. Summary of changes: .../expressions/codegen/UnsafeArrayWriterSuite.scala| 17 + 1 file changed, 13 insertions(+), 4 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41314][SQL] Assign a name to the error class `_LEGACY_ERROR_TEMP_1094`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new e00f14ff521 [SPARK-41314][SQL] Assign a name to the error class `_LEGACY_ERROR_TEMP_1094` e00f14ff521 is described below commit e00f14ff5216e194fe39ef38d2c9414a22ef696a Author: yangjie01 AuthorDate: Thu Dec 1 11:49:42 2022 +0300 [SPARK-41314][SQL] Assign a name to the error class `_LEGACY_ERROR_TEMP_1094` ### What changes were proposed in this pull request? This pr aims to rename error class `_LEGACY_ERROR_TEMP_1094` to `INVALID_SCHEMA.NON_STRUCT_TYPE`. ### Why are the changes needed? Proper names of error classes to improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new tests to check `INVALID_SCHEMA.NON_STRUCT_TYPE` Closes #38856 from LuciferYang/SPARK-41314. Authored-by: yangjie01 Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 .../spark/sql/catalyst/expressions/ExprUtils.scala | 2 +- .../spark/sql/errors/QueryCompilationErrors.scala | 9 --- .../resources/sql-tests/inputs/csv-functions.sql | 1 + .../sql-tests/results/csv-functions.sql.out| 22 .../org/apache/spark/sql/CsvFunctionsSuite.scala | 29 ++ 6 files changed, 64 insertions(+), 9 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 65b6dc68d12..347b9a14862 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -782,6 +782,11 @@ "The input expression must be string literal and not null." ] }, + "NON_STRUCT_TYPE" : { +"message" : [ + "The input expression should be evaluated to struct type, but got ." 
+] + }, "PARSE_ERROR" : { "message" : [ "Cannot parse the schema:", @@ -2211,11 +2216,6 @@ "Cannot read table property '' as it's corrupted.." ] }, - "_LEGACY_ERROR_TEMP_1094" : { -"message" : [ - "Schema should be struct type but got ." -] - }, "_LEGACY_ERROR_TEMP_1097" : { "message" : [ "The field for corrupt records must be string type and nullable." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala index fbe3d5eb458..2fa970bac0c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala @@ -46,7 +46,7 @@ object ExprUtils extends QueryErrorsBase { def evalSchemaExpr(exp: Expression): StructType = { val dataType = evalTypeExpr(exp) if (!dataType.isInstanceOf[StructType]) { - throw QueryCompilationErrors.schemaIsNotStructTypeError(dataType) + throw QueryCompilationErrors.schemaIsNotStructTypeError(exp, dataType) } dataType.asInstanceOf[StructType] } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index fc9a08104b4..2e20d7aec8d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1010,10 +1010,13 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { messageParameters = Map("inputSchema" -> toSQLExpr(exp))) } - def schemaIsNotStructTypeError(dataType: DataType): Throwable = { + def schemaIsNotStructTypeError(exp: Expression, dataType: DataType): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1094", - messageParameters = Map("dataType" -> dataType.toString)) + errorClass = 
"INVALID_SCHEMA.NON_STRUCT_TYPE", + messageParameters = Map( +"inputSchema" -> toSQLExpr(exp), +"dataType" -> toSQLType(dataType) + )) } def keyValueInMapNotStringError(m: CreateMap): Throwable = { diff --git a/sql/core/src/test/resources/sql-tests/inputs/csv-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/csv-functions.sql index a1a4bc9de3f..01d436534a1 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/csv-functions.sql +++ b/sql/core/src/test/resources/sql-tests/i
[spark] branch master updated: [SPARK-41228][SQL] Rename & Improve error message for `COLUMN_NOT_IN_GROUP_BY_CLAUSE`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 5badb2446fa [SPARK-41228][SQL] Rename & Improve error message for `COLUMN_NOT_IN_GROUP_BY_CLAUSE` 5badb2446fa is described below commit 5badb2446fa2b51e8ea239ced6c9b44178b2f1fa Author: itholic AuthorDate: Thu Dec 1 09:18:17 2022 +0300 [SPARK-41228][SQL] Rename & Improve error message for `COLUMN_NOT_IN_GROUP_BY_CLAUSE` ### What changes were proposed in this pull request? This PR proposes to rename `COLUMN_NOT_IN_GROUP_BY_CLAUSE` to `MISSING_AGGREGATION`. Also, improve its error message. ### Why are the changes needed? The current error class name and its error message doesn't illustrate the error cause and resolution correctly. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? ``` ./build/sbt “sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*” ``` Closes #38769 from itholic/SPARK-41128. 
Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 13 +++-- .../sql/tests/pandas/test_pandas_udf_grouped_agg.py | 2 +- .../apache/spark/sql/errors/QueryCompilationErrors.scala | 7 +-- .../spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 7 +-- .../src/test/resources/sql-tests/results/extract.sql.out | 2 ++ .../resources/sql-tests/results/group-by-filter.sql.out | 10 ++ .../src/test/resources/sql-tests/results/group-by.sql.out | 15 +-- .../test/resources/sql-tests/results/grouping_set.sql.out | 5 +++-- .../sql-tests/results/postgreSQL/create_view.sql.out | 5 +++-- .../sql-tests/results/udaf/udaf-group-by-ordinal.sql.out | 15 +-- .../sql-tests/results/udaf/udaf-group-by.sql.out | 15 +-- .../resources/sql-tests/results/udf/udf-group-by.sql.out | 15 +-- .../org/apache/spark/sql/execution/SQLViewSuite.scala | 5 +++-- 13 files changed, 71 insertions(+), 45 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index a79c02e1f1d..65b6dc68d12 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -109,12 +109,6 @@ "The column already exists. Consider to choose another name or rename the existing column." ] }, - "COLUMN_NOT_IN_GROUP_BY_CLAUSE" : { -"message" : [ - "The expression is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in `first()` (or `first_value()`) if you don't care which value you get." -], -"sqlState" : "42000" - }, "CONCURRENT_QUERY" : { "message" : [ "Another instance of this query was just started by a concurrent session." @@ -830,6 +824,13 @@ "Malformed Protobuf messages are detected in message deserialization. Parse Mode: . To process malformed protobuf message as null result, try setting the option 'mode' as 'PERMISSIVE'." 
] }, + "MISSING_AGGREGATION" : { +"message" : [ + "The non-aggregating expression is based on columns which are not participating in the GROUP BY clause.", + "Add the columns or the expression to the GROUP BY, aggregate the expression, or use if you do not care which of the values within a group is returned." +], +"sqlState" : "42000" + }, "MISSING_STATIC_PARTITION_COLUMN" : { "message" : [ "Unknown static partition column: " diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py index 6f475624b74..aa844fc5fd5 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py @@ -475,7 +475,7 @@ class GroupedAggPandasUDFTests(ReusedSQLTestCase): mean_udf = self.pandas_agg_mean_udf with QuietTest(self.sc): -with self.assertRaisesRegex(AnalysisException, "nor.*aggregate function"): +with self.assertRaisesRegex(AnalysisException, "[MISSING_AGGREGATION]"): df.groupby(df.id).agg(plus_one(df.v)).collect() with QuietTest(self.sc): diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index e5b1c3c100d..fc9a08104
[spark] branch master updated (ce41ca0848e -> c5f189c5365)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from ce41ca0848e [SPARK-41343][CONNECT] Move FunctionName parsing to server side add c5f189c5365 [SPARK-41237][SQL] Reuse the error class `UNSUPPORTED_DATATYPE` for `_LEGACY_ERROR_TEMP_0030` No new revisions were added by this update. Summary of changes: R/pkg/tests/fulltests/test_sparkSQL.R| 6 +++--- R/pkg/tests/fulltests/test_streaming.R | 2 +- R/pkg/tests/fulltests/test_utils.R | 2 +- core/src/main/resources/error/error-classes.json | 5 - .../org/apache/spark/sql/errors/QueryParsingErrors.scala | 4 ++-- .../apache/spark/sql/catalyst/parser/DDLParserSuite.scala| 4 ++-- .../spark/sql/catalyst/parser/DataTypeParserSuite.scala | 12 ++-- .../apache/spark/sql/catalyst/parser/ErrorParserSuite.scala | 4 ++-- .../test/resources/sql-tests/results/csv-functions.sql.out | 1 - .../test/resources/sql-tests/results/postgreSQL/with.sql.out | 10 ++ .../sql/execution/datasources/jdbc/JdbcUtilsSuite.scala | 4 ++-- .../datasources/v2/jdbc/JDBCTableCatalogSuite.scala | 4 ++-- .../scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala | 4 ++-- 13 files changed, 33 insertions(+), 29 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41309][SQL] Reuse `INVALID_SCHEMA.NON_STRING_LITERAL` instead of `_LEGACY_ERROR_TEMP_1093`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new a47869af7fa [SPARK-41309][SQL] Reuse `INVALID_SCHEMA.NON_STRING_LITERAL` instead of `_LEGACY_ERROR_TEMP_1093` a47869af7fa is described below commit a47869af7fa82b708520da123fa0446214f601c2 Author: yangjie01 AuthorDate: Tue Nov 29 19:36:59 2022 +0300 [SPARK-41309][SQL] Reuse `INVALID_SCHEMA.NON_STRING_LITERAL` instead of `_LEGACY_ERROR_TEMP_1093` ### What changes were proposed in this pull request? This pr aims reuse `INVALID_SCHEMA.NON_STRING_LITERAL` instead of `_LEGACY_ERROR_TEMP_1093`. ### Why are the changes needed? Proper names of error classes to improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #38830 from LuciferYang/SPARK-41309. Lead-authored-by: yangjie01 Co-authored-by: YangJie Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json| 5 - .../apache/spark/sql/catalyst/expressions/ExprUtils.scala | 2 +- .../apache/spark/sql/errors/QueryCompilationErrors.scala| 6 -- .../test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala | 13 - .../scala/org/apache/spark/sql/JsonFunctionsSuite.scala | 13 - 5 files changed, 17 insertions(+), 22 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 89728777201..cddb0848765 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -2215,11 +2215,6 @@ "Cannot read table property '' as it's corrupted.." ] }, - "_LEGACY_ERROR_TEMP_1093" : { -"message" : [ - "Schema should be specified in DDL format as a string literal or output of the schema_of_json/schema_of_csv functions instead of ." 
-] - }, "_LEGACY_ERROR_TEMP_1094" : { "message" : [ "Schema should be struct type but got ." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala index e9084442b22..fbe3d5eb458 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala @@ -39,7 +39,7 @@ object ExprUtils extends QueryErrorsBase { } } else { - throw QueryCompilationErrors.schemaNotFoldableError(exp) + throw QueryCompilationErrors.unexpectedSchemaTypeError(exp) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index ce99bf4aa47..e5b1c3c100d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1009,12 +1009,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { messageParameters = Map("inputSchema" -> toSQLExpr(exp))) } - def schemaNotFoldableError(exp: Expression): Throwable = { -new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1093", - messageParameters = Map("expr" -> exp.sql)) - } - def schemaIsNotStructTypeError(dataType: DataType): Throwable = { new AnalysisException( errorClass = "_LEGACY_ERROR_TEMP_1094", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala index 940eaaed6ac..ab4c148da04 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala @@ -357,11 +357,14 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession { 
Seq("""1,"a"""").toDS().select(from_csv($"value", schema, options)), Row(Row(1, "a"))) -val errMsg = intercept[AnalysisException] { - Seq(("1", "i int")).toDF("csv", "schema") -.select(from_csv($"csv", $"schema", options)).collect() -}.getMessage -assert(errMsg.contains("Schema should be specified in DDL format as a string literal")) +checkError( + exception = intercept[AnalysisException] { +Seq(("1", "i int"))
[spark] branch master updated: [SPARK-41180][SQL] Reuse `INVALID_SCHEMA` instead of `_LEGACY_ERROR_TEMP_1227`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new bdb4d5e4da5 [SPARK-41180][SQL] Reuse `INVALID_SCHEMA` instead of `_LEGACY_ERROR_TEMP_1227` bdb4d5e4da5 is described below commit bdb4d5e4da558775df2be712dd8760d5f5f14747 Author: yangjie01 AuthorDate: Mon Nov 28 20:26:27 2022 +0300 [SPARK-41180][SQL] Reuse `INVALID_SCHEMA` instead of `_LEGACY_ERROR_TEMP_1227` ### What changes were proposed in this pull request? This pr aims rename `_LEGACY_ERROR_TEMP_1227` to `INVALID_SCHEMA` ### Why are the changes needed? Proper names of error classes to improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #38754 from LuciferYang/SPARK-41180. Lead-authored-by: yangjie01 Co-authored-by: YangJie Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 23 --- project/MimaExcludes.scala | 5 +- .../spark/sql/catalyst/expressions/ExprUtils.scala | 2 +- .../spark/sql/errors/QueryCompilationErrors.scala | 23 --- .../apache/spark/sql/errors/QueryErrorsBase.scala | 4 ++ .../org/apache/spark/sql/types/DataType.scala | 13 ++-- .../scala/org/apache/spark/sql/functions.scala | 1 - .../sql-tests/results/csv-functions.sql.out| 12 ++-- .../sql-tests/results/json-functions.sql.out | 12 ++-- .../org/apache/spark/sql/CsvFunctionsSuite.scala | 4 +- .../apache/spark/sql/DataFrameFunctionsSuite.scala | 4 +- .../org/apache/spark/sql/JsonFunctionsSuite.scala | 73 -- 12 files changed, 115 insertions(+), 61 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 9f4337d0618..89728777201 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -780,8 +780,21 @@ 
}, "INVALID_SCHEMA" : { "message" : [ - "The expression is not a valid schema string." -] + "The input schema is not a valid schema string." +], +"subClass" : { + "NON_STRING_LITERAL" : { +"message" : [ + "The input expression must be string literal and not null." +] + }, + "PARSE_ERROR" : { +"message" : [ + "Cannot parse the schema:", + "" +] + } +} }, "INVALID_SQL_SYNTAX" : { "message" : [ @@ -2844,12 +2857,6 @@ "The SQL config '' was removed in the version . " ] }, - "_LEGACY_ERROR_TEMP_1227" : { -"message" : [ - "", - "Failed fallback parsing: " -] - }, "_LEGACY_ERROR_TEMP_1228" : { "message" : [ "Decimal scale () cannot be greater than precision ()." diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index d8f87a504fa..eed79d1f204 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -120,7 +120,10 @@ object MimaExcludes { ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.storage.ShuffleBlockFetcherIterator#FetchRequest.apply"), // [SPARK-41072][SS] Add the error class STREAM_FAILED to StreamingQueryException - ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryException.this") + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryException.this"), + +// [SPARK-41180][SQL] Reuse INVALID_SCHEMA instead of _LEGACY_ERROR_TEMP_1227 + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.types.DataType.parseTypeWithFallback") ) // Defulat exclude rules diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala index 3e10b820aa6..e9084442b22 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala @@ -35,7 +35,7 @@ object ExprUtils extends 
QueryErrorsBase { case s: UTF8String if s != null => val dataType = DataType.fromDDL(s.toString) CharVarcharUtils.failIfHasCharVarchar(dataType) -case _ => throw QueryCompilationErrors.invalidSchemaStringError(exp) +case _ => throw QueryCompilationErrors.unexp
[spark] branch master updated: [SPARK-41293][SQL][TESTS] Code cleanup for `assertXXX` methods in `ExpressionTypeCheckingSuite`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c3de4ca1477 [SPARK-41293][SQL][TESTS] Code cleanup for `assertXXX` methods in `ExpressionTypeCheckingSuite` c3de4ca1477 is described below commit c3de4ca14772fa6dff703b662a561a9e65e23d9e Author: yangjie01 AuthorDate: Mon Nov 28 14:55:28 2022 +0300 [SPARK-41293][SQL][TESTS] Code cleanup for `assertXXX` methods in `ExpressionTypeCheckingSuite` ### What changes were proposed in this pull request? This pr do some code clean up for `assertXXX` method in `ExpressionTypeCheckingSuite`: 1. Reuse `analysisException` instead of duplicate `intercept[AnalysisException](assertSuccess(expr))` in `assertErrorForXXX` methods. 2. remove `assertError` method that is no longer used 3. Change `assertErrorForXXX` methods access scope to `private` due to they are only used in `ExpressionTypeCheckingSuite`. ### Why are the changes needed? Code clean up. ### Does this PR introduce _any_ user-facing change? No, just for test ### How was this patch tested? Pass GitHub Actions Closes #38820 from LuciferYang/SPARK-41293. 
Authored-by: yangjie01 Signed-off-by: Max Gekk --- .../analysis/ExpressionTypeCheckingSuite.scala | 41 ++ 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala index d406ec8f74a..6202d1e367a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala @@ -44,65 +44,46 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite with SQLHelper with Quer intercept[AnalysisException](assertSuccess(expr)) } - def assertError(expr: Expression, errorMessage: String): Unit = { -val e = intercept[AnalysisException] { - assertSuccess(expr) -} -assert(e.getMessage.contains( - s"cannot resolve '${expr.sql}' due to data type mismatch:")) -assert(e.getMessage.contains(errorMessage)) - } - - def assertSuccess(expr: Expression): Unit = { + private def assertSuccess(expr: Expression): Unit = { val analyzed = testRelation.select(expr.as("c")).analyze SimpleAnalyzer.checkAnalysis(analyzed) } - def assertErrorForBinaryDifferingTypes( + private def assertErrorForBinaryDifferingTypes( expr: Expression, messageParameters: Map[String, String]): Unit = { checkError( - exception = intercept[AnalysisException] { -assertSuccess(expr) - }, + exception = analysisException(expr), errorClass = "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES", parameters = messageParameters) } - def assertErrorForOrderingTypes( + private def assertErrorForOrderingTypes( expr: Expression, messageParameters: Map[String, String]): Unit = { checkError( - exception = intercept[AnalysisException] { -assertSuccess(expr) - }, + exception = analysisException(expr), errorClass = "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE", parameters = messageParameters) } - def 
assertErrorForDataDifferingTypes( + private def assertErrorForDataDifferingTypes( expr: Expression, messageParameters: Map[String, String]): Unit = { checkError( - exception = intercept[AnalysisException] { -assertSuccess(expr) - }, + exception = analysisException(expr), errorClass = "DATATYPE_MISMATCH.DATA_DIFF_TYPES", parameters = messageParameters) } - def assertErrorForWrongNumParameters( + private def assertErrorForWrongNumParameters( expr: Expression, messageParameters: Map[String, String]): Unit = { checkError( - exception = intercept[AnalysisException] { -assertSuccess(expr) - }, + exception = analysisException(expr), errorClass = "DATATYPE_MISMATCH.WRONG_NUM_ARGS", parameters = messageParameters) } - def assertForWrongType(expr: Expression, messageParameters: Map[String, String]): Unit = { + private def assertForWrongType(expr: Expression, messageParameters: Map[String, String]): Unit = { checkError( - exception = intercept[AnalysisException] { -assertSuccess(expr) - }, + exception = analysisException(expr), errorClass = "DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE", parameters = messageParameters) } - To un
[spark] branch master updated: [SPARK-41272][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_2019
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new d979736a9eb [SPARK-41272][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_2019 d979736a9eb is described below commit d979736a9eb754725d33fd5baca88a1c1a8c23ce Author: panbingkun AuthorDate: Mon Nov 28 12:01:02 2022 +0300 [SPARK-41272][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_2019 ### What changes were proposed in this pull request? In the PR, I propose to assign the name `NULL_MAP_KEY` to the error class `_LEGACY_ERROR_TEMP_2019`. ### Why are the changes needed? Proper names of error classes should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Closes #38808 from panbingkun/LEGACY_2019. Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 ++--- .../spark/sql/errors/QueryExecutionErrors.scala| 2 +- .../catalyst/encoders/ExpressionEncoderSuite.scala | 20 +++--- .../expressions/CollectionExpressionsSuite.scala | 10 ++--- .../catalyst/expressions/ComplexTypeSuite.scala| 11 +++--- .../expressions/ExpressionEvalHelper.scala | 43 +- .../expressions/ObjectExpressionsSuite.scala | 10 ++--- .../catalyst/util/ArrayBasedMapBuilderSuite.scala | 8 +++- .../apache/spark/sql/DataFrameFunctionsSuite.scala | 38 ++- 9 files changed, 113 insertions(+), 39 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 1246e870e0d..9f4337d0618 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -895,6 +895,11 @@ "The comparison result is null. 
If you want to handle null as 0 (equal), you can set \"spark.sql.legacy.allowNullComparisonResultInArraySort\" to \"true\"." ] }, + "NULL_MAP_KEY" : { +"message" : [ + "Cannot use null as map key." +] + }, "NUMERIC_OUT_OF_SUPPORTED_RANGE" : { "message" : [ "The value cannot be interpreted as a numeric since it has more than 38 digits." @@ -3504,11 +3509,6 @@ "class `` is not supported by `MapObjects` as resulting collection." ] }, - "_LEGACY_ERROR_TEMP_2019" : { -"message" : [ - "Cannot use null as map key!" -] - }, "_LEGACY_ERROR_TEMP_2020" : { "message" : [ "Couldn't find a valid constructor on " diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 5db54f7f4cf..15dfa581c59 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -444,7 +444,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { def nullAsMapKeyNotAllowedError(): SparkRuntimeException = { new SparkRuntimeException( - errorClass = "_LEGACY_ERROR_TEMP_2019", + errorClass = "NULL_MAP_KEY", messageParameters = Map.empty) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala index 9b481b13fee..e9336405a53 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala @@ -24,7 +24,7 @@ import java.util.Arrays import scala.collection.mutable.ArrayBuffer import scala.reflect.runtime.universe.TypeTag -import org.apache.spark.SparkArithmeticException +import org.apache.spark.{SparkArithmeticException, SparkRuntimeException} import 
org.apache.spark.sql.{Encoder, Encoders} import org.apache.spark.sql.catalyst.{FooClassWithEnum, FooEnum, OptionalData, PrimitiveData, ScroogeLikeExample} import org.apache.spark.sql.catalyst.analysis.AnalysisTest @@ -539,14 +539,24 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes test("null check for map key: String") { val toRow = ExpressionEncoder[Map[String, Int]]().createSerializer() -val e = intercept[RuntimeException](toRow(Map(("a", 1), (null, 2 -assert(e.getMessage.contains("Cannot use null as map
[spark] branch master updated (ed3775704bb -> e4b5eec6e27)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from ed3775704bb [MINOR][SQL][TESTS] Restore the code style check of `QueryExecutionErrorsSuite` add e4b5eec6e27 [SPARK-38728][SQL] Test the error class: FAILED_RENAME_PATH No new revisions were added by this update. Summary of changes: .../sql/errors/QueryExecutionErrorsSuite.scala | 35 ++ 1 file changed, 35 insertions(+) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [MINOR][SQL][TESTS] Restore the code style check of `QueryExecutionErrorsSuite`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new ed3775704bb [MINOR][SQL][TESTS] Restore the code style check of `QueryExecutionErrorsSuite` ed3775704bb is described below commit ed3775704bbdc9a9c479dc06565c8bf8c4d9640c Author: yangjie01 AuthorDate: Sun Nov 27 15:03:35 2022 +0300 [MINOR][SQL][TESTS] Restore the code style check of `QueryExecutionErrorsSuite` ### What changes were proposed in this pull request? https://github.com/apache/spark/blob/9af216d7ac26f0ec916833c2e80a01aef8933529/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala#L451-L454 As above code, line 451 in `QueryExecutionErrorsSuite.scala` turn off all scala style check and line 454 just turn on `throwerror` check, so the code after line 454 of the `QueryExecutionErrorsSuite.scala` will not be checked for code style except `throwerror`. This pr restore the code style check and fix a existing `File line length exceeds 100 characters.` case. ### Why are the changes needed? Restore the code style check of `QueryExecutionErrorsSuite` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #38812 from LuciferYang/minor-checkstyle. 
Authored-by: yangjie01 Signed-off-by: Max Gekk --- .../org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index aa0f720d4de..807188bee3a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -448,7 +448,7 @@ class QueryExecutionErrorsSuite override def getResources(name: String): java.util.Enumeration[URL] = { if (name.equals("META-INF/services/org.apache.spark.sql.sources.DataSourceRegister")) { - // scalastyle:off + // scalastyle:off throwerror throw new ServiceConfigurationError(s"Illegal configuration-file syntax: $name", new NoClassDefFoundError("org.apache.spark.sql.sources.HadoopFsRelationProvider")) // scalastyle:on throwerror @@ -632,7 +632,8 @@ class QueryExecutionErrorsSuite }, errorClass = "UNSUPPORTED_DATATYPE", parameters = Map( -"typeName" -> "StructType()[1.1] failure: 'TimestampType' expected but 'S' found\n\nStructType()\n^" +"typeName" -> + "StructType()[1.1] failure: 'TimestampType' expected but 'S' found\n\nStructType()\n^" ), sqlState = "0A000") } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [MINOR][SQL] Fix the pretty name of the `AnyValue` expression
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 9af216d7ac2 [MINOR][SQL] Fix the pretty name of the `AnyValue` expression 9af216d7ac2 is described below commit 9af216d7ac26f0ec916833c2e80a01aef8933529 Author: Max Gekk AuthorDate: Sun Nov 27 10:33:26 2022 +0300 [MINOR][SQL] Fix the pretty name of the `AnyValue` expression ### What changes were proposed in this pull request? In the PR, I propose to override the `prettyName` method of the `AnyValue` expression and set to `any_value` by default as in `FunctionRegistry`: https://github.com/apache/spark/blob/40b7d29e14cfa96984c5b0a231a75b210dd85a7e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L466 ### Why are the changes needed? To don't confuse users by non-existent function name, and print correct name in errors. ### Does this PR introduce _any_ user-facing change? Yes, it could be. ### How was this patch tested? By running the affected test suite: ``` $ build/sbt "sql/testOnly *ExpressionsSchemaSuite" ``` Closes #38805 from MaxGekk/any_value-pretty-name. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../apache/spark/sql/catalyst/expressions/aggregate/AnyValue.scala| 4 sql/core/src/test/resources/sql-functions/sql-expression-schema.md| 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/AnyValue.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/AnyValue.scala index 47559b90e9c..9fbca1629c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/AnyValue.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/AnyValue.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.expressions.aggregate +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.trees.UnaryLike import org.apache.spark.sql.types._ @@ -61,4 +62,7 @@ case class AnyValue(child: Expression, ignoreNulls: Boolean) override protected def withNewChildInternal(newChild: Expression): AnyValue = copy(child = newChild) override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, BooleanType) + + override def prettyName: String = +getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("any_value") } diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 482c72679bb..8d47878de15 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -349,7 +349,7 @@ | org.apache.spark.sql.catalyst.expressions.XxHash64 | xxhash64 | SELECT xxhash64('Spark', array(123), 2) | struct | | org.apache.spark.sql.catalyst.expressions.Year | year | SELECT year('2016-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.ZipWith | zip_with | SELECT zip_with(array(1, 2, 3), 
array('a', 'b', 'c'), (x, y) -> (y, x)) | struct>> | -| org.apache.spark.sql.catalyst.expressions.aggregate.AnyValue | any_value | SELECT any_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.AnyValue | any_value | SELECT any_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile | approx_percentile | SELECT approx_percentile(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col) | struct> | | org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile | percentile_approx | SELECT percentile_approx(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col) | struct> | | org.apache.spark.sql.catalyst.expressions.aggregate.Average | avg | SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41181][SQL] Migrate the map options errors onto error classes
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 0ae82d99d13 [SPARK-41181][SQL] Migrate the map options errors onto error classes 0ae82d99d13 is described below commit 0ae82d99d13988086a297920d45a766115a70578 Author: panbingkun AuthorDate: Fri Nov 25 09:03:49 2022 +0300 [SPARK-41181][SQL] Migrate the map options errors onto error classes ### What changes were proposed in this pull request? The pr aims to migrate the map options errors onto error classes. ### Why are the changes needed? The changes improve the error framework. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Closes #38730 from panbingkun/SPARK-41181. Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 27 + .../spark/sql/errors/QueryCompilationErrors.scala | 6 +- .../sql-tests/results/csv-functions.sql.out| 13 +++-- .../sql-tests/results/json-functions.sql.out | 12 ++-- .../org/apache/spark/sql/JsonFunctionsSuite.scala | 66 -- 5 files changed, 81 insertions(+), 43 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 55a56712554..1246e870e0d 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -735,6 +735,23 @@ "The JOIN with LATERAL correlation is not allowed because an OUTER subquery cannot correlate to its join partner. Remove the LATERAL correlation or use an INNER JOIN, or LEFT OUTER JOIN instead." ] }, + "INVALID_OPTIONS" : { +"message" : [ + "Invalid options:" +], +"subClass" : { + "NON_MAP_FUNCTION" : { +"message" : [ + "Must use the `map()` function for options." 
+] + }, + "NON_STRING_TYPE" : { +"message" : [ + "A type of keys and values in `map()` must be string, but got ." +] + } +} + }, "INVALID_PANDAS_UDF_PLACEMENT" : { "message" : [ "The group aggregate pandas UDF cannot be invoked together with as other, non-pandas aggregate functions." @@ -2190,16 +2207,6 @@ "Schema should be struct type but got ." ] }, - "_LEGACY_ERROR_TEMP_1095" : { -"message" : [ - "A type of keys and values in map() must be string, but got ." -] - }, - "_LEGACY_ERROR_TEMP_1096" : { -"message" : [ - "Must use a map() function for options." -] - }, "_LEGACY_ERROR_TEMP_1097" : { "message" : [ "The field for corrupt records must be string type and nullable." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index fa22c36f841..486bd21b844 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1013,13 +1013,13 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { def keyValueInMapNotStringError(m: CreateMap): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1095", - messageParameters = Map("map" -> m.dataType.catalogString)) + errorClass = "INVALID_OPTIONS.NON_STRING_TYPE", + messageParameters = Map("mapType" -> toSQLType(m.dataType))) } def nonMapFunctionNotAllowedError(): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1096", + errorClass = "INVALID_OPTIONS.NON_MAP_FUNCTION", messageParameters = Map.empty) } diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out index 0b5a63c28e4..200ddd837e1 100644 --- a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out @@ -66,7 +66,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_1096", + "errorClass" : "INVALID_OPTIONS.NON_MAP_FUNCTION", "queryContext" : [ { "objectType" : "", "objectName" : ""
[spark] branch master updated (074444bd71f -> 1f90e416314)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 07bd71f [SPARK-41179][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1092 add 1f90e416314 [SPARK-41182][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1102 No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 10 ++-- .../spark/sql/errors/QueryCompilationErrors.scala | 6 +-- .../resources/sql-tests/results/extract.sql.out| 56 +++--- 3 files changed, 35 insertions(+), 37 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41179][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1092
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 07bd71f [SPARK-41179][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1092 07bd71f is described below commit 07bd71f088d1a5acb6f2ecf92d71ed06ef21 Author: panbingkun AuthorDate: Thu Nov 24 09:17:45 2022 +0300 [SPARK-41179][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1092 ### What changes were proposed in this pull request? In the PR, I propose to assign the name `INVALID_SCHEMA` to the error class `_LEGACY_ERROR_TEMP_1092`. ### Why are the changes needed? Proper names of error classes should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Closes #38710 from panbingkun/SPARK-41179. Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json| 10 +- .../spark/sql/errors/QueryCompilationErrors.scala | 4 ++-- .../resources/sql-tests/results/csv-functions.sql.out | 4 ++-- .../resources/sql-tests/results/json-functions.sql.out | 4 ++-- .../scala/org/apache/spark/sql/CsvFunctionsSuite.scala | 11 +++ .../org/apache/spark/sql/DataFrameFunctionsSuite.scala | 17 + .../scala/org/apache/spark/sql/JsonFunctionsSuite.scala | 17 + 7 files changed, 48 insertions(+), 19 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index a89fffde51d..c58f9b9fb38 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -756,6 +756,11 @@ " is not a Protobuf message type" ] }, + "INVALID_SCHEMA" : { +"message" : [ + "The expression is not a valid schema string." 
+] + }, "INVALID_SQL_SYNTAX" : { "message" : [ "Invalid SQL syntax: " @@ -2170,11 +2175,6 @@ "Cannot read table property '' as it's corrupted.." ] }, - "_LEGACY_ERROR_TEMP_1092" : { -"message" : [ - "The expression '' is not a valid schema string." -] - }, "_LEGACY_ERROR_TEMP_1093" : { "message" : [ "Schema should be specified in DDL format as a string literal or output of the schema_of_json/schema_of_csv functions instead of ." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index f52a0345bce..7772dd5e9a3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -995,8 +995,8 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { def invalidSchemaStringError(exp: Expression): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1092", - messageParameters = Map("expr" -> exp.sql)) + errorClass = "INVALID_SCHEMA", + messageParameters = Map("expr" -> toSQLExpr(exp))) } def schemaNotFoldableError(exp: Expression): Throwable = { diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out index c2be9ed7d0b..0b5a63c28e4 100644 --- a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out @@ -22,9 +22,9 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_1092", + "errorClass" : "INVALID_SCHEMA", "messageParameters" : { -"expr" : "1" +"expr" : "\"1\"" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out 
index 3c98cc6e856..ab1465350d8 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -148,9 +148,9 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_1092", + "errorClass" : "INVALID_SCHEMA", "messageParameters" :
[spark] branch master updated: [MINOR][SQL] Fix error message for `UNEXPECTED_INPUT_TYPE`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 57f3f0fdd3a [MINOR][SQL] Fix error message for `UNEXPECTED_INPUT_TYPE` 57f3f0fdd3a is described below commit 57f3f0fdd3acd136ddf4904193bfa4e7102a255c Author: itholic AuthorDate: Thu Nov 24 08:52:37 2022 +0300 [MINOR][SQL] Fix error message for `UNEXPECTED_INPUT_TYPE` ### What changes were proposed in this pull request? This PR proposes to correct the minor syntax on error message for `UNEXPECTED_INPUT_TYPE`, ### Why are the changes needed? Error message should be started with upper-case character, and clear to read. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? ``` ./build/sbt “sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*” ``` Closes #38766 from itholic/minor-UNEXPECTED_INPUT_TYPE. Lead-authored-by: itholic Co-authored-by: Haejoon Lee <44108233+itho...@users.noreply.github.com> Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index f2e7783efdd..239f43ce6e8 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -364,7 +364,7 @@ }, "UNEXPECTED_INPUT_TYPE" : { "message" : [ - "parameter requires type, however, is of type." + "Parameter requires the type, however has the type ." ] }, "UNEXPECTED_NULL" : { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (c3f8c973d44 -> b77ced58b44)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from c3f8c973d44 [SPARK-41174][CORE][SQL] Propagate an error class to users for invalid `format` of `to_binary()` add b77ced58b44 [SPARK-41131][SQL] Improve error message for `UNRESOLVED_MAP_KEY.WITHOUT_SUGGESTION` No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41174][CORE][SQL] Propagate an error class to users for invalid `format` of `to_binary()`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c3f8c973d44 [SPARK-41174][CORE][SQL] Propagate an error class to users for invalid `format` of `to_binary()` c3f8c973d44 is described below commit c3f8c973d448b4d9be7502985aededdd7b81d164 Author: yangjie01 AuthorDate: Wed Nov 23 17:25:06 2022 +0300 [SPARK-41174][CORE][SQL] Propagate an error class to users for invalid `format` of `to_binary()` ### What changes were proposed in this pull request? This pr overrides the `checkInputDataTypes()` method of `ToBinary` function to propagate error class to users for invalid `format`. ### Why are the changes needed? Migration onto error classes unifies Spark SQL error messages. ### Does this PR introduce _any_ user-facing change? Yes. The PR changes user-facing error messages. ### How was this patch tested? Pass GitHub Actions Closes #38737 from LuciferYang/SPARK-41174. Authored-by: yangjie01 Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 5 ++ .../catalyst/expressions/stringExpressions.scala | 85 +++--- .../expressions/StringExpressionsSuite.scala | 15 .../sql-tests/inputs/string-functions.sql | 4 + .../results/ansi/string-functions.sql.out | 70 +++--- .../sql-tests/results/string-functions.sql.out | 70 +++--- 6 files changed, 204 insertions(+), 45 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index afe08f044c7..5bac5ae71f2 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -234,6 +234,11 @@ "Input to the function cannot contain elements of the \"MAP\" type. In Spark, same maps may have different hashcode, thus hash expressions are prohibited on \"MAP\" elements. 
To restore previous behavior set \"spark.sql.legacy.allowHashOnMapType\" to \"true\"." ] }, + "INVALID_ARG_VALUE" : { +"message" : [ + "The value must to be a literal of , but got ." +] + }, "INVALID_JSON_MAP_KEY_TYPE" : { "message" : [ "Input schema can only contain STRING as a key type for a MAP." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 60b56f4fef7..3a1db2ce1b8 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -2620,39 +2620,30 @@ case class ToBinary( nullOnInvalidFormat: Boolean = false) extends RuntimeReplaceable with ImplicitCastInputTypes { - override lazy val replacement: Expression = format.map { f => -assert(f.foldable && (f.dataType == StringType || f.dataType == NullType)) + @transient lazy val fmt: String = format.map { f => val value = f.eval() if (value == null) { - Literal(null, BinaryType) + null } else { - value.asInstanceOf[UTF8String].toString.toLowerCase(Locale.ROOT) match { -case "hex" => Unhex(expr, failOnError = true) -case "utf-8" | "utf8" => Encode(expr, Literal("UTF-8")) -case "base64" => UnBase64(expr, failOnError = true) -case _ if nullOnInvalidFormat => Literal(null, BinaryType) -case other => throw QueryCompilationErrors.invalidStringLiteralParameter( - "to_binary", - "format", - other, - Some( -"The value has to be a case-insensitive string literal of " + -"'hex', 'utf-8', 'utf8', or 'base64'.")) - } + value.asInstanceOf[UTF8String].toString.toLowerCase(Locale.ROOT) +} + }.getOrElse("hex") + + override lazy val replacement: Expression = if (fmt == null) { +Literal(null, BinaryType) + } else { +fmt match { + case "hex" => Unhex(expr, failOnError = true) + case "utf-8" | "utf8" => Encode(expr, Literal("UTF-8")) + 
case "base64" => UnBase64(expr, failOnError = true) + case _ => Literal(null, BinaryType) } - }.getOrElse(Unhex(expr, failOnError = true)) + } def this(expr: Expression) = this(expr, None, false) def this(expr: Ex
[spark] branch master updated (2dfb81f898c -> 291315853b8)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 2dfb81f898c [SPARK-41223][BUILD] Upgrade slf4j to 2.0.4 add 291315853b8 [SPARK-41221][SQL] Add the error class `INVALID_FORMAT` No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 114 ++--- .../sql/catalyst/analysis/CheckAnalysis.scala | 7 ++ .../sql/catalyst/analysis/TypeCheckResult.scala| 13 +++ .../spark/sql/catalyst/analysis/package.scala | 9 +- .../spark/sql/catalyst/util/ToNumberParser.scala | 67 +--- .../spark/sql/errors/QueryCompilationErrors.scala | 9 +- .../expressions/RegexpExpressionsSuite.scala | 35 --- .../expressions/StringExpressionsSuite.scala | 110 ++-- .../sql-tests/results/postgreSQL/numeric.sql.out | 10 +- .../sql-tests/results/postgreSQL/strings.sql.out | 32 +++--- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 8 +- 11 files changed, 211 insertions(+), 203 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41206][SQL][FOLLOWUP] Make result of `checkColumnNameDuplication` stable to fix `COLUMN_ALREADY_EXISTS` check failed with Scala 2.13
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new e42d3836af9 [SPARK-41206][SQL][FOLLOWUP] Make result of `checkColumnNameDuplication` stable to fix `COLUMN_ALREADY_EXISTS` check failed with Scala 2.13 e42d3836af9 is described below commit e42d3836af9eea881868c80f3c2cbc29e1d7b4f1 Author: yangjie01 AuthorDate: Wed Nov 23 09:13:56 2022 +0300 [SPARK-41206][SQL][FOLLOWUP] Make result of `checkColumnNameDuplication` stable to fix `COLUMN_ALREADY_EXISTS` check failed with Scala 2.13 ### What changes were proposed in this pull request? This PR adds a sort when `columnAlreadyExistsError` will be thrown to make the result of `SchemaUtils#checkColumnNameDuplication` stable. ### Why are the changes needed? Fix the `COLUMN_ALREADY_EXISTS` check failure with Scala 2.13 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GA - Manual test: ``` dev/change-scala-version.sh 2.13 build/sbt clean "sql/testOnly org.apache.spark.sql.DataFrameSuite" -Pscala-2.13 build/sbt "sql/testOnly org.apache.spark.sql.execution.datasources.json.JsonV1Suite" -Pscala-2.13 build/sbt "sql/testOnly org.apache.spark.sql.execution.datasources.json.JsonV2Suite" -Pscala-2.13 build/sbt "sql/testOnly org.apache.spark.sql.execution.datasources.json.JsonLegacyTimeParserSuite" -Pscala-2.13 ``` All tests passed Closes #38764 from LuciferYang/SPARK-41206. 
Authored-by: yangjie01 Signed-off-by: Max Gekk --- sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala index aac96a9b56c..d202900381a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala @@ -107,7 +107,7 @@ private[spark] object SchemaUtils { val names = if (caseSensitiveAnalysis) columnNames else columnNames.map(_.toLowerCase) // scalastyle:on caselocale if (names.distinct.length != names.length) { - val columnName = names.groupBy(identity).collectFirst { + val columnName = names.groupBy(identity).toSeq.sortBy(_._1).collectFirst { case (x, ys) if ys.length > 1 => x }.get throw QueryCompilationErrors.columnAlreadyExistsError(columnName) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40948][SQL][FOLLOWUP] Restore PATH_NOT_FOUND
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 17816170316 [SPARK-40948][SQL][FOLLOWUP] Restore PATH_NOT_FOUND 17816170316 is described below commit 178161703161ccf49b37baf9a667630865367950 Author: itholic AuthorDate: Wed Nov 23 08:38:20 2022 +0300 [SPARK-40948][SQL][FOLLOWUP] Restore PATH_NOT_FOUND ### What changes were proposed in this pull request? The original PR to introduce the error class `PATH_NOT_FOUND` was reverted since it breaks the tests in different test env. This PR proposes to restore it back. ### Why are the changes needed? Restoring the reverted changes with proper fix. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? The existing CI should pass. Closes #38575 from itholic/SPARK-40948-followup. Authored-by: itholic Signed-off-by: Max Gekk --- R/pkg/tests/fulltests/test_sparkSQL.R | 14 +--- core/src/main/resources/error/error-classes.json | 10 +++--- .../spark/sql/errors/QueryCompilationErrors.scala | 2 +- .../org/apache/spark/sql/DataFrameSuite.scala | 37 -- .../execution/datasources/DataSourceSuite.scala| 28 +--- 5 files changed, 52 insertions(+), 39 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 534ec07abac..d2b6220b2e7 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -3990,12 +3990,16 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume expect_error(read.df(source = "json"), paste("Error in load : analysis error - Unable to infer schema for JSON.", "It must be specified manually")) - expect_error(read.df("arbitrary_path"), "Error in load : analysis error - Path does not exist") - expect_error(read.json("arbitrary_path"), "Error in json : analysis error - Path does not 
exist") - expect_error(read.text("arbitrary_path"), "Error in text : analysis error - Path does not exist") - expect_error(read.orc("arbitrary_path"), "Error in orc : analysis error - Path does not exist") + expect_error(read.df("arbitrary_path"), + "Error in load : analysis error - \\[PATH_NOT_FOUND\\].*") + expect_error(read.json("arbitrary_path"), + "Error in json : analysis error - \\[PATH_NOT_FOUND\\].*") + expect_error(read.text("arbitrary_path"), + "Error in text : analysis error - \\[PATH_NOT_FOUND\\].*") + expect_error(read.orc("arbitrary_path"), + "Error in orc : analysis error - \\[PATH_NOT_FOUND\\].*") expect_error(read.parquet("arbitrary_path"), - "Error in parquet : analysis error - Path does not exist") + "Error in parquet : analysis error - \\[PATH_NOT_FOUND\\].*") # Arguments checking in R side. expect_error(read.df(path = c(3)), diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 77d155bfc21..12c97c2108a 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -912,6 +912,11 @@ ], "sqlState" : "42000" }, + "PATH_NOT_FOUND" : { +"message" : [ + "Path does not exist: ." +] + }, "PIVOT_VALUE_DATA_TYPE_MISMATCH" : { "message" : [ "Invalid pivot value '': value data type does not match pivot column data type " @@ -2332,11 +2337,6 @@ "Unable to infer schema for . It must be specified manually." ] }, - "_LEGACY_ERROR_TEMP_1130" : { -"message" : [ - "Path does not exist: ." -] - }, "_LEGACY_ERROR_TEMP_1131" : { "message" : [ "Data source does not support output mode." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 63c912c15a1..0f245597efd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1378,7 +1378,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { def dataPathNotExistError(path: String): Throwable = {
[spark] branch master updated: [SPARK-41135][SQL] Rename `UNSUPPORTED_EMPTY_LOCATION` to `INVALID_EMPTY_LOCATION`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 3bff4f6339f [SPARK-41135][SQL] Rename `UNSUPPORTED_EMPTY_LOCATION` to `INVALID_EMPTY_LOCATION` 3bff4f6339f is described below commit 3bff4f6339f54d19362a0c03ef2b396e47881fd8 Author: itholic AuthorDate: Tue Nov 22 13:14:13 2022 +0300 [SPARK-41135][SQL] Rename `UNSUPPORTED_EMPTY_LOCATION` to `INVALID_EMPTY_LOCATION` ### What changes were proposed in this pull request? This PR proposes to rename `UNSUPPORTED_EMPTY_LOCATION` to `INVALID_EMPTY_LOCATION`. ### Why are the changes needed? An error class and its message should be clear/brief, and should not be ambiguously specific when it illustrates things that may be supported in the future. ### Does this PR introduce _any_ user-facing change? Error message changes From ``` "Unsupported empty location." ``` To ``` "The location name cannot be empty string, but `...` was given." ``` ### How was this patch tested? ``` $ build/sbt “sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*” $ build/sbt "core/testOnly *SparkThrowableSuite" ``` Closes #38650 from itholic/SPARK-41135. 
Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 +- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 6 +++--- .../spark/sql/catalyst/analysis/ResolveSessionCatalog.scala| 4 ++-- .../sql/execution/datasources/v2/DataSourceV2Strategy.scala| 4 ++-- .../execution/command/AlterNamespaceSetLocationSuiteBase.scala | 4 ++-- .../spark/sql/execution/command/CreateNamespaceSuiteBase.scala | 4 ++-- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index ae76a52e40f..77d155bfc21 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -676,6 +676,11 @@ ], "sqlState" : "42000" }, + "INVALID_EMPTY_LOCATION" : { +"message" : [ + "The location name cannot be empty string, but `` was given." +] + }, "INVALID_FIELD_NAME" : { "message" : [ "Field name is invalid: is not a struct." @@ -1181,11 +1186,6 @@ } } }, - "UNSUPPORTED_EMPTY_LOCATION" : { -"message" : [ - "Unsupported empty location." 
-] - }, "UNSUPPORTED_FEATURE" : { "message" : [ "The feature is not supported:" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 6081d9f32a5..5db54f7f4cf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2806,10 +2806,10 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { "size" -> elementSize.toString)) } - def unsupportedEmptyLocationError(): SparkIllegalArgumentException = { + def invalidEmptyLocationError(location: String): SparkIllegalArgumentException = { new SparkIllegalArgumentException( - errorClass = "UNSUPPORTED_EMPTY_LOCATION", - messageParameters = Map.empty) + errorClass = "INVALID_EMPTY_LOCATION", + messageParameters = Map("location" -> location)) } def malformedProtobufMessageDetectedInMessageParsingError(e: Throwable): Throwable = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index d00d07150b0..d7e26b04ce4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -134,7 +134,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) case SetNamespaceLocation(DatabaseInSessionCatalog(db), location) if conf.useV1Command => if (StringUtils.isEmpty(location)) { -throw QueryExecutionErrors.unsupportedEmptyLocationError() +throw QueryExecutionErrors.invalidEmptyLocationError(location) } AlterDatabaseSetLocationCommand(db, location) @@ -243,7 +243,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogMana
[spark] branch master updated (40b7d29e14c -> a80899f8bef)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 40b7d29e14c [SPARK-41217][SQL] Add the error class `FAILED_FUNCTION_CALL` add a80899f8bef [SPARK-41206][SQL] Rename the error class `_LEGACY_ERROR_TEMP_1233` to `COLUMN_ALREADY_EXISTS` No new revisions were added by this update. Summary of changes: .../connect/planner/SparkConnectProtoSuite.scala | 12 ++-- core/src/main/resources/error/error-classes.json | 10 +-- .../spark/sql/catalyst/analysis/Analyzer.scala | 3 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 1 - .../spark/sql/catalyst/analysis/ResolveUnion.scala | 2 - .../spark/sql/errors/QueryCompilationErrors.scala | 8 +-- .../apache/spark/sql/util/PartitioningUtils.scala | 3 +- .../org/apache/spark/sql/util/SchemaUtils.scala| 40 --- .../apache/spark/sql/util/SchemaUtilsSuite.scala | 52 +++--- .../main/scala/org/apache/spark/sql/Dataset.scala | 2 - .../spark/sql/execution/command/CommandCheck.scala | 2 +- .../spark/sql/execution/command/tables.scala | 1 - .../apache/spark/sql/execution/command/views.scala | 3 +- .../sql/execution/datasources/DataSource.scala | 10 +-- .../InsertIntoHadoopFsRelationCommand.scala| 1 - .../execution/datasources/PartitioningUtils.scala | 3 +- .../sql/execution/datasources/jdbc/JdbcUtils.scala | 8 +-- .../spark/sql/execution/datasources/rules.scala| 9 +-- .../sql/execution/datasources/v2/FileTable.scala | 6 +- .../sql/execution/datasources/v2/FileWrite.scala | 3 +- .../spark/sql/DataFrameSetOperationsSuite.scala| 21 +++--- .../org/apache/spark/sql/DataFrameSuite.scala | 67 +- .../apache/spark/sql/NestedDataSourceSuite.scala | 5 +- .../org/apache/spark/sql/SQLInsertTestSuite.scala | 7 +- .../spark/sql/StatisticsCollectionSuite.scala | 10 +-- .../spark/sql/connector/AlterTableTests.scala | 34 + .../spark/sql/connector/DataSourceV2SQLSuite.scala | 80 -- .../connector/V2CommandsCaseSensitivitySuite.scala | 21 -- 
.../spark/sql/execution/command/DDLSuite.scala | 80 +- .../datasources/jdbc/JdbcUtilsSuite.scala | 6 +- .../sql/execution/datasources/json/JsonSuite.scala | 10 +-- .../org/apache/spark/sql/jdbc/JDBCWriteSuite.scala | 10 +-- .../spark/sql/sources/PartitionedWriteSuite.scala | 9 ++- .../spark/sql/streaming/FileStreamSinkSuite.scala | 10 +-- .../sql/test/DataFrameReaderWriterSuite.scala | 65 ++ .../hive/execution/InsertIntoHiveDirCommand.scala | 1 - .../org/apache/spark/sql/hive/InsertSuite.scala| 9 ++- .../spark/sql/hive/execution/HiveDDLSuite.scala| 16 +++-- 38 files changed, 326 insertions(+), 314 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (d453598a428 -> 40b7d29e14c)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from d453598a428 [SPARK-40809][CONNECT][FOLLOW-UP] Do not use Buffer to make Scala 2.13 test pass add 40b7d29e14c [SPARK-41217][SQL] Add the error class `FAILED_FUNCTION_CALL` No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 5 ++ .../sql/catalyst/analysis/FunctionRegistry.scala | 9 +-- .../spark/sql/errors/QueryCompilationErrors.scala | 13 ++- .../results/ansi/string-functions.sql.out | 16 +++- .../sql-tests/results/csv-functions.sql.out| 93 +- .../resources/sql-tests/results/extract.sql.out| 75 +++-- .../sql-tests/results/json-functions.sql.out | 93 +- .../sql-tests/results/postgreSQL/int8.sql.out | 2 +- .../sql-tests/results/string-functions.sql.out | 16 +++- .../results/table-valued-functions.sql.out | 2 +- 10 files changed, 269 insertions(+), 55 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41172][SQL] Migrate the ambiguous ref error to an error class
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 62f8ce40ddb [SPARK-41172][SQL] Migrate the ambiguous ref error to an error class 62f8ce40ddb is described below commit 62f8ce40ddbf76ce86fd5e51cc73c67d66e12f48 Author: panbingkun AuthorDate: Sat Nov 19 20:31:38 2022 +0300 [SPARK-41172][SQL] Migrate the ambiguous ref error to an error class ### What changes were proposed in this pull request? The pr aims to migrate the ambiguous ref error to an error class. ### Why are the changes needed? The changes improve the error framework. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Closes #38721 from panbingkun/SPARK-41172. Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 5 + .../spark/sql/catalyst/expressions/package.scala | 5 +- .../spark/sql/errors/QueryCompilationErrors.scala | 9 ++ .../sql/catalyst/analysis/AnalysisSuite.scala | 5 +- .../catalyst/analysis/ResolveSubquerySuite.scala | 4 +- .../expressions/AttributeResolutionSuite.scala | 30 +++-- .../results/columnresolution-negative.sql.out | 135 +++-- .../sql-tests/results/postgreSQL/join.sql.out | 30 - .../results/postgreSQL/select_implicit.sql.out | 45 ++- .../results/udf/postgreSQL/udf-join.sql.out| 30 - .../udf/postgreSQL/udf-select_implicit.sql.out | 45 ++- .../spark/sql/DataFrameNaFunctionsSuite.scala | 42 +-- .../org/apache/spark/sql/DataFrameStatSuite.scala | 52 ++-- .../execution/command/PlanResolutionSuite.scala| 22 ++-- .../execution/datasources/orc/OrcFilterSuite.scala | 20 ++- 15 files changed, 406 insertions(+), 73 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index fe340c517a2..4da9d2f9fbc 100644 --- 
a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -5,6 +5,11 @@ ], "sqlState" : "42000" }, + "AMBIGUOUS_REFERENCE" : { +"message" : [ + "Reference is ambiguous, could be: ." +] + }, "ARITHMETIC_OVERFLOW" : { "message" : [ ". If necessary set to \"false\" to bypass this error." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala index 7913f396120..ededac3d917 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala @@ -21,9 +21,9 @@ import java.util.Locale import com.google.common.collect.Maps -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{Resolver, UnresolvedAttribute} import org.apache.spark.sql.catalyst.util.MetadataColumnHelper +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types.{StructField, StructType} /** @@ -368,8 +368,7 @@ package object expressions { case ambiguousReferences => // More than one match. 
- val referenceNames = ambiguousReferences.map(_.qualifiedName).mkString(", ") - throw new AnalysisException(s"Reference '$name' is ambiguous, could be: $referenceNames.") + throw QueryCompilationErrors.ambiguousReferenceError(name, ambiguousReferences) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 22b4cfdb3c6..cbdbb6adc11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1834,6 +1834,15 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { "n" -> numMatches.toString)) } + def ambiguousReferenceError(name: String, ambiguousReferences: Seq[Attribute]): Throwable = { +new AnalysisException( + errorClass = "AMBIGUOUS_REFERENCE", + messageParameters = Map( +"name" -> toSQLId(name), +"referenceNames" -> + ambiguousReferences.map(ar => toSQLId(ar.qualifiedName)).sorted.mkString("[", ", ", "]"))) + } + def cannotUseIntervalTypeInTableSchemaError(): Throwable = { n
[spark] branch master updated: [SPARK-41175][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1078
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new e62596a09f3 [SPARK-41175][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1078 e62596a09f3 is described below commit e62596a09f323bfe0f8592ba7a3c45674ce04ac6 Author: panbingkun AuthorDate: Sat Nov 19 09:02:33 2022 +0300 [SPARK-41175][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1078 ### What changes were proposed in this pull request? In the PR, I propose to assign the name `CANNOT_LOAD_FUNCTION_CLASS` to the error class _LEGACY_ERROR_TEMP_1078. ### Why are the changes needed? Proper names of error classes should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running the affected test suites: > $ build/sbt "catalyst/testOnly *SessionCatalogSuite" Closes #38696 from panbingkun/SPARK-41175. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 10 +- .../apache/spark/sql/errors/QueryCompilationErrors.scala | 4 ++-- .../spark/sql/catalyst/catalog/SessionCatalogSuite.scala | 15 +++ .../test/resources/sql-tests/results/udaf/udaf.sql.out| 4 ++-- .../test/resources/sql-tests/results/udf/udf-udaf.sql.out | 4 ++-- .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 14 ++ .../org/apache/spark/sql/execution/command/DDLSuite.scala | 14 ++ 7 files changed, 46 insertions(+), 19 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index a2d9fa071d0..fe340c517a2 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -48,6 +48,11 @@ ], "sqlState" : "42000" }, + "CANNOT_LOAD_FUNCTION_CLASS" : { +"message" : [ + "Cannot load class when registering the function , please make sure it is on the classpath." +] + }, "CANNOT_LOAD_PROTOBUF_CLASS" : { "message" : [ "Could not load Protobuf class with name . ." @@ -2075,11 +2080,6 @@ "Partition spec is invalid. ." ] }, - "_LEGACY_ERROR_TEMP_1078" : { -"message" : [ - "Can not load class '' when registering the function '', please make sure it is on the classpath." -] - }, "_LEGACY_ERROR_TEMP_1079" : { "message" : [ "Resource Type '' is not supported." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index e6ce12756ca..22b4cfdb3c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -899,10 +899,10 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { def cannotLoadClassWhenRegisteringFunctionError( className: String, func: FunctionIdentifier): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1078", + errorClass = "CANNOT_LOAD_FUNCTION_CLASS", messageParameters = Map( "className" -> className, -"func" -> func.toString)) +"functionName" -> toSQLId(func.toString))) } def resourceTypeNotSupportedError(resourceType: String): Throwable = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index f86d12474d6..a7254865c1e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -1477,6 +1477,21 @@ abstract class SessionCatalogSuite extends AnalysisTest with Eventually { assert( catalog.lookupFunction( FunctionIdentifier("temp1"), arguments) === Literal(arguments.length)) + + checkError( +exception = intercept[AnalysisException] { + catalog.registerFunction( +CatalogFunction(FunctionIdentifier("temp2", None), + "function_class_cannot_load", Seq.empty[FunctionResource]), +overrideIfExists = false, +None) +}, +errorClass = "CANNOT_LOAD_FUNCTION_CLASS", +
[spark] branch master updated: [SPARK-41173][SQL] Move `require()` out from the constructors of string expressions
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new b96ddce77aa [SPARK-41173][SQL] Move `require()` out from the constructors of string expressions b96ddce77aa is described below commit b96ddce77aa3f17eb0dea95083a9ac35d6077a94 Author: yangjie01 AuthorDate: Fri Nov 18 22:14:32 2022 +0300 [SPARK-41173][SQL] Move `require()` out from the constructors of string expressions ### What changes were proposed in this pull request? This PR aims to move `require()` out from the constructors of string expressions, including `ConcatWs` and `FormatString`. The argument-count checking logic is moved into `checkInputDataTypes()`. ### Why are the changes needed? Migration onto error classes unifies Spark SQL error messages. ### Does this PR introduce _any_ user-facing change? Yes. The PR changes user-facing error messages. ### How was this patch tested? Pass GitHub Actions Closes #38705 from LuciferYang/SPARK-41173. 
Authored-by: yangjie01 Signed-off-by: Max Gekk --- .../catalyst/expressions/stringExpressions.scala | 35 +++--- .../results/ansi/string-functions.sql.out | 34 +++-- .../sql-tests/results/string-functions.sql.out | 34 +++-- 3 files changed, 95 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 45bed3e2387..60b56f4fef7 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -67,8 +67,6 @@ import org.apache.spark.unsafe.types.{ByteArray, UTF8String} case class ConcatWs(children: Seq[Expression]) extends Expression with ImplicitCastInputTypes { - require(children.nonEmpty, s"$prettyName requires at least one argument.") - override def prettyName: String = "concat_ws" /** The 1st child (separator) is str, and rest are either str or array of str. 
*/ @@ -82,6 +80,21 @@ case class ConcatWs(children: Seq[Expression]) override def nullable: Boolean = children.head.nullable override def foldable: Boolean = children.forall(_.foldable) + override def checkInputDataTypes(): TypeCheckResult = { +if (children.isEmpty) { + DataTypeMismatch( +errorSubClass = "WRONG_NUM_ARGS", +messageParameters = Map( + "functionName" -> toSQLId(prettyName), + "expectedNum" -> "> 0", + "actualNum" -> children.length.toString +) + ) +} else { + super.checkInputDataTypes() +} + } + override def eval(input: InternalRow): Any = { val flatInputs = children.flatMap { child => child.eval(input) match { @@ -1662,8 +1675,7 @@ case class StringRPad(str: Expression, len: Expression, pad: Expression = Litera // scalastyle:on line.size.limit case class FormatString(children: Expression*) extends Expression with ImplicitCastInputTypes { - require(children.nonEmpty, s"$prettyName() should take at least 1 argument") - if (!SQLConf.get.getConf(SQLConf.ALLOW_ZERO_INDEX_IN_FORMAT_STRING)) { + if (children.nonEmpty && !SQLConf.get.getConf(SQLConf.ALLOW_ZERO_INDEX_IN_FORMAT_STRING)) { checkArgumentIndexNotZero(children(0)) } @@ -1675,6 +1687,21 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC override def inputTypes: Seq[AbstractDataType] = StringType :: List.fill(children.size - 1)(AnyDataType) + override def checkInputDataTypes(): TypeCheckResult = { +if (children.isEmpty) { + DataTypeMismatch( +errorSubClass = "WRONG_NUM_ARGS", +messageParameters = Map( + "functionName" -> toSQLId(prettyName), + "expectedNum" -> "> 0", + "actualNum" -> children.length.toString +) + ) +} else { + super.checkInputDataTypes() +} + } + override def eval(input: InternalRow): Any = { val pattern = children(0).eval(input) if (pattern == null) { diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 5b82cfa957d..41f1922f8bd 
100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -5,7 +5,22 @@ select concat_ws
[spark] branch master updated: [SPARK-41166][SQL][TESTS] Check errorSubClass of DataTypeMismatch in *ExpressionSuites
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new e7520fc58e1 [SPARK-41166][SQL][TESTS] Check errorSubClass of DataTypeMismatch in *ExpressionSuites e7520fc58e1 is described below commit e7520fc58e18c45e43e07dc63f1f03cfd4da0fcc Author: panbingkun AuthorDate: Fri Nov 18 13:30:48 2022 +0300 [SPARK-41166][SQL][TESTS] Check errorSubClass of DataTypeMismatch in *ExpressionSuites ### What changes were proposed in this pull request? The pr aims to check errorSubClass of DataTypeMismatch in `*ExpressionSuites`. ### Why are the changes needed? The changes improve the error framework. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Closes #38688 from panbingkun/SPARK-41166. Authored-by: panbingkun Signed-off-by: Max Gekk --- .../expressions/CallMethodViaReflectionSuite.scala | 30 - .../sql/catalyst/expressions/CastSuiteBase.scala | 71 ++-- .../catalyst/expressions/CastWithAnsiOnSuite.scala | 118 +++- .../expressions/CollectionExpressionsSuite.scala | 32 +- .../catalyst/expressions/ComplexTypeSuite.scala| 52 - .../expressions/GeneratorExpressionSuite.scala | 36 +- .../expressions/JsonExpressionsSuite.scala | 14 ++- .../expressions/MiscExpressionsSuite.scala | 13 ++- .../expressions/StringExpressionsSuite.scala | 81 -- .../aggregate/AggregateExpressionSuite.scala | 121 + .../ApproxCountDistinctForIntervalsSuite.scala | 26 - .../aggregate/ApproximatePercentileSuite.scala | 23 +++- .../expressions/aggregate/PercentileSuite.scala| 62 ++- 13 files changed, 610 insertions(+), 69 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflectionSuite.scala index c8b99f6f026..e65b81ee166 
100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflectionSuite.scala @@ -97,10 +97,34 @@ class CallMethodViaReflectionSuite extends SparkFunSuite with ExpressionEvalHelp } test("input type checking") { -assert(CallMethodViaReflection(Seq.empty).checkInputDataTypes().isFailure) - assert(CallMethodViaReflection(Seq(Literal(staticClassName))).checkInputDataTypes().isFailure) +assert(CallMethodViaReflection(Seq.empty).checkInputDataTypes() == + DataTypeMismatch( +errorSubClass = "WRONG_NUM_ARGS", +messageParameters = Map( + "functionName" -> "`reflect`", + "expectedNum" -> "> 1", + "actualNum" -> "0") + ) +) + assert(CallMethodViaReflection(Seq(Literal(staticClassName))).checkInputDataTypes() == + DataTypeMismatch( +errorSubClass = "WRONG_NUM_ARGS", +messageParameters = Map( + "functionName" -> "`reflect`", + "expectedNum" -> "> 1", + "actualNum" -> "1") + ) +) assert(CallMethodViaReflection( - Seq(Literal(staticClassName), Literal(1))).checkInputDataTypes().isFailure) + Seq(Literal(staticClassName), Literal(1))).checkInputDataTypes() == + DataTypeMismatch( +errorSubClass = "NON_FOLDABLE_INPUT", +messageParameters = Map( + "inputName" -> "method", + "inputType" -> "\"STRING\"", + "inputExpr" -> "\"1\"") + ) +) assert(createExpr(staticClassName, "method1").checkInputDataTypes().isSuccess) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index a60491b0ab8..6d972a8482a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import 
org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.analysis.TypeCoercion.numericPrecedence +import org.apache.spark.sql.catalyst.expressions.Cast._ import org.apa
[spark] branch master updated (12a77bb22f1 -> bcbc88377ff)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 12a77bb22f1 [SPARK-41107][PYTHON][INFRA][TESTS] Install memory-profiler in the CI add bcbc88377ff [SPARK-41130][SQL] Rename `OUT_OF_DECIMAL_TYPE_RANGE` to `NUMERIC_OUT_OF_SUPPORTED_RANGE` No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 10 +- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 2 +- .../spark/sql/catalyst/expressions/CastWithAnsiOnSuite.scala | 4 ++-- .../test/scala/org/apache/spark/sql/types/DecimalSuite.scala | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) - To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org
[spark] branch master updated (f24f8489f80 -> 23fcd25b870)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from f24f8489f80 [SPARK-41106][SQL] Reduce collection conversion when create AttributeMap add 23fcd25b870 [SPARK-41133][SQL] Integrate `UNSCALED_VALUE_TOO_LARGE_FOR_PRECISION` into `NUMERIC_VALUE_OUT_OF_RANGE` No new revisions were added by this update. Summary of changes: .../spark/sql/avro/AvroLogicalTypeSuite.scala | 17 ++- core/src/main/resources/error/error-classes.json | 7 +-- .../spark/sql/errors/QueryExecutionErrors.scala| 17 ++- .../scala/org/apache/spark/sql/types/Decimal.scala | 2 +- .../org/apache/spark/sql/types/DecimalSuite.scala | 24 -- 5 files changed, 44 insertions(+), 23 deletions(-) - To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org
[spark] branch master updated: [SPARK-41139][SQL] Improve error class: `PYTHON_UDF_IN_ON_CLAUSE`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new fea905acea2 [SPARK-41139][SQL] Improve error class: `PYTHON_UDF_IN_ON_CLAUSE` fea905acea2 is described below commit fea905acea2e8eedb10f86d4cea6565f19066023 Author: itholic AuthorDate: Wed Nov 16 19:13:52 2022 +0300 [SPARK-41139][SQL] Improve error class: `PYTHON_UDF_IN_ON_CLAUSE` ### What changes were proposed in this pull request? This PR proposes to improve the error message and test for `PYTHON_UDF_IN_ON_CLAUSE` ### Why are the changes needed? The current error message is not clear enough to let user understand the solve the problem. We can provide more information to improve the usability. Also, we should test the error class with `checkError` for better testability. ### Does this PR introduce _any_ user-facing change? The error message is improved with additional detailed information. From ``` Python UDF in the ON clause of a JOIN. ``` To ``` Python UDF in the ON clause of a JOIN. In case of an INNNER JOIN consider rewriting to a CROSS JOIN with a WHERE clause. ``` ### How was this patch tested? Manually tested for fixed test case. Closes #38657 from itholic/SPARK-41139. Authored-by: itholic Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 2 +- .../optimizer/ExtractPythonUDFFromJoinConditionSuite.scala| 8 +--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 32083c23df8..d5d6e938ad1 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1248,7 +1248,7 @@ }, "PYTHON_UDF_IN_ON_CLAUSE" : { "message" : [ - "Python UDF in the ON clause of a JOIN." + "Python UDF in the ON clause of a JOIN. 
In case of an INNNER JOIN consider rewriting to a CROSS JOIN with a WHERE clause." ] }, "REPEATED_PIVOT" : { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala index 0b215818d36..854a3e8f7a7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala @@ -187,9 +187,11 @@ class ExtractPythonUDFFromJoinConditionSuite extends PlanTest { condition = Some(unevaluableJoinCond)) Optimize.execute(query.analyze) } - assert(e.message == -"[UNSUPPORTED_FEATURE.PYTHON_UDF_IN_ON_CLAUSE] The feature is not supported: " + -s"""Python UDF in the ON clause of a ${joinType.sql} JOIN.""") + checkError( +exception = e, +errorClass = "UNSUPPORTED_FEATURE.PYTHON_UDF_IN_ON_CLAUSE", +parameters = Map("joinType" -> joinType.sql) + ) val query2 = testRelationLeft.join( testRelationRight, - To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org
[spark] branch master updated (0f7eaeee644 -> e3aa2fca385)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 0f7eaeee644 [SPARK-40809][CONNECT][FOLLOW] Support `alias()` in Python client add e3aa2fca385 [SPARK-41158][SQL][TESTS] Use `checkError()` to check `DATATYPE_MISMATCH` in `DataFrameFunctionsSuite` No new revisions were added by this update. Summary of changes: .../apache/spark/sql/DataFrameFunctionsSuite.scala | 496 + 1 file changed, 405 insertions(+), 91 deletions(-) - To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org