MaxGekk commented on code in PR #37621:
URL: https://github.com/apache/spark/pull/37621#discussion_r962255122
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala:
##########
@@ -178,7 +178,10 @@ object Project {
createNewColumn(columnExpr, f.name, f.metadata, Metadata.empty)
} else {
if (columnPath.isEmpty) {
- throw QueryCompilationErrors.unresolvedColumnError(f.name,
fields.map(_._1))
+ val candidates = fields.map(_._1)
+ val orderedCandidates =
+ StringUtils.orderStringsBySimilarity(f.name, candidates).take(5)
Review Comment:
Let's move `.take(5)` to `unresolvedColumnError`
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala:
##########
@@ -117,8 +117,18 @@ class AnalysisErrorSuite extends AnalysisTest {
plan: LogicalPlan,
errorClass: String,
messageParameters: Array[String]): Unit = {
+ errorClassTest(name, plan, errorClass, null, messageParameters)
+ }
+
+ def errorClassTest(
+ name: String,
+ plan: LogicalPlan,
+ errorClass: String,
+ errorSubClass: String,
+ messageParameters: Array[String]): Unit = {
Review Comment:
Wrong indentation; please follow the spacing and indentation rules in
https://github.com/databricks/scala-style-guide#spacing-and-indentation
##########
sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala:
##########
@@ -322,19 +322,27 @@ class DatasetSuite extends QueryTest
val ds = Seq(("a", 1), ("b", 2), ("c", 3)).toDS()
withSQLConf(SQLConf.SUPPORT_QUOTED_REGEX_COLUMN_NAME.key -> "false") {
- var e = intercept[AnalysisException] {
- ds.select(expr("`(_1)?+.+`").as[Int])
- }
- assert(e.getErrorClass == "UNRESOLVED_COLUMN")
- assert(e.messageParameters.head == "`(_1)?+.+`")
-
- e = intercept[AnalysisException] {
- ds.select(expr("`(_1|_2)`").as[Int])
- }
- assert(e.getErrorClass == "UNRESOLVED_COLUMN")
- assert(e.messageParameters.head == "`(_1|_2)`")
+ checkError(
+ exception = intercept[AnalysisException] {
+ ds.select(expr("`(_1)?+.+`").as[Int])
+ },
+ errorClass = "UNRESOLVED_COLUMN",
+ errorSubClass = Some("WITH_SUGGESTION"),
+ parameters = Map(
+ "objectName" -> "`(_1)?+.+`",
+ "proposal" -> "`_1`, `_2`"))
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ ds.select(expr("`(_1|_2)`").as[Int])
+ },
+ errorClass = "UNRESOLVED_COLUMN",
+ errorSubClass = Some("WITH_SUGGESTION"),
+ parameters = Map(
+ "objectName" -> "`(_1|_2)`",
+ "proposal" -> "`_1`, `_2`"))
- e = intercept[AnalysisException] {
+ var e = intercept[AnalysisException] {
Review Comment:
Could you use `checkError` here as well?
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala:
##########
@@ -107,7 +107,7 @@ trait CheckAnalysis extends PredicateHelper with
LookupCatalog {
errorClass: String): Nothing = {
val missingCol = a.sql
val candidates = operator.inputSet.toSeq.map(_.qualifiedName)
- val orderedCandidates = StringUtils.orderStringsBySimilarity(missingCol,
candidates)
+ val orderedCandidates = StringUtils.orderStringsBySimilarity(missingCol,
candidates).take(5)
Review Comment:
I would prefer to restrict the size in one place, inside
`unresolvedAttributeError()`.
##########
core/src/main/resources/error/error-classes.json:
##########
@@ -415,20 +415,56 @@
},
"UNRESOLVED_COLUMN" : {
"message" : [
- "A column or function parameter with name <objectName> cannot be
resolved. Did you mean one of the following? [<objectList>]"
+ "A column or function parameter with name <objectName> cannot be
resolved."
],
+ "subClass" : {
+ "GENERIC" : {
Review Comment:
@cloud-fan @srielau Are you OK with such an approach?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]