This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 14622fc [SPARK-36488][SQL] Improve error message with
quotedRegexColumnNames
14622fc is described below
commit 14622fcec8b977e2c2f7b3860797cc0b544bad3b
Author: Pablo Langa <[email protected]>
AuthorDate: Thu Aug 26 11:33:40 2021 +0800
[SPARK-36488][SQL] Improve error message with quotedRegexColumnNames
### What changes were proposed in this pull request?
When `spark.sql.parser.quotedRegexColumnNames=true` and a regex pattern is used
in a place where it is not allowed, the error message is confusing because it refers to '*' rather than to the pattern:
```
scala> spark.sql("set spark.sql.parser.quotedRegexColumnNames=true")
scala> spark.sql("SELECT `col_.?`/col_b FROM (SELECT 3 AS col_a, 1 as
col_b)")
org.apache.spark.sql.AnalysisException: Invalid usage of '*' in expression
'divide'
```
This PR improves the error message so that it refers to the regular expression instead of '*':
```
scala> spark.sql("SELECT `col_.?`/col_b FROM (SELECT 3 AS col_a, 1 as
col_b)")
org.apache.spark.sql.AnalysisException: Invalid usage of regular expression
'col_.?' in expression 'divide'
```
### Why are the changes needed?
To make the error message clearer when this option is enabled.
### Does this PR introduce _any_ user-facing change?
Yes, the error message changes.
### How was this patch tested?
Unit testing and manual testing
Closes #33802 from planga82/feature/spark36488_improve_error_message.
Authored-by: Pablo Langa <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/analysis/Analyzer.scala | 9 ++++++--
.../sql/catalyst/analysis/CheckAnalysis.scala | 2 +-
.../spark/sql/errors/QueryCompilationErrors.scala | 17 ++++++++++++---
.../sql/catalyst/analysis/AnalysisErrorSuite.scala | 24 ++++++++++++++++++++++
4 files changed, 46 insertions(+), 6 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index af9ff0d..a26f6b6 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1443,7 +1443,8 @@ class Analyzer(override val catalogManager:
CatalogManager)
a.copy(aggregateExpressions =
buildExpandedProjectList(a.aggregateExpressions, a.child))
}
case g: Generate if containsStar(g.generator.children) =>
- throw
QueryCompilationErrors.invalidStarUsageError("explode/json_tuple/UDTF")
+ throw
QueryCompilationErrors.invalidStarUsageError("explode/json_tuple/UDTF",
+ extractStar(g.generator.children))
// When resolve `SortOrder`s in Sort based on child, don't report errors
as
// we still have chance to resolve it based on its descendants
@@ -1657,6 +1658,9 @@ class Analyzer(override val catalogManager:
CatalogManager)
def containsStar(exprs: Seq[Expression]): Boolean =
exprs.exists(_.collect { case _: Star => true }.nonEmpty)
+ private def extractStar(exprs: Seq[Expression]): Seq[Star] =
+ exprs.map(_.collect { case s: Star => s }).flatten
+
/**
* Expands the matching attribute.*'s in `child`'s output.
*/
@@ -1704,7 +1708,8 @@ class Analyzer(override val catalogManager:
CatalogManager)
})
// count(*) has been replaced by count(1)
case o if containsStar(o.children) =>
- throw QueryCompilationErrors.invalidStarUsageError(s"expression
'${o.prettyName}'")
+ throw QueryCompilationErrors.invalidStarUsageError(s"expression
'${o.prettyName}'",
+ extractStar(o.children))
}
}
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 932414e..2adf110 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -173,7 +173,7 @@ trait CheckAnalysis extends PredicateHelper with
LookupCatalog {
case s: Star =>
withPosition(s) {
- throw
QueryCompilationErrors.invalidStarUsageError(operator.nodeName)
+ throw
QueryCompilationErrors.invalidStarUsageError(operator.nodeName, Seq(s))
}
case e: Expression if e.checkInputDataTypes().isFailure =>
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 2cbca6f..0c7b322 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -23,7 +23,7 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName,
TableIdentifier}
-import
org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException,
NamespaceAlreadyExistsException, NoSuchFunctionException,
NoSuchNamespaceException, NoSuchPartitionException, NoSuchTableException,
ResolvedNamespace, ResolvedTable, ResolvedView, TableAlreadyExistsException}
+import
org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException,
NamespaceAlreadyExistsException, NoSuchFunctionException,
NoSuchNamespaceException, NoSuchPartitionException, NoSuchTableException,
ResolvedNamespace, ResolvedTable, ResolvedView, Star,
TableAlreadyExistsException, UnresolvedRegex}
import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable,
InvalidUDFClassException}
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute,
AttributeReference, AttributeSet, CreateMap, Expression, GroupingID,
NamedExpression, SpecifiedWindowFrame, WindowFrame, WindowFunction,
WindowSpecDefinition}
@@ -262,8 +262,19 @@ private[spark] object QueryCompilationErrors {
"Star (*) is not allowed in select list when GROUP BY ordinal position
is used")
}
- def invalidStarUsageError(prettyName: String): Throwable = {
- new AnalysisException(s"Invalid usage of '*' in $prettyName")
+ def invalidStarUsageError(prettyName: String, stars: Seq[Star]): Throwable =
{
+ val regExpr = stars.collect{ case UnresolvedRegex(pattern, _, _) =>
s"'$pattern'" }
+ val resExprMsg = Option(regExpr.distinct).filter(_.nonEmpty).map {
+ case Seq(p) => s"regular expression $p"
+ case patterns => s"regular expressions ${patterns.mkString(", ")}"
+ }
+ val starMsg = if (stars.length - regExpr.length > 0) {
+ Some("'*'")
+ } else {
+ None
+ }
+ val elem = Seq(starMsg, resExprMsg).flatten.mkString(" and ")
+ new AnalysisException(s"Invalid usage of $elem in $prettyName")
}
def singleTableStarInCountNotAllowedError(targetString: String): Throwable =
{
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 960c260..eef61ee 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.catalyst.plans.{Cross, LeftOuter, RightOuter}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData,
GenericArrayData, MapData}
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
private[sql] case class GroupableData(data: Int) {
@@ -850,4 +851,27 @@ class AnalysisErrorSuite extends AnalysisTest {
"Invalid usage of '*' in Filter" :: Nil
)
}
+
+ test("SPARK-36488: Regular expression expansion should fail with a
meaningful message") {
+ withSQLConf(SQLConf.SUPPORT_QUOTED_REGEX_COLUMN_NAME.key -> "true") {
+ assertAnalysisError(testRelation.select(Divide(UnresolvedRegex(".?",
None, false), "a")),
+ s"Invalid usage of regular expression '.?' in" :: Nil)
+ assertAnalysisError(testRelation.select(
+ Divide(UnresolvedRegex(".?", None, false), UnresolvedRegex(".*", None,
false))),
+ s"Invalid usage of regular expressions '.?', '.*' in" :: Nil)
+ assertAnalysisError(testRelation.select(
+ Divide(UnresolvedRegex(".?", None, false), UnresolvedRegex(".?", None,
false))),
+ s"Invalid usage of regular expression '.?' in" :: Nil)
+ assertAnalysisError(testRelation.select(Divide(UnresolvedStar(None),
"a")),
+ "Invalid usage of '*' in" :: Nil)
+ assertAnalysisError(testRelation.select(Divide(UnresolvedStar(None),
UnresolvedStar(None))),
+ "Invalid usage of '*' in" :: Nil)
+ assertAnalysisError(testRelation.select(Divide(UnresolvedStar(None),
+ UnresolvedRegex(".?", None, false))),
+ "Invalid usage of '*' and regular expression '.?' in" :: Nil)
+ assertAnalysisError(testRelation.select(Least(Seq(UnresolvedStar(None),
+ UnresolvedRegex(".*", None, false), UnresolvedRegex(".?", None,
false)))),
+ "Invalid usage of '*' and regular expressions '.*', '.?' in" :: Nil)
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]