This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6e4d75a349f [SPARK-43980][SQL] introducing `select * except` syntax
6e4d75a349f is described below
commit 6e4d75a349f4b86beb9053657bd7944c85e9a9f2
Author: milastdbx <[email protected]>
AuthorDate: Thu Nov 23 23:01:36 2023 +0800
[SPARK-43980][SQL] introducing `select * except` syntax
### What changes were proposed in this pull request?
Changing the parser to support the new syntax when using * to fetch columns
from a source.
Introducing a new expression `UnresolvedStarExcept` in the visit method when
the newly introduced syntax is encountered.
The expansion of this expression is the core logic of the feature.
### Why are the changes needed?
Introducing the new `SELECT * EXCEPT (col1, col2)` syntax.
### Does this PR introduce _any_ user-facing change?
Yes, this PR introduces new SQL syntax, which is used to explicitly exclude
columns from star projection.
### How was this patch tested?
Unit tests.
Generated new golden files.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #43843 from milastdbx/feature/selectStarExcept.
Authored-by: milastdbx <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../src/main/resources/error/error-classes.json | 12 +
docs/sql-error-conditions.md | 12 +
.../spark/sql/catalyst/parser/SqlBaseParser.g4 | 8 +-
.../spark/sql/catalyst/analysis/unresolved.scala | 156 +++++++++-
.../spark/sql/catalyst/parser/AstBuilder.scala | 22 +-
.../analyzer-results/selectExcept.sql.out | 330 ++++++++++++++++++++
.../resources/sql-tests/inputs/selectExcept.sql | 51 +++
.../sql-tests/results/selectExcept.sql.out | 346 +++++++++++++++++++++
8 files changed, 929 insertions(+), 8 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-classes.json
b/common/utils/src/main/resources/error/error-classes.json
index 8b1a3a33f7d..b0621c44532 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -993,6 +993,18 @@
],
"sqlState" : "54006"
},
+ "EXCEPT_NESTED_COLUMN_INVALID_TYPE" : {
+ "message" : [
+ "EXCEPT column <columnName> was resolved and expected to be StructType,
but found type <dataType>."
+ ],
+ "sqlState" : "428H2"
+ },
+ "EXCEPT_OVERLAPPING_COLUMNS" : {
+ "message" : [
+ "Columns in an EXCEPT list must be distinct and non-overlapping, but got
(<columns>)."
+ ],
+ "sqlState" : "42702"
+ },
"EXPECT_PERMANENT_VIEW_NOT_TEMP" : {
"message" : [
"'<operation>' expects a permanent view but <viewName> is a temp view."
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index 3e706debbf8..c0f88bffa6e 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -587,6 +587,18 @@ The event time `<eventName>` has the invalid type
`<eventType>`, but expected "T
Exceeds char/varchar type length limitation: `<limit>`.
+### EXCEPT_NESTED_COLUMN_INVALID_TYPE
+
+[SQLSTATE:
428H2](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)
+
+EXCEPT column `<columnName>` was resolved and expected to be StructType, but
found type `<dataType>`.
+
+### EXCEPT_OVERLAPPING_COLUMNS
+
+[SQLSTATE:
42702](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)
+
+Columns in an EXCEPT list must be distinct and non-overlapping, but got
(`<columns>`).
+
### EXPECT_PERMANENT_VIEW_NOT_TEMP
[SQLSTATE:
42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)
diff --git
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 609bd72e219..439a12c3018 100644
---
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -587,6 +587,10 @@ notMatchedBySourceAction
| UPDATE SET assignmentList
;
+exceptClause
+ : EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN
+ ;
+
assignmentList
: assignment (COMMA assignment)*
;
@@ -969,8 +973,8 @@ primaryExpression
| LAST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN
#last
| POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression
RIGHT_PAREN #position
| constant
#constantDefault
- | ASTERISK
#star
- | qualifiedName DOT ASTERISK
#star
+ | ASTERISK exceptClause?
#star
+ | qualifiedName DOT ASTERISK exceptClause?
#star
| LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN
#rowConstructor
| LEFT_PAREN query RIGHT_PAREN
#subqueryExpression
| functionName LEFT_PAREN (setQuantifier? argument+=functionArgument
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index e232dfc05fa..60d1a89959d 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -17,6 +17,7 @@
package org.apache.spark.sql.catalyst.analysis
+import org.apache.spark.SparkException
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow,
TableIdentifier}
import org.apache.spark.sql.catalyst.expressions._
@@ -358,10 +359,10 @@ abstract class Star extends LeafExpression with
NamedExpression {
def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression]
}
-
/**
* Represents all of the input attributes to a given relational operator, for
example in
* "SELECT * FROM ...".
+ * or "SELECT * EXCEPT(...) FROM ...".
*
* This is also used to expand structs. For example:
* "SELECT record.* from (SELECT struct(a,b,c) as record ...)
@@ -369,9 +370,12 @@ abstract class Star extends LeafExpression with
NamedExpression {
* @param target an optional name that should be the target of the expansion.
If omitted all
* targets' columns are produced. This can either be a table name
or struct name. This
* is a list of identifiers that is the path of the expansion.
- */
-case class UnresolvedStar(target: Option[Seq[String]]) extends Star with
Unevaluable {
-
+ *
+ * This class provides the shared behavior between the classes for SELECT *
([[UnresolvedStar]])
+ * and SELECT * EXCEPT ([[UnresolvedStarExcept]]). [[UnresolvedStar]] is just
a case class of this,
+ * while [[UnresolvedStarExcept]] adds some additional logic to the expand
method.
+ */
+abstract class UnresolvedStarBase(target: Option[Seq[String]]) extends Star
with Unevaluable {
/**
* Returns true if the nameParts is a subset of the last elements of
qualifier of the attribute.
*
@@ -383,7 +387,7 @@ case class UnresolvedStar(target: Option[Seq[String]])
extends Star with Unevalu
* - `SELECT t.* FROM ns1.ns2.t` where nameParts is Seq("t") and
* qualifier is Seq("ns1", "ns2", "t").
*/
- private def matchedQualifier(
+ protected def matchedQualifier(
attribute: Attribute,
nameParts: Seq[String],
resolver: Resolver): Boolean = {
@@ -444,6 +448,148 @@ case class UnresolvedStar(target: Option[Seq[String]])
extends Star with Unevalu
override def toString: String = target.map(_.mkString("", ".",
".")).getOrElse("") + "*"
}
+/**
+ * Represents some of the input attributes to a given relational operator, for
example in
+ * "SELECT * EXCEPT(a) FROM ...".
+ *
+ * @param target an optional name that should be the target of the expansion.
If omitted all
+ * targets' columns are produced. This can only be a table name.
This
+ * is a list of identifiers that is the path of the expansion.
+ *
+ * @param excepts a list of names that should be excluded from the expansion.
+ *
+ */
+case class UnresolvedStarExcept(target: Option[Seq[String]], excepts:
Seq[Seq[String]])
+ extends UnresolvedStarBase(target) {
+
+ /**
+ * We expand the * EXCEPT by the following three steps:
+ * 1. use the original .expand() to get top-level column list or struct
expansion
+ * 2. resolve excepts (with respect to the Seq[NamedExpression] returned
from (1))
+ * 3. filter the expanded columns with the resolved except list. recursively
apply filtering in
+ * case of nested columns in the except list (in order to rewrite structs)
+ */
+ override def expand(input: LogicalPlan, resolver: Resolver):
Seq[NamedExpression] = {
+ // Use the UnresolvedStarBase expand method to get a seq of
NamedExpressions corresponding to
+ // the star expansion. This will yield a list of top-level columns from
the logical plan's
+ // output, or in the case of struct expansion (e.g. target=`x` for SELECT
x.*) it will give
+ // a seq of Alias wrapping the struct field extraction.
+ val expandedCols = super.expand(input, resolver)
+
+ // resolve except list with respect to the expandedCols
+ val resolvedExcepts = excepts.map { exceptParts =>
+ AttributeSeq(expandedCols.map(_.toAttribute)).resolve(exceptParts,
resolver).getOrElse {
+ val orderedCandidates =
StringUtils.orderSuggestedIdentifiersBySimilarity(
+ UnresolvedAttribute(exceptParts).name, expandedCols.map(a =>
a.qualifier :+ a.name))
+ // if target is defined and expandedCols does not include any
Attributes, it must be struct
+ // expansion; give message suggesting to use unqualified names of
nested fields.
+ throw QueryCompilationErrors
+ .unresolvedColumnError(UnresolvedAttribute(exceptParts).name,
orderedCandidates)
+ }
+ }
+
+ // Convert each resolved except into a pair of (col: Attribute,
nestedColumn) representing the
+ // top level column in expandedCols that we must 'filter' based on
nestedColumn.
+ @scala.annotation.tailrec
+ def getRootColumn(expr: Expression, nestedColumn: Seq[String] = Nil)
+ : (NamedExpression, Seq[String]) = expr match {
+ case GetStructField(fieldExpr, _, Some(fieldName)) =>
+ getRootColumn(fieldExpr, fieldName +: nestedColumn)
+ case e: NamedExpression => e -> nestedColumn
+ case other: ExtractValue => throw new AnalysisException(
+ errorClass = "EXCEPT_NESTED_COLUMN_INVALID_TYPE",
+ messageParameters = Map("columnName" -> other.sql, "dataType" ->
other.dataType.toString))
+ }
+ // An exceptPair represents the column in expandedCols along with the path
of a nestedColumn
+ // that should be except-ed. Consider two examples:
+ // 1. excepting the entire col1 = (col1, Seq())
+ // 2. excepting a nested field in col2, col2.a.b = (col2, Seq(a, b))
+ // INVARIANT: we rely on the structure of the resolved except being an
Alias of GetStructField
+ // in the case of nested columns.
+ val exceptPairs = resolvedExcepts.map {
+ case Alias(exceptExpr, name) => getRootColumn(exceptExpr)
+ case except: NamedExpression => except -> Seq.empty
+ }
+
+ // Filter columns which correspond to ones listed in the except list and
return a new list of
+ // columns which exclude the columns in the except list. The 'filtering'
manifests as either
+ // dropping the column from the list of columns we return, or rewriting
the projected column in
+ // order to remove excepts that refer to nested columns. For example, with
the example above:
+ // excepts = Seq(
+ // (col1, Seq()), => filter col1 from the output
+ // (col2, Seq(a, b)) => rewrite col2 in the output so that it doesn't
include the nested field
+ // ) corresponding to col2.a.b
+ //
+ // This occurs in two steps:
+ // 1. group the excepts by the column they refer to (groupedExcepts)
+ // 2. filter/rewrite input columns based on four cases:
+ // a. column doesn't match any groupedExcepts => column unchanged
+ // b. column exists in groupedExcepts and:
+ // i. none of remainingExcepts are empty => recursively apply
filterColumns over the
+ // struct fields in order to rewrite the struct
+ // ii. a remainingExcept is empty, but there are multiple
remainingExcepts => we must
+ // have duplicate/overlapping excepts - throw an error
+ // iii. [otherwise] remainingExcept is exactly Seq(Seq()) => this is
the base 'filtering'
+ // case. we omit the column from the output (this is a column
we would like to
+ // except). NOTE: this case isn't explicitly listed in the
`collect` below since we
+ // 'collect' columns which match the cases above and omit ones
that fall into this
+ // remaining case.
+ def filterColumns(columns: Seq[NamedExpression], excepts:
Seq[(NamedExpression, Seq[String])])
+ : Seq[NamedExpression] = {
+ // group the except pairs by the column they refer to. NOTE: no groupMap
until scala 2.13
+ val groupedExcepts: AttributeMap[Seq[Seq[String]]] =
+ AttributeMap(excepts.groupBy(_._1.toAttribute).view.mapValues(v =>
v.map(_._2)))
+
+ // map input columns while searching for the except entry corresponding
to the current column
+ columns.map(col => col -> groupedExcepts.get(col.toAttribute)).collect {
+ // pass through columns that don't match anything in groupedExcepts
+ case (col, None) => col
+ // found a match but nestedExcepts has remaining excepts - recurse to
rewrite the struct
+ case (col, Some(nestedExcepts)) if nestedExcepts.forall(_.nonEmpty) =>
+ val fields = col.dataType match {
+ case s: StructType => s.fields
+ // we shouldn't be here since we EXCEPT_NESTED_COLUMN_INVALID_TYPE
in getRootColumn
+ // for this case - just throw internal error
+ case _ => throw SparkException.internalError("Invalid column type")
+ }
+ val extractedFields = fields.zipWithIndex.map { case (f, i) =>
+ Alias(GetStructField(col, i), f.name)()
+ }
+ val newExcepts = nestedExcepts.map { nestedExcept =>
+ // INVARIANT: we cannot have duplicate column names in nested
columns, thus, this `head`
+ // will find the one and only column corresponding to the correct
extractedField.
+ extractedFields.collectFirst { case col if resolver(col.name,
nestedExcept.head) =>
+ col.toAttribute -> nestedExcept.tail
+ }.get
+ }
+ Alias(CreateStruct(filterColumns(extractedFields.toSeq,
newExcepts)), col.name)()
+ // if there are multiple nestedExcepts but one is empty we must have
overlapping except
+ // columns. throw an error.
+ case (col, Some(nestedExcepts)) if nestedExcepts.size > 1 =>
+ throw new AnalysisException(
+ errorClass = "EXCEPT_OVERLAPPING_COLUMNS",
+ messageParameters = Map(
+ "columns" -> this.excepts.map(_.mkString(".")).mkString(", ")))
+ }
+ }
+
+ filterColumns(expandedCols, exceptPairs)
+ }
+}
+
+/**
+ * Represents all of the input attributes to a given relational operator, for
example in
+ * "SELECT * FROM ...".
+ *
+ * This is also used to expand structs. For example:
+ * "SELECT record.* from (SELECT struct(a,b,c) as record ...)
+ *
+ * @param target an optional name that should be the target of the expansion.
If omitted all
+ * targets' columns are produced. This can either be a table name
or struct name. This
+ * is a list of identifiers that is the path of the expansion.
+ */
+case class UnresolvedStar(target: Option[Seq[String]]) extends
UnresolvedStarBase(target)
+
/**
* Represents all of the input attributes to a given relational operator, for
example in
* "SELECT `(id)?+.+` FROM ...".
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 09161d8f8da..9f62c8cb323 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1786,7 +1786,27 @@ class AstBuilder extends DataTypeAstBuilder with
SQLConfHelper with Logging {
* Both un-targeted (global) and targeted aliases are supported.
*/
override def visitStar(ctx: StarContext): Expression = withOrigin(ctx) {
-
UnresolvedStar(Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq))
+ var target =
Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq)
+
+ if (ctx.exceptClause != null) {
+ visitStarExcept(ctx, target)
+ }
+ else {
+ UnresolvedStar(target)
+ }
+ }
+
+ /**
+ * Create a star-except (i.e. all - except list) expression; this selects
all elements in the
+ * specified object except those in the except list.
+ * Both un-targeted (global) and targeted aliases are supported.
+ */
+ def visitStarExcept(ctx: StarContext, target: Option[Seq[String]]):
Expression = withOrigin(ctx) {
+ val exceptCols = ctx.exceptClause
+ .exceptCols.multipartIdentifier.asScala.map(typedVisit[Seq[String]])
+ UnresolvedStarExcept(
+ target,
+ exceptCols.toSeq)
}
/**
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
new file mode 100644
index 00000000000..8643d40b886
--- /dev/null
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
@@ -0,0 +1,330 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES
+ (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3",
"a"))),
+ (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3",
"b"))),
+ (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3",
"c"))),
+ (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3",
"d"))),
+ (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3",
"e"))),
+ (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3",
"f"))),
+ (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3",
"g")))
+AS tbl_view(id, name, data)
+-- !query analysis
+CreateViewCommand `tbl_view`, SELECT * FROM VALUES
+ (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3",
"a"))),
+ (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3",
"b"))),
+ (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3",
"c"))),
+ (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3",
"d"))),
+ (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3",
"e"))),
+ (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3",
"f"))),
+ (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3",
"g")))
+AS tbl_view(id, name, data), false, false, LocalTempView, true
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+CREATE TABLE ids (id INT) USING CSV
+-- !query analysis
+org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
+{
+ "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS",
+ "sqlState" : "42P07",
+ "messageParameters" : {
+ "relationName" : "`spark_catalog`.`default`.`ids`"
+ }
+}
+
+
+-- !query
+SELECT * FROM tbl_view
+-- !query analysis
+Project [id#x, name#x, data#x]
++- SubqueryAlias tbl_view
+ +- View (`tbl_view`, [id#x,name#x,data#x])
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x,
cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * EXCEPT (id) FROM tbl_view
+-- !query analysis
+Project [name#x, data#x]
++- SubqueryAlias tbl_view
+ +- View (`tbl_view`, [id#x,name#x,data#x])
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x,
cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * EXCEPT (name) FROM tbl_view
+-- !query analysis
+Project [id#x, data#x]
++- SubqueryAlias tbl_view
+ +- View (`tbl_view`, [id#x,name#x,data#x])
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x,
cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * EXCEPT (data) FROM tbl_view
+-- !query analysis
+Project [id#x, name#x]
++- SubqueryAlias tbl_view
+ +- View (`tbl_view`, [id#x,name#x,data#x])
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x,
cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * EXCEPT (data.f1) FROM tbl_view
+-- !query analysis
+Project [id#x, name#x, named_struct(s2, data#x.s2) AS data#x]
++- SubqueryAlias tbl_view
+ +- View (`tbl_view`, [id#x,name#x,data#x])
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x,
cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * EXCEPT (data.s2) FROM tbl_view
+-- !query analysis
+Project [id#x, name#x, named_struct(f1, data#x.f1) AS data#x]
++- SubqueryAlias tbl_view
+ +- View (`tbl_view`, [id#x,name#x,data#x])
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x,
cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * EXCEPT (data.s2.f2) FROM tbl_view
+-- !query analysis
+Project [id#x, name#x, named_struct(f1, data#x.f1, s2, named_struct(f3,
data#x.s2.f3)) AS data#x]
++- SubqueryAlias tbl_view
+ +- View (`tbl_view`, [id#x,name#x,data#x])
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x,
cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * EXCEPT (id, name, data) FROM tbl_view
+-- !query analysis
+Project
++- SubqueryAlias tbl_view
+ +- View (`tbl_view`, [id#x,name#x,data#x])
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x,
cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`)
+-- !query analysis
+Project [a_b_c#x]
++- SubqueryAlias __auto_generated_subquery_name
+ +- Project [1 AS a_b_c#x, 2 AS a-b-c#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT tbl_view.* EXCEPT (name) FROM tbl_view
+-- !query analysis
+Project [id#x, data#x]
++- SubqueryAlias tbl_view
+ +- View (`tbl_view`, [id#x,name#x,data#x])
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x,
cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+INSERT INTO ids
+SELECT * EXCEPT (name, data) FROM tbl_view
+-- !query analysis
+InsertIntoHadoopFsRelationCommand file:[not included in
comparison]/{warehouse_dir}/ids, false, CSV, [path=file:[not included in
comparison]/{warehouse_dir}/ids], Append, `spark_catalog`.`default`.`ids`,
org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included
in comparison]/{warehouse_dir}/ids), [id]
++- Project [id#x]
+ +- SubqueryAlias tbl_view
+ +- View (`tbl_view`, [id#x,name#x,data#x])
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS
name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * FROM ids
+-- !query analysis
+Project [id#x]
++- SubqueryAlias spark_catalog.default.ids
+ +- Relation spark_catalog.default.ids[id#x] csv
+
+
+-- !query
+SELECT * EXCEPT (ids.id) FROM ids
+-- !query analysis
+Project
++- SubqueryAlias spark_catalog.default.ids
+ +- Relation spark_catalog.default.ids[id#x] csv
+
+
+-- !query
+SELECT data.* EXCEPT (s2) FROM tbl_view
+-- !query analysis
+Project [data#x.f1 AS f1#x]
++- SubqueryAlias tbl_view
+ +- View (`tbl_view`, [id#x,name#x,data#x])
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x,
cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT data.* EXCEPT (s2.f2) FROM tbl_view
+-- !query analysis
+Project [data#x.f1 AS f1#x, named_struct(f3, data#x.s2.f3) AS s2#x]
++- SubqueryAlias tbl_view
+ +- View (`tbl_view`, [id#x,name#x,data#x])
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x,
cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT data.s2.* EXCEPT (f2) FROM tbl_view
+-- !query analysis
+Project [data#x.s2.f3 AS f3#x]
++- SubqueryAlias tbl_view
+ +- View (`tbl_view`, [id#x,name#x,data#x])
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x,
cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+ +- Project [id#x, name#x, data#x]
+ +- SubqueryAlias tbl_view
+ +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * EXCEPT name FROM tbl_view
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'name'",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+SELECT * EXCEPT() name FROM tbl_view
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "')'",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+SELECT * EXCEPT(invalid_column) FROM tbl_view
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`invalid_column`",
+ "proposal" : "`id`, `name`, `data`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 31,
+ "fragment" : "* EXCEPT(invalid_column)"
+ } ]
+}
+
+
+-- !query
+SELECT * EXCEPT(id, invalid_column) FROM tbl_view
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`invalid_column`",
+ "proposal" : "`id`, `name`, `data`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 35,
+ "fragment" : "* EXCEPT(id, invalid_column)"
+ } ]
+}
+
+
+-- !query
+SELECT * EXCEPT(id, id) FROM tbl_view
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "columns" : "id, id"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 23,
+ "fragment" : "* EXCEPT(id, id)"
+ } ]
+}
+
+
+-- !query
+SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "columns" : "data.s2, data.s2.f2"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 36,
+ "fragment" : "* EXCEPT(data.s2, data.s2.f2)"
+ } ]
+}
diff --git a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
new file mode 100644
index 00000000000..e07e4f1117c
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
@@ -0,0 +1,51 @@
+CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES
+ (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3",
"a"))),
+ (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3",
"b"))),
+ (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3",
"c"))),
+ (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3",
"d"))),
+ (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3",
"e"))),
+ (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3",
"f"))),
+ (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3",
"g")))
+AS tbl_view(id, name, data);
+
+CREATE TABLE ids (id INT) USING CSV;
+
+-- Happy path
+-- EXCEPT basic scenario
+SELECT * FROM tbl_view;
+SELECT * EXCEPT (id) FROM tbl_view;
+SELECT * EXCEPT (name) FROM tbl_view;
+-- EXCEPT named structs
+SELECT * EXCEPT (data) FROM tbl_view;
+SELECT * EXCEPT (data.f1) FROM tbl_view;
+SELECT * EXCEPT (data.s2) FROM tbl_view;
+SELECT * EXCEPT (data.s2.f2) FROM tbl_view;
+-- EXCEPT all columns
+SELECT * EXCEPT (id, name, data) FROM tbl_view;
+-- EXCEPT special character names
+SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`);
+-- EXCEPT qualified star
+SELECT tbl_view.* EXCEPT (name) FROM tbl_view;
+INSERT INTO ids
+SELECT * EXCEPT (name, data) FROM tbl_view;
+SELECT * FROM ids;
+-- EXCEPT qualified columns
+SELECT * EXCEPT (ids.id) FROM ids;
+-- EXCEPT structs
+SELECT data.* EXCEPT (s2) FROM tbl_view;
+SELECT data.* EXCEPT (s2.f2) FROM tbl_view;
+SELECT data.s2.* EXCEPT (f2) FROM tbl_view;
+
+-- Errors
+-- EXCEPT missing brackets
+SELECT * EXCEPT name FROM tbl_view;
+-- EXCEPT no columns
+SELECT * EXCEPT() name FROM tbl_view;
+-- EXCEPT invalid column
+SELECT * EXCEPT(invalid_column) FROM tbl_view;
+-- EXCEPT find invalid column
+SELECT * EXCEPT(id, invalid_column) FROM tbl_view;
+-- EXCEPT duplicate column
+SELECT * EXCEPT(id, id) FROM tbl_view;
+-- EXCEPT overlapping columns
+SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view;
diff --git a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
new file mode 100644
index 00000000000..6f6ba909734
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
@@ -0,0 +1,346 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES
+ (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3",
"a"))),
+ (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3",
"b"))),
+ (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3",
"c"))),
+ (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3",
"d"))),
+ (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3",
"e"))),
+ (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3",
"f"))),
+ (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3",
"g")))
+AS tbl_view(id, name, data)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE TABLE ids (id INT) USING CSV
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM tbl_view
+-- !query schema
+struct<id:int,name:string,data:struct<f1:int,s2:struct<f2:int,f3:string>>>
+-- !query output
+10 name1 {"f1":1,"s2":{"f2":101,"f3":"a"}}
+20 name2 {"f1":2,"s2":{"f2":202,"f3":"b"}}
+30 name3 {"f1":3,"s2":{"f2":303,"f3":"c"}}
+40 name4 {"f1":4,"s2":{"f2":404,"f3":"d"}}
+50 name5 {"f1":5,"s2":{"f2":505,"f3":"e"}}
+60 name6 {"f1":6,"s2":{"f2":606,"f3":"f"}}
+70 name7 {"f1":7,"s2":{"f2":707,"f3":"g"}}
+
+
+-- !query
+SELECT * EXCEPT (id) FROM tbl_view
+-- !query schema
+struct<name:string,data:struct<f1:int,s2:struct<f2:int,f3:string>>>
+-- !query output
+name1 {"f1":1,"s2":{"f2":101,"f3":"a"}}
+name2 {"f1":2,"s2":{"f2":202,"f3":"b"}}
+name3 {"f1":3,"s2":{"f2":303,"f3":"c"}}
+name4 {"f1":4,"s2":{"f2":404,"f3":"d"}}
+name5 {"f1":5,"s2":{"f2":505,"f3":"e"}}
+name6 {"f1":6,"s2":{"f2":606,"f3":"f"}}
+name7 {"f1":7,"s2":{"f2":707,"f3":"g"}}
+
+
+-- !query
+SELECT * EXCEPT (name) FROM tbl_view
+-- !query schema
+struct<id:int,data:struct<f1:int,s2:struct<f2:int,f3:string>>>
+-- !query output
+10 {"f1":1,"s2":{"f2":101,"f3":"a"}}
+20 {"f1":2,"s2":{"f2":202,"f3":"b"}}
+30 {"f1":3,"s2":{"f2":303,"f3":"c"}}
+40 {"f1":4,"s2":{"f2":404,"f3":"d"}}
+50 {"f1":5,"s2":{"f2":505,"f3":"e"}}
+60 {"f1":6,"s2":{"f2":606,"f3":"f"}}
+70 {"f1":7,"s2":{"f2":707,"f3":"g"}}
+
+
+-- !query
+SELECT * EXCEPT (data) FROM tbl_view
+-- !query schema
+struct<id:int,name:string>
+-- !query output
+10 name1
+20 name2
+30 name3
+40 name4
+50 name5
+60 name6
+70 name7
+
+
+-- !query
+SELECT * EXCEPT (data.f1) FROM tbl_view
+-- !query schema
+struct<id:int,name:string,data:struct<s2:struct<f2:int,f3:string>>>
+-- !query output
+10 name1 {"s2":{"f2":101,"f3":"a"}}
+20 name2 {"s2":{"f2":202,"f3":"b"}}
+30 name3 {"s2":{"f2":303,"f3":"c"}}
+40 name4 {"s2":{"f2":404,"f3":"d"}}
+50 name5 {"s2":{"f2":505,"f3":"e"}}
+60 name6 {"s2":{"f2":606,"f3":"f"}}
+70 name7 {"s2":{"f2":707,"f3":"g"}}
+
+
+-- !query
+SELECT * EXCEPT (data.s2) FROM tbl_view
+-- !query schema
+struct<id:int,name:string,data:struct<f1:int>>
+-- !query output
+10 name1 {"f1":1}
+20 name2 {"f1":2}
+30 name3 {"f1":3}
+40 name4 {"f1":4}
+50 name5 {"f1":5}
+60 name6 {"f1":6}
+70 name7 {"f1":7}
+
+
+-- !query
+SELECT * EXCEPT (data.s2.f2) FROM tbl_view
+-- !query schema
+struct<id:int,name:string,data:struct<f1:int,s2:struct<f3:string>>>
+-- !query output
+10 name1 {"f1":1,"s2":{"f3":"a"}}
+20 name2 {"f1":2,"s2":{"f3":"b"}}
+30 name3 {"f1":3,"s2":{"f3":"c"}}
+40 name4 {"f1":4,"s2":{"f3":"d"}}
+50 name5 {"f1":5,"s2":{"f3":"e"}}
+60 name6 {"f1":6,"s2":{"f3":"f"}}
+70 name7 {"f1":7,"s2":{"f3":"g"}}
+
+
+-- !query
+SELECT * EXCEPT (id, name, data) FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`)
+-- !query schema
+struct<a_b_c:int>
+-- !query output
+1
+
+
+-- !query
+SELECT tbl_view.* EXCEPT (name) FROM tbl_view
+-- !query schema
+struct<id:int,data:struct<f1:int,s2:struct<f2:int,f3:string>>>
+-- !query output
+10 {"f1":1,"s2":{"f2":101,"f3":"a"}}
+20 {"f1":2,"s2":{"f2":202,"f3":"b"}}
+30 {"f1":3,"s2":{"f2":303,"f3":"c"}}
+40 {"f1":4,"s2":{"f2":404,"f3":"d"}}
+50 {"f1":5,"s2":{"f2":505,"f3":"e"}}
+60 {"f1":6,"s2":{"f2":606,"f3":"f"}}
+70 {"f1":7,"s2":{"f2":707,"f3":"g"}}
+
+
+-- !query
+INSERT INTO ids
+SELECT * EXCEPT (name, data) FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM ids
+-- !query schema
+struct<id:int>
+-- !query output
+10
+20
+30
+40
+50
+60
+70
+
+
+-- !query
+SELECT * EXCEPT (ids.id) FROM ids
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT data.* EXCEPT (s2) FROM tbl_view
+-- !query schema
+struct<f1:int>
+-- !query output
+1
+2
+3
+4
+5
+6
+7
+
+
+-- !query
+SELECT data.* EXCEPT (s2.f2) FROM tbl_view
+-- !query schema
+struct<f1:int,s2:struct<f3:string>>
+-- !query output
+1 {"f3":"a"}
+2 {"f3":"b"}
+3 {"f3":"c"}
+4 {"f3":"d"}
+5 {"f3":"e"}
+6 {"f3":"f"}
+7 {"f3":"g"}
+
+
+-- !query
+SELECT data.s2.* EXCEPT (f2) FROM tbl_view
+-- !query schema
+struct<f3:string>
+-- !query output
+a
+b
+c
+d
+e
+f
+g
+
+
+-- !query
+SELECT * EXCEPT name FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'name'",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+SELECT * EXCEPT() name FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "')'",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+SELECT * EXCEPT(invalid_column) FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`invalid_column`",
+ "proposal" : "`id`, `name`, `data`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 31,
+ "fragment" : "* EXCEPT(invalid_column)"
+ } ]
+}
+
+
+-- !query
+SELECT * EXCEPT(id, invalid_column) FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`invalid_column`",
+ "proposal" : "`id`, `name`, `data`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 35,
+ "fragment" : "* EXCEPT(id, invalid_column)"
+ } ]
+}
+
+
+-- !query
+SELECT * EXCEPT(id, id) FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "columns" : "id, id"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 23,
+ "fragment" : "* EXCEPT(id, id)"
+ } ]
+}
+
+
+-- !query
+SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "columns" : "data.s2, data.s2.f2"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 36,
+ "fragment" : "* EXCEPT(data.s2, data.s2.f2)"
+ } ]
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]