This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new fdbacdf0fb21 [SPARK-48760][SQL][DOCS][FOLLOWUP] Add `CLUSTER BY` to
doc `sql-ref-syntax-ddl-alter-table.md`
fdbacdf0fb21 is described below
commit fdbacdf0fb21873d55cb0de63653ace8363187b9
Author: panbingkun <[email protected]>
AuthorDate: Tue Jul 9 21:18:10 2024 +0800
[SPARK-48760][SQL][DOCS][FOLLOWUP] Add `CLUSTER BY` to doc
`sql-ref-syntax-ddl-alter-table.md`
### What changes were proposed in this pull request?
This PR follows up https://github.com/apache/spark/pull/47156 and aims to:
- add `CLUSTER BY` to doc `sql-ref-syntax-ddl-alter-table.md`
- move parser tests from `o.a.s.s.c.p.DDLParserSuite` to
`AlterTableClusterByParserSuite`
- use `checkError` to check exception in
`o.a.s.s.e.c.AlterTableClusterBySuiteBase`
### Why are the changes needed?
- Enable the doc `sql-ref-syntax-ddl-alter-table.md` to cover new syntax
`ALTER TABLE ... CLUSTER BY ...`.
- Align with other similar tests, e.g., AlterTableRename*.
### Does this PR introduce _any_ user-facing change?
Yes. End-users can now look up the explanation of `CLUSTER BY` in the
doc `sql-ref-syntax-ddl-alter-table.md`.
### How was this patch tested?
Updated UT.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #47254 from panbingkun/SPARK-48760_FOLLOWUP.
Authored-by: panbingkun <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
docs/sql-ref-syntax-ddl-alter-table.md | 71 ++++++++++++++++++++++
.../spark/sql/catalyst/parser/DDLParserSuite.scala | 18 ------
.../catalyst/analysis/ResolveSessionCatalog.scala | 9 +--
.../command/AlterTableClusterByParserSuite.scala | 44 ++++++++++++++
.../command/AlterTableClusterBySuiteBase.scala | 33 ++++++----
.../command/v1/AlterTableClusterBySuite.scala | 2 +-
.../command/v2/AlterTableClusterBySuite.scala | 6 +-
7 files changed, 142 insertions(+), 41 deletions(-)
diff --git a/docs/sql-ref-syntax-ddl-alter-table.md
b/docs/sql-ref-syntax-ddl-alter-table.md
index 31eaf659b5c7..adcfa8db06f1 100644
--- a/docs/sql-ref-syntax-ddl-alter-table.md
+++ b/docs/sql-ref-syntax-ddl-alter-table.md
@@ -233,6 +233,32 @@ ALTER TABLE table_identifier DROP [ IF EXISTS ]
partition_spec [PURGE]
Partition to be dropped. Note that one can use a typed literal (e.g.,
date'2019-01-02') in the partition spec.
**Syntax:** `PARTITION ( partition_col_name = partition_col_val [ , ... ]
)`
+
+#### CLUSTER BY
+
+`ALTER TABLE CLUSTER BY` command can also be used for changing or removing the
clustering columns for existing tables.
+
+##### Syntax
+
+```sql
+-- Changing Clustering Columns
+ALTER TABLE table_identifier CLUSTER BY ( col_name [ , ... ] )
+
+-- Removing Clustering Columns
+ALTER TABLE table_identifier CLUSTER BY NONE
+```
+
+##### Parameters
+
+* **table_identifier**
+
+ Specifies a table name, which may be optionally qualified with a database
name.
+
+ **Syntax:** `[ database_name. ] table_name`
+
+* **col_name**
+
+ Specifies the name of the column.
### SET AND UNSET
@@ -596,6 +622,51 @@ SHOW PARTITIONS StudentInfo;
| age=20|
+---------+
+-- CLUSTER BY
+DESC Teacher;
++------------------------+---------+-------+
+| col_name|data_type|comment|
++------------------------+---------+-------+
+| name| string| NULL|
+| gender| string| NULL|
+| country| string| NULL|
+| age| int| NULL|
+|# Clustering Information| | |
+| # col_name|data_type|comment|
+| gender| string| NULL|
++------------------------+---------+-------+
+
+ALTER TABLE Teacher CLUSTER BY (gender, country);
+
+-- After changing clustering columns
+DESC Teacher;
++------------------------+---------+-------+
+| col_name|data_type|comment|
++------------------------+---------+-------+
+| name| string| NULL|
+| gender| string| NULL|
+| country| string| NULL|
+| age| int| NULL|
+|# Clustering Information| | |
+| # col_name|data_type|comment|
+| gender| string| NULL|
+| country| string| NULL|
++------------------------+---------+-------+
+
+ALTER TABLE Teacher CLUSTER BY NONE;
+
+-- After removing clustering columns
+DESC Teacher;
++------------------------+---------+-------+
+| col_name|data_type|comment|
++------------------------+---------+-------+
+| name| string| NULL|
+| gender| string| NULL|
+| country| string| NULL|
+| age| int| NULL|
+|# Clustering Information| | |
++------------------------+---------+-------+
+
-- Change the fileformat
ALTER TABLE loc_orc SET fileformat orc;
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
index 70cfa9325419..f88c516f0019 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
@@ -21,7 +21,6 @@ import java.util.Locale
import org.apache.spark.SparkThrowable
import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.catalog.ClusterBySpec
import org.apache.spark.sql.catalyst.expressions.{EqualTo, Hex, Literal}
import org.apache.spark.sql.catalyst.plans.logical._
import
org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition.{after, first}
@@ -236,23 +235,6 @@ class DDLParserSuite extends AnalysisTest {
}
}
- test("alter table cluster by") {
- comparePlans(
- parsePlan("ALTER TABLE table_name CLUSTER BY (`a.b`, c.d, none)"),
- AlterTableClusterBy(
- UnresolvedTable(Seq("table_name"), "ALTER TABLE ... CLUSTER BY"),
- Some(ClusterBySpec(Seq(
- FieldReference(Seq("a.b")),
- FieldReference(Seq("c", "d")),
- FieldReference(Seq("none")))))))
-
- comparePlans(
- parsePlan("ALTER TABLE table_name CLUSTER BY NONE"),
- AlterTableClusterBy(
- UnresolvedTable(Seq("table_name"), "ALTER TABLE ... CLUSTER BY"),
- None))
- }
-
test("create/replace table - with comment") {
val createSql = "CREATE TABLE my_tab(a INT, b STRING) USING parquet
COMMENT 'abc'"
val replaceSql = "REPLACE TABLE my_tab(a INT, b STRING) USING parquet
COMMENT 'abc'"
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index d8fa48a72cf8..1f9419c41b74 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -103,13 +103,10 @@ class ResolveSessionCatalog(val catalogManager:
CatalogManager)
builder.build())
AlterTableChangeColumnCommand(table.catalogTable.identifier, colName,
newColumn)
- case AlterTableClusterBy(ResolvedTable(catalog, ident, table: V1Table, _),
clusterBySpecOpt)
+ case AlterTableClusterBy(ResolvedTable(catalog, _, table: V1Table, _),
clusterBySpecOpt)
if isSessionCatalog(catalog) =>
- val prop = clusterBySpecOpt.map { clusterBySpec =>
- Map(ClusterBySpec.toProperty(table.schema, clusterBySpec,
conf.resolver))
- }.getOrElse {
- Map(ClusterBySpec.toProperty(table.schema, ClusterBySpec(Nil),
conf.resolver))
- }
+ val prop = Map(ClusterBySpec.toProperty(table.schema,
+ clusterBySpecOpt.getOrElse(ClusterBySpec(Nil)), conf.resolver))
AlterTableSetPropertiesCommand(table.catalogTable.identifier, prop,
isView = false)
case RenameColumn(ResolvedV1TableIdentifier(ident), _, _) =>
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableClusterByParserSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableClusterByParserSuite.scala
new file mode 100644
index 000000000000..1bbb3eb35fed
--- /dev/null
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableClusterByParserSuite.scala
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedTable}
+import org.apache.spark.sql.catalyst.catalog.ClusterBySpec
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
+import org.apache.spark.sql.catalyst.plans.logical.AlterTableClusterBy
+import org.apache.spark.sql.connector.expressions.FieldReference
+import org.apache.spark.sql.test.SharedSparkSession
+
+class AlterTableClusterByParserSuite extends AnalysisTest with
SharedSparkSession {
+
+ test("alter table cluster by") {
+ comparePlans(
+ parsePlan("ALTER TABLE table_name CLUSTER BY (`a.b`, c.d, none)"),
+ AlterTableClusterBy(
+ UnresolvedTable(Seq("table_name"), "ALTER TABLE ... CLUSTER BY"),
+ Some(ClusterBySpec(Seq(
+ FieldReference(Seq("a.b")),
+ FieldReference(Seq("c", "d")),
+ FieldReference(Seq("none")))))))
+
+ comparePlans(
+ parsePlan("ALTER TABLE table_name CLUSTER BY NONE"),
+ AlterTableClusterBy(
+ UnresolvedTable(Seq("table_name"), "ALTER TABLE ... CLUSTER BY"),
+ None))
+ }
+}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableClusterBySuiteBase.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableClusterBySuiteBase.scala
index 8961019f3f8d..73a80cd91069 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableClusterBySuiteBase.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableClusterBySuiteBase.scala
@@ -45,34 +45,33 @@ trait AlterTableClusterBySuiteBase extends QueryTest with
DDLCommandTestUtils {
test("test basic ALTER TABLE with clustering columns") {
withNamespaceAndTable("ns", "table") { tbl =>
- spark.sql(s"CREATE TABLE $tbl (id INT, data STRING) $defaultUsing
CLUSTER BY (id, data)")
+ sql(s"CREATE TABLE $tbl (id INT, data STRING) $defaultUsing CLUSTER BY
(id, data)")
validateClusterBy(tbl, Seq("id", "data"))
- spark.sql(s"ALTER TABLE $tbl CLUSTER BY (data, id)")
+ sql(s"ALTER TABLE $tbl CLUSTER BY (data, id)")
validateClusterBy(tbl, Seq("data", "id"))
- spark.sql(s"ALTER TABLE $tbl CLUSTER BY NONE")
+ sql(s"ALTER TABLE $tbl CLUSTER BY NONE")
validateClusterBy(tbl, Seq.empty)
- spark.sql(s"ALTER TABLE $tbl CLUSTER BY (id)")
+ sql(s"ALTER TABLE $tbl CLUSTER BY (id)")
validateClusterBy(tbl, Seq("id"))
}
}
test("test clustering columns with comma") {
withNamespaceAndTable("ns", "table") { tbl =>
- spark.sql(s"CREATE TABLE $tbl (`i,d` INT, data STRING) $defaultUsing " +
- "CLUSTER BY (`i,d`, data)")
+ sql(s"CREATE TABLE $tbl (`i,d` INT, data STRING) $defaultUsing CLUSTER
BY (`i,d`, data)")
validateClusterBy(tbl, Seq("`i,d`", "data"))
- spark.sql(s"ALTER TABLE $tbl CLUSTER BY (data, `i,d`)")
+ sql(s"ALTER TABLE $tbl CLUSTER BY (data, `i,d`)")
validateClusterBy(tbl, Seq("data", "`i,d`"))
}
}
test("test nested clustering columns") {
withNamespaceAndTable("ns", "table") { tbl =>
- spark.sql(s"CREATE TABLE $tbl " +
+ sql(s"CREATE TABLE $tbl " +
s"($nestedColumnSchema) " +
s"$defaultUsing CLUSTER BY (${nestedClusteringColumns.mkString(",")})")
validateClusterBy(tbl, nestedClusteringColumns)
- spark.sql(s"ALTER TABLE $tbl CLUSTER BY
(${nestedClusteringColumnsNew.mkString(",")})")
+ sql(s"ALTER TABLE $tbl CLUSTER BY
(${nestedClusteringColumnsNew.mkString(",")})")
validateClusterBy(tbl, nestedClusteringColumnsNew)
}
}
@@ -80,10 +79,18 @@ trait AlterTableClusterBySuiteBase extends QueryTest with
DDLCommandTestUtils {
test("clustering columns not defined in schema") {
withNamespaceAndTable("ns", "table") { tbl =>
sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing CLUSTER
BY (id)")
- val err = intercept[AnalysisException] {
- sql(s"ALTER TABLE $tbl CLUSTER BY (unknown)")
- }
- assert(err.message.contains("Couldn't find column unknown in:"))
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(s"ALTER TABLE $tbl CLUSTER BY (unknown)")
+ },
+ errorClass = "_LEGACY_ERROR_TEMP_3060",
+ parameters = Map("i" -> "unknown",
+ "schema" ->
+ """root
+ | |-- id: long (nullable = true)
+ | |-- data: string (nullable = true)
+ |""".stripMargin)
+ )
}
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableClusterBySuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableClusterBySuite.scala
index 385ef961127a..f5f2b7517b48 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableClusterBySuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableClusterBySuite.scala
@@ -36,7 +36,7 @@ trait AlterTableClusterBySuiteBase extends
command.AlterTableClusterBySuiteBase
override def validateClusterBy(tableName: String, clusteringColumns:
Seq[String]): Unit = {
val catalog = spark.sessionState.catalog
val (_, db, t) = parseTableName(tableName)
- val table = catalog.getTableMetadata(TableIdentifier.apply(t, Some(db)))
+ val table = catalog.getTableMetadata(TableIdentifier(t, Some(db)))
assert(table.clusterBySpec ===
Some(ClusterBySpec(clusteringColumns.map(FieldReference(_)))))
}
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableClusterBySuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableClusterBySuite.scala
index bbbe6cd75875..8a74d9c3572b 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableClusterBySuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableClusterBySuite.scala
@@ -40,13 +40,13 @@ class AlterTableClusterBySuite extends
command.AlterTableClusterBySuiteBase
test("test REPLACE TABLE with clustering columns") {
withNamespaceAndTable("ns", "table") { tbl =>
- spark.sql(s"CREATE TABLE $tbl (id INT) $defaultUsing CLUSTER BY (id)")
+ sql(s"CREATE TABLE $tbl (id INT) $defaultUsing CLUSTER BY (id)")
validateClusterBy(tbl, Seq("id"))
- spark.sql(s"REPLACE TABLE $tbl (id INT, id2 INT) $defaultUsing CLUSTER
BY (id2)")
+ sql(s"REPLACE TABLE $tbl (id INT, id2 INT) $defaultUsing CLUSTER BY
(id2)")
validateClusterBy(tbl, Seq("id2"))
- spark.sql(s"ALTER TABLE $tbl CLUSTER BY (id)")
+ sql(s"ALTER TABLE $tbl CLUSTER BY (id)")
validateClusterBy(tbl, Seq("id"))
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]