This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 04b7cdf31e8 [SPARK-40943][SQL] Make `MSCK` keyword optional in `REPAIR 
TABLE` syntax
04b7cdf31e8 is described below

commit 04b7cdf31e8062dc8d964d979007948979ced0fb
Author: Ben Zhang <ben.zh...@databricks.com>
AuthorDate: Wed Feb 15 23:02:04 2023 -0800

    [SPARK-40943][SQL] Make `MSCK` keyword optional in `REPAIR TABLE` syntax
    
    Make the `MSCK` keyword optional in `MSCK REPAIR TABLE` commands so that it 
can be omitted.
    
    The use of the keyword `MSCK`, meaning metastore check, is arcane and does 
not add value for the command. Allowing it to be omitted makes the meaning of 
`REPAIR TABLE` commands clearer.
    
    The [Spark documentation 
page](https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-repair-table.html)
 for this command is titled "REPAIR TABLE". It only mentions the MSCK keyword 
in the content of the page, indicating that this keyword is not necessary for 
the semantic understanding of the command.
    
    Additionally, here is a reference from 
[MySQL](https://dev.mysql.com/doc/refman/8.0/en/repair-table.html), which 
completely omits the MSCK keyword.
    
    Yes, previously, it was not possible to specify only `REPAIR TABLE` without 
`MSCK`. Now, it is possible. No changes to existing behaviour using the 
original `MSCK REPAIR TABLE` syntax are in this PR.
    
    Unit tests.
    
    Closes #38433 from ben-zhang/SPARK-40943.
    
    Authored-by: Ben Zhang <ben.zh...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
    (cherry picked from commit 64aef23d9a8c4f9222d6cf9994545157487f78b1)
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 docs/sql-ref-syntax-ddl-repair-table.md                          | 8 ++++----
 .../antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 | 2 +-
 .../scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala  | 2 +-
 .../spark/sql/execution/command/MsckRepairTableParserSuite.scala | 9 +++++++++
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/docs/sql-ref-syntax-ddl-repair-table.md 
b/docs/sql-ref-syntax-ddl-repair-table.md
index 2e3711c2602..c64da2338dc 100644
--- a/docs/sql-ref-syntax-ddl-repair-table.md
+++ b/docs/sql-ref-syntax-ddl-repair-table.md
@@ -21,14 +21,14 @@ license: |
 
 ### Description
 
-`MSCK REPAIR TABLE` recovers all the partitions in the directory of a table 
and updates the Hive metastore. When creating a table using `PARTITIONED BY` 
clause, partitions are generated and registered in the Hive metastore. However, 
if the partitioned table is created from existing data, partitions are not 
registered automatically in the Hive metastore. User needs to run `MSCK REPAIR 
TABLE` to register the partitions. `MSCK REPAIR TABLE` on a non-existent table 
or a table without partiti [...]
+`REPAIR TABLE` recovers all the partitions in the directory of a table and 
updates the Hive metastore. When creating a table using `PARTITIONED BY` 
clause, partitions are generated and registered in the Hive metastore. However, 
if the partitioned table is created from existing data, partitions are not 
registered automatically in the Hive metastore. User needs to run `REPAIR 
TABLE` to register the partitions. `REPAIR TABLE` on a non-existent table or a 
table without partitions throws an e [...]
 
 If the table is cached, the command clears cached data of the table and all 
its dependents that refer to it. The cache will be lazily filled when the next 
time the table or the dependents are accessed.
 
 ### Syntax
 
 ```sql
-MSCK REPAIR TABLE table_identifier [{ADD|DROP|SYNC} PARTITIONS]
+[MSCK] REPAIR TABLE table_identifier [{ADD|DROP|SYNC} PARTITIONS]
 ```
 
 ### Parameters
@@ -57,8 +57,8 @@ CREATE TABLE t1 (name STRING, age INT) USING parquet 
PARTITIONED BY (age)
 -- SELECT * FROM t1 does not return results
 SELECT * FROM t1;
 
--- run MSCK REPAIR TABLE to recovers all the partitions
-MSCK REPAIR TABLE t1;
+-- run REPAIR TABLE to recovers all the partitions
+REPAIR TABLE t1;
 
 -- SELECT * FROM t1 returns results
 SELECT * FROM t1;
diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 078a9939116..7c073411188 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -203,7 +203,7 @@ statement
     | LOAD DATA LOCAL? INPATH path=stringLit OVERWRITE? INTO TABLE
         multipartIdentifier partitionSpec?                             
#loadData
     | TRUNCATE TABLE multipartIdentifier partitionSpec?                
#truncateTable
-    | MSCK REPAIR TABLE multipartIdentifier
+    | (MSCK)? REPAIR TABLE multipartIdentifier
         (option=(ADD|DROP|SYNC) PARTITIONS)?                           
#repairTable
     | op=(ADD | LIST) identifier .*?                                   
#manageResource
     | SET ROLE .*?                                                     
#failNativeCommand
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index aea496b872b..8957794ad95 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -4388,7 +4388,7 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] 
with SQLConfHelper wit
    *
    * For example:
    * {{{
-   *   MSCK REPAIR TABLE multi_part_name [{ADD|DROP|SYNC} PARTITIONS]
+   *   [MSCK] REPAIR TABLE multi_part_name [{ADD|DROP|SYNC} PARTITIONS]
    * }}}
    */
   override def visitRepairTable(ctx: RepairTableContext): LogicalPlan = 
withOrigin(ctx) {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/MsckRepairTableParserSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/MsckRepairTableParserSuite.scala
index 458b3a4fc3c..f52315a4979 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/MsckRepairTableParserSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/MsckRepairTableParserSuite.scala
@@ -31,6 +31,15 @@ class MsckRepairTableParserSuite extends AnalysisTest {
         enableDropPartitions = false))
   }
 
+  test("repair a table without MSCK keyword") {
+    comparePlans(
+      parsePlan("REPAIR TABLE a.b.c"),
+      RepairTable(
+        UnresolvedTable(Seq("a", "b", "c"), "MSCK REPAIR TABLE", None),
+        enableAddPartitions = true,
+        enableDropPartitions = false))
+  }
+
   test("add partitions") {
     comparePlans(
       parsePlan("msck repair table ns.tbl add partitions"),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to