This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 2fd8517  [SPARK-34603][SQL] Support ADD ARCHIVE and LIST ARCHIVES 
command
2fd8517 is described below

commit 2fd85174e9423673edec5ecb1f1c402ec33472fe
Author: Kousuke Saruta <[email protected]>
AuthorDate: Tue Mar 9 21:28:35 2021 +0900

    [SPARK-34603][SQL] Support ADD ARCHIVE and LIST ARCHIVES command
    
    ### What changes were proposed in this pull request?
    
    This PR adds `ADD ARCHIVE` and `LIST ARCHIVES` commands to SQL and updates 
relevant documents.
    SPARK-33530 added `addArchive` and `listArchives` to `SparkContext`, but 
adding/listing archives with SQL is not supported yet.
    
    ### Why are the changes needed?
    
    To complement features.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. `ADD ARCHIVE` and `LIST ARCHIVES` become available as SQL commands.
    
    ### How was this patch tested?
    
    Added new tests and confirmed the generated HTML from the updated documents.
    
    Closes #31721 from sarutak/sql-archive.
    
    Authored-by: Kousuke Saruta <[email protected]>
    Signed-off-by: HyukjinKwon <[email protected]>
---
 docs/_data/menu-sql.yaml                           |  4 +
 ...ql-ref-syntax-aux-resource-mgmt-add-archive.md} | 28 +++----
 docs/sql-ref-syntax-aux-resource-mgmt-add-file.md  |  3 +-
 docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md   |  2 +
 ...l-ref-syntax-aux-resource-mgmt-list-archive.md} | 33 ++++----
 docs/sql-ref-syntax-aux-resource-mgmt-list-file.md |  5 +-
 docs/sql-ref-syntax-aux-resource-mgmt-list-jar.md  |  6 +-
 docs/sql-ref-syntax-aux-resource-mgmt.md           |  2 +
 .../spark/sql/execution/SparkSqlParser.scala       | 10 ++-
 .../spark/sql/execution/command/resources.scala    | 31 +++++++
 .../spark/sql/hive/execution/HiveQuerySuite.scala  | 95 ++++++++++++++++++++++
 11 files changed, 183 insertions(+), 36 deletions(-)

diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml
index a9ea6fe..5192422 100644
--- a/docs/_data/menu-sql.yaml
+++ b/docs/_data/menu-sql.yaml
@@ -263,7 +263,11 @@
                   url: sql-ref-syntax-aux-resource-mgmt-add-file.html
                 - text: ADD JAR
                   url: sql-ref-syntax-aux-resource-mgmt-add-jar.html
+                - text: ADD ARCHIVE
+                  url: sql-ref-syntax-aux-resource-mgmt-add-archive.html
                 - text: LIST FILE
                   url: sql-ref-syntax-aux-resource-mgmt-list-file.html
                 - text: LIST JAR
                   url: sql-ref-syntax-aux-resource-mgmt-list-jar.html
+                - text: LIST ARCHIVE
+                  url: sql-ref-syntax-aux-resource-mgmt-list-archive.html
diff --git a/docs/sql-ref-syntax-aux-resource-mgmt-add-file.md 
b/docs/sql-ref-syntax-aux-resource-mgmt-add-archive.md
similarity index 60%
copy from docs/sql-ref-syntax-aux-resource-mgmt-add-file.md
copy to docs/sql-ref-syntax-aux-resource-mgmt-add-archive.md
index 9203293..fa86acd 100644
--- a/docs/sql-ref-syntax-aux-resource-mgmt-add-file.md
+++ b/docs/sql-ref-syntax-aux-resource-mgmt-add-archive.md
@@ -1,7 +1,7 @@
 ---
 layout: global
-title: ADD FILE
-displayTitle: ADD FILE
+title: ADD ARCHIVE
+displayTitle: ADD ARCHIVE
 license: |
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
@@ -9,9 +9,9 @@ license: |
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at
- 
+
      http://www.apache.org/licenses/LICENSE-2.0
- 
+
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -21,33 +21,33 @@ license: |
 
 ### Description
 
-`ADD FILE` can be used to add a single file as well as a directory to the list 
of resources. The added resource can be listed using [LIST 
FILE](sql-ref-syntax-aux-resource-mgmt-list-file.html).
+`ADD ARCHIVE` can be used to add an archive file to the list of resources. The 
given archive file should be one of .zip, .tar, .tar.gz, .tgz or .jar. The 
added archive file can be listed using [LIST 
ARCHIVE](sql-ref-syntax-aux-resource-mgmt-list-archive.html).
 
 ### Syntax
 
 ```sql
-ADD FILE resource_name
+ADD ARCHIVE file_name
 ```
 
 ### Parameters
 
-* **resource_name**
+* **file_name**
 
-    The name of the file or directory to be added.
+    The name of the archive file to be added. It could be either on a local 
file system or a distributed file system.
 
 ### Examples
 
 ```sql
-ADD FILE /tmp/test;
-ADD FILE "/path/to/file/abc.txt";
-ADD FILE '/another/test.txt';
-ADD FILE "/path with space/abc.txt";
-ADD FILE "/path/to/some/directory";
+ADD ARCHIVE /tmp/test.tar.gz;
+ADD ARCHIVE "/path/to/some.zip";
+ADD ARCHIVE '/some/other.tgz';
+ADD ARCHIVE "/path with space/abc.tar";
 ```
 
 ### Related Statements
 
 * [LIST FILE](sql-ref-syntax-aux-resource-mgmt-list-file.html)
 * [LIST JAR](sql-ref-syntax-aux-resource-mgmt-list-jar.html)
+* [LIST ARCHIVE](sql-ref-syntax-aux-resource-mgmt-list-archive.html)
+* [ADD FILE](sql-ref-syntax-aux-resource-mgmt-add-file.html)
 * [ADD JAR](sql-ref-syntax-aux-resource-mgmt-add-jar.html)
-
diff --git a/docs/sql-ref-syntax-aux-resource-mgmt-add-file.md 
b/docs/sql-ref-syntax-aux-resource-mgmt-add-file.md
index 9203293..3154c43 100644
--- a/docs/sql-ref-syntax-aux-resource-mgmt-add-file.md
+++ b/docs/sql-ref-syntax-aux-resource-mgmt-add-file.md
@@ -49,5 +49,6 @@ ADD FILE "/path/to/some/directory";
 
 * [LIST FILE](sql-ref-syntax-aux-resource-mgmt-list-file.html)
 * [LIST JAR](sql-ref-syntax-aux-resource-mgmt-list-jar.html)
+* [LIST ARCHIVE](sql-ref-syntax-aux-resource-mgmt-list-archive.html)
 * [ADD JAR](sql-ref-syntax-aux-resource-mgmt-add-jar.html)
-
+* [ADD ARCHIVE](sql-ref-syntax-aux-resource-mgmt-add-archive.html)
diff --git a/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md 
b/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md
index e5ac58b..3b342b0 100644
--- a/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md
+++ b/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md
@@ -63,3 +63,5 @@ ADD JAR 
"ivy://group:module:version?exclude=group:module&transitive=true"
 * [LIST JAR](sql-ref-syntax-aux-resource-mgmt-list-jar.html)
 * [ADD FILE](sql-ref-syntax-aux-resource-mgmt-add-file.html)
 * [LIST FILE](sql-ref-syntax-aux-resource-mgmt-list-file.html)
+* [ADD ARCHIVE](sql-ref-syntax-aux-resource-mgmt-add-archive.html)
+* [LIST ARCHIVE](sql-ref-syntax-aux-resource-mgmt-list-archive.html)
diff --git a/docs/sql-ref-syntax-aux-resource-mgmt-list-jar.md 
b/docs/sql-ref-syntax-aux-resource-mgmt-list-archive.md
similarity index 66%
copy from docs/sql-ref-syntax-aux-resource-mgmt-list-jar.md
copy to docs/sql-ref-syntax-aux-resource-mgmt-list-archive.md
index 04aa52c..ae5051a 100644
--- a/docs/sql-ref-syntax-aux-resource-mgmt-list-jar.md
+++ b/docs/sql-ref-syntax-aux-resource-mgmt-list-archive.md
@@ -1,7 +1,7 @@
 ---
 layout: global
-title: LIST JAR
-displayTitle: LIST JAR
+title: LIST ARCHIVE
+displayTitle: LIST ARCHIVE
 license: |
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
@@ -9,9 +9,9 @@ license: |
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at
- 
+
      http://www.apache.org/licenses/LICENSE-2.0
- 
+
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -21,32 +21,33 @@ license: |
 
 ### Description
 
-`LIST JAR` lists the JARs added by [ADD 
JAR](sql-ref-syntax-aux-resource-mgmt-add-jar.html).
+`LIST ARCHIVE` lists the archives added by [ADD 
ARCHIVE](sql-ref-syntax-aux-resource-mgmt-add-archive.html).
 
 ### Syntax
 
 ```sql
-LIST JAR
+LIST { ARCHIVE | ARCHIVES } [ file_name [ ... ] ]
 ```
 
 ### Examples
 
 ```sql
-ADD JAR /tmp/test.jar;
-ADD JAR /tmp/test_2.jar;
-LIST JAR;
--- output for LIST JAR
-spark://192.168.1.112:62859/jars/test.jar
-spark://192.168.1.112:62859/jars/test_2.jar
-
-LIST JAR /tmp/test.jar /some/random.jar /another/random.jar;
+ADD ARCHIVE /tmp/test.zip;
+ADD ARCHIVE /tmp/test_2.tar.gz;
+LIST ARCHIVE;
+-- output for LIST ARCHIVE
+file:/tmp/test.zip
+file:/tmp/test_2.tar.gz
+
+LIST ARCHIVE /tmp/test.zip /some/random.tgz /another/random.tar;
 -- output
-spark://192.168.1.112:62859/jars/test.jar
+file:/tmp/test.zip
 ```
 
 ### Related Statements
 
 * [ADD JAR](sql-ref-syntax-aux-resource-mgmt-add-jar.html)
 * [ADD FILE](sql-ref-syntax-aux-resource-mgmt-add-file.html)
+* [ADD ARCHIVE](sql-ref-syntax-aux-resource-mgmt-add-archive.html)
 * [LIST FILE](sql-ref-syntax-aux-resource-mgmt-list-file.html)
-
+* [LIST JAR](sql-ref-syntax-aux-resource-mgmt-list-jar.html)
diff --git a/docs/sql-ref-syntax-aux-resource-mgmt-list-file.md 
b/docs/sql-ref-syntax-aux-resource-mgmt-list-file.md
index 9b9a7df..d92f5fe 100644
--- a/docs/sql-ref-syntax-aux-resource-mgmt-list-file.md
+++ b/docs/sql-ref-syntax-aux-resource-mgmt-list-file.md
@@ -26,7 +26,7 @@ license: |
 ### Syntax
 
 ```sql
-LIST FILE
+LIST { FILE | FILES } [ file_name [ ... ] ]
 ```
 
 ### Examples
@@ -48,5 +48,6 @@ file:/private/tmp/test
 
 * [ADD FILE](sql-ref-syntax-aux-resource-mgmt-add-file.html)
 * [ADD JAR](sql-ref-syntax-aux-resource-mgmt-add-jar.html)
+* [ADD ARCHIVE](sql-ref-syntax-aux-resource-mgmt-add-archive.html)
 * [LIST JAR](sql-ref-syntax-aux-resource-mgmt-list-jar.html)
-
+* [LIST ARCHIVE](sql-ref-syntax-aux-resource-mgmt-list-archive.html)
diff --git a/docs/sql-ref-syntax-aux-resource-mgmt-list-jar.md 
b/docs/sql-ref-syntax-aux-resource-mgmt-list-jar.md
index 04aa52c..6c705aa 100644
--- a/docs/sql-ref-syntax-aux-resource-mgmt-list-jar.md
+++ b/docs/sql-ref-syntax-aux-resource-mgmt-list-jar.md
@@ -26,7 +26,8 @@ license: |
 ### Syntax
 
 ```sql
-LIST JAR
+LIST { JAR | JARS } [ file_name [ ... ] ]
+
 ```
 
 ### Examples
@@ -48,5 +49,6 @@ spark://192.168.1.112:62859/jars/test.jar
 
 * [ADD JAR](sql-ref-syntax-aux-resource-mgmt-add-jar.html)
 * [ADD FILE](sql-ref-syntax-aux-resource-mgmt-add-file.html)
+* [ADD ARCHIVE](sql-ref-syntax-aux-resource-mgmt-add-archive.html)
 * [LIST FILE](sql-ref-syntax-aux-resource-mgmt-list-file.html)
-
+* [LIST ARCHIVE](sql-ref-syntax-aux-resource-mgmt-list-archive.html)
diff --git a/docs/sql-ref-syntax-aux-resource-mgmt.md 
b/docs/sql-ref-syntax-aux-resource-mgmt.md
index 50c12ef..4a26282 100644
--- a/docs/sql-ref-syntax-aux-resource-mgmt.md
+++ b/docs/sql-ref-syntax-aux-resource-mgmt.md
@@ -21,5 +21,7 @@ license: |
 
  * [ADD FILE](sql-ref-syntax-aux-resource-mgmt-add-file.html)
  * [ADD JAR](sql-ref-syntax-aux-resource-mgmt-add-jar.html)
+ * [ADD ARCHIVE](sql-ref-syntax-aux-resource-mgmt-add-archive.html)
  * [LIST FILE](sql-ref-syntax-aux-resource-mgmt-list-file.html)
  * [LIST JAR](sql-ref-syntax-aux-resource-mgmt-list-jar.html)
+ * [LIST ARCHIVE](sql-ref-syntax-aux-resource-mgmt-list-archive.html)
\ No newline at end of file
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index e9feba7..0861706 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -339,7 +339,8 @@ class SparkSqlAstBuilder extends AstBuilder {
   }
 
   /**
-   * Create a [[AddFileCommand]], [[AddJarCommand]], [[ListFilesCommand]] or 
[[ListJarsCommand]]
+   * Create a [[AddFileCommand]], [[AddJarCommand]], [[AddArchiveCommand]],
+   * [[ListFilesCommand]], [[ListJarsCommand]] or [[ListArchivesCommand]]
    * command depending on the requested operation on resources.
    * Expected format:
    * {{{
@@ -359,6 +360,7 @@ class SparkSqlAstBuilder extends AstBuilder {
         ctx.identifier.getText.toLowerCase(Locale.ROOT) match {
           case "file" => AddFileCommand(maybePaths)
           case "jar" => AddJarCommand(maybePaths)
+          case "archive" => AddArchiveCommand(maybePaths)
           case other => operationNotAllowed(s"ADD with resource type 
'$other'", ctx)
         }
       case SqlBaseParser.LIST =>
@@ -375,6 +377,12 @@ class SparkSqlAstBuilder extends AstBuilder {
             } else {
               ListJarsCommand()
             }
+          case "archives" | "archive" =>
+            if (maybePaths.length > 0) {
+              ListArchivesCommand(maybePaths.split("\\s+"))
+            } else {
+              ListArchivesCommand()
+            }
           case other => operationNotAllowed(s"LIST with resource type 
'$other'", ctx)
         }
       case _ => operationNotAllowed(s"Other types of operation on resources", 
ctx)
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/resources.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/resources.scala
index fd115ac..691837f 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/resources.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/resources.scala
@@ -48,6 +48,16 @@ case class AddFileCommand(path: String) extends 
RunnableCommand {
 }
 
 /**
+ * Adds an archive to the current session so it can be used.
+ */
+case class AddArchiveCommand(path: String) extends RunnableCommand {
+  override def run(sparkSession: SparkSession): Seq[Row] = {
+    sparkSession.sparkContext.addArchive(path)
+    Seq.empty[Row]
+  }
+}
+
+/**
  * Returns a list of file paths that are added to resources.
  * If file paths are provided, return the ones that are added to resources.
  */
@@ -94,3 +104,24 @@ case class ListJarsCommand(jars: Seq[String] = 
Seq.empty[String]) extends Runnab
     }
   }
 }
+
+/**
+ * Returns a list of archive paths that are added to resources.
+ * If archive paths are provided, return the ones that are added to resources.
+ */
+case class ListArchivesCommand(archives: Seq[String] = Seq.empty[String]) 
extends RunnableCommand {
+  override val output: Seq[Attribute] = {
+    AttributeReference("Results", StringType, nullable = false)() :: Nil
+  }
+  override def run(sparkSession: SparkSession): Seq[Row] = {
+    val archiveList = sparkSession.sparkContext.listArchives()
+    if (archives.nonEmpty) {
+      for {
+        archiveName <- archives.map(f => new Path(f).getName)
+        archivePath <- archiveList if archivePath.contains(archiveName)
+      } yield Row(archivePath)
+    } else {
+      archiveList.map(Row(_))
+    }
+  }
+}
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 87c2541..9968e8d 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.execution
 
 import java.io.File
 import java.net.URI
+import java.nio.file.Files
 import java.sql.Timestamp
 import java.util.Locale
 
@@ -857,6 +858,100 @@ class HiveQuerySuite extends HiveComparisonTest with 
SQLTestUtils with BeforeAnd
     assert(sql(s"list file $testFile").count() == 1)
   }
 
+  test("ADD ARCHIVE/LIST ARCHIVES commands") {
+    withTempDir { dir =>
+      val file1 = File.createTempFile("someprefix1", "somesuffix1", dir)
+      val file2 = File.createTempFile("someprefix2", "somesuffix2", dir)
+
+      Files.write(file1.toPath, "file1".getBytes)
+      Files.write(file2.toPath, "file2".getBytes)
+
+      val zipFile = new File(dir, "test.zip")
+      val jarFile = new File(dir, "test.jar")
+      TestUtils.createJar(Seq(file1), zipFile)
+      TestUtils.createJar(Seq(file2), jarFile)
+
+      sql(s"ADD ARCHIVE ${zipFile.getAbsolutePath}#foo")
+      sql(s"ADD ARCHIVE ${jarFile.getAbsolutePath}#bar")
+
+      val checkAddArchive =
+        sparkContext.parallelize(
+          Seq(
+            "foo",
+            s"foo/${file1.getName}",
+            "nonexistence",
+            "bar",
+            s"bar/${file2.getName}"), 1).map { name =>
+          val file = new File(SparkFiles.get(name))
+          val contents =
+            if (file.isFile) {
+              Some(String.join("", new 
String(Files.readAllBytes(file.toPath))))
+            } else {
+              None
+            }
+          (name, file.canRead, contents)
+        }.collect()
+
+      assert(checkAddArchive(0) === ("foo", true, None))
+      assert(checkAddArchive(1) === (s"foo/${file1.getName}", true, 
Some("file1")))
+      assert(checkAddArchive(2) === ("nonexistence", false, None))
+      assert(checkAddArchive(3) === ("bar", true, None))
+      assert(checkAddArchive(4) === (s"bar/${file2.getName}", true, 
Some("file2")))
+      assert(sql("list archives").
+        filter(_.getString(0).contains(s"${zipFile.getAbsolutePath}")).count() 
> 0)
+      assert(sql("list archive").
+        filter(_.getString(0).contains(s"${jarFile.getAbsolutePath}")).count() 
> 0)
+      assert(sql(s"list archive ${zipFile.getAbsolutePath}").count() === 1)
+      assert(sql(s"list archives ${zipFile.getAbsolutePath} 
nonexistence").count() === 1)
+      assert(sql(s"list archives ${zipFile.getAbsolutePath} " +
+        s"${jarFile.getAbsolutePath}").count === 2)
+    }
+  }
+
+  test("ADD ARCHIVE/List ARCHIVES commands - unsupported archive formats") {
+    withTempDir { dir =>
+      val file1 = File.createTempFile("someprefix1", "somesuffix1", dir)
+      val file2 = File.createTempFile("someprefix2", "somesuffix2", dir)
+
+      Files.write(file1.toPath, "file1".getBytes)
+      Files.write(file2.toPath, "file2".getBytes)
+
+      // Emulate unsupported archive formats with .bz2 and .xz suffix.
+      val bz2File = new File(dir, "test.bz2")
+      val xzFile = new File(dir, "test.xz")
+      TestUtils.createJar(Seq(file1), bz2File)
+      TestUtils.createJar(Seq(file2), xzFile)
+
+      sql(s"ADD ARCHIVE ${bz2File.getAbsolutePath}#foo")
+      sql(s"ADD ARCHIVE ${xzFile.getAbsolutePath}#bar")
+
+      val checkAddArchive =
+        sparkContext.parallelize(
+          Seq(
+            "foo",
+            "bar"), 1).map { name =>
+          val file = new File(SparkFiles.get(name))
+          val contents =
+            if (file.isFile) {
+              Some(Files.readAllBytes(file.toPath).toSeq)
+            } else {
+              None
+            }
+          (name, file.canRead, contents)
+        }.collect()
+
+      assert(checkAddArchive(0) === ("foo", true, 
Some(Files.readAllBytes(bz2File.toPath).toSeq)))
+      assert(checkAddArchive(1) === ("bar", true, 
Some(Files.readAllBytes(xzFile.toPath).toSeq)))
+      assert(sql("list archives").
+        filter(_.getString(0).contains(s"${bz2File.getAbsolutePath}")).count() 
> 0)
+      assert(sql("list archive").
+        filter(_.getString(0).contains(s"${xzFile.getAbsolutePath}")).count() 
> 0)
+      assert(sql(s"list archive ${bz2File.getAbsolutePath}").count() === 1)
+      assert(sql(s"list archives ${bz2File.getAbsolutePath} " +
+        s"${xzFile.getAbsolutePath}").count === 2)
+    }
+  }
+
   createQueryTest("dynamic_partition",
     """
       |DROP TABLE IF EXISTS dynamic_part_table;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to