This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 33ee3d9  [SPARK-35792][SQL] View should not capture configs used in `RelationConversions`
33ee3d9 is described below

commit 33ee3d94ed54ed5076f8bad7ca314bdb86695e7c
Author: Linhong Liu <[email protected]>
AuthorDate: Thu Jun 17 21:40:53 2021 +0800

    [SPARK-35792][SQL] View should not capture configs used in `RelationConversions`
    
    ### What changes were proposed in this pull request?
    `RelationConversions` is actually an optimization rule while it's executed in the analysis phase.
    For view, it's designed to only capture semantic configs, so we should ignore the optimization configs that will be used in the analysis phase.
    
    This PR also fixes the issue that view resolution will always use the default value for uncaptured config
    
    ### Why are the changes needed?
    Bugfix
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, after this PR view resolution will respect the values set in the current session for the below configs
    ```
    "spark.sql.hive.convertMetastoreParquet"
    "spark.sql.hive.convertMetastoreOrc"
    "spark.sql.hive.convertInsertingPartitionedTable"
    "spark.sql.hive.convertMetastoreCtas"
    ```
    
    ### How was this patch tested?
    By running new UT:
    ```
    $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *HiveSQLViewSuite"
    ```
    
    Closes #32941 from linhongliu-db/SPARK-35792-ignore-convert-configs.
    
    Authored-by: Linhong Liu <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
    (cherry picked from commit b86a69f026b503896846ec32c8f7addc39dda2a0)
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../plans/logical/basicLogicalOperators.scala      | 13 +++++++++++-
 .../apache/spark/sql/execution/command/views.scala |  5 +++++
 .../sql/hive/execution/HiveSQLViewSuite.scala      | 24 +++++++++++++++++++++-
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 224e7bc..5149ec9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -500,7 +500,18 @@ object View {
     if (activeConf.useCurrentSQLConfigsForView && !isTempView) return activeConf
 
     val sqlConf = new SQLConf()
-    for ((k, v) <- configs) {
+    // We retain below configs from current session because they are not captured by view
+    // as optimization configs but they are still needed during the view resolution.
+    // TODO: remove this `retainedConfigs` after the `RelationConversions` is moved to
+    // optimization phase.
+    val retainedConfigs = activeConf.getAllConfs.filterKeys(key =>
+      Seq(
+        "spark.sql.hive.convertMetastoreParquet",
+        "spark.sql.hive.convertMetastoreOrc",
+        "spark.sql.hive.convertInsertingPartitionedTable",
+        "spark.sql.hive.convertMetastoreCtas"
+      ).contains(key))
+    for ((k, v) <- configs ++ retainedConfigs) {
       sqlConf.settings.put(k, v)
     }
     sqlConf
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
index 620553c..c58c6b4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
@@ -352,6 +352,11 @@ object ViewHelper {
     "spark.sql.execution.",
     "spark.sql.shuffle.",
     "spark.sql.adaptive.",
+    // ignore optimization configs used in `RelationConversions`
+    "spark.sql.hive.convertMetastoreParquet",
+    "spark.sql.hive.convertMetastoreOrc",
+    "spark.sql.hive.convertInsertingPartitionedTable",
+    "spark.sql.hive.convertMetastoreCtas",
     SQLConf.ADDITIONAL_REMOTE_REPOSITORIES.key)
 
   private val configAllowList = Seq(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
index 8aae7a1..feb2c67 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
@@ -19,8 +19,9 @@ package org.apache.spark.sql.hive.execution
 
 import org.apache.spark.sql.{AnalysisException, Row}
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
+import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, HiveTableRelation}
 import org.apache.spark.sql.execution.SQLViewSuite
+import org.apache.spark.sql.hive.HiveUtils
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.types.{NullType, StructType}
 
@@ -157,4 +158,25 @@ class HiveSQLViewSuite extends SQLViewSuite with TestHiveSingleton {
       )
     }
   }
+
+  test("SPARK-35792: ignore optimization configs used in RelationConversions") 
{
+    withTable("t_orc") {
+      withView("v_orc") {
+        withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") {
+          spark.sql("create table t_orc stored as orc as select 1 as a, 2 as 
b")
+          spark.sql("create view v_orc as select * from t_orc")
+        }
+        withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "false") {
+          val relationInTable = sql("select * from t_orc").queryExecution.analyzed.collect {
+            case r: HiveTableRelation => r
+          }.headOption
+          val relationInView = sql("select * from v_orc").queryExecution.analyzed.collect {
+            case r: HiveTableRelation => r
+          }.headOption
+          assert(relationInTable.isDefined)
+          assert(relationInView.isDefined)
+        }
+      }
+    }
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to