This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new f2d90f5c36e [HUDI-8161] Make spark-sql command 'desc' independent from schema evolution config (#11871)
f2d90f5c36e is described below

commit f2d90f5c36eb737704500b41f725b626bc7ad101
Author: Vova Kolmakov <[email protected]>
AuthorDate: Fri Sep 27 08:09:10 2024 +0700

    [HUDI-8161] Make spark-sql command 'desc' independent from schema evolution config (#11871)
    
    Co-authored-by: Vova Kolmakov <[email protected]>
---
 .../spark/sql/hudi/ddl/TestDescribeTable.scala     | 117 +++++++++++++++++++++
 .../sql/hudi/analysis/HoodieSpark3Analysis.scala   |   4 +
 2 files changed, 121 insertions(+)

diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestDescribeTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestDescribeTable.scala
new file mode 100644
index 00000000000..c1879fe2a19
--- /dev/null
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestDescribeTable.scala
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hudi.ddl
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase
+
+import java.util.function.Predicate
+
+class TestDescribeTable extends HoodieSparkSqlTestBase {
+
+  test("Test desc hudi table command") {
+    withTempDir { tmp =>
+      val tbName = "wk_date"
+      val basePath = s"${tmp.getCanonicalPath}/$tbName"
+
+      spark.sql(
+        s"""
+           |create table $tbName (id int, driver string, precomb int, dat string)
+           | using hudi
+           | partitioned by(dat)
+           | tblproperties(
+           |   type='cow',
+           |   primaryKey='id',
+           |   preCombineField='precomb'
+           | )
+           | location '$basePath'
+       """.stripMargin)
+
+      // just for scala-2.11 compatibility
+      val locationInFirstColumn: Predicate[Row] = new Predicate[Row] {
+        def test(row: Row): Boolean = row(0).equals("Location")
+      }
+
+      spark.sql("set hoodie.schema.on.read.enable=false")
+      var output: java.util.List[Row] = spark.sql(s"describe extended $tbName").collectAsList()
+      assert(output.stream().anyMatch(locationInFirstColumn))
+
+      spark.sql("set hoodie.schema.on.read.enable=true")
+      output = spark.sql(s"desc formatted $tbName").collectAsList()
+      assert(output.stream().anyMatch(locationInFirstColumn))
+
+      output = spark.sql(s"describe table extended $tbName").collectAsList()
+      assert(output.stream().anyMatch(locationInFirstColumn))
+
+      // DESC returns only columns and partitions when run without 'extended' or 'formatted' keywords
+      output = spark.sql(s"describe table $tbName").collectAsList()
+      assert(output.stream().noneMatch(locationInFirstColumn))
+
+      output = spark.sql(s"desc table $tbName").collectAsList()
+      assert(output.stream().noneMatch(locationInFirstColumn))
+
+      output = spark.sql(s"desc $tbName").collectAsList()
+      assert(output.stream().noneMatch(locationInFirstColumn))
+    }
+  }
+
+  test("Test desc non-hudi table command") {
+    withTempDir { tmp =>
+      val tbName = "wk_date"
+      val basePath = s"${tmp.getCanonicalPath}/$tbName"
+
+      spark.sql(
+        s"""
+           |create table $tbName (
+           | id int,
+           | driver string,
+           | precomb int,
+           | dat string
+           |)
+           | using parquet
+           | location '$basePath';
+       """.stripMargin)
+
+      // just for scala-2.11 compatibility
+      val locationInFirstColumn: Predicate[Row] = new Predicate[Row] {
+        def test(row: Row): Boolean = row(0).equals("Location")
+      }
+
+      spark.sql("set hoodie.schema.on.read.enable=false")
+      var output: java.util.List[Row] = spark.sql(s"describe extended $tbName").collectAsList()
+      assert(output.stream().anyMatch(locationInFirstColumn))
+
+      spark.sql("set hoodie.schema.on.read.enable=true")
+      output = spark.sql(s"desc formatted $tbName").collectAsList()
+      assert(output.stream().anyMatch(locationInFirstColumn))
+
+      output = spark.sql(s"describe table extended $tbName").collectAsList()
+      assert(output.stream().anyMatch(locationInFirstColumn))
+
+      // DESC returns only columns and partitions when run without 'extended' or 'formatted' keywords
+      output = spark.sql(s"describe table $tbName").collectAsList()
+      assert(output.stream().noneMatch(locationInFirstColumn))
+
+      output = spark.sql(s"desc table $tbName").collectAsList()
+      assert(output.stream().noneMatch(locationInFirstColumn))
+
+      output = spark.sql(s"desc $tbName").collectAsList()
+      assert(output.stream().noneMatch(locationInFirstColumn))
+    }
+  }
+}
diff --git a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark3Analysis.scala b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark3Analysis.scala
index d917adb7d94..d1d5e6e0c0b 100644
--- a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark3Analysis.scala
+++ b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark3Analysis.scala
@@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.trees.Origin
 import org.apache.spark.sql.connector.catalog.{Table, V1Table}
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper
+import org.apache.spark.sql.execution.command.DescribeTableCommand
 import org.apache.spark.sql.execution.datasources.{DataSource, LogicalRelation}
 import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.isMetaField
 import org.apache.spark.sql.hudi.ProvidesHoodieConfig
@@ -333,6 +334,9 @@ case class HoodieSpark3PostAnalysisRule(sparkSession: SparkSession) extends Rule
           retainData = true
         )
 
+      case DescribeRelation(MatchResolvedTable(_, id, HoodieV1OrV2Table(_)), partitionSpec, isExtended, output) =>
+        DescribeTableCommand(id.asTableIdentifier, partitionSpec, isExtended, output)
+
       case _ => plan
     }
   }

Reply via email to