This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 85e252e8503 [SPARK-44313][SQL] Fix generated column expression 
validation when there is a char/varchar column in the schema
85e252e8503 is described below

commit 85e252e8503534009f4fb5ea005d44c9eda31447
Author: Allison Portis <[email protected]>
AuthorDate: Thu Jul 6 10:25:57 2023 +0800

    [SPARK-44313][SQL] Fix generated column expression validation when there is 
a char/varchar column in the schema
    
    ### What changes were proposed in this pull request?
    
    #38823 added support for defining generated columns in CREATE TABLE 
statements, including validation of the generation expression. This validation 
currently fails erroneously when there is a char or varchar column anywhere in 
the table schema, because checkAnalysis fails here: 
https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala#L123.
    
    This PR replaces the type of any char/varchar column in the schema with 
string before analysis.
    
    ### Why are the changes needed?
    
    The following statement is valid and should not fail validation:
    ```
    CREATE TABLE default.example (
        name VARCHAR(64),
        tstamp TIMESTAMP,
        tstamp_date DATE GENERATED ALWAYS AS (CAST(tstamp as DATE))
    )
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Adds a unit test.
    
    Closes #41868 from allisonport-db/validateGeneratedColumns-charvarchar.
    
    Authored-by: Allison Portis <[email protected]>
    Signed-off-by: Kent Yao <[email protected]>
    (cherry picked from commit f0e18284d0c8938cb35f2ad3668aff0e4ef4891c)
    Signed-off-by: Kent Yao <[email protected]>
---
 .../spark/sql/catalyst/util/GeneratedColumn.scala      |  3 ++-
 .../spark/sql/connector/DataSourceV2SQLSuite.scala     | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala
index 9a1ce5b0295..5dd278e3fea 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala
@@ -116,7 +116,8 @@ object GeneratedColumn {
     val allowedBaseColumns = schema
       .filterNot(_.name == fieldName) // Can't reference itself
       .filterNot(isGeneratedColumn) // Can't reference other generated columns
-    val relation = new 
LocalRelation(StructType(allowedBaseColumns).toAttributes)
+    val relation = new 
LocalRelation(CharVarcharUtils.replaceCharVarcharWithStringInSchema(
+      StructType(allowedBaseColumns)).toAttributes)
     val plan = try {
       val analyzer: Analyzer = GeneratedColumnAnalyzer
       val analyzed = analyzer.execute(Project(Seq(Alias(parsed, fieldName)()), 
relation))
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index 13ffa6d9bfd..c7dc924c117 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -1694,6 +1694,24 @@ class DataSourceV2SQLSuiteV1Filter
     )
   }
 
+  test("SPARK-44313: generation expression validation passes when there is a 
char/varchar column") {
+    val tblName = "my_tab"
+    // InMemoryTableCatalog.capabilities() = 
{SUPPORTS_CREATE_TABLE_WITH_GENERATED_COLUMNS}
+    for (charVarCharCol <- Seq("name VARCHAR(64)", "name CHAR(64)")) {
+      withTable(s"testcat.$tblName") {
+        sql(
+          s"""
+             |CREATE TABLE testcat.$tblName(
+             |  $charVarCharCol,
+             |  tstamp TIMESTAMP,
+             |  tstamp_date DATE GENERATED ALWAYS AS (CAST(tstamp AS DATE))
+             |) USING foo
+             |""".stripMargin)
+        
assert(catalog("testcat").asTableCatalog.tableExists(Identifier.of(Array(), 
tblName)))
+      }
+    }
+  }
+
   test("ShowCurrentNamespace: basic tests") {
     def testShowCurrentNamespace(expectedCatalogName: String, 
expectedNamespace: String): Unit = {
       val schema = new StructType()


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to