This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new da92293f9ce0 [SPARK-48033][SQL] Fix `RuntimeReplaceable` expressions 
being used in default columns
da92293f9ce0 is described below

commit da92293f9ce0be1ac283c4a5d769af550abf7031
Author: Richard Chen <r.c...@databricks.com>
AuthorDate: Tue Apr 30 09:07:24 2024 +0800

    [SPARK-48033][SQL] Fix `RuntimeReplaceable` expressions being used in 
default columns
    
    ### What changes were proposed in this pull request?
    
    Currently, default columns that have a default of a `RuntimeReplaceable` 
expression fail.
    
    This is because the `AlterTableCommand` constant-folds the default value 
expression before replacing `RuntimeReplaceable` expressions with their actual 
implementations. For example:
    ```
    sql(s"CREATE TABLE t(v VARIANT DEFAULT parse_json('1')) USING PARQUET")
    sql("INSERT INTO t VALUES(DEFAULT)")
    ```
    fails because `parse_json` is `RuntimeReplaceable` and is evaluated before 
the analyzer inserts the correct expression into the plan.
    
    To fix this, we run the `ReplaceExpressions` rule before `ConstantFolding`
    
    ### Why are the changes needed?
    
    This allows default columns to use expressions that are `RuntimeReplaceable`
    
    This is especially important for Variant types because literal variants are 
difficult to create - `parse_json` will likely be used the majority of the time.
    
    ### Does this PR introduce _any_ user-facing change?
    
    no
    
    ### How was this patch tested?
    
    added UT
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    no
    
    Closes #46269 from richardc-db/fix_default_cols_runtime_replaceable.
    
    Authored-by: Richard Chen <r.c...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala       | 4 ++--
 .../scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala   | 8 ++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
index 7b00349a4f27..d73e2ca6bd9d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, 
InMemoryCatalog, SessionCatalog}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.{Literal => ExprLiteral}
-import org.apache.spark.sql.catalyst.optimizer.ConstantFolding
+import org.apache.spark.sql.catalyst.optimizer.{ConstantFolding, 
ReplaceExpressions}
 import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION
@@ -289,7 +289,7 @@ object ResolveDefaultColumns extends QueryErrorsBase
       val analyzer: Analyzer = DefaultColumnAnalyzer
       val analyzed = analyzer.execute(Project(Seq(Alias(parsed, colName)()), 
OneRowRelation()))
       analyzer.checkAnalysis(analyzed)
-      ConstantFolding(analyzed)
+      ConstantFolding(ReplaceExpressions(analyzed))
     } catch {
       case ex: AnalysisException =>
         throw QueryCompilationErrors.defaultValuesUnresolvedExprError(
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala
index 48a9564ab8f9..bca147279993 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala
@@ -279,4 +279,12 @@ class ResolveDefaultColumnsSuite extends QueryTest with 
SharedSparkSession {
       checkAnswer(sql("select CAST(c as STRING) from t"), Row("2018-11-17 
13:33:33"))
     }
   }
+
+  test("SPARK-48033: default columns using runtime replaceable expression 
works") {
+    withTable("t") {
+      sql("CREATE TABLE t(v VARIANT DEFAULT parse_json('1')) USING PARQUET")
+      sql("INSERT INTO t VALUES(DEFAULT)")
+      checkAnswer(sql("select v from t"), sql("select 
parse_json('1')").collect())
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to