gengliangwang commented on code in PR #36091:
URL: https://github.com/apache/spark/pull/36091#discussion_r847577720


##########
sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala:
##########
@@ -1255,6 +1255,173 @@ class InsertSuite extends DataSourceTest with 
SharedSparkSession {
     }
   }
 
+  test("SPARK-38811 INSERT INTO on columns added with ALTER TABLE ADD COLUMNS: 
Positive tests") {
+    // There is a complex expression in the default value.
+    withTable("t") {
+      sql("create table t(i boolean) using parquet")
+      sql("alter table t add column s string default concat('abc', 'def')")
+      sql("insert into t values(true, default)")
+      checkAnswer(sql("select s from t where i = true"), Seq("abcdef").map(i 
=> Row(i)))
+    }
+    // There are two trailing default values referenced implicitly by the 
INSERT INTO statement.
+    withTable("t") {
+      sql("create table t(i int) using parquet")
+      sql("alter table t add column s bigint default 42")
+      sql("alter table t add column x bigint default 43")
+      sql("insert into t values(1)")
+      checkAnswer(sql("select s + x from t where i = 1"), Seq(85L).map(i => 
Row(i)))
+    }
+    // There are two trailing default values added in a single ALTER TABLE ADD COLUMNS
+    // statement and referenced implicitly by the INSERT INTO statement.
+    withTable("t") {
+      sql("create table t(i int) using parquet")
+      sql("alter table t add columns s bigint default 42, x bigint default 43")
+      sql("insert into t values(1)")
+      checkAnswer(sql("select s + x from t where i = 1"), Seq(85L).map(i => 
Row(i)))
+    }
+    // The table has a partitioning column and a default value is injected.
+    withTable("t") {
+      sql("create table t(i boolean, s bigint) using parquet partitioned by 
(i)")
+      sql("alter table t add column q int default 42")
+      sql("insert into t partition(i='true') values(5, default)")
+      checkAnswer(sql("select s from t where i = true"), Seq(5).map(i => 
Row(i)))
+    }
+    // The default value parses correctly as a constant but non-literal 
expression.
+    withTable("t") {
+      sql("create table t(i boolean) using parquet")
+      sql("alter table t add column s bigint default 41 + 1")
+      sql("insert into t values(false, default)")
+      checkAnswer(sql("select s from t where i = false"), Seq(42L).map(i => 
Row(i)))
+    }
+    // Explicit defaults may appear in different positions within the inline 
table provided as input
+    // to the INSERT INTO statement.
+    withTable("t") {
+      sql("create table t(i boolean default false) using parquet")
+      sql("alter table t add column s bigint default 42")
+      sql("insert into t values(false, default), (default, 42)")
+      checkAnswer(sql("select s from t where i = false"), Seq(42L, 42L).map(i 
=> Row(i)))
+    }
+    // There is an explicit default value provided in the INSERT INTO 
statement in the VALUES,
+    // with an alias over the VALUES.
+    withTable("t") {
+      sql("create table t(i boolean) using parquet")
+      sql("alter table t add column s bigint default 42")
+      sql("insert into t select * from values (false, default) as tab(col, 
other)")
+      checkAnswer(sql("select s from t where i = false"), Seq(42L).map(i => 
Row(i)))
+    }
+    // The explicit default value is provided in the wrong order (first 
instead of second), but
+    // this is OK because the provided default value evaluates to literal NULL.
+    withTable("t") {
+      sql("create table t(i boolean) using parquet")
+      sql("alter table t add column s bigint default 42")
+      sql("insert into t values (default, 43)")
+      checkAnswer(sql("select s from t where i is null"), Seq(43L).map(i => 
Row(i)))
+    }
+    // There is an explicit default value provided in the INSERT INTO 
statement as a SELECT.
+    // This is supported.
+    withTable("t") {
+      sql("create table t(i boolean) using parquet")
+      sql("alter table t add column s bigint default 42")
+      sql("insert into t select false, default")
+      checkAnswer(sql("select s from t where i = false"), Seq(42L).map(i => 
Row(i)))
+    }
+    // There is a complex query plan in the SELECT query in the INSERT INTO 
statement.
+    withTable("t") {
+      sql("create table t(i boolean default false) using parquet")
+      sql("alter table t add column s bigint default 42")
+      sql("insert into t select col, count(*) from values (default, default) " 
+
+        "as tab(col, other) group by 1")
+      checkAnswer(sql("select s from t where i = false"), Seq(1).map(i => 
Row(i)))
+    }
+    // There are three column types exercising various combinations of 
implicit and explicit
+    // default column value references in the 'insert into' statements. Note 
these tests depend on
+    // enabling the configuration to use NULLs for missing DEFAULT column 
values.
+    withSQLConf(SQLConf.USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES.key -> 
"true") {
+      withTable("t1", "t2") {
+        sql("create table t1(j int) using parquet")
+        sql("alter table t1 add column s bigint default 42")
+        sql("alter table t1 add column x bigint default 43")
+        sql("insert into t1 values(1)")
+        sql("insert into t1 values(2, default)")
+        sql("insert into t1 values(3, default, default)")
+        sql("insert into t1 values(4, 44)")
+        sql("insert into t1 values(5, 44, 45)")
+        sql("create table t2(j int) using parquet")
+        sql("alter table t2 add columns s bigint default 42, x bigint default 
43")
+        sql("insert into t2 select j from t1 where j = 1")
+        sql("insert into t2 select j, default from t1 where j = 2")
+        sql("insert into t2 select j, default, default from t1 where j = 3")
+        sql("insert into t2 select j, s from t1 where j = 4")
+        sql("insert into t2 select j, s, default from t1 where j = 5")
+        val resultSchema = new StructType()
+          .add("s", LongType, false)
+          .add("x", LongType, false)
+        checkAnswer(
+          sql("select j, s, x from t2 order by j, s, x"),
+          Seq(
+            new GenericRowWithSchema(Array(1, 42L, 43L), resultSchema),

Review Comment:
   Probably we can use `Row(1, 42L, 43L) :: Row(2, 42L, 43L) ... :: Nil` here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to