gengliangwang commented on a change in pull request #35982:
URL: https://github.com/apache/spark/pull/35982#discussion_r838092540



##########
File path: 
sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
##########
@@ -849,6 +857,257 @@ class InsertSuite extends DataSourceTest with 
SharedSparkSession {
     }
   }
 
+  test("INSERT INTO statements with tables with default columns") {
+    // For most of these cases, the test table 't' has two columns:
+    // (1) name 'i' with boolean type and no default value
+    // (2) name 's' with long integer type and a default value of 42L.
+    //
+    // Positive tests:
+    // When the USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES configuration is 
enabled, and no
+    // explicit DEFAULT value is available when the INSERT INTO statement 
provides fewer
+    // values than expected, NULL values are appended in their place.
+    withSQLConf(SQLConf.USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES.key -> 
"true") {
+      withTable("t") {
+        sql("create table t(i boolean, s bigint) using parquet")
+        sql("insert into t values(true)")
+        checkAnswer(sql("select s from t where i = true"), Seq(null).map(i => 
Row(i)))
+      }
+    }
+    // The default value for the DEFAULT keyword is the NULL literal.
+    withTable("t") {
+      sql("create table t(i boolean, s bigint) using parquet")
+      sql("insert into t values(true, default)")
+      checkAnswer(sql("select s from t where i = true"), Seq(null).map(i => 
Row(i)))
+    }
+    // There is a complex expression in the default value.
+    withTable("t") {
+      sql("create table t(i boolean, s string default concat('abc', 'def')) 
using parquet")
+      sql("insert into t values(true, default)")
+      checkAnswer(sql("select s from t where i = true"), Seq("abcdef").map(i 
=> Row(i)))
+    }
+    // The default value parses correctly and the provided value type is 
different but coercible.
+    withTable("t") {
+      sql("create table t(i boolean, s bigint default 42) using parquet")
+      sql("insert into t values(false)")
+      checkAnswer(sql("select s from t where i = false"), Seq(42L).map(i => 
Row(i)))
+    }
+    // There are two trailing default values referenced implicitly by the 
INSERT INTO statement.
+    withTable("t") {
+      sql("create table t(i int, s bigint default 42, x bigint default 43) 
using parquet")
+      sql("insert into t values(1)")
+      checkAnswer(sql("select s + x from t where i = 1"), Seq(85L).map(i => 
Row(i)))
+    }
+    // The table has a partitioning column and a default value is injected.
+    withTable("t") {
+      sql("create table t(i boolean, s bigint, q int default 42 ) using 
parquet partitioned by (i)")
+      sql("insert into t partition(i='true') values(5, default)")
+      checkAnswer(sql("select s from t where i = true"), Seq(5).map(i => 
Row(i)))
+    }
+    // The table has a partitioning column and a default value is added per an 
explicit reference.
+    withTable("t") {
+      sql("create table t(i boolean, s bigint default 42) using parquet 
partitioned by (i)")
+      sql("insert into t partition(i='true') values(default)")
+      checkAnswer(sql("select s from t where i = true"), Seq(42L).map(i => 
Row(i)))
+    }
+    // The default value parses correctly as a constant but non-literal 
expression.
+    withTable("t") {
+      sql("create table t(i boolean, s bigint default 41 + 1) using parquet")
+      sql("insert into t values(false, default)")
+      checkAnswer(sql("select s from t where i = false"), Seq(42L).map(i => 
Row(i)))
+    }
+    // Explicit defaults may appear in different positions within the inline 
table provided as input
+    // to the INSERT INTO statement.
+    withTable("t") {
+      sql("create table t(i boolean default false, s bigint default 42) using 
parquet")
+      sql("insert into t values(false, default), (default, 42)")
+      checkAnswer(sql("select s from t where i = false"), Seq(42L, 42L).map(i 
=> Row(i)))
+    }
+    // There is an explicit default value provided in the INSERT INTO 
statement in the VALUES,
+    // with an alias over the VALUES.
+    withTable("t") {
+      sql("create table t(i boolean, s bigint default 42) using parquet")
+      sql("insert into t select * from values (false, default) as tab(col, 
other)")
+      checkAnswer(sql("select s from t where i = false"), Seq(42L).map(i => 
Row(i)))
+    }
+    // The explicit default value arrives first before the other value.
+    withTable("t") {
+      sql("create table t(i boolean default false, s bigint) using parquet")
+      sql("insert into t values (default, 43)")
+      checkAnswer(sql("select s from t where i = false"), Seq(43L).map(i => 
Row(i)))
+    }
+    // The 'create table' statement provides the default parameter first.
+    withTable("t") {
+      sql("create table t(i boolean default false, s bigint) using parquet")
+      sql("insert into t values (default, 43)")
+      checkAnswer(sql("select s from t where i = false"), Seq(43L).map(i => 
Row(i)))
+    }
+    // The explicit default value is provided in the wrong order (first 
instead of second), but
+    // this is OK because the provided default value evaluates to literal NULL.
+    withTable("t") {
+      sql("create table t(i boolean, s bigint default 42) using parquet")
+      sql("insert into t values (default, 43)")
+      checkAnswer(sql("select s from t where i is null"), Seq(43L).map(i => 
Row(i)))
+    }
+    // There is an explicit default value provided in the INSERT INTO 
statement as a SELECT.
+    // This is supported.
+    withTable("t") {
+      sql("create table t(i boolean, s bigint default 42) using parquet")
+      sql("insert into t select false, default")
+      checkAnswer(sql("select s from t where i = false"), Seq(42L).map(i => 
Row(i)))
+    }
+    // There is a complex query plan in the SELECT query in the INSERT INTO 
statement.
+    withTable("t") {
+      sql("create table t(i boolean default false, s bigint default 42) using 
parquet")
+      sql("insert into t select col, count(*) from values (default, default) " 
+
+        "as tab(col, other) group by 1")
+      checkAnswer(sql("select s from t where i = false"), Seq(1).map(i => 
Row(i)))
+    }
+    // The explicit default reference resolves successfully with nested table 
subqueries.
+    withTable("t") {
+      sql("create table t(i boolean default false, s bigint) using parquet")
+      sql("insert into t select * from (select * from values(default, 42))")
+      checkAnswer(sql("select s from t where i = false"), Seq(42L).map(i => 
Row(i)))
+    }
+    // There are three column types exercising various combinations of 
implicit and explicit
+    // default column value references in the 'insert into' statements. Note 
these tests depend on
+    // enabling the configuration to use NULLs for missing DEFAULT column 
values.
+    withSQLConf(SQLConf.USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES.key -> 
"true") {
+      withTable("t1", "t2") {
+        sql("create table t1(j int, s bigint default 42, x bigint default 43) 
using parquet")
+        sql("insert into t1 values(1)")
+        sql("insert into t1 values(2, default)")
+        sql("insert into t1 values(3, default, default)")
+        sql("insert into t1 values(4, 44)")
+        sql("insert into t1 values(5, 44, 45)")
+        sql("create table t2(j int, s bigint default 42, x bigint default 43) 
using parquet")
+        sql("insert into t2 select j from t1 where j = 1")
+        sql("insert into t2 select j, default from t1 where j = 2")
+        sql("insert into t2 select j, default, default from t1 where j = 3")
+        sql("insert into t2 select j, s from t1 where j = 4")
+        sql("insert into t2 select j, s, default from t1 where j = 5")
+        val resultSchema = new StructType()
+          .add("s", LongType, false)
+          .add("x", LongType, false)
+        checkAnswer(sql("select j, s, x from t2 order by j, s, x"),
+          Seq(
+            new GenericRowWithSchema(Array(1, 42L, 43L), resultSchema),
+            new GenericRowWithSchema(Array(2, 42L, 43L), resultSchema),
+            new GenericRowWithSchema(Array(3, 42L, 43L), resultSchema),
+            new GenericRowWithSchema(Array(4, 44L, 43L), resultSchema),
+            new GenericRowWithSchema(Array(5, 44L, 43L), resultSchema)))
+      }
+    }
+
+    // Negative tests:

Review comment:
       I would suggest putting negative tests to a new test case, in case of 
one test case being too long.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to