Re: [PR] [HUDI-8628] Table Service writer config hard wired to table schema [hudi]

via GitHub Fri, 24 Jan 2025 21:45:17 -0800


Davis-Zhang-Onehouse commented on code in PR #12646:
URL: https://github.com/apache/hudi/pull/12646#discussion_r1929481074



##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoTable.scala:
##########
@@ -1291,6 +1294,62 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo
     })
   }
 
+  test("Test partial insert with inline clustering") {
+    withRecordType()(withTempDir { tmp =>
+      val tableName = generateTableName
+      val basePath = s"${tmp.getCanonicalPath}/$tableName"
+      val tableType = "mor"
+      val logDataBlockFormat = "parquet"
+      withSparkSqlSessionConfig(
+          HoodieWriteConfig.MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key -> "0",
+          DataSourceWriteOptions.ENABLE_MERGE_INTO_PARTIAL_UPDATES.key -> "true",
+          HoodieStorageConfig.LOGFILE_DATA_BLOCK_FORMAT.key -> logDataBlockFormat,
+          HoodieReaderConfig.FILE_GROUP_READER_ENABLED.key -> "true",
+          HoodieClusteringConfig.INLINE_CLUSTERING.key -> "true",
+          HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMITS.key -> "2",
+          HoodieClusteringConfig.PLAN_STRATEGY_SORT_COLUMNS.key -> "id,price") {
+        spark.sql(
+          s"""
+             | create table $tableName (
+             |   id int,
+             |   name string,
+             |   price long,
+             |   ts long,
+             |   description string
+             | ) using hudi
+             | tblproperties(
+             |   type ='$tableType',
+             |   primaryKey = 'id',
+             |   preCombineField = 'ts'
+             | )
+             | location '$basePath'
+            """.stripMargin)
+        spark.sql(s"insert into $tableName values " +
+          "(1, 'a1', 10, 1000, 'a1: desc1')," +
+          "(2, 'a2', 20, 1200, 'a2: desc2'), " +
+          "(3, 'a3', 30.0, 1250, 'a3: desc3')")
+
+        // Partial updates using MERGE INTO statement with changed fields: "price" and "ts"
+        spark.sql(
+          s"""
+             | merge into $tableName t0
+             | using (
+             |   select 1 as id, 'a1' as name1, 12 as price, 1001 as _ts
+             | union
+             |   select 3 as id, 'a3' as name1, 25 as price, 1260 as _ts
+             |   ) s0
+             | on t0.id = s0.id
+             | when matched then update set price = s0.price, ts = s0._ts
+          """.stripMargin)
+        checkAnswer(s"select id, name, price, ts, description from $tableName order by id")(
+          Seq(1, "a1", 12, 1001, "a1: desc1"),
+          Seq(2, "a2", 20, 1200, "a2: desc2"),
+          Seq(3, "a3", 25, 1260, "a3: desc3")

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] [HUDI-8628] Table Service writer config hard wired to table schema [hudi]

Reply via email to