yihua commented on code in PR #8990:
URL: https://github.com/apache/hudi/pull/8990#discussion_r1231414550


##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCDCForSparkSQL.scala:
##########
@@ -44,6 +44,47 @@ class TestCDCForSparkSQL extends HoodieSparkSqlTestBase {
     assertEquals(expectedDeletedCnt, cdcData.where("op = 'd'").count())
   }
 
+  test("Test delete all records in filegroup") {
+    withTempDir { tmp =>
+      val databaseName = "hudi_database"
+      spark.sql(s"create database if not exists $databaseName")
+      spark.sql(s"use $databaseName")
+      val tableName = generateTableName
+      val basePath = s"${tmp.getCanonicalPath}/$tableName"
+      spark.sql(
+        s"""
+           | create table $tableName (
+           |  id int,
+           |  name string,
+           |  price double,
+           |  ts long
+           | ) using hudi
+           | partitioned by (name)
+           | tblproperties (
+           |   'primaryKey' = 'id',
+           |   'preCombineField' = 'ts',
+           |   'hoodie.table.cdc.enabled' = 'true',
+           |   'hoodie.table.cdc.supplemental.logging.mode' = 
'$DATA_BEFORE_AFTER',
+           |   type = 'cow'
+           | )
+           | location '$basePath'
+      """.stripMargin)
+      val metaClient = HoodieTableMetaClient.builder()
+        .setBasePath(basePath)
+        .setConf(spark.sessionState.newHadoopConf())
+        .build()
+      spark.sql(s"insert into $tableName values (1, 11, 1000, 'a1'), (2, 12, 
1000, 'a2')")
+      val commitTime1 = 
metaClient.reloadActiveTimeline.lastInstant().get().getTimestamp
+      val cdcDataOnly1 = cdcDataFrame(basePath, commitTime1.toLong - 1)
+      cdcDataOnly1.show(false)
+      assertCDCOpCnt(cdcDataOnly1, 2, 0, 0)
+
+      spark.sql(s"delete from $tableName where id = 1")
+      val cdcDataOnly2 = cdcDataFrame(basePath, commitTime1.toLong)
+      assertCDCOpCnt(cdcDataOnly2, 0, 0, 1)

Review Comment:
   Could you add assertions that verify the precondition of this test, i.e.,
   that the delete removes one file group? Specifically: before the deletion
   there are two file groups, after the deletion there is one file group, and
   the deleted record belongs to the removed file group.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to