(hudi) 12/19: [HUDI-6953] Adding test for composite keys with bulk insert row writer (#10214)

yihua Tue, 14 May 2024 13:11:39 -0700

This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch branch-0.x
in repository https://gitbox.apache.org/repos/asf/hudi.git


commit 7c3437f69ec928a82318ca1b983ca82db8fe744e
Author: Sivabalan Narayanan <[email protected]>
AuthorDate: Sat Mar 2 21:57:23 2024 -0800

    [HUDI-6953] Adding test for composite keys with bulk insert row writer (#10214)
---
 .../apache/hudi/functional/TestCOWDataSource.scala  | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
index 5614b414927..ff87a90cef8 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
@@ -487,6 +487,27 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
     assertEquals(snapshotDF2.count(), (validRecordsFromBatch1 + validRecordsFromBatch2))
   }
 
+  @Test
+  def bulkInsertCompositeKeys(): Unit = {
+    val (writeOpts, readOpts) = getWriterReaderOpts(HoodieRecordType.AVRO)
+
+    // Insert Operation
+    val records = recordsToStrings(dataGen.generateInserts("000", 100)).toList
+    val inputDF = spark.read.json(spark.sparkContext.parallelize(records, 2))
+
+    val inputDf1 = inputDF.withColumn("new_col",lit("value1"))
+    val inputDf2 = inputDF.withColumn("new_col", lit(null).cast("String") )
+
+    inputDf1.union(inputDf2).write.format("hudi")
+        .options(writeOpts)
+        .option(DataSourceWriteOptions.RECORDKEY_FIELD.key, "_row_key,new_col")
+        .option(DataSourceWriteOptions.OPERATION.key(),"bulk_insert")
+        .mode(SaveMode.Overwrite)
+        .save(basePath)
+
+    assertEquals(200, spark.read.format("org.apache.hudi").options(readOpts).load(basePath).count())
+  }
+
   /**
    * This tests the case that query by with a specified partition condition on hudi table which is
    * different between the value of the partition field and the actual partition path,

(hudi) 12/19: [HUDI-6953] Adding test for composite keys with bulk insert row writer (#10214)

Reply via email to