Re: [PR] [HUDI-7146] Integrate secondary index on reader path [hudi]

via GitHub Thu, 06 Jun 2024 23:50:39 -0700


codope commented on code in PR #11162:
URL: https://github.com/apache/hudi/pull/11162#discussion_r1630728589



##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/SecondaryIndexTestBase.scala:
##########
@@ -62,4 +69,54 @@ class SecondaryIndexTestBase extends 
HoodieSparkClientTestBase {
     cleanupResources()
   }
 
+  def verifyQueryPredicate(hudiOpts: Map[String, String], columnName: String): 
Unit = {
+    mergedDfList = 
spark.read.format("hudi").options(hudiOpts).load(basePath).repartition(1).cache()
 :: mergedDfList
+    val secondaryKey = mergedDfList.last.limit(1).collect().map(row => 
row.getAs(columnName).toString)
+    val dataFilter = EqualTo(attribute(columnName), Literal(secondaryKey(0)))
+    verifyFilePruning(hudiOpts, dataFilter)
+  }
+
+  private def attribute(partition: String): AttributeReference = {
+    AttributeReference(partition, StringType, nullable = true)()
+  }
+
+
+  private def verifyFilePruning(opts: Map[String, String], dataFilter: 
Expression): Unit = {
+    // with data skipping
+    val commonOpts = opts + ("path" -> basePath)
+    metaClient = HoodieTableMetaClient.reload(metaClient)
+    var fileIndex = HoodieFileIndex(spark, metaClient, None, commonOpts, 
includeLogFiles = true)
+    val filteredPartitionDirectories = fileIndex.listFiles(Seq(), 
Seq(dataFilter))
+    val filteredFilesCount = filteredPartitionDirectories.flatMap(s => 
s.files).size
+    assertTrue(filteredFilesCount < getLatestDataFilesCount(opts))

Review Comment:
   Will check this. I also need to refactor and move these into a test util 
class, since they are used in other index tests as well.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] [HUDI-7146] Integrate secondary index on reader path [hudi]

Reply via email to