yihua commented on code in PR #13790:
URL: https://github.com/apache/hudi/pull/13790#discussion_r2305300955


##########
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowColumnStatsOverlapProcedure.scala:
##########
@@ -123,7 +135,12 @@ class ShowColumnStatsOverlapProcedure extends 
BaseProcedure with ProcedureBuilde
     addStatisticsToRows(groupedPoints, fileSlicesSizeByPartition.toMap, rows)
 
     // The returned results are sorted by column name and average value
-    rows.asScala.toList.sortBy(row => (row.getString(1), row.getDouble(2)))
+    val results = rows.asScala.toList.sortBy(row => (row.getString(1), 
row.getDouble(2)))
+    if (filter != null && filter.trim.nonEmpty) {
+      HoodieProcedureFilterUtils.evaluateFilter(results, filter, OUTPUT_TYPE, 
sparkSession)
+    } else {
+      results
+    }
   }
 
   private def getTargetColumnsSeq(args: ProcedureArgs): Seq[String] = {

Review Comment:
   This needs to be updated with the new argument index: since the new `table_path` parameter was inserted, the `PARAMETERS` positions have shifted, so `getTargetColumnsSeq(args)` (and the hard-coded index it reads internally) presumably still points at the old position — please verify and update it accordingly.



##########
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowColumnStatsOverlapProcedure.scala:
##########
@@ -95,17 +97,27 @@ class ShowColumnStatsOverlapProcedure extends BaseProcedure 
with ProcedureBuilde
   override def call(args: ProcedureArgs): Seq[Row] = {
     super.checkArgs(PARAMETERS, args)
 
-    val table = getArgValueOrDefault(args, PARAMETERS(0))
-    val partitions = getArgValueOrDefault(args, 
PARAMETERS(1)).getOrElse("").toString
+    val tableName = getArgValueOrDefault(args, PARAMETERS(0))
+    val tablePath = getArgValueOrDefault(args, PARAMETERS(1))
+    val partitions = getArgValueOrDefault(args, 
PARAMETERS(2)).getOrElse("").toString
     val partitionsSeq = partitions.split(",").filter(_.nonEmpty).toSeq
+    val filter = getArgValueOrDefault(args, 
PARAMETERS(4)).get.asInstanceOf[String]
+
+    if (filter != null && filter.trim.nonEmpty) {
+      HoodieProcedureFilterUtils.validateFilterExpression(filter, OUTPUT_TYPE, 
sparkSession) match {
+        case Left(errorMessage) =>
+          throw new IllegalArgumentException(s"Invalid filter expression: 
$errorMessage")
+        case Right(_) => // Validation passed, continue
+      }
+    }
 
     val targetColumnsSeq = getTargetColumnsSeq(args)
-    val basePath = getBasePath(table)
+    val basePath = getBasePath(tableName, tablePath)
     val metadataConfig = HoodieMetadataConfig.newBuilder().enable(true).build
     val metaClient = createMetaClient(jsc, basePath)
     val schema = getSchema(metaClient)
     val columnStatsIndex = new ColumnStatsIndexSupport(spark, schema, 
metadataConfig, metaClient)
-    val fsView = buildFileSystemView(table)
+    val fsView = buildFileSystemView(tableName, tablePath)

Review Comment:
   Should `buildFileSystemView(tableName, tablePath)` take the already-resolved `basePath` instead? The base path is computed via `getBasePath(tableName, tablePath)` just above, so passing it directly would avoid resolving the table location a second time.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to