vamsikarnika commented on code in PR #17460:
URL: https://github.com/apache/hudi/pull/17460#discussion_r2601802361


##########
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowCompactionProcedure.scala:
##########
@@ -29,22 +30,94 @@ import org.apache.spark.sql.types._
 import java.util.function.Supplier
 
 import scala.collection.JavaConverters._
+import scala.util.Try
 
+/**
+ * Spark SQL procedure to show all compaction operations for a MoR Hudi table.
+ *
+ * This procedure provides a comprehensive view of Hudi compaction operations, 
displaying both
+ * completed compactions with execution metadata and pending compaction plans.
+ * For pending operations, execution-specific fields are gracefully set to 
null.
+ *
+ * == Parameters ==
+ * - `table`: Optional. The name of the Hudi table to query (either `table` or 
`path` must be provided)
+ * - `path`: Optional. The path of the Hudi table (either `table` or `path` 
must be provided)
+ * - `limit`: Optional. Maximum number of compaction operations to return 
(default: 20)
+ * - `showArchived`: Optional. Whether to include archived compaction 
operations (default: false)
+ * - `filter`: Optional. SQL expression to filter results (default: empty 
string)
+ * - `startTime`: Optional. Start time for compactions (format: 
yyyyMMddHHmmss, default: empty)
+ * - `endTime`: Optional. End time for compactions (format: yyyyMMddHHmmss, 
default: empty)
+ *
+ * == Output Schema ==
+ * - `compaction_time`: Timestamp when the compaction operation was initiated
+ * - `state_transition_time`: Time when the compaction transitioned to 
completed state (null for pending)
+ * - `state`: Operation state (COMPLETED, INFLIGHT, REQUESTED)
+ * - `action`: The action type (compaction)
+ * - `operation_size`: Number of operations in the compaction plan
+ * - `partition_path`: Partition path for the compaction operation
+ * - `total_log_files_per_partition`: Total number of log files processed in 
the partition
+ * - `total_updated_records_compacted_per_partition`: Total number of updated 
records compacted in the partition
+ * - `total_log_size_compacted_per_partition`: Total size of log files 
compacted in the partition
+ * - `total_write_bytes_per_partition`: Total bytes written during compaction 
for the partition
+ *
+ * == Error Handling ==
+ * - Throws `IllegalArgumentException` for invalid filter expressions or 
non-MoR tables
+ * - Returns empty result set if no compaction plans match the criteria
+ *
+ * == Filter Support ==
+ * The `filter` parameter supports SQL expressions for filtering results.
+ *
+ * === Common Filter Examples ===
+ * {{{
+ * -- Show only completed compactions
+ * CALL show_compaction(
+ *   table => 'my_table',
+ *   filter => "state = 'COMPLETED'"
+ * )
+ *
+ * -- Show large compactions (more than 10 operations)
+ * CALL show_compaction(
+ *   table => 'my_table',
+ *   filter => "operation_size > 10"
+ * )
+ *
+ * -- Show compactions initiated after a given instant time
+ * CALL show_compaction(
+ *   table => 'my_table',
+ *   filter => "compaction_time > '20240101120000000'"
+ * )
+ *
+ * -- Show pending compactions
+ * CALL show_compaction(
+ *   table => 'my_table',
+ *   filter => "state IN ('INFLIGHT', 'REQUESTED')"
+ * )
+ * }}}
+ *
+ */
 class ShowCompactionProcedure extends BaseProcedure with ProcedureBuilder with 
SparkAdapterSupport with Logging {
-  /**
-   * SHOW COMPACTION  ON tableIdentifier (LIMIT limit = INTEGER_VALUE)?
-   * SHOW COMPACTION  ON path = STRING (LIMIT limit = INTEGER_VALUE)?
-   */
   private val PARAMETERS = Array[ProcedureParameter](
     ProcedureParameter.optional(0, "table", DataTypes.StringType),
     ProcedureParameter.optional(1, "path", DataTypes.StringType),
-    ProcedureParameter.optional(2, "limit", DataTypes.IntegerType, 20)
+    ProcedureParameter.optional(2, "limit", DataTypes.IntegerType, 20),
+    ProcedureParameter.optional(3, "showArchived", DataTypes.BooleanType, 
false),
+    ProcedureParameter.optional(4, "filter", DataTypes.StringType, ""),
+    ProcedureParameter.optional(5, "startTime", DataTypes.StringType, ""),
+    ProcedureParameter.optional(6, "endTime", DataTypes.StringType, "")
   )

Review Comment:
   same here



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to