vamsikarnika commented on code in PR #17460:
URL: https://github.com/apache/hudi/pull/17460#discussion_r2601802361
##########
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowCompactionProcedure.scala:
##########
@@ -29,22 +30,94 @@ import org.apache.spark.sql.types._
import java.util.function.Supplier
import scala.collection.JavaConverters._
+import scala.util.Try
+/**
+ * Spark SQL procedure to show all compaction operations for a MoR Hudi table.
+ *
+ * This procedure provides a comprehensive view of Hudi compaction operations,
displaying both
+ * completed compactions with execution metadata and pending compaction plans.
+ * For pending operations, execution-specific fields are gracefully set to
null.
+ *
+ * == Parameters ==
+ * - `table`: Optional. The name of the Hudi table to query (either `table` or
`path` must be provided)
+ * - `path`: Optional. The path of the Hudi table (either `table` or `path`
must be provided)
+ * - `limit`: Optional. Maximum number of compaction operations to return
(default: 20)
+ * - `showArchived`: Optional. Whether to include archived compaction
operations (default: false)
+ * - `filter`: Optional. SQL expression to filter results (default: empty
string)
+ * - `startTime`: Optional. Start time for compactions (format:
yyyyMMddHHmmss, default: empty)
+ * - `endTime`: Optional. End time for compactions (format: yyyyMMddHHmmss,
default: empty)
+ *
+ * == Output Schema ==
+ * - `compaction_time`: Timestamp when the compaction operation was initiated
+ * - `state_transition_time`: Time when the compaction transitioned to
completed state (null for pending)
+ * - `state`: Operation state (COMPLETED, INFLIGHT, REQUESTED)
+ * - `action`: The action type (compaction)
+ * - `operation_size`: Number of operations in the compaction plan
+ * - `partition_path`: Partition path for the compaction operation
+ * - `total_log_files_per_partition`: Total number of log files processed in
the partition
+ * - `total_updated_records_compacted_per_partition`: Total number of updated
records compacted in the partition
+ * - `total_log_size_compacted_per_partition`: Total size of log files
compacted in the partition
+ * - `total_write_bytes_per_partition`: Total bytes written during compaction
for the partition
+ *
+ * == Error Handling ==
+ * - Throws `IllegalArgumentException` for invalid filter expressions or
non-MoR tables
+ * - Returns empty result set if no compaction plans match the criteria
+ *
+ * == Filter Support ==
+ * The `filter` parameter supports SQL expressions for filtering results.
+ *
+ * === Common Filter Examples ===
+ * {{{
+ * -- Show only completed compactions
+ * CALL show_compaction(
+ * table => 'my_table',
+ * filter => "state = 'COMPLETED'"
+ * )
+ *
+ * -- Show large compactions (more than 10 operations)
+ * CALL show_compaction(
+ * table => 'my_table',
+ * filter => "operation_size > 10"
+ * )
+ *
+ * -- Show compactions initiated after a given instant time
+ * CALL show_compaction(
+ * table => 'my_table',
+ * filter => "compaction_time > '20240101120000000'"
+ * )
+ *
+ * -- Show pending compactions
+ * CALL show_compaction(
+ * table => 'my_table',
+ * filter => "state IN ('INFLIGHT', 'REQUESTED')"
+ * )
+ * }}}
+ *
+ */
class ShowCompactionProcedure extends BaseProcedure with ProcedureBuilder with
SparkAdapterSupport with Logging {
- /**
- * SHOW COMPACTION ON tableIdentifier (LIMIT limit = INTEGER_VALUE)?
- * SHOW COMPACTION ON path = STRING (LIMIT limit = INTEGER_VALUE)?
- */
private val PARAMETERS = Array[ProcedureParameter](
ProcedureParameter.optional(0, "table", DataTypes.StringType),
ProcedureParameter.optional(1, "path", DataTypes.StringType),
- ProcedureParameter.optional(2, "limit", DataTypes.IntegerType, 20)
+ ProcedureParameter.optional(2, "limit", DataTypes.IntegerType, 20),
+ ProcedureParameter.optional(3, "showArchived", DataTypes.BooleanType,
false),
+ ProcedureParameter.optional(4, "filter", DataTypes.StringType, ""),
+ ProcedureParameter.optional(5, "startTime", DataTypes.StringType, ""),
+ ProcedureParameter.optional(6, "endTime", DataTypes.StringType, "")
)
Review Comment:
same here
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]