This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/spark-connect-swift.git
The following commit(s) were added to refs/heads/main by this push: new 7639540 [SPARK-51642] Support `explain` for `DataFrame` 7639540 is described below commit 7639540cf3bdad230fca7a7bd4e7a0decd1d9824 Author: Dongjoon Hyun <dongj...@apache.org> AuthorDate: Fri Mar 28 08:39:44 2025 -0700 [SPARK-51642] Support `explain` for `DataFrame` ### What changes were proposed in this pull request? This PR aims to support `explain` for `DataFrame`. ### Why are the changes needed? For the feature parity. ### Does this PR introduce _any_ user-facing change? No, this is a new API addition to the unreleased versions. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #32 from dongjoon-hyun/SPARK-51642. Authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- Sources/SparkConnect/DataFrame.swift | 25 +++++++++++++++++++++++++ Sources/SparkConnect/Extension.swift | 12 ++++++++++++ Sources/SparkConnect/SparkConnectClient.swift | 12 ++++++++++++ Sources/SparkConnect/TypeAliases.swift | 1 + Tests/SparkConnectTests/DataFrameTests.swift | 9 +++++++++ 5 files changed, 59 insertions(+) diff --git a/Sources/SparkConnect/DataFrame.swift b/Sources/SparkConnect/DataFrame.swift index 504d8c2..b1ec758 100644 --- a/Sources/SparkConnect/DataFrame.swift +++ b/Sources/SparkConnect/DataFrame.swift @@ -330,4 +330,29 @@ public actor DataFrame: Sendable { return self } + + public func explain() async throws { + try await explain("simple") + } + + public func explain(_ extended: Bool) async throws { + if (extended) { + try await explain("extended") + } else { + try await explain("simple") + } + } + + public func explain(_ mode: String) async throws { + try await withGRPCClient( + transport: .http2NIOPosix( + target: .dns(host: spark.client.host, port: spark.client.port), + transportSecurity: .plaintext + ) + ) { client in + let service = Spark_Connect_SparkConnectService.Client(wrapping: client) + let response = try await service.analyzePlan(spark.client.getExplain(spark.sessionID, plan, mode)) + print(response.explain.explainString) + } + } } diff --git a/Sources/SparkConnect/Extension.swift b/Sources/SparkConnect/Extension.swift index da330cc..1d470fe 100644 --- a/Sources/SparkConnect/Extension.swift +++ b/Sources/SparkConnect/Extension.swift @@ -57,6 +57,18 @@ extension String { expression.expression = self return expression } + + var toExplainMode: ExplainMode { + let mode = switch self { + case "codegen": ExplainMode.codegen + case "cost": ExplainMode.cost + case "extended": ExplainMode.extended + case "formatted": ExplainMode.formatted + case "simple": ExplainMode.simple + default: ExplainMode.simple + } + return mode + } } extension [String: String] { diff --git a/Sources/SparkConnect/SparkConnectClient.swift b/Sources/SparkConnect/SparkConnectClient.swift index 0b2d523..4d28c34 100644 --- a/Sources/SparkConnect/SparkConnectClient.swift +++ b/Sources/SparkConnect/SparkConnectClient.swift @@ -282,6 +282,18 @@ public actor SparkConnectClient { }) } + func getExplain(_ sessionID: String, _ plan: Plan, _ mode: String) async -> AnalyzePlanRequest + { + return analyze( + sessionID, + { + var explain = AnalyzePlanRequest.Explain() + explain.plan = plan + explain.explainMode = mode.toExplainMode + return OneOf_Analyze.explain(explain) + }) + } + static func getProject(_ child: Relation, _ cols: [String]) -> Plan { var project = Project() project.input = child diff --git a/Sources/SparkConnect/TypeAliases.swift b/Sources/SparkConnect/TypeAliases.swift index 275ed9d..aa1e087 100644 --- a/Sources/SparkConnect/TypeAliases.swift +++ b/Sources/SparkConnect/TypeAliases.swift @@ -22,6 +22,7 @@ typealias ConfigRequest = Spark_Connect_ConfigRequest typealias DataSource = Spark_Connect_Read.DataSource typealias DataType = Spark_Connect_DataType typealias ExecutePlanRequest = Spark_Connect_ExecutePlanRequest +typealias ExplainMode = AnalyzePlanRequest.Explain.ExplainMode typealias ExpressionString = Spark_Connect_Expression.ExpressionString typealias Filter = Spark_Connect_Filter typealias KeyValue = Spark_Connect_KeyValue diff --git a/Tests/SparkConnectTests/DataFrameTests.swift b/Tests/SparkConnectTests/DataFrameTests.swift index 29dddb4..87b8fa4 100644 --- a/Tests/SparkConnectTests/DataFrameTests.swift +++ b/Tests/SparkConnectTests/DataFrameTests.swift @@ -70,6 +70,15 @@ struct DataFrameTests { await spark.stop() } + @Test + func explain() async throws { + let spark = try await SparkSession.builder.getOrCreate() + try await spark.range(1).explain() + try await spark.range(1).explain(true) + try await spark.range(1).explain("formatted") + await spark.stop() + } + @Test func count() async throws { let spark = try await SparkSession.builder.getOrCreate() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org