miland-db commented on code in PR #47609:
URL: https://github.com/apache/spark/pull/47609#discussion_r1704152082
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SqlScriptingLogicalPlans.scala:
##########
@@ -73,4 +84,28 @@ case class IfElseStatement(
conditionalBodies: Seq[CompoundBody],
elseBody: Option[CompoundBody]) extends CompoundPlanStatement {
assert(conditions.length == conditionalBodies.length)
+
+ override def output: Seq[Attribute] = Seq.empty
Review Comment:
Why do we need to know the output here?
##########
sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingInterpreterSuite.scala:
##########
@@ -27,29 +29,44 @@ import org.apache.spark.sql.test.SharedSparkSession
* Output from the interpreter (iterator over executable statements) is then
checked - statements
* are executed and output DataFrames are compared with expected outputs.
*/
-class SqlScriptingInterpreterSuite extends QueryTest with SharedSparkSession {
+class SqlScriptingInterpreterSuite extends SparkFunSuite with
SharedSparkSession {
// Helpers
- private def verifySqlScriptResult(sqlText: String, expected: Seq[Seq[Row]]):
Unit = {
- val interpreter = SqlScriptingInterpreter()
- val compoundBody = spark.sessionState.sqlParser.parseScript(sqlText)
- val executionPlan = interpreter.buildExecutionPlan(compoundBody, spark)
- val result = executionPlan.flatMap {
- case statement: SingleStatementExec =>
- if (statement.isExecuted) {
- None
- } else {
- Some(Dataset.ofRows(spark, statement.parsedPlan, new
QueryPlanningTracker))
- }
- case _ => None
- }.toArray
-
+ private def verifySqlScriptResult(sqlText: String, expected:
Seq[Array[Row]]): Unit = {
+ val interpreter = SqlScriptingInterpreter(spark)
+ val compoundBody =
spark.sessionState.sqlParser.parsePlan(sqlText).asInstanceOf[CompoundBody]
+ val result = interpreter.executeInternal(compoundBody).toSeq
assert(result.length == expected.length)
- result.zip(expected).foreach { case (df, expectedAnswer) =>
checkAnswer(df, expectedAnswer) }
Review Comment:
If we use the `Seq` type to return the results of script execution, we may be
able to reuse `checkAnswer`, which performs some additional checks, instead of
this `assert(actualAnswer.sameElements(expectedAnswer))`.
##########
sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingInterpreter.scala:
##########
@@ -82,20 +89,36 @@ case class SqlScriptingInterpreter() {
.map(new SingleStatementExec(_, Origin(), isInternal = true))
.reverse
new CompoundBodyExec(
- body.collection.map(st => transformTreeIntoExecutable(st, session))
++ dropVariables)
+ body.collection.map(st => transformTreeIntoExecutable(st)) ++
dropVariables,
+ session)
case IfElseStatement(conditions, conditionalBodies, elseBody) =>
val conditionsExec = conditions.map(condition =>
- new SingleStatementExec(condition.parsedPlan, condition.origin,
isInternal = false))
+ new SingleStatementExec(condition.parsedPlan, condition.origin))
val conditionalBodiesExec = conditionalBodies.map(body =>
- transformTreeIntoExecutable(body,
session).asInstanceOf[CompoundBodyExec])
+ transformTreeIntoExecutable(body).asInstanceOf[CompoundBodyExec])
val unconditionalBodiesExec = elseBody.map(body =>
- transformTreeIntoExecutable(body,
session).asInstanceOf[CompoundBodyExec])
+ transformTreeIntoExecutable(body).asInstanceOf[CompoundBodyExec])
new IfElseStatementExec(
conditionsExec, conditionalBodiesExec, unconditionalBodiesExec,
session)
case sparkStatement: SingleStatement =>
new SingleStatementExec(
sparkStatement.parsedPlan,
sparkStatement.origin,
- isInternal = false)
+ shouldCollectResult = true)
}
+
+ /**
+ * Execute the provided CompoundBody and return all results.
+ *
+ * @param compoundBody
+ * CompoundBody to execute.
+ * @return Results from all leaf statements.
+ */
+ private[scripting] def executeInternal(compoundBody: CompoundBody):
Iterator[Array[Row]] = {
Review Comment:
We should also check whether there are any differences when working with
`Array` vs. `Seq` and choose the better option. I went with `Array` because it
was faster/easier to write the POC without unnecessary conversions between
types.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]