Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/20020#discussion_r158019546
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
---
@@ -87,6 +87,51 @@ case class ExecutedCommandExec(cmd: RunnableCommand)
extends LeafExecNode {
}
}
+/**
+ * A physical operator that executes the run method of a `DataWritingCommand` and
+ * saves the result to prevent multiple executions.
+ *
+ * @param cmd the `DataWritingCommand` this operator will run.
+ * @param children the child physical plans run by the `DataWritingCommand`.
+ */
+case class DataWritingCommandExec(cmd: DataWritingCommand, children:
Seq[SparkPlan])
+ extends SparkPlan {
+
+ override lazy val metrics: Map[String, SQLMetric] = cmd.metrics
+
+ /**
+ * A concrete command should override this lazy field to wrap up any
side effects caused by the
+ * command or any other computation that should be evaluated exactly
once. The value of this field
+ * can be used as the contents of the corresponding RDD generated from
the physical plan of this
+ * command.
+ *
+ * The `execute()` method of all the physical command classes should
reference `sideEffectResult`
+ * so that the command can be executed eagerly right after the command
query is created.
+ */
+ protected[sql] lazy val sideEffectResult: Seq[InternalRow] = {
+ val converter =
CatalystTypeConverters.createToCatalystConverter(schema)
+ val rows = cmd.run(sqlContext.sparkSession, children)
+
+ rows.map(converter(_).asInstanceOf[InternalRow])
+ }
+
+ override def innerChildren: Seq[QueryPlan[_]] = cmd.children
--- End diff --
Why do we need this `innerChildren` override?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]