Github user xuanyuanking commented on a diff in the pull request:
https://github.com/apache/spark/pull/22222#discussion_r212784374
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala
---
@@ -40,6 +42,29 @@ private[sql] trait ColumnarBatchScan extends
CodegenSupport {
"numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of
output rows"),
"scanTime" -> SQLMetrics.createTimingMetric(sparkContext, "scan time"))
+ /**
+ * Returns all the RDDs of ColumnarBatch which generates the input rows.
+ */
+ def inputBatchRDDs(): Seq[RDD[ColumnarBatch]]
+
+ /**
+ * Returns all the RDDs of InternalRow which generates the input rows.
+ */
+ def inputRowRDDs(): Seq[RDD[InternalRow]]
+
+ /**
+ * Get input RDD depends on supportsBatch.
+ */
+ final def getInputRDDs(): Seq[RDD[InternalRow]] = {
+ if (supportsBatch) {
+ inputBatchRDDs().asInstanceOf[Seq[RDD[InternalRow]]]
--- End diff --
This may be the last explicit erasure hack left; please check whether it
is acceptable or not.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]