Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/13890#discussion_r69674281 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala --- @@ -74,13 +74,71 @@ object RDDConversions { } } +private[sql] object ExistingRDD { + + def apply[T: Encoder](rdd: RDD[T])(session: SparkSession): LogicalPlan = { + val exisitingRdd = ExistingRDD(CatalystSerde.generateObjAttr[T], rdd)(session) + CatalystSerde.serialize[T](exisitingRdd) + } +} + /** Logical plan node for scanning data from an RDD. */ +private[sql] case class ExistingRDD[T]( + outputObjAttr: Attribute, + rdd: RDD[T])(session: SparkSession) + extends LeafNode with ObjectProducer with MultiInstanceRelation { + + override protected final def otherCopyArgs: Seq[AnyRef] = session :: Nil + + override def newInstance(): ExistingRDD.this.type = + ExistingRDD(outputObjAttr.newInstance(), rdd)(session).asInstanceOf[this.type] + + override def sameResult(plan: LogicalPlan): Boolean = { + plan.canonicalized match { + case ExistingRDD(_, otherRDD) => rdd.id == otherRDD.id + case _ => false + } + } + + override protected def stringArgs: Iterator[Any] = Iterator(output) + + @transient override lazy val statistics: Statistics = Statistics( + // TODO: Instead of returning a default value here, find a way to return a meaningful size + // estimate for RDDs. See PR 1238 for more discussions. + sizeInBytes = BigInt(session.sessionState.conf.defaultSizeInBytes) + ) +} + +/** Physical plan node for scanning data from an RDD. */ +private[sql] case class ExistingRDDScanExec[T]( --- End diff -- From the name it's hard to tell what's the difference between this one and `RDDScanExec`...
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastructure@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org