cloud-fan commented on code in PR #37248:
URL: https://github.com/apache/spark/pull/37248#discussion_r927769131
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala:
##########
@@ -86,19 +86,22 @@ case class ExternalRDDScanExec[T](
/**
* Logical plan node for scanning data from an RDD of InternalRow.
*
- * It is advised to set the field `originLogicalPlan` if the RDD is directly built from DataFrame,
- * as the stat can be inherited from `originLogicalPlan`.
+ * It is advised to set the field `originStats` and `originConstraints` if the RDD is directly
+ * built from DataFrame, so that Spark can make better optimizations.
*/
case class LogicalRDD(
output: Seq[Attribute],
rdd: RDD[InternalRow],
- originLogicalPlan: Option[LogicalPlan] = None,
outputPartitioning: Partitioning = UnknownPartitioning(0),
override val outputOrdering: Seq[SortOrder] = Nil,
- override val isStreaming: Boolean = false)(session: SparkSession)
+ override val isStreaming: Boolean = false)(
+ session: SparkSession,
+ originStats: Option[Statistics] = None,
+ originConstraints: Option[ExpressionSet] = None)
Review Comment:
Can we add a comment to explain why they are in the curried constructor?
It is because we don't want other rules to mistakenly transform and rewrite them.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]