Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/18747#discussion_r146411821
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
---
@@ -490,22 +502,14 @@ case class CollapseCodegenStages(conf: SQLConf)
extends Rule[SparkPlan] {
case _ => true
}
- private def numOfNestedFields(dataType: DataType): Int = dataType match {
- case dt: StructType => dt.fields.map(f =>
numOfNestedFields(f.dataType)).sum
- case m: MapType => numOfNestedFields(m.keyType) +
numOfNestedFields(m.valueType)
- case a: ArrayType => numOfNestedFields(a.elementType)
- case u: UserDefinedType[_] => numOfNestedFields(u.sqlType)
- case _ => 1
- }
-
private def supportCodegen(plan: SparkPlan): Boolean = plan match {
case plan: CodegenSupport if plan.supportCodegen =>
val willFallback = plan.expressions.exists(_.find(e =>
!supportCodegen(e)).isDefined)
// the generated code will be huge if there are too many columns
val hasTooManyOutputFields =
- numOfNestedFields(plan.schema) > conf.wholeStageMaxNumFields
+ WholeStageCodegenExec.isTooManyFields(conf, plan.schema)
val hasTooManyInputFields =
- plan.children.map(p => numOfNestedFields(p.schema)).exists(_ >
conf.wholeStageMaxNumFields)
+ plan.children.find(p =>
WholeStageCodegenExec.isTooManyFields(conf, p.schema)).isDefined
--- End diff --
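Nit: `find(...).isDefined` can be simplified to `exists(...)`, i.e. `plan.children.exists(p => WholeStageCodegenExec.isTooManyFields(conf, p.schema))`.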
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]