Repository: spark Updated Branches: refs/heads/master ca003354d -> afd0debe0
[SPARK-14137] [SPARK-14150] [SQL] Infer IsNotNull constraints from non-nullable attributes ## What changes were proposed in this pull request? This PR adds support for automatically inferring `IsNotNull` constraints from any non-nullable attributes that are part of an operator's output. This also fixes the issue that causes the optimizer to hit the maximum number of iterations for certain queries in https://github.com/apache/spark/pull/11828. ## How was this patch tested? Unit test in `ConstraintPropagationSuite` Author: Sameer Agarwal <[email protected]> Closes #11953 from sameeragarwal/infer-isnotnull. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/afd0debe Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/afd0debe Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/afd0debe Branch: refs/heads/master Commit: afd0debe075e9ea8466e384932a513ef0188273c Parents: ca00335 Author: Sameer Agarwal <[email protected]> Authored: Fri Mar 25 12:57:26 2016 -0700 Committer: Davies Liu <[email protected]> Committed: Fri Mar 25 12:57:26 2016 -0700 ---------------------------------------------------------------------- .../spark/sql/catalyst/plans/QueryPlan.scala | 38 ++++++++++++-------- .../plans/ConstraintPropagationSuite.scala | 9 +++++ .../hive/execution/HiveCompatibilitySuite.scala | 4 +-- 3 files changed, 33 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/afd0debe/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index e9bfa09..d31164f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -39,29 +39,37 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT } /** - * Infers a set of `isNotNull` constraints from a given set of equality/comparison expressions. - * For e.g., if an expression is of the form (`a > 5`), this returns a constraint of the form - * `isNotNull(a)` + * Infers a set of `isNotNull` constraints from a given set of equality/comparison expressions as + * well as non-nullable attributes. For e.g., if an expression is of the form (`a > 5`), this + * returns a constraint of the form `isNotNull(a)` */ private def constructIsNotNullConstraints(constraints: Set[Expression]): Set[Expression] = { - // Currently we only propagate constraints if the condition consists of equality - // and ranges. For all other cases, we return an empty set of constraints - constraints.map { + var isNotNullConstraints = Set.empty[Expression] + + // First, we propagate constraints if the condition consists of equality and ranges. For all + // other cases, we return an empty set of constraints + constraints.foreach { case EqualTo(l, r) => - Set(IsNotNull(l), IsNotNull(r)) + isNotNullConstraints ++= Set(IsNotNull(l), IsNotNull(r)) case GreaterThan(l, r) => - Set(IsNotNull(l), IsNotNull(r)) + isNotNullConstraints ++= Set(IsNotNull(l), IsNotNull(r)) case GreaterThanOrEqual(l, r) => - Set(IsNotNull(l), IsNotNull(r)) + isNotNullConstraints ++= Set(IsNotNull(l), IsNotNull(r)) case LessThan(l, r) => - Set(IsNotNull(l), IsNotNull(r)) + isNotNullConstraints ++= Set(IsNotNull(l), IsNotNull(r)) case LessThanOrEqual(l, r) => - Set(IsNotNull(l), IsNotNull(r)) + isNotNullConstraints ++= Set(IsNotNull(l), IsNotNull(r)) case Not(EqualTo(l, r)) => - Set(IsNotNull(l), IsNotNull(r)) - case _ => - Set.empty[Expression] - }.foldLeft(Set.empty[Expression])(_ union _.toSet) + isNotNullConstraints ++= Set(IsNotNull(l), IsNotNull(r)) + case _ => // No inference + } + + // Second, we infer additional constraints from non-nullable attributes that are part of the + // operator's output + val nonNullableAttributes = output.filterNot(_.nullable) + isNotNullConstraints ++= nonNullableAttributes.map(IsNotNull).toSet + + isNotNullConstraints -- constraints } /** http://git-wip-us.apache.org/repos/asf/spark/blob/afd0debe/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala index f3ab026..e506359 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.types.{IntegerType, StringType} class ConstraintPropagationSuite extends SparkFunSuite { @@ -217,4 +218,12 @@ class ConstraintPropagationSuite extends SparkFunSuite { IsNotNull(resolveColumn(tr, "a")), IsNotNull(resolveColumn(tr, "b"))))) } + + test("infer IsNotNull constraints from non-nullable attributes") { + val tr = LocalRelation('a.int, AttributeReference("b", IntegerType, nullable = false)(), + AttributeReference("c", StringType, nullable = false)()) + + verifyConstraints(tr.analyze.constraints, + ExpressionSet(Seq(IsNotNull(resolveColumn(tr, "b")), IsNotNull(resolveColumn(tr, "c"))))) + } } http://git-wip-us.apache.org/repos/asf/spark/blob/afd0debe/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 8bd731d..650797f 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -341,9 +341,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udf_round_3", "view_cast", - // enable this after fixing SPARK-14137 - "union20", - // These tests check the VIEW table definition, but Spark handles CREATE VIEW itself and // generates different View Expanded Text. "alter_view_as_select", @@ -1046,6 +1043,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "union18", "union19", "union2", + "union20", "union22", "union23", "union24", --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
