Repository: spark
Updated Branches:
refs/heads/master 6942aeeb0 -> b8aaef49f
[SPARK-21807][SQL] Override ++ operation in ExpressionSet to reduce clone time
## What changes were proposed in this pull request?
The getAliasedConstraints function in LogicalPlan.scala clones the
expression set each time an element is added,
which takes a long time. This PR adds a function that adds multiple elements
at once to reduce the cloning time.
Before this change, the cost of getAliasedConstraints is:
100 expressions: 41 seconds
150 expressions: 466 seconds
After this change, the cost of getAliasedConstraints is:
100 expressions: 1.8 seconds
150 expressions: 6.5 seconds
The test is like this:
test("getAliasedConstraints") {
val expressionNum = 150
val aggExpression = (1 to expressionNum).map(i => Alias(Count(Literal(1)),
s"cnt$i")())
val aggPlan = Aggregate(Nil, aggExpression, LocalRelation())
val beginTime = System.currentTimeMillis()
val expressions = aggPlan.validConstraints
println(s"validConstraints cost: ${System.currentTimeMillis() -
beginTime}ms")
// The size of Aliased expression is n * (n - 1) / 2 + n
assert( expressions.size === expressionNum * (expressionNum - 1) / 2 +
expressionNum)
}
(Please fill in changes proposed in this fix)
## How was this patch tested?
(Please explain how this patch was tested. E.g. unit tests, integration tests,
manual tests)
(If this patch involves UI changes, please attach a screenshot; otherwise,
remove this)
Ran the newly added test.
Please review http://spark.apache.org/contributing.html before opening a pull
request.
Author: 10129659 <[email protected]>
Closes #19022 from eatoncys/getAliasedConstraints.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b8aaef49
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b8aaef49
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b8aaef49
Branch: refs/heads/master
Commit: b8aaef49fbf02401c874b06d17cbe354f739b9e7
Parents: 6942aee
Author: 10129659 <[email protected]>
Authored: Wed Aug 23 20:35:08 2017 -0700
Committer: gatorsmile <[email protected]>
Committed: Wed Aug 23 20:35:08 2017 -0700
----------------------------------------------------------------------
.../spark/sql/catalyst/expressions/ExpressionSet.scala | 8 +++++++-
.../spark/sql/catalyst/expressions/ExpressionSetSuite.scala | 9 +++++++++
2 files changed, 16 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/b8aaef49/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
index ede0b16..305ac90 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql.catalyst.expressions
-import scala.collection.mutable
+import scala.collection.{mutable, GenTraversableOnce}
import scala.collection.mutable.ArrayBuffer
object ExpressionSet {
@@ -67,6 +67,12 @@ class ExpressionSet protected(
newSet
}
+ override def ++(elems: GenTraversableOnce[Expression]): ExpressionSet = {
+ val newSet = new ExpressionSet(baseSet.clone(), originals.clone())
+ elems.foreach(newSet.add)
+ newSet
+ }
+
override def -(elem: Expression): ExpressionSet = {
val newBaseSet = baseSet.clone().filterNot(_ == elem.canonicalized)
val newOriginals = originals.clone().filterNot(_.canonicalized ==
elem.canonicalized)
http://git-wip-us.apache.org/repos/asf/spark/blob/b8aaef49/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
index d617ad5..a1000a0 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
@@ -210,4 +210,13 @@ class ExpressionSetSuite extends SparkFunSuite {
assert((initialSet - (aLower + 1)).size == 0)
}
+
+ test("add multiple elements to set") {
+ val initialSet = ExpressionSet(aUpper + 1 :: Nil)
+ val setToAddWithSameExpression = ExpressionSet(aUpper + 1 :: aUpper + 2 ::
Nil)
+ val setToAddWithOutSameExpression = ExpressionSet(aUpper + 3 :: aUpper + 4
:: Nil)
+
+ assert((initialSet ++ setToAddWithSameExpression).size == 2)
+ assert((initialSet ++ setToAddWithOutSameExpression).size == 3)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]