wangyum commented on code in PR #38682:
URL: https://github.com/apache/spark/pull/38682#discussion_r1026470585


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala:
##########
@@ -117,4 +117,23 @@ object ExprUtils extends QueryErrorsBase {
       TypeCheckSuccess
     }
   }
+
+  /**
+   * Combine a number of boolean expressions into a balanced expression tree. 
These expressions are
+   * either combined by a logical [[And]] or a logical [[Or]].
+   *
+   * A balanced binary tree is created because regular left recursive trees 
cause considerable
+   * performance degradations and can cause stack overflows.
+   */
+  def reduceToExpressionTree(
+       patterns: Seq[Expression],
+       expressionCombiner: (Expression, Expression) => Expression): Expression 
= {
+    assert(patterns.size > 0)
+    var res = patterns
+    while (res.size > 1) {
+      res = res.sliding(2, 2).toSeq
+        .map(tup => if (tup.size == 2) expressionCombiner(tup.head, tup.last) 
else tup(0))
+    }
+    res.head

Review Comment:
   seems that the original implementation is better than this one?
   ```scala
     def reduceToExpressionTree1(
         patterns: Seq[Expression],
         expressionCombiner: (Expression, Expression) => Expression): 
Expression = {
       var res = patterns
       while (res.size > 1) {
         res = res.sliding(2, 2).toSeq
           .map(tup => if (tup.size == 2) expressionCombiner(tup.head, 
tup.last) else tup(0))
       }
       res.head
     }
   
     def reduceToExpressionTree2(
         expressions: Seq[Expression],
         expressionCombiner: (Expression, Expression) => Expression): 
Expression = {
       def reduceToExpressionTree(low: Int, high: Int): Expression = high - low 
match {
         case 0 =>
           expressions(low)
         case 1 =>
           expressionCombiner(expressions(low), expressions(high))
         case x =>
           val mid = low + x / 2
           expressionCombiner(
             reduceToExpressionTree(low, mid),
             reduceToExpressionTree(mid + 1, high))
       }
       reduceToExpressionTree(0, expressions.size - 1)
     }
   
       Seq(10000, 100000, 1000000).foreach { N =>
         val expressions = Range(1, N).map(Literal(_))
   
         val benchmark = new Benchmark(s"Benchmark reduceToExpressionTree with 
$N elements", N, minNumIters = 10)
         benchmark.addCase("reduceToExpressionTree1") { _ =>
           reduceToExpressionTree1(expressions, Or.apply)
         }
   
         benchmark.addCase("reduceToExpressionTree2") { _ =>
           reduceToExpressionTree2(expressions, Or.apply)
         }
   
         benchmark.run()
       }
   ```
   
   ```
   OpenJDK 64-Bit Server VM 1.8.0_352-b08 on Mac OS X 10.16
   Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
   Benchmark reduceToExpressionTree with 1000000 elements:  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------
   reduceToExpressionTree1                                           823        
   1012         212          1.2         822.5       1.0X
   reduceToExpressionTree2                                           150        
    183          25          6.7         149.7       5.5X
   
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to