c21 commented on a change in pull request #34670:
URL: https://github.com/apache/spark/pull/34670#discussion_r753512929



##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
##########
@@ -1666,9 +1677,14 @@ case class WidthBucket(
   }
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    defineCodeGen(ctx, ev, (input, min, max, numBucket) =>
-      "org.apache.spark.sql.catalyst.expressions.WidthBucket" +
-        s".computeBucketNumber($input, $min, $max, $numBucket)")
+    nullSafeCodeGen(ctx, ev, (input, min, max, numBucket) => {

Review comment:
       Sorry if I am missing anything, but with the change to `nullSafeCodeGen` 
here, we should be able to handle null inputs, right? Why do we need to change 
the implementation of `computeBucketNumber()` as well?

##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
##########
@@ -1547,14 +1547,25 @@ case class BRound(child: Expression, scale: Expression)
 }
 
 object WidthBucket {
-
   def computeBucketNumber(value: Double, min: Double, max: Double, numBucket: 
Long): jl.Long = {
-    if (numBucket <= 0 || numBucket == Long.MaxValue || jl.Double.isNaN(value) 
|| min == max ||
-        jl.Double.isNaN(min) || jl.Double.isInfinite(min) ||
-        jl.Double.isNaN(max) || jl.Double.isInfinite(max)) {
-      return null
+    if (isNull(value, min, max, numBucket)) {
+      null
+    } else {
+      computeBucketNumberNotNull(value, min, max, numBucket)
     }
+  }
 
+  def isNull(value: Double, min: Double, max: Double, numBucket: Long): 
Boolean = {
+    numBucket <= 0 ||
+      numBucket == Long.MaxValue ||
+      jl.Double.isNaN(value) ||
+      min == max ||
+      jl.Double.isNaN(min) || jl.Double.isInfinite(min) ||
+      jl.Double.isNaN(max) || jl.Double.isInfinite(max)
+  }
+
+  def computeBucketNumberNotNull(

Review comment:
       nit: this method does not need to be public; it can be a `private def`.

##########
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
##########
@@ -760,4 +760,30 @@ class MathExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
       checkEvaluation(WidthBucket(Literal(v), Literal(s), Literal(e), 
Literal(n)), expected)
     }
   }
+
+  test("SPARK-37388: width_bucket") {

Review comment:
       Shall we have a more descriptive name, e.g. `SPARK-37388: width_bucket 
handles null input correctly`?

##########
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
##########
@@ -760,4 +760,30 @@ class MathExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
       checkEvaluation(WidthBucket(Literal(v), Literal(s), Literal(e), 
Literal(n)), expected)
     }
   }
+
+  test("SPARK-37388: width_bucket") {
+    val nullDouble = Literal.create(null, DoubleType)
+    val nullLong = Literal.create(null, LongType)
+
+    checkEvaluation(WidthBucket(5.35, 0.024, 10.06, 5L), 3L)

Review comment:
       I know we already handle that, but shall we add a test case where all 
inputs are null? `checkEvaluation(WidthBucket(nullDouble, nullDouble, 
nullDouble, nullLong), null)`

##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
##########
@@ -1547,14 +1547,25 @@ case class BRound(child: Expression, scale: Expression)
 }
 
 object WidthBucket {
-
   def computeBucketNumber(value: Double, min: Double, max: Double, numBucket: 
Long): jl.Long = {
-    if (numBucket <= 0 || numBucket == Long.MaxValue || jl.Double.isNaN(value) 
|| min == max ||
-        jl.Double.isNaN(min) || jl.Double.isInfinite(min) ||
-        jl.Double.isNaN(max) || jl.Double.isInfinite(max)) {
-      return null
+    if (isNull(value, min, max, numBucket)) {
+      null
+    } else {
+      computeBucketNumberNotNull(value, min, max, numBucket)
     }
+  }
 
+  def isNull(value: Double, min: Double, max: Double, numBucket: Long): 
Boolean = {

Review comment:
       nit: this method does not need to be public; it can be a `private def`.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to