This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 28eaa9c0861 [SPARK-44154][SQL] Added more unit tests to 
BitmapExpressionUtilsSuite and made minor improvements to Bitmap Aggregate 
Expressions
28eaa9c0861 is described below

commit 28eaa9c0861984d925098172ad08fa5d255ef8bc
Author: Harsh Motwani <harsh.motw...@databricks.com>
AuthorDate: Tue Jul 18 11:03:19 2023 +0800

    [SPARK-44154][SQL] Added more unit tests to BitmapExpressionUtilsSuite and 
made minor improvements to Bitmap Aggregate Expressions
    
    ### What changes were proposed in this pull request?
    I firstly added more unit tests for the `BITMAT_BIT_POSITION` and 
`BITMAP_BUCKET_NUMBER` expressions. Secondly, I made a minor improvement in the 
implementation of the `BITMAP_CONSTRUCT_AGG` and `BUTMAP_OR_AGG` expressions, 
where I converted `inputAggBufferAttributes` from a method to a value.
    
    ### Why are the changes needed?
    The unit tests cover more corner cases. Having `inputAggBufferAttributes` 
as a value makes it so that the AttributeReferences aren't reinitialized every 
time `inputAggBufferAttributes` is referred to.
    
    ### How was this patch tested?
    I reran all the tests for Bitmap expressions and they succeeded. The test 
suites were `BitmapExpressionUtilsSuite` and `BitmapExpressionsQuerySuite`.
    
    Closes #42043 from harshmotw-db/harsh-dev.
    
    Authored-by: Harsh Motwani <harsh.motw...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit df550cfc3ad820448ed5040df94c2b32fb161290)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../sql/catalyst/expressions/bitmapExpressions.scala    | 16 ++++++++--------
 .../expressions/BitmapExpressionUtilsSuite.scala        | 17 +++++++++++------
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitmapExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitmapExpressions.scala
index 2c89428f243..bd8f4efa059 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitmapExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitmapExpressions.scala
@@ -177,17 +177,17 @@ case class BitmapConstructAgg(child: Expression,
 
   override def aggBufferSchema: StructType = 
StructType.fromAttributes(aggBufferAttributes)
 
+  // The aggregation buffer is a fixed size binary.
+  private val bitmapAttr = AttributeReference("bitmap", BinaryType, nullable = 
false)()
+
   override def aggBufferAttributes: Seq[AttributeReference] = bitmapAttr :: Nil
 
   override def defaultResult: Option[Literal] =
     Option(Literal(Array.fill[Byte](BitmapExpressionUtils.NUM_BYTES)(0)))
 
-  override def inputAggBufferAttributes: Seq[AttributeReference] =
+  override val inputAggBufferAttributes: Seq[AttributeReference] =
     aggBufferAttributes.map(_.newInstance())
 
-  // The aggregation buffer is a fixed size binary.
-  private val bitmapAttr = AttributeReference("bitmap", BinaryType, nullable = 
false)()
-
   override def initialize(buffer: InternalRow): Unit = {
     buffer.update(mutableAggBufferOffset, 
Array.fill[Byte](BitmapExpressionUtils.NUM_BYTES)(0))
   }
@@ -270,17 +270,17 @@ case class BitmapOrAgg(child: Expression,
 
   override def aggBufferSchema: StructType = 
StructType.fromAttributes(aggBufferAttributes)
 
+  // The aggregation buffer is a fixed size binary.
+  private val bitmapAttr = AttributeReference("bitmap", BinaryType, false)()
+
   override def aggBufferAttributes: Seq[AttributeReference] = bitmapAttr :: Nil
 
   override def defaultResult: Option[Literal] =
     Option(Literal(Array.fill[Byte](BitmapExpressionUtils.NUM_BYTES)(0)))
 
-  override def inputAggBufferAttributes: Seq[AttributeReference] =
+  override val inputAggBufferAttributes: Seq[AttributeReference] =
     aggBufferAttributes.map(_.newInstance())
 
-  // The aggregation buffer is a fixed size binary.
-  private val bitmapAttr = AttributeReference("bitmap", BinaryType, false)()
-
   override def initialize(buffer: InternalRow): Unit = {
     buffer.update(mutableAggBufferOffset, 
Array.fill[Byte](BitmapExpressionUtils.NUM_BYTES)(0))
   }
diff --git 
a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala
 
b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala
index ee1f4026fed..53935c66c61 100644
--- 
a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala
+++ 
b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala
@@ -22,23 +22,27 @@ import org.apache.spark.SparkFunSuite
 class BitmapExpressionUtilsSuite extends SparkFunSuite {
 
   test("bitmap_bucket_number with positive inputs") {
-    Seq((0L, 0L), (1L, 1L), (2L, 1L), (3L, 1L),
-      (32768L, 1L), (32769L, 2L), (32770L, 2L)).foreach {
+    Seq((0L, 0L), (1L, 1L), (2L, 1L), (3L, 1L), (65537L, 3L), (65536L, 2L), 
(3232423L, 99L),
+      (4538345L, 139L), (845894934L, 25815L), (2147483647L, 65536L),
+      (Long.MaxValue, 281474976710656L), (32768L, 1L), (32769L, 2L), (32770L, 
2L)).foreach {
       case (input, expected) =>
         assert(BitmapExpressionUtils.bitmapBucketNumber(input) == expected)
     }
   }
 
   test("bitmap_bucket_number with negative inputs") {
-    Seq((-1L, 0L), (-2L, 0L), (-3L, 0L),
-      (-32767L, 0L), (-32768L, -1L), (-32769L, -1L)).foreach {
+    Seq((-1L, 0L), (-2L, 0L), (-3L, 0L), (-65536L, -2L), (65537L, 3L), 
(-65535L, -1L),
+      (-3843485L, -117L), (-2147483647L, -65535L), (-2147483648L, -65536L),
+      (Long.MinValue, -281474976710656L), (Long.MinValue + 1, 
-281474976710655L), (-32767L, 0L),
+      (-32768L, -1L), (-32769L, -1L)).foreach {
       case (input, expected) =>
         assert(BitmapExpressionUtils.bitmapBucketNumber(input) == expected)
     }
   }
 
   test("bitmap_bit_position with positive inputs") {
-    Seq((0L, 0L), (1L, 0L), (2L, 1L), (3L, 2L),
+    Seq((0L, 0L), (1L, 0L), (2L, 1L), (3L, 2L), (65537L, 0L), (65536L, 
32767L), (3232423L, 21158L),
+      (4538345L, 16360L), (845894934L, 21781L), (2147483647L, 32766L), 
(Long.MaxValue, 32766L),
       (32768L, 32767L), (32769L, 0L), (32770L, 1L)).foreach {
       case (input, expected) =>
         assert(BitmapExpressionUtils.bitmapBitPosition(input) == expected)
@@ -46,7 +50,8 @@ class BitmapExpressionUtilsSuite extends SparkFunSuite {
   }
 
   test("bitmap_bit_position with negative inputs") {
-    Seq((-1L, 1L), (-2L, 2L), (-3L, 3L),
+    Seq((-1L, 1L), (-2L, 2L), (-3L, 3L), (-65536L, 0L), (-65535L, 32767L), 
(-3843485L, 9629L),
+      (-2147483647L, 32767L), (-2147483648L, 0L), (Long.MinValue, 0L), 
(Long.MinValue + 1, 32767L),
       (-32767L, 32767L), (-32768L, 0L), (-32769L, 1L)).foreach {
       case (input, expected) =>
         assert(BitmapExpressionUtils.bitmapBitPosition(input) == expected)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to