LuciferYang commented on code in PR #40352:
URL: https://github.com/apache/spark/pull/40352#discussion_r1160557436


##########
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala:
##########
@@ -1073,6 +1074,91 @@ class SparkConnectPlanner(val session: SparkSession) {
         }
         Some(Lead(children.head, children(1), children(2), ignoreNulls))
 
+      case "bloom_filter_agg" if fun.getArgumentsCount == 5 =>
+        // [col, catalogString: String, expectedNumItems: Long, numBits: Long, fpp: Double]
+        val children = 
fun.getArgumentsList.asScala.toSeq.map(transformExpression)
+        val dt = {
+          val ddl = children(1) match {
+            case StringLiteral(s) => s
+            case other =>
+              throw InvalidPlanInput(s"col dataType should be a literal 
string, but got $other")
+          }
+          DataType.fromDDL(ddl)
+        }
+        val col = dt match {
+          case IntegerType | ShortType | ByteType => Cast(children.head, 
LongType)
+          case LongType | StringType => children.head
+          case other =>
+            throw InvalidPlanInput(
+              s"Bloom filter only supports integral types, " +
+                s"and does not support type $other.")
+        }
+
+        val fpp = children(4) match {
+          case DoubleLiteral(d) => d
+          case _ =>
+            throw InvalidPlanInput("False positive must be double literal.")
+        }
+
+        if (fpp.isNaN) {
+          // Use expectedNumItems and numBits when `fpp.isNaN` is true.
+          // Check expectedNumItems > 0L
+          val expectedNumItemsExpr = children(2)
+          val expectedNumItems = expectedNumItemsExpr match {
+            case Literal(l: Long, LongType) => l
+            case _ =>
+              throw InvalidPlanInput("Expected insertions must be long 
literal.")
+          }
+          if (expectedNumItems <= 0L) {
+            throw InvalidPlanInput("Expected insertions must be positive.")
+          }
+          // Check numBits > 0L
+          val numBitsExpr = children(3)
+          val numBits = numBitsExpr match {
+            case Literal(l: Long, LongType) => l
+            case _ =>
+              throw InvalidPlanInput("Number of bits must be long literal.")
+          }
+          if (numBits <= 0L) {
+            throw InvalidPlanInput("Number of bits must be positive.")
+          }
+          // Create BloomFilterAggregate with expectedNumItemsExpr and numBitsExpr.
+          Some(
+            new BloomFilterAggregate(col, expectedNumItemsExpr, numBitsExpr)
+              .toAggregateExpression())
+
+        } else {
+          def optimalNumOfBits(n: Long, p: Double): Long =

Review Comment:
   This is a Java file, so there is no `private[spark]`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to