hvanhovell commented on code in PR #40352:
URL: https://github.com/apache/spark/pull/40352#discussion_r1160392895


##########
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala:
##########
@@ -1073,6 +1074,91 @@ class SparkConnectPlanner(val session: SparkSession) {
         }
         Some(Lead(children.head, children(1), children(2), ignoreNulls))
 
+      case "bloom_filter_agg" if fun.getArgumentsCount == 5 =>
+        // [col, catalogString: String, expectedNumItems: Long, numBits: Long, fpp: Double]
+        val children = 
fun.getArgumentsList.asScala.toSeq.map(transformExpression)
+        val dt = {
+          val ddl = children(1) match {
+            case StringLiteral(s) => s
+            case other =>
+              throw InvalidPlanInput(s"col dataType should be a literal 
string, but got $other")
+          }
+          DataType.fromDDL(ddl)
+        }
+        val col = dt match {
+          case IntegerType | ShortType | ByteType => Cast(children.head, 
LongType)
+          case LongType | StringType => children.head
+          case other =>
+            throw InvalidPlanInput(
+              s"Bloom filter only supports integral types, " +
+                s"and does not support type $other.")
+        }
+
+        val fpp = children(4) match {
+          case DoubleLiteral(d) => d
+          case _ =>
+            throw InvalidPlanInput("False positive must be double literal.")
+        }
+
+        if (fpp.isNaN) {
+          // Use expectedNumItems and numBits when `fpp.isNaN` is true.
+          // Check expectedNumItems > 0L
+          val expectedNumItemsExpr = children(2)
+          val expectedNumItems = expectedNumItemsExpr match {
+            case Literal(l: Long, LongType) => l
+            case _ =>
+              throw InvalidPlanInput("Expected insertions must be long 
literal.")
+          }
+          if (expectedNumItems <= 0L) {
+            throw InvalidPlanInput("Expected insertions must be positive.")
+          }
+          // Check numBits > 0L
+          val numBitsExpr = children(3)
+          val numBits = numBitsExpr match {
+            case Literal(l: Long, LongType) => l
+            case _ =>
+              throw InvalidPlanInput("Number of bits must be long literal.")
+          }
+          if (numBits <= 0L) {
+            throw InvalidPlanInput("Number of bits must be positive.")
+          }
+          // Create BloomFilterAggregate with expectedNumItemsExpr and numBitsExpr.
+          Some(
+            new BloomFilterAggregate(col, expectedNumItemsExpr, numBitsExpr)
+              .toAggregateExpression())
+
+        } else {
+          def optimalNumOfBits(n: Long, p: Double): Long =

Review Comment:
   Not public, just private[spark].



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to