LuciferYang commented on code in PR #40352: URL: https://github.com/apache/spark/pull/40352#discussion_r1160557436
########## connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala: ########## @@ -1073,6 +1074,91 @@ class SparkConnectPlanner(val session: SparkSession) { } Some(Lead(children.head, children(1), children(2), ignoreNulls)) + case "bloom_filter_agg" if fun.getArgumentsCount == 5 => + // [col, catalogString: String, expectedNumItems: Long, numBits: Long, fpp: Double] + val children = fun.getArgumentsList.asScala.toSeq.map(transformExpression) + val dt = { + val ddl = children(1) match { + case StringLiteral(s) => s + case other => + throw InvalidPlanInput(s"col dataType should be a literal string, but got $other") + } + DataType.fromDDL(ddl) + } + val col = dt match { + case IntegerType | ShortType | ByteType => Cast(children.head, LongType) + case LongType | StringType => children.head + case other => + throw InvalidPlanInput( + s"Bloom filter only supports integral types, " + + s"and does not support type $other.") + } + + val fpp = children(4) match { + case DoubleLiteral(d) => d + case _ => + throw InvalidPlanInput("False positive must be double literal.") + } + + if (fpp.isNaN) { + // Use expectedNumItems and numBits when `fpp.isNaN` is true. + // Check expectedNumItems > 0L + val expectedNumItemsExpr = children(2) + val expectedNumItems = expectedNumItemsExpr match { + case Literal(l: Long, LongType) => l + case _ => + throw InvalidPlanInput("Expected insertions must be long literal.") + } + if (expectedNumItems <= 0L) { + throw InvalidPlanInput("Expected insertions must be positive.") + } + // Check numBits > 0L + val numBitsExpr = children(3) + val numBits = numBitsExpr match { + case Literal(l: Long, LongType) => l + case _ => + throw InvalidPlanInput("Number of bits must be long literal.") + } + if (numBits <= 0L) { + throw InvalidPlanInput("Number of bits must be positive.") + } + // Create BloomFilterAggregate with expectedNumItemsExpr and numBitsExpr. 
+ Some( + new BloomFilterAggregate(col, expectedNumItemsExpr, numBitsExpr) + .toAggregateExpression()) + + } else { + def optimalNumOfBits(n: Long, p: Double): Long = Review Comment: This is a Java file, so there is no `private[spark]`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org