LuciferYang commented on code in PR #40352:
URL: https://github.com/apache/spark/pull/40352#discussion_r1131995398


##########
connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala:
##########
@@ -584,6 +585,101 @@ final class DataFrameStatFunctions private[sql] 
(sparkSession: SparkSession, roo
     }
     CountMinSketch.readFrom(ds.head())
   }
+
+  /**
+   * Builds a Bloom filter over a specified column.
+   *
+   * @param colName
+   *   name of the column over which the filter is built
+   * @param expectedNumItems
+   *   expected number of items which will be put into the filter.
+   * @param fpp
+   *   expected false positive probability of the filter.
+   * @since 3.4.0
+   */
+  def bloomFilter(colName: String, expectedNumItems: Long, fpp: Double): 
BloomFilter = {
+    buildBloomFilter(Column(colName), expectedNumItems, -1L, fpp)
+  }
+
+  /**
+   * Builds a Bloom filter over a specified column.
+   *
+   * @param col
+   *   the column over which the filter is built
+   * @param expectedNumItems
+   *   expected number of items which will be put into the filter.
+   * @param fpp
+   *   expected false positive probability of the filter.
+   * @since 3.4.0
+   */
+  def bloomFilter(col: Column, expectedNumItems: Long, fpp: Double): 
BloomFilter = {
+    buildBloomFilter(col, expectedNumItems, -1L, fpp)
+  }
+
+  /**
+   * Builds a Bloom filter over a specified column.
+   *
+   * @param colName
+   *   name of the column over which the filter is built
+   * @param expectedNumItems
+   *   expected number of items which will be put into the filter.
+   * @param numBits
+   *   expected number of bits of the filter.
+   * @since 3.4.0
+   */
+  def bloomFilter(colName: String, expectedNumItems: Long, numBits: Long): 
BloomFilter = {
+    buildBloomFilter(Column(colName), expectedNumItems, numBits, Double.NaN)
+  }
+
+  /**
+   * Builds a Bloom filter over a specified column.
+   *
+   * @param col
+   *   the column over which the filter is built
+   * @param expectedNumItems
+   *   expected number of items which will be put into the filter.
+   * @param numBits
+   *   expected number of bits of the filter.
+   * @since 3.4.0
+   */
+  def bloomFilter(col: Column, expectedNumItems: Long, numBits: Long): 
BloomFilter = {
+    buildBloomFilter(col, expectedNumItems, numBits, Double.NaN)
+  }
+
+  private def buildBloomFilter(
+      col: Column,
+      expectedNumItems: Long,
+      numBits: Long,
+      fpp: Double): BloomFilter = {
+
+    def optimalNumOfBits(n: Long, p: Double): Long =

Review Comment:
   Do you mean to pass all 3 parameters to the server side, and then do the 
`conversion(optimalNumOfBits)` on the server side?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to