cboumalh commented on code in PR #51298:
URL: https://github.com/apache/spark/pull/51298#discussion_r2308322075


##########
sql/api/src/main/scala/org/apache/spark/sql/functions.scala:
##########
@@ -3552,6 +3715,154 @@ object functions {
     hll_union(Column(columnName1), Column(columnName2), 
allowDifferentLgConfigK)
   }
 
+  /**
+   * Subtracts two binary representations of Datasketches ThetaSketch objects, 
using a
+   * Datasketches AnotB object. Uses default log nominal entries.
+   *
+   * @group misc_funcs
+   * @since 4.1.0
+   */
+  def theta_difference(c1: Column, c2: Column): Column =
+    Column.fn("theta_difference", c1, c2)
+
+  /**
+   * Subtracts two binary representations of Datasketches ThetaSketch objects, 
using a
+   * Datasketches AnotB object. Uses default log nominal entries.
+   *
+   * @group misc_funcs
+   * @since 4.1.0
+   */
+  def theta_difference(columnName1: String, columnName2: String): Column = {
+    theta_difference(Column(columnName1), Column(columnName2))
+  }
+
+  /**
+   * Subtracts two binary representations of Datasketches ThetaSketch objects, 
using a
+   * Datasketches AnotB object. Allows setting of log nominal entries for the 
difference buffer.
+   *
+   * @group misc_funcs
+   * @since 4.1.0
+   */
+  def theta_difference(c1: Column, c2: Column, lgNomEntries: Int): Column =
+    Column.fn("theta_difference", c1, c2, lit(lgNomEntries))
+
+  /**
+   * Subtracts two binary representations of Datasketches ThetaSketch objects, 
using a
+   * Datasketches AnotB object. Allows setting of log nominal entries for the 
difference buffer.
+   *
+   * @group misc_funcs
+   * @since 4.1.0
+   */
+  def theta_difference(columnName1: String, columnName2: String, lgNomEntries: 
Int): Column = {
+    theta_difference(Column(columnName1), Column(columnName2), lgNomEntries)
+  }
+
+  /**
+   * Intersects two binary representations of Datasketches ThetaSketch 
objects, using a
+   * Datasketches Intersection object. Uses default log nominal entries.
+   *
+   * @group misc_funcs
+   * @since 4.1.0
+   */
+  def theta_intersection(c1: Column, c2: Column): Column =
+    Column.fn("theta_intersection", c1, c2)
+
+  /**
+   * Intersects two binary representations of Datasketches ThetaSketch 
objects, using a
+   * Datasketches Intersection object. Uses default log nominal entries.
+   *
+   * @group misc_funcs
+   * @since 4.1.0
+   */
+  def theta_intersection(columnName1: String, columnName2: String): Column = {
+    theta_intersection(Column(columnName1), Column(columnName2))
+  }
+
+  /**
+   * Intersects two binary representations of Datasketches ThetaSketch 
objects, using a
+   * Datasketches Intersection object. Allows setting of log nominal entries 
for the intersection
+   * buffer.
+   *
+   * @group misc_funcs
+   * @since 4.1.0
+   */
+  def theta_intersection(c1: Column, c2: Column, lgNomEntries: Int): Column =
+    Column.fn("theta_intersection", c1, c2, lit(lgNomEntries))
+
+  /**
+   * Intersects two binary representations of Datasketches ThetaSketch 
objects, using a
+   * Datasketches Intersection object. Allows setting of log nominal entries 
for the intersection
+   * buffer.
+   *
+   * @group misc_funcs
+   * @since 4.1.0
+   */
+  def theta_intersection(columnName1: String, columnName2: String, 
lgNomEntries: Int): Column = {
+    theta_intersection(Column(columnName1), Column(columnName2), lgNomEntries)
+  }
+
+  /**
+   * Returns the estimated number of unique values given the binary 
representation of a
+   * Datasketches ThetaSketch.
+   *
+   * @group misc_funcs
+   * @since 4.1.0
+   */
+  def theta_sketch_estimate(c: Column): Column = 
Column.fn("theta_sketch_estimate", c)
+
+  /**
+   * Returns the estimated number of unique values given the binary 
representation of a
+   * Datasketches ThetaSketch.
+   *
+   * @group misc_funcs
+   * @since 4.1.0
+   */
+  def theta_sketch_estimate(columnName: String): Column = {
+    theta_sketch_estimate(Column(columnName))
+  }
+
+  /**
+   * Merges two binary representations of Datasketches ThetaSketch objects, 
using a Datasketches
+   * Union object. Uses default log nominal entries.

Review Comment:
   I was just trying to point out that in the case of `def 
theta_union(columnName1: String, columnName2: String)`, for example, the 
function uses the default value of log nominal entries, whereas `def 
theta_union(c1: Column, c2: Column, lgNomEntries: Int)` allows the user to 
explicitly set the value. Please let me know what you think.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to