cboumalh commented on code in PR #54338:
URL: https://github.com/apache/spark/pull/54338#discussion_r2828312682


##########
sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala:
##########
@@ -3904,6 +3904,213 @@ class DataFrameAggregateSuite extends QueryTest
     assert(estimate == 1.0)
   }
 
+  test("SPARK-55558: tuple_difference_theta_double basic functionality") {
+    val df1 = Seq((1, 1.5), (2, 2.5), (3, 3.5), (5, 5.5)).toDF("key", "summary")
+    val df2 = Seq(1, 2, 4).toDF("value")
+
+    val tupleSketchDf = df1.agg(tuple_sketch_agg_double($"key", $"summary").alias("tuple_sketch"))
+    val thetaSketchDf = df2.agg(theta_sketch_agg($"value").alias("theta_sketch"))
+
+    val joined = tupleSketchDf.crossJoin(thetaSketchDf)
+
+    // Test difference (keys in tuple_sketch but not in theta_sketch: 3 and 5)
+    val difference = joined
+      .select(tuple_difference_theta_double($"tuple_sketch", $"theta_sketch"))
+      .collect()(0)(0)
+    assert(difference != null)
+    assert(difference.asInstanceOf[Array[Byte]].length > 0)
+
+    // Test with column names
+    val difference2 = joined
+      .select(tuple_difference_theta_double("tuple_sketch", "theta_sketch"))
+      .collect()(0)(0)
+    assert(difference2 != null)
+
+    // Verify estimate from difference
+    val estimate = joined
+      .select(tuple_sketch_estimate_double(
+        tuple_difference_theta_double($"tuple_sketch", $"theta_sketch")))
+      .collect()(0)(0)
+    assert(estimate == 2.0)

Review Comment:
   added



##########
sql/core/src/test/resources/sql-tests/inputs/tuplesketch.sql:
##########
@@ -371,6 +371,139 @@ SELECT tuple_sketch_estimate_integer(
     tuple_sketch_agg_integer(key2, val2, 12, 'sum')))
 FROM t_int_int_1_5_through_7_11;
 
+-- Test tuple_union_theta_double function with IntegerType key sketches
+SELECT tuple_sketch_estimate_double(
+  tuple_union_theta_double(
+    tuple_sketch_agg_double(key1, val1),
+    theta_sketch_agg(key2)))
+FROM t_int_double_1_5_through_7_11;
+
+-- Test tuple_union_theta_double function with LongType key sketches and explicit lgNomEntries parameter
+SELECT tuple_sketch_estimate_double(
+  tuple_union_theta_double(
+    tuple_sketch_agg_double(key1, val1, 15),
+    theta_sketch_agg(key2), 15))
+FROM t_long_double_1_5_through_7_11;
+
+-- Test tuple_union_theta_double function with DoubleType key sketches
+SELECT tuple_sketch_estimate_double(
+  tuple_union_theta_double(
+    tuple_sketch_agg_double(key1, val1),
+    theta_sketch_agg(key2)))
+FROM t_double_double_1_1_1_4_through_1_5_1_8;
+
+-- Test tuple_union_theta_double function with StringType key sketches and explicit lgNomEntries parameter
+SELECT tuple_sketch_estimate_double(
+  tuple_union_theta_double(
+    tuple_sketch_agg_double(key1, val1),
+    theta_sketch_agg(key2), 14))
+FROM t_string_double_a_d_through_e_h;
+
+-- Test tuple_union_theta_double with lgNomEntries and mode parameters
+SELECT tuple_sketch_estimate_double(
+  tuple_union_theta_double(
+    tuple_sketch_agg_double(key1, val1, 12, 'sum'),
+    theta_sketch_agg(key2), 12, 'sum'))
+FROM t_int_double_1_5_through_7_11;
+
+-- Test tuple_union_theta_integer function with IntegerType key sketches
+SELECT tuple_sketch_estimate_integer(
+  tuple_union_theta_integer(
+    tuple_sketch_agg_integer(key1, val1),
+    theta_sketch_agg(key2)))
+FROM t_int_int_1_5_through_7_11;
+
+-- Test tuple_union_theta_integer with lgNomEntries and mode parameters
+SELECT tuple_sketch_estimate_integer(
+  tuple_union_theta_integer(
+    tuple_sketch_agg_integer(key1, val1, 12, 'sum'),
+    theta_sketch_agg(key2), 12, 'sum'))
+FROM t_int_int_1_5_through_7_11;
+
+-- Test tuple_intersection_theta_double function with IntegerType key sketches
+SELECT tuple_sketch_estimate_double(
+  tuple_intersection_theta_double(
+    tuple_sketch_agg_double(key1, val1),
+    theta_sketch_agg(key2)))
+FROM t_int_double_1_5_through_7_11;
+
+-- Test tuple_intersection_theta_double function with LongType key sketches
+SELECT tuple_sketch_estimate_double(
+  tuple_intersection_theta_double(
+    tuple_sketch_agg_double(key1, val1, 5),
+    theta_sketch_agg(key2)))
+FROM t_long_double_1_5_through_7_11;
+
+-- Test tuple_intersection_theta_double function with DoubleType key sketches
+SELECT tuple_sketch_estimate_double(
+  tuple_intersection_theta_double(
+    tuple_sketch_agg_double(key1, val1),
+    theta_sketch_agg(key2)))
+FROM t_double_double_1_1_1_4_through_1_5_1_8;
+
+-- Test tuple_intersection_theta_double function with StringType key sketches
+SELECT tuple_sketch_estimate_double(
+  tuple_intersection_theta_double(
+    tuple_sketch_agg_double(key1, val1),
+    theta_sketch_agg(key2)))
+FROM t_string_double_a_d_through_e_h;
+
+-- Test tuple_intersection_theta_double with mode parameter
+SELECT tuple_sketch_estimate_double(
+  tuple_intersection_theta_double(
+    tuple_sketch_agg_double(key1, val1, 12, 'min'),
+    theta_sketch_agg(key2), 'min'))
+FROM t_int_double_1_5_through_7_11;
+
+-- Test tuple_intersection_theta_integer function with IntegerType key sketches
+SELECT tuple_sketch_estimate_integer(
+  tuple_intersection_theta_integer(
+    tuple_sketch_agg_integer(key1, val1),
+    theta_sketch_agg(key2)))
+FROM t_int_int_1_5_through_7_11;
+
+-- Test tuple_intersection_theta_integer with mode parameter
+SELECT tuple_sketch_estimate_integer(
+  tuple_intersection_theta_integer(
+    tuple_sketch_agg_integer(key1, val1, 12, 'sum'),
+    theta_sketch_agg(key2), 'sum'))
+FROM t_int_int_1_5_through_7_11;
+
+-- Test tuple_difference_theta_double function with IntegerType key sketches
+SELECT tuple_sketch_estimate_double(
+  tuple_difference_theta_double(
+    tuple_sketch_agg_double(key1, val1),
+    theta_sketch_agg(key2)))
+FROM t_int_double_1_5_through_7_11;
+
+-- Test tuple_difference_theta_double function with LongType key sketches
+SELECT tuple_sketch_estimate_double(
+  tuple_difference_theta_double(
+    tuple_sketch_agg_double(key1, val1),
+    theta_sketch_agg(key2)))
+FROM t_long_double_1_5_through_7_11;
+
+-- Test tuple_difference_theta_double function with DoubleType key sketches
+SELECT tuple_sketch_estimate_double(
+  tuple_difference_theta_double(
+    tuple_sketch_agg_double(key1, val1),
+    theta_sketch_agg(key2)))
+FROM t_double_double_1_1_1_4_through_1_5_1_8;
+
+-- Test tuple_difference_theta_double function with StringType key sketches
+SELECT tuple_sketch_estimate_double(
+  tuple_difference_theta_double(
+    tuple_sketch_agg_double(key1, val1),
+    theta_sketch_agg(key2)))
+FROM t_string_double_a_d_through_e_h;
+
+-- Test tuple_difference_theta_integer function with IntegerType key sketches
+SELECT tuple_sketch_estimate_integer(
+  tuple_difference_theta_integer(
+    tuple_sketch_agg_integer(key1, val1, 12, 'sum'),

Review Comment:
   yep added these!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to