kazuyukitanimura commented on code in PR #456:
URL: https://github.com/apache/datafusion-comet/pull/456#discussion_r1608866121
##########
spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala:
##########
@@ -547,6 +547,26 @@ object QueryPlanSerde extends Logging with
ShimQueryPlanSerde with CometExprShim
withInfo(aggExpr, child)
None
}
+ case corr @ Corr(child1, child2, nullOnDivideByZero) =>
+ val child1Expr = exprToProto(child1, inputs, binding)
+ val child2Expr = exprToProto(child2, inputs, binding)
+ val dataType = serializeDataType(corr.dataType)
+
+ if (child1Expr.isDefined && child2Expr.isDefined &&
dataType.isDefined) {
+ val corrBuilder = ExprOuterClass.Correlation.newBuilder()
+ corrBuilder.setChild1(child1Expr.get)
+ corrBuilder.setChild2(child2Expr.get)
+ corrBuilder.setNullOnDivideByZero(nullOnDivideByZero)
+ corrBuilder.setDatatype(dataType.get)
+
+ Some(
+ ExprOuterClass.AggExpr
+ .newBuilder()
+ .setCorrelation(corrBuilder)
+ .build())
+ } else {
+ None
Review Comment:
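Should we add `withInfo()` in this `else` branch before returning `None`, as the preceding case does with `withInfo(aggExpr, child)`?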
##########
spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala:
##########
@@ -1212,6 +1212,157 @@ class CometAggregateSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
}
}
+ test("correlation") {
+ withSQLConf(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true") {
+ Seq(false).foreach { cometColumnShuffleEnabled =>
+ withSQLConf(
+ CometConf.COMET_COLUMNAR_SHUFFLE_ENABLED.key ->
cometColumnShuffleEnabled.toString) {
+ Seq(false).foreach { dictionary =>
+ withSQLConf("parquet.enable.dictionary" -> dictionary.toString) {
+ Seq(true).foreach { nullOnDivideByZero =>
+ withSQLConf(
+ "spark.sql.legacy.statisticalAggregate" ->
nullOnDivideByZero.toString) {
+ val table = "test"
+ withTable(table) {
+ sql(
+ s"create table $table(col1 double, col2 double, col3
double) using parquet")
+ sql(s"insert into $table values(1, 4, 1), (2, 5, 1), (3,
6, 2)")
+ val expectedNumOfCometAggregates = 2
+
+ sql("SELECT corr(col1, col2) FROM test GROUP BY col3").show
Review Comment:
nit: we can reuse `$table` here via string interpolation, e.g. `s"SELECT corr(col1, col2) FROM $table GROUP BY col3"`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]