kazuyukitanimura commented on code in PR #456: URL: https://github.com/apache/datafusion-comet/pull/456#discussion_r1608866121
########## spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala: ########## @@ -547,6 +547,26 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim withInfo(aggExpr, child) None } + case corr @ Corr(child1, child2, nullOnDivideByZero) => + val child1Expr = exprToProto(child1, inputs, binding) + val child2Expr = exprToProto(child2, inputs, binding) + val dataType = serializeDataType(corr.dataType) + + if (child1Expr.isDefined && child2Expr.isDefined && dataType.isDefined) { + val corrBuilder = ExprOuterClass.Correlation.newBuilder() + corrBuilder.setChild1(child1Expr.get) + corrBuilder.setChild2(child2Expr.get) + corrBuilder.setNullOnDivideByZero(nullOnDivideByZero) + corrBuilder.setDatatype(dataType.get) + + Some( + ExprOuterClass.AggExpr + .newBuilder() + .setCorrelation(corrBuilder) + .build()) + } else { + None Review Comment: Should we add a `withInfo()` call before returning `None` in this `else` branch, as the preceding case does with `withInfo(aggExpr, child)`? ########## spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala: ########## @@ -1212,6 +1212,157 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } + test("correlation") { + withSQLConf(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true") { + Seq(false).foreach { cometColumnShuffleEnabled => + withSQLConf( + CometConf.COMET_COLUMNAR_SHUFFLE_ENABLED.key -> cometColumnShuffleEnabled.toString) { + Seq(false).foreach { dictionary => + withSQLConf("parquet.enable.dictionary" -> dictionary.toString) { + Seq(true).foreach { nullOnDivideByZero => + withSQLConf( + "spark.sql.legacy.statisticalAggregate" -> nullOnDivideByZero.toString) { + val table = "test" + withTable(table) { + sql( + s"create table $table(col1 double, col2 double, col3 double) using parquet") + sql(s"insert into $table values(1, 4, 1), (2, 5, 1), (3, 6, 2)") + val expectedNumOfCometAggregates = 2 + + sql("SELECT corr(col1, col2) FROM test GROUP BY col3").show Review Comment: nit: we can reuse `$table` here (via an `s"..."` interpolated string) instead of hard-coding the table name `test`. -- This is an automated message from the Apache Git 
Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org