viirya commented on code in PR #987:
URL: https://github.com/apache/datafusion-comet/pull/987#discussion_r1803804846
##########
spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala:
##########
@@ -222,6 +225,59 @@ object CometExecBenchmark extends CometBenchmarkBase {
     }
   }
 
+  // BloomFilterAgg takes an argument for the expected number of distinct values, which determines filter size and
+  // number of hash functions. We use the cardinality as a hint to the aggregate, otherwise the default Spark values
+  // make a big filter with a lot of hash functions.
+  def bloomFilterAggregate(values: Int, cardinality: Int): Unit = {
+    val benchmark =
+      new Benchmark(
+        s"BloomFilterAggregate Exec (cardinality $cardinality)",
+        values,
+        output = output)
+
+    val funcId_bloom_filter_agg = new FunctionIdentifier("bloom_filter_agg")
+    spark.sessionState.functionRegistry.registerFunction(
+      funcId_bloom_filter_agg,
+      new ExpressionInfo(classOf[BloomFilterAggregate].getName, "bloom_filter_agg"),
+      (children: Seq[Expression]) =>
+        children.size match {
+          case 1 => new BloomFilterAggregate(children.head)
+          case 2 => new BloomFilterAggregate(children.head, children(1))
+          case 3 => new BloomFilterAggregate(children.head, children(1), children(2))

Review Comment:
   Do we use case 1 and case 3? It looks like `bloom_filter_agg` is always called with 2 arguments below.
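   For illustration, if the benchmark only ever passes two arguments, the registration could be narrowed to the two-argument constructor. The sketch below shows that narrower registration plus a two-argument invocation; it assumes Spark's internal `BloomFilterAggregate(child, estimatedNumItemsExpression)` constructor shown in the diff, and the table name `parquetV1Table`, column `key`, and helper name are placeholders rather than code from this PR:

   ```scala
   import org.apache.spark.sql.SparkSession
   import org.apache.spark.sql.catalyst.FunctionIdentifier
   import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo}
   import org.apache.spark.sql.catalyst.expressions.aggregate.BloomFilterAggregate

   // Sketch only: register just the two-argument builder and call it with an
   // explicit cardinality hint. Table/column names are placeholders.
   def bloomFilterAggTwoArgOnly(spark: SparkSession, cardinality: Int): Unit = {
     spark.sessionState.functionRegistry.registerFunction(
       new FunctionIdentifier("bloom_filter_agg"),
       new ExpressionInfo(classOf[BloomFilterAggregate].getName, "bloom_filter_agg"),
       // Only the (child, estimatedNumItemsExpression) constructor is exposed.
       (children: Seq[Expression]) =>
         new BloomFilterAggregate(children.head, children(1)))

     // Two-argument invocation: the value column plus the cardinality hint,
     // both cast to LONG, so Spark does not fall back to its large defaults.
     spark
       .sql(
         s"SELECT bloom_filter_agg(CAST(key AS LONG), CAST($cardinality AS LONG)) " +
           "FROM parquetV1Table")
       .collect()
   }
   ```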