comphead commented on code in PR #18677:
URL: https://github.com/apache/datafusion/pull/18677#discussion_r2524623010
##########
datafusion/functions-aggregate/src/correlation.rs:
##########
@@ -465,21 +417,33 @@ impl GroupsAccumulator for CorrelationGroupsAccumulator {
// - Correlation can't be calculated when a group only has 1 record,
or when
// the `denominator` state is 0. In these cases, the final
aggregation
// result should be `Null` (according to PostgreSQL's behavior).
+ // - However, if any of the accumulated values contain NaN, the result
should
+ // be NaN regardless of the count (even for single-row groups).
//
for i in 0..n {
- if self.count[i] < 2 {
- values.push(0.0);
- nulls.append_null();
- continue;
- }
-
let count = self.count[i];
let sum_x = self.sum_x[i];
let sum_y = self.sum_y[i];
let sum_xy = self.sum_xy[i];
let sum_xx = self.sum_xx[i];
let sum_yy = self.sum_yy[i];
+ // Check for NaN in the sums BEFORE checking count
+ // If BOTH sum_x AND sum_y are NaN, then both input values are NaN
→ return NaN
+ // If only ONE of them is NaN, then only one input value is NaN →
return NULL
+ // This takes precedence over the count < 2 check
+ if sum_x.is_nan() && sum_y.is_nan() {
Review Comment:
this is the actual change
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]