parthchandra commented on code in PR #3580:
URL: https://github.com/apache/datafusion-comet/pull/3580#discussion_r2875444499


##########
native/spark-expr/src/agg_funcs/avg_decimal.rs:
##########
@@ -240,37 +311,44 @@ impl Accumulator for AvgDecimalAccumulator {
             // of the computation
             return Ok(());
         }
-
         let values = &values[0];
         let data = values.as_primitive::<Decimal128Type>();
-
         self.is_empty = self.is_empty && values.len() == values.null_count();
-
         if values.null_count() == 0 {
             for i in 0..data.len() {
-                self.update_single(data, i);
+                self.update_single(data, i)?;
             }
         } else {
             for i in 0..data.len() {
                 if data.is_null(i) {
                     continue;
                 }
-                self.update_single(data, i);
+                self.update_single(data, i)?;
             }
         }
         Ok(())
     }
 
     fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
+        let partial_sums = states[0].as_primitive::<Decimal128Type>();
+        let partial_counts = states[1].as_primitive::<Int64Type>();
+
+        // Update is_empty: if any partial state has data, we're not empty
+        if self.is_empty {
+            self.is_empty = partial_counts.len() == 
partial_counts.null_count();
+        }
+
         // counts are summed
-        self.count += 
sum(states[1].as_primitive::<Int64Type>()).unwrap_or_default();
+        self.count += sum(partial_counts).unwrap_or_default();
 
         // sums are summed
-        if let Some(x) = sum(states[0].as_primitive::<Decimal128Type>()) {
+        if let Some(x) = sum(partial_sums) {
             let v = self.sum.get_or_insert(0);
             let (result, overflowed) = v.overflowing_add(x);
-            if overflowed {
-                // Set to None if overflow happens
+
+            if overflowed || !is_valid_decimal_precision(result, 
self.sum_precision) {

Review Comment:
   This is a tricky one. `overflowing_add` checks for integer overflow, but a 
decimal can also overflow when its precision is exceeded, so we need to check 
for that as well. This case is caught by the 
`DataFrameAggregateSuite.checkAggResultsForDecimalOverflow` test.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to