comphead commented on PR #16918:
URL: https://github.com/apache/datafusion/pull/16918#issuecomment-3122164585

   > does it mean converting `fn state()`'s such would fix the problem?
   
   Thanks @berkaysynnada I tried the code below(which is more concise version 
of your snippet), it didn't help
   
   ```
       fn state(&mut self) -> Result<Vec<ScalarValue>> {
           if self.ignore_nulls && self.last.is_null() {
               self.is_set = false;
           };
           Ok(vec![self.last.clone(), ScalarValue::from(self.is_set)])
       }
   ```
   
   > I couldn't understand this, can you explain a bit more?
   
   Please find some more details for this issue
   
   First file 
   
   |a  |  b|
   |---|---|
   |0  |  1|
   |0  |  2|
   |2 |  1|   
   
   Second file 
   
   |a  |  b|
   |---|---|
   |1  |  1|
   |1  |  2|
   |2 |  2| 
   
   See the row with key 2 split across 2 files.
   
   I'm running the same query 
   ```
    SELECT a, last_value(case when b==1 then null else b end) IGNORE NULLS FROM 
t1 GROUP BY a;
   ```
   
   and output values from `update_batch` and states from `merge_batch` from 
`TrivialLastValueAccumulator`
   
   DF
   
   ```
   > SELECT a, last_value(case when b==1 then null else b end) IGNORE NULLS 
FROM t1 GROUP BY a;
   [datafusion/functions-aggregate/src/first_last.rs:1302:9] &values = [
       PrimitiveArray<Int32>
       [
         2,
       ],
   ]
   [datafusion/functions-aggregate/src/first_last.rs:1302:9] &values = [
       PrimitiveArray<Int32>
       [
         null,
       ],
   ]
   [datafusion/functions-aggregate/src/first_last.rs:1302:9] &values = [
       PrimitiveArray<Int32>
       [
         null,
       ],
   ]
   [datafusion/functions-aggregate/src/first_last.rs:1302:9] &values = [
       PrimitiveArray<Int32>
       [
         2,
       ],
   ]
   [datafusion/functions-aggregate/src/first_last.rs:1302:9] &values = [
       PrimitiveArray<Int32>
       [
         null,
       ],
   ]
   [datafusion/functions-aggregate/src/first_last.rs:1302:9] &values = [
       PrimitiveArray<Int32>
       [
         2,
       ],
   ]
   [datafusion/functions-aggregate/src/first_last.rs:1327:9] &states = [
       PrimitiveArray<Int32>
       [
         2,
       ],
       BooleanArray
       [
         true,
       ],
   ]
   [datafusion/functions-aggregate/src/first_last.rs:1327:9] &states = [
       PrimitiveArray<Int32>
       [
         null,
       ],
       BooleanArray
       [
         false,
       ],
   ]
   [datafusion/functions-aggregate/src/first_last.rs:1327:9] &states = [
       PrimitiveArray<Int32>
       [
         2,
       ],
       BooleanArray
       [
         true,
       ],
   ]
   [datafusion/functions-aggregate/src/first_last.rs:1327:9] &states = [
       PrimitiveArray<Int32>
       [
         2,
       ],
       BooleanArray
       [
         true,
       ],
   ]
   [datafusion/functions-aggregate/src/first_last.rs:1327:9] &states = [
       PrimitiveArray<Int32>
       [
         null,
       ],
       BooleanArray
       [
         false,
       ],
   ]
   
+---+----------------------------------------------------------------------------+
   | a | last_value(CASE WHEN t1.b = Int64(1) THEN NULL ELSE t1.b END) IGNORE 
NULLS |
   
+---+----------------------------------------------------------------------------+
   | 2 | 2                                                                      
    |
   | 1 | 2                                                                      
    |
   | 0 | 2                                                                      
    |
   
+---+----------------------------------------------------------------------------+
   
   ```
   
   
   
   Comet
   ```
   25/07/26 10:06:39 INFO core/src/lib.rs: Comet native library version 0.10.0 
initialized
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1301:9]
 &values = [
       PrimitiveArray<Int32>
       [
         null,
       ],
   ]
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1301:9]
 &values = [
       PrimitiveArray<Int32>
       [
         null,
       ],
   ]
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1301:9]
 &values = [
       PrimitiveArray<Int32>
       [
         2,
       ],
   ]
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1301:9]
 &values = [
       PrimitiveArray<Int32>
       [
         2,
       ],
   ]
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1301:9]
 &values = [
       PrimitiveArray<Int32>
       [
         null,
       ],
   ]
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1301:9]
 &values = [
       PrimitiveArray<Int32>
       [
         2,
       ],
   ]
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1327:9]
 &states = [
       PrimitiveArray<Int32>
       [
         2,
       ],
       BooleanArray
       [
         true,
       ],
   ]
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1327:9]
 &states = [
       PrimitiveArray<Int32>
       [
         2,
       ],
       BooleanArray
       [
         true,
       ],
   ]
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1327:9]
 &states = [
       PrimitiveArray<Int32>
       [
         2,
       ],
       BooleanArray
       [
         true,
       ],
   ]
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1327:9]
 &states = [
       PrimitiveArray<Int32>
       [
         null,
       ],
       BooleanArray
       [
         true,
       ],
   ]
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1327:9]
 &states = [
       PrimitiveArray<Int32>
       [
         2,
       ],
       BooleanArray
       [
         true,
       ],
   ]
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1327:9]
 &states = [
       PrimitiveArray<Int32>
       [
         2,
       ],
       BooleanArray
       [
         true,
       ],
   ]
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1327:9]
 &states = [
       PrimitiveArray<Int32>
       [
         2,
       ],
       BooleanArray
       [
         true,
       ],
   ]
   
[/Users/ovoievodin/.cargo/git/checkouts/datafusion-11a8b534adb6bd68/afb9099/datafusion/functions-aggregate/src/first_last.rs:1327:9]
 &states = [
       PrimitiveArray<Int32>
       [
         null,
       ],
       BooleanArray
       [
         true,
       ],
   ]
   ```
   
   
   So values in `update_batch` are the same but ordering is different, not sure 
if this matters, tbh
   for states in `merge_batch`
   the Comet sends
   ```
   &states = [
       PrimitiveArray<Int32>
       [
         null,
       ],
       BooleanArray
       [
         true,
       ],
   ]
   ```
   
   which I believe confuses the accumulator as for DF the state sent as 
   ```
    &states = [
       PrimitiveArray<Int32>
       [
         null,
       ],
       BooleanArray
       [
         false,
       ],
   ]
   ``` 
   and not applied as it is false.
   
   Hope this explains
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to