Dandandan commented on code in PR #16136: URL: https://github.com/apache/datafusion/pull/16136#discussion_r2104358004
########## datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive/mod.rs: ########## @@ -116,42 +122,60 @@ where { fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> Result<()> { assert_eq!(cols.len(), 1); + let col = cols[0].as_primitive::<T>(); + groups.clear(); + self.append_row_indices.clear(); - for v in cols[0].as_primitive::<T>() { + let mut num_total_groups = self.values.len(); + for (row_index, v) in col.iter().enumerate() { let group_id = match v { None => *self.null_group.get_or_insert_with(|| { - let group_id = self.values.len(); - self.values.push(Default::default()); + let group_id = num_total_groups; + self.append_row_indices.push(row_index as u32); + num_total_groups += 1; group_id }), Some(key) => { let state = &self.random_state; let hash = key.hash(state); let insert = self.map.entry( hash, - |&(g, _)| unsafe { self.values.get_unchecked(g).is_eq(key) }, - |&(_, h)| h, + |&(_, v)| v.is_eq(key), + |&(_, v)| v.hash(state), ); match insert { hashbrown::hash_table::Entry::Occupied(o) => o.get().0, hashbrown::hash_table::Entry::Vacant(v) => { - let g = self.values.len(); - v.insert((g, hash)); - self.values.push(key); + let g = num_total_groups; + v.insert((g, key)); + self.append_row_indices.push(row_index as u32); + num_total_groups += 1; g } } } }; groups.push(group_id) Review Comment: this can `extend` to `groups` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org