This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new b42ebe7 Clarify docs about `Accumulator::update` and
`Accumulator::update_batch` (#1542)
b42ebe7 is described below
commit b42ebe70b2d79426adbef6403f2f951b2a1b1ce6
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Jan 11 12:58:35 2022 -0500
Clarify docs about `Accumulator::update` and `Accumulator::update_batch`
(#1542)
* Clarify docs about `Accumulator::update` and `Accumulator::update_batch`
* Update datafusion/src/physical_plan/mod.rs
Co-authored-by: QP Hou <[email protected]>
* Improve docs, add comments on merge_batch
Co-authored-by: QP Hou <[email protected]>
---
datafusion/src/physical_plan/mod.rs | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/datafusion/src/physical_plan/mod.rs
b/datafusion/src/physical_plan/mod.rs
index 8c5f662..d39a7a0 100644
--- a/datafusion/src/physical_plan/mod.rs
+++ b/datafusion/src/physical_plan/mod.rs
@@ -565,7 +565,17 @@ pub trait Accumulator: Send + Sync + Debug {
// of two values, sum and n.
fn state(&self) -> Result<Vec<ScalarValue>>;
- /// updates the accumulator's state from a vector of scalars.
+ /// Updates the accumulator's state from a vector of scalars
+ /// (called by default implementation of [`update_batch`]).
+ ///
+ /// Note: this method is often the simplest to implement and is
+ /// backwards compatible to help to lower the barrier to entry for
+ /// new users to write `Accumulators`
+ ///
+ /// You should always implement `update_batch` instead of this
+ /// method for production aggregators or if you find yourself
+ /// wanting to use mathematical kernels for [`ScalarValue`] such as
+ /// `ScalarValue::add`, `ScalarValue::mul`, etc
fn update(&mut self, values: &[ScalarValue]) -> Result<()>;
/// updates the accumulator's state from a vector of arrays.
@@ -582,7 +592,12 @@ pub trait Accumulator: Send + Sync + Debug {
})
}
- /// updates the accumulator's state from a vector of scalars.
+ /// Updates the accumulator's state from a vector of scalars.
+ /// (called by default implementation of [`merge`]).
+ ///
+ /// You should always implement `merge_batch` instead of this
+ /// method for production aggregators. Please see notes on
+ /// [`update`] for more detail and rationale.
fn merge(&mut self, states: &[ScalarValue]) -> Result<()>;
/// updates the accumulator's state from a vector of states.