This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 7c0e15025a Minor: expr-doc small fixes (#12960)
7c0e15025a is described below
commit 7c0e15025a9455ec1b4a2aff13e0c59eb0cc7381
Author: Jonathan Chen <[email protected]>
AuthorDate: Wed Oct 16 13:31:13 2024 -0400
Minor: expr-doc small fixes (#12960)
* expr-common-doc-fixes
* fmt fixes
---
datafusion/expr-common/src/accumulator.rs | 16 +++++------
datafusion/expr-common/src/columnar_value.rs | 2 +-
datafusion/expr-common/src/groups_accumulator.rs | 9 +++---
datafusion/expr-common/src/interval_arithmetic.rs | 4 +--
datafusion/expr-common/src/signature.rs | 6 ++--
.../expr-common/src/type_coercion/aggregates.rs | 32 +++++++++++-----------
datafusion/expr-common/src/type_coercion/binary.rs | 22 +++++++--------
7 files changed, 45 insertions(+), 46 deletions(-)
diff --git a/datafusion/expr-common/src/accumulator.rs
b/datafusion/expr-common/src/accumulator.rs
index 7533520945..7155c7993f 100644
--- a/datafusion/expr-common/src/accumulator.rs
+++ b/datafusion/expr-common/src/accumulator.rs
@@ -39,7 +39,7 @@ use std::fmt::Debug;
/// function])
///
/// * convert its internal state to a vector of aggregate values via
-/// [`state`] and combine the state from multiple accumulators'
+/// [`state`] and combine the state from multiple accumulators
/// via [`merge_batch`], as part of efficient multi-phase grouping.
///
/// [`GroupsAccumulator`]: crate::GroupsAccumulator
@@ -68,7 +68,7 @@ pub trait Accumulator: Send + Sync + Debug {
/// result in potentially non-deterministic behavior.
///
/// This function gets `&mut self` to allow for the accumulator to build
- /// arrow compatible internal state that can be returned without copying
+ /// arrow-compatible internal state that can be returned without copying
/// when possible (for example distinct strings)
fn evaluate(&mut self) -> Result<ScalarValue>;
@@ -89,14 +89,14 @@ pub trait Accumulator: Send + Sync + Debug {
/// result in potentially non-deterministic behavior.
///
/// This function gets `&mut self` to allow for the accumulator to build
- /// arrow compatible internal state that can be returned without copying
+ /// arrow-compatible internal state that can be returned without copying
/// when possible (for example distinct strings).
///
/// Intermediate state is used for "multi-phase" grouping in
/// DataFusion, where an aggregate is computed in parallel with
/// multiple `Accumulator` instances, as described below:
///
- /// # MultiPhase Grouping
+ /// # Multi-Phase Grouping
///
/// ```text
/// ▲
@@ -140,9 +140,9 @@ pub trait Accumulator: Send + Sync + Debug {
/// to be summed together)
///
/// Some accumulators can return multiple values for their
- /// intermediate states. For example average, tracks `sum` and
- /// `n`, and this function should return
- /// a vector of two values, sum and n.
+ /// intermediate states. For example, the average accumulator
+ /// tracks `sum` and `n`, and this function should return a vector
+ /// of two values, sum and n.
///
/// Note that [`ScalarValue::List`] can be used to pass multiple
/// values if the number of intermediate values is not known at
@@ -204,7 +204,7 @@ pub trait Accumulator: Send + Sync + Debug {
/// The final output is computed by repartitioning the result of
/// [`Self::state`] from each Partial aggregate and `hash(group keys)` so
/// that each distinct group key appears in exactly one of the
- /// `AggregateMode::Final` GroupBy nodes. The output of the final nodes are
+ /// `AggregateMode::Final` GroupBy nodes. The outputs of the final nodes
are
/// then unioned together to produce the overall final output.
///
/// Here is an example that shows the distribution of groups in the
diff --git a/datafusion/expr-common/src/columnar_value.rs
b/datafusion/expr-common/src/columnar_value.rs
index 7b614ba9c4..1ee90eb4b4 100644
--- a/datafusion/expr-common/src/columnar_value.rs
+++ b/datafusion/expr-common/src/columnar_value.rs
@@ -129,7 +129,7 @@ impl ColumnarValue {
})
}
- /// null columnar values are implemented as a null array in order to pass
batch
+ /// Null columnar values are implemented as a null array in order to pass
batch
/// num_rows
pub fn create_null_array(num_rows: usize) -> Self {
ColumnarValue::Array(Arc::new(NullArray::new(num_rows)))
diff --git a/datafusion/expr-common/src/groups_accumulator.rs
b/datafusion/expr-common/src/groups_accumulator.rs
index d19d8e4696..2c8b126cb5 100644
--- a/datafusion/expr-common/src/groups_accumulator.rs
+++ b/datafusion/expr-common/src/groups_accumulator.rs
@@ -111,8 +111,7 @@ pub trait GroupsAccumulator: Send {
///
/// * `values`: the input arguments to the accumulator
///
- /// * `group_indices`: To which groups do the rows in `values`
- /// belong, group id)
+ /// * `group_indices`: The group indices to which each row in `values`
belongs.
///
/// * `opt_filter`: if present, only update aggregate state using
/// `values[i]` if `opt_filter[i]` is true
@@ -185,9 +184,9 @@ pub trait GroupsAccumulator: Send {
/// differ. See [`Self::state`] for more details on how state is
/// used and merged.
///
- /// * `values`: arrays produced from calling `state` previously to the
accumulator
+ /// * `values`: arrays produced from previously calling `state` on other
accumulators.
///
- /// Other arguments are the same as for [`Self::update_batch`];
+ /// Other arguments are the same as for [`Self::update_batch`].
fn merge_batch(
&mut self,
values: &[ArrayRef],
@@ -196,7 +195,7 @@ pub trait GroupsAccumulator: Send {
total_num_groups: usize,
) -> Result<()>;
- /// Converts an input batch directly the intermediate aggregate state.
+ /// Converts an input batch directly to the intermediate aggregate state.
///
/// This is the equivalent of treating each input row as its own group. It
/// is invoked when the Partial phase of a multi-phase aggregation is not
diff --git a/datafusion/expr-common/src/interval_arithmetic.rs
b/datafusion/expr-common/src/interval_arithmetic.rs
index e76453d91a..ffaa32f080 100644
--- a/datafusion/expr-common/src/interval_arithmetic.rs
+++ b/datafusion/expr-common/src/interval_arithmetic.rs
@@ -1223,8 +1223,8 @@ pub fn satisfy_greater(
}
}
- // Only the lower bound of left hand side and the upper bound of the right
- // hand side can change after propagating the greater-than operation.
+ // Only the lower bound of left-hand side and the upper bound of the
right-hand
+ // side can change after propagating the greater-than operation.
let new_left_lower = if left.lower.is_null() || left.lower <= right.lower {
if strict {
next_value(right.lower.clone())
diff --git a/datafusion/expr-common/src/signature.rs
b/datafusion/expr-common/src/signature.rs
index 320e1303a2..24cb54f634 100644
--- a/datafusion/expr-common/src/signature.rs
+++ b/datafusion/expr-common/src/signature.rs
@@ -35,7 +35,7 @@ pub const TIMEZONE_WILDCARD: &str = "+TZ";
/// valid length. It exists to avoid the need to enumerate all possible fixed
size list lengths.
pub const FIXED_SIZE_LIST_WILDCARD: i32 = i32::MIN;
-///A function's volatility, which defines the functions eligibility for
certain optimizations
+/// A function's volatility, which defines the function's eligibility for
certain optimizations
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Volatility {
/// An immutable function will always return the same output when given
the same
@@ -86,7 +86,7 @@ pub enum Volatility {
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum TypeSignature {
- /// One or more arguments of an common type out of a list of valid types.
+ /// One or more arguments of a common type out of a list of valid types.
///
/// # Examples
/// A function such as `concat` is `Variadic(vec![DataType::Utf8,
DataType::LargeUtf8])`
@@ -127,7 +127,7 @@ pub enum TypeSignature {
Numeric(usize),
/// Fixed number of arguments of all the same string types.
/// The precedence of type from high to low is Utf8View, LargeUtf8 and
Utf8.
- /// Null is considerd as Utf8 by default
+ /// Null is considered as `Utf8` by default
/// Dictionary with string value type is also handled.
String(usize),
}
diff --git a/datafusion/expr-common/src/type_coercion/aggregates.rs
b/datafusion/expr-common/src/type_coercion/aggregates.rs
index 2add9e7c18..fee75f9e45 100644
--- a/datafusion/expr-common/src/type_coercion/aggregates.rs
+++ b/datafusion/expr-common/src/type_coercion/aggregates.rs
@@ -143,21 +143,21 @@ pub fn check_arg_count(
Ok(())
}
-/// function return type of a sum
+/// Function return type of a sum
pub fn sum_return_type(arg_type: &DataType) -> Result<DataType> {
match arg_type {
DataType::Int64 => Ok(DataType::Int64),
DataType::UInt64 => Ok(DataType::UInt64),
DataType::Float64 => Ok(DataType::Float64),
DataType::Decimal128(precision, scale) => {
- // in the spark, the result type is DECIMAL(min(38,precision+10),
s)
- // ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
+ // In Spark, the result type is DECIMAL(min(38,precision+10),
s)
+ // Ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
let new_precision = DECIMAL128_MAX_PRECISION.min(*precision + 10);
Ok(DataType::Decimal128(new_precision, *scale))
}
DataType::Decimal256(precision, scale) => {
- // in the spark, the result type is DECIMAL(min(38,precision+10),
s)
- // ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
+ // In Spark, the result type is DECIMAL(min(38,precision+10),
s)
+ // Ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
let new_precision = DECIMAL256_MAX_PRECISION.min(*precision + 10);
Ok(DataType::Decimal256(new_precision, *scale))
}
@@ -165,7 +165,7 @@ pub fn sum_return_type(arg_type: &DataType) ->
Result<DataType> {
}
}
-/// function return type of variance
+/// Function return type of variance
pub fn variance_return_type(arg_type: &DataType) -> Result<DataType> {
if NUMERICS.contains(arg_type) {
Ok(DataType::Float64)
@@ -174,7 +174,7 @@ pub fn variance_return_type(arg_type: &DataType) ->
Result<DataType> {
}
}
-/// function return type of covariance
+/// Function return type of covariance
pub fn covariance_return_type(arg_type: &DataType) -> Result<DataType> {
if NUMERICS.contains(arg_type) {
Ok(DataType::Float64)
@@ -183,7 +183,7 @@ pub fn covariance_return_type(arg_type: &DataType) ->
Result<DataType> {
}
}
-/// function return type of correlation
+/// Function return type of correlation
pub fn correlation_return_type(arg_type: &DataType) -> Result<DataType> {
if NUMERICS.contains(arg_type) {
Ok(DataType::Float64)
@@ -192,19 +192,19 @@ pub fn correlation_return_type(arg_type: &DataType) ->
Result<DataType> {
}
}
-/// function return type of an average
+/// Function return type of an average
pub fn avg_return_type(func_name: &str, arg_type: &DataType) ->
Result<DataType> {
match arg_type {
DataType::Decimal128(precision, scale) => {
- // in the spark, the result type is DECIMAL(min(38,precision+4),
min(38,scale+4)).
- // ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66
+ // In Spark, the result type is DECIMAL(min(38,precision+4),
min(38,scale+4)).
+ // Ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66
let new_precision = DECIMAL128_MAX_PRECISION.min(*precision + 4);
let new_scale = DECIMAL128_MAX_SCALE.min(*scale + 4);
Ok(DataType::Decimal128(new_precision, new_scale))
}
DataType::Decimal256(precision, scale) => {
- // in the spark, the result type is DECIMAL(min(38,precision+4),
min(38,scale+4)).
- // ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66
+ // In Spark, the result type is DECIMAL(min(38,precision+4),
min(38,scale+4)).
+ // Ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66
let new_precision = DECIMAL256_MAX_PRECISION.min(*precision + 4);
let new_scale = DECIMAL256_MAX_SCALE.min(*scale + 4);
Ok(DataType::Decimal256(new_precision, new_scale))
@@ -217,16 +217,16 @@ pub fn avg_return_type(func_name: &str, arg_type:
&DataType) -> Result<DataType>
}
}
-/// internal sum type of an average
+/// Internal sum type of an average
pub fn avg_sum_type(arg_type: &DataType) -> Result<DataType> {
match arg_type {
DataType::Decimal128(precision, scale) => {
- // in the spark, the sum type of avg is
DECIMAL(min(38,precision+10), s)
+ // In Spark, the sum type of avg is
DECIMAL(min(38,precision+10), s)
let new_precision = DECIMAL128_MAX_PRECISION.min(*precision + 10);
Ok(DataType::Decimal128(new_precision, *scale))
}
DataType::Decimal256(precision, scale) => {
- // in Spark the sum type of avg is DECIMAL(min(38,precision+10), s)
+ // In Spark the sum type of avg is DECIMAL(min(38,precision+10), s)
let new_precision = DECIMAL256_MAX_PRECISION.min(*precision + 10);
Ok(DataType::Decimal256(new_precision, *scale))
}
diff --git a/datafusion/expr-common/src/type_coercion/binary.rs
b/datafusion/expr-common/src/type_coercion/binary.rs
index e042dd5d3a..887586f4f7 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -191,7 +191,7 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType)
-> Result<Signature>
}
}
-/// returns the resulting type of a binary expression evaluating the `op` with
the left and right hand types
+/// Returns the resulting type of a binary expression evaluating the `op` with
the left and right hand types
pub fn get_result_type(
lhs: &DataType,
op: &Operator,
@@ -377,12 +377,12 @@ pub fn type_union_resolution(data_types: &[DataType]) ->
Option<DataType> {
return None;
}
- // if all the data_types is the same return first one
+ // If all the data_types is the same return first one
if data_types.iter().all(|t| t == &data_types[0]) {
return Some(data_types[0].clone());
}
- // if all the data_types are null, return string
+ // If all the data_types are null, return string
if data_types.iter().all(|t| t == &DataType::Null) {
return Some(DataType::Utf8);
}
@@ -401,7 +401,7 @@ pub fn type_union_resolution(data_types: &[DataType]) ->
Option<DataType> {
return None;
}
- // check if there is only one category excluding Unknown
+ // Check if there is only one category excluding Unknown
let categories: HashSet<TypeCategory> = HashSet::from_iter(
data_types_category
.iter()
@@ -519,7 +519,7 @@ fn type_union_resolution_coercion(
Some(DataType::Struct(fields.into()))
}
_ => {
- // numeric coercion is the same as comparison coercion, both find
the narrowest type
+ // Numeric coercion is the same as comparison coercion, both find
the narrowest type
// that can accommodate both types
binary_numeric_coercion(lhs_type, rhs_type)
.or_else(|| temporal_coercion_nonstrict_timezone(lhs_type,
rhs_type))
@@ -630,7 +630,7 @@ pub fn binary_numeric_coercion(
return Some(t);
}
- // these are ordered from most informative to least informative so
+ // These are ordered from most informative to least informative so
// that the coercion does not lose information via truncation
match (lhs_type, rhs_type) {
(Float64, _) | (_, Float64) => Some(Float64),
@@ -856,12 +856,12 @@ fn mathematics_numerical_coercion(
) -> Option<DataType> {
use arrow::datatypes::DataType::*;
- // error on any non-numeric type
+ // Error on any non-numeric type
if !both_numeric_or_null_and_numeric(lhs_type, rhs_type) {
return None;
};
- // these are ordered from most informative to least informative so
+ // These are ordered from most informative to least informative so
// that the coercion removes the least amount of information
match (lhs_type, rhs_type) {
(Dictionary(_, lhs_value_type), Dictionary(_, rhs_value_type)) => {
@@ -1122,7 +1122,7 @@ fn binary_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<DataType>
}
}
-/// coercion rules for like operations.
+/// Coercion rules for like operations.
/// This is a union of string coercion rules and dictionary coercion rules
pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
string_coercion(lhs_type, rhs_type)
@@ -1133,7 +1133,7 @@ pub fn like_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<DataTyp
.or_else(|| null_coercion(lhs_type, rhs_type))
}
-/// coercion rules for regular expression comparison operations with NULL
input.
+/// Coercion rules for regular expression comparison operations with NULL
input.
fn regex_null_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
@@ -1295,7 +1295,7 @@ fn timeunit_coercion(lhs_unit: &TimeUnit, rhs_unit:
&TimeUnit) -> TimeUnit {
}
}
-/// coercion rules from NULL type. Since NULL can be casted to any other type
in arrow,
+/// Coercion rules from NULL type. Since NULL can be casted to any other type
in arrow,
/// either lhs or rhs is NULL, if NULL can be casted to type of the other
side, the coercion is valid.
fn null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
{
match (lhs_type, rhs_type) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]