This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 7c0e15025a Minor: expr-doc small fixes (#12960)
7c0e15025a is described below
commit 7c0e15025a9455ec1b4a2aff13e0c59eb0cc7381
Author: Jonathan Chen <[email protected]>
AuthorDate: Wed Oct 16 13:31:13 2024 -0400
Minor: expr-doc small fixes (#12960)
* expr-common-doc-fixes
* fmt fixes
---
datafusion/expr-common/src/accumulator.rs | 16 +++++------
datafusion/expr-common/src/columnar_value.rs | 2 +-
datafusion/expr-common/src/groups_accumulator.rs | 9 +++---
datafusion/expr-common/src/interval_arithmetic.rs | 4 +--
datafusion/expr-common/src/signature.rs | 6 ++--
.../expr-common/src/type_coercion/aggregates.rs | 32 +++++++++++-----------
datafusion/expr-common/src/type_coercion/binary.rs | 22 +++++++--------
7 files changed, 45 insertions(+), 46 deletions(-)
diff --git a/datafusion/expr-common/src/accumulator.rs
b/datafusion/expr-common/src/accumulator.rs
index 7533520945..7155c7993f 100644
--- a/datafusion/expr-common/src/accumulator.rs
+++ b/datafusion/expr-common/src/accumulator.rs
@@ -39,7 +39,7 @@ use std::fmt::Debug;
/// function])
///
/// * convert its internal state to a vector of aggregate values via
-/// [`state`] and combine the state from multiple accumulators'
+/// [`state`] and combine the state from multiple accumulators
/// via [`merge_batch`], as part of efficient multi-phase grouping.
///
/// [`GroupsAccumulator`]: crate::GroupsAccumulator
@@ -68,7 +68,7 @@ pub trait Accumulator: Send + Sync + Debug {
/// result in potentially non-deterministic behavior.
///
/// This function gets `&mut self` to allow for the accumulator to build
- /// arrow compatible internal state that can be returned without copying
+ /// arrow-compatible internal state that can be returned without copying
/// when possible (for example distinct strings)
fn evaluate(&mut self) -> Result<ScalarValue>;
@@ -89,14 +89,14 @@ pub trait Accumulator: Send + Sync + Debug {
/// result in potentially non-deterministic behavior.
///
/// This function gets `&mut self` to allow for the accumulator to build
- /// arrow compatible internal state that can be returned without copying
+ /// arrow-compatible internal state that can be returned without copying
/// when possible (for example distinct strings).
///
/// Intermediate state is used for "multi-phase" grouping in
/// DataFusion, where an aggregate is computed in parallel with
/// multiple `Accumulator` instances, as described below:
///
- /// # MultiPhase Grouping
+ /// # Multi-Phase Grouping
///
/// ```text
/// ▲
@@ -140,9 +140,9 @@ pub trait Accumulator: Send + Sync + Debug {
/// to be summed together)
///
/// Some accumulators can return multiple values for their
- /// intermediate states. For example average, tracks `sum` and
- /// `n`, and this function should return
- /// a vector of two values, sum and n.
+ /// intermediate states. For example, the average accumulator
+ /// tracks `sum` and `n`, and this function should return a vector
+ /// of two values, sum and n.
///
/// Note that [`ScalarValue::List`] can be used to pass multiple
/// values if the number of intermediate values is not known at
@@ -204,7 +204,7 @@ pub trait Accumulator: Send + Sync + Debug {
/// The final output is computed by repartitioning the result of
/// [`Self::state`] from each Partial aggregate and `hash(group keys)` so
/// that each distinct group key appears in exactly one of the
- /// `AggregateMode::Final` GroupBy nodes. The output of the final nodes are
+ /// `AggregateMode::Final` GroupBy nodes. The outputs of the final nodes
are
/// then unioned together to produce the overall final output.
///
/// Here is an example that shows the distribution of groups in the
diff --git a/datafusion/expr-common/src/columnar_value.rs
b/datafusion/expr-common/src/columnar_value.rs
index 7b614ba9c4..1ee90eb4b4 100644
--- a/datafusion/expr-common/src/columnar_value.rs
+++ b/datafusion/expr-common/src/columnar_value.rs
@@ -129,7 +129,7 @@ impl ColumnarValue {
})
}
- /// null columnar values are implemented as a null array in order to pass
batch
+ /// Null columnar values are implemented as a null array in order to pass
batch
/// num_rows
pub fn create_null_array(num_rows: usize) -> Self {
ColumnarValue::Array(Arc::new(NullArray::new(num_rows)))
diff --git a/datafusion/expr-common/src/groups_accumulator.rs
b/datafusion/expr-common/src/groups_accumulator.rs
index d19d8e4696..2c8b126cb5 100644
--- a/datafusion/expr-common/src/groups_accumulator.rs
+++ b/datafusion/expr-common/src/groups_accumulator.rs
@@ -111,8 +111,7 @@ pub trait GroupsAccumulator: Send {
///
/// * `values`: the input arguments to the accumulator
///
- /// * `group_indices`: To which groups do the rows in `values`
- /// belong, group id)
+ /// * `group_indices`: The group indices to which each row in `values`
belongs.
///
/// * `opt_filter`: if present, only update aggregate state using
/// `values[i]` if `opt_filter[i]` is true
@@ -185,9 +184,9 @@ pub trait GroupsAccumulator: Send {
/// differ. See [`Self::state`] for more details on how state is
/// used and merged.
///
- /// * `values`: arrays produced from calling `state` previously to the
accumulator
+ /// * `values`: arrays produced from previously calling `state` on other
accumulators.
///
- /// Other arguments are the same as for [`Self::update_batch`];
+ /// Other arguments are the same as for [`Self::update_batch`].
fn merge_batch(
&mut self,
values: &[ArrayRef],
@@ -196,7 +195,7 @@ pub trait GroupsAccumulator: Send {
total_num_groups: usize,
) -> Result<()>;
- /// Converts an input batch directly the intermediate aggregate state.
+ /// Converts an input batch directly to the intermediate aggregate state.
///
/// This is the equivalent of treating each input row as its own group. It
/// is invoked when the Partial phase of a multi-phase aggregation is not
diff --git a/datafusion/expr-common/src/interval_arithmetic.rs
b/datafusion/expr-common/src/interval_arithmetic.rs
index e76453d91a..ffaa32f080 100644
--- a/datafusion/expr-common/src/interval_arithmetic.rs
+++ b/datafusion/expr-common/src/interval_arithmetic.rs
@@ -1223,8 +1223,8 @@ pub fn satisfy_greater(
}
}
- // Only the lower bound of left hand side and the upper bound of the right
- // hand side can change after propagating the greater-than operation.
+ // Only the lower bound of left-hand side and the upper bound of the
right-hand
+ // side can change after propagating the greater-than operation.
let new_left_lower = if left.lower.is_null() || left.lower <= right.lower {
if strict {
next_value(right.lower.clone())
diff --git a/datafusion/expr-common/src/signature.rs
b/datafusion/expr-common/src/signature.rs
index 320e1303a2..24cb54f634 100644
--- a/datafusion/expr-common/src/signature.rs
+++ b/datafusion/expr-common/src/signature.rs
@@ -35,7 +35,7 @@ pub const TIMEZONE_WILDCARD: &str = "+TZ";
/// valid length. It exists to avoid the need to enumerate all possible fixed
size list lengths.
pub const FIXED_SIZE_LIST_WILDCARD: i32 = i32::MIN;
-///A function's volatility, which defines the functions eligibility for
certain optimizations
+/// A function's volatility, which defines the function's eligibility for
certain optimizations
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Volatility {
/// An immutable function will always return the same output when given
the same
@@ -86,7 +86,7 @@ pub enum Volatility {
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum TypeSignature {
- /// One or more arguments of an common type out of a list of valid types.
+ /// One or more arguments of a common type out of a list of valid types.
///
/// # Examples
/// A function such as `concat` is `Variadic(vec![DataType::Utf8,
DataType::LargeUtf8])`
@@ -127,7 +127,7 @@ pub enum TypeSignature {
Numeric(usize),
/// Fixed number of arguments of all the same string types.
/// The precedence of type from high to low is Utf8View, LargeUtf8 and
Utf8.
- /// Null is considerd as Utf8 by default
+ /// Null is considered as `Utf8` by default
/// Dictionary with string value type is also handled.
String(usize),
}
diff --git a/datafusion/expr-common/src/type_coercion/aggregates.rs
b/datafusion/expr-common/src/type_coercion/aggregates.rs
index 2add9e7c18..fee75f9e45 100644
--- a/datafusion/expr-common/src/type_coercion/aggregates.rs
+++ b/datafusion/expr-common/src/type_coercion/aggregates.rs
@@ -143,21 +143,21 @@ pub fn check_arg_count(
Ok(())
}
-/// function return type of a sum
+/// Function return type of a sum
pub fn sum_return_type(arg_type: &DataType) -> Result<DataType> {
match arg_type {
DataType::Int64 => Ok(DataType::Int64),
DataType::UInt64 => Ok(DataType::UInt64),
DataType::Float64 => Ok(DataType::Float64),
DataType::Decimal128(precision, scale) => {
- // in the spark, the result type is DECIMAL(min(38,precision+10),
s)
- // ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
+ // In Spark, the result type is DECIMAL(min(38,precision+10),
s)
+ // Ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
let new_precision = DECIMAL128_MAX_PRECISION.min(*precision + 10);
Ok(DataType::Decimal128(new_precision, *scale))
}
DataType::Decimal256(precision, scale) => {
- // in the spark, the result type is DECIMAL(min(38,precision+10),
s)
- // ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
+ // In Spark, the result type is DECIMAL(min(38,precision+10),
s)
+ // Ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
let new_precision = DECIMAL256_MAX_PRECISION.min(*precision + 10);
Ok(DataType::Decimal256(new_precision, *scale))
}
@@ -165,7 +165,7 @@ pub fn sum_return_type(arg_type: &DataType) ->
Result<DataType> {
}
}
-/// function return type of variance
+/// Function return type of variance
pub fn variance_return_type(arg_type: &DataType) -> Result<DataType> {
if NUMERICS.contains(arg_type) {
Ok(DataType::Float64)
@@ -174,7 +174,7 @@ pub fn variance_return_type(arg_type: &DataType) ->
Result<DataType> {
}
}
-/// function return type of covariance
+/// Function return type of covariance
pub fn covariance_return_type(arg_type: &DataType) -> Result<DataType> {
if NUMERICS.contains(arg_type) {
Ok(DataType::Float64)
@@ -183,7 +183,7 @@ pub fn covariance_return_type(arg_type: &DataType) ->
Result<DataType> {
}
}
-/// function return type of correlation
+/// Function return type of correlation
pub fn correlation_return_type(arg_type: &DataType) -> Result<DataType> {
if NUMERICS.contains(arg_type) {
Ok(DataType::Float64)
@@ -192,19 +192,19 @@ pub fn correlation_return_type(arg_type: &DataType) ->
Result<DataType> {
}
}
-/// function return type of an average
+/// Function return type of an average
pub fn avg_return_type(func_name: &str, arg_type: &DataType) ->
Result<DataType> {
match arg_type {
DataType::Decimal128(precision, scale) => {
- // in the spark, the result type is DECIMAL(min(38,precision+4),
min(38,scale+4)).
- // ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66
+ // In Spark, the result type is DECIMAL(min(38,precision+4),
min(38,scale+4)).
+ // Ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66
let new_precision = DECIMAL128_MAX_PRECISION.min(*precision + 4);
let new_scale = DECIMAL128_MAX_SCALE.min(*scale + 4);
Ok(DataType::Decimal128(new_precision, new_scale))
}
DataType::Decimal256(precision, scale) => {
- // in the spark, the result type is DECIMAL(min(38,precision+4),
min(38,scale+4)).
- // ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66
+ // In Spark, the result type is DECIMAL(min(38,precision+4),
min(38,scale+4)).
+ // Ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66
let new_precision = DECIMAL256_MAX_PRECISION.min(*precision + 4);
let new_scale = DECIMAL256_MAX_SCALE.min(*scale + 4);
Ok(DataType::Decimal256(new_precision, new_scale))
@@ -217,16 +217,16 @@ pub fn avg_return_type(func_name: &str, arg_type:
&DataType) -> Result<DataType>
}
}
-/// internal sum type of an average
+/// Internal sum type of an average
pub fn avg_sum_type(arg_type: &DataType) -> Result<DataType> {
match arg_type {
DataType::Decimal128(precision, scale) => {
- // in the spark, the sum type of avg is
DECIMAL(min(38,precision+10), s)
+ // In Spark, the sum type of avg is
DECIMAL(min(38,precision+10), s)
let new_precision = DECIMAL128_MAX_PRECISION.min(*precision + 10);
Ok(DataType::Decimal128(new_precision, *scale))
}
DataType::Decimal256(precision, scale) => {
- // in Spark the sum type of avg is DECIMAL(min(38,precision+10), s)
+ // In Spark the sum type of avg is DECIMAL(min(38,precision+10), s)
let new_precision = DECIMAL256_MAX_PRECISION.min(*precision + 10);
Ok(DataType::Decimal256(new_precision, *scale))
}
diff --git a/datafusion/expr-common/src/type_coercion/binary.rs
b/datafusion/expr-common/src/type_coercion/binary.rs
index e042dd5d3a..887586f4f7 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -191,7 +191,7 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType)
-> Result<Signature>
}
}
-/// returns the resulting type of a binary expression evaluating the `op` with
the left and right hand types
+/// Returns the resulting type of a binary expression evaluating the `op` with
the left and right hand types
pub fn get_result_type(
lhs: &DataType,
op: &Operator,
@@ -377,12 +377,12 @@ pub fn type_union_resolution(data_types: &[DataType]) ->
Option<DataType> {
return None;
}
- // if all the data_types is the same return first one
+ // If all the data_types is the same return first one
if data_types.iter().all(|t| t == &data_types[0]) {
return Some(data_types[0].clone());
}
- // if all the data_types are null, return string
+ // If all the data_types are null, return string
if data_types.iter().all(|t| t == &DataType::Null) {
return Some(DataType::Utf8);
}
@@ -401,7 +401,7 @@ pub fn type_union_resolution(data_types: &[DataType]) ->
Option<DataType> {
return None;
}
- // check if there is only one category excluding Unknown
+ // Check if there is only one category excluding Unknown
let categories: HashSet<TypeCategory> = HashSet::from_iter(
data_types_category
.iter()
@@ -519,7 +519,7 @@ fn type_union_resolution_coercion(
Some(DataType::Struct(fields.into()))
}
_ => {
- // numeric coercion is the same as comparison coercion, both find
the narrowest type
+ // Numeric coercion is the same as comparison coercion, both find
the narrowest type
// that can accommodate both types
binary_numeric_coercion(lhs_type, rhs_type)
.or_else(|| temporal_coercion_nonstrict_timezone(lhs_type,
rhs_type))
@@ -630,7 +630,7 @@ pub fn binary_numeric_coercion(
return Some(t);
}
- // these are ordered from most informative to least informative so
+ // These are ordered from most informative to least informative so
// that the coercion does not lose information via truncation
match (lhs_type, rhs_type) {
(Float64, _) | (_, Float64) => Some(Float64),
@@ -856,12 +856,12 @@ fn mathematics_numerical_coercion(
) -> Option<DataType> {
use arrow::datatypes::DataType::*;
- // error on any non-numeric type
+ // Error on any non-numeric type
if !both_numeric_or_null_and_numeric(lhs_type, rhs_type) {
return None;
};
- // these are ordered from most informative to least informative so
+ // These are ordered from most informative to least informative so
// that the coercion removes the least amount of information
match (lhs_type, rhs_type) {
(Dictionary(_, lhs_value_type), Dictionary(_, rhs_value_type)) => {
@@ -1122,7 +1122,7 @@ fn binary_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<DataType>
}
}
-/// coercion rules for like operations.
+/// Coercion rules for like operations.
/// This is a union of string coercion rules and dictionary coercion rules
pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
string_coercion(lhs_type, rhs_type)
@@ -1133,7 +1133,7 @@ pub fn like_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<DataTyp
.or_else(|| null_coercion(lhs_type, rhs_type))
}
-/// coercion rules for regular expression comparison operations with NULL
input.
+/// Coercion rules for regular expression comparison operations with NULL
input.
fn regex_null_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
@@ -1295,7 +1295,7 @@ fn timeunit_coercion(lhs_unit: &TimeUnit, rhs_unit:
&TimeUnit) -> TimeUnit {
}
}
-/// coercion rules from NULL type. Since NULL can be casted to any other type
in arrow,
+/// Coercion rules from NULL type. Since NULL can be casted to any other type
in arrow,
/// either lhs or rhs is NULL, if NULL can be casted to type of the other
side, the coercion is valid.
fn null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
{
match (lhs_type, rhs_type) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]