This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 1e6115a1c2 docs: rm duplicate words. (#9449)
1e6115a1c2 is described below
commit 1e6115a1c2f48ce02d3bf08febdf34cd893374e1
Author: yi wang <[email protected]>
AuthorDate: Mon Mar 4 19:04:05 2024 +0800
docs: rm duplicate words. (#9449)
---
datafusion-examples/examples/advanced_udwf.rs | 2 +-
datafusion-examples/examples/simple_udwf.rs | 2 +-
datafusion/common/src/scalar/mod.rs | 2 +-
datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs | 2 +-
datafusion/core/src/physical_optimizer/join_selection.rs | 2 +-
datafusion/core/tests/fuzz_cases/merge_fuzz.rs | 2 +-
datafusion/core/tests/memory_limit.rs | 2 +-
datafusion/physical-expr/src/utils/guarantee.rs | 2 +-
datafusion/physical-plan/src/lib.rs | 2 +-
datafusion/physical-plan/src/metrics/builder.rs | 2 +-
datafusion/physical-plan/src/test/exec.rs | 2 +-
11 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs
index 826abc28e1..41c6381df5 100644
--- a/datafusion-examples/examples/advanced_udwf.rs
+++ b/datafusion-examples/examples/advanced_udwf.rs
@@ -169,7 +169,7 @@ async fn main() -> Result<()> {
// creating a new `PartitionEvaluator`)
//
// `ORDER BY time`: within each partition ('green' or 'red') the
- // rows will be be ordered by the value in the `time` column
+ // rows will be ordered by the value in the `time` column
//
// `evaluate_inside_range` is invoked with a window defined by the
// SQL. In this case:
diff --git a/datafusion-examples/examples/simple_udwf.rs b/datafusion-examples/examples/simple_udwf.rs
index a6149d661e..5555e873ae 100644
--- a/datafusion-examples/examples/simple_udwf.rs
+++ b/datafusion-examples/examples/simple_udwf.rs
@@ -72,7 +72,7 @@ async fn main() -> Result<()> {
// creating a new `PartitionEvaluator`)
//
// `ORDER BY time`: within each partition ('green' or 'red') the
- // rows will be be ordered by the value in the `time` column
+ // rows will be ordered by the value in the `time` column
//
// `evaluate_inside_range` is invoked with a window defined by the
// SQL. In this case:
diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs
index 6ab4507f94..f431e62643 100644
--- a/datafusion/common/src/scalar/mod.rs
+++ b/datafusion/common/src/scalar/mod.rs
@@ -4451,7 +4451,7 @@ mod tests {
// per distinct value.
//
// The alignment requirements differ across architectures and
- // thus the size of the enum appears to as as well
+ // thus the size of the enum appears to as well
assert_eq!(std::mem::size_of::<ScalarValue>(), 48);
}
diff --git a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs
index 556ae35d48..064a8e1fff 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs
@@ -323,7 +323,7 @@ fn prune_pages_in_one_row_group(
assert_eq!(row_vec.len(), values.len());
let mut sum_row = *row_vec.first().unwrap();
let mut selected = *values.first().unwrap();
- trace!("Pruned to to {:?} using {:?}", values, pruning_stats);
+ trace!("Pruned to {:?} using {:?}", values, pruning_stats);
for (i, &f) in values.iter().enumerate().skip(1) {
if f == selected {
sum_row += *row_vec.get(i).unwrap();
diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs
index ee60c65ead..cc629df731 100644
--- a/datafusion/core/src/physical_optimizer/join_selection.rs
+++ b/datafusion/core/src/physical_optimizer/join_selection.rs
@@ -57,7 +57,7 @@ impl JoinSelection {
}
// TODO: We need some performance test for Right Semi/Right Join swap to Left Semi/Left Join in case that the right side is smaller but not much smaller.
-// TODO: In PrestoSQL, the optimizer flips join sides only if one side is much smaller than the other by more than SIZE_DIFFERENCE_THRESHOLD times, by default is is 8 times.
+// TODO: In PrestoSQL, the optimizer flips join sides only if one side is much smaller than the other by more than SIZE_DIFFERENCE_THRESHOLD times, by default is 8 times.
/// Checks statistics for join swap.
fn should_swap_join_order(
left: &dyn ExecutionPlan,
diff --git a/datafusion/core/tests/fuzz_cases/merge_fuzz.rs b/datafusion/core/tests/fuzz_cases/merge_fuzz.rs
index c38ff41f57..95cd75f50a 100644
--- a/datafusion/core/tests/fuzz_cases/merge_fuzz.rs
+++ b/datafusion/core/tests/fuzz_cases/merge_fuzz.rs
@@ -89,7 +89,7 @@ async fn test_merge_3_gaps() {
/// Merge a set of input streams using SortPreservingMergeExec and
/// `Vec::sort` and ensure the results are the same.
///
-/// For each case, the `input` streams are turned into a set of of
+/// For each case, the `input` streams are turned into a set of
/// streams which are then merged together by [SortPreservingMerge]
///
/// Each `Vec<RecordBatch>` in `input` must be sorted and have a
diff --git a/datafusion/core/tests/memory_limit.rs b/datafusion/core/tests/memory_limit.rs
index a98d097856..4735a97fee 100644
--- a/datafusion/core/tests/memory_limit.rs
+++ b/datafusion/core/tests/memory_limit.rs
@@ -473,7 +473,7 @@ enum Scenario {
/// [`StreamingTable`]
AccessLogStreaming,
- /// N partitions of of sorted, dictionary encoded strings.
+ /// N partitions of sorted, dictionary encoded strings.
DictionaryStrings {
partitions: usize,
/// If true, splits all input batches into 1 row each
diff --git a/datafusion/physical-expr/src/utils/guarantee.rs b/datafusion/physical-expr/src/utils/guarantee.rs
index 26ee95f479..c249af232b 100644
--- a/datafusion/physical-expr/src/utils/guarantee.rs
+++ b/datafusion/physical-expr/src/utils/guarantee.rs
@@ -470,7 +470,7 @@ mod test {
test_analyze(
col("b").not_eq(lit(1)).and(col("b").eq(lit(2))),
vec![
- // can only be true of b is not 1 and b is is 2 (even though it is redundant)
+ // can only be true of b is not 1 and b is 2 (even though it is redundant)
not_in_guarantee("b", [1]),
in_guarantee("b", [2]),
],
diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs
index b527466493..ac864668a1 100644
--- a/datafusion/physical-plan/src/lib.rs
+++ b/datafusion/physical-plan/src/lib.rs
@@ -341,7 +341,7 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync {
///
/// ## Lazily (async) create a Stream
///
- /// If you need to to create the return `Stream` using an `async` function,
+ /// If you need to create the return `Stream` using an `async` function,
/// you can do so by flattening the result:
///
/// ```
diff --git a/datafusion/physical-plan/src/metrics/builder.rs b/datafusion/physical-plan/src/metrics/builder.rs
index beecc13e00..5e8ff72df3 100644
--- a/datafusion/physical-plan/src/metrics/builder.rs
+++ b/datafusion/physical-plan/src/metrics/builder.rs
@@ -183,7 +183,7 @@ impl<'a> MetricBuilder<'a> {
}
/// Consumes self and creates a new Timer for recording some
- /// subset of of an operators execution time.
+ /// subset of an operators execution time.
pub fn subset_time(
self,
subset_name: impl Into<Cow<'static, str>>,
diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs
index 23df3753e8..b4f1eac0a6 100644
--- a/datafusion/physical-plan/src/test/exec.rs
+++ b/datafusion/physical-plan/src/test/exec.rs
@@ -118,7 +118,7 @@ pub struct MockExec {
/// the results to send back
data: Vec<Result<RecordBatch>>,
schema: SchemaRef,
- /// if true (the default), sends data using a separate task to to ensure the
+ /// if true (the default), sends data using a separate task to ensure the
/// batches are not available without this stream yielding first
use_task: bool,
cache: PlanProperties,