(arrow-datafusion) branch main updated: docs: rm duplicate words. (#9449)

alamb Mon, 04 Mar 2024 03:04:33 -0800

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new 1e6115a1c2 docs: rm duplicate words. (#9449)
1e6115a1c2 is described below

commit 1e6115a1c2f48ce02d3bf08febdf34cd893374e1
Author: yi wang <[email protected]>
AuthorDate: Mon Mar 4 19:04:05 2024 +0800

    docs: rm duplicate words. (#9449)
---
 datafusion-examples/examples/advanced_udwf.rs                       | 2 +-
 datafusion-examples/examples/simple_udwf.rs                         | 2 +-
 datafusion/common/src/scalar/mod.rs                                 | 2 +-
 datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs | 2 +-
 datafusion/core/src/physical_optimizer/join_selection.rs            | 2 +-
 datafusion/core/tests/fuzz_cases/merge_fuzz.rs                      | 2 +-
 datafusion/core/tests/memory_limit.rs                               | 2 +-
 datafusion/physical-expr/src/utils/guarantee.rs                     | 2 +-
 datafusion/physical-plan/src/lib.rs                                 | 2 +-
 datafusion/physical-plan/src/metrics/builder.rs                     | 2 +-
 datafusion/physical-plan/src/test/exec.rs                           | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs
index 826abc28e1..41c6381df5 100644
--- a/datafusion-examples/examples/advanced_udwf.rs
+++ b/datafusion-examples/examples/advanced_udwf.rs
@@ -169,7 +169,7 @@ async fn main() -> Result<()> {
     // creating a new `PartitionEvaluator`)
     //
     // `ORDER BY time`: within each partition ('green' or 'red') the
-    // rows will be be ordered by the value in the `time` column
+    // rows will be ordered by the value in the `time` column
     //
     // `evaluate_inside_range` is invoked with a window defined by the
     // SQL. In this case:
diff --git a/datafusion-examples/examples/simple_udwf.rs b/datafusion-examples/examples/simple_udwf.rs
index a6149d661e..5555e873ae 100644
--- a/datafusion-examples/examples/simple_udwf.rs
+++ b/datafusion-examples/examples/simple_udwf.rs
@@ -72,7 +72,7 @@ async fn main() -> Result<()> {
     // creating a new `PartitionEvaluator`)
     //
     // `ORDER BY time`: within each partition ('green' or 'red') the
-    // rows will be be ordered by the value in the `time` column
+    // rows will be ordered by the value in the `time` column
     //
     // `evaluate_inside_range` is invoked with a window defined by the
     // SQL. In this case:
diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs
index 6ab4507f94..f431e62643 100644
--- a/datafusion/common/src/scalar/mod.rs
+++ b/datafusion/common/src/scalar/mod.rs
@@ -4451,7 +4451,7 @@ mod tests {
         // per distinct value.
         //
         // The alignment requirements differ across architectures and
-        // thus the size of the enum appears to as as well
+        // thus the size of the enum appears to as well
 
         assert_eq!(std::mem::size_of::<ScalarValue>(), 48);
     }
diff --git a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs
index 556ae35d48..064a8e1fff 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs
@@ -323,7 +323,7 @@ fn prune_pages_in_one_row_group(
                 assert_eq!(row_vec.len(), values.len());
                 let mut sum_row = *row_vec.first().unwrap();
                 let mut selected = *values.first().unwrap();
-                trace!("Pruned to to {:?} using {:?}", values, pruning_stats);
+                trace!("Pruned to {:?} using {:?}", values, pruning_stats);
                 for (i, &f) in values.iter().enumerate().skip(1) {
                     if f == selected {
                         sum_row += *row_vec.get(i).unwrap();
diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs
index ee60c65ead..cc629df731 100644
--- a/datafusion/core/src/physical_optimizer/join_selection.rs
+++ b/datafusion/core/src/physical_optimizer/join_selection.rs
@@ -57,7 +57,7 @@ impl JoinSelection {
 }
 
 // TODO: We need some performance test for Right Semi/Right Join swap to Left Semi/Left Join in case that the right side is smaller but not much smaller.
-// TODO: In PrestoSQL, the optimizer flips join sides only if one side is much smaller than the other by more than SIZE_DIFFERENCE_THRESHOLD times, by default is is 8 times.
+// TODO: In PrestoSQL, the optimizer flips join sides only if one side is much smaller than the other by more than SIZE_DIFFERENCE_THRESHOLD times, by default is 8 times.
 /// Checks statistics for join swap.
 fn should_swap_join_order(
     left: &dyn ExecutionPlan,
diff --git a/datafusion/core/tests/fuzz_cases/merge_fuzz.rs b/datafusion/core/tests/fuzz_cases/merge_fuzz.rs
index c38ff41f57..95cd75f50a 100644
--- a/datafusion/core/tests/fuzz_cases/merge_fuzz.rs
+++ b/datafusion/core/tests/fuzz_cases/merge_fuzz.rs
@@ -89,7 +89,7 @@ async fn test_merge_3_gaps() {
 /// Merge a set of input streams using SortPreservingMergeExec and
 /// `Vec::sort` and ensure the results are the same.
 ///
-/// For each case, the `input` streams are turned into a set of of
+/// For each case, the `input` streams are turned into a set of
 /// streams which are then merged together by [SortPreservingMerge]
 ///
 /// Each `Vec<RecordBatch>` in `input` must be sorted and have a
diff --git a/datafusion/core/tests/memory_limit.rs b/datafusion/core/tests/memory_limit.rs
index a98d097856..4735a97fee 100644
--- a/datafusion/core/tests/memory_limit.rs
+++ b/datafusion/core/tests/memory_limit.rs
@@ -473,7 +473,7 @@ enum Scenario {
     /// [`StreamingTable`]
     AccessLogStreaming,
 
-    /// N partitions of of sorted, dictionary encoded strings.
+    /// N partitions of sorted, dictionary encoded strings.
     DictionaryStrings {
         partitions: usize,
         /// If true, splits all input batches into 1 row each
diff --git a/datafusion/physical-expr/src/utils/guarantee.rs b/datafusion/physical-expr/src/utils/guarantee.rs
index 26ee95f479..c249af232b 100644
--- a/datafusion/physical-expr/src/utils/guarantee.rs
+++ b/datafusion/physical-expr/src/utils/guarantee.rs
@@ -470,7 +470,7 @@ mod test {
         test_analyze(
             col("b").not_eq(lit(1)).and(col("b").eq(lit(2))),
             vec![
-                // can only be true of b is not 1 and b is is 2 (even though it is redundant)
+                // can only be true of b is not 1 and b is 2 (even though it is redundant)
                 not_in_guarantee("b", [1]),
                 in_guarantee("b", [2]),
             ],
diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs
index b527466493..ac864668a1 100644
--- a/datafusion/physical-plan/src/lib.rs
+++ b/datafusion/physical-plan/src/lib.rs
@@ -341,7 +341,7 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync {
     ///
     /// ## Lazily (async) create a Stream
     ///
-    /// If you need to to create the return `Stream` using an `async` function,
+    /// If you need to create the return `Stream` using an `async` function,
     /// you can do so by flattening the result:
     ///
     /// ```
diff --git a/datafusion/physical-plan/src/metrics/builder.rs b/datafusion/physical-plan/src/metrics/builder.rs
index beecc13e00..5e8ff72df3 100644
--- a/datafusion/physical-plan/src/metrics/builder.rs
+++ b/datafusion/physical-plan/src/metrics/builder.rs
@@ -183,7 +183,7 @@ impl<'a> MetricBuilder<'a> {
     }
 
     /// Consumes self and creates a new Timer for recording some
-    /// subset of of an operators execution time.
+    /// subset of an operators execution time.
     pub fn subset_time(
         self,
         subset_name: impl Into<Cow<'static, str>>,
diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs
index 23df3753e8..b4f1eac0a6 100644
--- a/datafusion/physical-plan/src/test/exec.rs
+++ b/datafusion/physical-plan/src/test/exec.rs
@@ -118,7 +118,7 @@ pub struct MockExec {
     /// the results to send back
     data: Vec<Result<RecordBatch>>,
     schema: SchemaRef,
-    /// if true (the default), sends data using a separate task to to ensure the
+    /// if true (the default), sends data using a separate task to ensure the
     /// batches are not available without this stream yielding first
     use_task: bool,
     cache: PlanProperties,

(arrow-datafusion) branch main updated: docs: rm duplicate words. (#9449)

Reply via email to