This is an automated email from the ASF dual-hosted git repository. wayne pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new 1c86ec7f52 chore: fix typos (#17135) 1c86ec7f52 is described below commit 1c86ec7f5244c3b2e6d3ac722640ef678a027a18 Author: Ruihang Xia <waynest...@gmail.com> AuthorDate: Wed Aug 27 14:14:40 2025 -0700 chore: fix typos (#17135) Co-authored-by: Piotr Findeisen <piotr.findei...@gmail.com> --- benchmarks/src/bin/external_aggr.rs | 2 +- datafusion-cli/tests/cli_integration.rs | 2 +- datafusion-examples/examples/custom_file_casts.rs | 2 +- datafusion-examples/examples/expr_api.rs | 4 ++-- datafusion/catalog/src/information_schema.rs | 2 +- datafusion/common/src/test_util.rs | 4 ++-- datafusion/core/benches/spm.rs | 4 ++-- datafusion/core/src/bin/print_functions_docs.rs | 2 +- datafusion/core/src/datasource/listing/table.rs | 20 ++++++++++---------- .../core/src/datasource/physical_plan/parquet.rs | 8 ++++---- datafusion/core/src/physical_planner.rs | 2 +- datafusion/core/tests/dataframe/mod.rs | 6 +++--- datafusion/core/tests/execution/coop.rs | 12 ++++++------ datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs | 2 +- datafusion/core/tests/fuzz_cases/pruning.rs | 2 +- datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs | 2 +- .../core/tests/fuzz_cases/topk_filter_pushdown.rs | 10 +++++----- .../memory_limit_validation/sort_mem_validation.rs | 2 +- datafusion/core/tests/memory_limit/mod.rs | 2 +- .../tests/physical_optimizer/enforce_distribution.rs | 6 +++--- .../tests/physical_optimizer/filter_pushdown/mod.rs | 4 ++-- .../replace_with_order_preserving_variants.rs | 2 +- .../core/tests/physical_optimizer/window_optimize.rs | 10 +++++++--- .../user_defined/user_defined_scalar_functions.rs | 18 +++++++++--------- datafusion/datasource-parquet/src/opener.rs | 4 ++-- datafusion/datasource-parquet/src/row_filter.rs | 2 +- datafusion/datasource-parquet/src/source.rs | 2 +- datafusion/expr/src/logical_plan/builder.rs | 2 +- datafusion/expr/src/logical_plan/invariants.rs | 2 +- datafusion/expr/src/logical_plan/plan.rs | 2 +- datafusion/ffi/src/catalog_provider.rs | 2 +- datafusion/ffi/src/schema_provider.rs | 2 +- datafusion/ffi/src/tests/mod.rs | 2 +- datafusion/ffi/src/udaf/mod.rs | 2 +- datafusion/ffi/src/udf/mod.rs | 2 +- datafusion/ffi/src/udwf/mod.rs | 2 +- datafusion/ffi/src/util.rs | 4 ++-- datafusion/functions/src/core/union_tag.rs | 2 +- datafusion/functions/src/datetime/date_trunc.rs | 2 +- datafusion/functions/src/datetime/to_timestamp.rs | 2 +- datafusion/optimizer/src/analyzer/type_coercion.rs | 4 ++-- .../physical-expr-adapter/src/schema_rewriter.rs | 6 +++--- datafusion/physical-expr-common/src/physical_expr.rs | 4 ++-- .../physical-expr/src/equivalence/properties/mod.rs | 2 +- datafusion/physical-expr/src/expressions/case.rs | 2 +- datafusion/physical-optimizer/src/filter_pushdown.rs | 2 +- datafusion/physical-plan/benches/spill_io.rs | 2 +- datafusion/physical-plan/src/filter_pushdown.rs | 2 +- datafusion/physical-plan/src/joins/hash_join/exec.rs | 4 ++-- datafusion/physical-plan/src/metrics/baseline.rs | 6 +++--- datafusion/physical-plan/src/sorts/stream.rs | 2 +- datafusion/physical-plan/src/stream.rs | 2 +- datafusion/proto/src/logical_plan/mod.rs | 2 +- datafusion/proto/src/physical_plan/mod.rs | 2 +- .../proto/tests/cases/roundtrip_physical_plan.rs | 10 +++++----- datafusion/pruning/src/file_pruner.rs | 2 +- datafusion/pruning/src/pruning_predicate.rs | 2 +- datafusion/sql/src/expr/function.rs | 10 +++++----- datafusion/sql/src/statement.rs | 10 +++++----- datafusion/sql/src/utils.rs | 2 +- datafusion/sql/tests/sql_integration.rs | 4 ++-- datafusion/substrait/src/variation_const.rs | 4 ++-- .../substrait/tests/cases/roundtrip_logical_plan.rs | 2 +- dev/update_config_docs.sh | 2 +- .../source/library-user-guide/extending-operators.md | 2 +- docs/source/library-user-guide/upgrading.md | 8 ++++---- docs/source/user-guide/configs.md | 2 +- docs/source/user-guide/explain-usage.md | 2 +- 68 files changed, 137 insertions(+), 133 deletions(-) diff --git a/benchmarks/src/bin/external_aggr.rs b/benchmarks/src/bin/external_aggr.rs index 0e519367ba..46b6cc9a80 100644 --- a/benchmarks/src/bin/external_aggr.rs +++ b/benchmarks/src/bin/external_aggr.rs @@ -113,7 +113,7 @@ impl ExternalAggrConfig { "#, ]; - /// If `--query` and `--memory-limit` is not speicified, run all queries + /// If `--query` and `--memory-limit` is not specified, run all queries /// with pre-configured memory limits /// If only `--query` is specified, run the query with all memory limits /// for this query diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index ba3f29be9a..125771acb3 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -64,7 +64,7 @@ async fn setup_minio_container() -> ContainerAsync<minio::MinIO> { match container { Ok(container) => { - // We wait for MinIO to be healthy and preprare test files. We do it via CLI to avoid s3 dependency + // We wait for MinIO to be healthy and prepare test files. We do it via CLI to avoid s3 dependency let commands = [ ExecCommand::new(["/usr/bin/mc", "ready", "local"]), ExecCommand::new([ diff --git a/datafusion-examples/examples/custom_file_casts.rs b/datafusion-examples/examples/custom_file_casts.rs index a787c07c2b..e30ea1fb7e 100644 --- a/datafusion-examples/examples/custom_file_casts.rs +++ b/datafusion-examples/examples/custom_file_casts.rs @@ -41,7 +41,7 @@ use object_store::path::Path; use object_store::{ObjectStore, PutPayload}; // Example showing how to implement custom casting rules to adapt file schemas. -// This example enforces that casts must be stricly widening: if the file type is Int64 and the table type is Int32, it will error +// This example enforces that casts must be strictly widening: if the file type is Int64 and the table type is Int32, it will error // before even reading the data. // Without this custom cast rule DataFusion would happily do the narrowing cast, potentially erroring only if it found a row with data it could not cast. diff --git a/datafusion-examples/examples/expr_api.rs b/datafusion-examples/examples/expr_api.rs index 92cf33f4fd..ac17bd417c 100644 --- a/datafusion-examples/examples/expr_api.rs +++ b/datafusion-examples/examples/expr_api.rs @@ -85,7 +85,7 @@ async fn main() -> Result<()> { boundary_analysis_and_selectivity_demo()?; // See how boundary analysis works for `AND` & `OR` conjunctions. - boundary_analysis_in_conjuctions_demo()?; + boundary_analysis_in_conjunctions_demo()?; // See how to determine the data types of expressions expression_type_demo()?; @@ -351,7 +351,7 @@ fn boundary_analysis_and_selectivity_demo() -> Result<()> { /// This function shows how to think about and leverage the analysis API /// to infer boundaries in `AND` & `OR` conjunctions. -fn boundary_analysis_in_conjuctions_demo() -> Result<()> { +fn boundary_analysis_in_conjunctions_demo() -> Result<()> { // Let us consider the more common case of AND & OR conjunctions. // // age > 18 AND age <= 25 diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs index 83b6d64ef4..fce3b8b207 100644 --- a/datafusion/catalog/src/information_schema.rs +++ b/datafusion/catalog/src/information_schema.rs @@ -810,7 +810,7 @@ impl InformationSchemaColumnsBuilder { ) { use DataType::*; - // Note: append_value is actually infallable. + // Note: append_value is actually infallible. self.catalog_names.append_value(catalog_name); self.schema_names.append_value(schema_name); self.table_names.append_value(table_name); diff --git a/datafusion/common/src/test_util.rs b/datafusion/common/src/test_util.rs index 2bd702f06a..d97d4003e7 100644 --- a/datafusion/common/src/test_util.rs +++ b/datafusion/common/src/test_util.rs @@ -158,7 +158,7 @@ macro_rules! assert_batches_sorted_eq { /// Is a macro so test error /// messages are on the same line as the failure; /// -/// Both arguments must be convertable into Strings ([`Into`]<[`String`]>) +/// Both arguments must be convertible into Strings ([`Into`]<[`String`]>) #[macro_export] macro_rules! assert_contains { ($ACTUAL: expr, $EXPECTED: expr) => { @@ -181,7 +181,7 @@ macro_rules! assert_contains { /// Is a macro so test error /// messages are on the same line as the failure; /// -/// Both arguments must be convertable into Strings ([`Into`]<[`String`]>) +/// Both arguments must be convertible into Strings ([`Into`]<[`String`]>) #[macro_export] macro_rules! assert_not_contains { ($ACTUAL: expr, $UNEXPECTED: expr) => { diff --git a/datafusion/core/benches/spm.rs b/datafusion/core/benches/spm.rs index d134078642..5c24483230 100644 --- a/datafusion/core/benches/spm.rs +++ b/datafusion/core/benches/spm.rs @@ -66,7 +66,7 @@ fn generate_spm_for_round_robin_tie_breaker( }; let rbs = (0..batch_count).map(|_| rb.clone()).collect::<Vec<_>>(); - let partitiones = vec![rbs.clone(); partition_count]; + let partitions = vec![rbs.clone(); partition_count]; let schema = rb.schema(); let sort = [ @@ -81,7 +81,7 @@ fn generate_spm_for_round_robin_tie_breaker( ] .into(); - let exec = MemorySourceConfig::try_new_exec(&partitiones, schema, None).unwrap(); + let exec = MemorySourceConfig::try_new_exec(&partitions, schema, None).unwrap(); SortPreservingMergeExec::new(sort, exec) .with_round_robin_repartition(enable_round_robin_repartition) } diff --git a/datafusion/core/src/bin/print_functions_docs.rs b/datafusion/core/src/bin/print_functions_docs.rs index 1044717aaf..63387c023b 100644 --- a/datafusion/core/src/bin/print_functions_docs.rs +++ b/datafusion/core/src/bin/print_functions_docs.rs @@ -260,7 +260,7 @@ fn print_docs( } } -/// Trait for accessing name / aliases / documentation for differnet functions +/// Trait for accessing name / aliases / documentation for different functions trait DocProvider { fn get_name(&self) -> String; fn get_aliases(&self) -> Vec<String>; diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index d289a1d071..690ce31d0d 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -215,16 +215,16 @@ impl ListingTableConfig { ) -> Result<(String, Option<String>)> { let mut exts = path.rsplit('.'); - let splitted = exts.next().unwrap_or(""); + let split = exts.next().unwrap_or(""); - let file_compression_type = FileCompressionType::from_str(splitted) + let file_compression_type = FileCompressionType::from_str(split) .unwrap_or(FileCompressionType::UNCOMPRESSED); if file_compression_type.is_compressed() { - let splitted2 = exts.next().unwrap_or(""); - Ok((splitted2.to_string(), Some(splitted.to_string()))) + let split2 = exts.next().unwrap_or(""); + Ok((split2.to_string(), Some(split.to_string()))) } else { - Ok((splitted.to_string(), None)) + Ok((split.to_string(), None)) } } @@ -502,7 +502,7 @@ impl ListingOptions { /// /// Currently this sets `target_partitions` and `collect_stat` /// but if more options are added in the future that need to be coordinated - /// they will be synchronized thorugh this method. + /// they will be synchronized through this method. pub fn with_session_config_options(mut self, config: &SessionConfig) -> Self { self = self.with_target_partitions(config.target_partitions()); self = self.with_collect_stat(config.collect_statistics()); @@ -1132,8 +1132,8 @@ impl ListingTable { } // Expressions can be used for parttion pruning if they can be evaluated using -// only the partiton columns and there are partition columns. -fn can_be_evaluted_for_partition_pruning( +// only the partition columns and there are partition columns. +fn can_be_evaluated_for_partition_pruning( partition_column_names: &[&str], expr: &Expr, ) -> bool { @@ -1182,7 +1182,7 @@ impl TableProvider for ListingTable { // pushdown it to TableScan, otherwise, `unhandled` pruning predicates will be generated let (partition_filters, filters): (Vec<_>, Vec<_>) = filters.iter().cloned().partition(|filter| { - can_be_evaluted_for_partition_pruning(&table_partition_col_names, filter) + can_be_evaluated_for_partition_pruning(&table_partition_col_names, filter) }); // We should not limit the number of partitioned files to scan if there are filters and limit @@ -1271,7 +1271,7 @@ impl TableProvider for ListingTable { filters .iter() .map(|filter| { - if can_be_evaluted_for_partition_pruning(&partition_column_names, filter) + if can_be_evaluated_for_partition_pruning(&partition_column_names, filter) { // if filter can be handled by partition pruning, it is exact return Ok(TableProviderFilterPushDown::Exact); diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs index 55db0d8542..6f1c6c4171 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet.rs @@ -332,7 +332,7 @@ mod tests { let metric = get_value(&metrics, "pushdown_rows_pruned"); assert_eq!(metric, 3, "Expected all rows to be pruned"); - // If we excplicitly allow nulls the rest of the predicate should work + // If we explicitly allow nulls the rest of the predicate should work let filter = col("c2").is_null().and(col("c1").eq(lit(1_i32))); let rt = RoundTrip::new() .with_table_schema(table_schema.clone()) @@ -390,7 +390,7 @@ mod tests { let metric = get_value(&metrics, "pushdown_rows_pruned"); assert_eq!(metric, 3, "Expected all rows to be pruned"); - // If we excplicitly allow nulls the rest of the predicate should work + // If we explicitly allow nulls the rest of the predicate should work let filter = col("c2").is_null().and(col("c1").eq(lit(1_i32))); let rt = RoundTrip::new() .with_table_schema(table_schema.clone()) @@ -452,7 +452,7 @@ mod tests { let metric = get_value(&metrics, "pushdown_rows_pruned"); assert_eq!(metric, 3, "Expected all rows to be pruned"); - // If we excplicitly allow nulls the rest of the predicate should work + // If we explicitly allow nulls the rest of the predicate should work let filter = col("c2").is_null().and(col("c1").eq(lit(1_i32))); let rt = RoundTrip::new() .with_table_schema(table_schema.clone()) @@ -514,7 +514,7 @@ mod tests { let metric = get_value(&metrics, "pushdown_rows_pruned"); assert_eq!(metric, 3, "Expected all rows to be pruned"); - // If we excplicitly allow nulls the rest of the predicate should work + // If we explicitly allow nulls the rest of the predicate should work let filter = col("c2").is_null().and(col("c3").eq(lit(7_i32))); let rt = RoundTrip::new() .with_table_schema(table_schema.clone()) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 0ce5621ac8..a107db7f5c 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -2355,7 +2355,7 @@ impl<'n> TreeNodeVisitor<'n> for OptimizationInvariantChecker<'_> { fn f_down(&mut self, node: &'n Self::Node) -> Result<TreeNodeRecursion> { // Checks for the more permissive `InvariantLevel::Always`. - // Plans are not guarenteed to be executable after each physical optimizer run. + // Plans are not guaranteed to be executable after each physical optimizer run. node.check_invariants(InvariantLevel::Always).map_err(|e| e.context(format!("Invariant for ExecutionPlan node '{}' failed for PhysicalOptimizer rule '{}'", node.name(), self.rule.name())) )?; diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 38dc0dc735..27afbd7246 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -92,8 +92,8 @@ async fn physical_plan_to_string(df: &DataFrame) -> String { .await .expect("Error creating physical plan"); - let formated = displayable(physical_plan.as_ref()).indent(true); - formated.to_string() + let formatted = displayable(physical_plan.as_ref()).indent(true); + formatted.to_string() } pub fn table_with_constraints() -> Arc<dyn TableProvider> { @@ -5660,7 +5660,7 @@ async fn test_alias() -> Result<()> { .await? .select(vec![col("a"), col("test.b"), lit(1).alias("one")])? .alias("table_alias")?; - // All ouput column qualifiers are changed to "table_alias" + // All output column qualifiers are changed to "table_alias" df.schema().columns().iter().for_each(|c| { assert_eq!(c.relation, Some("table_alias".into())); }); diff --git a/datafusion/core/tests/execution/coop.rs b/datafusion/core/tests/execution/coop.rs index 338446cfeb..f2c4fde2c0 100644 --- a/datafusion/core/tests/execution/coop.rs +++ b/datafusion/core/tests/execution/coop.rs @@ -434,7 +434,7 @@ async fn interleave_then_filter_all_yields( let mut infinite_children = vec![]; // Use 32 distinct thresholds (each >0 and <8 192) to force 32 infinite inputs - for thr in 1..32 { + for threshold in 1..32 { // One infinite exec: let mut inf = make_lazy_exec_with_range("value", 0..i64::MAX, pretend_infinite); @@ -444,7 +444,7 @@ async fn interleave_then_filter_all_yields( let partitioning = Partitioning::Hash(exprs, 1); inf.try_set_partitioning(partitioning)?; - // Apply a FilterExec: “(value / 8192) % thr == 0”. + // Apply a FilterExec: “(value / 8192) % threshold == 0”. let filter_expr = binary( binary( binary( @@ -454,7 +454,7 @@ async fn interleave_then_filter_all_yields( &inf.schema(), )?, Modulo, - lit(thr as i64), + lit(threshold as i64), &inf.schema(), )?, Eq, @@ -490,7 +490,7 @@ async fn interleave_then_aggregate_yields( let mut infinite_children = vec![]; // Use 32 distinct thresholds (each >0 and <8 192) to force 32 infinite inputs - for thr in 1..32 { + for threshold in 1..32 { // One infinite exec: let mut inf = make_lazy_exec_with_range("value", 0..i64::MAX, pretend_infinite); @@ -500,7 +500,7 @@ async fn interleave_then_aggregate_yields( let partitioning = Partitioning::Hash(exprs, 1); inf.try_set_partitioning(partitioning)?; - // Apply a FilterExec: “(value / 8192) % thr == 0”. + // Apply a FilterExec: “(value / 8192) % threshold == 0”. let filter_expr = binary( binary( binary( @@ -510,7 +510,7 @@ async fn interleave_then_aggregate_yields( &inf.schema(), )?, Modulo, - lit(thr as i64), + lit(threshold as i64), &inf.schema(), )?, Eq, diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index c103daa885..01554c1af7 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -403,7 +403,7 @@ async fn run_aggregate_test(input1: Vec<RecordBatch>, group_by_columns: Vec<&str Left Plan:\n{}\n\ Right Plan:\n{}\n\ schema:\n{schema}\n\ - Left Ouptut:\n{}\n\ + Left Output:\n{}\n\ Right Output:\n{}\n\ input:\n{}\n\ ", diff --git a/datafusion/core/tests/fuzz_cases/pruning.rs b/datafusion/core/tests/fuzz_cases/pruning.rs index c6e30c0722..f8bd4dbc1a 100644 --- a/datafusion/core/tests/fuzz_cases/pruning.rs +++ b/datafusion/core/tests/fuzz_cases/pruning.rs @@ -201,7 +201,7 @@ impl Utf8Test { } } - /// all combinations of interesting charactes with lengths ranging from 1 to 4 + /// all combinations of interesting characters with lengths ranging from 1 to 4 fn values() -> &'static [String] { &VALUES } diff --git a/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs index 4cf6609fd4..2ce7db3ea4 100644 --- a/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs @@ -220,7 +220,7 @@ impl SortQueryFuzzer { .test_gen .fuzzer_run(init_seed, query_seed, config_seed) .await?; - println!("\n"); // Seperator between tested runs + println!("\n"); // Separator between tested runs if expected_results.is_none() { expected_results = Some(results); diff --git a/datafusion/core/tests/fuzz_cases/topk_filter_pushdown.rs b/datafusion/core/tests/fuzz_cases/topk_filter_pushdown.rs index a5934882cb..3caab8d4b5 100644 --- a/datafusion/core/tests/fuzz_cases/topk_filter_pushdown.rs +++ b/datafusion/core/tests/fuzz_cases/topk_filter_pushdown.rs @@ -219,16 +219,16 @@ struct RunQueryResult { } impl RunQueryResult { - fn expected_formated(&self) -> String { + fn expected_formatted(&self) -> String { format!("{}", pretty_format_batches(&self.expected).unwrap()) } - fn result_formated(&self) -> String { + fn result_formatted(&self) -> String { format!("{}", pretty_format_batches(&self.result).unwrap()) } fn is_ok(&self) -> bool { - self.expected_formated() == self.result_formated() + self.expected_formatted() == self.result_formatted() } } @@ -374,8 +374,8 @@ async fn test_fuzz_topk_filter_pushdown() { for failure in &failures { println!("Failure:"); println!("Query:\n{}", failure.query); - println!("\nExpected:\n{}", failure.expected_formated()); - println!("\nResult:\n{}", failure.result_formated()); + println!("\nExpected:\n{}", failure.expected_formatted()); + println!("\nResult:\n{}", failure.result_formatted()); println!("\n\n"); } diff --git a/datafusion/core/tests/memory_limit/memory_limit_validation/sort_mem_validation.rs b/datafusion/core/tests/memory_limit/memory_limit_validation/sort_mem_validation.rs index 64ab137834..a8002cf400 100644 --- a/datafusion/core/tests/memory_limit/memory_limit_validation/sort_mem_validation.rs +++ b/datafusion/core/tests/memory_limit/memory_limit_validation/sort_mem_validation.rs @@ -31,7 +31,7 @@ static INIT: Once = Once::new(); // =========================================================================== // Test runners: -// Runners are splitted into multiple tests to run in parallel +// Runners are split into multiple tests to run in parallel // =========================================================================== #[test] diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index b4b88ba5aa..554c30eb87 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -546,7 +546,7 @@ async fn test_external_sort_zero_merge_reservation() { // Tests for disk limit (`max_temp_directory_size` in `DiskManager`) // ------------------------------------------------------------------ -// Create a new `SessionContext` with speicified disk limit, memory pool limit, and spill compression codec +// Create a new `SessionContext` with specified disk limit, memory pool limit, and spill compression codec async fn setup_context( disk_limit: u64, memory_pool_limit: usize, diff --git a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs index fd84776312..7e0528581c 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs @@ -441,7 +441,7 @@ impl TestConfig { /// Perform a series of runs using the current [`TestConfig`], /// assert the expected plan result, - /// and return the result plan (for potentional subsequent runs). + /// and return the result plan (for potential subsequent runs). fn run( &self, expected_lines: &[&str], @@ -2610,7 +2610,7 @@ fn parallelization_two_partitions_into_four() -> Result<()> { "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", " RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4", " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - // Multiple source files splitted across partitions + // Multiple source files split across partitions " DataSourceExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e], file_type=parquet", ]; test_config.run( @@ -2625,7 +2625,7 @@ fn parallelization_two_partitions_into_four() -> Result<()> { "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", " RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4", " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - // Multiple source files splitted across partitions + // Multiple source files split across partitions " DataSourceExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", ]; test_config.run(&expected_csv, plan_csv.clone(), &DISTRIB_DISTRIB_SORT)?; diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs index 6752bc30bc..94710a14cd 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs @@ -236,7 +236,7 @@ async fn test_dynamic_filter_pushdown_through_hash_join_with_topk() { config.optimizer.enable_dynamic_filter_pushdown = true; config.execution.parquet.pushdown_filters = true; - // Appy the FilterPushdown optimizer rule + // Apply the FilterPushdown optimizer rule let plan = FilterPushdown::new_post_optimization() .optimize(Arc::clone(&plan), &config) .unwrap(); @@ -603,7 +603,7 @@ fn test_no_pushdown_through_aggregates() { } /// Test various combinations of handling of child pushdown results -/// in an ExectionPlan in combination with support/not support in a DataSource. +/// in an ExecutionPlan in combination with support/not support in a DataSource. #[test] fn test_node_handles_child_pushdown_result() { // If we set `with_support(true)` + `inject_filter = true` then the filter is pushed down to the DataSource diff --git a/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs index c9baa9a932..a1134c3d83 100644 --- a/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs +++ b/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs @@ -667,7 +667,7 @@ async fn test_not_replacing_when_no_need_to_preserve_sorting( #[rstest] #[tokio::test] -async fn test_with_multiple_replacable_repartitions( +async fn test_with_multiple_replaceable_repartitions( #[values(false, true)] source_unbounded: bool, #[values(false, true)] prefer_existing_sort: bool, ) -> Result<()> { diff --git a/datafusion/core/tests/physical_optimizer/window_optimize.rs b/datafusion/core/tests/physical_optimizer/window_optimize.rs index ba0ffb022a..d16b516d5c 100644 --- a/datafusion/core/tests/physical_optimizer/window_optimize.rs +++ b/datafusion/core/tests/physical_optimizer/window_optimize.rs @@ -42,10 +42,14 @@ mod test { .schema(schema.clone()) .alias("t") .build()?; - let parition = [col("a", &schema)?]; + let partition = [col("a", &schema)?]; let frame = WindowFrame::new(None); - let plain = - PlainAggregateWindowExpr::new(Arc::new(cnt), &parition, &[], Arc::new(frame)); + let plain = PlainAggregateWindowExpr::new( + Arc::new(cnt), + &partition, + &[], + Arc::new(frame), + ); let bounded_agg_exec = BoundedWindowAggExec::try_new( vec![Arc::new(plain)], diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index c9b6622554..a3a1101c81 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -1438,7 +1438,7 @@ impl ScalarUDFImpl for MetadataBasedUdf { .get("modify_values") .map(|v| v == "double_output") .unwrap_or(false); - let mulitplier = if should_double { 2 } else { 1 }; + let multiplier = if should_double { 2 } else { 1 }; match &args.args[0] { ColumnarValue::Array(array) => { @@ -1447,7 +1447,7 @@ impl ScalarUDFImpl for MetadataBasedUdf { .downcast_ref::<UInt64Array>() .unwrap() .iter() - .map(|v| v.map(|x| x * mulitplier)) + .map(|v| v.map(|x| x * multiplier)) .collect(); let array_ref = Arc::new(UInt64Array::from(array_values)) as ArrayRef; Ok(ColumnarValue::Array(array_ref)) @@ -1458,7 +1458,7 @@ impl ScalarUDFImpl for MetadataBasedUdf { }; Ok(ColumnarValue::Scalar(ScalarValue::UInt64( - value.map(|v| v * mulitplier), + value.map(|v| v * multiplier), ))) } } @@ -1634,7 +1634,7 @@ impl ScalarUDFImpl for ExtensionBasedUdf { fn return_field_from_args(&self, _args: ReturnFieldArgs) -> Result<FieldRef> { Ok(Field::new("canonical_extension_udf", DataType::Utf8, true) - .with_extension_type(MyUserExtentionType {}) + .with_extension_type(MyUserExtensionType {}) .into()) } @@ -1682,10 +1682,10 @@ impl ScalarUDFImpl for ExtensionBasedUdf { } } -struct MyUserExtentionType {} +struct MyUserExtensionType {} -impl ExtensionType for MyUserExtentionType { - const NAME: &'static str = "my_user_extention_type"; +impl ExtensionType for MyUserExtensionType { + const NAME: &'static str = "my_user_Extension_type"; type Metadata = (); fn metadata(&self) -> &Self::Metadata { @@ -1757,9 +1757,9 @@ async fn test_extension_based_udf() -> Result<()> { // To test for input extensions handling, we check the strings returned let expected_schema = Schema::new(vec![ Field::new("without_bool8_extension", DataType::Utf8, true) - .with_extension_type(MyUserExtentionType {}), + .with_extension_type(MyUserExtensionType {}), Field::new("with_bool8_extension", DataType::Utf8, true) - .with_extension_type(MyUserExtentionType {}), + .with_extension_type(MyUserExtensionType {}), ]); let expected = record_batch!( diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs index 42b5776abe..c96d73242e 100644 --- a/datafusion/datasource-parquet/src/opener.rs +++ b/datafusion/datasource-parquet/src/opener.rs @@ -198,7 +198,7 @@ impl FileOpener for ParquetOpener { // Don't load the page index yet. Since it is not stored inline in // the footer, loading the page index if it is not needed will do - // unecessary I/O. We decide later if it is needed to evaluate the + // unnecessary I/O. We decide later if it is needed to evaluate the // pruning predicates. Thus default to not requesting if from the // underlying reader. let mut options = ArrowReaderOptions::new().with_page_index(false); @@ -1352,7 +1352,7 @@ mod test { } } - // Test that if no expression rewriter is provided we use a schemaadapter to adapt the data to the expresssion + // Test that if no expression rewriter is provided we use a schemaadapter to adapt the data to the expression let store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>; let batch = record_batch!(("a", Int32, vec![Some(1), Some(2), Some(3)])).unwrap(); // Write out the batch to a Parquet file diff --git a/datafusion/datasource-parquet/src/row_filter.rs b/datafusion/datasource-parquet/src/row_filter.rs index 70750a75bc..166e6d8b0b 100644 --- a/datafusion/datasource-parquet/src/row_filter.rs +++ b/datafusion/datasource-parquet/src/row_filter.rs @@ -184,7 +184,7 @@ pub(crate) struct FilterCandidate { /// Can this filter use an index (e.g. a page index) to prune rows? can_use_index: bool, /// The projection to read from the file schema to get the columns - /// required to pass thorugh a `SchemaMapper` to the table schema + /// required to pass through a `SchemaMapper` to the table schema /// upon which we then evaluate the filter expression. projection: Vec<usize>, /// A `SchemaMapper` used to map batches read from the file schema to diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs index 1277ec52ad..572cb20ebc 100644 --- a/datafusion/datasource-parquet/src/source.rs +++ b/datafusion/datasource-parquet/src/source.rs @@ -521,7 +521,7 @@ impl FileSource for ParquetSource { } (None, Some(schema_adapter_factory)) => { // If a custom schema adapter factory is provided but no expr adapter factory is provided use the custom SchemaAdapter for both projections and predicate pushdown. - // This maximizes compatiblity with existing code that uses the SchemaAdapter API and did not explicitly opt into the PhysicalExprAdapterFactory API. + // This maximizes compatibility with existing code that uses the SchemaAdapter API and did not explicitly opt into the PhysicalExprAdapterFactory API. (None, Arc::clone(schema_adapter_factory) as _) } (None, None) => { diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 1ab5ffa758..88d49722a5 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -1520,7 +1520,7 @@ impl ValuesFields { // `name_map` tracks a mapping between a field name and the number of appearances of that field. // // Some field names might already come to this function with the count (number of times it appeared) -// as a sufix e.g. id:1, so there's still a chance of name collisions, for example, +// as a suffix e.g. id:1, so there's still a chance of name collisions, for example, // if these three fields passed to this function: "col:1", "col" and "col", the function // would rename them to -> col:1, col, col:1 causing a posteriror error when building the DFSchema. // that's why we need the `seen` set, so the fields are always unique. diff --git a/datafusion/expr/src/logical_plan/invariants.rs b/datafusion/expr/src/logical_plan/invariants.rs index 2d8ed07171..ccdf9e444b 100644 --- a/datafusion/expr/src/logical_plan/invariants.rs +++ b/datafusion/expr/src/logical_plan/invariants.rs @@ -102,7 +102,7 @@ fn assert_unique_field_names(plan: &LogicalPlan) -> Result<()> { plan.schema().check_names() } -/// Returns an error if the plan is not sematically valid. +/// Returns an error if the plan is not semantically valid. fn assert_valid_semantic_plan(plan: &LogicalPlan) -> Result<()> { assert_subqueries_are_valid(plan)?; diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 887afd7cde..6d4c57c5c4 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -3796,7 +3796,7 @@ impl Join { }) } - /// Create Join with input which wrapped with projection, this method is used in physcial planning only to help + /// Create Join with input which wrapped with projection, this method is used in physical planning only to help /// create the physical join. pub fn try_new_with_project_input( original: &LogicalPlan, diff --git a/datafusion/ffi/src/catalog_provider.rs b/datafusion/ffi/src/catalog_provider.rs index 0886d4749d..65dcab34f1 100644 --- a/datafusion/ffi/src/catalog_provider.rs +++ b/datafusion/ffi/src/catalog_provider.rs @@ -327,7 +327,7 @@ mod tests { assert!(returned_schema.is_some()); assert_eq!(foreign_catalog.schema_names().len(), 1); - // Retrieve non-existant schema + // Retrieve non-existent schema let returned_schema = foreign_catalog.schema("prior_schema"); assert!(returned_schema.is_none()); diff --git a/datafusion/ffi/src/schema_provider.rs b/datafusion/ffi/src/schema_provider.rs index 6e5a590e1a..b5970d5881 100644 --- a/datafusion/ffi/src/schema_provider.rs +++ b/datafusion/ffi/src/schema_provider.rs @@ -366,7 +366,7 @@ mod tests { assert!(returned_schema.is_some()); assert_eq!(foreign_schema_provider.table_names().len(), 1); - // Retrieve non-existant table + // Retrieve non-existent table let returned_schema = foreign_schema_provider .table("prior_table") .await diff --git a/datafusion/ffi/src/tests/mod.rs b/datafusion/ffi/src/tests/mod.rs index db596f51fc..816086c320 100644 --- a/datafusion/ffi/src/tests/mod.rs +++ b/datafusion/ffi/src/tests/mod.rs @@ -75,7 +75,7 @@ pub struct ForeignLibraryModule { /// Create an aggregate UDAF using sum pub create_sum_udaf: extern "C" fn() -> FFI_AggregateUDF, - /// Createa grouping UDAF using stddev + /// Create grouping UDAF using stddev pub create_stddev_udaf: extern "C" fn() -> FFI_AggregateUDF, pub create_rank_udwf: extern "C" fn() -> FFI_WindowUDF, diff --git a/datafusion/ffi/src/udaf/mod.rs b/datafusion/ffi/src/udaf/mod.rs index a2525e6ad4..e9f4ca8fed 100644 --- a/datafusion/ffi/src/udaf/mod.rs +++ b/datafusion/ffi/src/udaf/mod.rs @@ -125,7 +125,7 @@ pub struct FFI_AggregateUDF { pub order_sensitivity: unsafe extern "C" fn(udaf: &FFI_AggregateUDF) -> FFI_AggregateOrderSensitivity, - /// Performs type coersion. To simply this interface, all UDFs are treated as having + /// Performs type coercion. To simply this interface, all UDFs are treated as having /// user defined signatures, which will in turn call coerce_types to be called. This /// call should be transparent to most users as the internal function performs the /// appropriate calls on the underlying [`AggregateUDF`] diff --git a/datafusion/ffi/src/udf/mod.rs b/datafusion/ffi/src/udf/mod.rs index 390b03fe62..5e59cfc5ec 100644 --- a/datafusion/ffi/src/udf/mod.rs +++ b/datafusion/ffi/src/udf/mod.rs @@ -95,7 +95,7 @@ pub struct FFI_ScalarUDF { /// See [`ScalarUDFImpl`] for details on short_circuits pub short_circuits: bool, - /// Performs type coersion. To simply this interface, all UDFs are treated as having + /// Performs type coercion. To simply this interface, all UDFs are treated as having /// user defined signatures, which will in turn call coerce_types to be called. This /// call should be transparent to most users as the internal function performs the /// appropriate calls on the underlying [`ScalarUDF`] diff --git a/datafusion/ffi/src/udwf/mod.rs b/datafusion/ffi/src/udwf/mod.rs index d17999e274..5362734db2 100644 --- a/datafusion/ffi/src/udwf/mod.rs +++ b/datafusion/ffi/src/udwf/mod.rs @@ -81,7 +81,7 @@ pub struct FFI_WindowUDF { display_name: RString, ) -> RResult<WrappedSchema, RString>, - /// Performs type coersion. To simply this interface, all UDFs are treated as having + /// Performs type coercion. To simply this interface, all UDFs are treated as having /// user defined signatures, which will in turn call coerce_types to be called. This /// call should be transparent to most users as the internal function performs the /// appropriate calls on the underlying [`WindowUDF`] diff --git a/datafusion/ffi/src/util.rs b/datafusion/ffi/src/util.rs index abe369c572..5588996c3c 100644 --- a/datafusion/ffi/src/util.rs +++ b/datafusion/ffi/src/util.rs @@ -36,7 +36,7 @@ macro_rules! df_result { }; } -/// This macro is a helpful conversion utility to conver from a DataFusion Result to an abi_stable::RResult +/// This macro is a helpful conversion utility to convert from a DataFusion Result to an abi_stable::RResult #[macro_export] macro_rules! rresult { ( $x:expr ) => { @@ -49,7 +49,7 @@ macro_rules! rresult { }; } -/// This macro is a helpful conversion utility to conver from a DataFusion Result to an abi_stable::RResult +/// This macro is a helpful conversion utility to convert from a DataFusion Result to an abi_stable::RResult /// and to also call return when it is an error. Since you cannot use `?` on an RResult, this is designed /// to mimic the pattern. #[macro_export] diff --git a/datafusion/functions/src/core/union_tag.rs b/datafusion/functions/src/core/union_tag.rs index 3e6370fa01..aeadb8292b 100644 --- a/datafusion/functions/src/core/union_tag.rs +++ b/datafusion/functions/src/core/union_tag.rs @@ -136,7 +136,7 @@ impl ScalarUDFImpl for UnionTagFunc { }) .ok_or_else(|| { exec_datafusion_err!( - "union_tag: union scalar with unknow type_id {value_type_id}" + "union_tag: union scalar with unknown type_id {value_type_id}" ) }), None => Ok(ColumnarValue::Scalar(ScalarValue::try_new_null( diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index 4165014f22..36af504c96 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -191,7 +191,7 @@ impl ScalarUDFImpl for DateTruncFunc { // fast path for fine granularities if matches!( granularity.as_str(), - // For morden timezones, it's correct to truncate "minute" in this way. + // For modern timezones, it's correct to truncate "minute" in this way. // Both datafusion and arrow are ignoring historical timezone's non-minute granularity // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16). "second" | "minute" | "millisecond" | "microsecond" diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index 1db6d7a1c6..34914d256c 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -804,7 +804,7 @@ mod tests { } #[test] - fn to_timestamp_with_unparseable_data() -> Result<()> { + fn to_timestamp_with_unparsable_data() -> Result<()> { let mut date_string_builder = StringBuilder::with_capacity(2, 1024); date_string_builder.append_null(); diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 7d4920a6cb..2d7db740da 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -2451,9 +2451,9 @@ mod test { let map_type_entries = DataType::Map(Arc::new(fields), false); let fields = Field::new("key_value", DataType::Struct(struct_fields), false); - let may_type_cutsom = DataType::Map(Arc::new(fields), false); + let may_type_custom = DataType::Map(Arc::new(fields), false); - let expr = col("a").eq(cast(col("a"), may_type_cutsom)); + let expr = col("a").eq(cast(col("a"), may_type_custom)); let empty = empty_with_type(map_type_entries); let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); diff --git a/datafusion/physical-expr-adapter/src/schema_rewriter.rs b/datafusion/physical-expr-adapter/src/schema_rewriter.rs index 3bdff1bdfb..61cc97dae3 100644 --- a/datafusion/physical-expr-adapter/src/schema_rewriter.rs +++ b/datafusion/physical-expr-adapter/src/schema_rewriter.rs @@ -357,12 +357,12 @@ impl<'a> DefaultPhysicalExprAdapterRewriter<'a> { self.physical_file_schema.field_with_name(column.name()) { // If the column exists in the physical schema, we can use it in place of the logical column. - // This is nice to users because if they do a rewrite that results in something like `phyiscal_int32_col = 123u64` + // This is nice to users because if they do a rewrite that results in something like `physical_int32_col = 123u64` // we'll at least handle the casts for them. physical_field } else { // A completely unknown column that doesn't exist in either schema! - // This should probably never be hit unless something upstream broke, but nontheless it's better + // This should probably never be hit unless something upstream broke, but nonetheless it's better // for us to return a handleable error than to panic / do something unexpected. return Err(e.into()); } @@ -483,7 +483,7 @@ mod tests { } #[test] - fn test_rewrite_mulit_column_expr_with_type_cast() { + fn test_rewrite_multi_column_expr_with_type_cast() { let (physical_schema, logical_schema) = create_test_schema(); let factory = DefaultPhysicalExprAdapterFactory; let adapter = factory.create(Arc::new(logical_schema), Arc::new(physical_schema)); diff --git a/datafusion/physical-expr-common/src/physical_expr.rs b/datafusion/physical-expr-common/src/physical_expr.rs index b540b411a8..023b697aed 100644 --- a/datafusion/physical-expr-common/src/physical_expr.rs +++ b/datafusion/physical-expr-common/src/physical_expr.rs @@ -349,7 +349,7 @@ pub trait PhysicalExpr: Any + Send + Sync + Display + Debug + DynEq + DynHash { /// /// Systems that implement remote execution of plans, e.g. serialize a portion of the query plan /// and send it across the wire to a remote executor may want to call this method after - /// every batch on the source side and brodcast / update the current snaphot to the remote executor. + /// every batch on the source side and broadcast / update the current snapshot to the remote executor. /// /// Note for implementers: this method should *not* handle recursion. /// Recursion is handled in [`snapshot_physical_expr`]. @@ -363,7 +363,7 @@ pub trait PhysicalExpr: Any + Send + Sync + Display + Debug + DynEq + DynHash { /// Returns the generation of this `PhysicalExpr` for snapshotting purposes. /// The generation is an arbitrary u64 that can be used to track changes /// in the state of the `PhysicalExpr` over time without having to do an exhaustive comparison. - /// This is useful to avoid unecessary computation or serialization if there are no changes to the expression. + /// This is useful to avoid unnecessary computation or serialization if there are no changes to the expression. /// In particular, dynamic expressions that may change over time; this allows cheap checks for changes. /// Static expressions that do not change over time should return 0, as does the default implementation. /// You should not call this method directly as it does not handle recursion. diff --git a/datafusion/physical-expr/src/equivalence/properties/mod.rs b/datafusion/physical-expr/src/equivalence/properties/mod.rs index 6d18d34ca4..6b4faf5e14 100644 --- a/datafusion/physical-expr/src/equivalence/properties/mod.rs +++ b/datafusion/physical-expr/src/equivalence/properties/mod.rs @@ -315,7 +315,7 @@ impl EquivalenceProperties { self.oeq_class.extend(orderings); // Normalize given orderings to update the cache: self.oeq_cache.normal_cls.extend(normal_orderings); - // TODO: If no ordering is found to be redunant during extension, we + // TODO: If no ordering is found to be redundant during extension, we // can use a shortcut algorithm to update the leading map. self.oeq_cache.update_map(); } diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 5fa5144ecd..65a2108266 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -415,7 +415,7 @@ impl CaseExpr { fn expr_or_expr(&self, batch: &RecordBatch) -> Result<ColumnarValue> { let return_type = self.data_type(&batch.schema())?; - // evalute when condition on batch + // evaluate when condition on batch let when_value = self.when_then_expr[0].0.evaluate(batch)?; let when_value = when_value.into_array(batch.num_rows())?; let when_value = as_boolean_array(&when_value).map_err(|e| { diff --git a/datafusion/physical-optimizer/src/filter_pushdown.rs b/datafusion/physical-optimizer/src/filter_pushdown.rs index 2838d1d1f2..5aedce06b9 100644 --- a/datafusion/physical-optimizer/src/filter_pushdown.rs +++ b/datafusion/physical-optimizer/src/filter_pushdown.rs @@ -47,7 +47,7 @@ use datafusion_physical_plan::{with_new_children_if_necessary, ExecutionPlan}; use itertools::{izip, Itertools}; -/// Attempts to recursively push given filters from the top of the tree into leafs. +/// Attempts to recursively push given filters from the top of the tree into leaves. /// /// # Default Implementation /// diff --git a/datafusion/physical-plan/benches/spill_io.rs b/datafusion/physical-plan/benches/spill_io.rs index 699f20f61f..e42c8073ae 100644 --- a/datafusion/physical-plan/benches/spill_io.rs +++ b/datafusion/physical-plan/benches/spill_io.rs @@ -305,7 +305,7 @@ fn create_q20_like_batches( (schema, batches) } -/// Genereate `num_batches` wide RecordBatches resembling sort-tpch Q10 for benchmarking. +/// Generate `num_batches` wide RecordBatches resembling sort-tpch Q10 for benchmarking. /// This includes multiple numeric, date, and Utf8View columns (15 total). pub fn create_wide_batches( num_batches: usize, diff --git a/datafusion/physical-plan/src/filter_pushdown.rs b/datafusion/physical-plan/src/filter_pushdown.rs index d10bd7aea3..1f9b6ce7fb 100644 --- a/datafusion/physical-plan/src/filter_pushdown.rs +++ b/datafusion/physical-plan/src/filter_pushdown.rs @@ -281,7 +281,7 @@ impl<T> FilterPushdownPropagation<T> { } /// Bind an updated node to the [`FilterPushdownPropagation`]. - /// Use this when the current node wants to update iself in the tree or replace itself with a new node (e.g. one of it's children). + /// Use this when the current node wants to update itself in the tree or replace itself with a new node (e.g. one of it's children). /// You do not need to call this if one of the children of the current node may have updated itself, that is handled by the optimizer. pub fn with_updated_node(mut self, updated_node: T) -> Self { self.updated_node = Some(updated_node); diff --git a/datafusion/physical-plan/src/joins/hash_join/exec.rs b/datafusion/physical-plan/src/joins/hash_join/exec.rs index 359d36a29c..5710bfefb5 100644 --- a/datafusion/physical-plan/src/joins/hash_join/exec.rs +++ b/datafusion/physical-plan/src/joins/hash_join/exec.rs @@ -3696,7 +3696,7 @@ mod tests { ]; assert_batches_sorted_eq!(expected, &batches); - // THIS MIGRATION HAULTED DUE TO ISSUE #15312 + // THIS MIGRATION HALTED DUE TO ISSUE #15312 //allow_duplicates! { // assert_snapshot!(batches_to_sort_string(&batches), @r#" // +---+---+---+----+---+---+ @@ -3955,7 +3955,7 @@ mod tests { } #[tokio::test] - async fn join_splitted_batch() { + async fn join_split_batch() { let left = build_table( ("a1", &vec![1, 2, 3, 4]), ("b1", &vec![1, 1, 1, 1]), diff --git a/datafusion/physical-plan/src/metrics/baseline.rs b/datafusion/physical-plan/src/metrics/baseline.rs index 41d67c2917..15efb8f90a 100644 --- a/datafusion/physical-plan/src/metrics/baseline.rs +++ b/datafusion/physical-plan/src/metrics/baseline.rs @@ -173,15 +173,15 @@ impl SpillMetrics { #[derive(Debug, Clone)] pub struct SplitMetrics { /// Number of times an input [`RecordBatch`] was split - pub batches_splitted: Count, + pub batches_split: Count, } impl SplitMetrics { /// Create a new [`SplitMetrics`] pub fn new(metrics: &ExecutionPlanMetricsSet, partition: usize) -> Self { Self { - batches_splitted: MetricBuilder::new(metrics) - .counter("batches_splitted", partition), + batches_split: MetricBuilder::new(metrics) + .counter("batches_split", partition), } } } diff --git a/datafusion/physical-plan/src/sorts/stream.rs b/datafusion/physical-plan/src/sorts/stream.rs index 49e7413122..97dd1761b1 100644 --- a/datafusion/physical-plan/src/sorts/stream.rs +++ b/datafusion/physical-plan/src/sorts/stream.rs @@ -100,7 +100,7 @@ impl ReusableRows { // save the Rows fn save(&mut self, stream_idx: usize, rows: Arc<Rows>) { self.inner[stream_idx][1] = Some(Arc::clone(&rows)); - // swap the curent with the previous one, so that the next poll can reuse the Rows from the previous poll + // swap the current with the previous one, so that the next poll can reuse the Rows from the previous poll let [a, b] = &mut self.inner[stream_idx]; std::mem::swap(a, b); } diff --git a/datafusion/physical-plan/src/stream.rs b/datafusion/physical-plan/src/stream.rs index a7e2904ad4..100a6a7ffc 100644 --- a/datafusion/physical-plan/src/stream.rs +++ b/datafusion/physical-plan/src/stream.rs @@ -628,7 +628,7 @@ impl BatchSplitStream { let to_take = remaining.min(self.batch_size); let out = batch.slice(self.offset, to_take); - self.metrics.batches_splitted.add(1); + self.metrics.batches_split.add(1); self.offset += to_take; if self.offset < batch.num_rows() { // More data remains in this batch, store it back diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 576a51707c..cc3e805ed1 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -1425,7 +1425,7 @@ impl AsLogicalPlan for LogicalPlanNode { )?; // Assumed common usize field was batch size - // Used u64 to avoid any nastyness involving large values, most data clusters are probably uniformly 64 bits any ways + // Used u64 to avoid any nastiness involving large values, most data clusters are probably uniformly 64 bits any ways use protobuf::repartition_node::PartitionMethod; let pb_partition_method = match partitioning_scheme { diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index f1e82841d0..1a1b369fab 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -3274,7 +3274,7 @@ pub struct ComposedPhysicalExtensionCodec { } impl ComposedPhysicalExtensionCodec { - // Position in this codesc list is important as it will be used for decoding. + // Position in this codecs list is important as it will be used for decoding. // If new codec is added it should go to last position. pub fn new(codecs: Vec<Arc<dyn PhysicalExtensionCodec>>) -> Self { Self { codecs } diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 86ad54d3f1..4c4c9aaa0f 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -399,7 +399,7 @@ fn roundtrip_window() -> Result<()> { let args = vec![cast(col("a", &schema)?, &schema, DataType::Float64)?]; let sum_expr = AggregateExprBuilder::new(sum_udaf(), args) .schema(Arc::clone(&schema)) - .alias("SUM(a) RANGE BETWEEN CURRENT ROW AND UNBOUNDED PRECEEDING") + .alias("SUM(a) RANGE BETWEEN CURRENT ROW AND UNBOUNDED PRECEDING") .build() .map(Arc::new)?; @@ -527,7 +527,7 @@ fn test_distinct_window_serialization_end_to_end() -> Result<()> { } #[test] -fn rountrip_aggregate() -> Result<()> { +fn roundtrip_aggregate() -> Result<()> { let field_a = Field::new("a", DataType::Int64, false); let field_b = Field::new("b", DataType::Int64, false); let schema = Arc::new(Schema::new(vec![field_a, field_b])); @@ -575,7 +575,7 @@ fn rountrip_aggregate() -> Result<()> { } #[test] -fn rountrip_aggregate_with_limit() -> Result<()> { +fn roundtrip_aggregate_with_limit() -> Result<()> { let field_a = Field::new("a", DataType::Int64, false); let field_b = Field::new("b", DataType::Int64, false); let schema = Arc::new(Schema::new(vec![field_a, field_b])); @@ -605,7 +605,7 @@ fn rountrip_aggregate_with_limit() -> Result<()> { } #[test] -fn rountrip_aggregate_with_approx_pencentile_cont() -> Result<()> { +fn roundtrip_aggregate_with_approx_pencentile_cont() -> Result<()> { let field_a = Field::new("a", DataType::Int64, false); let field_b = Field::new("b", DataType::Int64, false); let schema = Arc::new(Schema::new(vec![field_a, field_b])); @@ -634,7 +634,7 @@ fn rountrip_aggregate_with_approx_pencentile_cont() -> Result<()> { } #[test] -fn rountrip_aggregate_with_sort() -> Result<()> { +fn roundtrip_aggregate_with_sort() -> Result<()> { let field_a = Field::new("a", DataType::Int64, false); let field_b = Field::new("b", DataType::Int64, false); let schema = Arc::new(Schema::new(vec![field_a, field_b])); diff --git a/datafusion/pruning/src/file_pruner.rs b/datafusion/pruning/src/file_pruner.rs index bce1a64eda..ed4de43b43 100644 --- a/datafusion/pruning/src/file_pruner.rs +++ b/datafusion/pruning/src/file_pruner.rs @@ -56,7 +56,7 @@ impl FilePruner { predicate_creation_errors: Count, ) -> Result<Self> { // Build a pruning schema that combines the file fields and partition fields. - // Partition fileds are always at the end. + // Partition fields are always at the end. let pruning_schema = Arc::new( Schema::new( logical_file_schema diff --git a/datafusion/pruning/src/pruning_predicate.rs b/datafusion/pruning/src/pruning_predicate.rs index 13490ebff6..5e92dbe227 100644 --- a/datafusion/pruning/src/pruning_predicate.rs +++ b/datafusion/pruning/src/pruning_predicate.rs @@ -4423,7 +4423,7 @@ mod tests { // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) true, // s1 ["A\u{10ffff}\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> no row match. (min, max) maybe truncate - // orignal (min, max) maybe ("A\u{10ffff}\u{10ffff}\u{10ffff}", "A\u{10ffff}\u{10ffff}\u{10ffff}\u{10ffff}") + // original (min, max) maybe ("A\u{10ffff}\u{10ffff}\u{10ffff}", "A\u{10ffff}\u{10ffff}\u{10ffff}\u{10ffff}") true, ]; prune_with_expr(expr, &schema, &statistics, expected_ret); diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index fd0e7dc6e3..7f8bfa4659 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -94,7 +94,7 @@ struct FunctionArgs { /// WITHIN GROUP clause, if any within_group: Vec<OrderByExpr>, /// Was the function called without parenthesis, i.e. could this also be a column reference? - function_without_paranthesis: bool, + function_without_parentheses: bool, } impl FunctionArgs { @@ -120,7 +120,7 @@ impl FunctionArgs { null_treatment, distinct: false, within_group, - function_without_paranthesis: matches!(args, FunctionArguments::None), + function_without_parentheses: matches!(args, FunctionArguments::None), }); }; @@ -202,7 +202,7 @@ impl FunctionArgs { null_treatment, distinct, within_group, - function_without_paranthesis: false, + function_without_parentheses: false, }) } } @@ -224,7 +224,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> { null_treatment, distinct, within_group, - function_without_paranthesis, + function_without_parentheses, } = function_args; if over.is_some() && !within_group.is_empty() { @@ -477,7 +477,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> { } // workaround for https://github.com/apache/datafusion-sqlparser-rs/issues/1909 - if function_without_paranthesis { + if function_without_parentheses { let maybe_ids = object_name .0 .iter() diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 2d9867b099..4d812169d0 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -977,16 +977,16 @@ impl<S: ContextProvider> SqlToRel<'_, S> { returning, or, } => { - let froms = + let from_clauses = from.map(|update_table_from_kind| match update_table_from_kind { - UpdateTableFromKind::BeforeSet(froms) => froms, - UpdateTableFromKind::AfterSet(froms) => froms, + UpdateTableFromKind::BeforeSet(from_clauses) => from_clauses, + UpdateTableFromKind::AfterSet(from_clauses) => from_clauses, }); // TODO: support multiple tables in UPDATE SET FROM - if froms.as_ref().is_some_and(|f| f.len() > 1) { + if from_clauses.as_ref().is_some_and(|f| f.len() > 1) { plan_err!("Multiple tables in UPDATE SET FROM not yet supported")?; } - let update_from = froms.and_then(|mut f| f.pop()); + let update_from = from_clauses.and_then(|mut f| f.pop()); if returning.is_some() { plan_err!("Update-returning clause not yet supported")?; } diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 52832e1324..7557377384 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -548,7 +548,7 @@ impl TreeNodeRewriter for RecursiveUnnestRewriter<'_> { let most_inner = unnest_stack.first().unwrap(); let inner_expr = most_inner.expr.as_ref(); // unnest(unnest(struct_arr_col)) is not allow to be done recursively - // it needs to be splitted into multiple unnest logical plan + // it needs to be split into multiple unnest logical plan // unnest(struct_arr) // unnest(struct_arr_col) as struct_arr // instead of unnest(struct_arr_col, depth = 2) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 28181771f1..103f4c0465 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -738,8 +738,8 @@ fn plan_update() { } #[rstest] -#[case::missing_assignement_target("UPDATE person SET doesnotexist = true")] -#[case::missing_assignement_expression("UPDATE person SET age = doesnotexist + 42")] +#[case::missing_assignment_target("UPDATE person SET doesnotexist = true")] +#[case::missing_assignment_expression("UPDATE person SET age = doesnotexist + 42")] #[case::missing_selection_expression( "UPDATE person SET age = 42 WHERE doesnotexist = true" )] diff --git a/datafusion/substrait/src/variation_const.rs b/datafusion/substrait/src/variation_const.rs index a967e7d5ae..f78b3d7853 100644 --- a/datafusion/substrait/src/variation_const.rs +++ b/datafusion/substrait/src/variation_const.rs @@ -109,7 +109,7 @@ pub const INTERVAL_DAY_TIME_TYPE_REF: u32 = 2; /// [`ScalarValue::IntervalMonthDayNano`]: datafusion::common::ScalarValue::IntervalMonthDayNano #[deprecated( since = "41.0.0", - note = "Use Substrait `IntervalCompund` type instead" + note = "Use Substrait `IntervalCompound` type instead" )] pub const INTERVAL_MONTH_DAY_NANO_TYPE_REF: u32 = 3; @@ -119,6 +119,6 @@ pub const INTERVAL_MONTH_DAY_NANO_TYPE_REF: u32 = 3; /// [`IntervalUnit::MonthDayNano`]: datafusion::arrow::datatypes::IntervalUnit::MonthDayNano #[deprecated( since = "43.0.0", - note = "Use Substrait `IntervalCompund` type instead" + note = "Use Substrait `IntervalCompound` type instead" )] pub const INTERVAL_MONTH_DAY_NANO_TYPE_NAME: &str = "interval-month-day-nano"; diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 6e4e6ea59d..606d32ee4d 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -422,7 +422,7 @@ async fn simple_scalar_function_substr() -> Result<()> { roundtrip("SELECT SUBSTR(f, 1, 3) FROM data").await } -// Test that DataFusion functions gets correctly mapped to Substrait names (when the names are diferent) +// Test that DataFusion functions gets correctly mapped to Substrait names (when the names are different) // Follows the same structure as existing roundtrip tests, but more explicitly tests for name mappings async fn test_substrait_to_df_name_mapping( substrait_name: &str, diff --git a/dev/update_config_docs.sh b/dev/update_config_docs.sh index af8ab04f3c..ed3e699c14 100755 --- a/dev/update_config_docs.sh +++ b/dev/update_config_docs.sh @@ -167,7 +167,7 @@ In conclusion, for queries under a very tight memory limit, it's recommended to set `target_partitions` and `batch_size` to smaller values. ```sql --- Query still gets paralleized, but each partition will have more memory to use +-- Query still gets parallelized, but each partition will have more memory to use SET datafusion.execution.target_partitions = 4; -- Smaller than the default '8192', while still keep the benefit of vectorized execution SET datafusion.execution.batch_size = 1024; diff --git a/docs/source/library-user-guide/extending-operators.md b/docs/source/library-user-guide/extending-operators.md index 3d491806a4..5c28d1e670 100644 --- a/docs/source/library-user-guide/extending-operators.md +++ b/docs/source/library-user-guide/extending-operators.md @@ -35,7 +35,7 @@ fn rewrite( plan: LogicalPlan, _config: &dyn OptimizerConfig, ) -> Result<Transformed<LogicalPlan>> { - // Attemps to rewrite a logical plan to a uwheel-based plan that either provides + // Attempts to rewrite a logical plan to a uwheel-based plan that either provides // plan-time aggregates or skips execution based on min/max pruning. if let Some(rewritten) = self.try_rewrite(&plan) { Ok(Transformed::yes(rewritten)) diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index 05c0de0118..49e58cb116 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -180,7 +180,7 @@ for more details. ### Added `ExecutionPlan::reset_state` -In order to fix a bug in DataFusion `49.0.0` where dynamic filters (currently only generated in the precense of a query such as `ORDER BY ... LIMIT ...`) +In order to fix a bug in DataFusion `49.0.0` where dynamic filters (currently only generated in the presence of a query such as `ORDER BY ... LIMIT ...`) produced incorrect results in recursive queries, a new method `reset_state` has been added to the `ExecutionPlan` trait. Any `ExecutionPlan` that needs to maintain internal state or references to other nodes in the execution plan tree should implement this method to reset that state. @@ -614,7 +614,7 @@ to access the metadata associated with the columnar values during invocation. To upgrade user defined aggregate functions, there is now a function `return_field` that will allow you to specify both metadata and nullability of your function. You are not required to implement this if you do not need to -handle metatdata. +handle metadata. The largest change to aggregate functions happens in the accumulator arguments. Both the `AccumulatorArgs` and `StateFieldsArgs` now contain `FieldRef` rather @@ -976,8 +976,8 @@ Elapsed 0.005 seconds. DataFusion 46 has changed the way scalar array function signatures are declared. Previously, functions needed to select from a list of predefined signatures within the `ArrayFunctionSignature` enum. Now the signatures -can be defined via a `Vec` of psuedo-types, which each correspond to a -single argument. Those psuedo-types are the variants of the +can be defined via a `Vec` of pseudo-types, which each correspond to a +single argument. Those pseudo-types are the variants of the `ArrayFunctionArgument` enum and are as follows: - `Array`: An argument of type List/LargeList/FixedSizeList. All Array diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 0736376fa8..877a46ef47 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -239,7 +239,7 @@ In conclusion, for queries under a very tight memory limit, it's recommended to set `target_partitions` and `batch_size` to smaller values. ```sql --- Query still gets paralleized, but each partition will have more memory to use +-- Query still gets parallelized, but each partition will have more memory to use SET datafusion.execution.target_partitions = 4; -- Smaller than the default '8192', while still keep the benefit of vectorized execution SET datafusion.execution.batch_size = 1024; diff --git a/docs/source/user-guide/explain-usage.md b/docs/source/user-guide/explain-usage.md index 68712012f4..2289205089 100644 --- a/docs/source/user-guide/explain-usage.md +++ b/docs/source/user-guide/explain-usage.md @@ -249,7 +249,7 @@ a separate core. Data crosses between cores only within certain operators such a You can read more about this in the [Partitioning Docs]. -[partitoning docs]: https://docs.rs/datafusion/latest/datafusion/physical_expr/enum.Partitioning.html +[partitioning docs]: https://docs.rs/datafusion/latest/datafusion/physical_expr/enum.Partitioning.html ## Example of an Aggregate Query --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org