This is an automated email from the ASF dual-hosted git repository. blaginin pushed a commit to branch db/dorny-md in repository https://gitbox.apache.org/repos/asf/datafusion.git
commit b0b43a311705424a30afd7468d0b5f4d338f770b Merge: 4a9f34d1d2 fc77be9457 Author: blaginin <[email protected]> AuthorDate: Sun Nov 23 18:09:23 2025 +0000 Merge branch 'main' into sandbox-main # Conflicts: # .asf.yaml # .github/workflows/rust.yml # README.md .github/workflows/audit.yml | 4 +- .github/workflows/dependencies.yml | 4 +- .github/workflows/dev.yml | 4 +- .github/workflows/docs.yaml | 4 +- .github/workflows/docs_pr.yaml | 2 +- .github/workflows/extended.yml | 8 +- .github/workflows/labeler.yml | 2 +- .github/workflows/large_files.yml | 2 +- .github/workflows/rust.yml | 94 +- .github/workflows/take.yml | 32 +- Cargo.lock | 317 ++-- Cargo.toml | 81 +- ci/scripts/rust_clippy.sh | 2 +- datafusion-cli/src/exec.rs | 4 +- datafusion-examples/Cargo.toml | 5 - datafusion-examples/README.md | 69 +- .../examples/builtin_functions/date_time.rs | 2 + .../examples/builtin_functions/function_factory.rs | 2 + .../examples/builtin_functions/main.rs | 5 + .../examples/builtin_functions/regexp.rs | 2 + .../{ => custom_data_source}/csv_json_opener.rs | 27 +- .../{ => custom_data_source}/csv_sql_streaming.rs | 5 +- .../{ => custom_data_source}/custom_datasource.rs | 5 +- .../{ => custom_data_source}/custom_file_casts.rs | 6 +- .../{ => custom_data_source}/custom_file_format.rs | 81 +- .../default_column_values.rs | 48 +- .../file_stream_provider.rs | 38 +- .../examples/custom_data_source/main.rs | 135 ++ .../examples/{ => data_io}/catalog.rs | 17 +- .../examples/{ => data_io}/json_shredding.rs | 5 +- datafusion-examples/examples/data_io/main.rs | 160 ++ .../parquet_advanced_index.rs} | 20 +- .../{ => data_io}/parquet_embedded_index.rs | 58 +- .../examples/{ => data_io}/parquet_encrypted.rs | 6 +- .../{ => data_io}/parquet_encrypted_with_kms.rs | 5 +- .../examples/{ => data_io}/parquet_exec_visitor.rs | 15 +- .../examples/{ => data_io}/parquet_index.rs | 10 +- .../query_http_csv.rs} | 8 +- .../examples/{ => data_io}/remote_catalog.rs | 6 +- .../examples/{ => dataframe}/dataframe.rs | 13 +- .../{ => dataframe}/deserialize_to_struct.rs | 5 +- .../{builtin_functions => dataframe}/main.rs | 45 +- .../main.rs | 57 +- .../memory_pool_execution_plan.rs | 6 +- .../memory_pool_tracking.rs | 9 +- .../examples/{ => execution_monitoring}/tracing.rs | 6 +- datafusion-examples/examples/flight/client.rs | 2 + datafusion-examples/examples/flight/main.rs | 5 + datafusion-examples/examples/flight/server.rs | 2 + datafusion-examples/examples/flight/sql_server.rs | 2 + .../{ => proto}/composed_extension_codec.rs | 16 +- .../examples/{builtin_functions => proto}/main.rs | 39 +- .../examples/{ => query_planning}/analyzer_rule.rs | 5 +- .../examples/{ => query_planning}/expr_api.rs | 5 +- .../examples/query_planning/main.rs | 138 ++ .../{ => query_planning}/optimizer_rule.rs | 5 +- .../{ => query_planning}/parse_sql_expr.rs | 10 +- .../examples/{ => query_planning}/plan_to_sql.rs | 20 +- .../examples/{ => query_planning}/planner_api.rs | 5 +- .../examples/{ => query_planning}/pruning.rs | 10 +- .../examples/{ => query_planning}/thread_pools.rs | 7 +- .../{sql_analysis.rs => sql_ops/analysis.rs} | 273 ++-- .../{sql_dialect.rs => sql_ops/dialect.rs} | 5 +- .../{sql_frontend.rs => sql_ops/frontend.rs} | 4 +- .../{builtin_functions => sql_ops}/main.rs | 55 +- .../examples/{sql_query.rs => sql_ops/query.rs} | 5 +- datafusion-examples/examples/udf/advanced_udaf.rs | 2 + datafusion-examples/examples/udf/advanced_udf.rs | 2 + datafusion-examples/examples/udf/advanced_udwf.rs | 2 + datafusion-examples/examples/udf/async_udf.rs | 2 + datafusion-examples/examples/udf/main.rs | 5 + datafusion-examples/examples/udf/simple_udaf.rs | 2 + datafusion-examples/examples/udf/simple_udf.rs | 2 + datafusion-examples/examples/udf/simple_udtf.rs | 2 + datafusion-examples/examples/udf/simple_udwf.rs | 2 + datafusion/catalog-listing/Cargo.toml | 1 - datafusion/catalog-listing/src/helpers.rs | 573 ++----- datafusion/catalog-listing/src/mod.rs | 3 + datafusion/catalog-listing/src/table.rs | 38 +- datafusion/catalog/src/async.rs | 2 +- datafusion/catalog/src/catalog.rs | 4 +- datafusion/catalog/src/information_schema.rs | 37 +- datafusion/catalog/src/lib.rs | 2 + datafusion/common-runtime/src/lib.rs | 3 + datafusion/common/Cargo.toml | 4 +- datafusion/common/src/config.rs | 144 +- datafusion/common/src/error.rs | 239 ++- datafusion/common/src/hash_utils.rs | 333 +++- datafusion/common/src/lib.rs | 2 - datafusion/common/src/pyarrow.rs | 169 -- datafusion/common/src/scalar/mod.rs | 281 +++- datafusion/common/src/utils/mod.rs | 9 +- datafusion/common/src/utils/proxy.rs | 76 +- datafusion/core/Cargo.toml | 5 +- datafusion/core/benches/map_query_sql.rs | 10 +- datafusion/core/benches/sql_planner.rs | 71 +- datafusion/core/benches/topk_aggregate.rs | 30 +- datafusion/core/src/dataframe/mod.rs | 22 +- datafusion/core/src/datasource/file_format/mod.rs | 6 +- datafusion/core/src/datasource/mod.rs | 13 +- .../core/src/datasource/physical_plan/avro.rs | 29 +- .../core/src/datasource/physical_plan/csv.rs | 185 ++- .../core/src/datasource/physical_plan/json.rs | 24 +- .../core/src/datasource/physical_plan/parquet.rs | 61 +- datafusion/core/src/execution/context/mod.rs | 96 +- datafusion/core/src/execution/session_state.rs | 40 + datafusion/core/src/lib.rs | 2 + datafusion/core/src/physical_planner.rs | 101 +- datafusion/core/src/test/mod.rs | 31 +- datafusion/core/src/test_util/parquet.rs | 37 +- .../tests/{execution => catalog_listing}/mod.rs | 4 +- .../tests/catalog_listing/pruned_partition_list.rs | 251 +++ datafusion/core/tests/core_integration.rs | 3 + .../part=123/data.arrow | Bin 0 -> 1608 bytes .../part=456/data.arrow | Bin 0 -> 1608 bytes datafusion/core/tests/dataframe/mod.rs | 251 ++- .../core/tests/datasource/object_store_access.rs | 63 +- datafusion/core/tests/execution/mod.rs | 1 + datafusion/core/tests/execution/register_arrow.rs | 90 ++ datafusion/core/tests/fuzz_cases/join_fuzz.rs | 697 ++++---- datafusion/core/tests/fuzz_cases/pruning.rs | 5 +- datafusion/core/tests/macro_hygiene/mod.rs | 1 + datafusion/core/tests/optimizer/mod.rs | 26 +- datafusion/core/tests/parquet/custom_reader.rs | 3 +- .../core/tests/parquet/external_access_plan.rs | 11 +- datafusion/core/tests/parquet/page_pruning.rs | 4 +- datafusion/core/tests/parquet/schema_adapter.rs | 22 +- datafusion/core/tests/parquet/schema_coercion.rs | 15 +- .../physical_optimizer/enforce_distribution.rs | 523 +++--- .../tests/physical_optimizer/enforce_sorting.rs | 49 +- .../physical_optimizer/filter_pushdown/mod.rs | 318 +++- .../physical_optimizer/filter_pushdown/util.rs | 113 +- .../tests/physical_optimizer/limit_pushdown.rs | 7 +- .../physical_optimizer/projection_pushdown.rs | 59 +- .../replace_with_order_preserving_variants.rs | 120 +- .../tests/physical_optimizer/sanity_checker.rs | 10 +- .../core/tests/physical_optimizer/test_utils.rs | 14 +- .../schema_adapter_integration_tests.rs | 42 +- datafusion/core/tests/sql/explain_analyze.rs | 165 +- datafusion/core/tests/sql/joins.rs | 30 +- datafusion/core/tests/sql/mod.rs | 18 +- datafusion/core/tests/sql/select.rs | 42 +- datafusion/core/tests/tpcds_planning.rs | 5 +- datafusion/core/tests/user_defined/mod.rs | 3 + .../user_defined_async_scalar_functions.rs | 139 ++ .../core/tests/user_defined/user_defined_plan.rs | 19 +- .../user_defined/user_defined_scalar_functions.rs | 233 ++- datafusion/datasource-arrow/src/file_format.rs | 436 +++-- datafusion/datasource-arrow/src/mod.rs | 5 + datafusion/datasource-arrow/src/source.rs | 566 +++++-- .../tests/data/example_stream.arrow | Bin 0 -> 1480 bytes .../example_stream_corrupted_metadata_length.arrow | Bin 0 -> 1480 bytes .../tests/data/example_stream_empty.arrow | Bin 0 -> 776 bytes .../src/avro_to_arrow/arrow_array_reader.rs | 98 +- .../datasource-avro/src/avro_to_arrow/reader.rs | 8 +- datafusion/datasource-avro/src/file_format.rs | 10 +- datafusion/datasource-avro/src/mod.rs | 2 + datafusion/datasource-avro/src/source.rs | 47 +- datafusion/datasource-csv/src/file_format.rs | 36 +- datafusion/datasource-csv/src/mod.rs | 7 +- datafusion/datasource-csv/src/source.rs | 143 +- datafusion/datasource-json/src/file_format.rs | 11 +- datafusion/datasource-json/src/mod.rs | 3 + datafusion/datasource-json/src/source.rs | 37 +- datafusion/datasource-parquet/src/access_plan.rs | 17 +- datafusion/datasource-parquet/src/file_format.rs | 20 +- datafusion/datasource-parquet/src/metadata.rs | 32 +- datafusion/datasource-parquet/src/metrics.rs | 15 +- datafusion/datasource-parquet/src/mod.rs | 3 + datafusion/datasource-parquet/src/opener.rs | 2 +- datafusion/datasource-parquet/src/page_filter.rs | 1 + datafusion/datasource-parquet/src/reader.rs | 56 +- .../datasource-parquet/src/row_group_filter.rs | 16 +- datafusion/datasource-parquet/src/source.rs | 103 +- datafusion/datasource/Cargo.toml | 6 +- datafusion/datasource/src/file.rs | 13 +- datafusion/datasource/src/file_compression_type.rs | 4 +- datafusion/datasource/src/file_format.rs | 5 +- datafusion/datasource/src/file_scan_config.rs | 221 ++- datafusion/datasource/src/file_stream.rs | 4 +- datafusion/datasource/src/memory.rs | 25 +- datafusion/datasource/src/mod.rs | 4 + datafusion/datasource/src/sink.rs | 10 +- datafusion/datasource/src/source.rs | 9 +- datafusion/datasource/src/statistics.rs | 39 +- datafusion/datasource/src/table_schema.rs | 6 + datafusion/datasource/src/test_util.rs | 52 +- datafusion/datasource/src/url.rs | 85 +- datafusion/datasource/src/write/demux.rs | 4 +- datafusion/doc/src/lib.rs | 3 + datafusion/execution/src/cache/cache_manager.rs | 2 +- datafusion/execution/src/cache/cache_unit.rs | 12 +- datafusion/execution/src/cache/mod.rs | 2 +- datafusion/execution/src/disk_manager.rs | 26 +- datafusion/execution/src/lib.rs | 3 + datafusion/execution/src/memory_pool/mod.rs | 107 +- datafusion/execution/src/memory_pool/pool.rs | 8 +- datafusion/execution/src/runtime_env.rs | 121 +- datafusion/expr-common/src/columnar_value.rs | 206 ++- datafusion/expr-common/src/interval_arithmetic.rs | 1101 ++++++++++--- datafusion/expr-common/src/lib.rs | 3 + datafusion/expr-common/src/statistics.rs | 160 +- datafusion/expr-common/src/type_coercion/binary.rs | 84 +- .../src/type_coercion/binary/tests/mod.rs | 1 + .../type_coercion/binary/tests/run_end_encoded.rs | 99 ++ datafusion/expr/src/execution_props.rs | 1 + datafusion/expr/src/expr_fn.rs | 5 + datafusion/expr/src/expr_rewriter/guarantees.rs | 668 ++++++++ datafusion/expr/src/expr_rewriter/mod.rs | 5 + datafusion/expr/src/expr_rewriter/order_by.rs | 6 +- datafusion/expr/src/expr_schema.rs | 220 ++- datafusion/expr/src/lib.rs | 4 + datafusion/expr/src/literal.rs | 3 + datafusion/expr/src/logical_plan/invariants.rs | 20 +- datafusion/expr/src/logical_plan/mod.rs | 5 +- datafusion/expr/src/logical_plan/plan.rs | 126 +- datafusion/expr/src/logical_plan/statement.rs | 12 + datafusion/expr/src/predicate_bounds.rs | 681 ++++++++ datafusion/expr/src/udaf.rs | 10 +- datafusion/expr/src/udf.rs | 21 +- datafusion/expr/src/utils.rs | 7 +- datafusion/ffi/src/arrow_wrappers.rs | 5 +- datafusion/ffi/src/catalog_provider.rs | 2 +- datafusion/ffi/src/catalog_provider_list.rs | 283 ++++ datafusion/ffi/src/lib.rs | 4 + datafusion/ffi/src/tests/async_provider.rs | 4 +- datafusion/ffi/src/tests/catalog.rs | 57 +- datafusion/ffi/src/tests/mod.rs | 6 + .../tests/{ffi_integration.rs => ffi_catalog.rs} | 74 +- datafusion/ffi/tests/ffi_integration.rs | 27 - .../src/aggregate/count_distinct/native.rs | 70 +- .../src/aggregate/groups_accumulator/accumulate.rs | 10 +- .../src/aggregate/sum_distinct/numeric.rs | 78 +- datafusion/functions-aggregate-common/src/lib.rs | 4 + .../src/noop_accumulator.rs | 70 + .../functions-aggregate-common/src/tdigest.rs | 1 + datafusion/functions-aggregate-common/src/utils.rs | 99 +- .../functions-aggregate/src/approx_distinct.rs | 52 +- .../src/approx_percentile_cont.rs | 8 +- .../src/approx_percentile_cont_with_weight.rs | 6 +- datafusion/functions-aggregate/src/array_agg.rs | 20 +- datafusion/functions-aggregate/src/average.rs | 55 +- .../functions-aggregate/src/bit_and_or_xor.rs | 33 +- datafusion/functions-aggregate/src/correlation.rs | 137 +- datafusion/functions-aggregate/src/first_last.rs | 116 +- datafusion/functions-aggregate/src/lib.rs | 3 + datafusion/functions-aggregate/src/median.rs | 73 +- datafusion/functions-aggregate/src/nth_value.rs | 22 +- .../functions-aggregate/src/percentile_cont.rs | 93 +- datafusion/functions-aggregate/src/sum.rs | 136 +- .../functions-nested/benches/array_reverse.rs | 76 +- datafusion/functions-nested/src/array_has.rs | 35 +- datafusion/functions-nested/src/flatten.rs | 12 +- datafusion/functions-nested/src/lib.rs | 3 + datafusion/functions-nested/src/map.rs | 410 ++++- datafusion/functions-nested/src/position.rs | 20 +- datafusion/functions-nested/src/remove.rs | 10 +- datafusion/functions-nested/src/replace.rs | 14 +- datafusion/functions-nested/src/reverse.rs | 117 +- datafusion/functions-nested/src/set_ops.rs | 15 +- datafusion/functions-nested/src/string.rs | 60 +- datafusion/functions-table/src/lib.rs | 3 + datafusion/functions-window-common/src/lib.rs | 3 + datafusion/functions-window/src/lead_lag.rs | 2 +- datafusion/functions-window/src/lib.rs | 3 + datafusion/functions-window/src/nth_value.rs | 2 +- datafusion/functions-window/src/ntile.rs | 4 +- datafusion/functions-window/src/utils.rs | 4 +- datafusion/functions/src/core/getfield.rs | 52 +- datafusion/functions/src/core/greatest.rs | 14 +- .../functions/src/core/greatest_least_utils.rs | 27 +- datafusion/functions/src/core/least.rs | 14 +- datafusion/functions/src/core/mod.rs | 2 + datafusion/functions/src/crypto/basic.rs | 13 +- datafusion/functions/src/datetime/common.rs | 8 +- datafusion/functions/src/datetime/date_trunc.rs | 177 ++- datafusion/functions/src/datetime/to_char.rs | 14 +- datafusion/functions/src/lib.rs | 3 + datafusion/functions/src/macros.rs | 10 +- datafusion/functions/src/math/abs.rs | 16 +- datafusion/functions/src/math/monotonicity.rs | 20 +- datafusion/functions/src/math/pi.rs | 10 +- datafusion/functions/src/math/power.rs | 448 +++++- datafusion/functions/src/math/random.rs | 10 +- datafusion/functions/src/regex/regexpcount.rs | 38 +- datafusion/functions/src/regex/regexpinstr.rs | 1 + datafusion/functions/src/regex/regexpreplace.rs | 1 + datafusion/functions/src/string/ascii.rs | 8 +- datafusion/functions/src/string/common.rs | 1 + datafusion/functions/src/string/repeat.rs | 8 +- datafusion/functions/src/string/split_part.rs | 1 + datafusion/functions/src/string/uuid.rs | 10 +- .../functions/src/unicode/character_length.rs | 8 +- datafusion/functions/src/unicode/find_in_set.rs | 29 +- datafusion/functions/src/unicode/lpad.rs | 12 +- datafusion/functions/src/unicode/reverse.rs | 6 +- datafusion/functions/src/unicode/rpad.rs | 12 +- datafusion/functions/src/unicode/strpos.rs | 22 +- datafusion/functions/src/unicode/substr.rs | 106 +- datafusion/functions/src/utils.rs | 62 +- .../src/decorrelate_predicate_subquery.rs | 13 +- datafusion/optimizer/src/eliminate_one_union.rs | 121 -- .../optimizer/src/extract_equijoin_predicate.rs | 15 +- datafusion/optimizer/src/lib.rs | 9 +- .../optimizer/src/optimize_projections/mod.rs | 28 +- ...liminate_nested_union.rs => optimize_unions.rs} | 43 +- datafusion/optimizer/src/optimizer.rs | 7 +- .../optimizer/src/propagate_empty_relation.rs | 4 +- datafusion/optimizer/src/push_down_filter.rs | 20 +- .../optimizer/src/scalar_subquery_to_join.rs | 18 +- .../src/simplify_expressions/expr_simplifier.rs | 10 +- .../src/simplify_expressions/guarantees.rs | 476 ------ .../optimizer/src/simplify_expressions/mod.rs | 3 +- .../optimizer/src/single_distinct_to_groupby.rs | 18 +- datafusion/physical-expr-adapter/src/lib.rs | 3 + datafusion/physical-expr-common/src/datum.rs | 83 +- datafusion/physical-expr-common/src/lib.rs | 3 + .../physical-expr-common/src/physical_expr.rs | 23 +- datafusion/physical-expr-common/src/utils.rs | 21 + datafusion/physical-expr/Cargo.toml | 4 + datafusion/physical-expr/src/aggregate.rs | 9 +- datafusion/physical-expr/src/analysis.rs | 43 +- .../physical-expr/src/async_scalar_function.rs | 25 +- .../src/equivalence/properties/mod.rs | 2 +- datafusion/physical-expr/src/expressions/binary.rs | 117 +- .../src/expressions/binary/kernels.rs | 6 +- datafusion/physical-expr/src/expressions/case.rs | 404 ++++- .../src/expressions/dynamic_filters.rs | 102 +- .../physical-expr/src/expressions/in_list.rs | 1677 +++++++++++++++++--- datafusion/physical-expr/src/expressions/like.rs | 22 +- .../physical-expr/src/expressions/literal.rs | 1 + datafusion/physical-expr/src/expressions/not.rs | 8 +- .../physical-expr/src/intervals/cp_solver.rs | 79 +- datafusion/physical-expr/src/lib.rs | 3 + datafusion/physical-expr/src/projection.rs | 194 ++- .../physical-expr/src/statistics/stats_solver.rs | 2 +- datafusion/physical-expr/src/utils/guarantee.rs | 4 +- .../src/window/standard_window_function_expr.rs | 9 +- datafusion/physical-expr/src/window/window_expr.rs | 9 +- .../src/coalesce_async_exec_input.rs | 71 - .../physical-optimizer/src/coalesce_batches.rs | 31 +- .../physical-optimizer/src/enforce_distribution.rs | 3 +- .../physical-optimizer/src/enforce_sorting/mod.rs | 7 +- .../replace_with_order_preserving_variants.rs | 23 +- .../src/enforce_sorting/sort_pushdown.rs | 4 +- .../physical-optimizer/src/filter_pushdown.rs | 58 +- datafusion/physical-optimizer/src/lib.rs | 4 +- datafusion/physical-optimizer/src/optimizer.rs | 2 - .../physical-optimizer/src/projection_pushdown.rs | 4 +- .../group_values/multi_group_by/primitive.rs | 114 +- .../src/aggregates/group_values/null_builder.rs | 8 + .../src/aggregates/group_values/row.rs | 14 +- datafusion/physical-plan/src/aggregates/mod.rs | 71 +- .../physical-plan/src/aggregates/no_grouping.rs | 23 +- .../physical-plan/src/aggregates/row_hash.rs | 225 ++- .../physical-plan/src/aggregates/topk/heap.rs | 14 +- .../physical-plan/src/aggregates/topk_stream.rs | 11 +- datafusion/physical-plan/src/analyze.rs | 22 +- datafusion/physical-plan/src/async_func.rs | 10 +- datafusion/physical-plan/src/coalesce/mod.rs | 11 +- .../physical-plan/src/coalesce_partitions.rs | 12 +- datafusion/physical-plan/src/coop.rs | 10 +- datafusion/physical-plan/src/empty.rs | 28 +- datafusion/physical-plan/src/execution_plan.rs | 58 +- datafusion/physical-plan/src/explain.rs | 10 +- datafusion/physical-plan/src/filter.rs | 168 +- datafusion/physical-plan/src/filter_pushdown.rs | 1 + datafusion/physical-plan/src/joins/cross_join.rs | 16 +- .../physical-plan/src/joins/hash_join/exec.rs | 257 ++- .../physical-plan/src/joins/hash_join/mod.rs | 1 + .../src/joins/hash_join/partitioned_hash_eval.rs | 158 ++ .../src/joins/hash_join/shared_bounds.rs | 409 +++-- .../physical-plan/src/joins/hash_join/stream.rs | 112 +- .../physical-plan/src/joins/nested_loop_join.rs | 332 +++- .../src/joins/piecewise_merge_join/classic_join.rs | 20 +- .../src/joins/sort_merge_join/exec.rs | 15 +- .../src/joins/sort_merge_join/metrics.rs | 8 - .../src/joins/sort_merge_join/stream.rs | 66 +- .../src/joins/sort_merge_join/tests.rs | 234 +++ .../physical-plan/src/joins/stream_join_utils.rs | 6 - .../physical-plan/src/joins/symmetric_hash_join.rs | 51 +- datafusion/physical-plan/src/joins/test_utils.rs | 6 +- datafusion/physical-plan/src/joins/utils.rs | 62 +- datafusion/physical-plan/src/lib.rs | 3 + datafusion/physical-plan/src/limit.rs | 20 +- datafusion/physical-plan/src/memory.rs | 48 +- datafusion/physical-plan/src/metrics/baseline.rs | 14 + datafusion/physical-plan/src/metrics/builder.rs | 22 +- datafusion/physical-plan/src/metrics/mod.rs | 5 +- datafusion/physical-plan/src/metrics/value.rs | 300 +++- datafusion/physical-plan/src/placeholder_row.rs | 14 +- datafusion/physical-plan/src/projection.rs | 95 +- datafusion/physical-plan/src/recursive_query.rs | 17 +- datafusion/physical-plan/src/repartition/mod.rs | 147 +- datafusion/physical-plan/src/sorts/sort.rs | 43 +- .../src/sorts/sort_preserving_merge.rs | 14 +- datafusion/physical-plan/src/sorts/stream.rs | 13 +- .../physical-plan/src/sorts/streaming_merge.rs | 11 +- datafusion/physical-plan/src/spill/mod.rs | 1 + datafusion/physical-plan/src/test.rs | 19 +- datafusion/physical-plan/src/topk/mod.rs | 67 +- datafusion/physical-plan/src/union.rs | 52 +- datafusion/physical-plan/src/unnest.rs | 15 +- .../physical-plan/src/windows/window_agg_exec.rs | 16 +- datafusion/physical-plan/src/work_table.rs | 14 +- datafusion/proto-common/src/lib.rs | 3 + datafusion/proto/src/common.rs | 14 +- datafusion/proto/src/lib.rs | 3 + datafusion/proto/src/logical_plan/mod.rs | 13 +- datafusion/proto/src/physical_plan/from_proto.rs | 58 +- datafusion/proto/src/physical_plan/mod.rs | 41 +- datafusion/proto/src/physical_plan/to_proto.rs | 3 +- .../proto/tests/cases/roundtrip_logical_plan.rs | 4 +- .../proto/tests/cases/roundtrip_physical_plan.rs | 131 +- datafusion/pruning/src/lib.rs | 4 + datafusion/pruning/src/pruning_predicate.rs | 22 +- datafusion/session/src/lib.rs | 4 + datafusion/spark/src/function/aggregate/avg.rs | 35 +- datafusion/spark/src/function/bitwise/bit_count.rs | 25 +- datafusion/spark/src/function/bitwise/bit_get.rs | 237 +-- datafusion/spark/src/function/bitwise/bit_shift.rs | 727 ++------- datafusion/spark/src/function/bitwise/mod.rs | 18 +- datafusion/spark/src/function/datetime/date_add.rs | 8 +- datafusion/spark/src/function/datetime/last_day.rs | 10 +- .../spark/src/function/datetime/make_interval.rs | 43 +- datafusion/spark/src/function/hash/crc32.rs | 56 +- datafusion/spark/src/function/hash/sha1.rs | 58 +- datafusion/spark/src/function/math/abs.rs | 378 +++++ datafusion/spark/src/function/math/expm1.rs | 63 +- datafusion/spark/src/function/math/factorial.rs | 10 +- datafusion/spark/src/function/math/hex.rs | 13 +- datafusion/spark/src/function/math/mod.rs | 11 + datafusion/spark/src/function/math/modulus.rs | 28 +- datafusion/spark/src/function/math/rint.rs | 7 +- datafusion/spark/src/function/math/trigonometry.rs | 167 ++ .../spark/src/function/string/format_string.rs | 25 +- datafusion/spark/src/function/string/length.rs | 16 +- datafusion/spark/src/lib.rs | 3 + datafusion/sql/src/cte.rs | 12 +- datafusion/sql/src/expr/binary_op.rs | 4 +- datafusion/sql/src/expr/function.rs | 93 +- datafusion/sql/src/expr/identifier.rs | 9 +- datafusion/sql/src/expr/mod.rs | 16 +- datafusion/sql/src/expr/subquery.rs | 8 +- datafusion/sql/src/expr/value.rs | 22 +- datafusion/sql/src/lib.rs | 3 + datafusion/sql/src/parser.rs | 103 +- datafusion/sql/src/resolve.rs | 1 + datafusion/sql/src/statement.rs | 87 +- datafusion/sql/src/unparser/expr.rs | 35 +- datafusion/sql/src/unparser/plan.rs | 20 +- datafusion/sql/src/unparser/utils.rs | 26 +- datafusion/sql/src/utils.rs | 11 +- datafusion/sql/tests/cases/params.rs | 2 +- datafusion/sql/tests/sql_integration.rs | 24 +- datafusion/sqllogictest/README.md | 11 + datafusion/sqllogictest/bin/sqllogictests.rs | 84 +- .../src/engines/currently_executed_sql.rs | 85 + .../src/engines/datafusion_engine/runner.rs | 25 +- .../runner.rs | 20 + datafusion/sqllogictest/src/engines/mod.rs | 3 + .../src/engines/postgres_engine/mod.rs | 37 +- datafusion/sqllogictest/src/lib.rs | 1 + datafusion/sqllogictest/src/util.rs | 49 +- .../test_files/agg_func_substitute.slt | 6 +- datafusion/sqllogictest/test_files/aggregate.slt | 166 +- .../test_files/aggregate_repartition.slt | 10 +- .../sqllogictest/test_files/aggregates_topk.slt | 49 +- datafusion/sqllogictest/test_files/array.slt | 42 +- datafusion/sqllogictest/test_files/arrow_files.slt | 260 +++ datafusion/sqllogictest/test_files/async_udf.slt | 11 +- datafusion/sqllogictest/test_files/case.slt | 20 + .../sqllogictest/test_files/count_star_rule.slt | 23 +- datafusion/sqllogictest/test_files/cte.slt | 39 +- datafusion/sqllogictest/test_files/decimal.slt | 139 ++ datafusion/sqllogictest/test_files/dictionary.slt | 15 +- datafusion/sqllogictest/test_files/explain.slt | 23 +- .../{spark/math/expm1.slt => explain_analyze.slt} | 23 +- .../sqllogictest/test_files/explain_tree.slt | 871 +++++----- datafusion/sqllogictest/test_files/expr.slt | 207 +++ .../test_files/filter_without_sort_exec.slt | 42 +- datafusion/sqllogictest/test_files/functions.slt | 19 +- datafusion/sqllogictest/test_files/group_by.slt | 40 +- .../sqllogictest/test_files/information_schema.slt | 10 + datafusion/sqllogictest/test_files/insert.slt | 15 +- .../sqllogictest/test_files/insert_to_external.slt | 10 +- datafusion/sqllogictest/test_files/join.slt.part | 37 +- .../test_files/join_disable_repartition_joins.slt | 9 +- .../test_files/join_is_not_distinct_from.slt | 9 +- datafusion/sqllogictest/test_files/joins.slt | 188 +-- datafusion/sqllogictest/test_files/limit.slt | 14 +- datafusion/sqllogictest/test_files/map.slt | 25 +- .../test_files/monotonic_projection_test.slt | 10 +- .../sqllogictest/test_files/named_arguments.slt | 2 +- datafusion/sqllogictest/test_files/operator.slt | 40 +- datafusion/sqllogictest/test_files/options.slt | 11 +- datafusion/sqllogictest/test_files/order.slt | 52 +- datafusion/sqllogictest/test_files/parquet.slt | 28 +- .../test_files/parquet_filter_pushdown.slt | 42 +- .../sqllogictest/test_files/parquet_statistics.slt | 21 +- datafusion/sqllogictest/test_files/predicates.slt | 49 +- datafusion/sqllogictest/test_files/prepare.slt | 22 +- datafusion/sqllogictest/test_files/projection.slt | 7 +- .../sqllogictest/test_files/push_down_filter.slt | 63 +- datafusion/sqllogictest/test_files/pwmj.slt | 46 +- datafusion/sqllogictest/test_files/qualify.slt | 71 +- .../sqllogictest/test_files/regexp/regexp_like.slt | 5 +- datafusion/sqllogictest/test_files/repartition.slt | 14 +- .../sqllogictest/test_files/repartition_scan.slt | 30 +- datafusion/sqllogictest/test_files/scalar.slt | 2 +- datafusion/sqllogictest/test_files/select.slt | 44 +- .../sqllogictest/test_files/set_variable.slt | 171 ++ .../sqllogictest/test_files/simplify_expr.slt | 21 +- .../{spark/math/expm1.slt => slt_features.slt} | 64 +- .../sqllogictest/test_files/sort_merge_join.slt | 49 + .../test_files/spark/bitwise/bit_count.slt | 15 + .../test_files/spark/bitwise/bit_get.slt | 25 + .../test_files/spark/bitwise/shiftright.slt | 115 ++ .../spark/bitwise/shiftrightunsigned.slt | 103 ++ .../sqllogictest/test_files/spark/hash/crc32.slt | 31 +- .../sqllogictest/test_files/spark/hash/sha1.slt | 25 +- .../sqllogictest/test_files/spark/math/abs.slt | 69 +- .../sqllogictest/test_files/spark/math/csc.slt | 23 +- .../sqllogictest/test_files/spark/math/expm1.slt | 5 + .../sqllogictest/test_files/spark/math/sec.slt | 22 +- .../test_files/spark/math/shiftleft.slt | 100 ++ .../test_files/string/string_literal.slt | 4 +- datafusion/sqllogictest/test_files/subquery.slt | 32 +- datafusion/sqllogictest/test_files/timestamps.slt | 55 +- datafusion/sqllogictest/test_files/topk.slt | 2 +- .../sqllogictest/test_files/tpch/plans/q1.slt.part | 5 +- .../test_files/tpch/plans/q10.slt.part | 30 +- .../test_files/tpch/plans/q11.slt.part | 60 +- .../test_files/tpch/plans/q12.slt.part | 11 +- .../test_files/tpch/plans/q13.slt.part | 14 +- .../test_files/tpch/plans/q14.slt.part | 12 +- .../test_files/tpch/plans/q15.slt.part | 31 +- .../test_files/tpch/plans/q16.slt.part | 18 +- .../test_files/tpch/plans/q17.slt.part | 19 +- .../test_files/tpch/plans/q18.slt.part | 30 +- .../test_files/tpch/plans/q19.slt.part | 18 +- .../sqllogictest/test_files/tpch/plans/q2.slt.part | 103 +- .../test_files/tpch/plans/q20.slt.part | 64 +- .../test_files/tpch/plans/q21.slt.part | 49 +- .../test_files/tpch/plans/q22.slt.part | 26 +- .../sqllogictest/test_files/tpch/plans/q3.slt.part | 25 +- .../sqllogictest/test_files/tpch/plans/q4.slt.part | 14 +- .../sqllogictest/test_files/tpch/plans/q5.slt.part | 45 +- .../sqllogictest/test_files/tpch/plans/q6.slt.part | 5 +- .../sqllogictest/test_files/tpch/plans/q7.slt.part | 49 +- .../sqllogictest/test_files/tpch/plans/q8.slt.part | 61 +- .../sqllogictest/test_files/tpch/plans/q9.slt.part | 39 +- datafusion/sqllogictest/test_files/union.slt | 48 +- datafusion/sqllogictest/test_files/unnest.slt | 6 +- datafusion/sqllogictest/test_files/window.slt | 177 +-- .../sqllogictest/test_files/window_limits.slt | 10 +- datafusion/substrait/src/extensions.rs | 4 +- datafusion/substrait/src/lib.rs | 3 + .../src/logical_plan/consumer/expr/mod.rs | 4 +- .../logical_plan/producer/substrait_producer.rs | 8 +- datafusion/substrait/src/physical_plan/consumer.rs | 4 +- .../tests/cases/roundtrip_physical_plan.rs | 34 +- datafusion/wasmtest/src/lib.rs | 3 + dev/changelog/51.0.0.md | 717 +++++++++ dev/release/README.md | 340 ++-- dev/release/add-branch-protection.sh | 160 ++ dev/update_function_docs.sh | 30 + docs/requirements.txt | 2 +- .../contributor-guide/development_environment.md | 2 +- docs/source/contributor-guide/index.md | 8 +- docs/source/contributor-guide/testing.md | 32 +- docs/source/download.md | 26 +- docs/source/library-user-guide/catalogs.md | 2 +- docs/source/library-user-guide/upgrading.md | 229 ++- docs/source/user-guide/configs.md | 237 +-- docs/source/user-guide/expressions.md | 4 +- docs/source/user-guide/metrics.md | 1 + docs/source/user-guide/sql/aggregate_functions.md | 30 + docs/source/user-guide/sql/explain.md | 18 +- 579 files changed, 23552 insertions(+), 11025 deletions(-) diff --cc .github/workflows/rust.yml index cb1cb054be,aec95c0562..73d4bb0523 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@@ -379,11 -327,10 +391,11 @@@ jobs # datafusion-cli tests linux-test-datafusion-cli: name: cargo test datafusion-cli (amd64) - needs: linux-build-lib + needs: [linux-build-lib, check-files] + if: needs.check-files.outputs.should_skip != 'true' runs-on: ubuntu-latest steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: submodules: true fetch-depth: 1 @@@ -643,35 -584,8 +655,10 @@@ shell: bash run: cargo test --profile ci --exclude datafusion-cli --workspace --lib --tests --bins --features avro,json,backtrace,integration-tests - test-datafusion-pyarrow: - name: cargo test pyarrow (amd64) - needs: [linux-build-lib, check-files] - if: needs.check-files.outputs.should_skip != 'true' - runs-on: ubuntu-latest - container: - image: amd64/rust:bullseye # Use the bullseye tag image which comes with python3.9 - steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - submodules: true - fetch-depth: 1 - - name: Install PyArrow - run: | - echo "LIBRARY_PATH=$LD_LIBRARY_PATH" >> $GITHUB_ENV - apt-get update - apt-get install python3-pip -y - python3 -m pip install pyarrow - - name: Setup Rust toolchain - uses: ./.github/actions/setup-builder - with: - rust-version: stable - - name: Run datafusion-common tests - run: cargo test --profile ci -p datafusion-common --features=pyarrow,sql - vendor: name: Verify Vendored Code + needs: check-files + if: needs.check-files.outputs.should_skip != 'true' runs-on: ubuntu-latest container: image: amd64/rust @@@ -882,43 -789,9 +869,42 @@@ run: cargo msrv --output-format json --log-target stdout verify typos: name: Spell Check with Typos + needs: [check-files] + if: needs.check-files.outputs.should_skip != 'true' runs-on: ubuntu-latest steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: persist-credentials: false - - uses: crate-ci/typos@07d900b8fa1097806b8adb6391b0d3e0ac2fdea7 # v1.39.0 + - uses: crate-ci/typos@626c4bedb751ce0b7f03262ca97ddda9a076ae1c # v1.39.2 + + rust-required-checks: + name: Validate Rust Required Checks + needs: [ linux-rustdoc, + linux-wasm-pack, + linux-build-lib, + macos-aarch64, + vendor, + check-fmt, + msrv, + linux-datafusion-common-features, + linux-datafusion-substrait-features, + linux-datafusion-proto-features, + linux-cargo-check-datafusion, + linux-cargo-check-datafusion-functions, + linux-test, + linux-test-example, + linux-test-doc, + verify-benchmark-results, + sqllogictest-postgres, + sqllogictest-substrait, - test-datafusion-pyarrow, + clippy, + cargo-toml-formatting-checks, + linux-test-datafusion-cli, + config-docs-check + ] + if: always() + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - run: echo '${{ toJSON(needs) }}' | jq -e 'all(.[]; .result == "success" or .result == "skipped")' --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
