This is an automated email from the ASF dual-hosted git repository. agrove pushed a commit to branch branch-33 in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
commit b248bc703ab52f057f408686aa1e59f4b75f1413 Merge: 262f08778b 1d21681e4e Author: Andy Grove <[email protected]> AuthorDate: Sun Nov 12 09:39:11 2023 -0700 prep 33rc2 .github/workflows/rust.yml | 14 +- Cargo.toml | 4 +- README.md | 17 +- benchmarks/Cargo.toml | 2 +- .../Cargo.toml => ci/scripts/rust_example.sh | 41 +- datafusion-cli/Cargo.lock | 74 +- datafusion-examples/Cargo.toml | 2 +- datafusion-examples/README.md | 8 +- .../examples/{ => external_dependency}/catalog.rs | 0 .../{ => external_dependency}/dataframe-to-s3.rs | 0 .../{ => external_dependency}/query-aws-s3.rs | 0 .../examples/{ => flight}/flight_client.rs | 0 .../examples/{ => flight}/flight_server.rs | 0 .../examples/{ => flight}/flight_sql_server.rs | 0 datafusion-examples/examples/simple_udwf.rs | 2 +- datafusion/common/Cargo.toml | 4 +- datafusion/common/src/cast.rs | 12 +- datafusion/common/src/config.rs | 5 + datafusion/common/src/dfschema.rs | 72 +- datafusion/common/src/pyarrow.rs | 2 +- datafusion/common/src/scalar.rs | 598 +++--- datafusion/common/src/stats.rs | 45 +- datafusion/common/src/tree_node.rs | 24 + datafusion/core/Cargo.toml | 6 +- datafusion/core/benches/data_utils/mod.rs | 85 + datafusion/core/benches/distinct_query_sql.rs | 208 ++ datafusion/core/benches/scalar.rs | 10 +- datafusion/core/benches/topk_aggregate.rs | 92 +- datafusion/core/src/dataframe/mod.rs | 86 + .../datasource/avro_to_arrow/arrow_array_reader.rs | 20 +- datafusion/core/src/datasource/file_format/json.rs | 10 +- datafusion/core/src/datasource/listing/helpers.rs | 8 +- .../datasource/physical_plan/file_scan_config.rs | 12 +- .../datasource/physical_plan/parquet/row_filter.rs | 2 +- .../datasource/physical_plan/parquet/row_groups.rs | 4 +- datafusion/core/src/datasource/statistics.rs | 6 +- datafusion/core/src/execution/context/mod.rs | 17 + datafusion/core/src/execution/context/parquet.rs | 143 ++ .../src/physical_optimizer/aggregate_statistics.rs | 8 +- .../combine_partial_final_agg.rs | 48 + .../src/physical_optimizer/enforce_distribution.rs | 28 +- .../core/src/physical_optimizer/enforce_sorting.rs | 4 +- .../limited_distinct_aggregation.rs | 626 ++++++ datafusion/core/src/physical_optimizer/mod.rs | 2 + .../core/src/physical_optimizer/optimizer.rs | 13 + .../src/physical_optimizer/output_requirements.rs | 6 +- .../src/physical_optimizer/projection_pushdown.rs | 2117 ++++++++++++++++++++ .../replace_with_order_preserving_variants.rs | 5 +- .../core/src/physical_optimizer/test_utils.rs | 2 +- .../src/physical_optimizer/topk_aggregation.rs | 13 +- datafusion/core/src/physical_planner.rs | 5 +- datafusion/core/tests/dataframe/mod.rs | 29 +- .../fuzz_cases/sort_preserving_repartition_fuzz.rs | 4 +- datafusion/core/tests/sql/explain_analyze.rs | 5 +- datafusion/expr/src/aggregate_function.rs | 31 +- datafusion/expr/src/built_in_function.rs | 64 +- datafusion/expr/src/columnar_value.rs | 14 +- datafusion/expr/src/expr.rs | 45 +- datafusion/expr/src/expr_fn.rs | 26 + datafusion/expr/src/expr_schema.rs | 34 +- datafusion/expr/src/logical_plan/builder.rs | 15 +- datafusion/expr/src/operator.rs | 8 +- datafusion/expr/src/signature.rs | 75 +- datafusion/expr/src/tree_node/expr.rs | 8 +- datafusion/expr/src/type_coercion/functions.rs | 11 +- datafusion/expr/src/utils.rs | 3 +- datafusion/expr/src/window_function.rs | 15 + datafusion/expr/src/window_state.rs | 2 +- .../optimizer/src/analyzer/count_wildcard_rule.rs | 66 +- .../optimizer/src/analyzer/inline_table_scan.rs | 2 +- .../optimizer/src/common_subexpr_eliminate.rs | 2 +- datafusion/optimizer/src/decorrelate.rs | 7 +- datafusion/optimizer/src/push_down_filter.rs | 3 +- .../src/simplify_expressions/expr_simplifier.rs | 3 +- .../optimizer/src/single_distinct_to_groupby.rs | 95 +- .../optimizer/src/unwrap_cast_in_comparison.rs | 8 +- .../physical-expr/src/aggregate/array_agg.rs | 43 +- .../src/aggregate/array_agg_distinct.rs | 13 +- .../src/aggregate/array_agg_ordered.rs | 20 +- datafusion/physical-expr/src/aggregate/build_in.rs | 18 +- .../physical-expr/src/aggregate/correlation.rs | 12 +- .../physical-expr/src/aggregate/covariance.rs | 12 +- .../physical-expr/src/aggregate/first_last.rs | 10 +- datafusion/physical-expr/src/aggregate/stddev.rs | 12 +- datafusion/physical-expr/src/aggregate/utils.rs | 4 +- datafusion/physical-expr/src/aggregate/variance.rs | 12 +- datafusion/physical-expr/src/array_expressions.rs | 916 ++++----- .../physical-expr/src/conditional_expressions.rs | 2 +- .../physical-expr/src/datetime_expressions.rs | 152 +- datafusion/physical-expr/src/expressions/binary.rs | 49 +- datafusion/physical-expr/src/expressions/case.rs | 64 +- datafusion/physical-expr/src/expressions/cast.rs | 17 +- datafusion/physical-expr/src/expressions/datum.rs | 14 +- .../src/expressions/get_indexed_field.rs | 33 +- .../physical-expr/src/expressions/in_list.rs | 11 +- .../physical-expr/src/expressions/is_not_null.rs | 5 +- .../physical-expr/src/expressions/is_null.rs | 5 +- datafusion/physical-expr/src/expressions/like.rs | 5 +- .../physical-expr/src/expressions/literal.rs | 5 +- datafusion/physical-expr/src/expressions/mod.rs | 12 +- .../physical-expr/src/expressions/negative.rs | 2 +- datafusion/physical-expr/src/expressions/not.rs | 5 +- datafusion/physical-expr/src/expressions/nullif.rs | 20 +- .../physical-expr/src/expressions/try_cast.rs | 12 +- datafusion/physical-expr/src/functions.rs | 265 +-- .../src/intervals/interval_aritmetic.rs | 2 +- datafusion/physical-expr/src/math_expressions.rs | 15 +- datafusion/physical-expr/src/planner.rs | 2 +- datafusion/physical-expr/src/scalar_function.rs | 10 +- datafusion/physical-expr/src/struct_expressions.rs | 15 +- .../src/window/built_in_window_function_expr.rs | 6 +- datafusion/physical-expr/src/window/lead_lag.rs | 1 + datafusion/physical-expr/src/window/window_expr.rs | 6 +- datafusion/physical-plan/src/aggregates/mod.rs | 83 +- .../physical-plan/src/aggregates/no_grouping.rs | 6 +- .../physical-plan/src/aggregates/row_hash.rs | 61 +- datafusion/physical-plan/src/filter.rs | 56 +- datafusion/physical-plan/src/insert.rs | 12 +- datafusion/physical-plan/src/joins/cross_join.rs | 2 +- datafusion/physical-plan/src/joins/hash_join.rs | 129 +- .../physical-plan/src/joins/hash_join_utils.rs | 116 +- .../physical-plan/src/joins/symmetric_hash_join.rs | 27 +- datafusion/physical-plan/src/joins/utils.rs | 4 +- datafusion/physical-plan/src/limit.rs | 159 +- datafusion/physical-plan/src/memory.rs | 12 + datafusion/physical-plan/src/projection.rs | 8 +- datafusion/physical-plan/src/repartition/mod.rs | 236 ++- datafusion/physical-plan/src/sorts/stream.rs | 4 +- datafusion/physical-plan/src/topk/mod.rs | 2 +- datafusion/physical-plan/src/unnest.rs | 2 +- datafusion/proto/Cargo.toml | 2 +- datafusion/proto/proto/datafusion.proto | 61 +- datafusion/proto/src/generated/pbjson.rs | 1338 +++++++++++-- datafusion/proto/src/generated/prost.rs | 152 +- datafusion/proto/src/logical_plan/from_proto.rs | 56 +- datafusion/proto/src/logical_plan/to_proto.rs | 12 +- datafusion/proto/src/physical_plan/from_proto.rs | 95 +- datafusion/proto/src/physical_plan/mod.rs | 81 +- datafusion/proto/src/physical_plan/to_proto.rs | 128 +- .../proto/tests/cases/roundtrip_logical_plan.rs | 12 +- .../proto/tests/cases/roundtrip_physical_plan.rs | 55 +- datafusion/sql/src/expr/function.rs | 6 +- datafusion/sql/src/select.rs | 2 +- datafusion/sql/src/statement.rs | 20 +- datafusion/sqllogictest/Cargo.toml | 2 +- datafusion/sqllogictest/test_files/aggregate.slt | 198 ++ datafusion/sqllogictest/test_files/array.slt | 416 +++- datafusion/sqllogictest/test_files/copy.slt | 2 +- datafusion/sqllogictest/test_files/errors.slt | 4 + datafusion/sqllogictest/test_files/explain.slt | 17 +- datafusion/sqllogictest/test_files/functions.slt | 4 + datafusion/sqllogictest/test_files/groupby.slt | 37 +- .../sqllogictest/test_files/information_schema.slt | 2 + datafusion/sqllogictest/test_files/insert.slt | 13 +- .../sqllogictest/test_files/insert_to_external.slt | 13 +- datafusion/sqllogictest/test_files/joins.slt | 238 +-- datafusion/sqllogictest/test_files/limit.slt | 85 + datafusion/sqllogictest/test_files/subquery.slt | 77 +- .../sqllogictest/test_files/tpch/q16.slt.part | 10 +- datafusion/sqllogictest/test_files/union.slt | 60 +- datafusion/sqllogictest/test_files/update.slt | 15 +- datafusion/sqllogictest/test_files/window.slt | 12 +- datafusion/substrait/Cargo.toml | 2 +- datafusion/substrait/src/logical_plan/consumer.rs | 87 +- datafusion/substrait/src/logical_plan/producer.rs | 89 +- .../tests/cases/roundtrip_logical_plan.rs | 74 +- datafusion/wasmtest/Cargo.toml | 2 +- dev/changelog/33.0.0.md | 70 +- dev/release/generate-changelog.py | 4 + docs/source/contributor-guide/communication.md | 50 +- docs/source/contributor-guide/index.md | 4 +- docs/source/index.rst | 3 +- docs/source/user-guide/configs.md | 1 + docs/source/user-guide/expressions.md | 1 + 174 files changed, 8984 insertions(+), 2441 deletions(-) diff --cc datafusion-cli/Cargo.lock index 74df8aab01,629293e483..0932fdafc2 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@@ -790,9 -790,9 +790,9 @@@ dependencies = [[package]] name = "bstr" --version = "1.7.0" ++version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" --checksum = "c79ad7fb2dd38f3dabd76b09c6a5a20c038fc0213ef1e9afd30eb777f120f019" ++checksum = "542f33a8835a0884b006a0c3df3dadd99c0c3f296ed26c2fdc8028e01ad6230c" dependencies = [ "memchr", "regex-automata", @@@ -1737,9 -1737,9 +1737,9 @@@ dependencies = [[package]] name = "http" --version = "0.2.9" ++version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" --checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" ++checksum = "f95b9abcae896730d42b78e09c155ed4ddf82c07b4de772c64aee5b2d8b7c150" dependencies = [ "bytes", "fnv", @@@ -2869,9 -2869,9 +2869,9 @@@ dependencies = [[package]] name = "rustls-pemfile" --version = "1.0.3" ++version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" --checksum = "2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2" ++checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ "base64", ] @@@ -3061,9 -3061,9 +3061,9 @@@ dependencies = [[package]] name = "smallvec" --version = "1.11.1" ++version = "1.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" --checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" ++checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" [[package]] name = "snafu" @@@ -3354,9 -3354,9 +3354,9 @@@ checksum = "1f3ccbac311fea05f86f61904b4 [[package]] name = "tokio" --version = "1.33.0" ++version = "1.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" --checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" ++checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9" dependencies = [ "backtrace", "bytes", @@@ -3372,9 -3372,9 +3372,9 @@@ [[package]] name = "tokio-macros" --version = "2.1.0" ++version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" --checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" ++checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote",
