This is an automated email from the ASF dual-hosted git repository.
blaginin pushed a change to branch dependabot/github_actions/runs-on/action-2.1.0
in repository https://gitbox.apache.org/repos/asf/datafusion.git
from 1b760f19d7 chore(deps): bump runs-on/action from 2.0.3 to 2.1.0
add 56e097a9b8 perf: optimize scatter with type-specific specialization
(#20498)
add 7f29cb050a Add `arrow_try_cast` UDF (#21130)
add 98defe6f32 chore: Fix all sqllogictest dangling configs (#21108)
add 76075e2f35 Preserve SPM when parent maintains input order (#21097)
add dc9098ef66 chore: update testcontainers and astral-tokio-tar for cargo
audit (#21114)
add 10fae81bf6 Fix push_down_filter for children with non-empty fetch
fields (#21057)
add 4d5aea4b5a perf: Optimize `array_min`, `array_max` for arrays of
primitive types (#21101)
add aa9d8195ac perf: optimize map validation for common key types (#20805)
add 8d47fc6034 Spark soundex function implementation (#20725)
add f30b85ccf5 chore(deps): bump env_logger from 0.11.9 to 0.11.10 in the
all-other-cargo-deps group across 1 directory (#21136)
add d5302768c9 Fix `elapsed_compute` metric for Parquet DataSourceExec
(#20767)
add 0294c6ca74 chore(deps): bump taiki-e/install-action from 2.68.34 to
2.69.7 (#21133)
add a3dc8fa0c6 chore(deps): bump github/codeql-action from 4.33.0 to
4.34.1 (#21132)
add a07490236c Update to arrow/parquet `58.1.0` (#21044)
add 4084a18bc7 Simplify sqllogictest timing summary to boolean flag and
remove top-N modes (#20598)
add e62533b727 Substrait join consumer should not merge nullability of
join keys (#21121)
add 6e0dde0890 fix(stats): widen sum_value integer arithmetic to
SUM-compatible types (#20865)
add 9b726bcf28 Enable debug assertions in CI. (#20832)
add 51f13d7435 perf: specialized SemiAntiSortMergeJoinStream (#20806)
add 78820c5101 chore(deps): bump requests from 2.32.5 to 2.33.0 (#21153)
add 139b0b4ac8 feat : support spark compatible int to timestamp cast
(#20555)
add c4562dc7fc [Minor]: support window functions in order by expressions
(#20963)
add 69cd666501 fix: skip empty metadata in intersect_metadata_for_union to
prevent s… (#21127)
add 757ce78496 chore: Optimize schema rewriter usages (#21158)
add 509ad090b4 Improvement: keep order-preserving repartitions for
streaming aggregates (#21107)
add 4e2e4e84d5 fix: Df int timestamp cast fix failing CI (#21163)
add 6980fcf1e3 docs: Add explicit fmt and clippy commands to AGENTS.md
(#21171)
add 4f13319b14 feat: implement PhysicalOptimizerRule in FFI crate (#20451)
add 7cbc6b4383 Remove as_any from scalar UDF trait definition (#20812)
add ba399a80f9 docs: add KalamDB to known users (#21181)
add 1416ed4d50 Add benchmarks for Parquet struct leaf-level projection
pruning (#21180)
add fb12029e70 chore: re-export projection in datafusion::datasource
(#21185)
add 1624d63070 perf: Add support for `GroupsAccumulator` to `string_agg`
(#21154)
add 38cc8e6874 test: add SMJ benchmarks from #21184 (#21188)
add 37978e32af [main] Update version to 53.0.0 and bring changelog (#21189)
add 627fabaa77 Migrate Avro reader to arrow-avro and remove internal
conversion code (#17861)
add 07ddfd7a22 Fix sort merge interleave overflow (#20922)
add 20434b0172 Reduce parquet struct projection benchmark data volume
(#21187)
add 1e3b956fa7 Minor: compute qualify window expressions only when QUALIFY
clause is present (#21173)
add 0359a3ce08 fix[physical-plan/aggregates]: fix grouping by Ree<Dict>
(#21195)
add 37cd3de82f [main] add 52.4.0 changelog (#21053)
add 580b0abdb4 Use leaf level `ProjectionMask` for parquet projections
(#20925)
add 37c1b75263 test: scale remaining sort-merge join (SMJ) benchmark
queries (#21200)
add e913557fe9 Fix: MemTable LIMIT ignored with reordered projections
(#21177)
add 9f893a4241 perf: Optimize `split_part`, support `Utf8View` (#21119)
add a910b039dc fix(unparser): Fix BigQuery timestamp literal format in SQL
unparsing (#21103)
add b09205afcb fix: propagate errors for unsupported table function
arguments instead of silently dropping them (#21135)
add 2c03881eac Add metric category filtering for EXPLAIN ANALYZE (#21160)
add 1a0af76f78 No cargo test for `sort_mem_validation` (#21222)
add aadae6bda3 Fix/support duplicate column names #6543 (#21126)
add e8831716f1 Use spot instances for extended tests (#21221)
add e57198ab92 feat(metric): Add output skewness metric to detect skewed
plans easier (#21211)
add cc5a3a7049 chore: Cleanup Cargo profiles (#21214)
add 06b4791d7f fix: Fix `main` compilation failure (#21242)
add 80a8bb8a7b feat: add sort pushdown benchmark and SLT tests (#21213)
add a0869e95ed chore(benchmark): Fix/update compile profile benchmark
(#21223)
add 81faba3293 Basic Extension Type Registry Implementation (#20312)
add 3b42dcf76f chore(deps): bump serialize-javascript,
terser-webpack-plugin and copy-webpack-plugin in
/datafusion/wasmtest/datafusion-wasm-app (#21235)
add eb68cf3aba chore(deps-dev): bump node-forge from 1.3.2 to 1.4.0 in
/datafusion/wasmtest/datafusion-wasm-app (#21225)
add 51d06df043 chore(deps): bump cryptography from 46.0.5 to 46.0.6
(#21224)
add 2aab55941d Fix FilterExec tree render missing fetch display (#21230)
add 0a8bb2933a fix: Revert "Fix/support duplicate column names #6543
(#21126)" (#21254)
add 010e5ee04b ci: use ubuntu-slim runner for lightweight CI jobs (#21252)
add 0be5982a91 perf: sort-merge join (SMJ) batch deferred filtering and move mark joins to bitwise stream. Near-unique LEFT and FULL SMJ 20-50x faster (#21184)
add 9385c70c77 kill `check_run_id` and `pr_number` from extended tests
(#21228)
add 3555322440 [Minor] add non topk benchmarks for utf8/utf8view string
aggregates (#21073)
add 2c08ddb4fe ci: Add datafusion/sql as a folder to trigger extended
tests for on changes (#21255)
add 11c2fbcb15 Misc minor optimization in the Physical Optimizer (#21216)
add 4a366752ac chore: Replace `TryInto` impl by `TryFrom` (#21203)
add f830ee39df Refactor parquet datasource into an explicit state machine
(#21190)
add 5ff80e4340 Provide session to the udtf call (#20222)
add 14a85fa383 chore: remove as_any from aggregate and window functions
(#21209)
add ccaf8022da Add flat vs. struct field projection benchmarks (#21257)
add 9c1e7abd2f Refactor: expose predicate constant inference from
physical-expr (#21167)
add 6412c3aaa0 Add end-to-end Parquet tests for List and LargeList struct
schema evolution (#20840)
add 2d9e268bc7 docs: Add `RESET` Command Documentation (#21245)
add 0bf9defb1d fix: Fix three bugs in query decorrelation (#21208)
add d87d8f6cac perf: Optimize `string_to_array` for scalar args (#21131)
add a120e4d1bd chore(deps): bump taiki-e/install-action from 2.69.7 to
2.70.3 (#21271)
add dcf818e4c5 chore(deps): bump rustyline from 17.0.2 to 18.0.0 (#21276)
add 26783124f1 chore(deps): bump ctor from 0.6.3 to 0.8.0 (#21282)
add 7138a832e3 chore(deps): bump snmalloc-rs from 0.3.8 to 0.7.4 (#21280)
add 4460ae0d26 chore(deps): bump sha1 from 0.10.6 to 0.11.0 (#21277)
add 55d1995624 chore(deps): bump astral-sh/setup-uv from 7.6.0 to 8.0.0
(#21272)
add 3d177fcc67 chore(deps): bump github/codeql-action from 4.34.1 to
4.35.1 (#21273)
add 19eb849b21 chore(deps): bump pygments from 2.19.2 to 2.20.0 (#21256)
add a9dc1dcac1 chore: fix upgrade guide link for object_store release
notes (#21283)
add c473c1852f feat(memory_pool): add `TrackConsumersPool::metrics()` to
expose cons… (#21147)
add e74b83ab02 fix: date overflow panic (#21233)
add 9de1253f2a Update repeat UDF to emit utf8view when input is utf8view
(#20645)
add fee90be613 Misc minor optimizations to query optimizer performance
(#21128)
add e75ed5b42c feat(sql): unparse array_has as ANY for Postgres (#20654)
add f3e8291ebd chore(deps): bump the all-other-cargo-deps group across 1
directory with 7 updates (#21274)
add bc2b36cf56 fix: `SELECT * EXCLUDE(...)` silently returns empty rows
when all columns are excluded (#21259)
add 1e68674705 doc: Add documentation explaining the behavior of `null`
values in struct comparisons (#21226)
add 2d5f016dda feat: feature-gate `sqllogictests` datafusion-substrait
behind optional 'substrait' feature (#21268)
add 0dff9d7f4d Merge branch 'main' into dependabot/github_actions/runs-on/action-2.1.0
No new revisions were added by this update.
Summary of changes:
.github/workflows/audit.yml | 2 +-
.github/workflows/codeql.yml | 6 +-
.github/workflows/dev.yml | 2 +-
.github/workflows/docs.yaml | 2 +-
.github/workflows/docs_pr.yaml | 2 +-
.github/workflows/extended.yml | 13 +-
.github/workflows/labeler.yml | 2 +-
.github/workflows/large_files.yml | 2 +-
.github/workflows/rust.yml | 12 +-
.github/workflows/stale.yml | 2 +-
.github/workflows/take.yml | 2 +-
.gitignore | 3 +
AGENTS.md | 18 +
Cargo.lock | 607 ++++---
Cargo.toml | 156 +-
benchmarks/Cargo.toml | 2 +-
benchmarks/README.md | 2 +-
benchmarks/bench.sh | 30 +
benchmarks/compile_profile.py | 43 +-
benchmarks/queries/sort_pushdown/q1.sql | 6 +
benchmarks/queries/sort_pushdown/q2.sql | 7 +
benchmarks/queries/sort_pushdown/q3.sql | 5 +
benchmarks/queries/sort_pushdown/q4.sql | 5 +
benchmarks/src/bin/dfbench.rs | 5 +-
benchmarks/src/lib.rs | 1 +
benchmarks/src/smj.rs | 293 +++-
benchmarks/src/sort_pushdown.rs | 282 +++
datafusion-cli/Cargo.toml | 2 +-
datafusion-cli/src/functions.rs | 14 +-
datafusion-examples/README.md | 31 +-
.../examples/builtin_functions/function_factory.rs | 4 -
.../examples/data_io/json_shredding.rs | 5 -
.../main.rs | 26 +-
.../examples/extension_types/temperature.rs | 323 ++++
.../examples/query_planning/optimizer_rule.rs | 5 -
datafusion-examples/examples/udf/advanced_udaf.rs | 11 +-
datafusion-examples/examples/udf/advanced_udf.rs | 5 -
datafusion-examples/examples/udf/advanced_udwf.rs | 11 +-
datafusion-examples/examples/udf/async_udf.rs | 6 +-
datafusion-examples/examples/udf/main.rs | 8 +-
datafusion-examples/examples/udf/simple_udtf.rs | 5 +-
.../examples/udf/table_list_udtf.rs | 128 ++
datafusion/catalog/src/table.rs | 56 +-
datafusion/common/Cargo.toml | 9 +-
datafusion/common/src/config.rs | 36 +-
datafusion/common/src/error.rs | 18 -
datafusion/common/src/format.rs | 216 ++-
datafusion/common/src/stats.rs | 133 +-
.../src/types/canonical_extensions}/mod.rs | 2 +-
.../common/src/types/canonical_extensions/uuid.rs | 103 ++
datafusion/common/src/types/extension.rs | 80 +
datafusion/common/src/types/mod.rs | 3 +
datafusion/core/Cargo.toml | 7 +-
datafusion/core/benches/aggregate_query_sql.rs | 33 +
.../core/benches/parquet_struct_projection.rs | 496 ++++++
datafusion/core/benches/topk_aggregate.rs | 338 ++--
datafusion/core/src/dataframe/mod.rs | 6 +
datafusion/core/src/datasource/file_format/avro.rs | 26 +-
.../core/src/datasource/listing_table_factory.rs | 6 +
datafusion/core/src/datasource/mod.rs | 1 +
.../core/src/datasource/physical_plan/parquet.rs | 3 +-
datafusion/core/src/execution/session_state.rs | 39 +-
.../core/src/execution/session_state_defaults.rs | 8 +
datafusion/core/src/lib.rs | 8 +-
datafusion/core/src/physical_planner.rs | 16 +-
datafusion/core/tests/core_integration.rs | 3 +
.../core/tests/{catalog => extension_types}/mod.rs | 2 +-
.../core/tests/extension_types/pretty_printing.rs | 78 +
.../core/tests/fuzz_cases/equivalence/utils.rs | 4 -
datafusion/core/tests/fuzz_cases/join_fuzz.rs | 135 ++
.../memory_limit_validation/sort_mem_validation.rs | 54 +-
datafusion/core/tests/parquet/expr_adapter.rs | 524 +++++-
.../physical_optimizer/enforce_distribution.rs | 306 ++++
.../tests/physical_optimizer/enforce_sorting.rs | 3 +-
.../tests/physical_optimizer/limit_pushdown.rs | 35 +
.../physical_optimizer/projection_pushdown.rs | 5 -
datafusion/core/tests/sql/aggregates/basic.rs | 55 +
datafusion/core/tests/sql/explain_analyze.rs | 149 +-
.../tests/user_defined/user_defined_aggregates.rs | 9 -
.../user_defined_async_scalar_functions.rs | 4 -
.../user_defined/user_defined_scalar_functions.rs | 42 -
.../user_defined/user_defined_table_functions.rs | 7 +-
.../user_defined/user_defined_window_functions.rs | 13 -
datafusion/datasource-arrow/src/file_format.rs | 5 +
datafusion/datasource-avro/Cargo.toml | 8 +-
.../src/avro_to_arrow/arrow_array_reader.rs | 1807 --------------------
.../datasource-avro/src/avro_to_arrow/mod.rs | 39 -
.../datasource-avro/src/avro_to_arrow/reader.rs | 353 ----
.../datasource-avro/src/avro_to_arrow/schema.rs | 517 ------
datafusion/datasource-avro/src/file_format.rs | 3 +-
datafusion/datasource-avro/src/mod.rs | 122 +-
datafusion/datasource-avro/src/source.rs | 369 +++-
datafusion/datasource-parquet/src/file_format.rs | 12 +-
datafusion/datasource-parquet/src/metrics.rs | 26 +-
datafusion/datasource-parquet/src/mod.rs | 4 +
datafusion/datasource-parquet/src/opener.rs | 1255 +++++++++-----
datafusion/datasource-parquet/src/reader.rs | 2 +-
datafusion/datasource-parquet/src/row_filter.rs | 251 ++-
datafusion/datasource/src/file_stream.rs | 21 +-
datafusion/datasource/src/memory.rs | 59 +-
datafusion/datasource/src/source.rs | 19 +-
datafusion/datasource/src/statistics.rs | 81 +-
datafusion/datasource/src/url.rs | 5 +
datafusion/execution/src/memory_pool/pool.rs | 83 +
datafusion/expr-common/src/interval_arithmetic.rs | 2 +
datafusion/expr/Cargo.toml | 3 +-
datafusion/expr/src/async_udf.rs | 14 +-
datafusion/expr/src/expr.rs | 94 +-
datafusion/expr/src/expr_fn.rs | 13 -
.../src/extension_types/array_formatter_factory.rs | 67 +
.../csv => expr/src/extension_types}/mod.rs | 9 +-
datafusion/expr/src/lib.rs | 1 +
datafusion/expr/src/logical_plan/builder.rs | 9 +
datafusion/expr/src/logical_plan/plan.rs | 76 +
datafusion/expr/src/registry.rs | 307 +++-
datafusion/expr/src/test/function_stub.rs | 22 -
datafusion/expr/src/udaf.rs | 21 +-
datafusion/expr/src/udf.rs | 18 +-
datafusion/expr/src/udf_eq.rs | 13 +-
datafusion/expr/src/udwf.rs | 22 +-
datafusion/ffi/Cargo.toml | 1 +
datafusion/ffi/src/lib.rs | 1 +
datafusion/ffi/src/physical_optimizer.rs | 373 ++++
.../ffi/src/proto/logical_extension_codec.rs | 7 +-
.../ffi/src/proto/physical_extension_codec.rs | 13 +-
datafusion/ffi/src/session/mod.rs | 30 +-
datafusion/ffi/src/tests/mod.rs | 6 +
datafusion/ffi/src/tests/physical_optimizer.rs | 54 +
datafusion/ffi/src/tests/udf_udaf_udwf.rs | 8 -
datafusion/ffi/src/udaf/mod.rs | 22 +-
datafusion/ffi/src/udf/mod.rs | 19 +-
datafusion/ffi/src/udtf.rs | 100 +-
datafusion/ffi/src/udwf/mod.rs | 14 +-
datafusion/ffi/tests/ffi_physical_optimizer.rs | 74 +
.../functions-aggregate/src/approx_distinct.rs | 5 -
.../functions-aggregate/src/approx_median.rs | 5 -
.../src/approx_percentile_cont.rs | 5 -
.../src/approx_percentile_cont_with_weight.rs | 5 -
datafusion/functions-aggregate/src/array_agg.rs | 4 -
datafusion/functions-aggregate/src/average.rs | 5 -
.../functions-aggregate/src/bit_and_or_xor.rs | 5 -
datafusion/functions-aggregate/src/bool_and_or.rs | 9 -
datafusion/functions-aggregate/src/correlation.rs | 6 -
datafusion/functions-aggregate/src/count.rs | 4 -
datafusion/functions-aggregate/src/covariance.rs | 8 -
datafusion/functions-aggregate/src/first_last.rs | 9 -
datafusion/functions-aggregate/src/grouping.rs | 6 -
datafusion/functions-aggregate/src/median.rs | 4 -
datafusion/functions-aggregate/src/min_max.rs | 8 -
datafusion/functions-aggregate/src/nth_value.rs | 5 -
.../functions-aggregate/src/percentile_cont.rs | 4 -
datafusion/functions-aggregate/src/regr.rs | 5 -
datafusion/functions-aggregate/src/stddev.rs | 11 -
datafusion/functions-aggregate/src/string_agg.rs | 410 ++++-
datafusion/functions-aggregate/src/sum.rs | 5 -
datafusion/functions-aggregate/src/variance.rs | 8 -
datafusion/functions-nested/Cargo.toml | 9 +
.../functions-nested/benches/array_min_max.rs | 121 ++
datafusion/functions-nested/benches/map.rs | 210 ++-
.../functions-nested/benches/string_to_array.rs | 244 +++
datafusion/functions-nested/src/array_has.rs | 10 -
datafusion/functions-nested/src/arrays_zip.rs | 5 -
datafusion/functions-nested/src/cardinality.rs | 4 -
datafusion/functions-nested/src/concat.rs | 13 -
datafusion/functions-nested/src/dimension.rs | 7 -
datafusion/functions-nested/src/distance.rs | 5 -
datafusion/functions-nested/src/empty.rs | 4 -
datafusion/functions-nested/src/except.rs | 4 -
datafusion/functions-nested/src/extract.rs | 17 -
datafusion/functions-nested/src/flatten.rs | 5 -
datafusion/functions-nested/src/length.rs | 4 -
datafusion/functions-nested/src/make_array.rs | 5 -
datafusion/functions-nested/src/map.rs | 222 ++-
datafusion/functions-nested/src/map_entries.rs | 5 -
datafusion/functions-nested/src/map_extract.rs | 4 -
datafusion/functions-nested/src/map_keys.rs | 5 -
datafusion/functions-nested/src/map_values.rs | 5 -
datafusion/functions-nested/src/min_max.rs | 127 +-
datafusion/functions-nested/src/position.rs | 7 -
datafusion/functions-nested/src/range.rs | 5 -
datafusion/functions-nested/src/remove.rs | 13 -
datafusion/functions-nested/src/repeat.rs | 5 -
datafusion/functions-nested/src/replace.rs | 13 -
datafusion/functions-nested/src/resize.rs | 5 -
datafusion/functions-nested/src/reverse.rs | 5 -
datafusion/functions-nested/src/set_ops.rs | 13 -
datafusion/functions-nested/src/sort.rs | 5 -
datafusion/functions-nested/src/string.rs | 553 +++---
datafusion/functions-table/src/generate_series.rs | 13 +-
datafusion/functions-window/src/cume_dist.rs | 6 -
datafusion/functions-window/src/lead_lag.rs | 5 -
datafusion/functions-window/src/macros.rs | 28 -
datafusion/functions-window/src/nth_value.rs | 5 -
datafusion/functions-window/src/ntile.rs | 5 -
datafusion/functions-window/src/rank.rs | 5 -
datafusion/functions-window/src/row_number.rs | 5 -
datafusion/functions/Cargo.toml | 2 +-
datafusion/functions/benches/split_part.rs | 393 ++---
datafusion/functions/src/core/arrow_cast.rs | 13 +-
datafusion/functions/src/core/arrow_metadata.rs | 5 -
.../src/core/{arrow_cast.rs => arrow_try_cast.rs} | 115 +-
datafusion/functions/src/core/arrowtypeof.rs | 4 -
datafusion/functions/src/core/coalesce.rs | 5 -
datafusion/functions/src/core/getfield.rs | 8 +-
datafusion/functions/src/core/greatest.rs | 5 -
datafusion/functions/src/core/least.rs | 5 -
datafusion/functions/src/core/mod.rs | 9 +-
datafusion/functions/src/core/named_struct.rs | 9 +-
datafusion/functions/src/core/nullif.rs | 4 -
datafusion/functions/src/core/nvl.rs | 4 -
datafusion/functions/src/core/nvl2.rs | 4 -
datafusion/functions/src/core/overlay.rs | 5 -
datafusion/functions/src/core/struct.rs | 8 +-
datafusion/functions/src/core/union_extract.rs | 4 -
datafusion/functions/src/core/union_tag.rs | 4 -
datafusion/functions/src/core/version.rs | 5 -
datafusion/functions/src/crypto/digest.rs | 5 -
datafusion/functions/src/crypto/md5.rs | 6 +-
datafusion/functions/src/crypto/sha.rs | 5 -
datafusion/functions/src/datetime/current_date.rs | 6 -
datafusion/functions/src/datetime/current_time.rs | 5 -
datafusion/functions/src/datetime/date_bin.rs | 5 -
datafusion/functions/src/datetime/date_part.rs | 5 -
datafusion/functions/src/datetime/date_trunc.rs | 5 -
datafusion/functions/src/datetime/from_unixtime.rs | 5 -
datafusion/functions/src/datetime/make_date.rs | 5 -
datafusion/functions/src/datetime/make_time.rs | 5 -
datafusion/functions/src/datetime/now.rs | 5 -
datafusion/functions/src/datetime/to_char.rs | 5 -
datafusion/functions/src/datetime/to_date.rs | 5 -
datafusion/functions/src/datetime/to_local_time.rs | 5 -
datafusion/functions/src/datetime/to_time.rs | 5 -
datafusion/functions/src/datetime/to_timestamp.rs | 21 -
datafusion/functions/src/datetime/to_unixtime.rs | 5 -
datafusion/functions/src/encoding/inner.rs | 9 -
datafusion/functions/src/macros.rs | 10 +-
datafusion/functions/src/math/abs.rs | 5 -
datafusion/functions/src/math/ceil.rs | 5 -
datafusion/functions/src/math/cot.rs | 5 -
datafusion/functions/src/math/factorial.rs | 5 -
datafusion/functions/src/math/floor.rs | 5 -
datafusion/functions/src/math/gcd.rs | 5 -
datafusion/functions/src/math/iszero.rs | 5 -
datafusion/functions/src/math/lcm.rs | 5 -
datafusion/functions/src/math/log.rs | 7 +-
datafusion/functions/src/math/nans.rs | 4 -
datafusion/functions/src/math/nanvl.rs | 5 -
datafusion/functions/src/math/pi.rs | 6 -
datafusion/functions/src/math/power.rs | 8 +-
datafusion/functions/src/math/random.rs | 5 -
datafusion/functions/src/math/round.rs | 6 -
datafusion/functions/src/math/signum.rs | 5 -
datafusion/functions/src/math/trunc.rs | 5 -
datafusion/functions/src/regex/regexpcount.rs | 4 -
datafusion/functions/src/regex/regexpinstr.rs | 4 -
datafusion/functions/src/regex/regexplike.rs | 5 -
datafusion/functions/src/regex/regexpmatch.rs | 5 -
datafusion/functions/src/regex/regexpreplace.rs | 5 -
datafusion/functions/src/string/ascii.rs | 5 -
datafusion/functions/src/string/bit_length.rs | 5 -
datafusion/functions/src/string/btrim.rs | 5 -
datafusion/functions/src/string/chr.rs | 5 -
datafusion/functions/src/string/concat.rs | 5 -
datafusion/functions/src/string/concat_ws.rs | 5 -
datafusion/functions/src/string/contains.rs | 5 -
datafusion/functions/src/string/ends_with.rs | 5 -
datafusion/functions/src/string/levenshtein.rs | 5 -
datafusion/functions/src/string/lower.rs | 5 -
datafusion/functions/src/string/ltrim.rs | 5 -
datafusion/functions/src/string/octet_length.rs | 5 -
datafusion/functions/src/string/repeat.rs | 103 +-
datafusion/functions/src/string/replace.rs | 5 -
datafusion/functions/src/string/rtrim.rs | 5 -
datafusion/functions/src/string/split_part.rs | 247 +--
datafusion/functions/src/string/starts_with.rs | 5 -
datafusion/functions/src/string/to_hex.rs | 5 -
datafusion/functions/src/string/upper.rs | 5 -
datafusion/functions/src/string/uuid.rs | 5 -
.../functions/src/unicode/character_length.rs | 5 -
datafusion/functions/src/unicode/find_in_set.rs | 5 -
datafusion/functions/src/unicode/initcap.rs | 5 -
datafusion/functions/src/unicode/left.rs | 6 -
datafusion/functions/src/unicode/lpad.rs | 5 -
datafusion/functions/src/unicode/reverse.rs | 5 -
datafusion/functions/src/unicode/right.rs | 6 -
datafusion/functions/src/unicode/rpad.rs | 5 -
datafusion/functions/src/unicode/strpos.rs | 5 -
datafusion/functions/src/unicode/substr.rs | 5 -
datafusion/functions/src/unicode/substrindex.rs | 5 -
datafusion/functions/src/unicode/translate.rs | 6 -
datafusion/optimizer/src/analyzer/type_coercion.rs | 12 +-
.../optimizer/src/common_subexpr_eliminate.rs | 15 +-
datafusion/optimizer/src/decorrelate.rs | 34 +-
.../optimizer/src/eliminate_group_by_constant.rs | 3 -
.../optimizer/src/extract_leaf_expressions.rs | 46 +-
datafusion/optimizer/src/optimize_unions.rs | 4 +-
datafusion/optimizer/src/push_down_filter.rs | 132 +-
datafusion/optimizer/src/push_down_limit.rs | 2 +-
.../optimizer/src/scalar_subquery_to_join.rs | 31 +-
.../src/simplify_expressions/expr_simplifier.rs | 65 +-
.../src/simplify_expressions/udf_preimage.rs | 6 +-
datafusion/optimizer/src/test/udfs.rs | 5 -
datafusion/optimizer/src/utils.rs | 46 +-
.../physical-expr-common/src/metrics/baseline.rs | 106 +-
.../physical-expr-common/src/metrics/builder.rs | 82 +-
.../physical-expr-common/src/metrics/expression.rs | 2 +-
datafusion/physical-expr-common/src/metrics/mod.rs | 88 +-
.../physical-expr-common/src/metrics/value.rs | 82 +-
datafusion/physical-expr-common/src/utils.rs | 485 +++++-
datafusion/physical-expr/src/aggregate.rs | 4 -
.../physical-expr/src/async_scalar_function.rs | 7 +-
datafusion/physical-expr/src/projection.rs | 42 +-
datafusion/physical-expr/src/scalar_function.rs | 9 +-
datafusion/physical-expr/src/utils/mod.rs | 99 +-
.../physical-optimizer/src/enforce_distribution.rs | 58 +-
.../physical-optimizer/src/enforce_sorting/mod.rs | 5 +-
.../src/enforce_sorting/sort_pushdown.rs | 13 +-
.../physical-optimizer/src/join_selection.rs | 6 +-
.../physical-optimizer/src/limit_pushdown.rs | 119 +-
.../src/limited_distinct_aggregation.rs | 12 +-
datafusion/physical-optimizer/src/optimizer.rs | 2 +-
.../physical-optimizer/src/output_requirements.rs | 9 +-
.../physical-optimizer/src/sanity_checker.rs | 35 +-
.../src/aggregates/group_values/row.rs | 32 +-
.../physical-plan/src/aggregates/row_hash.rs | 6 +-
datafusion/physical-plan/src/analyze.rs | 21 +-
datafusion/physical-plan/src/buffer.rs | 10 +-
datafusion/physical-plan/src/display.rs | 62 +-
datafusion/physical-plan/src/filter.rs | 65 +-
datafusion/physical-plan/src/joins/cross_join.rs | 85 +-
.../physical-plan/src/joins/hash_join/exec.rs | 3 +-
.../physical-plan/src/joins/nested_loop_join.rs | 2 +-
.../src/joins/sort_merge_join/bitwise_stream.rs | 1318 ++++++++++++++
.../src/joins/sort_merge_join/exec.rs | 78 +-
.../src/joins/sort_merge_join/filter.rs | 405 ++---
.../{stream.rs => materializing_stream.rs} | 605 ++++---
.../src/joins/sort_merge_join/metrics.rs | 25 +-
.../physical-plan/src/joins/sort_merge_join/mod.rs | 3 +-
.../src/joins/sort_merge_join/tests.rs | 1655 +++++++++++++-----
.../physical-plan/src/joins/stream_join_utils.rs | 29 +-
datafusion/physical-plan/src/joins/utils.rs | 31 +-
datafusion/physical-plan/src/sorts/builder.rs | 258 ++-
datafusion/physical-plan/src/sorts/merge.rs | 123 +-
datafusion/physical-plan/src/sorts/sort.rs | 92 +-
.../src/sorts/sort_preserving_merge.rs | 56 +
datafusion/physical-plan/src/topk/mod.rs | 4 +-
datafusion/physical-plan/src/union.rs | 2 +-
datafusion/physical-plan/src/unnest.rs | 13 +-
datafusion/proto-common/src/from_proto/mod.rs | 1 -
datafusion/proto/Cargo.toml | 2 +-
datafusion/proto/proto/datafusion.proto | 4 +
datafusion/proto/src/generated/pbjson.rs | 36 +
datafusion/proto/src/generated/prost.rs | 6 +
datafusion/proto/src/physical_plan/mod.rs | 21 +-
datafusion/proto/tests/cases/mod.rs | 11 -
.../proto/tests/cases/roundtrip_logical_plan.rs | 12 +-
.../proto/tests/cases/roundtrip_physical_plan.rs | 14 +-
datafusion/session/src/session.rs | 4 +
datafusion/spark/Cargo.toml | 2 +-
datafusion/spark/src/function/aggregate/avg.rs | 6 +-
datafusion/spark/src/function/aggregate/collect.rs | 10 +-
datafusion/spark/src/function/aggregate/try_sum.rs | 5 -
.../spark/src/function/array/array_contains.rs | 5 -
datafusion/spark/src/function/array/repeat.rs | 5 -
datafusion/spark/src/function/array/shuffle.rs | 5 -
datafusion/spark/src/function/array/slice.rs | 5 -
datafusion/spark/src/function/array/spark_array.rs | 6 +-
.../src/function/bitmap/bitmap_bit_position.rs | 5 -
.../src/function/bitmap/bitmap_bucket_number.rs | 5 -
.../spark/src/function/bitmap/bitmap_count.rs | 5 -
datafusion/spark/src/function/bitwise/bit_count.rs | 5 -
datafusion/spark/src/function/bitwise/bit_get.rs | 5 -
datafusion/spark/src/function/bitwise/bit_shift.rs | 5 -
.../spark/src/function/bitwise/bitwise_not.rs | 6 +-
datafusion/spark/src/function/collection/size.rs | 5 -
datafusion/spark/src/function/conditional/if.rs | 4 -
datafusion/spark/src/function/conversion/cast.rs | 654 +++++++
datafusion/spark/src/function/conversion/mod.rs | 19 +-
.../spark/src/function/datetime/add_months.rs | 5 -
datafusion/spark/src/function/datetime/date_add.rs | 5 -
.../spark/src/function/datetime/date_diff.rs | 5 -
.../spark/src/function/datetime/date_part.rs | 6 +-
datafusion/spark/src/function/datetime/date_sub.rs | 5 -
.../spark/src/function/datetime/date_trunc.rs | 5 -
datafusion/spark/src/function/datetime/extract.rs | 14 -
.../src/function/datetime/from_utc_timestamp.rs | 5 -
datafusion/spark/src/function/datetime/last_day.rs | 5 -
.../src/function/datetime/make_dt_interval.rs | 5 -
.../spark/src/function/datetime/make_interval.rs | 5 -
datafusion/spark/src/function/datetime/next_day.rs | 5 -
.../spark/src/function/datetime/time_trunc.rs | 5 -
.../src/function/datetime/to_utc_timestamp.rs | 5 -
datafusion/spark/src/function/datetime/trunc.rs | 5 -
datafusion/spark/src/function/datetime/unix.rs | 9 -
datafusion/spark/src/function/hash/crc32.rs | 5 -
datafusion/spark/src/function/hash/sha1.rs | 5 -
datafusion/spark/src/function/hash/sha2.rs | 5 -
datafusion/spark/src/function/json/json_tuple.rs | 5 -
.../spark/src/function/map/map_from_arrays.rs | 6 -
.../spark/src/function/map/map_from_entries.rs | 5 -
datafusion/spark/src/function/map/str_to_map.rs | 5 -
datafusion/spark/src/function/math/abs.rs | 5 -
datafusion/spark/src/function/math/bin.rs | 5 -
datafusion/spark/src/function/math/expm1.rs | 5 -
datafusion/spark/src/function/math/factorial.rs | 5 -
datafusion/spark/src/function/math/hex.rs | 5 -
datafusion/spark/src/function/math/modulus.rs | 9 -
datafusion/spark/src/function/math/negative.rs | 5 -
datafusion/spark/src/function/math/rint.rs | 5 -
datafusion/spark/src/function/math/trigonometry.rs | 9 -
datafusion/spark/src/function/math/unhex.rs | 5 -
datafusion/spark/src/function/math/width_bucket.rs | 5 -
datafusion/spark/src/function/string/ascii.rs | 5 -
datafusion/spark/src/function/string/base64.rs | 9 -
datafusion/spark/src/function/string/char.rs | 6 +-
datafusion/spark/src/function/string/concat.rs | 5 -
datafusion/spark/src/function/string/elt.rs | 5 -
.../spark/src/function/string/format_string.rs | 5 -
datafusion/spark/src/function/string/ilike.rs | 5 -
datafusion/spark/src/function/string/length.rs | 4 -
datafusion/spark/src/function/string/like.rs | 5 -
datafusion/spark/src/function/string/luhn_check.rs | 6 +-
datafusion/spark/src/function/string/mod.rs | 4 +
datafusion/spark/src/function/string/soundex.rs | 150 ++
datafusion/spark/src/function/string/space.rs | 5 -
datafusion/spark/src/function/string/substring.rs | 5 -
datafusion/spark/src/function/url/parse_url.rs | 5 -
datafusion/spark/src/function/url/try_parse_url.rs | 6 -
.../spark/src/function/url/try_url_decode.rs | 6 -
datafusion/spark/src/function/url/url_decode.rs | 5 -
datafusion/spark/src/function/url/url_encode.rs | 5 -
datafusion/sql/src/relation/mod.rs | 4 +-
datafusion/sql/src/select.rs | 37 +-
datafusion/sql/src/unparser/dialect.rs | 35 +-
datafusion/sql/src/unparser/expr.rs | 62 +-
datafusion/sql/tests/cases/plan_to_sql.rs | 11 +
datafusion/sql/tests/sql_integration.rs | 167 +-
datafusion/sqllogictest/Cargo.toml | 7 +-
datafusion/sqllogictest/README.md | 19 +-
datafusion/sqllogictest/bin/sqllogictests.rs | 173 +-
.../src/engines/datafusion_engine/runner.rs | 125 +-
datafusion/sqllogictest/src/engines/mod.rs | 2 +
datafusion/sqllogictest/src/lib.rs | 1 +
datafusion/sqllogictest/src/test_context.rs | 4 -
.../test_files/agg_func_substitute.slt | 27 +-
datafusion/sqllogictest/test_files/aggregate.slt | 11 +-
.../test_files/aggregate_repartition.slt | 7 +
.../test_files/aggregate_skip_partial.slt | 45 +
datafusion/sqllogictest/test_files/array.slt | 183 ++
datafusion/sqllogictest/test_files/arrow_files.slt | 4 +
.../sqllogictest/test_files/arrow_try_cast.slt | 109 ++
datafusion/sqllogictest/test_files/avro.slt | 36 +
datafusion/sqllogictest/test_files/cte.slt | 10 +
.../test_files/datetime/arith_date_interval.slt | 12 +
.../test_files/datetime/current_date_timezone.slt | 4 +
.../test_files/datetime/current_time_timezone.slt | 4 +
datafusion/sqllogictest/test_files/delete.slt | 4 +
datafusion/sqllogictest/test_files/distinct_on.slt | 4 +
.../test_files/dynamic_filter_pushdown_config.slt | 7 +-
datafusion/sqllogictest/test_files/explain.slt | 3 +-
.../sqllogictest/test_files/explain_analyze.slt | 234 +++
.../sqllogictest/test_files/explain_tree.slt | 99 +-
.../sqllogictest/test_files/floor_preimage.slt | 4 +
datafusion/sqllogictest/test_files/group_by.slt | 27 +-
.../test_files/ident_normalization.slt | 4 +
.../sqllogictest/test_files/information_schema.slt | 17 +
.../test_files/information_schema_columns.slt | 10 +
.../information_schema_multiple_catalogs.slt | 10 +
.../test_files/information_schema_table_types.slt | 4 +
datafusion/sqllogictest/test_files/insert.slt | 7 +
.../sqllogictest/test_files/insert_to_external.slt | 7 +
.../test_files/join_is_not_distinct_from.slt | 30 +
.../test_files/join_limit_pushdown.slt | 13 +
datafusion/sqllogictest/test_files/join_only.slt | 4 +
datafusion/sqllogictest/test_files/joins.slt | 27 +
datafusion/sqllogictest/test_files/limit.slt | 117 ++
.../sqllogictest/test_files/limit_pruning.slt | 4 +
.../test_files/limit_single_row_batches.slt | 9 +
.../test_files/listing_table_statistics.slt | 5 +-
datafusion/sqllogictest/test_files/map.slt | 26 +
datafusion/sqllogictest/test_files/metadata.slt | 17 +
.../test_files/optimizer_group_by_constant.slt | 10 +
datafusion/sqllogictest/test_files/order.slt | 24 +
datafusion/sqllogictest/test_files/parquet.slt | 20 +
.../test_files/parquet_sorted_statistics.slt | 13 +
.../sqllogictest/test_files/parquet_statistics.slt | 11 +-
.../sqllogictest/test_files/pipe_operator.slt | 4 +
datafusion/sqllogictest/test_files/predicates.slt | 4 +
.../test_files/preserve_file_partitioning.slt | 39 +-
.../test_files/projection_pushdown.slt | 86 +-
.../test_files/push_down_filter_parquet.slt | 7 +
.../test_files/push_down_filter_regression.slt | 22 +
.../test_files/push_down_filter_unnest.slt | 4 +
datafusion/sqllogictest/test_files/repartition.slt | 10 +
.../sqllogictest/test_files/repartition_scan.slt | 7 +
.../test_files/repartition_subset_satisfaction.slt | 40 +-
.../test_files/schema_evolution_nested.slt | 124 ++
datafusion/sqllogictest/test_files/select.slt | 38 +-
.../sqllogictest/test_files/set_variable.slt | 10 +
.../sqllogictest/test_files/sort_merge_join.slt | 34 +
.../sqllogictest/test_files/sort_pushdown.slt | 473 ++++-
.../spark/conversion/cast_int_to_timestamp.slt | 254 +++
.../test_files/spark/string/soundex.slt | 194 ++-
.../test_files/string/string_literal.slt | 24 +
.../sqllogictest/test_files/string/string_view.slt | 50 +
datafusion/sqllogictest/test_files/struct.slt | 4 +
datafusion/sqllogictest/test_files/subquery.slt | 75 +
.../sqllogictest/test_files/table_functions.slt | 15 +-
datafusion/sqllogictest/test_files/tpch/tpch.slt | 4 +
datafusion/sqllogictest/test_files/union.slt | 4 +
datafusion/sqllogictest/test_files/unnest.slt | 17 +-
datafusion/sqllogictest/test_files/update.slt | 4 +
datafusion/sqllogictest/test_files/window.slt | 89 +-
.../test_files/window_topk_pushdown.slt | 11 +-
.../src/logical_plan/consumer/rel/join_rel.rs | 186 +-
.../substrait/tests/cases/consumer_integration.rs | 106 ++
.../mixed_join_equal_and_indistinct.json | 102 ++
.../mixed_join_equal_and_indistinct_left.json | 102 ++
datafusion/wasmtest/Cargo.toml | 2 +-
.../wasmtest/datafusion-wasm-app/package-lock.json | 508 ++----
.../wasmtest/datafusion-wasm-app/package.json | 2 +-
dev/changelog/52.4.0.md | 57 +
dev/changelog/53.0.0.md | 640 +++++++
dev/update_config_docs.sh | 17 +-
docs/source/download.md | 2 +-
.../library-user-guide/functions/adding-udfs.md | 26 +-
docs/source/library-user-guide/upgrading/53.0.0.md | 2 +-
docs/source/library-user-guide/upgrading/54.0.0.md | 117 +-
docs/source/user-guide/configs.md | 18 +-
docs/source/user-guide/crate-configuration.md | 2 +-
docs/source/user-guide/example-usage.md | 2 +-
docs/source/user-guide/explain-usage.md | 1 +
docs/source/user-guide/introduction.md | 1 +
docs/source/user-guide/sql/scalar_functions.md | 31 +
docs/source/user-guide/sql/struct_coercion.md | 20 +
docs/source/user-guide/sql/subqueries.md | 12 +
testing | 2 +-
uv.lock | 112 +-
538 files changed, 19780 insertions(+), 9016 deletions(-)
create mode 100644 benchmarks/queries/sort_pushdown/q1.sql
create mode 100644 benchmarks/queries/sort_pushdown/q2.sql
create mode 100644 benchmarks/queries/sort_pushdown/q3.sql
create mode 100644 benchmarks/queries/sort_pushdown/q4.sql
create mode 100644 benchmarks/src/sort_pushdown.rs
copy datafusion-examples/examples/{external_dependency =>
extension_types}/main.rs (75%)
create mode 100644 datafusion-examples/examples/extension_types/temperature.rs
create mode 100644 datafusion-examples/examples/udf/table_list_udtf.rs
copy datafusion/{core/tests/catalog =>
common/src/types/canonical_extensions}/mod.rs (98%)
create mode 100644 datafusion/common/src/types/canonical_extensions/uuid.rs
create mode 100644 datafusion/common/src/types/extension.rs
create mode 100644 datafusion/core/benches/parquet_struct_projection.rs
copy datafusion/core/tests/{catalog => extension_types}/mod.rs (97%)
create mode 100644 datafusion/core/tests/extension_types/pretty_printing.rs
delete mode 100644
datafusion/datasource-avro/src/avro_to_arrow/arrow_array_reader.rs
delete mode 100644 datafusion/datasource-avro/src/avro_to_arrow/mod.rs
delete mode 100644 datafusion/datasource-avro/src/avro_to_arrow/reader.rs
delete mode 100644 datafusion/datasource-avro/src/avro_to_arrow/schema.rs
create mode 100644
datafusion/expr/src/extension_types/array_formatter_factory.rs
copy datafusion/{spark/src/function/csv => expr/src/extension_types}/mod.rs
(84%)
create mode 100644 datafusion/ffi/src/physical_optimizer.rs
create mode 100644 datafusion/ffi/src/tests/physical_optimizer.rs
create mode 100644 datafusion/ffi/tests/ffi_physical_optimizer.rs
create mode 100644 datafusion/functions-nested/benches/array_min_max.rs
create mode 100644 datafusion/functions-nested/benches/string_to_array.rs
copy datafusion/functions/src/core/{arrow_cast.rs => arrow_try_cast.rs} (56%)
create mode 100644
datafusion/physical-plan/src/joins/sort_merge_join/bitwise_stream.rs
rename datafusion/physical-plan/src/joins/sort_merge_join/{stream.rs =>
materializing_stream.rs} (80%)
create mode 100644 datafusion/spark/src/function/conversion/cast.rs
create mode 100644 datafusion/spark/src/function/string/soundex.rs
create mode 100644 datafusion/sqllogictest/test_files/arrow_try_cast.slt
create mode 100644
datafusion/sqllogictest/test_files/schema_evolution_nested.slt
create mode 100644
datafusion/sqllogictest/test_files/spark/conversion/cast_int_to_timestamp.slt
create mode 100644
datafusion/substrait/tests/testdata/test_plans/mixed_join_equal_and_indistinct.json
create mode 100644
datafusion/substrait/tests/testdata/test_plans/mixed_join_equal_and_indistinct_left.json
create mode 100644 dev/changelog/52.4.0.md
create mode 100644 dev/changelog/53.0.0.md
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]