This is an automated email from the ASF dual-hosted git repository. xudong963 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new 9942ecd4ac Update version to 50.0.0 (#17463) 9942ecd4ac is described below commit 9942ecd4ac791c26546db8d8bf6ae674006a49fc Author: xudong.w <wxd963996...@gmail.com> AuthorDate: Sun Sep 7 20:55:26 2025 +0800 Update version to 50.0.0 (#17463) * Update version to 50.0.0 * prettier * use 50.0.0 --- Cargo.lock | 82 +++---- Cargo.toml | 74 +++---- dev/changelog/50.0.0.md | 445 ++++++++++++++++++++++++++++++++++++++ docs/source/user-guide/configs.md | 2 +- 4 files changed, 524 insertions(+), 79 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 933a0ee44a..25bcaf68cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1845,7 +1845,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "arrow-ipc", @@ -1918,7 +1918,7 @@ dependencies = [ [[package]] name = "datafusion-benchmarks" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "datafusion", @@ -1944,7 +1944,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "async-trait", @@ -1968,7 +1968,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "async-trait", @@ -1989,7 +1989,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "assert_cmd", @@ -2021,7 +2021,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "49.0.2" +version = "50.0.0" dependencies = [ "ahash 0.8.12", "apache-avro", @@ -2049,7 +2049,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "49.0.2" +version = "50.0.0" dependencies = [ "futures", "log", @@ -2058,7 +2058,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "async-compression", @@ -2094,7 +2094,7 @@ 
dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "49.0.2" +version = "50.0.0" dependencies = [ "apache-avro", "arrow", @@ -2119,7 +2119,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "async-trait", @@ -2142,7 +2142,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "async-trait", @@ -2165,7 +2165,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "async-trait", @@ -2198,11 +2198,11 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "49.0.2" +version = "50.0.0" [[package]] name = "datafusion-examples" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "arrow-flight", @@ -2236,7 +2236,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "async-trait", @@ -2257,7 +2257,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "async-trait", @@ -2280,7 +2280,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2291,7 +2291,7 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "49.0.2" +version = "50.0.0" dependencies = [ "abi_stable", "arrow", @@ -2312,7 +2312,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -2341,7 +2341,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "49.0.2" +version = "50.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2362,7 +2362,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "49.0.2" +version 
= "50.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2375,7 +2375,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "arrow-ord", @@ -2397,7 +2397,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "async-trait", @@ -2411,7 +2411,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2427,7 +2427,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "49.0.2" +version = "50.0.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2435,7 +2435,7 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "49.0.2" +version = "50.0.0" dependencies = [ "datafusion-expr", "quote", @@ -2444,7 +2444,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "async-trait", @@ -2471,7 +2471,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "49.0.2" +version = "50.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2497,7 +2497,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2512,7 +2512,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "49.0.2" +version = "50.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2524,7 +2524,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2545,7 +2545,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "49.0.2" +version = "50.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2582,7 +2582,7 @@ dependencies = [ 
[[package]] name = "datafusion-proto" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "chrono", @@ -2605,7 +2605,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2618,7 +2618,7 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "arrow-schema", @@ -2637,7 +2637,7 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "async-trait", @@ -2659,7 +2659,7 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "chrono", @@ -2680,7 +2680,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2704,7 +2704,7 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "49.0.2" +version = "50.0.0" dependencies = [ "arrow", "async-trait", @@ -2738,7 +2738,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "49.0.2" +version = "50.0.0" dependencies = [ "async-recursion", "async-trait", @@ -2758,7 +2758,7 @@ dependencies = [ [[package]] name = "datafusion-wasmtest" -version = "49.0.2" +version = "50.0.0" dependencies = [ "chrono", "console_error_panic_hook", diff --git a/Cargo.toml b/Cargo.toml index 988f382515..53c35ed35f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -78,7 +78,7 @@ repository = "https://github.com/apache/datafusion" # Define Minimum Supported Rust Version (MSRV) rust-version = "1.86.0" # Define DataFusion version -version = "49.0.2" +version = "50.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -110,42 +110,42 @@ chrono = { version = "0.4.41", default-features = false } criterion = "0.5.1" ctor = "0.4.3" dashmap = "6.0.1" 
-datafusion = { path = "datafusion/core", version = "49.0.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "49.0.0" } -datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "49.0.0" } -datafusion-common = { path = "datafusion/common", version = "49.0.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "49.0.0" } -datafusion-datasource = { path = "datafusion/datasource", version = "49.0.0", default-features = false } -datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "49.0.0", default-features = false } -datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "49.0.0", default-features = false } -datafusion-datasource-json = { path = "datafusion/datasource-json", version = "49.0.0", default-features = false } -datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "49.0.0", default-features = false } -datafusion-doc = { path = "datafusion/doc", version = "49.0.0" } -datafusion-execution = { path = "datafusion/execution", version = "49.0.0" } -datafusion-expr = { path = "datafusion/expr", version = "49.0.0" } -datafusion-expr-common = { path = "datafusion/expr-common", version = "49.0.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "49.0.0" } -datafusion-functions = { path = "datafusion/functions", version = "49.0.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "49.0.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "49.0.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "49.0.0" } -datafusion-functions-table = { path = "datafusion/functions-table", version = "49.0.0" } -datafusion-functions-window = { path = "datafusion/functions-window", version = "49.0.0" } -datafusion-functions-window-common = { path = 
"datafusion/functions-window-common", version = "49.0.0" } -datafusion-macros = { path = "datafusion/macros", version = "49.0.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "49.0.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "49.0.0", default-features = false } -datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "49.0.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "49.0.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "49.0.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "49.0.0" } -datafusion-proto = { path = "datafusion/proto", version = "49.0.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "49.0.0" } -datafusion-pruning = { path = "datafusion/pruning", version = "49.0.0" } -datafusion-session = { path = "datafusion/session", version = "49.0.0" } -datafusion-spark = { path = "datafusion/spark", version = "49.0.0" } -datafusion-sql = { path = "datafusion/sql", version = "49.0.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "49.0.0" } +datafusion = { path = "datafusion/core", version = "50.0.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "50.0.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "50.0.0" } +datafusion-common = { path = "datafusion/common", version = "50.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "50.0.0" } +datafusion-datasource = { path = "datafusion/datasource", version = "50.0.0", default-features = false } +datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "50.0.0", default-features = false } +datafusion-datasource-csv = { path = 
"datafusion/datasource-csv", version = "50.0.0", default-features = false } +datafusion-datasource-json = { path = "datafusion/datasource-json", version = "50.0.0", default-features = false } +datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "50.0.0", default-features = false } +datafusion-doc = { path = "datafusion/doc", version = "50.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "50.0.0" } +datafusion-expr = { path = "datafusion/expr", version = "50.0.0" } +datafusion-expr-common = { path = "datafusion/expr-common", version = "50.0.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "50.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "50.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "50.0.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "50.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "50.0.0" } +datafusion-functions-table = { path = "datafusion/functions-table", version = "50.0.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "50.0.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "50.0.0" } +datafusion-macros = { path = "datafusion/macros", version = "50.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "50.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "50.0.0", default-features = false } +datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "50.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "50.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "50.0.0" } 
+datafusion-physical-plan = { path = "datafusion/physical-plan", version = "50.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "50.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "50.0.0" } +datafusion-pruning = { path = "datafusion/pruning", version = "50.0.0" } +datafusion-session = { path = "datafusion/session", version = "50.0.0" } +datafusion-spark = { path = "datafusion/spark", version = "50.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "50.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "50.0.0" } doc-comment = "0.3" env_logger = "0.11" futures = "0.3" diff --git a/dev/changelog/50.0.0.md b/dev/changelog/50.0.0.md new file mode 100644 index 0000000000..7563d57777 --- /dev/null +++ b/dev/changelog/50.0.0.md @@ -0,0 +1,445 @@ +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# Apache DataFusion 50.0.0 Changelog + +This release consists of 315 commits from 79 contributors. See credits at the end of this changelog for more information. + +See the [upgrade guide](https://datafusion.apache.org/library-user-guide/upgrading.html) for information on how to upgrade from previous versions. 
+ +**Breaking changes:** + +- Support multiple ordered `array_agg` aggregations [#16625](https://github.com/apache/datafusion/pull/16625) (findepi) +- Make `AsyncScalarUDFImpl::invoke_async_with_args` consistent with `ScalarUDFImpl::invoke_with_args` [#16902](https://github.com/apache/datafusion/pull/16902) (geetanshjuneja) +- Derive `WindowUDFImpl` equality, hash from `Eq`, `Hash` traits [#17081](https://github.com/apache/datafusion/pull/17081) (findepi) +- Remove redundant `plan` from extension's check_invariants [#17199](https://github.com/apache/datafusion/pull/17199) (findepi) +- feat: Make parquet_encryption a non-default feature [#17137](https://github.com/apache/datafusion/pull/17137) (miroim) +- chore: fix typos [#17135](https://github.com/apache/datafusion/pull/17135) (waynexia) +- Use a struct for ProjectionExpr [#17398](https://github.com/apache/datafusion/pull/17398) (adriangb) +- Use DataFusionError instead of ArrowError in FileOpenFuture [#17397](https://github.com/apache/datafusion/pull/17397) (adriangb) +- Use return_field instead of return_type for calling aggregates via FFI [#17407](https://github.com/apache/datafusion/pull/17407) (timsaucer) + +**Performance related:** + +- feat: improve LiteralGuarantee for the case like `(a=1 AND b=1) OR (a=2 AND b=3)` [#16762](https://github.com/apache/datafusion/pull/16762) (haohuaijin) +- optimize `initcap` function by avoiding memory allocation [#16878](https://github.com/apache/datafusion/pull/16878) (waynexia) +- speedup `date_trunc` (~7x faster) in some cases [#16859](https://github.com/apache/datafusion/pull/16859) (waynexia) +- Feature: Improve hash Expr performance [#16977](https://github.com/apache/datafusion/pull/16977) (tobixdev) +- Perf: Port arrow-rs optimization for get_buffer_memory_size and add fast path for no buffer for gc string view [#17008](https://github.com/apache/datafusion/pull/17008) (zhuqi-lucas-001) +- Simplify comparisons and binary operations involving NULL 
[#17088](https://github.com/apache/datafusion/pull/17088) (findepi) +- Eliminate all redundant aggregations [#17139](https://github.com/apache/datafusion/pull/17139) (findepi) + +**Implemented enhancements:** + +- feat: Allow tree explain format width to be customizable [#16827](https://github.com/apache/datafusion/pull/16827) (nuno-faria) +- feat(spark): Implement Spark `string` function `luhn_check` [#16848](https://github.com/apache/datafusion/pull/16848) (Standing-Man) +- feat(spark): implement Spark datetime function last_day [#16828](https://github.com/apache/datafusion/pull/16828) (Standing-Man) +- feat: Add `ScalarValue::{new_one,new_zero,new_ten,distance}` support for `Decimal128` and `Decimal256` [#16831](https://github.com/apache/datafusion/pull/16831) (theirix) +- feat: support distinct for window [#16925](https://github.com/apache/datafusion/pull/16925) (zhuqi-lucas-001) +- feat: add multi level merge sort that will always fit in memory [#15700](https://github.com/apache/datafusion/pull/15700) (rluvaton) +- feat: [datafusion-spark] Implement `next_day` function [#16780](https://github.com/apache/datafusion/pull/16780) (petern48) +- feat: Support distinct window for sum [#16943](https://github.com/apache/datafusion/pull/16943) (zhuqi-lucas-001) +- feat(spark): implement Spark math function rint [#16924](https://github.com/apache/datafusion/pull/16924) (chenkovsky) +- feat(spark): implement Spark string function like/ilike [#16962](https://github.com/apache/datafusion/pull/16962) (chenkovsky) +- feat: Cache Parquet metadata in built in parquet reader [#16971](https://github.com/apache/datafusion/pull/16971) (nuno-faria) +- feat: Add `Arc<ConfigOptions>` to `ScalarFunctionArgs`, don't copy `ConfigOptions` on each query [#16970](https://github.com/apache/datafusion/pull/16970) (Omega359) +- feat(spark): implement spark hash function crc32/sha1 [#17032](https://github.com/apache/datafusion/pull/17032) (chenkovsky) +- feat: Limit the memory used in the file 
metadata cache [#17031](https://github.com/apache/datafusion/pull/17031) (nuno-faria) +- feat: Dynamic Parquet encryption and decryption properties [#16779](https://github.com/apache/datafusion/pull/16779) (adamreeve) +- feat: Use Cached Metadata for ListingTable Statistics [#17022](https://github.com/apache/datafusion/pull/17022) (shehabgamin) +- feat(spark): implement Spark math function mod/pmod [#16829](https://github.com/apache/datafusion/pull/16829) (chenkovsky) +- feat(spark): implement Spark math function bit_get/bit_count [#16942](https://github.com/apache/datafusion/pull/16942) (chenkovsky) +- feat: add `isodow` (ISO day-of-week) support to date_part (Monday = 0) [#17112](https://github.com/apache/datafusion/pull/17112) (ayemjay) +- feat(spark): implement spark datetime function date_add/date_sub [#17024](https://github.com/apache/datafusion/pull/17024) (chenkovsky) +- feat: Add the ability to review the contents of the Metadata Cache [#17126](https://github.com/apache/datafusion/pull/17126) (nuno-faria) +- feat: add `datafusion-physical-adapter`, implement predicate adaptation missing fields of structs [#16589](https://github.com/apache/datafusion/pull/16589) (adriangb) +- feat: implement QUALIFY clause [#16933](https://github.com/apache/datafusion/pull/16933) (haohuaijin) +- feat: allow to `spawn`/`spawn_blocking` on a provided runtime in `RecordBatchReceiverStreamBuilder` [#17239](https://github.com/apache/datafusion/pull/17239) (rluvaton) +- feat: Support SortMergeJoin proto serde [#17296](https://github.com/apache/datafusion/pull/17296) (milenkovicm) +- feat(spark): implement Spark `bitmap` function `bitmap_count` [#17179](https://github.com/apache/datafusion/pull/17179) (SparkApplicationMaster) +- feat: Track peak value in tracked consumer [#17327](https://github.com/apache/datafusion/pull/17327) (wForget) +- feat(spark): implement Spark conditional function if [#16946](https://github.com/apache/datafusion/pull/16946) (chenkovsky) +- feat(spark): 
implement Spark `width_bucket` function [#17331](https://github.com/apache/datafusion/pull/17331) (davidlghellin) +- feat: Make Parquet EncryptionFactory async [#17342](https://github.com/apache/datafusion/pull/17342) (adamreeve) +- feat: Support `FILTER` clause in aggregate window functions [#17378](https://github.com/apache/datafusion/pull/17378) (geoffreyclaude) +- feat: Support binary data types for `SortMergeJoin` `on` clause [#17431](https://github.com/apache/datafusion/pull/17431) (stuartcarnie) + +**Fixed bugs:** + +- fix: The inconsistency between scalar and array on the cast decimal to timestamp [#16539](https://github.com/apache/datafusion/pull/16539) (chenkovsky) +- fix: unit test for object_storage [#16824](https://github.com/apache/datafusion/pull/16824) (chenkovsky) +- fix(docs): Update broken links to `TableProvider` docs [#16830](https://github.com/apache/datafusion/pull/16830) (jcsherin) +- fix: `PlaceholderRowExec::partition_statistics` [#16851](https://github.com/apache/datafusion/pull/16851) (crepererum) +- fix: skip predicates on struct unnest in PushDownFilter [#16790](https://github.com/apache/datafusion/pull/16790) (akoshchiy) +- fix: regex bench [#16890](https://github.com/apache/datafusion/pull/16890) (chenkovsky) +- fix: `ComposedPhysicalExtensionCodec` does not use the same codec as encoding when decoding [#16986](https://github.com/apache/datafusion/pull/16986) (Thearas) +- fix: Remove `datafusion.execution.parquet.cache_metadata` config [#17062](https://github.com/apache/datafusion/pull/17062) (jonathanc-n) +- fix: Add missing member to visitor for ConfigFileEncryptionProperties [#17103](https://github.com/apache/datafusion/pull/17103) (corwinjoy) +- fix(ci): update `datafusion-physical-expr-adapter` version to 49.0.1 in Cargo.lock [#17209](https://github.com/apache/datafusion/pull/17209) (miroim) +- fix: respect inexact flags in row group metadata [#16412](https://github.com/apache/datafusion/pull/16412) (CookiePieWw) +- fix:
deserialization error for `FilterExec` (predicates with inlist) [#17224](https://github.com/apache/datafusion/pull/17224) (haohuaijin) +- FFI_RecordBatchStream was causing a memory leak [#17190](https://github.com/apache/datafusion/pull/17190) (timsaucer) +- fix: Windows paths crashing core tests [#17231](https://github.com/apache/datafusion/pull/17231) (nuno-faria) +- fix: sort should always output batches with `batch_size` rows [#17244](https://github.com/apache/datafusion/pull/17244) (rluvaton) +- fix: align `array_has` null buffer for scalar [#17272](https://github.com/apache/datafusion/pull/17272) (comphead) +- fix: dataframe function count_all with alias [#17282](https://github.com/apache/datafusion/pull/17282) (Loaki07) +- fix: correct readme field in `Cargo.toml` [#17310](https://github.com/apache/datafusion/pull/17310) (Weijun-H) +- fix(doc): update the link of deprecation guidelines (#17328) [#17329](https://github.com/apache/datafusion/pull/17329) (ivila) +- fix: lazy case else evaluation [#17311](https://github.com/apache/datafusion/pull/17311) (chenkovsky) +- fix: set distinct_count to Absent when merging statistics [#17385](https://github.com/apache/datafusion/pull/17385) (adriangb) +- fix: Remove duplicate filter from `CrossJoin` unparsing [#17382](https://github.com/apache/datafusion/pull/17382) (jonathanc-n) +- fix: set IPC alignment based on schema [#17363](https://github.com/apache/datafusion/pull/17363) (ding-young) +- fix: return ALL constants in `EquivalenceProperties::constants` [#17404](https://github.com/apache/datafusion/pull/17404) (crepererum) +- fix: align `map_keys` nullability flag [#17454](https://github.com/apache/datafusion/pull/17454) (comphead) + +**Documentation updates:** + +- docs: Fix broken links [#16839](https://github.com/apache/datafusion/pull/16839) (2010YOUY01) +- Add note to upgrade guide about MSRV update [#16845](https://github.com/apache/datafusion/pull/16845) (alamb) +- [main] Update version to 49.0.0, add 49.0.0 
changelog [#16855](https://github.com/apache/datafusion/pull/16855) (alamb) +- Improve async_udf example and docs [#16846](https://github.com/apache/datafusion/pull/16846) (alamb) +- Docs: Update Upgrading.md to reflect 49.0.0 is released [#16853](https://github.com/apache/datafusion/pull/16853) (alamb) +- docs: Remove references to DataFusion for Ray sub project [#16966](https://github.com/apache/datafusion/pull/16966) (andygrove) +- Add `temp_directory` and `max_temp_directory_size` runtime config variables [#16934](https://github.com/apache/datafusion/pull/16934) (delamarch3) +- Add `sql_parser.default_null_ordering` config option to customize the default null ordering [#16963](https://github.com/apache/datafusion/pull/16963) (goldmedal) +- Added Example for `Statistical Functions` in Docs [#16927](https://github.com/apache/datafusion/pull/16927) (Adez017) +- Fix window_functions docs formatting [#17005](https://github.com/apache/datafusion/pull/17005) (mattmatravers) +- docs: Fix 'Analaysis' typo in query optimizer docs [#17015](https://github.com/apache/datafusion/pull/17015) (petern48) +- docs: Fix random extra bullet for 'Analytical Functions' [#17014](https://github.com/apache/datafusion/pull/17014) (petern48) +- docs: Fix failing documentation check in CI [#17026](https://github.com/apache/datafusion/pull/17026) (adamreeve) +- Upgrade arrow/parquet to 56.0.0 [#16690](https://github.com/apache/datafusion/pull/16690) (alamb) +- fix error result in execute&pre_selection [#16930](https://github.com/apache/datafusion/pull/16930) (acking-you) +- docs: Fix failing CI [#17041](https://github.com/apache/datafusion/pull/17041) (liamzwbao) +- Docs: Add Examples to Config Options page [#17039](https://github.com/apache/datafusion/pull/17039) (alamb) +- Docs: Add Tuning Guide for small data / short queries [#17040](https://github.com/apache/datafusion/pull/17040) (alamb) +- Docs: Update the crate configuration / build settings page 
[#17038](https://github.com/apache/datafusion/pull/17038) (alamb) +- Support `centroids` config for `approx_percentile_cont_with_weight` [#17003](https://github.com/apache/datafusion/pull/17003) (liamzwbao) +- Add ExecutionPlan::reset_state [#17028](https://github.com/apache/datafusion/pull/17028) (adriangb) +- Docs: Add Tuning Guide for larger-than-memory queries [#17069](https://github.com/apache/datafusion/pull/17069) (2010YOUY01) +- Link UdfEq and PtrEq to help understand relationship [#17082](https://github.com/apache/datafusion/pull/17082) (findepi) +- Derive `AggregateUDFImpl` equality, hash from `Eq`, `Hash` traits [#17130](https://github.com/apache/datafusion/pull/17130) (findepi) +- chore: Clarify `EmptyRelation` description [#17157](https://github.com/apache/datafusion/pull/17157) (comphead) +- Update dev env documentation to reflect pinned rust version [#17107](https://github.com/apache/datafusion/pull/17107) (Jefffrey) +- Differentiate 0-row and 1-row EmptyRelation in EXPLAIN [#17145](https://github.com/apache/datafusion/pull/17145) (findepi) +- (Re)Support old syntax for `approx_percentile_cont` and `approx_percentile_cont_with_weight` [#16999](https://github.com/apache/datafusion/pull/16999) (alamb) +- Derive `ScalarUDFImpl` equality, hash from `Eq`, `Hash` traits [#17164](https://github.com/apache/datafusion/pull/17164) (findepi) +- #17128 Add support for chr(0) [#17131](https://github.com/apache/datafusion/pull/17131) (pepijnve) +- [main] Update version to 49.0.1 and add changelog (#17175) [#17191](https://github.com/apache/datafusion/pull/17191) (alamb) +- Docs: Consolidate feature proposal content into roadmap [#17156](https://github.com/apache/datafusion/pull/17156) (alamb) +- Doc: Update upgrade guide for the rewritten NLJ operator [#17202](https://github.com/apache/datafusion/pull/17202) (2010YOUY01) +- Support serializing `generate_series` in `datafusion-proto` [#17200](https://github.com/apache/datafusion/pull/17200) (cetra3) +- Fix broken 
links in user docs [#17228](https://github.com/apache/datafusion/pull/17228) (AdamGS) +- Format `Date32` to string given timestamp specifiers [#15361](https://github.com/apache/datafusion/pull/15361) (friendlymatthew) +- Improve documentation for Signature, Volatility, and TypeSignature [#17264](https://github.com/apache/datafusion/pull/17264) (alamb) +- [main] Forward port `49.0.2` version and changelog (#17277) [#17287](https://github.com/apache/datafusion/pull/17287) (alamb) +- Document schema merging. [#17249](https://github.com/apache/datafusion/pull/17249) (wiedld) +- Support from-first SQL syntax [#17295](https://github.com/apache/datafusion/pull/17295) (simonvandel) +- Add `cfg(feature = "avro")` attribute to Avro example in SQL API docs [#17142](https://github.com/apache/datafusion/pull/17142) (kosiew) +- Push the limits past window functions [#17347](https://github.com/apache/datafusion/pull/17347) (avantgardnerio) +- Refactor DataSourceExec::try_swapping_with_projection to simplify and remove abstraction leakage [#17395](https://github.com/apache/datafusion/pull/17395) (adriangb) +- doc: Document caveats of `swap_inputs()` interface in join executors [#17373](https://github.com/apache/datafusion/pull/17373) (2010YOUY01) +- Fix syntax error in DDL documentation example [#17412](https://github.com/apache/datafusion/pull/17412) (pepijnve) +- Add MSRV change to upgrade guide [#17406](https://github.com/apache/datafusion/pull/17406) (findepi) +- Add PhysicalExpr::is_volatile_node to upgrade guide [#17443](https://github.com/apache/datafusion/pull/17443) (adriangb) +- docs: Render `--` properly in profiling docs [#17430](https://github.com/apache/datafusion/pull/17430) (petern48) + +**Other:** + +- chore: use `equals_datatype` for `BinaryExpr` [#16813](https://github.com/apache/datafusion/pull/16813) (comphead) +- chore: add tests for out of bounds for NullArray [#16802](https://github.com/apache/datafusion/pull/16802) (comphead) +- Refactor binary.rs tests 
into modular submodules under `binary/tests` [#16782](https://github.com/apache/datafusion/pull/16782) (kosiew) +- cache generation of dictionary keys and null arrays for ScalarValue [#16789](https://github.com/apache/datafusion/pull/16789) (adriangb) +- refactor(examples): remove redundant call to create directory in `parquet_embedded_index.rs` [#16825](https://github.com/apache/datafusion/pull/16825) (jcsherin) +- Add benchmark for ByteViewGroupValueBuilder [#16826](https://github.com/apache/datafusion/pull/16826) (zhuqi-lucas-001) +- Simplify try cast expr evaluation [#16834](https://github.com/apache/datafusion/pull/16834) (lewiszlw) +- Fix flaky test case in joins.slt [#16849](https://github.com/apache/datafusion/pull/16849) (findepi) +- chore(deps): bump sysinfo from 0.35.2 to 0.36.1 [#16850](https://github.com/apache/datafusion/pull/16850) (dependabot[bot]) +- chore(deps): bump aws-credential-types from 1.2.3 to 1.2.4 [#16815](https://github.com/apache/datafusion/pull/16815) (dependabot[bot]) +- fix(build-wasm): put `arrow-ipc/zstd` dep under `compression` feature [#16844](https://github.com/apache/datafusion/pull/16844) (chrisvander) +- chore(deps): bump serde_json from 1.0.140 to 1.0.141 [#16863](https://github.com/apache/datafusion/pull/16863) (dependabot[bot]) +- chore(deps): bump aws-config from 1.8.1 to 1.8.2 [#16864](https://github.com/apache/datafusion/pull/16864) (dependabot[bot]) +- test: Fix flaky join tests [#16860](https://github.com/apache/datafusion/pull/16860) (2010YOUY01) +- chore(deps): bump rand from 0.9.1 to 0.9.2 [#16882](https://github.com/apache/datafusion/pull/16882) (dependabot[bot]) +- Report error when `SessionState::sql_to_expr_with_alias` does not consume all input [#16811](https://github.com/apache/datafusion/pull/16811) (pepijnve) +- test: fix more flaky join tests [#16880](https://github.com/apache/datafusion/pull/16880) (2010YOUY01) +- MINOR: add unit tests for chr function 
[#16856](https://github.com/apache/datafusion/pull/16856) (waynexia) +- remove deprecated methods from FileScanConfig / DataSourceExec [#16901](https://github.com/apache/datafusion/pull/16901) (adriangb) +- Support utf8view for spark hex [#16885](https://github.com/apache/datafusion/pull/16885) (xudong963) +- Fixes 3 bugs during serialization and deserialization of physical plans [#16858](https://github.com/apache/datafusion/pull/16858) (NGA-TRAN) +- chore(deps): bump aws-config from 1.8.2 to 1.8.3 [#16912](https://github.com/apache/datafusion/pull/16912) (dependabot[bot]) +- Derive UDF equality from PartialEq, Hash [#16842](https://github.com/apache/datafusion/pull/16842) (findepi) +- Ensure Substrait consumer can handle expressions in VirtualTable [#16857](https://github.com/apache/datafusion/pull/16857) (lorenarosati) +- Mutable Join Unwind [#16883](https://github.com/apache/datafusion/pull/16883) (berkaysynnada) +- fix(datafusion-proto): support serializing/deserializing ArrowFormat tables [#16875](https://github.com/apache/datafusion/pull/16875) (colinmarc) +- ScalarValue Default + Min + Max [#16891](https://github.com/apache/datafusion/pull/16891) (berkaysynnada) +- minor: add is_superset() method for Interval's [#16895](https://github.com/apache/datafusion/pull/16895) (berkaysynnada) +- minor: implement with_new_expressions for AggregateFunctionExpr [#16897](https://github.com/apache/datafusion/pull/16897) (berkaysynnada) +- minor: Rename add_spm_on_top as add_merge_on_top [#16913](https://github.com/apache/datafusion/pull/16913) (berkaysynnada) +- Implement Helpers for ScopedTimerGuard and Time Structs [#16911](https://github.com/apache/datafusion/pull/16911) (berkaysynnada) +- Fix Partial Sort Get Slice Point Between Batches [#16881](https://github.com/apache/datafusion/pull/16881) (berkaysynnada) +- Fix `schema_adapter` integration tests not running [#16835](https://github.com/apache/datafusion/pull/16835) (kosiew) +- Update release process 
[#16929](https://github.com/apache/datafusion/pull/16929) (xudong963) +- Fix `next_up` and `next_down` behavior for zero float values [#16745](https://github.com/apache/datafusion/pull/16745) (liamzwbao) +- Add Fetch Property to OutputRequirementExec [#16892](https://github.com/apache/datafusion/pull/16892) (berkaysynnada) +- chore(deps): bump tokio from 1.46.1 to 1.47.0 [#16952](https://github.com/apache/datafusion/pull/16952) (dependabot[bot]) +- chore(deps): bump serde_json from 1.0.140 to 1.0.141 [#16951](https://github.com/apache/datafusion/pull/16951) (dependabot[bot]) +- chore: Remove attributes to allow dead_code that aren't relevant anymore [#16953](https://github.com/apache/datafusion/pull/16953) (AdamGS) +- chore(deps): bump rand from 0.9.1 to 0.9.2 [#16960](https://github.com/apache/datafusion/pull/16960) (dependabot[bot]) +- chore(deps): bump ctor from 0.4.2 to 0.4.3 [#16961](https://github.com/apache/datafusion/pull/16961) (dependabot[bot]) +- disallow pushdown of volatile functions [#16861](https://github.com/apache/datafusion/pull/16861) (adriangb) +- remove warning from every file open [#16968](https://github.com/apache/datafusion/pull/16968) (adriangb) +- Pin github actions to commit sha [#16964](https://github.com/apache/datafusion/pull/16964) (gopidesupavan) +- Enable physical filter pushdown for hash joins [#16954](https://github.com/apache/datafusion/pull/16954) (adriangb) +- Fix [Bug] Aggregate + TopK fails when asc = false [#16972](https://github.com/apache/datafusion/pull/16972) (avantgardnerio) +- Use tokio::task::coop::poll_proceed by default in CooperativeStream [#16748](https://github.com/apache/datafusion/pull/16748) (pepijnve) +- Add benchmark utility to profile peak memory usage [#16814](https://github.com/apache/datafusion/pull/16814) (ding-young) +- chore(deps): bump indicatif from 0.17.11 to 0.18.0 [#16992](https://github.com/apache/datafusion/pull/16992) (dependabot[bot]) +- test(datafusion-cli): migrate tests to `insta` in 
`print_format.rs` [#16993](https://github.com/apache/datafusion/pull/16993) (Thearas) +- Chore: remove 'spill_record_batch_by_size' api [#16958](https://github.com/apache/datafusion/pull/16958) (ding-young) +- chore(deps): bump serde_json from 1.0.141 to 1.0.142 [#17006](https://github.com/apache/datafusion/pull/17006) (dependabot[bot]) +- Add tests for yielding in `SpillManager::read_spill_as_stream` [#16616](https://github.com/apache/datafusion/pull/16616) (ding-young) +- #16994 Ensure CooperativeExec#maintains_input_order returns a Vec of the correct size [#16995](https://github.com/apache/datafusion/pull/16995) (pepijnve) +- test: Add logic tests for string_agg with order [#17033](https://github.com/apache/datafusion/pull/17033) (nuno-faria) +- Implement `From<Option<String>>` for `ScalarValue` [#17043](https://github.com/apache/datafusion/pull/17043) (findepi) +- chore(deps): bump tokio-util from 0.7.15 to 0.7.16 [#17030](https://github.com/apache/datafusion/pull/17030) (dependabot[bot]) +- Add missing Substrait to DataFusion function name mappings [#16950](https://github.com/apache/datafusion/pull/16950) (lorenarosati) +- refactor: use upstream arrow-rs inline_key_fast [#17044](https://github.com/apache/datafusion/pull/17044) (zhuqi-lucas-001) +- Implement spark `array` function `array` [#16936](https://github.com/apache/datafusion/pull/16936) (Standing-Man) +- Address memory over-accounting in array_agg [#16816](https://github.com/apache/datafusion/pull/16816) (gabotechs) +- chore(deps): bump aws-credential-types from 1.2.4 to 1.2.5 [#17053](https://github.com/apache/datafusion/pull/17053) (dependabot[bot]) +- Support Substrait functions and_not, xor, and between in consumer built-in expression builder [#16984](https://github.com/apache/datafusion/pull/16984) (lorenarosati) +- Derive UDWF equality from PartialEq, Hash [#17057](https://github.com/apache/datafusion/pull/17057) (findepi) +- fix return field for `is_not_null` expression 
[#17056](https://github.com/apache/datafusion/pull/17056) (davidhewitt) +- chore(deps): bump tokio from 1.47.0 to 1.47.1 [#17063](https://github.com/apache/datafusion/pull/17063) (dependabot[bot]) +- Optimize char expression [#16076](https://github.com/apache/datafusion/pull/16076) (ajita-asthana) +- Fix equality of parametrizable ArrayAgg function [#17065](https://github.com/apache/datafusion/pull/17065) (findepi) +- Implement Spark `url` function `parse_url` [#16937](https://github.com/apache/datafusion/pull/16937) (Standing-Man) +- Derive UDAF equality from Eq, Hash [#17067](https://github.com/apache/datafusion/pull/17067) (findepi) +- Remove elements deprecated since v 45 [#17075](https://github.com/apache/datafusion/pull/17075) (findepi) +- Deprecate ScalarUDF::is_nullable [#17074](https://github.com/apache/datafusion/pull/17074) (findepi) +- Re-export `object_store` crate via DataFusion Core and Common [#17070](https://github.com/apache/datafusion/pull/17070) (kosiew) +- Fix hash/equality issues for ScalarFunctionExpr [#17078](https://github.com/apache/datafusion/pull/17078) (findepi) +- Fill missing methods in aliased UDF impls [#17080](https://github.com/apache/datafusion/pull/17080) (findepi) +- Improve Hash speed for ScalarFunctionExpr [#17099](https://github.com/apache/datafusion/pull/17099) (findepi) +- chore(deps): bump clap from 4.5.42 to 4.5.43 [#17079](https://github.com/apache/datafusion/pull/17079) (dependabot[bot]) +- minor: remove unused import in docstring of datafusion_common::record_batch [#17106](https://github.com/apache/datafusion/pull/17106) (Jefffrey) +- Make macros in common::test_util hygienic and not dependent on user dependencies [#17102](https://github.com/apache/datafusion/pull/17102) (AdamGS) +- minor: remove unnecessary clippy:large_enum_variant allows [#17108](https://github.com/apache/datafusion/pull/17108) (Jefffrey) +- minor: Improve equivalence handling of joins [#16893](https://github.com/apache/datafusion/pull/16893) 
(berkaysynnada) +- Fix incorrect `NULL IN ()` optimization [#17092](https://github.com/apache/datafusion/pull/17092) (findepi) +- Add `prettier` to the devcontainer (GitHub codespaces) [#17019](https://github.com/apache/datafusion/pull/17019) (alamb) +- Set a lower threshold for clippy to flag large error variants [#17109](https://github.com/apache/datafusion/pull/17109) (Jefffrey) +- chore(deps): bump rustyline from 16.0.0 to 17.0.0 [#17116](https://github.com/apache/datafusion/pull/17116) (dependabot[bot]) +- Add dynamic filter (bounds) pushdown to HashJoinExec [#16445](https://github.com/apache/datafusion/pull/16445) (adriangb) +- Remove the "run extended tests" github PR comment action [#17119](https://github.com/apache/datafusion/pull/17119) (alamb) +- chore(deps): bump sysinfo from 0.36.1 to 0.37.0 [#17124](https://github.com/apache/datafusion/pull/17124) (dependabot[bot]) +- chore(deps): bump libc from 0.2.174 to 0.2.175 [#17121](https://github.com/apache/datafusion/pull/17121) (dependabot[bot]) +- ff: Preserve cached plan information when pushing projection [#17129](https://github.com/apache/datafusion/pull/17129) (friendlymatthew) +- chore: Enforce checks for RC branches [#17132](https://github.com/apache/datafusion/pull/17132) (comphead) +- chore(deps): bump actions/checkout from 4.2.2 to 5.0.0 [#17149](https://github.com/apache/datafusion/pull/17149) (dependabot[bot]) +- minor: enhance comment in SortPreservingMergeStream.abort [#17115](https://github.com/apache/datafusion/pull/17115) (mapleFU) +- Update workspace to use Rust 1.89 [#17100](https://github.com/apache/datafusion/pull/17100) (shruti2522) +- chore(deps): bump on-headers and compression in /datafusion/wasmtest/datafusion-wasm-app [#16812](https://github.com/apache/datafusion/pull/16812) (dependabot[bot]) +- chore(deps): bump slab from 0.4.10 to 0.4.11 [#17161](https://github.com/apache/datafusion/pull/17161) (dependabot[bot]) +- refactor `character_length` impl by unifying null handling logic 
[#16877](https://github.com/apache/datafusion/pull/16877) (waynexia) +- chore(deps): bump clap from 4.5.43 to 4.5.44 [#17148](https://github.com/apache/datafusion/pull/17148) (dependabot[bot]) +- Pass the input schema to stats_projection for ProjectionExpr [#17123](https://github.com/apache/datafusion/pull/17123) (hareshkh) +- Fix extended tests failure on main by updating `datafusion-testing` pin [#17176](https://github.com/apache/datafusion/pull/17176) (alamb) +- Minor: display filter in HashJoin's tree explain [#17170](https://github.com/apache/datafusion/pull/17170) (2010YOUY01) +- add test for multi-column topk dynamic filter pushdown [#17162](https://github.com/apache/datafusion/pull/17162) (adriangb) +- Test: Add checks to sqllogictest temporary file creations [#17017](https://github.com/apache/datafusion/pull/17017) (2010YOUY01) +- Deprecate unused `ScalarUDF::display_name` [#17168](https://github.com/apache/datafusion/pull/17168) (findepi) +- CI: Fix extended test failure by updating `datafusion-testing` submodule [#17187](https://github.com/apache/datafusion/pull/17187) (2010YOUY01) +- Normalize `NUL` to `\0` in sqllogictests [#17181](https://github.com/apache/datafusion/pull/17181) (Jefffrey) +- Simplify `GetFieldFunc`'s `display_name`, `schema_name` [#17167](https://github.com/apache/datafusion/pull/17167) (findepi) +- Rewrite Nested Loop Join executor for 5× speed and 1% memory usage [#16996](https://github.com/apache/datafusion/pull/16996) (2010YOUY01) +- Minor: Fix compiler warning when compiling `datafusion-cli` [#17205](https://github.com/apache/datafusion/pull/17205) (2010YOUY01) +- Refactor: Do not silently ignore errors in `stats_projection` [#17154](https://github.com/apache/datafusion/pull/17154) (alamb) +- Miscellaneous cleanups [#17189](https://github.com/apache/datafusion/pull/17189) (findepi) +- [Parquet Metadata Cache] Document the ListingTable cache [#17133](https://github.com/apache/datafusion/pull/17133) (alamb) +- Fix: Show backtrace 
for ArrowError [#17204](https://github.com/apache/datafusion/pull/17204) (2010YOUY01) +- minor: clean up distinct window code [#17215](https://github.com/apache/datafusion/pull/17215) (zhuqi-lucas-001) +- chore: Add drop table test on create_drop.rs [#17219](https://github.com/apache/datafusion/pull/17219) (caicancai) +- chore(deps): bump async-trait from 0.1.88 to 0.1.89 [#17203](https://github.com/apache/datafusion/pull/17203) (dependabot[bot]) +- Bump MSRV to 1.86.0 [#17230](https://github.com/apache/datafusion/pull/17230) (adriangb) +- Minor: improve error message when file creation failed [#17217](https://github.com/apache/datafusion/pull/17217) (2010YOUY01) +- Fix dynamic filter pushdown in HashJoinExec [#17201](https://github.com/apache/datafusion/pull/17201) (adriangb) +- Fix Analyze Exec protobuf roundtrip [#17234](https://github.com/apache/datafusion/pull/17234) (cetra3) +- Preserve `distinct` and `ignore_nulls` in window expressions during proto serde [#17235](https://github.com/apache/datafusion/pull/17235) (cetra3) +- chore(deps): bump serde_json from 1.0.142 to 1.0.143 [#17240](https://github.com/apache/datafusion/pull/17240) (dependabot[bot]) +- chore(deps): bump syn from 2.0.105 to 2.0.106 [#17243](https://github.com/apache/datafusion/pull/17243) (dependabot[bot]) +- Push dynamic pushdown through cooperative and projection execs [#17238](https://github.com/apache/datafusion/pull/17238) (jackkleeman) +- Configure cli test that requires backtrace to be optional [#17236](https://github.com/apache/datafusion/pull/17236) (Jefffrey) +- chore(deps): Update sqlparser to 0.58 [#16456](https://github.com/apache/datafusion/pull/16456) (Dimchikkk) +- chore(deps): bump rustyline from 17.0.0 to 17.0.1 [#17252](https://github.com/apache/datafusion/pull/17252) (dependabot[bot]) +- chore(deps): bump thiserror from 2.0.14 to 2.0.16 [#17257](https://github.com/apache/datafusion/pull/17257) (dependabot[bot]) +- Fix HashJoinExec sideways information passing for 
partitioned queries [#17197](https://github.com/apache/datafusion/pull/17197) (adriangb) +- Fix HashJoinExec test snapshot under force_hash_collisions=true [#17265](https://github.com/apache/datafusion/pull/17265) (adriangb) +- Deprecate confusingly named `UserDefinedFunctionPlanner` [#17247](https://github.com/apache/datafusion/pull/17247) (alamb) +- Fix: ListingTableFactory paths with dots [#17233](https://github.com/apache/datafusion/pull/17233) (BlakeOrth) +- chore(deps): bump tempfile from 3.20.0 to 3.21.0 [#17268](https://github.com/apache/datafusion/pull/17268) (dependabot[bot]) +- Fix PartialOrd for ScalarUDF [#17182](https://github.com/apache/datafusion/pull/17182) (findepi) +- chore(deps): bump url from 2.5.4 to 2.5.6 [#17283](https://github.com/apache/datafusion/pull/17283) (dependabot[bot]) +- Make dynamic filter creation in HashJoinExec deterministic against partition evaluation order [#17280](https://github.com/apache/datafusion/pull/17280) (adriangb) +- Consolidate Parquet Metadata handling into its own module and struct `DFParquetMetadata` [#17127](https://github.com/apache/datafusion/pull/17127) (alamb) +- Only update TopK dynamic filters if the new ones are more selective [#16433](https://github.com/apache/datafusion/pull/16433) (adriangb) +- Add documentation for UNION schema handling. 
[#17248](https://github.com/apache/datafusion/pull/17248) (wiedld) +- Replace π-related bound constants with next_up/next_down [#16823](https://github.com/apache/datafusion/pull/16823) (rthummaluru) +- chore: add example for how to use TrackConsumersPool [#17213](https://github.com/apache/datafusion/pull/17213) (wiedld) +- minor: Remove extra line break in explain physical plan [#17303](https://github.com/apache/datafusion/pull/17303) (nuno-faria) +- Support `avg(distinct)` for `float64` type [#17255](https://github.com/apache/datafusion/pull/17255) (Jefffrey) +- chore: check the error message log [#17308](https://github.com/apache/datafusion/pull/17308) (caicancai) +- Expand sql_planner benchmark for benchmarking physical and logical optimization. [#17276](https://github.com/apache/datafusion/pull/17276) (Omega359) +- Encapsulate early File pruning in parquet opener in its own stream [#17293](https://github.com/apache/datafusion/pull/17293) (alamb) +- Implement `partition_statistics` API for `RepartitionExec` [#17061](https://github.com/apache/datafusion/pull/17061) (liamzwbao) +- chore: replace Schema with SchemaRef in PruningExpressionBuilder [#17216](https://github.com/apache/datafusion/pull/17216) (etolbakov) +- chore(deps): bump regex-syntax from 0.8.5 to 0.8.6 [#17320](https://github.com/apache/datafusion/pull/17320) (dependabot[bot]) +- chore(deps): bump indexmap from 2.10.0 to 2.11.0 [#17316](https://github.com/apache/datafusion/pull/17316) (dependabot[bot]) +- refactor: Split `SortMergeJoin` into multiple modules [#17304](https://github.com/apache/datafusion/pull/17304) (jonathanc-n) +- MINOR: add missing examples to example list [#17333](https://github.com/apache/datafusion/pull/17333) (waynexia) +- chore: split hash join to smaller modules [#17300](https://github.com/apache/datafusion/pull/17300) (2010YOUY01) +- chore(deps): bump url from 2.5.6 to 2.5.7 [#17324](https://github.com/apache/datafusion/pull/17324) (dependabot[bot]) +- chore(deps): bump 
regex from 1.11.1 to 1.11.2 [#17325](https://github.com/apache/datafusion/pull/17325) (dependabot[bot]) +- add a ci job for typo checking [#17339](https://github.com/apache/datafusion/pull/17339) (waynexia) +- chore(deps): bump clap from 4.5.45 to 4.5.46 [#17338](https://github.com/apache/datafusion/pull/17338) (dependabot[bot]) +- chore(deps): bump korandoru/hawkeye from 6.1.1 to 6.2.0 [#17321](https://github.com/apache/datafusion/pull/17321) (dependabot[bot]) +- chore: avoid very cheap copy in `SchemaMapping` [#17344](https://github.com/apache/datafusion/pull/17344) (rluvaton) +- chore(deps): bump actions/checkout from 4.2.2 to 5.0.0 [#17345](https://github.com/apache/datafusion/pull/17345) (dependabot[bot]) +- chore(deps): bump libmimalloc-sys from 0.1.43 to 0.1.44 [#17343](https://github.com/apache/datafusion/pull/17343) (dependabot[bot]) +- fix EquivalenceProperties calculation in DataSourceExec [#17323](https://github.com/apache/datafusion/pull/17323) (adriangb) +- chore(deps): bump mimalloc from 0.1.47 to 0.1.48 [#17353](https://github.com/apache/datafusion/pull/17353) (dependabot[bot]) +- chore(deps): bump tracing-subscriber from 0.3.19 to 0.3.20 [#17355](https://github.com/apache/datafusion/pull/17355) (dependabot[bot]) +- refactor: simplify json_shredding example by using ListingTable [#17369](https://github.com/apache/datafusion/pull/17369) (waynexia) +- Fix incorrect memory accounting for sliced `StringViewArray` [#17315](https://github.com/apache/datafusion/pull/17315) (ding-young) +- chore(deps): bump aws-credential-types from 1.2.5 to 1.2.6 [#17368](https://github.com/apache/datafusion/pull/17368) (dependabot[bot]) +- minor: use debug level log for physical optimizer [#17383](https://github.com/apache/datafusion/pull/17383) (waynexia) +- chore(deps): bump uuid from 1.18.0 to 1.18.1 [#17384](https://github.com/apache/datafusion/pull/17384) (dependabot[bot]) +- chore(deps): bump aws-config from 1.8.5 to 1.8.6 
[#17386](https://github.com/apache/datafusion/pull/17386) (dependabot[bot]) +- minor: make dict_from_values public [#17376](https://github.com/apache/datafusion/pull/17376) (parthchandra) +- chore: add memory catalog test to handle table removal before schema deregistration [#17307](https://github.com/apache/datafusion/pull/17307) (caicancai) +- chore(deps): bump actions/setup-node from 4.4.0 to 5.0.0 [#17410](https://github.com/apache/datafusion/pull/17410) (dependabot[bot]) +- chore(deps): bump actions/stale from 9.1.0 to 10.0.0 [#17409](https://github.com/apache/datafusion/pull/17409) (dependabot[bot]) +- chore(deps): bump actions/labeler from 5.0.0 to 6.0.0 [#17408](https://github.com/apache/datafusion/pull/17408) (dependabot[bot]) +- Avoid panic when 'with order' expression could not be converted to a logical expression [#17394](https://github.com/apache/datafusion/pull/17394) (pepijnve) +- chore(deps): bump apache-avro from 0.17.0 to 0.20.0 [#16092](https://github.com/apache/datafusion/pull/16092) (dependabot[bot]) +- chore(deps): bump actions/setup-python from 5.6.0 to 6.0.0 [#17413](https://github.com/apache/datafusion/pull/17413) (dependabot[bot]) +- Test grouping by FixedSizeList [#17415](https://github.com/apache/datafusion/pull/17415) (findepi) +- re-export physical_expr_adapter [#17414](https://github.com/apache/datafusion/pull/17414) (adriangb) +- Benchmark window function with multiple partitioning columns [#17402](https://github.com/apache/datafusion/pull/17402) (findepi) +- Fix PartialOrd for Window [#17393](https://github.com/apache/datafusion/pull/17393) (findepi) +- Memory datasource protobuf support [#17290](https://github.com/apache/datafusion/pull/17290) (lewiszlw) +- fix bounds accumulator reset in HashJoinExec dynamic filter pushdown [#17371](https://github.com/apache/datafusion/pull/17371) (adriangb) +- Unimplement `PartialOrd` for `TDigest`'s `Centroid` [#17440](https://github.com/apache/datafusion/pull/17440) (findepi) +- Unimplement 
`PartialEq`, `PartialOrd` from `ToRepartition`, `RePartition` [#17441](https://github.com/apache/datafusion/pull/17441) (findepi) +- chore(deps): bump insta from 1.43.1 to 1.43.2 [#17436](https://github.com/apache/datafusion/pull/17436) (dependabot[bot]) +- chore(deps): bump actions/labeler from 6.0.0 to 6.0.1 [#17433](https://github.com/apache/datafusion/pull/17433) (dependabot[bot]) +- chore(deps): bump clap from 4.5.46 to 4.5.47 [#17435](https://github.com/apache/datafusion/pull/17435) (dependabot[bot]) +- Add PhysicalExpr::is_volatile [#17351](https://github.com/apache/datafusion/pull/17351) (adriangb) +- refactor: Use `BufferedBatchState` enum for SMJ spilling [#17429](https://github.com/apache/datafusion/pull/17429) (jonathanc-n) +- Re-enable page index for encrypted Parquet [#17426](https://github.com/apache/datafusion/pull/17426) (adamreeve) +- Re-export apache-avro when avro feature flag is set [#17388](https://github.com/apache/datafusion/pull/17388) (shivbhatia10) +- Improved experience when remote object store URL does not end in / [#17364](https://github.com/apache/datafusion/pull/17364) (xiedeyantu) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. 
+ +``` + 51 dependabot[bot] + 31 Piotr Findeisen + 24 Adrian Garcia Badaracco + 21 Andrew Lamb + 14 Yongting You + 11 Chen Chongchen + 9 Berkay Şahin + 9 Ruihang Xia + 7 Jeffrey Vo + 7 Nuno Faria + 6 Oleks V + 6 Pepijn Van Eeckhoudt + 6 Qi Zhu + 5 ding-young + 4 Adam Reeve + 4 Alan Tang + 4 Jonathan Chen + 4 Liam Bao + 4 Peter Nguyen + 4 Raz Luvaton + 4 kosiew + 3 Adam Gutglick + 3 Cancai Cai + 3 Huaijin + 3 Peter L + 3 lorenarosati + 3 wiedld + 2 Brent Gardner + 2 Bruce Ritchie + 2 Marco Neumann + 2 Matthew Kim + 2 Sherin Jacob + 2 Thearas + 2 Tim Saucer + 2 miro + 2 xudong.w + 2 张林伟 + 1 Ajeeta Asthana + 1 Alex Huang + 1 Andrey Koshchiy + 1 Andy Grove + 1 Blake Orth + 1 Christian van der Loo + 1 Colin Marc + 1 Corwin Joy + 1 David Hewitt + 1 David López + 1 Dima + 1 Eugene Tolbakov + 1 Evgenii Glotov + 1 GPK + 1 Gabriel + 1 Geetansh Juneja + 1 Geoffrey Claude + 1 Haresh Khanna + 1 Jack Kleeman + 1 Jax Liu + 1 Jensen + 1 LB7666 + 1 Loakesh Indiran + 1 Marko Milenković + 1 Matt Matravers + 1 Nga Tran + 1 Parth Chandra + 1 Ronit Thummaluru + 1 Shehab Amin + 1 Shiv Bhatia + 1 Shruti Sharma + 1 Simon Vandel Sillesen + 1 Stuart Carnie + 1 Tobias Schwarzinger + 1 Yuhan Wang + 1 ZC + 1 Zhen Wang + 1 aditya singh rathore + 1 ayemjay + 1 delamarch3 + 1 mwish + 1 theirix +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 5060bc3805..3d4730958f 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -97,7 +97,7 @@ The following configuration settings are available: | datafusion.execution.parquet.dictionary_page_size_limit | 1048576 | (writing) Sets best effort maximum dictionary page size, in bytes [...] 
| datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting [...] | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. [...] -| datafusion.execution.parquet.created_by | datafusion version 49.0.2 | (writing) Sets "created by" property [...] +| datafusion.execution.parquet.created_by | datafusion version 50.0.0 | (writing) Sets "created by" property [...] | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length [...] | datafusion.execution.parquet.statistics_truncate_length | 64 | (writing) Sets statistics truncate length. If NULL, uses default parquet writer setting [...] | datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page [...] --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org