This is an automated email from the ASF dual-hosted git repository. github-merge-queue[bot] pushed a commit to branch gh-readonly-queue/main/pr-22014-aca4d1377bf9785e6fa9a154c579cc761ca7b54b in repository https://gitbox.apache.org/repos/asf/datafusion.git
commit 2f29038f97b02d128e22208a082cfaebc07f36fd Author: Michael Kleen <[email protected]> AuthorDate: Fri May 8 04:40:15 2026 +0200 Support '0' value for parse_capacity_limit() (#22014) ## Which issue does this PR close? - None ## Rationale for this change This extends `parse_capacity_limit()` to support `0` to set a limit of 0 instead of using `0K`. This simplifies configuration and avoids confusion with the word `OK` (Okay). This is based on the [suggestion](https://github.com/apache/datafusion/pull/20047#discussion_r3181959057) from @martin-g. Usage: ``` SET datafusion.runtime.example_limit = '0' ``` instead of: ``` SET datafusion.runtime.example_limit = '0K' ``` ## What changes are included in this PR? See above. ## Are these changes tested? Yes. ## Are there any user-facing changes? Yes. --- datafusion/core/src/execution/context/mod.rs | 6 +++++- datafusion/execution/src/runtime_env.rs | 8 ++++---- .../sqllogictest/test_files/information_schema.slt | 8 ++++---- datafusion/sqllogictest/test_files/set_variable.slt | 9 +++++++++ docs/source/user-guide/configs.md | 16 ++++++++-------- 5 files changed, 30 insertions(+), 17 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 87170f595f..d84ef0c898 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1275,7 +1275,7 @@ impl SessionContext { } /// Parse capacity limit from string to number of bytes by allowing units: K, M and G. - /// Supports formats like '1.5G', '100M', '512K' + /// Supports formats like '1.5G', '100M', '512K'. Capacity limit can be set to 0 with '0'. 
/// /// # Examples /// ``` @@ -1296,6 +1296,9 @@ impl SessionContext { "Empty limit value found for '{config_name}'" )); } + if limit == "0" { + return Ok(0); + } let (number, unit) = limit.split_at(limit.len() - 1); let number: f64 = number.parse().map_err(|_| { plan_datafusion_err!( @@ -2970,6 +2973,7 @@ mod tests { // Valid capacity_limit for (limit, want) in [ + ("0", 0), ("1.5K", (1.5 * 1024.0) as usize), ("2M", (2f64 * 1024.0 * 1024.0) as usize), ("1G", (1f64 * 1024.0 * 1024.0 * 1024.0) as usize), diff --git a/datafusion/execution/src/runtime_env.rs b/datafusion/execution/src/runtime_env.rs index 67604c424c..08f5339e7b 100644 --- a/datafusion/execution/src/runtime_env.rs +++ b/datafusion/execution/src/runtime_env.rs @@ -108,12 +108,12 @@ fn create_runtime_config_entries( ConfigEntry { key: "datafusion.runtime.memory_limit".to_string(), value: memory_limit, - description: "Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.", + description: "Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes.", }, ConfigEntry { key: "datafusion.runtime.max_temp_directory_size".to_string(), value: max_temp_directory_size, - description: "Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.", + description: "Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes.", }, ConfigEntry { key: "datafusion.runtime.temp_directory".to_string(), @@ -123,12 +123,12 @@ fn create_runtime_config_entries( ConfigEntry { key: "datafusion.runtime.metadata_cache_limit".to_string(), value: metadata_cache_limit, - description: "Maximum memory to use for file metadata cache such as Parquet metadata. 
Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.", + description: "Maximum memory to use for file metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes.", }, ConfigEntry { key: "datafusion.runtime.list_files_cache_limit".to_string(), value: list_files_cache_limit, - description: "Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.", + description: "Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes.", }, ConfigEntry { key: "datafusion.runtime.list_files_cache_ttl".to_string(), diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index b04c78bd27..8396a60137 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -479,11 +479,11 @@ datafusion.optimizer.skip_failed_rules false When set to true, the logical plan datafusion.optimizer.subset_repartition_threshold 4 Partition count threshold for subset satisfaction optimization. When the current partition count is >= this threshold, DataFusion will skip repartitioning if the required partitioning expression is a subset of the current partition expression such as Hash(a) satisfies Hash(a, b). When the current partition count is < this threshold, DataFusion will repartition to increase parallelism even when subset satisfaction applies. Set to 0 to al [...] 
datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.optimizer.use_statistics_registry false When set to true, the physical plan optimizer uses the pluggable `StatisticsRegistry` for statistics propagation across operators. This enables more accurate cardinality estimates compared to each operator's built-in `partition_statistics`. -datafusion.runtime.list_files_cache_limit 1M Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. +datafusion.runtime.list_files_cache_limit 1M Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes. datafusion.runtime.list_files_cache_ttl NULL TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. -datafusion.runtime.max_temp_directory_size 100G Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. -datafusion.runtime.memory_limit unlimited Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. -datafusion.runtime.metadata_cache_limit 50M Maximum memory to use for file metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. +datafusion.runtime.max_temp_directory_size 100G Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes. +datafusion.runtime.memory_limit unlimited Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes. 
+datafusion.runtime.metadata_cache_limit 50M Maximum memory to use for file metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes. datafusion.runtime.temp_directory NULL The path to the temporary file directory. datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. datafusion.sql_parser.default_null_ordering nulls_max Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `nulls_max` is used to follow Postgres's behavior. postgres rule: <https://www.postgresql.org/docs/current/queries-order.html> diff --git a/datafusion/sqllogictest/test_files/set_variable.slt b/datafusion/sqllogictest/test_files/set_variable.slt index f270b9b169..378a9c83db 100644 --- a/datafusion/sqllogictest/test_files/set_variable.slt +++ b/datafusion/sqllogictest/test_files/set_variable.slt @@ -632,6 +632,15 @@ SHOW datafusion.runtime.list_files_cache_ttl ---- datafusion.runtime.list_files_cache_ttl 1m30s +# Test SET and SHOW for limit 0 +statement ok +SET datafusion.runtime.list_files_cache_limit = '0' + +query TT +SHOW datafusion.runtime.list_files_cache_limit +---- +datafusion.runtime.list_files_cache_limit 0 + # Note: runtime.temp_directory shows the actual temp directory path with a unique suffix, # so we cannot test the exact value. We verify it exists in information_schema instead. 
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 46039f3c99..f7f9426e2b 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -229,14 +229,14 @@ SET datafusion.runtime.memory_limit = '2G'; The following runtime configuration settings are available: -| key | default | description | -| ------------------------------------------ | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| datafusion.runtime.list_files_cache_limit | 1M | Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | -| datafusion.runtime.list_files_cache_ttl | NULL | TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. | -| datafusion.runtime.max_temp_directory_size | 100G | Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | -| datafusion.runtime.memory_limit | NULL | Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | -| datafusion.runtime.metadata_cache_limit | 50M | Maximum memory to use for file metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | -| datafusion.runtime.temp_directory | NULL | The path to the temporary file directory. 
| +| key | default | description | +| ------------------------------------------ | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| datafusion.runtime.list_files_cache_limit | 1M | Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes. | +| datafusion.runtime.list_files_cache_ttl | NULL | TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. | +| datafusion.runtime.max_temp_directory_size | 100G | Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes. | +| datafusion.runtime.memory_limit | NULL | Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes. | +| datafusion.runtime.metadata_cache_limit | 50M | Maximum memory to use for file metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes. | +| datafusion.runtime.temp_directory | NULL | The path to the temporary file directory. | # Tuning Guide --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
