This is an automated email from the ASF dual-hosted git repository.
github-merge-queue[bot] pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 2f29038f97 Support '0' value for parse_capacity_limit() (#22014)
2f29038f97 is described below
commit 2f29038f97b02d128e22208a082cfaebc07f36fd
Author: Michael Kleen <[email protected]>
AuthorDate: Fri May 8 04:40:15 2026 +0200
Support '0' value for parse_capacity_limit() (#22014)
## Which issue does this PR close?
- None
## Rationale for this change
This extends `parse_capacity_limit()` to accept a bare `0` as a limit of
0, instead of requiring a unit suffix as in `0K`.
This simplifies configuration and avoids `0K` being misread as the word
`OK` (Okay).
This is based on the
[suggestion](https://github.com/apache/datafusion/pull/20047#discussion_r3181959057)
from @martin-g.
Usage:
```
SET datafusion.runtime.example_limit = '0'
```
instead of:
```
SET datafusion.runtime.example_limit = '0K'
```
## What changes are included in this PR?
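See the rationale above: `parse_capacity_limit()` now returns 0 for the input '0', and the runtime config descriptions, user-guide docs, and tests are updated to match.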
## Are these changes tested?
Yes, with a new unit-test case for '0' and a new sqllogictest case in set_variable.slt.
## Are there any user-facing changes?
Yes.
---
datafusion/core/src/execution/context/mod.rs | 6 +++++-
datafusion/execution/src/runtime_env.rs | 8 ++++----
.../sqllogictest/test_files/information_schema.slt | 8 ++++----
datafusion/sqllogictest/test_files/set_variable.slt | 9 +++++++++
docs/source/user-guide/configs.md | 16 ++++++++--------
5 files changed, 30 insertions(+), 17 deletions(-)
diff --git a/datafusion/core/src/execution/context/mod.rs
b/datafusion/core/src/execution/context/mod.rs
index 87170f595f..d84ef0c898 100644
--- a/datafusion/core/src/execution/context/mod.rs
+++ b/datafusion/core/src/execution/context/mod.rs
@@ -1275,7 +1275,7 @@ impl SessionContext {
}
/// Parse capacity limit from string to number of bytes by allowing units:
K, M and G.
- /// Supports formats like '1.5G', '100M', '512K'
+ /// Supports formats like '1.5G', '100M', '512K'. Capacity limit can be
set to 0 with '0'.
///
/// # Examples
/// ```
@@ -1296,6 +1296,9 @@ impl SessionContext {
"Empty limit value found for '{config_name}'"
));
}
+ if limit == "0" {
+ return Ok(0);
+ }
let (number, unit) = limit.split_at(limit.len() - 1);
let number: f64 = number.parse().map_err(|_| {
plan_datafusion_err!(
@@ -2970,6 +2973,7 @@ mod tests {
// Valid capacity_limit
for (limit, want) in [
+ ("0", 0),
("1.5K", (1.5 * 1024.0) as usize),
("2M", (2f64 * 1024.0 * 1024.0) as usize),
("1G", (1f64 * 1024.0 * 1024.0 * 1024.0) as usize),
diff --git a/datafusion/execution/src/runtime_env.rs
b/datafusion/execution/src/runtime_env.rs
index 67604c424c..08f5339e7b 100644
--- a/datafusion/execution/src/runtime_env.rs
+++ b/datafusion/execution/src/runtime_env.rs
@@ -108,12 +108,12 @@ fn create_runtime_config_entries(
ConfigEntry {
key: "datafusion.runtime.memory_limit".to_string(),
value: memory_limit,
- description: "Maximum memory limit for query execution. Supports
suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2
gigabytes.",
+ description: "Maximum memory limit for query execution. Supports
suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example:
'2G' for 2 gigabytes.",
},
ConfigEntry {
key: "datafusion.runtime.max_temp_directory_size".to_string(),
value: max_temp_directory_size,
- description: "Maximum temporary file directory size. Supports
suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2
gigabytes.",
+ description: "Maximum temporary file directory size. Supports
suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example:
'2G' for 2 gigabytes.",
},
ConfigEntry {
key: "datafusion.runtime.temp_directory".to_string(),
@@ -123,12 +123,12 @@ fn create_runtime_config_entries(
ConfigEntry {
key: "datafusion.runtime.metadata_cache_limit".to_string(),
value: metadata_cache_limit,
- description: "Maximum memory to use for file metadata cache such
as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G
(gigabytes). Example: '2G' for 2 gigabytes.",
+ description: "Maximum memory to use for file metadata cache such
as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G
(gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes.",
},
ConfigEntry {
key: "datafusion.runtime.list_files_cache_limit".to_string(),
value: list_files_cache_limit,
- description: "Maximum memory to use for list files cache. Supports
suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2
gigabytes.",
+ description: "Maximum memory to use for list files cache. Supports
suffixes K (kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example:
'2G' for 2 gigabytes.",
},
ConfigEntry {
key: "datafusion.runtime.list_files_cache_ttl".to_string(),
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt
b/datafusion/sqllogictest/test_files/information_schema.slt
index b04c78bd27..8396a60137 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -479,11 +479,11 @@ datafusion.optimizer.skip_failed_rules false When set to
true, the logical plan
datafusion.optimizer.subset_repartition_threshold 4 Partition count threshold
for subset satisfaction optimization. When the current partition count is >=
this threshold, DataFusion will skip repartitioning if the required
partitioning expression is a subset of the current partition expression such as
Hash(a) satisfies Hash(a, b). When the current partition count is < this
threshold, DataFusion will repartition to increase parallelism even when subset
satisfaction applies. Set to 0 to al [...]
datafusion.optimizer.top_down_join_key_reordering true When set to true, the
physical plan optimizer will run a top down process to reorder the join keys
datafusion.optimizer.use_statistics_registry false When set to true, the
physical plan optimizer uses the pluggable `StatisticsRegistry` for statistics
propagation across operators. This enables more accurate cardinality estimates
compared to each operator's built-in `partition_statistics`.
-datafusion.runtime.list_files_cache_limit 1M Maximum memory to use for list
files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes).
Example: '2G' for 2 gigabytes.
+datafusion.runtime.list_files_cache_limit 1M Maximum memory to use for list
files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes)
or '0' for 0. Example: '2G' for 2 gigabytes.
datafusion.runtime.list_files_cache_ttl NULL TTL (time-to-live) of the entries
in the list file cache. Supports units m (minutes), and s (seconds). Example:
'2m' for 2 minutes.
-datafusion.runtime.max_temp_directory_size 100G Maximum temporary file
directory size. Supports suffixes K (kilobytes), M (megabytes), and G
(gigabytes). Example: '2G' for 2 gigabytes.
-datafusion.runtime.memory_limit unlimited Maximum memory limit for query
execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes).
Example: '2G' for 2 gigabytes.
-datafusion.runtime.metadata_cache_limit 50M Maximum memory to use for file
metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M
(megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.
+datafusion.runtime.max_temp_directory_size 100G Maximum temporary file
directory size. Supports suffixes K (kilobytes), M (megabytes), and G
(gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes.
+datafusion.runtime.memory_limit unlimited Maximum memory limit for query
execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes) or
'0' for 0. Example: '2G' for 2 gigabytes.
+datafusion.runtime.metadata_cache_limit 50M Maximum memory to use for file
metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M
(megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes.
datafusion.runtime.temp_directory NULL The path to the temporary file
directory.
datafusion.sql_parser.collect_spans false When set to true, the source
locations relative to the original SQL query (i.e.
[`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html))
will be collected and recorded in the logical plan nodes.
datafusion.sql_parser.default_null_ordering nulls_max Specifies the default
null ordering for query results. There are 4 options: - `nulls_max`: Nulls
appear last in ascending order. - `nulls_min`: Nulls appear first in ascending
order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`:
Nulls always be last in any order. By default, `nulls_max` is used to follow
Postgres's behavior. postgres rule:
<https://www.postgresql.org/docs/current/queries-order.html>
diff --git a/datafusion/sqllogictest/test_files/set_variable.slt
b/datafusion/sqllogictest/test_files/set_variable.slt
index f270b9b169..378a9c83db 100644
--- a/datafusion/sqllogictest/test_files/set_variable.slt
+++ b/datafusion/sqllogictest/test_files/set_variable.slt
@@ -632,6 +632,15 @@ SHOW datafusion.runtime.list_files_cache_ttl
----
datafusion.runtime.list_files_cache_ttl 1m30s
+# Test SET and SHOW for limit 0
+statement ok
+SET datafusion.runtime.list_files_cache_limit = '0'
+
+query TT
+SHOW datafusion.runtime.list_files_cache_limit
+----
+datafusion.runtime.list_files_cache_limit 0
+
# Note: runtime.temp_directory shows the actual temp directory path with a
unique suffix,
# so we cannot test the exact value. We verify it exists in information_schema
instead.
diff --git a/docs/source/user-guide/configs.md
b/docs/source/user-guide/configs.md
index 46039f3c99..f7f9426e2b 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -229,14 +229,14 @@ SET datafusion.runtime.memory_limit = '2G';
The following runtime configuration settings are available:
-| key | default | description
|
-| ------------------------------------------ | ------- |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
-| datafusion.runtime.list_files_cache_limit | 1M | Maximum memory to use
for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G
(gigabytes). Example: '2G' for 2 gigabytes. |
-| datafusion.runtime.list_files_cache_ttl | NULL | TTL (time-to-live) of
the entries in the list file cache. Supports units m (minutes), and s
(seconds). Example: '2m' for 2 minutes. |
-| datafusion.runtime.max_temp_directory_size | 100G | Maximum temporary
file directory size. Supports suffixes K (kilobytes), M (megabytes), and G
(gigabytes). Example: '2G' for 2 gigabytes. |
-| datafusion.runtime.memory_limit | NULL | Maximum memory limit
for query execution. Supports suffixes K (kilobytes), M (megabytes), and G
(gigabytes). Example: '2G' for 2 gigabytes. |
-| datafusion.runtime.metadata_cache_limit | 50M | Maximum memory to use
for file metadata cache such as Parquet metadata. Supports suffixes K
(kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. |
-| datafusion.runtime.temp_directory | NULL | The path to the
temporary file directory.
|
+| key | default | description
|
+| ------------------------------------------ | ------- |
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
+| datafusion.runtime.list_files_cache_limit | 1M | Maximum memory to use
for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G
(gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes.
|
+| datafusion.runtime.list_files_cache_ttl | NULL | TTL (time-to-live) of
the entries in the list file cache. Supports units m (minutes), and s
(seconds). Example: '2m' for 2 minutes.
|
+| datafusion.runtime.max_temp_directory_size | 100G | Maximum temporary
file directory size. Supports suffixes K (kilobytes), M (megabytes), and G
(gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes.
|
+| datafusion.runtime.memory_limit | NULL | Maximum memory limit
for query execution. Supports suffixes K (kilobytes), M (megabytes), and G
(gigabytes) or '0' for 0. Example: '2G' for 2 gigabytes.
|
+| datafusion.runtime.metadata_cache_limit | 50M | Maximum memory to use
for file metadata cache such as Parquet metadata. Supports suffixes K
(kilobytes), M (megabytes), and G (gigabytes) or '0' for 0. Example: '2G' for 2
gigabytes. |
+| datafusion.runtime.temp_directory | NULL | The path to the
temporary file directory.
|
# Tuning Guide
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]