This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 1f59d327ba fix: dfbench respects DATAFUSION_RUNTIME_MEMORY_LIMIT env 
var (#20631)
1f59d327ba is described below

commit 1f59d327ba205f1db9d67b2b1fe72dc84f2b0e38
Author: Adrian Garcia Badaracco <[email protected]>
AuthorDate: Sun Mar 15 13:03:00 2026 -0500

    fix: dfbench respects DATAFUSION_RUNTIME_MEMORY_LIMIT env var (#20631)
    
    ## Which issue does this PR close?
    
    N/A — discovered while running benchmarks with `bench.sh`.
    
    ## Rationale for this change
    
    When running benchmarks via `bench.sh` / `dfbench`, setting
    `DATAFUSION_RUNTIME_MEMORY_LIMIT=2G` is ignored for memory pool
    enforcement. Most `DATAFUSION_*` env vars work because
    `SessionConfig::from_env()` picks them up, but the memory limit is a
    special case — it requires constructing a `MemoryPool` in the
    `RuntimeEnv`, which `dfbench` only did when `--memory-limit` was passed
    as a CLI flag.
    
    ## What changes are included in this PR?
    
    In `runtime_env_builder()`, when `self.memory_limit` (CLI flag) is
    `None`, fall back to reading the `DATAFUSION_RUNTIME_MEMORY_LIMIT` env
    var using the existing `parse_memory_limit()` function. The CLI flag
    still takes precedence when provided.
    
    ## Are these changes tested?
    
    Yes — added `test_runtime_env_builder_reads_env_var` which sets the env
    var, constructs a `CommonOpt` with no CLI memory limit, and verifies the
    resulting `RuntimeEnv` has a `Finite(2GB)` memory pool.
    
    ## Are there any user-facing changes?
    
    `dfbench` now honors the `DATAFUSION_RUNTIME_MEMORY_LIMIT` environment
    variable as a fallback when `--memory-limit` is not passed on the
    command line. No breaking changes — existing CLI flag behavior is
    unchanged.
    
    🤖 Generated with [Claude Code](https://claude.com/claude-code)
    
    Co-authored-by: Claude Opus 4.6 <[email protected]>
---
 benchmarks/src/util/options.rs | 43 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/benchmarks/src/util/options.rs b/benchmarks/src/util/options.rs
index add8ff17fb..bcb4379fbd 100644
--- a/benchmarks/src/util/options.rs
+++ b/benchmarks/src/util/options.rs
@@ -91,7 +91,15 @@ impl CommonOpt {
     pub fn runtime_env_builder(&self) -> Result<RuntimeEnvBuilder> {
         let mut rt_builder = RuntimeEnvBuilder::new();
         const NUM_TRACKED_CONSUMERS: usize = 5;
-        if let Some(memory_limit) = self.memory_limit {
+        // Use CLI --memory-limit if provided, otherwise fall back to
+        // DATAFUSION_RUNTIME_MEMORY_LIMIT env var
+        let memory_limit = self.memory_limit.or_else(|| {
+            std::env::var("DATAFUSION_RUNTIME_MEMORY_LIMIT")
+                .ok()
+                .and_then(|val| parse_capacity_limit(&val).ok())
+        });
+
+        if let Some(memory_limit) = memory_limit {
             let pool: Arc<dyn MemoryPool> = match self.mem_pool_type.as_str() {
                 "fair" => Arc::new(TrackConsumersPool::new(
                     FairSpillPool::new(memory_limit),
@@ -144,6 +152,39 @@ fn parse_capacity_limit(limit: &str) -> Result<usize, 
String> {
 mod tests {
     use super::*;
 
+    #[test]
+    fn test_runtime_env_builder_reads_env_var() {
+        // Set the env var and verify runtime_env_builder picks it up
+        // when no CLI --memory-limit is provided
+        let opt = CommonOpt {
+            iterations: 3,
+            partitions: None,
+            batch_size: None,
+            mem_pool_type: "fair".to_string(),
+            memory_limit: None,
+            sort_spill_reservation_bytes: None,
+            debug: false,
+        };
+
+        // With env var set, builder should succeed and have a memory pool
+        // SAFETY: This test is single-threaded and the env var is restored 
after use
+        unsafe {
+            std::env::set_var("DATAFUSION_RUNTIME_MEMORY_LIMIT", "2G");
+        }
+        let builder = opt.runtime_env_builder().unwrap();
+        let runtime = builder.build().unwrap();
+        unsafe {
+            std::env::remove_var("DATAFUSION_RUNTIME_MEMORY_LIMIT");
+        }
+        // A 2G memory pool should be present — verify it reports the correct 
limit
+        match runtime.memory_pool.memory_limit() {
+            datafusion::execution::memory_pool::MemoryLimit::Finite(limit) => {
+                assert_eq!(limit, 2 * 1024 * 1024 * 1024);
+            }
+            _ => panic!("Expected Finite memory limit"),
+        }
+    }
+
     #[test]
     fn test_parse_capacity_limit_all() {
         // Test valid inputs


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to