(sedona-db) branch main updated: feat(rust/sedona): default memory limit to 75% of physical memory with fair pool (#687)

paleolimbot Wed, 04 Mar 2026 11:46:58 -0800

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git



The following commit(s) were added to refs/heads/main by this push:
     new 2c8810a8 feat(rust/sedona): default memory limit to 75% of physical 
memory with fair pool (#687)
2c8810a8 is described below

commit 2c8810a838e5bcacf2294e7b5bd973f8267781e6
Author: Kristin Cowalcijk <[email protected]>
AuthorDate: Thu Mar 5 03:46:09 2026 +0800

    feat(rust/sedona): default memory limit to 75% of physical memory with fair 
pool (#687)
---
 Cargo.lock                                  |  94 +++++++++++++++++++++
 Cargo.toml                                  |   1 +
 docs/memory-management.ipynb                |  54 ++++++------
 docs/memory-management.md                   |  44 +++++-----
 python/sedonadb/python/sedonadb/_options.py |  35 ++++----
 rust/sedona/Cargo.toml                      |   1 +
 rust/sedona/src/context.rs                  |   9 +-
 rust/sedona/src/context_builder.rs          | 122 ++++++++++++++++++++++++----
 sedona-cli/src/main.rs                      |  35 ++++++--
 9 files changed, 311 insertions(+), 84 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index ac91274b..6cbb4d3e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3925,6 +3925,15 @@ dependencies = [
  "minimal-lexical",
 ]
 
+[[package]]
+name = "ntapi"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae"
+dependencies = [
+ "winapi",
+]
+
 [[package]]
 name = "num-bigint"
 version = "0.4.6"
@@ -3992,6 +4001,25 @@ dependencies = [
  "syn 2.0.114",
 ]
 
+[[package]]
+name = "objc2-core-foundation"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1c10c2894a6fed806ade6027bcd50662746363a9589d3ec9d9bef30a4e4bc166"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "objc2-io-kit"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "71c1c64d6120e51cd86033f67176b1cb66780c2efe34dec55176f77befd93c0a"
+dependencies = [
+ "libc",
+ "objc2-core-foundation",
+]
+
 [[package]]
 name = "object"
 version = "0.32.2"
@@ -5113,6 +5141,7 @@ dependencies = [
  "sedona-tg",
  "serde",
  "serde_json",
+ "sysinfo",
  "tempfile",
  "tokio",
  "url",
@@ -6022,6 +6051,20 @@ dependencies = [
  "syn 2.0.114",
 ]
 
+[[package]]
+name = "sysinfo"
+version = "0.38.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d03c61d2a49c649a15c407338afe7accafde9dac869995dccb73e5f7ef7d9034"
+dependencies = [
+ "libc",
+ "memchr",
+ "ntapi",
+ "objc2-core-foundation",
+ "objc2-io-kit",
+ "windows",
+]
+
 [[package]]
 name = "tar"
 version = "0.4.44"
@@ -6694,6 +6737,27 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
+[[package]]
+name = "windows"
+version = "0.62.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580"
+dependencies = [
+ "windows-collections",
+ "windows-core",
+ "windows-future",
+ "windows-numerics",
+]
+
+[[package]]
+name = "windows-collections"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610"
+dependencies = [
+ "windows-core",
+]
+
 [[package]]
 name = "windows-core"
 version = "0.62.2"
@@ -6707,6 +6771,17 @@ dependencies = [
  "windows-strings",
 ]
 
+[[package]]
+name = "windows-future"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb"
+dependencies = [
+ "windows-core",
+ "windows-link",
+ "windows-threading",
+]
+
 [[package]]
 name = "windows-implement"
 version = "0.60.2"
@@ -6735,6 +6810,16 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
 
+[[package]]
+name = "windows-numerics"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26"
+dependencies = [
+ "windows-core",
+ "windows-link",
+]
+
 [[package]]
 name = "windows-result"
 version = "0.4.1"
@@ -6822,6 +6907,15 @@ dependencies = [
  "windows_x86_64_msvc 0.53.1",
 ]
 
+[[package]]
+name = "windows-threading"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37"
+dependencies = [
+ "windows-link",
+]
+
 [[package]]
 name = "windows_aarch64_gnullvm"
 version = "0.52.6"
diff --git a/Cargo.toml b/Cargo.toml
index abe66917..df7f8757 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -118,6 +118,7 @@ rand = "0.10"
 regex = "1.12"
 rstest = "0.26.1"
 serde = { version = "1" }
+sysinfo = "0.38"
 serde_json = { version = "1" }
 serde_with = { version = "1" }
 tempfile = { version = "3"}
diff --git a/docs/memory-management.ipynb b/docs/memory-management.ipynb
index 0c338d4c..75379559 100644
--- a/docs/memory-management.ipynb
+++ b/docs/memory-management.ipynb
@@ -26,7 +26,9 @@
     "\n",
     "# Memory Management and Spilling\n",
     "\n",
-    "SedonaDB supports memory-limited execution with automatic spill-to-disk, 
allowing you to process datasets that are larger than available memory. When a 
memory limit is configured, operators that exceed their memory budget 
automatically spill intermediate data to temporary files on disk and read them 
back as needed."
+    "SedonaDB uses memory-limited execution with automatic spill-to-disk out 
of the box. By default, the memory limit is set to **75% of the system's 
physical memory** and memory is managed by a **fair** pool. When operators 
exceed their memory budget they automatically spill intermediate data to 
temporary files on disk and read them back as needed.\n",
+    "\n",
+    "This means SedonaDB works well for large datasets without any 
configuration. The sections below explain how to tune the defaults when needed."
    ]
   },
   {
@@ -36,12 +38,12 @@
    "source": [
     "## Configuring Memory Limits\n",
     "\n",
-    "Set `memory_limit` on the context options to cap the total memory 
available for query execution. The limit accepts an integer (bytes) or a 
human-readable string such as `\"4gb\"`, `\"512m\"`, or `\"1.5g\"`."
+    "By default, SedonaDB limits query execution memory to **75% of the 
system's physical memory**. You can override this by setting `memory_limit` on 
the context options before running your first query. The limit accepts an 
integer (bytes) or a human-readable string such as `\"4gb\"`, `\"512m\"`, or 
`\"1.5g\"`."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 22,
    "id": "d1f99fcf",
    "metadata": {},
    "outputs": [],
@@ -57,7 +59,14 @@
    "id": "1fdc73aa",
    "metadata": {},
    "source": [
-    "Without a memory limit, SedonaDB uses an unbounded memory pool and 
operators can use as much memory as needed (until the process hits system 
limits). In this mode, operators typically won't spill to disk because there is 
no memory budget to enforce.\n",
+    "To disable the memory limit entirely and use an unbounded memory pool, 
set `memory_limit` to `\"unlimited\"`:\n",
+    "\n",
+    "```python\n",
+    "sd = sedona.db.connect()\n",
+    "sd.options.memory_limit = \"unlimited\"\n",
+    "```\n",
+    "\n",
+    "In unbounded mode, operators can use as much memory as needed (until the 
process hits system limits) and typically won't spill to disk because there is 
no memory budget to enforce.\n",
     "\n",
     "> **Note:** All runtime options (`memory_limit`, `memory_pool_type`, 
`temp_dir`, `unspillable_reserve_ratio`) must be set before the internal 
context is initialized. The internal context is created on the first call to 
`sd.sql(...)` (including `SET` statements) or any read method (for example, 
`sd.read_parquet(...)`) -- not when you call `.execute()` on the returned 
DataFrame. Once the internal context is created, these runtime options become 
read-only."
    ]
@@ -71,15 +80,15 @@
     "\n",
     "The `memory_pool_type` option controls how the memory budget is 
distributed among concurrent operators. Two pool types are available:\n",
     "\n",
-    "- **`\"greedy\"`** -- Grants memory reservations on a 
first-come-first-served basis. This is the default when no pool type is 
specified. Simple, but can lead to memory reservation failures under pressure 
-- one consumer may exhaust the pool before others get a chance to reserve 
memory.\n",
-    "- **`\"fair\"` (recommended)** -- Distributes memory fairly among 
spillable consumers and reserves a fraction of the pool for unspillable 
consumers. More stable under memory pressure and significantly less likely to 
cause reservation failures, at the cost of slightly lower utilization of the 
total reserved memory.\n",
+    "- **`\"fair\"` (default)** -- Distributes memory fairly among spillable 
consumers and reserves a fraction of the pool for unspillable consumers. Stable 
under memory pressure and significantly less likely to cause reservation 
failures.\n",
+    "- **`\"greedy\"`** -- Grants memory reservations on a 
first-come-first-served basis. Simpler, but can lead to memory reservation 
failures under pressure -- one consumer may exhaust the pool before others get 
a chance to reserve memory.\n",
     "\n",
-    "We recommend using `\"fair\"` whenever a memory limit is configured."
+    "You only need to set `memory_pool_type` if you want to switch to the 
greedy pool:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 23,
    "id": "b1dff726",
    "metadata": {},
    "outputs": [],
@@ -88,7 +97,7 @@
     "\n",
     "sd = sedona.db.connect()\n",
     "sd.options.memory_limit = \"4gb\"\n",
-    "sd.options.memory_pool_type = \"fair\""
+    "sd.options.memory_pool_type = \"greedy\""
    ]
   },
   {
@@ -96,7 +105,7 @@
    "id": "bd4c0a76",
    "metadata": {},
    "source": [
-    "> **Note:** `memory_pool_type` only takes effect when `memory_limit` is 
set."
+    "> **Note:** `memory_pool_type` only takes effect when a memory limit is 
active (i.e., `memory_limit` is not set to `\"unlimited\"`)."
    ]
   },
   {
@@ -111,7 +120,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
    "id": "dc0718cf",
    "metadata": {},
    "outputs": [],
@@ -120,7 +129,6 @@
     "\n",
     "sd = sedona.db.connect()\n",
     "sd.options.memory_limit = \"8gb\"\n",
-    "sd.options.memory_pool_type = \"fair\"\n",
     "sd.options.unspillable_reserve_ratio = 0.3  # reserve 30% for unspillable 
consumers"
    ]
   },
@@ -136,7 +144,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 25,
    "id": "c8d7a5c9",
    "metadata": {},
    "outputs": [],
@@ -144,8 +152,6 @@
     "import sedona.db\n",
     "\n",
     "sd = sedona.db.connect()\n",
-    "sd.options.memory_limit = \"4gb\"\n",
-    "sd.options.memory_pool_type = \"fair\"\n",
     "sd.options.temp_dir = \"/mnt/fast-ssd/sedona-spill\""
    ]
   },
@@ -154,14 +160,14 @@
    "id": "5d318b8f",
    "metadata": {},
    "source": [
-    "## Example: Spatial Join with Limited Memory\n",
+    "## Example: Spatial Join with Memory Management\n",
     "\n",
-    "This example performs a spatial join between Natural Earth cities 
(points) and Natural Earth countries (polygons) using `ST_Contains`. Spatial 
joins are one of the most common workloads that benefit from memory limits and 
spill-to-disk."
+    "This example performs a spatial join between Natural Earth cities 
(points) and Natural Earth countries (polygons) using `ST_Contains`. 4GB memory 
limit and fair pool are used. We also override `temp_dir` to control where 
spill files are written."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "id": "1ed77d58",
    "metadata": {},
    "outputs": [
@@ -201,12 +207,12 @@
     "\n",
     "sd = sedona.db.connect()\n",
     "\n",
-    "# Configure runtime options before any sd.sql(...) or sd.read_* call.\n",
+    "# Optionally override runtime options before any sd.sql(...) or sd.read_* 
call.\n",
     "sd.options.memory_limit = \"4gb\"\n",
     "sd.options.memory_pool_type = \"fair\"\n",
-    "sd.options.unspillable_reserve_ratio = 0.2\n",
     "sd.options.temp_dir = \"/tmp/sedona-spill\"\n",
     "\n",
+    "# Call sd.sql(...) or sd.read_* to trigger the creation of the context 
with the above options.\n",
     "cities = sd.read_parquet(\n",
     "    
\"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet\"\n",
     ")\n",
@@ -236,7 +242,7 @@
    "source": [
     "## Operators Supporting Memory Limits\n",
     "\n",
-    "When a memory limit is configured, the following operators automatically 
spill intermediate data to disk when they exceed their memory budget.\n",
+    "With the default memory limit active, the following operators 
automatically spill intermediate data to disk when they exceed their memory 
budget.\n",
     "\n",
     "In practice, this means memory limits and spilling can apply to both 
SedonaDB's spatial operators and DataFusion's general-purpose operators used by 
common SQL constructs.\n",
     "\n",
@@ -286,8 +292,6 @@
     "import sedona.db\n",
     "\n",
     "sd = sedona.db.connect()\n",
-    "sd.options.memory_limit = \"4gb\"\n",
-    "sd.options.memory_pool_type = \"fair\"\n",
     "\n",
     "# Enable LZ4 compression for spill files.\n",
     "sd.sql(\"SET datafusion.execution.spill_compression = 
'lz4_frame'\").execute()"
@@ -300,7 +304,7 @@
    "source": [
     "### Maximum temporary directory size\n",
     "\n",
-    "DataFusion limits the total size of temporary spill files to prevent 
unbounded disk usage. The default limit is **100 G**. If your workload needs to 
spill more data than this, increase the limit."
+    "DataFusion limits the total size of temporary spill files to prevent 
unbounded disk usage. The default limit is **100G**. If your workload needs to 
spill more data than this, increase the limit."
    ]
   },
   {
@@ -313,8 +317,6 @@
     "import sedona.db\n",
     "\n",
     "sd = sedona.db.connect()\n",
-    "sd.options.memory_limit = \"4gb\"\n",
-    "sd.options.memory_pool_type = \"fair\"\n",
     "\n",
     "# Increase the spill directory size limit to 500 GB.\n",
     "sd.sql(\"SET datafusion.runtime.max_temp_directory_size = 
'500G'\").execute()"
diff --git a/docs/memory-management.md b/docs/memory-management.md
index 0318e807..2acbabfb 100644
--- a/docs/memory-management.md
+++ b/docs/memory-management.md
@@ -19,11 +19,13 @@
 
 # Memory Management and Spilling
 
-SedonaDB supports memory-limited execution with automatic spill-to-disk, 
allowing you to process datasets that are larger than available memory. When a 
memory limit is configured, operators that exceed their memory budget 
automatically spill intermediate data to temporary files on disk and read them 
back as needed.
+SedonaDB uses memory-limited execution with automatic spill-to-disk out of the 
box. By default, the memory limit is set to **75% of the system's physical 
memory** and memory is managed by a **fair** pool. When operators exceed their 
memory budget they automatically spill intermediate data to temporary files on 
disk and read them back as needed.
+
+This means SedonaDB works well for large datasets without any configuration. 
The sections below explain how to tune the defaults when needed.
 
 ## Configuring Memory Limits
 
-Set `memory_limit` on the context options to cap the total memory available 
for query execution. The limit accepts an integer (bytes) or a human-readable 
string such as `"4gb"`, `"512m"`, or `"1.5g"`.
+By default, SedonaDB limits query execution memory to **75% of the system's 
physical memory**. You can override this by setting `memory_limit` on the 
context options before running your first query. The limit accepts an integer 
(bytes) or a human-readable string such as `"4gb"`, `"512m"`, or `"1.5g"`.
 
 
 ```python
@@ -33,7 +35,14 @@ sd = sedona.db.connect()
 sd.options.memory_limit = "4gb"
 ```
 
-Without a memory limit, SedonaDB uses an unbounded memory pool and operators 
can use as much memory as needed (until the process hits system limits). In 
this mode, operators typically won't spill to disk because there is no memory 
budget to enforce.
+To disable the memory limit entirely and use an unbounded memory pool, set 
`memory_limit` to `"unlimited"`:
+
+```python
+sd = sedona.db.connect()
+sd.options.memory_limit = "unlimited"
+```
+
+In unbounded mode, operators can use as much memory as needed (until the 
process hits system limits) and typically won't spill to disk because there is 
no memory budget to enforce.
 
 > **Note:** All runtime options (`memory_limit`, `memory_pool_type`, 
 > `temp_dir`, `unspillable_reserve_ratio`) must be set before the internal 
 > context is initialized. The internal context is created on the first call to 
 > `sd.sql(...)` (including `SET` statements) or any read method (for example, 
 > `sd.read_parquet(...)`) -- not when you call `.execute()` on the returned 
 > DataFrame. Once the internal context is created, these runtime options 
 > become read-only.
 
@@ -41,10 +50,10 @@ Without a memory limit, SedonaDB uses an unbounded memory 
pool and operators can
 
 The `memory_pool_type` option controls how the memory budget is distributed 
among concurrent operators. Two pool types are available:
 
-- **`"greedy"`** -- Grants memory reservations on a first-come-first-served 
basis. This is the default when no pool type is specified. Simple, but can lead 
to memory reservation failures under pressure -- one consumer may exhaust the 
pool before others get a chance to reserve memory.
-- **`"fair"` (recommended)** -- Distributes memory fairly among spillable 
consumers and reserves a fraction of the pool for unspillable consumers. More 
stable under memory pressure and significantly less likely to cause reservation 
failures, at the cost of slightly lower utilization of the total reserved 
memory.
+- **`"fair"` (default)** -- Distributes memory fairly among spillable 
consumers and reserves a fraction of the pool for unspillable consumers. Stable 
under memory pressure and significantly less likely to cause reservation 
failures.
+- **`"greedy"`** -- Grants memory reservations on a first-come-first-served 
basis. Simpler, but can lead to memory reservation failures under pressure -- 
one consumer may exhaust the pool before others get a chance to reserve memory.
 
-We recommend using `"fair"` whenever a memory limit is configured.
+You only need to set `memory_pool_type` if you want to switch to the greedy 
pool:
 
 
 ```python
@@ -52,10 +61,10 @@ import sedona.db
 
 sd = sedona.db.connect()
 sd.options.memory_limit = "4gb"
-sd.options.memory_pool_type = "fair"
+sd.options.memory_pool_type = "greedy"
 ```
 
-> **Note:** `memory_pool_type` only takes effect when `memory_limit` is set.
+> **Note:** `memory_pool_type` only takes effect when a memory limit is active 
(i.e., `memory_limit` is not set to `"unlimited"`).
 
 ### Unspillable reserve ratio
 
@@ -67,7 +76,6 @@ import sedona.db
 
 sd = sedona.db.connect()
 sd.options.memory_limit = "8gb"
-sd.options.memory_pool_type = "fair"
 sd.options.unspillable_reserve_ratio = 0.3  # reserve 30% for unspillable 
consumers
 ```
 
@@ -80,14 +88,12 @@ By default, DataFusion uses the system temporary directory 
for spill files. You
 import sedona.db
 
 sd = sedona.db.connect()
-sd.options.memory_limit = "4gb"
-sd.options.memory_pool_type = "fair"
 sd.options.temp_dir = "/mnt/fast-ssd/sedona-spill"
 ```
 
-## Example: Spatial Join with Limited Memory
+## Example: Spatial Join with Memory Management
 
-This example performs a spatial join between Natural Earth cities (points) and 
Natural Earth countries (polygons) using `ST_Contains`. Spatial joins are one 
of the most common workloads that benefit from memory limits and spill-to-disk.
+This example performs a spatial join between Natural Earth cities (points) and 
Natural Earth countries (polygons) using `ST_Contains`. 4GB memory limit and 
fair pool are used. We also override `temp_dir` to control where spill files 
are written.
 
 
 ```python
@@ -95,12 +101,12 @@ import sedona.db
 
 sd = sedona.db.connect()
 
-# Configure runtime options before any sd.sql(...) or sd.read_* call.
+# Optionally override runtime options before any sd.sql(...) or sd.read_* call.
 sd.options.memory_limit = "4gb"
 sd.options.memory_pool_type = "fair"
-sd.options.unspillable_reserve_ratio = 0.2
 sd.options.temp_dir = "/tmp/sedona-spill"
 
+# Call sd.sql(...) or sd.read_* to trigger the creation of the context with 
the above options.
 cities = sd.read_parquet(
     
"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet"
 )
@@ -151,7 +157,7 @@ sd.sql(
 
 ## Operators Supporting Memory Limits
 
-When a memory limit is configured, the following operators automatically spill 
intermediate data to disk when they exceed their memory budget.
+With the default memory limit active, the following operators automatically 
spill intermediate data to disk when they exceed their memory budget.
 
 In practice, this means memory limits and spilling can apply to both 
SedonaDB's spatial operators and DataFusion's general-purpose operators used by 
common SQL constructs.
 
@@ -183,8 +189,6 @@ By default, data is written to spill files uncompressed. 
Enabling compression re
 import sedona.db
 
 sd = sedona.db.connect()
-sd.options.memory_limit = "4gb"
-sd.options.memory_pool_type = "fair"
 
 # Enable LZ4 compression for spill files.
 sd.sql("SET datafusion.execution.spill_compression = 'lz4_frame'").execute()
@@ -192,15 +196,13 @@ sd.sql("SET datafusion.execution.spill_compression = 
'lz4_frame'").execute()
 
 ### Maximum temporary directory size
 
-DataFusion limits the total size of temporary spill files to prevent unbounded 
disk usage. The default limit is **100 G**. If your workload needs to spill 
more data than this, increase the limit.
+DataFusion limits the total size of temporary spill files to prevent unbounded 
disk usage. The default limit is **100G**. If your workload needs to spill more 
data than this, increase the limit.
 
 
 ```python
 import sedona.db
 
 sd = sedona.db.connect()
-sd.options.memory_limit = "4gb"
-sd.options.memory_pool_type = "fair"
 
 # Increase the spill directory size limit to 500 GB.
 sd.sql("SET datafusion.runtime.max_temp_directory_size = '500G'").execute()
diff --git a/python/sedonadb/python/sedonadb/_options.py 
b/python/sedonadb/python/sedonadb/_options.py
index 5da51077..3e0ab9e1 100644
--- a/python/sedonadb/python/sedonadb/_options.py
+++ b/python/sedonadb/python/sedonadb/_options.py
@@ -37,17 +37,20 @@ class Options:
     created will raise a `RuntimeError`:
 
     - `memory_limit`: Maximum memory for execution, in bytes or as a
-      human-readable string (e.g., `"4gb"`, `"512m"`).
+      human-readable string (e.g., `"4gb"`, `"512m"`). Set to
+      `"unlimited"` to disable the memory limit. Defaults to 75% of
+      system physical memory.
     - `temp_dir`: Directory for temporary/spill files.
     - `memory_pool_type`: Memory pool type (`"greedy"` or `"fair"`).
+      Defaults to `"fair"`.
     - `unspillable_reserve_ratio`: Fraction of memory reserved for
       unspillable consumers (only applies to the `"fair"` pool type).
 
     Examples:
 
         >>> sd = sedona.db.connect()
-        >>> sd.options.memory_limit = "4gb"
-        >>> sd.options.memory_pool_type = "fair"
+        >>> sd.options.memory_limit = "4gb"          # override default (75% 
of RAM)
+        >>> sd.options.memory_pool_type = "greedy"    # override default (fair)
         >>> sd.options.temp_dir = "/tmp/sedona-spill"
         >>> sd.options.interactive = True
         >>> sd.sql("SELECT 1 as one")
@@ -67,7 +70,7 @@ class Options:
         # Runtime options (must be set before first query)
         self._memory_limit = None
         self._temp_dir = None
-        self._memory_pool_type = "greedy"
+        self._memory_pool_type = None
         self._unspillable_reserve_ratio = None
 
         # Set to True once the internal context is created; after this,
@@ -126,9 +129,9 @@ class Options:
         """Maximum memory for query execution.
 
         Accepts an integer (bytes) or a human-readable string such as
-        `"4gb"`, `"512m"`, or `"1.5g"`. When set, a bounded memory pool is
-        created to enforce this limit. Without a memory limit, DataFusion's
-        default unbounded pool is used.
+        `"4gb"`, `"512m"`, or `"1.5g"`. Set to `"unlimited"` to disable
+        the memory limit entirely. When `None`, the Rust-side default
+        (75% of system physical memory) is used.
 
         Must be set before the first query is executed.
 
@@ -137,6 +140,7 @@ class Options:
             >>> sd = sedona.db.connect()
             >>> sd.options.memory_limit = "4gb"
             >>> sd.options.memory_limit = 4 * 1024 * 1024 * 1024  # equivalent
+            >>> sd.options.memory_limit = "unlimited"  # disable memory limit
         """
         return self._memory_limit
 
@@ -175,26 +179,27 @@ class Options:
             )
 
     @property
-    def memory_pool_type(self) -> str:
+    def memory_pool_type(self) -> Optional[str]:
         """Memory pool type: `"greedy"` or `"fair"`.
 
-        - `"greedy"`: A simple pool that grants reservations on a
-          first-come-first-served basis. This is the default.
         - `"fair"`: A pool that fairly distributes memory among spillable
           consumers and reserves a fraction for unspillable consumers
-          (configured via `unspillable_reserve_ratio`).
+          (configured via `unspillable_reserve_ratio`). This is the default.
+        - `"greedy"`: A simple pool that grants reservations on a
+          first-come-first-served basis.
 
-        Only takes effect when `memory_limit` is set.
+        When `None`, the Rust-side default (`"fair"`) is used.
+        Only takes effect when a memory limit is active.
         Must be set before the first query is executed.
         """
         return self._memory_pool_type
 
     @memory_pool_type.setter
-    def memory_pool_type(self, value: Literal["greedy", "fair"]) -> None:
+    def memory_pool_type(self, value: "Optional[Literal['greedy', 'fair']]") 
-> None:
         self._check_runtime_mutable("memory_pool_type")
-        if value not in ("greedy", "fair"):
+        if value is not None and value not in ("greedy", "fair"):
             raise ValueError(
-                f"memory_pool_type must be 'greedy' or 'fair', got '{value}'"
+                f"memory_pool_type must be 'greedy', 'fair', or None, got 
'{value}'"
             )
         self._memory_pool_type = value
 
diff --git a/rust/sedona/Cargo.toml b/rust/sedona/Cargo.toml
index eefc5b93..0c6fb4c9 100644
--- a/rust/sedona/Cargo.toml
+++ b/rust/sedona/Cargo.toml
@@ -85,5 +85,6 @@ sedona-testing = { workspace = true }
 sedona-tg = { workspace = true, optional = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
+sysinfo = { workspace = true }
 tokio = { workspace = true }
 url = { workspace = true }
diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs
index ee70d598..92940bd7 100644
--- a/rust/sedona/src/context.rs
+++ b/rust/sedona/src/context.rs
@@ -871,8 +871,13 @@ mod tests {
             .expect("SedonaOptions not found");
         assert!(opts.spatial_join.spilled_batch_in_memory_size_threshold >= 10 
* 1024 * 1024);
 
-        // Specify no memory limit, spilled batch threshold should be 
unlimited (0 is for unlimited)
-        let ctx = SedonaContextBuilder::new().build().await.unwrap();
+        // Explicitly disable the memory limit; spilled batch threshold should 
be unlimited
+        // (0 means unlimited)
+        let ctx = SedonaContextBuilder::new()
+            .without_memory_limit()
+            .build()
+            .await
+            .unwrap();
         let state = ctx.ctx.state();
         let opts = state
             .config_options()
diff --git a/rust/sedona/src/context_builder.rs 
b/rust/sedona/src/context_builder.rs
index ad6b79c8..9341b358 100644
--- a/rust/sedona/src/context_builder.rs
+++ b/rust/sedona/src/context_builder.rs
@@ -33,6 +33,18 @@ use crate::{
     size_parser,
 };
 
+/// The fraction of total physical memory to use as the default memory limit.
+const DEFAULT_MEMORY_FRACTION: f64 = 0.75;
+
+/// Compute the default memory limit as 75% of total physical memory.
+fn default_memory_limit() -> usize {
+    let mut sys = sysinfo::System::new();
+    sys.refresh_memory();
+    // `System::total_memory()` returns bytes since sysinfo 0.23+.
+    let total = sys.total_memory() as f64;
+    (total * DEFAULT_MEMORY_FRACTION) as usize
+}
+
 /// Builder for constructing a [`SedonaContext`] with configurable runtime
 /// environment settings.
 ///
@@ -40,6 +52,10 @@ use crate::{
 /// and runtime environments so that the same logic can be reused across the
 /// CLI, Python bindings, ADBC driver, and any future entry points.
 ///
+/// By default, the builder uses 75% of the system's physical memory as the
+/// memory limit and a fair memory pool. Use 
[`without_memory_limit`](Self::without_memory_limit)
+/// or pass `"unlimited"` as the `memory_limit` option to disable the limit.
+///
 /// # Examples
 ///
 /// ```rust,no_run
@@ -47,12 +63,24 @@ use crate::{
 /// use sedona::context_builder::SedonaContextBuilder;
 /// use sedona::pool_type::PoolType;
 ///
+/// // Uses defaults: 75% of physical memory, fair pool
+/// let ctx = SedonaContextBuilder::new()
+///     .build()
+///     .await?;
+///
+/// // Override with explicit memory limit
 /// let ctx = SedonaContextBuilder::new()
 ///     .with_memory_limit(4 * 1024 * 1024 * 1024)
 ///     .with_pool_type(PoolType::Fair)
 ///     .with_temp_dir("/tmp/sedona-spill".to_string())
 ///     .build()
 ///     .await?;
+///
+/// // Disable memory limit entirely
+/// let ctx = SedonaContextBuilder::new()
+///     .without_memory_limit()
+///     .build()
+///     .await?;
 /// # Ok(())
 /// # }
 /// ```
@@ -69,6 +97,11 @@ use crate::{
 /// opts.insert("memory_pool_type".to_string(), "fair".to_string());
 ///
 /// let ctx = SedonaContextBuilder::from_options(&opts)?.build().await?;
+///
+/// // Use "unlimited" to disable memory limit
+/// let mut opts = HashMap::new();
+/// opts.insert("memory_limit".to_string(), "unlimited".to_string());
+/// let ctx = SedonaContextBuilder::from_options(&opts)?.build().await?;
 /// # Ok(())
 /// # }
 /// ```
@@ -89,15 +122,15 @@ impl SedonaContextBuilder {
     /// Create a new builder with default settings.
     ///
     /// Defaults:
-    /// - `memory_limit`: `None` (no limit, uses DataFusion's default 
unbounded pool)
-    /// - `pool_type`: `PoolType::Greedy`
+    /// - `memory_limit`: 75% of total physical memory
+    /// - `pool_type`: `PoolType::Fair`
     /// - `unspillable_reserve_ratio`: `0.2`
     /// - `temp_dir`: `None` (uses DataFusion's default temp directory)
     pub fn new() -> Self {
         Self {
-            memory_limit: None,
+            memory_limit: Some(default_memory_limit()),
             temp_dir: None,
-            pool_type: PoolType::Greedy,
+            pool_type: PoolType::Fair,
             unspillable_reserve_ratio: DEFAULT_UNSPILLABLE_RESERVE_RATIO,
         }
     }
@@ -107,8 +140,9 @@ impl SedonaContextBuilder {
     /// Recognized keys:
     /// - `"memory_limit"`: Memory limit as a human-readable size string
     ///   (e.g., `"4gb"`, `"512m"`, `"1.5g"`) or plain bytes (e.g.,
-    ///   `"4294967296"`). See [`size_parser::parse_size_string`] for
-    ///   supported suffixes.
+    ///   `"4294967296"`). Use `"unlimited"` to disable the memory limit
+    ///   entirely. See [`size_parser::parse_size_string`] for supported
+    ///   suffixes.
     /// - `"temp_dir"`: Path for temporary/spill files
     /// - `"memory_pool_type"`: `"greedy"` or `"fair"`
     /// - `"unspillable_reserve_ratio"`: Float between 0.0 and 1.0
@@ -118,8 +152,12 @@ impl SedonaContextBuilder {
         let mut builder = Self::new();
 
         if let Some(memory_limit) = options.get("memory_limit") {
-            let limit = size_parser::parse_size_string(memory_limit)?;
-            builder = builder.with_memory_limit(limit);
+            if memory_limit.eq_ignore_ascii_case("unlimited") {
+                builder = builder.without_memory_limit();
+            } else {
+                let limit = size_parser::parse_size_string(memory_limit)?;
+                builder = builder.with_memory_limit(limit);
+            }
         }
 
         if let Some(temp_dir) = options.get("temp_dir") {
@@ -154,6 +192,15 @@ impl SedonaContextBuilder {
         self
     }
 
+    /// Remove the memory limit.
+    ///
+    /// This disables the default memory pool and uses DataFusion's
+    /// unbounded memory pool instead.
+    pub fn without_memory_limit(mut self) -> Self {
+        self.memory_limit = None;
+        self
+    }
+
     /// Set the directory for temporary/spill files.
     pub fn with_temp_dir(mut self, temp_dir: String) -> Self {
         self.temp_dir = Some(temp_dir);
@@ -239,9 +286,12 @@ mod tests {
     #[test]
     fn test_default_builder() {
         let builder = SedonaContextBuilder::new();
-        assert!(builder.memory_limit.is_none());
+        // Default memory limit should be 75% of physical memory
+        let expected_limit = default_memory_limit();
+        assert_eq!(builder.memory_limit, Some(expected_limit));
+        assert!(builder.memory_limit.unwrap() > 0);
         assert!(builder.temp_dir.is_none());
-        assert_eq!(builder.pool_type, PoolType::Greedy);
+        assert_eq!(builder.pool_type, PoolType::Fair);
         assert!(
             (builder.unspillable_reserve_ratio - DEFAULT_UNSPILLABLE_RESERVE_RATIO).abs()
                 < f64::EPSILON
@@ -262,6 +312,12 @@ mod tests {
         assert!((builder.unspillable_reserve_ratio - 0.3).abs() < f64::EPSILON);
     }
 
+    #[test]
+    fn test_without_memory_limit() {
+        let builder = SedonaContextBuilder::new().without_memory_limit();
+        assert!(builder.memory_limit.is_none());
+    }
+
     #[test]
     fn test_invalid_unspillable_reserve_ratio() {
         let result = SedonaContextBuilder::new().with_unspillable_reserve_ratio(-0.1);
@@ -297,9 +353,29 @@ mod tests {
     fn test_from_options_empty() {
         let opts = HashMap::new();
         let builder = SedonaContextBuilder::from_options(&opts).unwrap();
-        assert!(builder.memory_limit.is_none());
+        // Empty options should use defaults (75% memory, Fair pool)
+        assert!(builder.memory_limit.is_some());
         assert!(builder.temp_dir.is_none());
-        assert_eq!(builder.pool_type, PoolType::Greedy);
+        assert_eq!(builder.pool_type, PoolType::Fair);
+    }
+
+    #[test]
+    fn test_from_options_unlimited() {
+        let mut opts = HashMap::new();
+        opts.insert("memory_limit".to_string(), "unlimited".to_string());
+        let builder = SedonaContextBuilder::from_options(&opts).unwrap();
+        assert!(builder.memory_limit.is_none());
+
+        // Case insensitive
+        let mut opts = HashMap::new();
+        opts.insert("memory_limit".to_string(), "Unlimited".to_string());
+        let builder = SedonaContextBuilder::from_options(&opts).unwrap();
+        assert!(builder.memory_limit.is_none());
+
+        let mut opts = HashMap::new();
+        opts.insert("memory_limit".to_string(), "UNLIMITED".to_string());
+        let builder = SedonaContextBuilder::from_options(&opts).unwrap();
+        assert!(builder.memory_limit.is_none());
     }
 
     #[test]
@@ -349,12 +425,13 @@ mod tests {
         let mut opts = HashMap::new();
         opts.insert("unknown_key".to_string(), "value".to_string());
         let builder = SedonaContextBuilder::from_options(&opts).unwrap();
-        assert!(builder.memory_limit.is_none());
+        // Default memory limit should still be set
+        assert!(builder.memory_limit.is_some());
     }
 
     #[test]
     fn test_build_runtime_env_no_memory_limit() {
-        let builder = SedonaContextBuilder::new();
+        let builder = SedonaContextBuilder::new().without_memory_limit();
         let result = builder.build_runtime_env();
         assert!(result.is_ok());
     }
@@ -379,6 +456,14 @@ mod tests {
         assert!(result.is_ok());
     }
 
+    #[test]
+    fn test_build_runtime_env_default() {
+        // Default builder should build successfully with 75% memory + fair pool
+        let builder = SedonaContextBuilder::new();
+        let result = builder.build_runtime_env();
+        assert!(result.is_ok());
+    }
+
     #[tokio::test]
     async fn test_build_context_default() {
         let ctx = SedonaContextBuilder::new().build().await;
@@ -396,4 +481,13 @@ mod tests {
             .await;
         assert!(ctx.is_ok());
     }
+
+    #[tokio::test]
+    async fn test_build_context_without_memory_limit() {
+        let ctx = SedonaContextBuilder::new()
+            .without_memory_limit()
+            .build()
+            .await;
+        assert!(ctx.is_ok());
+    }
 }
diff --git a/sedona-cli/src/main.rs b/sedona-cli/src/main.rs
index 6dd315ee..082e68a1 100644
--- a/sedona-cli/src/main.rs
+++ b/sedona-cli/src/main.rs
@@ -66,15 +66,15 @@ struct Args {
     #[clap(
         short = 'm',
         long,
-        help = "The memory pool limitation (e.g. '10g'), default to None (no limit)",
-        value_parser(extract_memory_pool_size)
+        help = "The memory pool limitation (e.g. '10g'), default to 75% of physical memory. Use 'unlimited' to disable",
+        value_parser(parse_memory_limit)
     )]
-    memory_limit: Option<usize>,
+    memory_limit: Option<MemoryLimitArg>,
 
     #[clap(
         long,
         help = "Specify the memory pool type 'greedy' or 'fair'",
-        default_value_t = PoolType::Greedy
+        default_value_t = PoolType::Fair
     )]
     mem_pool_type: PoolType,
 
@@ -140,6 +140,15 @@ enum FunctionListFormat {
     Json,
 }
 
+/// Parsed representation of the `--memory-limit` CLI argument.
+#[derive(Debug, Clone, PartialEq)]
+enum MemoryLimitArg {
+    /// Disable the memory limit entirely.
+    Unlimited,
+    /// Use an explicit byte limit.
+    Limit(usize),
+}
+
 #[tokio::main]
 /// Calls [`main_inner`], then handles printing errors and returning the correct exit code
 pub async fn main() -> ExitCode {
@@ -190,8 +199,14 @@ async fn main_inner() -> Result<()> {
     let mut builder = SedonaContextBuilder::new()
         .with_pool_type(args.mem_pool_type.clone())
         .with_unspillable_reserve_ratio(args.unspillable_reserve_ratio)?;
-    if let Some(memory_limit) = args.memory_limit {
-        builder = builder.with_memory_limit(memory_limit);
+    match args.memory_limit {
+        Some(MemoryLimitArg::Unlimited) => {
+            builder = builder.without_memory_limit();
+        }
+        Some(MemoryLimitArg::Limit(limit)) => {
+            builder = builder.with_memory_limit(limit);
+        }
+        None => {}
     }
     let ctx = builder.build().await?;
 
@@ -252,6 +267,14 @@ pub fn extract_memory_pool_size(size: &str) -> Result<usize, String> {
     sedona::size_parser::parse_size_string(size).map_err(|e| e.to_string())
 }
 
+fn parse_memory_limit(s: &str) -> Result<MemoryLimitArg, String> {
+    if s.eq_ignore_ascii_case("unlimited") {
+        Ok(MemoryLimitArg::Unlimited)
+    } else {
+        extract_memory_pool_size(s).map(MemoryLimitArg::Limit)
+    }
+}
+
 fn validate_unspillable_reserve_ratio(s: &str) -> Result<f64, String> {
     let value: f64 = s
         .parse()

(sedona-db) branch main updated: feat(rust/sedona): default memory limit to 75% of physical memory with fair pool (#687)

Reply via email to