This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new a86b61613 feat(csharp/src/Drivers/Databricks): Clarify CloudFetch
memory manager behavior and set appropriate limit (#3656)
a86b61613 is described below
commit a86b61613919c8236129f9f095ed5bf56cc8da27
Author: eric-wang-1990 <[email protected]>
AuthorDate: Fri Oct 31 12:05:52 2025 -0700
feat(csharp/src/Drivers/Databricks): Clarify CloudFetch memory manager
behavior and set appropriate limit (#3656)
## Summary
**Clarifies memory manager behavior** - Documents that
`CloudFetchMemoryBufferManager` tracks in-flight compressed download
sizes, and reduces the default memory buffer limit from 200MB to 100MB.
## Memory Manager Clarification
The `CloudFetchMemoryBufferManager` tracks **in-flight download memory
based on compressed file sizes**, not decompressed sizes. This design is
intentional:
1. **Limits concurrent downloads** - Prevents unbounded parallel
downloads from exhausting system resources
2. **Natural decompression bounds** - Decompressed data memory is
naturally bounded by the result queue capacity and batch processing flow
3. **Lightweight concurrency control** - Tracking compressed sizes
provides efficient download throttling without overhead of tracking
decompressed memory
### Changes
- Added comprehensive documentation to `CloudFetchMemoryBufferManager`
explaining it tracks in-flight compressed data sizes
- Reduced `DefaultMemoryBufferSizeMB` from 200 to 100 in
`CloudFetchDownloadManager`
- Added inline comments clarifying that size parameters represent
compressed file sizes from the server
## Test plan
- [ ] Existing CloudFetch tests pass
- [ ] Manual testing with CloudFetch queries to verify download behavior
🤖 Generated with [Claude Code](https://claude.com/claude-code)
---------
Co-authored-by: Claude <[email protected]>
---
.../Databricks/Reader/CloudFetch/CloudFetchDownloadManager.cs | 2 +-
.../Reader/CloudFetch/CloudFetchMemoryBufferManager.cs | 10 +++++++++-
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git
a/csharp/src/Drivers/Databricks/Reader/CloudFetch/CloudFetchDownloadManager.cs
b/csharp/src/Drivers/Databricks/Reader/CloudFetch/CloudFetchDownloadManager.cs
index 98e984baf..934a4af20 100644
---
a/csharp/src/Drivers/Databricks/Reader/CloudFetch/CloudFetchDownloadManager.cs
+++
b/csharp/src/Drivers/Databricks/Reader/CloudFetch/CloudFetchDownloadManager.cs
@@ -34,7 +34,7 @@ namespace
Apache.Arrow.Adbc.Drivers.Databricks.Reader.CloudFetch
// Default values
private const int DefaultParallelDownloads = 3;
private const int DefaultPrefetchCount = 2;
- private const int DefaultMemoryBufferSizeMB = 200;
+ private const int DefaultMemoryBufferSizeMB = 100;
private const bool DefaultPrefetchEnabled = true;
private const int DefaultTimeoutMinutes = 5;
private const int DefaultMaxUrlRefreshAttempts = 3;
diff --git
a/csharp/src/Drivers/Databricks/Reader/CloudFetch/CloudFetchMemoryBufferManager.cs
b/csharp/src/Drivers/Databricks/Reader/CloudFetch/CloudFetchMemoryBufferManager.cs
index 7fe7dc2c8..32168047c 100644
---
a/csharp/src/Drivers/Databricks/Reader/CloudFetch/CloudFetchMemoryBufferManager.cs
+++
b/csharp/src/Drivers/Databricks/Reader/CloudFetch/CloudFetchMemoryBufferManager.cs
@@ -23,6 +23,13 @@ namespace
Apache.Arrow.Adbc.Drivers.Databricks.Reader.CloudFetch
{
/// <summary>
/// Manages memory allocation for prefetched files.
+ ///
+ /// This manager tracks in-flight download memory based on the compressed
file sizes
+ /// received from the server. It does NOT track the decompressed data
size, as that
+ /// memory is naturally bounded by the result queue capacity and batch
processing flow.
+ ///
+ /// The memory limit controls how many concurrent downloads can be
in-flight at once,
+ /// preventing unbounded parallel downloads from exhausting system
resources.
/// </summary>
internal sealed class CloudFetchMemoryBufferManager :
ICloudFetchMemoryBufferManager
{
@@ -63,7 +70,7 @@ namespace
Apache.Arrow.Adbc.Drivers.Databricks.Reader.CloudFetch
throw new ArgumentOutOfRangeException(nameof(size), "Size must
be positive.");
}
- // Try to acquire memory
+ // Try to acquire memory (size is the compressed file size from
the server)
long originalValue;
long newValue;
do
@@ -91,6 +98,7 @@ namespace
Apache.Arrow.Adbc.Drivers.Databricks.Reader.CloudFetch
}
// Special case: if size is greater than max memory, we'll never
be able to acquire it
+ // Note: size is the compressed file size from the server
if (size > _maxMemory)
{
throw new ArgumentOutOfRangeException(nameof(size),
$"Requested size ({size} bytes) exceeds maximum memory ({_maxMemory} bytes).");