This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-dotnet.git
The following commit(s) were added to refs/heads/main by this push:
new 2503375 Remove unnecessary allocation in ArrowStreamWriter (#73)
2503375 is described below
commit 2503375bedb7750d80c8df9e9b99dfe02fbaeba7
Author: Jan Jahoda <[email protected]>
AuthorDate: Wed Sep 24 14:37:59 2025 +0200
Remove unnecessary allocation in ArrowStreamWriter (#73)
## What's Changed
ArrowStreamWriter allocates 8 KB of memory by creating a new array pool
(`ArrayPool<byte>.Create()`) for every writer instance. This change uses
the shared pool (`ArrayPool<byte>.Shared`) instead of a per-writer pool to
reduce allocations.
The array pool is only used to rent small arrays (8 bytes), but the pool
itself allocates much bigger arrays (8 KB in total).
The array pool also has access-time overhead for small arrays compared to
direct allocation.
Results from benchmarks:
Old implementation:
| Method | BatchLength | ColumnSetCount | Mean | Error | StdDev | Allocated |
|----------- |------------ |--------------- |-----------:|----------:|----------:|----------:|
| WriteBatch | 10000 | 10 | 6.118 ms | 0.1215 ms | 0.3345 ms | 248.53 KB |
| WriteBatch | 10000 | 14 | 9.788 ms | 0.1910 ms | 0.3396 ms | 324.12 KB |
| WriteBatch | 300000 | 10 | 119.351 ms | 3.1713 ms | 9.3008 ms | 248.53 KB |
| WriteBatch | 300000 | 14 | 136.697 ms | 2.9229 ms | 8.4799 ms | 324.12 KB |
New implementation:
| Method | BatchLength | ColumnSetCount | Mean | Error | StdDev | Median | Allocated |
|----------- |------------ |--------------- |-----------:|----------:|-----------:|-----------:|----------:|
| WriteBatch | 10000 | 10 | 5.925 ms | 0.2057 ms | 0.6001 ms | 5.843 ms | 240.64 KB |
| WriteBatch | 10000 | 14 | 8.908 ms | 0.2743 ms | 0.8002 ms | 8.778 ms | 316.23 KB |
| WriteBatch | 300000 | 10 | 94.835 ms | 1.7872 ms | 3.7699 ms | 93.892 ms | 240.64 KB |
| WriteBatch | 300000 | 14 | 147.995 ms | 3.6873 ms | 10.6975 ms | 144.591 ms | 316.23 KB |
Closes #41.
---
src/Apache.Arrow/Ipc/ArrowFileWriter.cs | 5 +++--
src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 7 ++-----
2 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/src/Apache.Arrow/Ipc/ArrowFileWriter.cs
b/src/Apache.Arrow/Ipc/ArrowFileWriter.cs
index a643012..113833d 100644
--- a/src/Apache.Arrow/Ipc/ArrowFileWriter.cs
+++ b/src/Apache.Arrow/Ipc/ArrowFileWriter.cs
@@ -14,6 +14,7 @@
// limitations under the License.
using System;
+using System.Buffers;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.Diagnostics;
@@ -221,7 +222,7 @@ namespace Apache.Arrow.Ipc
// Write footer length
- using (Buffers.RentReturn(4, out Memory<byte> buffer))
+ using (ArrayPool<byte>.Shared.RentReturn(4, out Memory<byte>
buffer))
{
int footerLength;
checked
@@ -292,7 +293,7 @@ namespace Apache.Arrow.Ipc
cancellationToken.ThrowIfCancellationRequested();
- using (Buffers.RentReturn(4, out Memory<byte> buffer))
+ using (ArrayPool<byte>.Shared.RentReturn(4, out Memory<byte>
buffer))
{
int footerLength;
checked
diff --git a/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
b/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index 5f180c4..6c58c15 100644
--- a/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -611,8 +611,6 @@ namespace Apache.Arrow.Ipc
protected Stream BaseStream { get; }
- protected ArrayPool<byte> Buffers { get; }
-
private protected FlatBufferBuilder Builder { get; }
protected bool HasWrittenSchema { get; set; }
@@ -663,7 +661,6 @@ namespace Apache.Arrow.Ipc
_leaveOpen = leaveOpen;
_allocator = allocator ?? MemoryAllocator.Default.Value;
- Buffers = ArrayPool<byte>.Create();
Builder = new FlatBufferBuilder(1024);
HasWrittenSchema = false;
@@ -1277,7 +1274,7 @@ namespace Apache.Arrow.Ipc
private void WriteIpcMessageLength(int length)
{
- using (Buffers.RentReturn(_options.SizeOfIpcLength, out
Memory<byte> buffer))
+ using (ArrayPool<byte>.Shared.RentReturn(_options.SizeOfIpcLength,
out Memory<byte> buffer))
{
Memory<byte> currentBufferPosition = buffer;
if (!_options.WriteLegacyIpcFormat)
@@ -1294,7 +1291,7 @@ namespace Apache.Arrow.Ipc
private async ValueTask WriteIpcMessageLengthAsync(int length,
CancellationToken cancellationToken)
{
- using (Buffers.RentReturn(_options.SizeOfIpcLength, out
Memory<byte> buffer))
+ using (ArrayPool<byte>.Shared.RentReturn(_options.SizeOfIpcLength,
out Memory<byte> buffer))
{
Memory<byte> currentBufferPosition = buffer;
if (!_options.WriteLegacyIpcFormat)