This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-swift.git
The following commit(s) were added to refs/heads/main by this push:
new b4658b5 fix(arrow): correct 64-byte alignment calculation in
ArrowBuffer (#144)
b4658b5 is described below
commit b4658b58f36a926ea693cf876eedb15f1c13e80e
Author: Demetrius Albuquerque <[email protected]>
AuthorDate: Thu Mar 12 09:28:33 2026 +0100
fix(arrow): correct 64-byte alignment calculation in ArrowBuffer (#144)
Closes #106
## What
`alignTo64` was adding 8 extra bytes to every buffer allocation,
regardless
of whether the length was already aligned:
```swift
// before
private static func alignTo64(_ length: UInt) -> UInt {
let bufAlignment = length % 64
if bufAlignment != 0 {
return length + (64 - bufAlignment) + 8 // +8 has no spec basis
}
return length + 8 // +8 even when already aligned
}
// after
private static func alignTo64(_ length: UInt) -> UInt {
let bufAlignment = length % 64
if bufAlignment != 0 {
return length + (64 - bufAlignment)
}
return length
}
```
## Why
The Arrow columnar format specification recommends aligning buffers and
padding their lengths to a multiple of 64 bytes.
There is no provision in the spec for additional padding beyond that
alignment
boundary. The extra 8 bytes were silently inflating every buffer
allocation —
both the allocated size and the `capacity` field used by `append(to:)`
when
writing IPC messages.
The extra bytes are zeros (buffers are zero-initialized), so they have
no
effect on correctness when reading IPC streams — readers skip padding
between
messages at the IPC layer, not at the buffer layer. But they do produce
non-conformant buffer sizes that diverge from what other Arrow
implementations
produce for the same input, which can surface as subtle interop issues
when
exchanging data via the C Data Interface with Go or other runtimes.
The most likely origin of this `+8` is a confusion with the IPC message
framing
format, in which each encapsulated message begins with a 4-byte
continuation
indicator followed by a 4-byte metadata length prefix (8 bytes in total,
ahead of
the metadata and the message body). That framing is the responsibility of
`ArrowWriter`, not `ArrowBuffer`.
## Impact
Every `ArrowBuffer` created via `createBuffer` was 8 bytes larger than
necessary. In workloads with many columns or high message throughput —
exactly
the scenarios Arrow is designed for — this overhead accumulates across
every
batch. The fix brings allocation sizes into conformance with the spec.
Existing tests that assert on `capacity` values directly (15 assertions
in
`ArrayTests.swift` and 1 in `CDataTests.swift`) are updated to expect the
new
values, each 8 bytes smaller than before. No other test behavior changes.
---
Sources/Arrow/ArrowBuffer.swift | 4 ++--
Tests/ArrowTests/ArrayTests.swift | 30 +++++++++++++++---------------
Tests/ArrowTests/CDataTests.swift | 2 +-
3 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/Sources/Arrow/ArrowBuffer.swift b/Sources/Arrow/ArrowBuffer.swift
index 1ff53cd..36114c2 100644
--- a/Sources/Arrow/ArrowBuffer.swift
+++ b/Sources/Arrow/ArrowBuffer.swift
@@ -80,9 +80,9 @@ public class ArrowBuffer {
private static func alignTo64(_ length: UInt) -> UInt {
let bufAlignment = length % 64
if bufAlignment != 0 {
- return length + (64 - bufAlignment) + 8
+ return length + (64 - bufAlignment)
}
- return length + 8
+ return length
}
}
diff --git a/Tests/ArrowTests/ArrayTests.swift
b/Tests/ArrowTests/ArrayTests.swift
index e28d8bf..0f1ee8f 100644
--- a/Tests/ArrowTests/ArrayTests.swift
+++ b/Tests/ArrowTests/ArrayTests.swift
@@ -31,7 +31,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
XCTAssertEqual(arrayBuilder.nullCount, 0)
arrayBuilder.append(nil)
XCTAssertEqual(arrayBuilder.length, 101)
- XCTAssertEqual(arrayBuilder.capacity, 136)
+ XCTAssertEqual(arrayBuilder.capacity, 128)
XCTAssertEqual(arrayBuilder.nullCount, 1)
let array = try arrayBuilder.finish()
XCTAssertEqual(array.length, 101)
@@ -44,7 +44,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
doubleBuilder.append(40.4)
XCTAssertEqual(doubleBuilder.nullCount, 0)
XCTAssertEqual(doubleBuilder.length, 2)
- XCTAssertEqual(doubleBuilder.capacity, 264)
+ XCTAssertEqual(doubleBuilder.capacity, 256)
let doubleArray = try doubleBuilder.finish()
XCTAssertEqual(doubleArray.length, 2)
XCTAssertEqual(doubleArray[0]!, 14)
@@ -63,7 +63,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
XCTAssertEqual(stringBuilder.nullCount, 10)
XCTAssertEqual(stringBuilder.length, 100)
- XCTAssertEqual(stringBuilder.capacity, 648)
+ XCTAssertEqual(stringBuilder.capacity, 640)
let stringArray = try stringBuilder.finish()
XCTAssertEqual(stringArray.length, 100)
for index in 0..<stringArray.length {
@@ -86,7 +86,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
boolBuilder.append(false)
XCTAssertEqual(boolBuilder.nullCount, 1)
XCTAssertEqual(boolBuilder.length, 4)
- XCTAssertEqual(boolBuilder.capacity, 72)
+ XCTAssertEqual(boolBuilder.capacity, 64)
let boolArray = try boolBuilder.finish()
XCTAssertEqual(boolArray.length, 4)
XCTAssertEqual(boolArray[1], nil)
@@ -103,7 +103,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
date32Builder.append(nil)
XCTAssertEqual(date32Builder.nullCount, 1)
XCTAssertEqual(date32Builder.length, 3)
- XCTAssertEqual(date32Builder.capacity, 136)
+ XCTAssertEqual(date32Builder.capacity, 128)
let date32Array = try date32Builder.finish()
XCTAssertEqual(date32Array.length, 3)
XCTAssertEqual(date32Array[1], date2)
@@ -120,7 +120,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
date64Builder.append(nil)
XCTAssertEqual(date64Builder.nullCount, 1)
XCTAssertEqual(date64Builder.length, 3)
- XCTAssertEqual(date64Builder.capacity, 264)
+ XCTAssertEqual(date64Builder.capacity, 256)
let date64Array = try date64Builder.finish()
XCTAssertEqual(date64Array.length, 3)
XCTAssertEqual(date64Array[1], date2)
@@ -139,7 +139,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
XCTAssertEqual(binaryBuilder.nullCount, 10)
XCTAssertEqual(binaryBuilder.length, 100)
- XCTAssertEqual(binaryBuilder.capacity, 648)
+ XCTAssertEqual(binaryBuilder.capacity, 640)
let binaryArray = try binaryBuilder.finish()
XCTAssertEqual(binaryArray.length, 100)
for index in 0..<binaryArray.length {
@@ -159,7 +159,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
milliBuilder.append(nil)
XCTAssertEqual(milliBuilder.nullCount, 1)
XCTAssertEqual(milliBuilder.length, 3)
- XCTAssertEqual(milliBuilder.capacity, 136)
+ XCTAssertEqual(milliBuilder.capacity, 128)
let milliArray = try milliBuilder.finish()
let milliType = milliArray.arrowData.type as! ArrowTypeTime32 //
swiftlint:disable:this force_cast
XCTAssertEqual(milliType.unit, .milliseconds)
@@ -173,7 +173,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
secBuilder.append(2000011)
XCTAssertEqual(secBuilder.nullCount, 1)
XCTAssertEqual(secBuilder.length, 3)
- XCTAssertEqual(secBuilder.capacity, 136)
+ XCTAssertEqual(secBuilder.capacity, 128)
let secArray = try secBuilder.finish()
let secType = secArray.arrowData.type as! ArrowTypeTime32 //
swiftlint:disable:this force_cast
XCTAssertEqual(secType.unit, .seconds)
@@ -189,7 +189,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
nanoBuilder.append(123456789)
XCTAssertEqual(nanoBuilder.nullCount, 1)
XCTAssertEqual(nanoBuilder.length, 3)
- XCTAssertEqual(nanoBuilder.capacity, 264)
+ XCTAssertEqual(nanoBuilder.capacity, 256)
let nanoArray = try nanoBuilder.finish()
let nanoType = nanoArray.arrowData.type as! ArrowTypeTime64 //
swiftlint:disable:this force_cast
XCTAssertEqual(nanoType.unit, .nanoseconds)
@@ -203,7 +203,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
microBuilder.append(987654321)
XCTAssertEqual(microBuilder.nullCount, 1)
XCTAssertEqual(microBuilder.length, 3)
- XCTAssertEqual(microBuilder.capacity, 264)
+ XCTAssertEqual(microBuilder.capacity, 256)
let microArray = try microBuilder.finish()
let microType = microArray.arrowData.type as! ArrowTypeTime64 //
swiftlint:disable:this force_cast
XCTAssertEqual(microType.unit, .microseconds)
@@ -220,7 +220,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
secBuilder.append(nil)
XCTAssertEqual(secBuilder.nullCount, 1)
XCTAssertEqual(secBuilder.length, 3)
- XCTAssertEqual(secBuilder.capacity, 264)
+ XCTAssertEqual(secBuilder.capacity, 256)
let secArray = try secBuilder.finish()
let secType = secArray.arrowData.type as! ArrowTypeTimestamp //
swiftlint:disable:this force_cast
XCTAssertEqual(secType.unit, .seconds)
@@ -237,7 +237,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
msBuilder.append(1609545600000) // 2021-01-02 00:00:00.000
XCTAssertEqual(msBuilder.nullCount, 1)
XCTAssertEqual(msBuilder.length, 3)
- XCTAssertEqual(msBuilder.capacity, 264)
+ XCTAssertEqual(msBuilder.capacity, 256)
let msArray = try msBuilder.finish()
let msType = msArray.arrowData.type as! ArrowTypeTimestamp //
swiftlint:disable:this force_cast
XCTAssertEqual(msType.unit, .milliseconds)
@@ -254,7 +254,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
usBuilder.append(1609632000000000) // 2021-01-03 00:00:00.000000
XCTAssertEqual(usBuilder.nullCount, 0)
XCTAssertEqual(usBuilder.length, 3)
- XCTAssertEqual(usBuilder.capacity, 264)
+ XCTAssertEqual(usBuilder.capacity, 256)
let usArray = try usBuilder.finish()
let usType = usArray.arrowData.type as! ArrowTypeTimestamp //
swiftlint:disable:this force_cast
XCTAssertEqual(usType.unit, .microseconds)
@@ -271,7 +271,7 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
nsBuilder.append(1609545600000000000) // 2021-01-02 00:00:00.000000000
XCTAssertEqual(nsBuilder.nullCount, 1)
XCTAssertEqual(nsBuilder.length, 3)
- XCTAssertEqual(nsBuilder.capacity, 264)
+ XCTAssertEqual(nsBuilder.capacity, 256)
let nsArray = try nsBuilder.finish()
let nsType = nsArray.arrowData.type as! ArrowTypeTimestamp //
swiftlint:disable:this force_cast
XCTAssertEqual(nsType.unit, .nanoseconds)
diff --git a/Tests/ArrowTests/CDataTests.swift
b/Tests/ArrowTests/CDataTests.swift
index e48ebd0..a94bc05 100644
--- a/Tests/ArrowTests/CDataTests.swift
+++ b/Tests/ArrowTests/CDataTests.swift
@@ -90,7 +90,7 @@ final class CDataTests: XCTestCase {
XCTAssertEqual(stringBuilder.nullCount, 10)
XCTAssertEqual(stringBuilder.length, 100)
- XCTAssertEqual(stringBuilder.capacity, 648)
+ XCTAssertEqual(stringBuilder.capacity, 640)
let stringArray = try stringBuilder.finish()
let exporter = ArrowCExporter()
var cArray = ArrowC.ArrowArray()