This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-swift.git


The following commit(s) were added to refs/heads/main by this push:
     new b4658b5  fix(arrow): correct 64-byte alignment calculation in 
ArrowBuffer (#144)
b4658b5 is described below

commit b4658b58f36a926ea693cf876eedb15f1c13e80e
Author: Demetrius Albuquerque <[email protected]>
AuthorDate: Thu Mar 12 09:28:33 2026 +0100

    fix(arrow): correct 64-byte alignment calculation in ArrowBuffer (#144)
    
    Closes #106
    
    ## What
    
    `alignTo64` was adding 8 extra bytes to every buffer allocation,
    regardless
    of whether the length was already aligned:
    
    ```swift
    // before
    private static func alignTo64(_ length: UInt) -> UInt {
        let bufAlignment = length % 64
        if bufAlignment != 0 {
            return length + (64 - bufAlignment) + 8  // +8 has no spec basis
        }
        return length + 8  // +8 even when already aligned
    }
    
    // after
    private static func alignTo64(_ length: UInt) -> UInt {
        let bufAlignment = length % 64
        if bufAlignment != 0 {
            return length + (64 - bufAlignment)
        }
        return length
    }
    ```
    
    ## Why
    
    The Arrow columnar format specification requires buffer alignment to 64
    bytes.
    There is no provision in the spec for additional padding beyond that
    alignment
    boundary. The extra 8 bytes were silently inflating every buffer
    allocation —
    both the allocated size and the `capacity` field used by `append(to:)`
    when
    writing IPC messages.
    
    The extra bytes are zeros (buffers are zero-initialized), so they have
    no
    effect on correctness when reading IPC streams — readers skip padding
    between
    messages at the IPC layer, not at the buffer layer. But they do produce
    non-conformant buffer sizes that diverge from what other Arrow
    implementations
    produce for the same input, which can surface as subtle interop issues
    when
    exchanging data via the C Data Interface with Go or other runtimes.
    
    The most likely origin of this `+8` is a confusion with the IPC message
    framing
    format, which uses a 4-byte continuation indicator followed by a 4-byte
    metadata
    length prefix before each message body. That framing is the
    responsibility of
    `ArrowWriter`, not `ArrowBuffer`.
    
    ## Impact
    
    Every `ArrowBuffer` created via `createBuffer` was 8 bytes larger than
    necessary. In workloads with many columns or high message throughput —
    exactly
    the scenarios Arrow is designed for — this overhead accumulates across
    every
    batch. The fix brings allocation sizes into conformance with the spec.
    
    Existing tests that assert on `capacity` values directly (in
    `ArrayTests.swift` and `CDataTests.swift`) are updated to expect the
    64-byte-aligned sizes, each 8 bytes smaller than before. No other test
    behavior changes.
---
 Sources/Arrow/ArrowBuffer.swift   |  4 ++--
 Tests/ArrowTests/ArrayTests.swift | 30 +++++++++++++++---------------
 Tests/ArrowTests/CDataTests.swift |  2 +-
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/Sources/Arrow/ArrowBuffer.swift b/Sources/Arrow/ArrowBuffer.swift
index 1ff53cd..36114c2 100644
--- a/Sources/Arrow/ArrowBuffer.swift
+++ b/Sources/Arrow/ArrowBuffer.swift
@@ -80,9 +80,9 @@ public class ArrowBuffer {
     private static func alignTo64(_ length: UInt) -> UInt {
         let bufAlignment = length % 64
         if bufAlignment != 0 {
-            return length + (64 - bufAlignment) + 8
+            return length + (64 - bufAlignment)
         }
 
-        return length + 8
+        return length
     }
 }
diff --git a/Tests/ArrowTests/ArrayTests.swift 
b/Tests/ArrowTests/ArrayTests.swift
index e28d8bf..0f1ee8f 100644
--- a/Tests/ArrowTests/ArrayTests.swift
+++ b/Tests/ArrowTests/ArrayTests.swift
@@ -31,7 +31,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
         XCTAssertEqual(arrayBuilder.nullCount, 0)
         arrayBuilder.append(nil)
         XCTAssertEqual(arrayBuilder.length, 101)
-        XCTAssertEqual(arrayBuilder.capacity, 136)
+        XCTAssertEqual(arrayBuilder.capacity, 128)
         XCTAssertEqual(arrayBuilder.nullCount, 1)
         let array = try arrayBuilder.finish()
         XCTAssertEqual(array.length, 101)
@@ -44,7 +44,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
         doubleBuilder.append(40.4)
         XCTAssertEqual(doubleBuilder.nullCount, 0)
         XCTAssertEqual(doubleBuilder.length, 2)
-        XCTAssertEqual(doubleBuilder.capacity, 264)
+        XCTAssertEqual(doubleBuilder.capacity, 256)
         let doubleArray = try doubleBuilder.finish()
         XCTAssertEqual(doubleArray.length, 2)
         XCTAssertEqual(doubleArray[0]!, 14)
@@ -63,7 +63,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
 
         XCTAssertEqual(stringBuilder.nullCount, 10)
         XCTAssertEqual(stringBuilder.length, 100)
-        XCTAssertEqual(stringBuilder.capacity, 648)
+        XCTAssertEqual(stringBuilder.capacity, 640)
         let stringArray = try stringBuilder.finish()
         XCTAssertEqual(stringArray.length, 100)
         for index in 0..<stringArray.length {
@@ -86,7 +86,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
         boolBuilder.append(false)
         XCTAssertEqual(boolBuilder.nullCount, 1)
         XCTAssertEqual(boolBuilder.length, 4)
-        XCTAssertEqual(boolBuilder.capacity, 72)
+        XCTAssertEqual(boolBuilder.capacity, 64)
         let boolArray = try boolBuilder.finish()
         XCTAssertEqual(boolArray.length, 4)
         XCTAssertEqual(boolArray[1], nil)
@@ -103,7 +103,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
         date32Builder.append(nil)
         XCTAssertEqual(date32Builder.nullCount, 1)
         XCTAssertEqual(date32Builder.length, 3)
-        XCTAssertEqual(date32Builder.capacity, 136)
+        XCTAssertEqual(date32Builder.capacity, 128)
         let date32Array = try date32Builder.finish()
         XCTAssertEqual(date32Array.length, 3)
         XCTAssertEqual(date32Array[1], date2)
@@ -120,7 +120,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
         date64Builder.append(nil)
         XCTAssertEqual(date64Builder.nullCount, 1)
         XCTAssertEqual(date64Builder.length, 3)
-        XCTAssertEqual(date64Builder.capacity, 264)
+        XCTAssertEqual(date64Builder.capacity, 256)
         let date64Array = try date64Builder.finish()
         XCTAssertEqual(date64Array.length, 3)
         XCTAssertEqual(date64Array[1], date2)
@@ -139,7 +139,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
 
         XCTAssertEqual(binaryBuilder.nullCount, 10)
         XCTAssertEqual(binaryBuilder.length, 100)
-        XCTAssertEqual(binaryBuilder.capacity, 648)
+        XCTAssertEqual(binaryBuilder.capacity, 640)
         let binaryArray = try binaryBuilder.finish()
         XCTAssertEqual(binaryArray.length, 100)
         for index in 0..<binaryArray.length {
@@ -159,7 +159,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
         milliBuilder.append(nil)
         XCTAssertEqual(milliBuilder.nullCount, 1)
         XCTAssertEqual(milliBuilder.length, 3)
-        XCTAssertEqual(milliBuilder.capacity, 136)
+        XCTAssertEqual(milliBuilder.capacity, 128)
         let milliArray = try milliBuilder.finish()
         let milliType = milliArray.arrowData.type as! ArrowTypeTime32 // 
swiftlint:disable:this force_cast
         XCTAssertEqual(milliType.unit, .milliseconds)
@@ -173,7 +173,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
         secBuilder.append(2000011)
         XCTAssertEqual(secBuilder.nullCount, 1)
         XCTAssertEqual(secBuilder.length, 3)
-        XCTAssertEqual(secBuilder.capacity, 136)
+        XCTAssertEqual(secBuilder.capacity, 128)
         let secArray = try secBuilder.finish()
         let secType = secArray.arrowData.type as! ArrowTypeTime32 // 
swiftlint:disable:this force_cast
         XCTAssertEqual(secType.unit, .seconds)
@@ -189,7 +189,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
         nanoBuilder.append(123456789)
         XCTAssertEqual(nanoBuilder.nullCount, 1)
         XCTAssertEqual(nanoBuilder.length, 3)
-        XCTAssertEqual(nanoBuilder.capacity, 264)
+        XCTAssertEqual(nanoBuilder.capacity, 256)
         let nanoArray = try nanoBuilder.finish()
         let nanoType = nanoArray.arrowData.type as! ArrowTypeTime64 // 
swiftlint:disable:this force_cast
         XCTAssertEqual(nanoType.unit, .nanoseconds)
@@ -203,7 +203,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
         microBuilder.append(987654321)
         XCTAssertEqual(microBuilder.nullCount, 1)
         XCTAssertEqual(microBuilder.length, 3)
-        XCTAssertEqual(microBuilder.capacity, 264)
+        XCTAssertEqual(microBuilder.capacity, 256)
         let microArray = try microBuilder.finish()
         let microType = microArray.arrowData.type as! ArrowTypeTime64 // 
swiftlint:disable:this force_cast
         XCTAssertEqual(microType.unit, .microseconds)
@@ -220,7 +220,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
         secBuilder.append(nil)
         XCTAssertEqual(secBuilder.nullCount, 1)
         XCTAssertEqual(secBuilder.length, 3)
-        XCTAssertEqual(secBuilder.capacity, 264)
+        XCTAssertEqual(secBuilder.capacity, 256)
         let secArray = try secBuilder.finish()
         let secType = secArray.arrowData.type as! ArrowTypeTimestamp // 
swiftlint:disable:this force_cast
         XCTAssertEqual(secType.unit, .seconds)
@@ -237,7 +237,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
         msBuilder.append(1609545600000) // 2021-01-02 00:00:00.000
         XCTAssertEqual(msBuilder.nullCount, 1)
         XCTAssertEqual(msBuilder.length, 3)
-        XCTAssertEqual(msBuilder.capacity, 264)
+        XCTAssertEqual(msBuilder.capacity, 256)
         let msArray = try msBuilder.finish()
         let msType = msArray.arrowData.type as! ArrowTypeTimestamp // 
swiftlint:disable:this force_cast
         XCTAssertEqual(msType.unit, .milliseconds)
@@ -254,7 +254,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
         usBuilder.append(1609632000000000) // 2021-01-03 00:00:00.000000
         XCTAssertEqual(usBuilder.nullCount, 0)
         XCTAssertEqual(usBuilder.length, 3)
-        XCTAssertEqual(usBuilder.capacity, 264)
+        XCTAssertEqual(usBuilder.capacity, 256)
         let usArray = try usBuilder.finish()
         let usType = usArray.arrowData.type as! ArrowTypeTimestamp // 
swiftlint:disable:this force_cast
         XCTAssertEqual(usType.unit, .microseconds)
@@ -271,7 +271,7 @@ final class ArrayTests: XCTestCase { // 
swiftlint:disable:this type_body_length
         nsBuilder.append(1609545600000000000) // 2021-01-02 00:00:00.000000000
         XCTAssertEqual(nsBuilder.nullCount, 1)
         XCTAssertEqual(nsBuilder.length, 3)
-        XCTAssertEqual(nsBuilder.capacity, 264)
+        XCTAssertEqual(nsBuilder.capacity, 256)
         let nsArray = try nsBuilder.finish()
         let nsType = nsArray.arrowData.type as! ArrowTypeTimestamp // 
swiftlint:disable:this force_cast
         XCTAssertEqual(nsType.unit, .nanoseconds)
diff --git a/Tests/ArrowTests/CDataTests.swift 
b/Tests/ArrowTests/CDataTests.swift
index e48ebd0..a94bc05 100644
--- a/Tests/ArrowTests/CDataTests.swift
+++ b/Tests/ArrowTests/CDataTests.swift
@@ -90,7 +90,7 @@ final class CDataTests: XCTestCase {
 
         XCTAssertEqual(stringBuilder.nullCount, 10)
         XCTAssertEqual(stringBuilder.length, 100)
-        XCTAssertEqual(stringBuilder.capacity, 648)
+        XCTAssertEqual(stringBuilder.capacity, 640)
         let stringArray = try stringBuilder.finish()
         let exporter = ArrowCExporter()
         var cArray = ArrowC.ArrowArray()

Reply via email to