This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-swift.git


The following commit(s) were added to refs/heads/main by this push:
     new 9259237  fix: Padding now added to Arrow file marker and RecordBatches 
are being written with correct alignment (#95)
9259237 is described below

commit 9259237fb5d0dede60136011b4e67efd834c5885
Author: willtemperley <[email protected]>
AuthorDate: Tue Oct 14 20:43:19 2025 +0800

    fix: Padding now added to Arrow file marker and RecordBatches are being 
written with correct alignment (#95)
    
    Files were being written with a non-padded file marker, and alignment
    padding was not being written when serializing record batches. Also, the
    metadata length was being set to zero in the block.
    
    ## What's Changed
    
    The padded version of the file marker is now being written.
    
    Padding is written to record blocks and record block metadata, i.e.:
    ```
     addPadForAlignment(&writer)
    ```
    
    The metadata length was being written as zero. This was preventing
    PyArrow from reading files written by `ArrowWriter`. This has now been
    calculated and set in the Block.
    
    Closes #91.
---
 Arrow/Sources/Arrow/ArrowWriter.swift | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/Arrow/Sources/Arrow/ArrowWriter.swift 
b/Arrow/Sources/Arrow/ArrowWriter.swift
index 3aa25b6..24176d3 100644
--- a/Arrow/Sources/Arrow/ArrowWriter.swift
+++ b/Arrow/Sources/Arrow/ArrowWriter.swift
@@ -144,12 +144,25 @@ public class ArrowWriter { // swiftlint:disable:this 
type_body_length
                 withUnsafeBytes(of: CONTINUATIONMARKER.littleEndian) 
{writer.append(Data($0))}
                 withUnsafeBytes(of: rbResult.1.o.littleEndian) 
{writer.append(Data($0))}
                 writer.append(rbResult.0)
+                addPadForAlignment(&writer)
+                let metadataLength = writer.count - startIndex
+                let bodyStart = writer.count
                 switch writeRecordBatchData(&writer, fields: 
batch.schema.fields, columns: batch.columns) {
                 case .success:
+                    let bodyLength = writer.count - bodyStart
+                    let expectedSize = startIndex + metadataLength + bodyLength
+                    guard expectedSize == writer.count else {
+                        return .failure(.invalid(
+                            "Invalid Block. Expected \(expectedSize), got 
\(writer.count)"
+                        ))
+                    }
                     rbBlocks.append(
-                        org_apache_arrow_flatbuf_Block(offset: 
Int64(startIndex),
-                                                       metaDataLength: 
Int32(0),
-                                                       bodyLength: 
Int64(rbResult.1.o)))
+                        org_apache_arrow_flatbuf_Block(
+                            offset: Int64(startIndex),
+                            metaDataLength: Int32(metadataLength),
+                            bodyLength: Int64(bodyLength)
+                        )
+                    )
                 case .failure(let error):
                     return .failure(error)
                 }
@@ -293,6 +306,7 @@ public class ArrowWriter { // swiftlint:disable:this 
type_body_length
         case .success(let schemaOffset):
             fbb.finish(offset: schemaOffset)
             writer.append(fbb.data)
+            addPadForAlignment(&writer)
         case .failure(let error):
             return .failure(error)
         }
@@ -379,7 +393,7 @@ public class ArrowWriter { // swiftlint:disable:this 
type_body_length
         addPadForAlignment(&markerData)
 
         var writer: any DataWriter = FileDataWriter(fileHandle)
-        writer.append(FILEMARKER.data(using: .utf8)!)
+        writer.append(markerData)
         switch writeFile(&writer, info: info) {
         case .success:
             writer.append(FILEMARKER.data(using: .utf8)!)

Reply via email to