This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-swift.git
The following commit(s) were added to refs/heads/main by this push:
new 9259237 fix: Padding now added to Arrow file marker and RecordBatches
are being written with correct alignment (#95)
9259237 is described below
commit 9259237fb5d0dede60136011b4e67efd834c5885
Author: willtemperley <[email protected]>
AuthorDate: Tue Oct 14 20:43:19 2025 +0800
fix: Padding now added to Arrow file marker and RecordBatches are being
written with correct alignment (#95)
Files were being written with a non-padded file marker, and alignment
padding was not being written when serializing record batches. Also, the
metadata length was being set to zero in the block.
## What's Changed
The padded version of the file marker is now written.
Padding is written to record blocks and record block metadata, i.e.:
```
addPadForAlignment(&writer)
```
The metadata length was being written as zero, which prevented
PyArrow from reading files written by `ArrowWriter`. The length is now
calculated and set in the Block.
Closes #91.
---
Arrow/Sources/Arrow/ArrowWriter.swift | 22 ++++++++++++++++++----
1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/Arrow/Sources/Arrow/ArrowWriter.swift
b/Arrow/Sources/Arrow/ArrowWriter.swift
index 3aa25b6..24176d3 100644
--- a/Arrow/Sources/Arrow/ArrowWriter.swift
+++ b/Arrow/Sources/Arrow/ArrowWriter.swift
@@ -144,12 +144,25 @@ public class ArrowWriter { // swiftlint:disable:this
type_body_length
withUnsafeBytes(of: CONTINUATIONMARKER.littleEndian)
{writer.append(Data($0))}
withUnsafeBytes(of: rbResult.1.o.littleEndian)
{writer.append(Data($0))}
writer.append(rbResult.0)
+ addPadForAlignment(&writer)
+ let metadataLength = writer.count - startIndex
+ let bodyStart = writer.count
switch writeRecordBatchData(&writer, fields:
batch.schema.fields, columns: batch.columns) {
case .success:
+ let bodyLength = writer.count - bodyStart
+ let expectedSize = startIndex + metadataLength + bodyLength
+ guard expectedSize == writer.count else {
+ return .failure(.invalid(
+ "Invalid Block. Expected \(expectedSize), got
\(writer.count)"
+ ))
+ }
rbBlocks.append(
- org_apache_arrow_flatbuf_Block(offset:
Int64(startIndex),
- metaDataLength:
Int32(0),
- bodyLength:
Int64(rbResult.1.o)))
+ org_apache_arrow_flatbuf_Block(
+ offset: Int64(startIndex),
+ metaDataLength: Int32(metadataLength),
+ bodyLength: Int64(bodyLength)
+ )
+ )
case .failure(let error):
return .failure(error)
}
@@ -293,6 +306,7 @@ public class ArrowWriter { // swiftlint:disable:this
type_body_length
case .success(let schemaOffset):
fbb.finish(offset: schemaOffset)
writer.append(fbb.data)
+ addPadForAlignment(&writer)
case .failure(let error):
return .failure(error)
}
@@ -379,7 +393,7 @@ public class ArrowWriter { // swiftlint:disable:this
type_body_length
addPadForAlignment(&markerData)
var writer: any DataWriter = FileDataWriter(fileHandle)
- writer.append(FILEMARKER.data(using: .utf8)!)
+ writer.append(markerData)
switch writeFile(&writer, info: info) {
case .success:
writer.append(FILEMARKER.data(using: .utf8)!)