Copilot commented on code in PR #136:
URL: https://github.com/apache/arrow-swift/pull/136#discussion_r2835667060
##########
Sources/Arrow/ArrowTable.swift:
##########
@@ -165,6 +169,18 @@ public class RecordBatch {
return self
}
+ @discardableResult
+ public func addMetadata(_ key: String, value: String) -> Builder {
+ self.customMetadata[key] = value
+ return self
+ }
+
+ @discardableResult
+ public func addMetadata(_ metadata: [String: String]) -> Builder {
+ self.customMetadata.merge(metadata) { _, new in new }
+ return self
+ }
+
public func finish() -> Result<RecordBatch, ArrowError> {
if columns.count > 0 {
let columnLength = columns[0].length
Review Comment:
`RecordBatch.Builder.finish()` can return `.success` when `columns` is
empty, but `RecordBatch.init` unconditionally reads `columns[0].length`, which
will crash at runtime. Consider returning `.failure(.arrayHasNoElements)` (or
similar) when no columns have been added, and/or guarding in `RecordBatch.init`.
##########
Sources/Arrow/ArrowReader.swift:
##########
@@ -221,12 +221,26 @@ public class ArrowReader { // swiftlint:disable:this
type_body_length
}
}
+ private func extractCustomMetadata(
+ from message: org_apache_arrow_flatbuf_Message
+ ) -> [String: String] {
+ var metadata: [String: String] = [:]
+ for index in 0..<message.customMetadataCount {
+ if let kv = message.customMetadata(at: index),
+ let key = kv.key {
+ metadata[key] = kv.value ?? ""
Review Comment:
`extractCustomMetadata` currently maps a KeyValue with a missing/NULL
`value` to an empty string (`kv.value ?? ""`). This changes semantics (NULL vs
empty) and can introduce keys that weren’t actually set by the producer.
Consider skipping entries where `kv.value` is nil, or (if NULL values must be
preserved) changing the in-memory representation to support optional values.
```suggestion
let key = kv.key,
let value = kv.value {
metadata[key] = value
```
##########
Sources/Arrow/ArrowWriter.swift:
##########
@@ -243,11 +243,28 @@ public class ArrowWriter { // swiftlint:disable:this
type_body_length
org_apache_arrow_flatbuf_RecordBatch.add(length: Int64(batch.length),
&fbb)
let recordBatchOffset =
org_apache_arrow_flatbuf_RecordBatch.endRecordBatch(&fbb, start: startRb)
let bodySize = Int64(bufferOffset)
+
+ var customMetadataOffset = Offset()
+ if !batch.customMetadata.isEmpty {
+ var kvOffsets = [Offset]()
+ for (key, value) in batch.customMetadata {
+ let keyOffset = fbb.create(string: key)
+ let valueOffset = fbb.create(string: value)
+ kvOffsets.append(
+ org_apache_arrow_flatbuf_KeyValue.createKeyValue(
+ &fbb, keyOffset: keyOffset, valueOffset: valueOffset))
+ }
+ customMetadataOffset = fbb.createVector(ofOffsets: kvOffsets)
Review Comment:
`batch.customMetadata` is a Swift `Dictionary`, so iterating it produces an
arbitrary order. That makes IPC message bytes non-deterministic across runs
(even for identical inputs), which can complicate reproducible builds/caching
and byte-for-byte tests. Consider writing metadata in a stable order (e.g.,
sort by key before building the KeyValue vector).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]