Copilot commented on code in PR #149:
URL: https://github.com/apache/arrow-swift/pull/149#discussion_r2987049955
##########
Sources/Arrow/ArrowArrayBuilder.swift:
##########
@@ -146,6 +146,10 @@ public class StructArrayBuilder:
ArrowArrayBuilder<StructBufferBuilder, NestedAr
try super.init(ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields))
}
+ public override func appendAny(_ val: Any?) {
+ self.append(val as? [Any?])
+ }
Review Comment:
Add a unit test that exercises `appendAny` on `StructArrayBuilder` and
`ListArrayBuilder` (especially `list<struct<...>>`), since this change fixes a
correctness issue where `appendAny` previously bypassed child-builder
distribution. Existing tests cover `appendAny` for primitive builders but not
nested builders, so this could regress without targeted coverage.
##########
Sources/Arrow/ArrowArrayBuilder.swift:
##########
@@ -186,6 +190,10 @@ public class ListArrayBuilder:
ArrowArrayBuilder<ListBufferBuilder, NestedArray>
try super.init(arrowType)
}
+ public override func appendAny(_ val: Any?) {
+ self.append(val as? [Any?])
+ }
Review Comment:
Add a unit test that calls `ListArrayBuilder.appendAny` with non-nil lists
and verifies the produced array’s `values` child contains the appended
elements. This override is fixing a nested-builder correctness bug; having a
regression test here would help prevent silent reintroductions.
##########
Sources/Arrow/ArrowWriter.swift:
##########
@@ -178,16 +185,23 @@ public class ArrowWriter { // swiftlint:disable:this
type_body_length
fbb: inout FlatBufferBuilder) {
for index in (0 ..< fields.count).reversed() {
let column = columns[index]
- let fieldNode =
- org_apache_arrow_flatbuf_FieldNode(length:
Int64(column.length),
- nullCount:
Int64(column.nullCount))
- offsets.append(fbb.create(struct: fieldNode))
+ // FlatBuffer vectors use prepend semantics: last-written element
becomes
+ // the first when read. Arrow IPC requires depth-first pre-order
(parent
+ // before children), so children must be written before their
parent here.
if let nestedType = column.type as? ArrowTypeStruct {
let nestedArray = column.array as? NestedArray
if let nestedFields = nestedArray?.fields {
writeFieldNodes(nestedType.fields, columns: nestedFields,
offsets: &offsets, fbb: &fbb)
}
+ } else if let listType = column.type as? ArrowTypeList {
+ if let nestedArray = column.array as? NestedArray, let
valuesHolder = nestedArray.values {
+ writeFieldNodes([listType.elementField], columns:
[valuesHolder], offsets: &offsets, fbb: &fbb)
+ }
}
+ let fieldNode =
+ org_apache_arrow_flatbuf_FieldNode(length:
Int64(column.length),
+ nullCount:
Int64(column.nullCount))
+ offsets.append(fbb.create(struct: fieldNode))
Review Comment:
Add IPC-level unit tests for list columns (at least `list<int32>` and
`list<struct<...>>`) that round-trip through
`ArrowWriter.writeStreaming`/`ArrowReader.readStreaming`. This change adds list
handling and adjusts nested field-node ordering; without regression tests it’s
easy to reintroduce invalid node/buffer ordering that external readers reject.
##########
Sources/Arrow/ArrowWriter.swift:
##########
@@ -341,7 +372,8 @@ public class ArrowWriter { // swiftlint:disable:this
type_body_length
public func writeStreaming(_ info: ArrowWriter.Info) -> Result<Data,
ArrowError> {
let writer: any DataWriter = InMemDataWriter()
switch toMessage(info.schema) {
- case .success(let schemaData):
+ case .success(var schemaData):
+ addPadForAlignment(&schemaData)
withUnsafeBytes(of: CONTINUATIONMARKER.littleEndian)
{writer.append(Data($0))}
withUnsafeBytes(of: UInt32(schemaData.count).littleEndian)
{writer.append(Data($0))}
writer.append(schemaData)
Review Comment:
Consider adding a regression test that asserts the schema message metadata
length written by `writeStreaming` is padded to an 8-byte multiple (and
therefore the first record-batch body starts at an 8-byte-aligned offset). This
is a spec requirement and a subtle invariant that can regress without a
targeted test.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]