This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-swift.git
The following commit(s) were added to refs/heads/main by this push:
new d34e65f feat: Add support for List data types (#39)
d34e65f is described below
commit d34e65fc64c4205ca7f273544f9cfd63722703db
Author: Marco Antonio <[email protected]>
AuthorDate: Sat Nov 1 18:34:23 2025 -0300
feat: Add support for List data types (#39)
### Rationale for this change
This PR refactors and extends support for nested types in the Arrow
integration. The current implementation of `ArrowNestedType` is tailored
primarily for data structs, as seen in `StructBufferBuilder`. However,
it lacks broader support and certain expected functionalities, such as
`loadStructArrayBuilder`.
To address this, the following improvements have been made:
- Renamed `ArrowNestedType` to `ArrowTypeStruct` to align with naming
conventions used elsewhere in the codebase.
- Introduced initial support for `ArrowTypeList`, including nested
lists.
For simplicity, instead of introducing a dedicated subtype for lists,
this PR uses an interface of `[Any?]?`. If this approach proves
insufficient, there are more explicit alternatives that can be explored.
**NOTE:** Work on `ArrowCExporter` and `ArrowCImporter` has been
intentionally deferred. These components require a deeper understanding
of memory ownership and child parsing, and I believe they are better
addressed in a future PR, unless strictly necessary.
### What's Changed
1. Renamed `ArrowNestedType` -> `ArrowTypeStruct`.
2. Added support for `ArrowTypeList`, including nested lists.
3. Implemented list support in `NestedArray` (formerly `StructArray`) with basic `.asString` formatting.
4. Added `ListArrayBuilder`.
5. Extended `ArrowArrayBuilder` to support the `.list` type.
6. Implemented `loadStructArrayBuilder` and `loadListArrayBuilder`.
7. Introduced `ListBufferBuilder`.
8. Added `ArrowReader.loadListData`.
9. Renamed `makeStructHolder` to `makeNestedHolder`, which now also builds list holders.
### Are these changes tested?
Tests are included in `ArrayTests.swift`. The changes are also in use in
internal applications, including integration with `ArrowFlight`.
Closes #16.
---------
Co-authored-by: Marco <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Co-authored-by: Copilot <[email protected]>
---
Arrow/Sources/Arrow/ArrowArray.swift | 141 ++++++++++++++++++++++-----
Arrow/Sources/Arrow/ArrowArrayBuilder.swift | 57 ++++++++++-
Arrow/Sources/Arrow/ArrowBufferBuilder.swift | 81 ++++++++++++++-
Arrow/Sources/Arrow/ArrowReader.swift | 44 ++++++++-
Arrow/Sources/Arrow/ArrowReaderHelper.swift | 36 ++++---
Arrow/Sources/Arrow/ArrowType.swift | 32 +++++-
Arrow/Sources/Arrow/ArrowWriter.swift | 31 +++---
Arrow/Sources/Arrow/ProtoUtil.swift | 9 +-
Arrow/Tests/ArrowTests/ArrayTests.swift | 90 ++++++++++++++++-
Arrow/Tests/ArrowTests/IPCTests.swift | 53 +++++-----
Arrow/Tests/ArrowTests/TableTests.swift | 4 +-
11 files changed, 479 insertions(+), 99 deletions(-)
diff --git a/Arrow/Sources/Arrow/ArrowArray.swift
b/Arrow/Sources/Arrow/ArrowArray.swift
index d4ee873..759e1de 100644
--- a/Arrow/Sources/Arrow/ArrowArray.swift
+++ b/Arrow/Sources/Arrow/ArrowArray.swift
@@ -114,7 +114,9 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder {
case .binary:
return try ArrowArrayHolderImpl(BinaryArray(with))
case .strct:
- return try ArrowArrayHolderImpl(StructArray(with))
+ return try ArrowArrayHolderImpl(NestedArray(with))
+ case .list:
+ return try ArrowArrayHolderImpl(NestedArray(with))
default:
throw ArrowError.invalid("Array not found for type: \(arrowType)")
}
@@ -355,16 +357,37 @@ public class BinaryArray: ArrowArray<Data> {
}
}
-public class StructArray: ArrowArray<[Any?]> {
- public private(set) var arrowFields: [ArrowArrayHolder]?
+public class NestedArray: ArrowArray<[Any?]> {
+ private var children: [ArrowArrayHolder]?
+
public required init(_ arrowData: ArrowData) throws {
try super.init(arrowData)
- var fields = [ArrowArrayHolder]()
- for child in arrowData.children {
- fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with:
child))
- }
- self.arrowFields = fields
+ switch arrowData.type.id {
+ case .list:
+ guard arrowData.children.count == 1 else {
+ throw ArrowError.invalid("List array must have exactly one
child")
+ }
+
+ guard let listType = arrowData.type as? ArrowTypeList else {
+ throw ArrowError.invalid("Expected ArrowTypeList for list type
ID")
+ }
+
+ self.children = [try ArrowArrayHolderImpl.loadArray(
+ listType.elementField.type,
+ with: arrowData.children[0]
+ )]
+
+ case .strct:
+ var fields = [ArrowArrayHolder]()
+ for child in arrowData.children {
+ fields.append(try ArrowArrayHolderImpl.loadArray(child.type,
with: child))
+ }
+ self.children = fields
+
+ default:
+ throw ArrowError.invalid("NestedArray only supports list and
struct types, got: \(arrowData.type.id)")
+ }
}
public override subscript(_ index: UInt) -> [Any?]? {
@@ -372,36 +395,104 @@ public class StructArray: ArrowArray<[Any?]> {
return nil
}
- if let fields = arrowFields {
+ guard let children = self.children else {
+ return nil
+ }
+
+ switch arrowData.type.id {
+ case .list:
+ guard let values = children.first else { return nil }
+
+ let offsets = self.arrowData.buffers[1]
+ let offsetIndex = Int(index) * MemoryLayout<Int32>.stride
+
+ let startOffset = offsets.rawPointer.advanced(by:
offsetIndex).load(as: Int32.self)
+ let endOffset = offsets.rawPointer.advanced(by: offsetIndex +
MemoryLayout<Int32>.stride).load(as: Int32.self)
+
+ var items = [Any?]()
+ for i in startOffset..<endOffset {
+ items.append(values.array.asAny(UInt(i)))
+ }
+
+ return items
+
+ case .strct:
var result = [Any?]()
- for field in fields {
+ for field in children {
result.append(field.array.asAny(index))
}
-
return result
- }
- return nil
+ default:
+ return nil
+ }
}
public override func asString(_ index: UInt) -> String {
- if self.arrowData.isNull(index) {
- return ""
- }
+ switch arrowData.type.id {
+ case .list:
+ if self.arrowData.isNull(index) {
+ return "null"
+ }
+
+ guard let list = self[index] else {
+ return "null"
+ }
- var output = "{"
- if let fields = arrowFields {
- for fieldIndex in 0..<fields.count {
- let asStr = fields[fieldIndex].array as? AsString
- if fieldIndex == 0 {
- output.append("\(asStr!.asString(index))")
+ var output = "["
+ for (i, item) in list.enumerated() {
+ if i > 0 {
+ output.append(",")
+ }
+
+ if item == nil {
+ output.append("null")
+ } else if let asStringItem = item as? AsString {
+ output.append(asStringItem.asString(0))
} else {
- output.append(",\(asStr!.asString(index))")
+ output.append("\(item!)")
}
}
+ output.append("]")
+ return output
+
+ case .strct:
+ if self.arrowData.isNull(index) {
+ return ""
+ }
+
+ var output = "{"
+ if let children = self.children {
+ for fieldIndex in 0..<children.count {
+ let asStr = children[fieldIndex].array as? AsString
+ if fieldIndex == 0 {
+ output.append("\(asStr!.asString(index))")
+ } else {
+ output.append(",\(asStr!.asString(index))")
+ }
+ }
+ }
+ output += "}"
+ return output
+
+ default:
+ return ""
}
+ }
+
+ public var isListArray: Bool {
+ return arrowData.type.id == .list
+ }
+
+ public var isStructArray: Bool {
+ return arrowData.type.id == .strct
+ }
+
+ public var fields: [ArrowArrayHolder]? {
+ return arrowData.type.id == .strct ? children : nil
+ }
- output += "}"
- return output
+ public var values: ArrowArrayHolder? {
+ return arrowData.type.id == .list ? children?.first : nil
}
}
diff --git a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
index 493e43a..7db249d 100644
--- a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
+++ b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
@@ -125,13 +125,13 @@ public class TimestampArrayBuilder:
ArrowArrayBuilder<FixedBufferBuilder<Int64>,
}
}
-public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder,
StructArray> {
+public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder,
NestedArray> {
let builders: [any ArrowArrayHolderBuilder]
let fields: [ArrowField]
public init(_ fields: [ArrowField], builders: [any
ArrowArrayHolderBuilder]) throws {
self.fields = fields
self.builders = builders
- try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
+ try super.init(ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields))
self.bufferBuilder.initializeTypeInfo(fields)
}
@@ -143,7 +143,7 @@ public class StructArrayBuilder:
ArrowArrayBuilder<StructBufferBuilder, StructAr
}
self.builders = builders
- try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
+ try super.init(ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields))
}
public override func append(_ values: [Any?]?) {
@@ -159,7 +159,7 @@ public class StructArrayBuilder:
ArrowArrayBuilder<StructBufferBuilder, StructAr
}
}
- public override func finish() throws -> StructArray {
+ public override func finish() throws -> NestedArray {
let buffers = self.bufferBuilder.finish()
var childData = [ArrowData]()
for builder in self.builders {
@@ -169,11 +169,40 @@ public class StructArrayBuilder:
ArrowArrayBuilder<StructBufferBuilder, StructAr
let arrowData = try ArrowData(self.type, buffers: buffers,
children: childData, nullCount:
self.nullCount,
length: self.length)
- let structArray = try StructArray(arrowData)
+ let structArray = try NestedArray(arrowData)
return structArray
}
}
+public class ListArrayBuilder: ArrowArrayBuilder<ListBufferBuilder,
NestedArray> {
+ let valueBuilder: any ArrowArrayHolderBuilder
+
+ public override init(_ arrowType: ArrowType) throws {
+ guard let listType = arrowType as? ArrowTypeList else {
+ throw ArrowError.invalid("Expected ArrowTypeList")
+ }
+ let arrowField = listType.elementField
+ self.valueBuilder = try ArrowArrayBuilders.loadBuilder(arrowType:
arrowField.type)
+ try super.init(arrowType)
+ }
+
+ public override func append(_ values: [Any?]?) {
+ self.bufferBuilder.append(values)
+ if let vals = values {
+ for val in vals {
+ self.valueBuilder.appendAny(val)
+ }
+ }
+ }
+
+ public override func finish() throws -> NestedArray {
+ let buffers = self.bufferBuilder.finish()
+ let childData = try valueBuilder.toHolder().array.arrowData
+ let arrowData = try ArrowData(self.type, buffers: buffers, children:
[childData], nullCount: self.nullCount, length: self.length)
+ return try NestedArray(arrowData)
+ }
+}
+
public class ArrowArrayBuilders {
public static func loadBuilder( // swiftlint:disable:this
cyclomatic_complexity
_ builderType: Any.Type) throws -> ArrowArrayHolderBuilder {
@@ -290,6 +319,16 @@ public class ArrowArrayBuilders {
throw ArrowError.invalid("Expected arrow type for
\(arrowType.id) not found")
}
return try TimestampArrayBuilder(timestampType.unit)
+ case .strct:
+ guard let structType = arrowType as? ArrowTypeStruct else {
+ throw ArrowError.invalid("Expected ArrowStructType for
\(arrowType.id)")
+ }
+ return try StructArrayBuilder(structType.fields)
+ case .list:
+ guard let listType = arrowType as? ArrowTypeList else {
+ throw ArrowError.invalid("Expected ArrowTypeList for
\(arrowType.id)")
+ }
+ return try ListArrayBuilder(listType)
default:
throw ArrowError.unknownType("Builder not found for arrow type:
\(arrowType.id)")
}
@@ -353,4 +392,12 @@ public class ArrowArrayBuilders {
public static func loadTimestampArrayBuilder(_ unit: ArrowTimestampUnit,
timezone: String? = nil) throws -> TimestampArrayBuilder {
return try TimestampArrayBuilder(unit, timezone: timezone)
}
+
+ public static func loadStructArrayBuilder(_ fields: [ArrowField]) throws
-> StructArrayBuilder {
+ return try StructArrayBuilder(fields)
+ }
+
+ public static func loadListArrayBuilder(_ listType: ArrowTypeList) throws
-> ListArrayBuilder {
+ return try ListArrayBuilder(listType)
+ }
}
diff --git a/Arrow/Sources/Arrow/ArrowBufferBuilder.swift
b/Arrow/Sources/Arrow/ArrowBufferBuilder.swift
index cc0bae0..4e518c6 100644
--- a/Arrow/Sources/Arrow/ArrowBufferBuilder.swift
+++ b/Arrow/Sources/Arrow/ArrowBufferBuilder.swift
@@ -338,20 +338,20 @@ public class Date64BufferBuilder:
AbstractWrapperBufferBuilder<Date, Int64> {
public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
public typealias ItemType = [Any?]
- var info: ArrowNestedType?
+ var info: ArrowTypeStruct?
public init() throws {
let nulls = ArrowBuffer.createBuffer(0, size:
UInt(MemoryLayout<UInt8>.stride))
super.init(nulls)
}
public func initializeTypeInfo(_ fields: [ArrowField]) {
- info = ArrowNestedType(ArrowType.ArrowStruct, fields: fields)
+ info = ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields)
}
public func append(_ newValue: [Any?]?) {
let index = UInt(self.length)
self.length += 1
- if length > self.nulls.length {
+ if self.length > self.nulls.length {
self.resize(length)
}
@@ -379,3 +379,78 @@ public final class StructBufferBuilder: BaseBufferBuilder,
ArrowBufferBuilder {
return [nulls]
}
}
+
+public class ListBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
+ public typealias ItemType = [Any?]
+ var offsets: ArrowBuffer
+
+ public required init() throws {
+ self.offsets = ArrowBuffer.createBuffer(1, size:
UInt(MemoryLayout<Int32>.stride))
+ let nulls = ArrowBuffer.createBuffer(0, size:
UInt(MemoryLayout<UInt8>.stride))
+ super.init(nulls)
+ self.offsets.rawPointer.storeBytes(of: Int32(0), as: Int32.self)
+ }
+
+ public func append(_ count: Int) {
+ let index = UInt(self.length)
+ self.length += 1
+
+ if length >= self.offsets.length {
+ self.resize(length + 1)
+ }
+
+ let offsetIndex = Int(index) * MemoryLayout<Int32>.stride
+ let currentOffset = self.offsets.rawPointer.advanced(by:
offsetIndex).load(as: Int32.self)
+
+ BitUtility.setBit(index + self.offset, buffer: self.nulls)
+ let newOffset = currentOffset + Int32(count)
+ self.offsets.rawPointer.advanced(by: offsetIndex +
MemoryLayout<Int32>.stride).storeBytes(of: newOffset, as: Int32.self)
+ }
+
+ public func append(_ newValue: [Any?]?) {
+ let index = UInt(self.length)
+ self.length += 1
+
+ if self.length >= self.offsets.length {
+ self.resize(self.length + 1)
+ }
+
+ let offsetIndex = Int(index) * MemoryLayout<Int32>.stride
+ let currentOffset = self.offsets.rawPointer.advanced(by:
offsetIndex).load(as: Int32.self)
+
+ if let vals = newValue {
+ BitUtility.setBit(index + self.offset, buffer: self.nulls)
+ let newOffset = currentOffset + Int32(vals.count)
+ self.offsets.rawPointer.advanced(by: offsetIndex +
MemoryLayout<Int32>.stride).storeBytes(of: newOffset, as: Int32.self)
+ } else {
+ self.nullCount += 1
+ BitUtility.clearBit(index + self.offset, buffer: self.nulls)
+ self.offsets.rawPointer.advanced(by: offsetIndex +
MemoryLayout<Int32>.stride).storeBytes(of: currentOffset, as: Int32.self)
+ }
+ }
+
+ public override func isNull(_ index: UInt) -> Bool {
+ return !BitUtility.isSet(index + self.offset, buffer: self.nulls)
+ }
+
+ public func resize(_ length: UInt) {
+ if length > self.offsets.length {
+ let resizeLength = resizeLength(self.offsets)
+ var offsets = ArrowBuffer.createBuffer(resizeLength, size:
UInt(MemoryLayout<Int32>.size))
+ var nulls = ArrowBuffer.createBuffer(resizeLength/8 + 1, size:
UInt(MemoryLayout<UInt8>.size))
+ ArrowBuffer.copyCurrent(self.offsets, to: &offsets, len:
self.offsets.capacity)
+ ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len:
self.nulls.capacity)
+ self.offsets = offsets
+ self.nulls = nulls
+ }
+ }
+
+ public func finish() -> [ArrowBuffer] {
+ let length = self.length
+ var nulls = ArrowBuffer.createBuffer(length/8 + 1, size:
UInt(MemoryLayout<UInt8>.size))
+ var offsets = ArrowBuffer.createBuffer(length + 1, size:
UInt(MemoryLayout<Int32>.size))
+ ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: nulls.capacity)
+ ArrowBuffer.copyCurrent(self.offsets, to: &offsets, len:
offsets.capacity)
+ return [nulls, offsets]
+ }
+}
diff --git a/Arrow/Sources/Arrow/ArrowReader.swift
b/Arrow/Sources/Arrow/ArrowReader.swift
index 91d7465..bcaa234 100644
--- a/Arrow/Sources/Arrow/ArrowReader.swift
+++ b/Arrow/Sources/Arrow/ArrowReader.swift
@@ -116,6 +116,35 @@ public class ArrowReader { // swiftlint:disable:this
type_body_length
rbLength:
UInt(loadInfo.batchData.recordBatch.length))
}
+ private func loadListData(_ loadInfo: DataLoadInfo, field:
org_apache_arrow_flatbuf_Field) -> Result<ArrowArrayHolder, ArrowError> {
+ guard let node = loadInfo.batchData.nextNode() else {
+ return .failure(.invalid("Node not found"))
+ }
+
+ guard let nullBuffer = loadInfo.batchData.nextBuffer() else {
+ return .failure(.invalid("Null buffer not found"))
+ }
+
+ guard let offsetBuffer = loadInfo.batchData.nextBuffer() else {
+ return .failure(.invalid("Offset buffer not found"))
+ }
+
+ let nullLength = UInt(ceil(Double(node.length) / 8))
+ let arrowNullBuffer = makeBuffer(nullBuffer, fileData:
loadInfo.fileData, length: nullLength, messageOffset: loadInfo.messageOffset)
+ let arrowOffsetBuffer = makeBuffer(offsetBuffer, fileData:
loadInfo.fileData, length: UInt(node.length + 1), messageOffset:
loadInfo.messageOffset)
+
+ guard field.childrenCount == 1, let childField = field.children(at: 0)
else {
+ return .failure(.invalid("List must have exactly one child"))
+ }
+
+ switch loadField(loadInfo, field: childField) {
+ case .success(let childHolder):
+ return makeArrayHolder(field, buffers: [arrowNullBuffer,
arrowOffsetBuffer], nullCount: UInt(node.nullCount), children:
[childHolder.array.arrowData], rbLength:
UInt(loadInfo.batchData.recordBatch.length))
+ case .failure(let error):
+ return .failure(error)
+ }
+ }
+
private func loadPrimitiveData(
_ loadInfo: DataLoadInfo,
field: org_apache_arrow_flatbuf_Field)
@@ -178,12 +207,17 @@ public class ArrowReader { // swiftlint:disable:this
type_body_length
_ loadInfo: DataLoadInfo,
field: org_apache_arrow_flatbuf_Field)
-> Result<ArrowArrayHolder, ArrowError> {
- if isNestedType(field.typeType) {
+ switch field.typeType {
+ case .struct_:
return loadStructData(loadInfo, field: field)
- } else if isFixedPrimitive(field.typeType) {
- return loadPrimitiveData(loadInfo, field: field)
- } else {
- return loadVariableData(loadInfo, field: field)
+ case .list:
+ return loadListData(loadInfo, field: field)
+ default:
+ if isFixedPrimitive(field.typeType) {
+ return loadPrimitiveData(loadInfo, field: field)
+ } else {
+ return loadVariableData(loadInfo, field: field)
+ }
}
}
diff --git a/Arrow/Sources/Arrow/ArrowReaderHelper.swift
b/Arrow/Sources/Arrow/ArrowReaderHelper.swift
index 37f4680..d170a6c 100644
--- a/Arrow/Sources/Arrow/ArrowReaderHelper.swift
+++ b/Arrow/Sources/Arrow/ArrowReaderHelper.swift
@@ -135,7 +135,7 @@ private func makeFixedHolder<T>(
}
}
-func makeStructHolder(
+func makeNestedHolder(
_ field: ArrowField,
buffers: [ArrowBuffer],
nullCount: UInt,
@@ -143,10 +143,14 @@ func makeStructHolder(
rbLength: UInt
) -> Result<ArrowArrayHolder, ArrowError> {
do {
- let arrowData = try ArrowData(field.type,
- buffers: buffers, children: children,
- nullCount: nullCount, length: rbLength)
- return .success(ArrowArrayHolderImpl(try StructArray(arrowData)))
+ let arrowData = try ArrowData(
+ field.type,
+ buffers: buffers,
+ children: children,
+ nullCount: nullCount,
+ length: rbLength
+ )
+ return .success(ArrowArrayHolderImpl(try NestedArray(arrowData)))
} catch let error as ArrowError {
return .failure(error)
} catch {
@@ -207,7 +211,9 @@ func makeArrayHolder( // swiftlint:disable:this
cyclomatic_complexity
case .timestamp:
return makeTimestampHolder(field, buffers: buffers, nullCount:
nullCount)
case .strct:
- return makeStructHolder(field, buffers: buffers, nullCount: nullCount,
children: children!, rbLength: rbLength)
+ return makeNestedHolder(field, buffers: buffers, nullCount: nullCount,
children: children!, rbLength: rbLength)
+ case .list:
+ return makeNestedHolder(field, buffers: buffers, nullCount: nullCount,
children: children!, rbLength: rbLength)
default:
return .failure(.unknownType("Type \(typeId) currently not supported"))
}
@@ -230,15 +236,6 @@ func isFixedPrimitive(_ type:
org_apache_arrow_flatbuf_Type_) -> Bool {
}
}
-func isNestedType(_ type: org_apache_arrow_flatbuf_Type_) -> Bool {
- switch type {
- case .struct_:
- return true
- default:
- return false
- }
-}
-
func findArrowType( // swiftlint:disable:this cyclomatic_complexity
function_body_length
_ field: org_apache_arrow_flatbuf_Field) -> ArrowType {
let type = field.typeType
@@ -307,7 +304,14 @@ func findArrowType( // swiftlint:disable:this
cyclomatic_complexity function_bod
ArrowField(childField.name ?? "", type: childType, isNullable:
childField.nullable))
}
- return ArrowNestedType(ArrowType.ArrowStruct, fields: fields)
+ return ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields)
+ case .list:
+ guard field.childrenCount == 1, let childField = field.children(at: 0)
else {
+ return ArrowType(ArrowType.ArrowUnknown)
+ }
+ let childType = findArrowType(childField)
+ let childFieldName = childField.name ?? "item"
+ return ArrowTypeList(ArrowField(childFieldName, type: childType,
isNullable: childField.nullable))
default:
return ArrowType(ArrowType.ArrowUnknown)
}
diff --git a/Arrow/Sources/Arrow/ArrowType.swift
b/Arrow/Sources/Arrow/ArrowType.swift
index 381078f..a238e99 100644
--- a/Arrow/Sources/Arrow/ArrowType.swift
+++ b/Arrow/Sources/Arrow/ArrowType.swift
@@ -165,7 +165,7 @@ public class ArrowTypeTimestamp: ArrowType {
}
}
-public class ArrowNestedType: ArrowType {
+public class ArrowTypeStruct: ArrowType {
let fields: [ArrowField]
public init(_ info: ArrowType.Info, fields: [ArrowField]) {
self.fields = fields
@@ -173,6 +173,19 @@ public class ArrowNestedType: ArrowType {
}
}
+public class ArrowTypeList: ArrowType {
+ public let elementField: ArrowField
+
+ public init(_ elementField: ArrowField) {
+ self.elementField = elementField
+ super.init(ArrowType.ArrowList)
+ }
+
+ public convenience init(_ elementType: ArrowType, nullable: Bool = true) {
+ self.init(ArrowField("item", type: elementType, isNullable: nullable))
+ }
+}
+
public class ArrowType {
public private(set) var info: ArrowType.Info
public static let ArrowInt8 = Info.primitiveInfo(ArrowTypeId.int8)
@@ -195,6 +208,7 @@ public class ArrowType {
public static let ArrowTime64 = Info.timeInfo(ArrowTypeId.time64)
public static let ArrowTimestamp = Info.timeInfo(ArrowTypeId.timestamp)
public static let ArrowStruct = Info.complexInfo(ArrowTypeId.strct)
+ public static let ArrowList = Info.complexInfo(ArrowTypeId.list)
public init(_ info: ArrowType.Info) {
self.info = info
@@ -320,7 +334,7 @@ public class ArrowType {
return MemoryLayout<Int8>.stride
case .string:
return MemoryLayout<Int8>.stride
- case .strct:
+ case .strct, .list:
return 0
default:
fatalError("Stride requested for unknown type: \(self)")
@@ -375,6 +389,20 @@ public class ArrowType {
return "z"
case ArrowTypeId.string:
return "u"
+ case ArrowTypeId.strct:
+ if let structType = self as? ArrowTypeStruct {
+ var format = "+s"
+ for field in structType.fields {
+ format += try field.type.cDataFormatId
+ }
+ return format
+ }
+ throw ArrowError.invalid("Invalid struct type")
+ case ArrowTypeId.list:
+ if let listType = self as? ArrowTypeList {
+ return "+l" + (try
listType.elementField.type.cDataFormatId)
+ }
+ throw ArrowError.invalid("Invalid list type")
default:
throw ArrowError.notImplemented
}
diff --git a/Arrow/Sources/Arrow/ArrowWriter.swift
b/Arrow/Sources/Arrow/ArrowWriter.swift
index 24176d3..c2ff290 100644
--- a/Arrow/Sources/Arrow/ArrowWriter.swift
+++ b/Arrow/Sources/Arrow/ArrowWriter.swift
@@ -72,7 +72,7 @@ public class ArrowWriter { // swiftlint:disable:this
type_body_length
private func writeField(_ fbb: inout FlatBufferBuilder, field: ArrowField)
-> Result<Offset, ArrowError> {
var fieldsOffset: Offset?
- if let nestedField = field.type as? ArrowNestedType {
+ if let nestedField = field.type as? ArrowTypeStruct {
var offsets = [Offset]()
for field in nestedField.fields {
switch writeField(&fbb, field: field) {
@@ -182,9 +182,11 @@ public class ArrowWriter { // swiftlint:disable:this
type_body_length
org_apache_arrow_flatbuf_FieldNode(length:
Int64(column.length),
nullCount:
Int64(column.nullCount))
offsets.append(fbb.create(struct: fieldNode))
- if let nestedType = column.type as? ArrowNestedType {
- let structArray = column.array as? StructArray
- writeFieldNodes(nestedType.fields, columns:
structArray!.arrowFields!, offsets: &offsets, fbb: &fbb)
+ if let nestedType = column.type as? ArrowTypeStruct {
+ let nestedArray = column.array as? NestedArray
+ if let nestedFields = nestedArray?.fields {
+ writeFieldNodes(nestedType.fields, columns: nestedFields,
offsets: &offsets, fbb: &fbb)
+ }
}
}
}
@@ -202,10 +204,12 @@ public class ArrowWriter { // swiftlint:disable:this
type_body_length
let buffer = org_apache_arrow_flatbuf_Buffer(offset:
Int64(bufferOffset), length: Int64(bufferDataSize))
buffers.append(buffer)
bufferOffset += bufferDataSize
- if let nestedType = column.type as? ArrowNestedType {
- let structArray = column.array as? StructArray
- writeBufferInfo(nestedType.fields, columns:
structArray!.arrowFields!,
- bufferOffset: &bufferOffset, buffers:
&buffers, fbb: &fbb)
+ if let nestedType = column.type as? ArrowTypeStruct {
+ let nestedArray = column.array as? NestedArray
+ if let nestedFields = nestedArray?.fields {
+ writeBufferInfo(nestedType.fields, columns:
nestedFields,
+ bufferOffset: &bufferOffset, buffers:
&buffers, fbb: &fbb)
+ }
}
}
}
@@ -251,20 +255,21 @@ public class ArrowWriter { // swiftlint:disable:this
type_body_length
private func writeRecordBatchData(
_ writer: inout DataWriter, fields: [ArrowField],
- columns: [ArrowArrayHolder])
- -> Result<Bool, ArrowError> {
+ columns: [ArrowArrayHolder]
+ ) -> Result<Bool, ArrowError> {
for index in 0 ..< fields.count {
let column = columns[index]
let colBufferData = column.getBufferData()
for var bufferData in colBufferData {
addPadForAlignment(&bufferData)
writer.append(bufferData)
- if let nestedType = column.type as? ArrowNestedType {
- guard let structArray = column.array as? StructArray else {
+ if let nestedType = column.type as? ArrowTypeStruct {
+ guard let nestedArray = column.array as? NestedArray,
+ let nestedFields = nestedArray.fields else {
return .failure(.invalid("Struct type array expected
for nested type"))
}
- switch writeRecordBatchData(&writer, fields:
nestedType.fields, columns: structArray.arrowFields!) {
+ switch writeRecordBatchData(&writer, fields:
nestedType.fields, columns: nestedFields) {
case .success:
continue
case .failure(let error):
diff --git a/Arrow/Sources/Arrow/ProtoUtil.swift
b/Arrow/Sources/Arrow/ProtoUtil.swift
index e91580e..9440d35 100644
--- a/Arrow/Sources/Arrow/ProtoUtil.swift
+++ b/Arrow/Sources/Arrow/ProtoUtil.swift
@@ -87,7 +87,14 @@ func fromProto( // swiftlint:disable:this
cyclomatic_complexity function_body_le
children.append(fromProto(field: childField))
}
- arrowType = ArrowNestedType(ArrowType.ArrowStruct, fields: children)
+ arrowType = ArrowTypeStruct(ArrowType.ArrowStruct, fields: children)
+ case .list:
+ guard field.childrenCount == 1, let childField = field.children(at: 0)
else {
+ arrowType = ArrowType(ArrowType.ArrowUnknown)
+ break
+ }
+ let childArrowField = fromProto(field: childField)
+ arrowType = ArrowTypeList(childArrowField)
default:
arrowType = ArrowType(ArrowType.ArrowUnknown)
}
diff --git a/Arrow/Tests/ArrowTests/ArrayTests.swift
b/Arrow/Tests/ArrowTests/ArrayTests.swift
index c7142c5..e28d8bf 100644
--- a/Arrow/Tests/ArrowTests/ArrayTests.swift
+++ b/Arrow/Tests/ArrowTests/ArrayTests.swift
@@ -320,8 +320,8 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
let structArray = try structBuilder.finish()
XCTAssertEqual(structArray.length, 3)
XCTAssertNil(structArray[1])
- XCTAssertEqual(structArray.arrowFields![0].length, 3)
- XCTAssertNil(structArray.arrowFields![0].array.asAny(1))
+ XCTAssertEqual(structArray.fields![0].length, 3)
+ XCTAssertNil(structArray.fields![0].array.asAny(1))
XCTAssertEqual(structArray[0]![STIndex.bool.rawValue] as? Bool, true)
XCTAssertEqual(structArray[0]![STIndex.int8.rawValue] as? Int8, 1)
XCTAssertEqual(structArray[0]![STIndex.int16.rawValue] as? Int16, 2)
@@ -438,4 +438,90 @@ final class ArrayTests: XCTestCase { //
swiftlint:disable:this type_body_length
boolBuilder.append([true, false, true, false])
XCTAssertEqual(try boolBuilder.finish()[2], true)
}
+
+ func testListArrayPrimitive() throws {
+ let listBuilder = try
ListArrayBuilder(ArrowTypeList(ArrowType(ArrowType.ArrowInt32)))
+
+ listBuilder.append([Int32(1), Int32(2), Int32(3)])
+ listBuilder.append([Int32(4), Int32(5)])
+ listBuilder.append(nil)
+ listBuilder.append([Int32(6), Int32(7), Int32(8), Int32(9)])
+
+ XCTAssertEqual(listBuilder.length, 4)
+ XCTAssertEqual(listBuilder.nullCount, 1)
+
+ let listArray = try listBuilder.finish()
+ XCTAssertEqual(listArray.length, 4)
+
+ let firstList = listArray[0]
+ XCTAssertNotNil(firstList, "First list should not be nil")
+ XCTAssertEqual(firstList!.count, 3, "First list should have 3
elements")
+ XCTAssertEqual(firstList![0] as? Int32, 1)
+ XCTAssertEqual(firstList![1] as? Int32, 2)
+ XCTAssertEqual(firstList![2] as? Int32, 3)
+
+ let secondList = listArray[1]
+ XCTAssertEqual(secondList!.count, 2)
+ XCTAssertEqual(secondList![0] as? Int32, 4)
+ XCTAssertEqual(secondList![1] as? Int32, 5)
+
+ XCTAssertNil(listArray[2])
+
+ let fourthList = listArray[3]
+ XCTAssertEqual(fourthList!.count, 4)
+ XCTAssertEqual(fourthList![0] as? Int32, 6)
+ XCTAssertEqual(fourthList![3] as? Int32, 9)
+ }
+
+ func testListArrayNested() throws {
+ let innerListType = ArrowTypeList(ArrowField("item", type:
ArrowType(ArrowType.ArrowInt32), isNullable: true))
+ let outerListType = ArrowTypeList(ArrowField("item", type:
innerListType, isNullable: true))
+ let outerListBuilder = try ListArrayBuilder(outerListType)
+
+ guard let innerListBuilder = outerListBuilder.valueBuilder as?
ListArrayBuilder else {
+ XCTFail("Failed to cast valueBuilder to ListArrayBuilder")
+ return
+ }
+
+ outerListBuilder.bufferBuilder.append(2)
+ innerListBuilder.append([Int32(1), Int32(2)])
+ innerListBuilder.append([Int32(3), Int32(4), Int32(5)])
+
+ outerListBuilder.bufferBuilder.append(1)
+ innerListBuilder.append([Int32(6)])
+
+ outerListBuilder.bufferBuilder.append(nil)
+
+ outerListBuilder.bufferBuilder.append([])
+
+ let nestedArray = try outerListBuilder.finish()
+ XCTAssertEqual(nestedArray.length, 4)
+ XCTAssertEqual(nestedArray.nullCount, 1)
+
+ let firstOuterList = nestedArray[0]!
+ XCTAssertEqual(firstOuterList.count, 2)
+
+ let firstInnerList = firstOuterList[0] as! [Any?]
+ XCTAssertEqual(firstInnerList.count, 2)
+ XCTAssertEqual(firstInnerList[0] as? Int32, 1)
+ XCTAssertEqual(firstInnerList[1] as? Int32, 2)
+
+ let secondInnerList = firstOuterList[1] as! [Any?]
+ XCTAssertEqual(secondInnerList.count, 3)
+ XCTAssertEqual(secondInnerList[0] as? Int32, 3)
+ XCTAssertEqual(secondInnerList[1] as? Int32, 4)
+ XCTAssertEqual(secondInnerList[2] as? Int32, 5)
+
+ let secondOuterList = nestedArray[1]!
+ XCTAssertEqual(secondOuterList.count, 1)
+
+ let thirdInnerList = secondOuterList[0] as! [Any?]
+ XCTAssertEqual(thirdInnerList.count, 1)
+ XCTAssertEqual(thirdInnerList[0] as? Int32, 6)
+
+ XCTAssertNil(nestedArray[2])
+
+ let emptyList = nestedArray[3]!
+ XCTAssertEqual(emptyList.count, 0)
+ }
}
diff --git a/Arrow/Tests/ArrowTests/IPCTests.swift
b/Arrow/Tests/ArrowTests/IPCTests.swift
index 26f38ce..ea44b77 100644
--- a/Arrow/Tests/ArrowTests/IPCTests.swift
+++ b/Arrow/Tests/ArrowTests/IPCTests.swift
@@ -90,10 +90,12 @@ func checkStructRecordBatch(_ result:
Result<ArrowReader.ArrowReaderResult, Arro
XCTAssertEqual(recordBatch.schema.fields.count, 1)
XCTAssertEqual(recordBatch.schema.fields[0].name, "my struct")
XCTAssertEqual(recordBatch.schema.fields[0].type.id, .strct)
- let structArray = recordBatch.columns[0].array as? StructArray
- XCTAssertEqual(structArray!.arrowFields!.count, 2)
- XCTAssertEqual(structArray!.arrowFields![0].type.id, .string)
- XCTAssertEqual(structArray!.arrowFields![1].type.id, .boolean)
+ let nestedArray = recordBatch.columns[0].array as? NestedArray
+ XCTAssertNotNil(nestedArray)
+ XCTAssertNotNil(nestedArray!.fields)
+ XCTAssertEqual(nestedArray!.fields!.count, 2)
+ XCTAssertEqual(nestedArray!.fields![0].type.id, .string)
+ XCTAssertEqual(nestedArray!.fields![1].type.id, .boolean)
let column = recordBatch.columns[0]
let str = column.array as? AsString
XCTAssertEqual("\(str!.asString(0))", "{0,false}")
@@ -121,14 +123,14 @@ func makeSchema() -> ArrowSchema {
func makeStructSchema() -> ArrowSchema {
let testObj = StructTest()
var fields = [ArrowField]()
- let buildStructType = {() -> ArrowNestedType in
+ let buildStructType = {() -> ArrowTypeStruct in
let mirror = Mirror(reflecting: testObj)
for (property, value) in mirror.children {
let arrowType = ArrowType(ArrowType.infoForType(type(of: value)))
fields.append(ArrowField(property!, type: arrowType, isNullable:
true))
}
- return ArrowNestedType(ArrowType.ArrowStruct, fields: fields)
+ return ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields)
}
return ArrowSchema.Builder()
@@ -515,8 +517,8 @@ final class IPCFileReaderTests: XCTestCase { // swiftlint:disable:this type_body
XCTAssertEqual(recordBatch.schema.fields.count, 1)
XCTAssertEqual(recordBatch.schema.fields[0].name,
"struct1")
XCTAssertEqual(recordBatch.schema.fields[0].type.id,
.strct)
- XCTAssertTrue(recordBatch.schema.fields[0].type is
ArrowNestedType)
- let nestedType = (recordBatch.schema.fields[0].type as?
ArrowNestedType)!
+ XCTAssertTrue(recordBatch.schema.fields[0].type is
ArrowTypeStruct)
+ let nestedType = (recordBatch.schema.fields[0].type as?
ArrowTypeStruct)!
XCTAssertEqual(nestedType.fields.count, 14)
let columns = recordBatch.columns
XCTAssertEqual(columns[0].nullCount, 1)
@@ -524,23 +526,24 @@ final class IPCFileReaderTests: XCTestCase { // swiftlint:disable:this type_body
let structVal =
"\((columns[0].array as? AsString)!.asString(0))"
XCTAssertEqual(structVal,
"{true,1,2,3,4,5,6,7,8,9.9,10.1,11,12,\(currentDate)}")
- let structArray = (recordBatch.columns[0].array as?
StructArray)!
- XCTAssertEqual(structArray.length, 3)
- XCTAssertEqual(structArray.arrowFields!.count, 14)
- XCTAssertEqual(structArray.arrowFields![0].type.id,
.boolean)
- XCTAssertEqual(structArray.arrowFields![1].type.id, .int8)
- XCTAssertEqual(structArray.arrowFields![2].type.id, .int16)
- XCTAssertEqual(structArray.arrowFields![3].type.id, .int32)
- XCTAssertEqual(structArray.arrowFields![4].type.id, .int64)
- XCTAssertEqual(structArray.arrowFields![5].type.id, .uint8)
- XCTAssertEqual(structArray.arrowFields![6].type.id,
.uint16)
- XCTAssertEqual(structArray.arrowFields![7].type.id,
.uint32)
- XCTAssertEqual(structArray.arrowFields![8].type.id,
.uint64)
- XCTAssertEqual(structArray.arrowFields![9].type.id,
.double)
- XCTAssertEqual(structArray.arrowFields![10].type.id,
.float)
- XCTAssertEqual(structArray.arrowFields![11].type.id,
.string)
- XCTAssertEqual(structArray.arrowFields![12].type.id,
.binary)
- XCTAssertEqual(structArray.arrowFields![13].type.id,
.date64)
+ let nestedArray = (recordBatch.columns[0].array as?
NestedArray)!
+ XCTAssertEqual(nestedArray.length, 3)
+ XCTAssertNotNil(nestedArray.fields)
+ XCTAssertEqual(nestedArray.fields!.count, 14)
+ XCTAssertEqual(nestedArray.fields![0].type.id, .boolean)
+ XCTAssertEqual(nestedArray.fields![1].type.id, .int8)
+ XCTAssertEqual(nestedArray.fields![2].type.id, .int16)
+ XCTAssertEqual(nestedArray.fields![3].type.id, .int32)
+ XCTAssertEqual(nestedArray.fields![4].type.id, .int64)
+ XCTAssertEqual(nestedArray.fields![5].type.id, .uint8)
+ XCTAssertEqual(nestedArray.fields![6].type.id, .uint16)
+ XCTAssertEqual(nestedArray.fields![7].type.id, .uint32)
+ XCTAssertEqual(nestedArray.fields![8].type.id, .uint64)
+ XCTAssertEqual(nestedArray.fields![9].type.id, .double)
+ XCTAssertEqual(nestedArray.fields![10].type.id, .float)
+ XCTAssertEqual(nestedArray.fields![11].type.id, .string)
+ XCTAssertEqual(nestedArray.fields![12].type.id, .binary)
+ XCTAssertEqual(nestedArray.fields![13].type.id, .date64)
}
case.failure(let error):
throw error
diff --git a/Arrow/Tests/ArrowTests/TableTests.swift b/Arrow/Tests/ArrowTests/TableTests.swift
index dc5cabc..6f5482e 100644
--- a/Arrow/Tests/ArrowTests/TableTests.swift
+++ b/Arrow/Tests/ArrowTests/TableTests.swift
@@ -53,14 +53,14 @@ final class TableTests: XCTestCase {
let testObj = StructTest()
var fields = [ArrowField]()
- let buildStructType = {() -> ArrowNestedType in
+ let buildStructType = {() -> ArrowTypeStruct in
let mirror = Mirror(reflecting: testObj)
for (property, value) in mirror.children {
let arrowType = ArrowType(ArrowType.infoForType(type(of:
value)))
fields.append(ArrowField(property!, type: arrowType,
isNullable: true))
}
- return ArrowNestedType(ArrowType.ArrowStruct, fields: fields)
+ return ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields)
}
let structType = buildStructType()