This is an automated email from the ASF dual-hosted git repository. kou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push: new 068416bd41 GH-46296: [Swift] Add support for reading struct (#46302) 068416bd41 is described below commit 068416bd411d6a8e2949f8ebcb2f80e2c302ef6b Author: abandy <aba...@live.com> AuthorDate: Sat May 3 17:35:24 2025 -0400 GH-46296: [Swift] Add support for reading struct (#46302) ### Rationale for this change `makeArrayHolder` supports `ArrowTypeId.strct`. https://github.com/apache/arrow/blob/62c59c23352583980e894a43ec46335c4d55a7e1/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift#L189-L190 However, we cannot use `makeArrayHolder` if `fromProto` fails to handle `org_apache_arrow_flatbuf_Type_.struct_`. https://github.com/apache/arrow/blob/62c59c23352583980e894a43ec46335c4d55a7e1/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift#L139-L148 ### What changes are included in this PR? This PR aims to fix `ProtoUtil.fromProto` to support `struct_`. This PR also adds a unit test for it. This is based on GH-46297. ### Known issues with Swift Arrow Reader Currently, the Swift Arrow reader requires a null buffer to be included in the list of buffers. The reader will fail if the null buffer is missing. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
* GitHub Issue: #46296 Co-authored-by: Dongjoon Hyun <dongjoon@apache.org> Authored-by: Alva Bandy <aba...@live.com> Signed-off-by: Sutou Kouhei <k...@clear-code.com> --- swift/Arrow/Sources/Arrow/ArrowType.swift | 2 ++ swift/Arrow/Sources/Arrow/ProtoUtil.swift | 2 ++ swift/Arrow/Tests/ArrowTests/IPCTests.swift | 40 ++++++++++++++++++++++++++++- swift/data-generator/swift-datagen/go.mod | 3 ++- swift/data-generator/swift-datagen/main.go | 28 ++++++++++++++++++++ 5 files changed, 73 insertions(+), 2 deletions(-) diff --git a/swift/Arrow/Sources/Arrow/ArrowType.swift b/swift/Arrow/Sources/Arrow/ArrowType.swift index 000fe21568..b44f859185 100644 --- a/swift/Arrow/Sources/Arrow/ArrowType.swift +++ b/swift/Arrow/Sources/Arrow/ArrowType.swift @@ -385,6 +385,8 @@ extension ArrowType.Info: Equatable { return lhsId == rhsId case (.timeInfo(let lhsId), .timeInfo(let rhsId)): return lhsId == rhsId + case (.complexInfo(let lhsId), .complexInfo(let rhsId)): + return lhsId == rhsId default: return false } diff --git a/swift/Arrow/Sources/Arrow/ProtoUtil.swift b/swift/Arrow/Sources/Arrow/ProtoUtil.swift index f7fd725fe1..ac61030c08 100644 --- a/swift/Arrow/Sources/Arrow/ProtoUtil.swift +++ b/swift/Arrow/Sources/Arrow/ProtoUtil.swift @@ -64,6 +64,8 @@ func fromProto( // swiftlint:disable:this cyclomatic_complexity let arrowUnit: ArrowTime64Unit = timeType.unit == .microsecond ? 
.microseconds : .nanoseconds arrowType = ArrowTypeTime64(arrowUnit) } + case .struct_: + arrowType = ArrowType(ArrowType.ArrowStruct) default: arrowType = ArrowType(ArrowType.ArrowUnknown) } diff --git a/swift/Arrow/Tests/ArrowTests/IPCTests.swift b/swift/Arrow/Tests/ArrowTests/IPCTests.swift index 311a3bb235..4f56f5fdab 100644 --- a/swift/Arrow/Tests/ArrowTests/IPCTests.swift +++ b/swift/Arrow/Tests/ArrowTests/IPCTests.swift @@ -118,7 +118,44 @@ func makeRecordBatch() throws -> RecordBatch { } } -final class IPCFileReaderTests: XCTestCase { +final class IPCFileReaderTests: XCTestCase { // swiftlint:disable:this type_body_length + func testFileReader_struct() throws { + let fileURL = currentDirectory().appendingPathComponent("../../testdata_struct.arrow") + let arrowReader = ArrowReader() + let result = arrowReader.fromFile(fileURL) + let recordBatches: [RecordBatch] + switch result { + case .success(let result): + recordBatches = result.batches + case .failure(let error): + throw error + } + + XCTAssertEqual(recordBatches.count, 1) + for recordBatch in recordBatches { + XCTAssertEqual(recordBatch.length, 3) + XCTAssertEqual(recordBatch.columns.count, 1) + XCTAssertEqual(recordBatch.schema.fields.count, 1) + XCTAssertEqual(recordBatch.schema.fields[0].type.info, ArrowType.ArrowStruct) + let column = recordBatch.columns[0] + XCTAssertNotNil(column.array as? StructArray) + if let structArray = column.array as? StructArray { + XCTAssertEqual(structArray.arrowFields?.count, 2) + XCTAssertEqual(structArray.arrowFields?[0].type.info, ArrowType.ArrowString) + XCTAssertEqual(structArray.arrowFields?[1].type.info, ArrowType.ArrowBool) + for index in 0..<structArray.length { + if index == 2 { + XCTAssertNil(structArray[index]) + } else { + XCTAssertEqual(structArray[index]?[0] as? String, "\(index)") + XCTAssertEqual(structArray[index]?[1] as? 
Bool, index % 2 == 1) + } + } + } + + } + } + func testFileReader_double() throws { let fileURL = currentDirectory().appendingPathComponent("../../testdata_double.arrow") let arrowReader = ArrowReader() @@ -384,3 +421,4 @@ final class IPCFileReaderTests: XCTestCase { } } } +// swiftlint:disable:this file_length diff --git a/swift/data-generator/swift-datagen/go.mod b/swift/data-generator/swift-datagen/go.mod index c645e6759b..ca0fd5446e 100644 --- a/swift/data-generator/swift-datagen/go.mod +++ b/swift/data-generator/swift-datagen/go.mod @@ -16,7 +16,8 @@ module swift-datagen/main -go 1.22.7 +go 1.23.0 + toolchain go1.24.1 require github.com/apache/arrow-go/v18 v18.2.0 diff --git a/swift/data-generator/swift-datagen/main.go b/swift/data-generator/swift-datagen/main.go index 97b807d9bb..dbe24222fa 100644 --- a/swift/data-generator/swift-datagen/main.go +++ b/swift/data-generator/swift-datagen/main.go @@ -82,7 +82,35 @@ func writeDoubleData() { writeBytes(rec, "testdata_double.arrow") } +func writeStructData() { + mem := memory.NewGoAllocator() + + fields := []arrow.Field{ + {Name: "my struct", Type: arrow.StructOf([]arrow.Field{ + {Name: "my string", Type: arrow.BinaryTypes.String}, + {Name: "my bool", Type: arrow.FixedWidthTypes.Boolean}, + }...)}, + } + + schema := arrow.NewSchema(fields, nil) + + bld := array.NewRecordBuilder(mem, schema) + defer bld.Release() + + sb := bld.Field(0).(*array.StructBuilder) + f1b := sb.FieldBuilder(0).(*array.StringBuilder) + f2b := sb.FieldBuilder(1).(*array.BooleanBuilder) + + sb.AppendValues([]bool{true, true, false}) + f1b.AppendValues([]string{"0", "1", ""}, []bool{true, true, false}) + f2b.AppendValues([]bool{false, true, false}, []bool{true, true, false}) + + rec := bld.NewRecord() + writeBytes(rec, "testdata_struct.arrow") +} + func main() { writeBoolData() writeDoubleData() + writeStructData() }