pitrou commented on code in PR #39091:
URL: https://github.com/apache/arrow/pull/39091#discussion_r1534165670


##########
swift/Arrow/Sources/Arrow/ArrowCExporter.swift:
##########
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Foundation
+import ArrowC
+import Atomics
+
+extension String {
+    var cstring: UnsafePointer<CChar> {
+        (self as NSString).cString(using: String.Encoding.utf8.rawValue)!
+    }
+}
+
+// The memory used by UnsafeAtomic is not automatically
+// reclaimed. Since this value is initialized once
+// and used until the program/app is closed it's
+// memory will be released on program/app exit
+let exportDataCounter: UnsafeAtomic<Int> = .create(0)
+
+public class ArrowCExporter {
+    private class ExportData {
+        let id: Int
+        init() {
+            id = exportDataCounter.loadThenWrappingIncrement(ordering: 
.relaxed)
+            ArrowCExporter.exportedData[id] = self
+        }
+    }
+
+    private class ExportSchema: ExportData {
+        public let arrowTypeName: UnsafePointer<CChar>
+        public let name: UnsafePointer<CChar>
+        private let arrowType: ArrowType
+        init(_ arrowType: ArrowType, name: String = "") throws {
+            self.arrowType = arrowType
+            self.arrowTypeName = try arrowType.cDataFormatId.cstring
+            self.name = name.cstring
+            super.init()
+        }
+    }
+
+    private class ExportArray: ExportData {
+        private let arrowData: ArrowData
+        private(set) var data = [UnsafeRawPointer?]()
+        private(set) var buffers: UnsafeMutablePointer<UnsafeRawPointer?>
+        init(_ arrowData: ArrowData) {
+            // keep a reference to the ArrowData
+            // obj so the memory doesn't get
+            // deallocated
+            self.arrowData = arrowData
+            for arrowBuffer in arrowData.buffers {
+                data.append(arrowBuffer.rawPointer)
+            }
+
+            self.buffers = UnsafeMutablePointer(mutating: data)
+            super.init()
+        }
+    }
+
+    private static var exportedData = [Int: ExportData]()
+    public init() {}
+
+    public func exportType(_ cSchema: inout ArrowC.ArrowSchema, arrowType: 
ArrowType, name: String = "") ->
+        Result<Bool, ArrowError> {
+        do {
+            let exportSchema = try ExportSchema(arrowType, name: name)
+            cSchema.format = exportSchema.arrowTypeName
+            cSchema.name = exportSchema.name
+            cSchema.private_data =
+                UnsafeMutableRawPointer(mutating: UnsafeRawPointer(bitPattern: 
exportSchema.id))

Review Comment:
   Newbie question, but according to the docs, it seems 
`UnsafeMutableRawPointer(bitPattern: exportSchema.id)` should work?



##########
swift/Arrow/Sources/Arrow/ArrowCExporter.swift:
##########
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Foundation
+import ArrowC
+import Atomics
+
+extension String {
+    var cstring: UnsafePointer<CChar> {
+        (self as NSString).cString(using: String.Encoding.utf8.rawValue)!
+    }
+}
+
+// The memory used by UnsafeAtomic is not automatically
+// reclaimed. Since this value is initialized once
+// and used until the program/app is closed it's
+// memory will be released on program/app exit
+let exportDataCounter: UnsafeAtomic<Int> = .create(0)
+
+public class ArrowCExporter {
+    private class ExportData {
+        let id: Int
+        init() {
+            id = exportDataCounter.loadThenWrappingIncrement(ordering: 
.relaxed)
+            ArrowCExporter.exportedData[id] = self
+        }
+    }
+
+    private class ExportSchema: ExportData {
+        public let arrowTypeName: UnsafePointer<CChar>
+        public let name: UnsafePointer<CChar>
+        private let arrowType: ArrowType
+        init(_ arrowType: ArrowType, name: String = "") throws {
+            self.arrowType = arrowType
+            self.arrowTypeName = try arrowType.cDataFormatId.cstring
+            self.name = name.cstring

Review Comment:
   Are we sure the `name` string survives long enough? I see it's not caught in 
this class' members.



##########
swift/Arrow/Sources/Arrow/ArrowCImporter.swift:
##########
@@ -0,0 +1,172 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Foundation
+import ArrowC
+
+public class ImportArrayHolder: ArrowArrayHolder {
+    let cArrayPtr: UnsafePointer<ArrowC.ArrowArray>
+    public var type: ArrowType {self.holder.type}
+    public var length: UInt {self.holder.length}
+    public var nullCount: UInt {self.holder.nullCount}
+    public var array: Any {self.holder.array}
+    public var getBufferData: () -> [Data] {self.holder.getBufferData}
+    public var getBufferDataSizes: () -> [Int] {self.holder.getBufferDataSizes}
+    public var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> 
ArrowColumn {self.holder.getArrowColumn}
+    private let holder: ArrowArrayHolder
+    init(_ holder: ArrowArrayHolder, cArrayPtr: 
UnsafePointer<ArrowC.ArrowArray>) {
+        self.cArrayPtr = cArrayPtr
+        self.holder = holder
+    }
+
+    deinit {
+        if self.cArrayPtr.pointee.release != nil {
+            ArrowCImporter.release(self.cArrayPtr)
+        }
+    }
+}
+
+public class ArrowCImporter {
+    private func appendToBuffer(
+        _ cBuffer: UnsafeRawPointer?,
+        arrowBuffers: inout [ArrowBuffer],
+        length: UInt) {
+        if cBuffer == nil {
+            arrowBuffers.append(ArrowBuffer.createEmptyBuffer())
+            return
+        }
+
+        let pointer = UnsafeMutableRawPointer(mutating: cBuffer)!
+        arrowBuffers.append(
+            ArrowBuffer(length: length, capacity: length, rawPointer: pointer, 
isMemoryOwner: false))
+    }
+
+    public init() {}
+
+    public func importType(_ cArrow: String, name: String = "") ->
+        Result<ArrowField, ArrowError> {
+        do {
+            let type = try ArrowType.fromCDataFormatId(cArrow)
+            return .success(ArrowField(name, type: ArrowType(type.info), 
isNullable: true))
+        } catch {
+            return .failure(.invalid("Error occurred while attempting to 
import type: \(error)"))
+        }
+    }
+
+    public func importField(_ cSchema: ArrowC.ArrowSchema) ->
+        Result<ArrowField, ArrowError> {
+        if cSchema.n_children > 0 {
+            return .failure(.invalid("Children currently not supported"))
+        } else if cSchema.dictionary != nil {
+            return .failure(.invalid("Dictinoary types currently not 
supported"))
+        }
+
+        switch importType(
+            String(cString: cSchema.format), name: String(cString: 
cSchema.name)) {
+        case .success(let field):
+            ArrowCImporter.release(cSchema)
+            return .success(field)
+        case .failure(let err):
+            ArrowCImporter.release(cSchema)

Review Comment:
   If we're releasing on failure here, we should also release in the failure 
cases above (children and dictionary)?



##########
swift/Arrow/Sources/Arrow/ArrowCImporter.swift:
##########
@@ -0,0 +1,172 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Foundation
+import ArrowC
+
+public class ImportArrayHolder: ArrowArrayHolder {
+    let cArrayPtr: UnsafePointer<ArrowC.ArrowArray>
+    public var type: ArrowType {self.holder.type}
+    public var length: UInt {self.holder.length}
+    public var nullCount: UInt {self.holder.nullCount}
+    public var array: Any {self.holder.array}
+    public var getBufferData: () -> [Data] {self.holder.getBufferData}
+    public var getBufferDataSizes: () -> [Int] {self.holder.getBufferDataSizes}
+    public var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> 
ArrowColumn {self.holder.getArrowColumn}
+    private let holder: ArrowArrayHolder
+    init(_ holder: ArrowArrayHolder, cArrayPtr: 
UnsafePointer<ArrowC.ArrowArray>) {
+        self.cArrayPtr = cArrayPtr
+        self.holder = holder
+    }
+
+    deinit {
+        if self.cArrayPtr.pointee.release != nil {
+            ArrowCImporter.release(self.cArrayPtr)
+        }
+    }
+}
+
+public class ArrowCImporter {
+    private func appendToBuffer(
+        _ cBuffer: UnsafeRawPointer?,
+        arrowBuffers: inout [ArrowBuffer],
+        length: UInt) {
+        if cBuffer == nil {
+            arrowBuffers.append(ArrowBuffer.createEmptyBuffer())
+            return
+        }
+
+        let pointer = UnsafeMutableRawPointer(mutating: cBuffer)!
+        arrowBuffers.append(
+            ArrowBuffer(length: length, capacity: length, rawPointer: pointer, 
isMemoryOwner: false))
+    }
+
+    public init() {}
+
+    public func importType(_ cArrow: String, name: String = "") ->
+        Result<ArrowField, ArrowError> {
+        do {
+            let type = try ArrowType.fromCDataFormatId(cArrow)
+            return .success(ArrowField(name, type: ArrowType(type.info), 
isNullable: true))
+        } catch {
+            return .failure(.invalid("Error occurred while attempting to 
import type: \(error)"))
+        }
+    }
+
+    public func importField(_ cSchema: ArrowC.ArrowSchema) ->
+        Result<ArrowField, ArrowError> {
+        if cSchema.n_children > 0 {
+            return .failure(.invalid("Children currently not supported"))
+        } else if cSchema.dictionary != nil {
+            return .failure(.invalid("Dictinoary types currently not 
supported"))
+        }
+
+        switch importType(
+            String(cString: cSchema.format), name: String(cString: 
cSchema.name)) {
+        case .success(let field):
+            ArrowCImporter.release(cSchema)
+            return .success(field)
+        case .failure(let err):
+            ArrowCImporter.release(cSchema)
+            return .failure(err)
+        }
+    }
+
+    public func importArray(
+        _ cArray: UnsafePointer<ArrowC.ArrowArray>,
+        arrowType: ArrowType,
+        isNullable: Bool = false
+    ) -> Result<ArrowArrayHolder, ArrowError> {
+        let arrowField = ArrowField("", type: arrowType, isNullable: 
isNullable)
+        return importArray(cArray, arrowField: arrowField)
+    }
+
+    public func importArray( // swiftlint:disable:this cyclomatic_complexity
+        _ cArrayPtr: UnsafePointer<ArrowC.ArrowArray>,
+        arrowField: ArrowField
+    ) -> Result<ArrowArrayHolder, ArrowError> {
+        let cArray = cArrayPtr.pointee
+        if cArray.null_count < 0 {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Uncomputed null count is not supported"))
+        } else if cArray.n_children > 0 {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Children currently not supported"))
+        } else if cArray.dictionary != nil {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Dictionary types currently not 
supported"))
+        } else if cArray.offset != 0 {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Offset of 0 is required but found 
offset: \(cArray.offset)"))
+        }
+
+        let arrowType = arrowField.type
+        let length = UInt(cArray.length)
+        let nullCount = UInt(cArray.null_count)
+        var arrowBuffers = [ArrowBuffer]()
+
+        if cArray.n_buffers > 0 {
+            if cArray.buffers == nil {
+                ArrowCImporter.release(cArrayPtr)
+                return .failure(.invalid("C array buffers is nil"))
+            }
+
+            switch arrowType.info {
+            case .variableInfo:
+                if cArray.n_buffers != 3 {
+                    ArrowCImporter.release(cArrayPtr)
+                    return .failure(
+                        .invalid("Variable buffer count expected 3 but found 
\(cArray.n_buffers)"))
+                }
+
+                appendToBuffer(cArray.buffers[0], arrowBuffers: &arrowBuffers, 
length: length)
+                appendToBuffer(cArray.buffers[1], arrowBuffers: &arrowBuffers, 
length: length)
+                appendToBuffer(cArray.buffers[2], arrowBuffers: &arrowBuffers, 
length: length)
+            default:
+                if cArray.n_buffers != 2 {
+                    ArrowCImporter.release(cArrayPtr)
+                    return .failure(.invalid("Expected buffer count 2 but 
found \(cArray.n_buffers)"))
+                }
+
+                appendToBuffer(cArray.buffers[0], arrowBuffers: &arrowBuffers, 
length: length)
+                appendToBuffer(cArray.buffers[1], arrowBuffers: &arrowBuffers, 
length: length)
+            }
+        }
+
+        switch makeArrayHolder(arrowField, buffers: arrowBuffers, nullCount: 
nullCount) {
+        case .success(let holder):
+            return .success(ImportArrayHolder(holder, cArrayPtr: cArrayPtr))
+        case .failure(let err):
+            return .failure(err)
+        }
+    }
+
+    public static func release(_ cArrayPtr: UnsafePointer<ArrowC.ArrowArray>) {
+        if cArrayPtr.pointee.release != nil {
+            let cSchemaMutablePtr = 
UnsafeMutablePointer<ArrowC.ArrowArray>(mutating: cArrayPtr)
+            cArrayPtr.pointee.release(cSchemaMutablePtr)
+        }
+    }
+
+    public static func release(_ cSchema: ArrowC.ArrowSchema) {

Review Comment:
   I'm curious: why not take a `UnsafePointer<ArrowC.ArrowSchema>` directly 
here?



##########
swift/Arrow/Sources/Arrow/ArrowType.swift:
##########
@@ -209,6 +231,100 @@ public class ArrowType {
             fatalError("Stride requested for unknown type: \(self)")
         }
     }
+
+    public var cDataFormatId: String {
+        get throws {
+            switch self.id {
+            case ArrowTypeId.int8:
+                return "c"
+            case ArrowTypeId.int16:
+                return "s"
+            case ArrowTypeId.int32:
+                return "i"
+            case ArrowTypeId.int64:
+                return "l"
+            case ArrowTypeId.uint8:
+                return "C"
+            case ArrowTypeId.uint16:
+                return "S"
+            case ArrowTypeId.uint32:
+                return "I"
+            case ArrowTypeId.uint64:
+                return "L"
+            case ArrowTypeId.float:
+                return "f"
+            case ArrowTypeId.double:
+                return "g"
+            case ArrowTypeId.boolean:
+                return "b"
+            case ArrowTypeId.date32:
+                return "tdD"
+            case ArrowTypeId.date64:
+                return "tdm"
+            case ArrowTypeId.time32:
+                if let time32 = self as? ArrowTypeTime32 {

Review Comment:
   Is there a reason this may fail?



##########
swift/Arrow/Sources/ArrowC/ArrowCData.c:
##########
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "include/ArrowCData.h"
+
+void ArrowSwiftClearReleaseSchema(struct ArrowSchema* arrowSchema) {

Review Comment:
   I'm curious: do you actually need these functions to be implemented in C? 



##########
swift/Arrow/Sources/Arrow/ArrowCExporter.swift:
##########
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Foundation
+import ArrowC
+import Atomics
+
+extension String {
+    var cstring: UnsafePointer<CChar> {
+        (self as NSString).cString(using: String.Encoding.utf8.rawValue)!

Review Comment:
   Ok, I was still a bit mystified by this, and it appears that using the 
`cString` method actually creates a memory leak, at least on non-Apple 
platforms:
   https://github.com/apple/swift-corelibs-foundation/issues/3885
   
   Instead, perhaps we should use 
[NSString.maximumLengthOfBytes](https://developer.apple.com/documentation/foundation/nsstring/1411611-maximumlengthofbytes)
 and 
[NSString.getCString](https://developer.apple.com/documentation/foundation/nsstring/1415702-getcstring)
 so as to control the lifetime of the CChar array.



##########
swift/Arrow/Sources/Arrow/ArrowCExporter.swift:
##########
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Foundation
+import ArrowC
+import Atomics
+
+extension String {
+    var cstring: UnsafePointer<CChar> {
+        (self as NSString).cString(using: String.Encoding.utf8.rawValue)!
+    }
+}
+
+// The memory used by UnsafeAtomic is not automatically
+// reclaimed. Since this value is initialized once
+// and used until the program/app is closed it's
+// memory will be released on program/app exit
+let exportDataCounter: UnsafeAtomic<Int> = .create(0)
+
+public class ArrowCExporter {
+    private class ExportData {
+        let id: Int
+        init() {
+            id = exportDataCounter.loadThenWrappingIncrement(ordering: 
.relaxed)
+            ArrowCExporter.exportedData[id] = self
+        }
+    }
+
+    private class ExportSchema: ExportData {
+        public let arrowTypeName: UnsafePointer<CChar>
+        public let name: UnsafePointer<CChar>
+        private let arrowType: ArrowType
+        init(_ arrowType: ArrowType, name: String = "") throws {
+            self.arrowType = arrowType
+            self.arrowTypeName = try arrowType.cDataFormatId.cstring
+            self.name = name.cstring
+            super.init()
+        }
+    }
+
+    private class ExportArray: ExportData {
+        private let arrowData: ArrowData
+        private(set) var data = [UnsafeRawPointer?]()
+        private(set) var buffers: UnsafeMutablePointer<UnsafeRawPointer?>
+        init(_ arrowData: ArrowData) {
+            // keep a reference to the ArrowData
+            // obj so the memory doesn't get
+            // deallocated
+            self.arrowData = arrowData
+            for arrowBuffer in arrowData.buffers {
+                data.append(arrowBuffer.rawPointer)
+            }
+
+            self.buffers = UnsafeMutablePointer(mutating: data)
+            super.init()
+        }
+    }
+
+    private static var exportedData = [Int: ExportData]()
+    public init() {}
+
+    public func exportType(_ cSchema: inout ArrowC.ArrowSchema, arrowType: 
ArrowType, name: String = "") ->
+        Result<Bool, ArrowError> {
+        do {
+            let exportSchema = try ExportSchema(arrowType, name: name)
+            cSchema.format = exportSchema.arrowTypeName
+            cSchema.name = exportSchema.name
+            cSchema.private_data =
+                UnsafeMutableRawPointer(mutating: UnsafeRawPointer(bitPattern: 
exportSchema.id))
+            cSchema.release = {(data: 
UnsafeMutablePointer<ArrowC.ArrowSchema>?) in
+                let arraySchema = data!.pointee
+                let exportId = Int(bitPattern: arraySchema.private_data)
+                guard ArrowCExporter.exportedData[exportId] != nil else {
+                    fatalError("Export schema not found with id \(exportId)")
+                }
+
+                // the data associated with this exportSchema object
+                // which includes the C strings for the format and name
+                // be deallocated upon removal
+                ArrowCExporter.exportedData.removeValue(forKey: exportId)
+                ArrowC.ArrowSwiftClearReleaseSchema(data)
+            }
+        } catch {
+            return .failure(.unknownError("\(error)"))

Review Comment:
   Two questions:
   1. if we arrive here, then the `exportSchema` object is never removed from 
`exportedData` and therefore leaks until the end of the process, right?
   2. can we fail with a better error message?



##########
swift/Arrow/Sources/Arrow/ArrowCImporter.swift:
##########
@@ -0,0 +1,172 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Foundation
+import ArrowC
+
+public class ImportArrayHolder: ArrowArrayHolder {
+    let cArrayPtr: UnsafePointer<ArrowC.ArrowArray>
+    public var type: ArrowType {self.holder.type}
+    public var length: UInt {self.holder.length}
+    public var nullCount: UInt {self.holder.nullCount}
+    public var array: Any {self.holder.array}
+    public var getBufferData: () -> [Data] {self.holder.getBufferData}
+    public var getBufferDataSizes: () -> [Int] {self.holder.getBufferDataSizes}
+    public var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> 
ArrowColumn {self.holder.getArrowColumn}
+    private let holder: ArrowArrayHolder
+    init(_ holder: ArrowArrayHolder, cArrayPtr: 
UnsafePointer<ArrowC.ArrowArray>) {
+        self.cArrayPtr = cArrayPtr
+        self.holder = holder
+    }
+
+    deinit {
+        if self.cArrayPtr.pointee.release != nil {
+            ArrowCImporter.release(self.cArrayPtr)
+        }
+    }
+}
+
+public class ArrowCImporter {
+    private func appendToBuffer(
+        _ cBuffer: UnsafeRawPointer?,
+        arrowBuffers: inout [ArrowBuffer],
+        length: UInt) {
+        if cBuffer == nil {
+            arrowBuffers.append(ArrowBuffer.createEmptyBuffer())
+            return
+        }
+
+        let pointer = UnsafeMutableRawPointer(mutating: cBuffer)!
+        arrowBuffers.append(
+            ArrowBuffer(length: length, capacity: length, rawPointer: pointer, 
isMemoryOwner: false))
+    }
+
+    public init() {}
+
+    public func importType(_ cArrow: String, name: String = "") ->
+        Result<ArrowField, ArrowError> {
+        do {
+            let type = try ArrowType.fromCDataFormatId(cArrow)
+            return .success(ArrowField(name, type: ArrowType(type.info), 
isNullable: true))
+        } catch {
+            return .failure(.invalid("Error occurred while attempting to 
import type: \(error)"))
+        }
+    }
+
+    public func importField(_ cSchema: ArrowC.ArrowSchema) ->
+        Result<ArrowField, ArrowError> {
+        if cSchema.n_children > 0 {
+            return .failure(.invalid("Children currently not supported"))
+        } else if cSchema.dictionary != nil {
+            return .failure(.invalid("Dictinoary types currently not 
supported"))
+        }
+
+        switch importType(
+            String(cString: cSchema.format), name: String(cString: 
cSchema.name)) {
+        case .success(let field):
+            ArrowCImporter.release(cSchema)
+            return .success(field)
+        case .failure(let err):
+            ArrowCImporter.release(cSchema)
+            return .failure(err)
+        }
+    }
+
+    public func importArray(
+        _ cArray: UnsafePointer<ArrowC.ArrowArray>,
+        arrowType: ArrowType,
+        isNullable: Bool = false
+    ) -> Result<ArrowArrayHolder, ArrowError> {
+        let arrowField = ArrowField("", type: arrowType, isNullable: 
isNullable)
+        return importArray(cArray, arrowField: arrowField)
+    }
+
+    public func importArray( // swiftlint:disable:this cyclomatic_complexity
+        _ cArrayPtr: UnsafePointer<ArrowC.ArrowArray>,
+        arrowField: ArrowField
+    ) -> Result<ArrowArrayHolder, ArrowError> {
+        let cArray = cArrayPtr.pointee
+        if cArray.null_count < 0 {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Uncomputed null count is not supported"))
+        } else if cArray.n_children > 0 {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Children currently not supported"))
+        } else if cArray.dictionary != nil {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Dictionary types currently not 
supported"))
+        } else if cArray.offset != 0 {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Offset of 0 is required but found 
offset: \(cArray.offset)"))
+        }
+
+        let arrowType = arrowField.type
+        let length = UInt(cArray.length)
+        let nullCount = UInt(cArray.null_count)
+        var arrowBuffers = [ArrowBuffer]()
+
+        if cArray.n_buffers > 0 {
+            if cArray.buffers == nil {
+                ArrowCImporter.release(cArrayPtr)
+                return .failure(.invalid("C array buffers is nil"))
+            }
+
+            switch arrowType.info {
+            case .variableInfo:
+                if cArray.n_buffers != 3 {
+                    ArrowCImporter.release(cArrayPtr)
+                    return .failure(
+                        .invalid("Variable buffer count expected 3 but found 
\(cArray.n_buffers)"))
+                }
+
+                appendToBuffer(cArray.buffers[0], arrowBuffers: &arrowBuffers, 
length: length)
+                appendToBuffer(cArray.buffers[1], arrowBuffers: &arrowBuffers, 
length: length)
+                appendToBuffer(cArray.buffers[2], arrowBuffers: &arrowBuffers, 
length: length)
+            default:
+                if cArray.n_buffers != 2 {
+                    ArrowCImporter.release(cArrayPtr)
+                    return .failure(.invalid("Expected buffer count 2 but 
found \(cArray.n_buffers)"))
+                }
+
+                appendToBuffer(cArray.buffers[0], arrowBuffers: &arrowBuffers, 
length: length)
+                appendToBuffer(cArray.buffers[1], arrowBuffers: &arrowBuffers, 
length: length)

Review Comment:
   Same here, if we actually care about the buffers' bytelengths:
   * for buffer 0 (null bitmap): same calculation as above
   * for buffer 1 (primitive values): `(length + 7) / 8` if type is boolean, 
otherwise `length * sizeof(primitive type)`



##########
swift/Arrow/Sources/Arrow/ArrowCImporter.swift:
##########
@@ -0,0 +1,172 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Foundation
+import ArrowC
+
+public class ImportArrayHolder: ArrowArrayHolder {
+    let cArrayPtr: UnsafePointer<ArrowC.ArrowArray>

Review Comment:
   You should store the struct itself, not a pointer, so that 
`ImportArrayHolder` controls its lifetime. Otherwise, there might be a memory 
leak, or conversely a use-after-free.
   
   This implies that `importArray` should bit-wise or member-wise (as you 
prefer) move the `ArrowArray` struct into `ImportArrayHolder`. This is also 
explained in 
https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array
   
   Equivalent code in C# (note the two last lines in this constructor: first 
member-wise copy the structure, second mark the source structure released):
   
https://github.com/apache/arrow/blob/a36cc62f45e0ebe594f97212728531148afa6e82/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs#L98-L114



##########
swift/Arrow/Sources/Arrow/ArrowCImporter.swift:
##########
@@ -0,0 +1,172 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Foundation
+import ArrowC
+
+public class ImportArrayHolder: ArrowArrayHolder {
+    let cArrayPtr: UnsafePointer<ArrowC.ArrowArray>
+    public var type: ArrowType {self.holder.type}
+    public var length: UInt {self.holder.length}
+    public var nullCount: UInt {self.holder.nullCount}
+    public var array: Any {self.holder.array}
+    public var getBufferData: () -> [Data] {self.holder.getBufferData}
+    public var getBufferDataSizes: () -> [Int] {self.holder.getBufferDataSizes}
+    public var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> 
ArrowColumn {self.holder.getArrowColumn}
+    private let holder: ArrowArrayHolder
+    init(_ holder: ArrowArrayHolder, cArrayPtr: 
UnsafePointer<ArrowC.ArrowArray>) {
+        self.cArrayPtr = cArrayPtr
+        self.holder = holder
+    }
+
+    deinit {
+        if self.cArrayPtr.pointee.release != nil {
+            ArrowCImporter.release(self.cArrayPtr)
+        }
+    }
+}
+
+public class ArrowCImporter {
+    private func appendToBuffer(
+        _ cBuffer: UnsafeRawPointer?,
+        arrowBuffers: inout [ArrowBuffer],
+        length: UInt) {
+        if cBuffer == nil {
+            arrowBuffers.append(ArrowBuffer.createEmptyBuffer())
+            return
+        }
+
+        let pointer = UnsafeMutableRawPointer(mutating: cBuffer)!
+        arrowBuffers.append(
+            ArrowBuffer(length: length, capacity: length, rawPointer: pointer, 
isMemoryOwner: false))
+    }
+
+    public init() {}
+
+    public func importType(_ cArrow: String, name: String = "") ->
+        Result<ArrowField, ArrowError> {
+        do {
+            let type = try ArrowType.fromCDataFormatId(cArrow)
+            return .success(ArrowField(name, type: ArrowType(type.info), 
isNullable: true))
+        } catch {
+            return .failure(.invalid("Error occurred while attempting to 
import type: \(error)"))
+        }
+    }
+
+    public func importField(_ cSchema: ArrowC.ArrowSchema) ->
+        Result<ArrowField, ArrowError> {
+        if cSchema.n_children > 0 {
+            return .failure(.invalid("Children currently not supported"))
+        } else if cSchema.dictionary != nil {
+            return .failure(.invalid("Dictinoary types currently not 
supported"))
+        }
+
+        switch importType(
+            String(cString: cSchema.format), name: String(cString: 
cSchema.name)) {
+        case .success(let field):
+            ArrowCImporter.release(cSchema)
+            return .success(field)
+        case .failure(let err):
+            ArrowCImporter.release(cSchema)
+            return .failure(err)
+        }
+    }
+
+    public func importArray(
+        _ cArray: UnsafePointer<ArrowC.ArrowArray>,
+        arrowType: ArrowType,
+        isNullable: Bool = false
+    ) -> Result<ArrowArrayHolder, ArrowError> {
+        let arrowField = ArrowField("", type: arrowType, isNullable: 
isNullable)
+        return importArray(cArray, arrowField: arrowField)
+    }
+
+    public func importArray( // swiftlint:disable:this cyclomatic_complexity
+        _ cArrayPtr: UnsafePointer<ArrowC.ArrowArray>,
+        arrowField: ArrowField
+    ) -> Result<ArrowArrayHolder, ArrowError> {
+        let cArray = cArrayPtr.pointee
+        if cArray.null_count < 0 {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Uncomputed null count is not supported"))
+        } else if cArray.n_children > 0 {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Children currently not supported"))
+        } else if cArray.dictionary != nil {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Dictionary types currently not 
supported"))
+        } else if cArray.offset != 0 {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Offset of 0 is required but found 
offset: \(cArray.offset)"))
+        }
+
+        let arrowType = arrowField.type
+        let length = UInt(cArray.length)
+        let nullCount = UInt(cArray.null_count)
+        var arrowBuffers = [ArrowBuffer]()
+
+        if cArray.n_buffers > 0 {
+            if cArray.buffers == nil {
+                ArrowCImporter.release(cArrayPtr)
+                return .failure(.invalid("C array buffers is nil"))
+            }
+
+            switch arrowType.info {
+            case .variableInfo:
+                if cArray.n_buffers != 3 {
+                    ArrowCImporter.release(cArrayPtr)
+                    return .failure(
+                        .invalid("Variable buffer count expected 3 but found 
\(cArray.n_buffers)"))
+                }
+
+                appendToBuffer(cArray.buffers[0], arrowBuffers: &arrowBuffers, 
length: length)

Review Comment:
   If `appendToBuffer` takes a length in bytes, then the value given is not 
correct, is it? It should probably be:
   * for buffer 0 (null bitmap): `(length + 7) / 8`
   * for buffer 1 (32-bit offsets): `(length + 1) * 4`
   * for buffer 2 (string bytes): `offsets[length] - offsets[0]` where 
`offsets` is a int32 pointer to the start of `buffers[1]`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to