This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 5b23241  [Python] Add basic Python package structure and build setup (#52)
5b23241 is described below

commit 5b2324165d766f6be966d19c25b51163f1519be9
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Mon Oct 24 15:20:58 2022 +0200

    [Python] Add basic Python package structure and build setup (#52)
---
 .github/workflows/python.yaml        |  59 ++++++++++++++++
 python/.gitignore                    |  53 +++++++++++++++
 python/README.md                     |  44 ++++++++++++
 python/pyproject.toml                |  42 ++++++++++++
 python/setup.py                      |  49 ++++++++++++++
 python/src/nanoarrow/__init__.py     |  20 ++++++
 python/src/nanoarrow/_lib.pyx        |  86 ++++++++++++++++++++++++
 python/src/nanoarrow/nanoarrow_c.pxd | 127 +++++++++++++++++++++++++++++++++++
 python/tests/test_nanoarrow.py       |  27 ++++++++
 9 files changed, 507 insertions(+)

diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
new file mode 100644
index 0000000..43a56d9
--- /dev/null
+++ b/.github/workflows/python.yaml
@@ -0,0 +1,59 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Test Python
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+    paths:
+      - '.github/workflows/python.yaml'
+      - 'src/nanoarrow/**'
+      - 'python/**'
+
+jobs:
+  test-python:
+
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ['3.10']
+
+    steps:
+      - uses: actions/checkout@v3
+      
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'
+
+      - name: Install nanoarrow
+        run: |
+          pushd python
+          pip install .[test]
+          popd
+          pip list
+
+      - name: Run tests
+        run: |
+          pytest python/tests -v -s
diff --git a/python/.gitignore b/python/.gitignore
new file mode 100644
index 0000000..fcf8363
--- /dev/null
+++ b/python/.gitignore
@@ -0,0 +1,53 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+src/nanoarrow/nanoarrow.c
+src/nanoarrow/nanoarrow.h
+src/nanoarrow/*.cpp
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Unit test / coverage reports
+.pytest_cache/
\ No newline at end of file
diff --git a/python/README.md b/python/README.md
new file mode 100644
index 0000000..701896b
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,44 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# nanoarrow for Python
+
+Python bindings for nanoarrow.
+## Building
+
+Python libraries are managed with [setuptools][setuptools]. In general, that
+means all projects can be built as follows:
+
+```shell
+$ cd python
+$ pip install -e .
+```
+
+Tests use [pytest][pytest]:
+
+```shell
+# Install dependencies
+$ pip install -e .[test]
+
+# Run tests
+$ pytest -vvx
+```
+
+[pytest]: https://docs.pytest.org/
+[setuptools]: https://setuptools.pypa.io/en/latest/index.html
\ No newline at end of file
diff --git a/python/pyproject.toml b/python/pyproject.toml
new file mode 100644
index 0000000..1cc2c17
--- /dev/null
+++ b/python/pyproject.toml
@@ -0,0 +1,42 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[project]
+name = "nanoarrow"
+version = "1.0.0-alpha0"
+description = ""
+authors = [{name = "Apache Arrow Developers", email = "[email protected]"}]
+license = {text = "Apache-2.0"}
+requires-python = ">=3.8"
+dependencies = ["numpy"]
+
+[project.optional-dependencies]
+test = ["pyarrow", "pytest"]
+
+[project.urls]
+homepage = "https://arrow.apache.org"
+repository = "https://github.com/apache/arrow-nanoarrow"
+
+[build-system]
+requires = [
+    "setuptools >= 61.0.0",
+    "setuptools-scm",
+    "Cython",
+    "oldest-supported-numpy",
+]
+build-backend = "setuptools.build_meta"
diff --git a/python/setup.py b/python/setup.py
new file mode 100644
index 0000000..f6f7efb
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import shutil
+from pathlib import Path
+
+from setuptools import Extension, setup
+
+import numpy as np
+
+
+# setuptools gets confused by relative paths that extend above the project root
+target = Path(__file__).parent / "src" / "nanoarrow"
+shutil.copy(
+    Path(__file__).parent / "../dist/nanoarrow.c", target / "nanoarrow.c"
+)
+shutil.copy(
+    Path(__file__).parent / "../dist/nanoarrow.h", target / "nanoarrow.h"
+)
+
+setup(
+    ext_modules=[
+        Extension(
+            name="nanoarrow._lib",
+            include_dirs=[np.get_include(), "src/nanoarrow"],
+            language="c++",
+            sources=[
+                "src/nanoarrow/_lib.pyx",
+                "src/nanoarrow/nanoarrow.c",
+            ],
+        )
+    ]
+)
diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/__init__.py
new file mode 100644
index 0000000..1586e60
--- /dev/null
+++ b/python/src/nanoarrow/__init__.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from ._lib import (  # noqa: F401
+    as_numpy_array,
+)
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
new file mode 100644
index 0000000..a6b4da1
--- /dev/null
+++ b/python/src/nanoarrow/_lib.pyx
@@ -0,0 +1,86 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: language_level = 3
+
+"""Low-level nanoarrow Python bindings."""
+
+from libc.stdint cimport uint8_t, uintptr_t
+
+from nanoarrow_c cimport *
+
+import numpy as np
+cimport numpy as cnp
+
+cnp.import_array()
+
+
+cdef dict _numpy_type_map = {
+    NANOARROW_TYPE_UINT8: cnp.NPY_UINT8,
+    NANOARROW_TYPE_INT8: cnp.NPY_INT8,
+    NANOARROW_TYPE_UINT16: cnp.NPY_UINT16,
+    NANOARROW_TYPE_INT16: cnp.NPY_INT16,
+    NANOARROW_TYPE_UINT32: cnp.NPY_UINT32,
+    NANOARROW_TYPE_INT32: cnp.NPY_INT32,
+    NANOARROW_TYPE_UINT64: cnp.NPY_UINT64,
+    NANOARROW_TYPE_INT64: cnp.NPY_INT64,
+    NANOARROW_TYPE_HALF_FLOAT: cnp.NPY_FLOAT16,
+    NANOARROW_TYPE_FLOAT: cnp.NPY_FLOAT32,
+    NANOARROW_TYPE_DOUBLE: cnp.NPY_FLOAT64,
+}
+
+
+def as_numpy_array(arr):
+    cdef ArrowSchema schema
+    cdef ArrowArray array
+    cdef ArrowArrayView array_view
+    cdef ArrowError error
+
+    arr._export_to_c(<uintptr_t> &array, <uintptr_t> &schema)
+    ArrowArrayViewInitFromSchema(&array_view, &schema, &error)
+
+    # primitive arrays have DATA as the second buffer
+    if array_view.layout.buffer_type[1] != NANOARROW_BUFFER_TYPE_DATA:
+        raise TypeError("Cannot convert a non-primitive array")
+
+    # disallow nulls for this method
+    if array.null_count > 0:
+        raise ValueError("Cannot convert array with nulls")
+    elif array.null_count < 0:
+        # not yet computed
+        if array_view.layout.buffer_type[0] == NANOARROW_BUFFER_TYPE_VALIDITY:
+            if array.buffers[0] != NULL:
+                null_count = ArrowBitCountSet(
+                    <const uint8_t *>array.buffers[0], array.offset, array.length
+                )
+                if null_count > 0:
+                    raise ValueError("Cannot convert array with nulls")
+
+    cdef int type_num
+    if array_view.storage_type in _numpy_type_map:
+        type_num = _numpy_type_map[array_view.storage_type]
+    else:
+        raise NotImplementedError(array_view.storage_type)
+
+    cdef cnp.npy_intp dims[1]
+    dims[0] = array.length
+    cdef cnp.ndarray result = cnp.PyArray_New(
+        np.ndarray, 1, dims, type_num, NULL, <void *> array.buffers[1], -1, 0, <object>NULL
+    )
+    # TODO set base
+
+    return result
diff --git a/python/src/nanoarrow/nanoarrow_c.pxd b/python/src/nanoarrow/nanoarrow_c.pxd
new file mode 100644
index 0000000..9bfc488
--- /dev/null
+++ b/python/src/nanoarrow/nanoarrow_c.pxd
@@ -0,0 +1,127 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: language_level = 3
+
+from libc.stdint cimport int64_t, int8_t, uint8_t
+
+
+cdef extern from "nanoarrow.h":
+    struct ArrowSchema:
+        const char* format
+        int64_t n_children
+        void (*release)(ArrowSchema*)
+        
+    struct ArrowArray:
+        int64_t length
+        int64_t null_count
+        int64_t offset
+        const void** buffers
+        void (*release)(ArrowArray*)
+
+    struct ArrowArrayStream:
+        int (*get_schema)(ArrowArrayStream* stream, ArrowSchema* out)
+
+    ctypedef int ArrowErrorCode
+
+    enum ArrowType:
+        NANOARROW_TYPE_UNINITIALIZED = 0
+        NANOARROW_TYPE_NA = 1
+        NANOARROW_TYPE_BOOL
+        NANOARROW_TYPE_UINT8
+        NANOARROW_TYPE_INT8
+        NANOARROW_TYPE_UINT16
+        NANOARROW_TYPE_INT16
+        NANOARROW_TYPE_UINT32
+        NANOARROW_TYPE_INT32
+        NANOARROW_TYPE_UINT64
+        NANOARROW_TYPE_INT64
+        NANOARROW_TYPE_HALF_FLOAT
+        NANOARROW_TYPE_FLOAT
+        NANOARROW_TYPE_DOUBLE
+        NANOARROW_TYPE_STRING
+        NANOARROW_TYPE_BINARY
+        NANOARROW_TYPE_FIXED_SIZE_BINARY
+        NANOARROW_TYPE_DATE32
+        NANOARROW_TYPE_DATE64
+        NANOARROW_TYPE_TIMESTAMP
+        NANOARROW_TYPE_TIME32
+        NANOARROW_TYPE_TIME64
+        NANOARROW_TYPE_INTERVAL_MONTHS
+        NANOARROW_TYPE_INTERVAL_DAY_TIME
+        NANOARROW_TYPE_DECIMAL128
+        NANOARROW_TYPE_DECIMAL256
+        NANOARROW_TYPE_LIST
+        NANOARROW_TYPE_STRUCT
+        NANOARROW_TYPE_SPARSE_UNION
+        NANOARROW_TYPE_DENSE_UNION
+        NANOARROW_TYPE_DICTIONARY
+        NANOARROW_TYPE_MAP
+        NANOARROW_TYPE_EXTENSION
+        NANOARROW_TYPE_FIXED_SIZE_LIST
+        NANOARROW_TYPE_DURATION
+        NANOARROW_TYPE_LARGE_STRING
+        NANOARROW_TYPE_LARGE_BINARY
+        NANOARROW_TYPE_LARGE_LIST
+        NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
+
+    enum ArrowBufferType:
+        NANOARROW_BUFFER_TYPE_NONE
+        NANOARROW_BUFFER_TYPE_VALIDITY
+        NANOARROW_BUFFER_TYPE_TYPE_ID
+        NANOARROW_BUFFER_TYPE_UNION_OFFSET
+        NANOARROW_BUFFER_TYPE_DATA_OFFSET
+        NANOARROW_BUFFER_TYPE_DATA
+
+    struct ArrowError:
+        pass
+
+    const char* ArrowErrorMessage(ArrowError* error)
+
+    struct ArrowLayout:
+        ArrowBufferType buffer_type[3]
+        int64_t element_size_bits[3]
+        int64_t child_size_elements
+
+    cdef union buffer_data:
+        const void* data
+        const int8_t* as_int8
+        const uint8_t* as_uint8
+
+    struct ArrowBufferView:
+        buffer_data data
+        int64_t n_bytes
+
+    struct ArrowBuffer:
+        uint8_t* data
+        int64_t size_bytes
+
+    struct ArrowBitmap:
+        ArrowBuffer buffer
+        int64_t size_bits
+
+    struct ArrowArrayView:
+        ArrowArray* array
+        ArrowType storage_type
+        ArrowLayout layout
+        ArrowBufferView buffer_views[3]
+        int64_t n_children
+        ArrowArrayView** children
+
+    ArrowErrorCode ArrowArrayViewInitFromSchema(ArrowArrayView* array_view, ArrowSchema* schema, ArrowError* error)
+    ArrowErrorCode ArrowArrayViewSetArray(ArrowArrayView* array_view, ArrowArray* array, ArrowError* error)
+    int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to)
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
new file mode 100644
index 0000000..fd76534
--- /dev/null
+++ b/python/tests/test_nanoarrow.py
@@ -0,0 +1,27 @@
+import numpy as np
+import pyarrow as pa
+
+import nanoarrow
+
+import pytest
+
+
+def test_as_numpy_array():
+    
+    arr = pa.array([1, 2, 3])
+    result = nanoarrow.as_numpy_array(arr)
+    expected = arr.to_numpy()
+    np.testing.assert_array_equal(result, expected)
+
+    arr = pa.array([1, 2, 3], pa.uint8())
+    result = nanoarrow.as_numpy_array(arr)
+    expected = arr.to_numpy()
+    np.testing.assert_array_equal(result, expected)
+
+    arr = pa.array([1, 2, None])
+    with pytest.raises(ValueError, match="Cannot convert array with nulls"):
+        nanoarrow.as_numpy_array(arr)
+
+    arr = pa.array([[1], [2, 3]])
+    with pytest.raises(TypeError, match="Cannot convert a non-primitive array"):
+        nanoarrow.as_numpy_array(arr)

Reply via email to