This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 5b23241 [Python] Add basic Python package structure and build setup
(#52)
5b23241 is described below
commit 5b2324165d766f6be966d19c25b51163f1519be9
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Mon Oct 24 15:20:58 2022 +0200
[Python] Add basic Python package structure and build setup (#52)
---
.github/workflows/python.yaml | 59 ++++++++++++++++
python/.gitignore | 53 +++++++++++++++
python/README.md | 44 ++++++++++++
python/pyproject.toml | 42 ++++++++++++
python/setup.py | 49 ++++++++++++++
python/src/nanoarrow/__init__.py | 20 ++++++
python/src/nanoarrow/_lib.pyx | 86 ++++++++++++++++++++++++
python/src/nanoarrow/nanoarrow_c.pxd | 127 +++++++++++++++++++++++++++++++++++
python/tests/test_nanoarrow.py | 27 ++++++++
9 files changed, 507 insertions(+)
diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
new file mode 100644
index 0000000..43a56d9
--- /dev/null
+++ b/.github/workflows/python.yaml
@@ -0,0 +1,59 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Test Python
+
+on:
+ push:
+ branches:
+ - main
+ pull_request:
+ branches:
+ - main
+ paths:
+ - '.github/workflows/python.yaml'
+ - 'src/nanoarrow/**'
+ - 'python/**'
+
+jobs:
+ test-python:
+
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ os: [ubuntu-latest]
+ python-version: ['3.10']
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ cache: 'pip'
+
+ - name: Install nanoarrow
+ run: |
+ pushd python
+ pip install .[test]
+ popd
+ pip list
+
+ - name: Run tests
+ run: |
+ pytest python/tests -v -s
diff --git a/python/.gitignore b/python/.gitignore
new file mode 100644
index 0000000..fcf8363
--- /dev/null
+++ b/python/.gitignore
@@ -0,0 +1,53 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+src/nanoarrow/nanoarrow.c
+src/nanoarrow/nanoarrow.h
+src/nanoarrow/*.cpp
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Unit test / coverage reports
+.pytest_cache/
\ No newline at end of file
diff --git a/python/README.md b/python/README.md
new file mode 100644
index 0000000..701896b
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,44 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# nanoarrow for Python
+
+Python bindings for nanoarrow.
+## Building
+
+The Python package is built with [setuptools][setuptools]. It can be built
+and installed in editable mode as follows:
+
+```shell
+$ cd python
+$ pip install -e .
+```
+
+Tests use [pytest][pytest]:
+
+```shell
+# Install dependencies
+$ pip install -e .[test]
+
+# Run tests
+$ pytest -vvx
+```
+
+[pytest]: https://docs.pytest.org/
+[setuptools]: https://setuptools.pypa.io/en/latest/index.html
\ No newline at end of file
diff --git a/python/pyproject.toml b/python/pyproject.toml
new file mode 100644
index 0000000..1cc2c17
--- /dev/null
+++ b/python/pyproject.toml
@@ -0,0 +1,42 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[project]
+name = "nanoarrow"
+version = "1.0.0-alpha0"
+description = ""
+authors = [{name = "Apache Arrow Developers", email = "dev@arrow.apache.org"}]
+license = {text = "Apache-2.0"}
+requires-python = ">=3.8"
+dependencies = ["numpy"]
+
+[project.optional-dependencies]
+test = ["pyarrow", "pytest"]
+
+[project.urls]
+homepage = "https://arrow.apache.org"
+repository = "https://github.com/apache/arrow-nanoarrow"
+
+[build-system]
+requires = [
+ "setuptools >= 61.0.0",
+ "setuptools-scm",
+ "Cython",
+ "oldest-supported-numpy",
+]
+build-backend = "setuptools.build_meta"
diff --git a/python/setup.py b/python/setup.py
new file mode 100644
index 0000000..f6f7efb
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import shutil
+from pathlib import Path
+
+from setuptools import Extension, setup
+
+import numpy as np
+
+
+# setuptools gets confused by relative paths that extend above the project root
+target = Path(__file__).parent / "src" / "nanoarrow"
+shutil.copy(
+ Path(__file__).parent / "../dist/nanoarrow.c", target / "nanoarrow.c"
+)
+shutil.copy(
+ Path(__file__).parent / "../dist/nanoarrow.h", target / "nanoarrow.h"
+)
+
+setup(
+ ext_modules=[
+ Extension(
+ name="nanoarrow._lib",
+ include_dirs=[np.get_include(), "src/nanoarrow"],
+ language="c++",
+ sources=[
+ "src/nanoarrow/_lib.pyx",
+ "src/nanoarrow/nanoarrow.c",
+ ],
+ )
+ ]
+)
diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/__init__.py
new file mode 100644
index 0000000..1586e60
--- /dev/null
+++ b/python/src/nanoarrow/__init__.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from ._lib import ( # noqa: F401
+ as_numpy_array,
+)
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
new file mode 100644
index 0000000..a6b4da1
--- /dev/null
+++ b/python/src/nanoarrow/_lib.pyx
@@ -0,0 +1,86 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: language_level = 3
+
+"""Low-level nanoarrow Python bindings."""
+
+from libc.stdint cimport uint8_t, uintptr_t
+
+from nanoarrow_c cimport *
+
+import numpy as np
+cimport numpy as cnp
+
+cnp.import_array()
+
+
+# Lookup table from nanoarrow storage type to the NumPy type number that
+# as_numpy_array() hands to PyArray_New(). Only the integer and floating-point
+# types listed here are convertible; as_numpy_array() raises
+# NotImplementedError for any storage type that has no entry.
+cdef dict _numpy_type_map = {
+ NANOARROW_TYPE_UINT8: cnp.NPY_UINT8,
+ NANOARROW_TYPE_INT8: cnp.NPY_INT8,
+ NANOARROW_TYPE_UINT16: cnp.NPY_UINT16,
+ NANOARROW_TYPE_INT16: cnp.NPY_INT16,
+ NANOARROW_TYPE_UINT32: cnp.NPY_UINT32,
+ NANOARROW_TYPE_INT32: cnp.NPY_INT32,
+ NANOARROW_TYPE_UINT64: cnp.NPY_UINT64,
+ NANOARROW_TYPE_INT64: cnp.NPY_INT64,
+ NANOARROW_TYPE_HALF_FLOAT: cnp.NPY_FLOAT16,
+ NANOARROW_TYPE_FLOAT: cnp.NPY_FLOAT32,
+ NANOARROW_TYPE_DOUBLE: cnp.NPY_FLOAT64,
+}
+
+
+def as_numpy_array(arr):
+    """Return a 1-D NumPy array viewing the data buffer of a primitive Arrow array.
+
+    Parameters
+    ----------
+    arr : object
+        Object exposing ``_export_to_c(array_address, schema_address)`` (for
+        example a ``pyarrow.Array``); it is asked to fill a C ``ArrowArray``
+        and ``ArrowSchema`` at the given addresses.
+
+    Returns
+    -------
+    numpy.ndarray
+        Array of ``length`` elements whose memory is the exported data buffer
+        (no copy is made), starting at the array's logical offset.
+
+    Raises
+    ------
+    TypeError
+        If the second buffer of the array's layout is not a data buffer.
+    ValueError
+        If the array contains nulls, or the schema cannot be interpreted.
+    NotImplementedError
+        If the storage type has no entry in ``_numpy_type_map``.
+    """
+    cdef ArrowSchema schema
+    cdef ArrowArray array
+    cdef ArrowArrayView array_view
+    cdef ArrowError error
+    cdef int code
+    cdef int type_num
+    cdef long long valid_count
+    cdef char* data
+    cdef cnp.npy_intp dims[1]
+    cdef cnp.ndarray result
+
+    arr._export_to_c(<uintptr_t> &array, <uintptr_t> &schema)
+
+    try:
+        # A non-zero code (anything but NANOARROW_OK) means the view was not
+        # initialized, so none of its fields may be read.
+        code = ArrowArrayViewInitFromSchema(&array_view, &schema, &error)
+        if code != 0:
+            raise ValueError(
+                "ArrowArrayViewInitFromSchema() failed with code %d" % code
+            )
+
+        # primitive arrays have DATA as the second buffer
+        if array_view.layout.buffer_type[1] != NANOARROW_BUFFER_TYPE_DATA:
+            raise TypeError("Cannot convert a non-primitive array")
+
+        # disallow nulls for this method
+        if array.null_count > 0:
+            raise ValueError("Cannot convert array with nulls")
+        if (
+            array.null_count < 0
+            and array_view.layout.buffer_type[0] == NANOARROW_BUFFER_TYPE_VALIDITY
+            and array.buffers[0] != NULL
+        ):
+            # Null count not yet computed. Set bits in the validity bitmap mark
+            # valid slots, so nulls are the slots in [offset, offset + length)
+            # whose bit is NOT set.
+            valid_count = ArrowBitCountSet(
+                <const uint8_t *> array.buffers[0],
+                array.offset,
+                array.offset + array.length,
+            )
+            if valid_count < array.length:
+                raise ValueError("Cannot convert array with nulls")
+
+        if array_view.storage_type not in _numpy_type_map:
+            raise NotImplementedError(array_view.storage_type)
+        type_num = _numpy_type_map[array_view.storage_type]
+
+        dims[0] = array.length
+        data = <char *> array.buffers[1]
+        if data != NULL:
+            # Honor the logical offset of sliced arrays: skip `offset` elements
+            # of the fixed-width data buffer.
+            data += array.offset * (array_view.layout.element_size_bits[1] // 8)
+
+        # itemsize=-1 and flags=0 are passed through unchanged; the result
+        # borrows `data` rather than owning it.
+        result = cnp.PyArray_New(
+            np.ndarray, 1, dims, type_num, NULL, <void *> data, -1, 0, <object>NULL
+        )
+    except BaseException:
+        # Nothing borrows the exported buffers when conversion fails, so the
+        # exported array can be handed back to its producer.
+        if array.release != NULL:
+            array.release(&array)
+        raise
+    finally:
+        # The schema is only needed to initialize the view.
+        if schema.release != NULL:
+            schema.release(&schema)
+
+    # TODO set base
+    # NOTE: on success `array` is deliberately NOT released: `result` borrows
+    # its data buffer and has no base object yet, so releasing here would leave
+    # the ndarray pointing at freed memory.
+    # NOTE(review): `array_view` is never reset; any memory it allocated for
+    # child views stays allocated -- confirm once a reset function is declared.
+    return result
diff --git a/python/src/nanoarrow/nanoarrow_c.pxd b/python/src/nanoarrow/nanoarrow_c.pxd
new file mode 100644
index 0000000..9bfc488
--- /dev/null
+++ b/python/src/nanoarrow/nanoarrow_c.pxd
@@ -0,0 +1,127 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: language_level = 3
+
+from libc.stdint cimport int64_t, int8_t, uint8_t
+
+
+cdef extern from "nanoarrow.h":
+ struct ArrowSchema:
+ const char* format
+ int64_t n_children
+ void (*release)(ArrowSchema*)
+
+ struct ArrowArray:
+ int64_t length
+ int64_t null_count
+ int64_t offset
+ const void** buffers
+ void (*release)(ArrowArray*)
+
+ struct ArrowArrayStream:
+ int (*get_schema)(ArrowArrayStream* stream, ArrowSchema* out)
+
+ ctypedef int ArrowErrorCode
+
+ enum ArrowType:
+ NANOARROW_TYPE_UNINITIALIZED = 0
+ NANOARROW_TYPE_NA = 1
+ NANOARROW_TYPE_BOOL
+ NANOARROW_TYPE_UINT8
+ NANOARROW_TYPE_INT8
+ NANOARROW_TYPE_UINT16
+ NANOARROW_TYPE_INT16
+ NANOARROW_TYPE_UINT32
+ NANOARROW_TYPE_INT32
+ NANOARROW_TYPE_UINT64
+ NANOARROW_TYPE_INT64
+ NANOARROW_TYPE_HALF_FLOAT
+ NANOARROW_TYPE_FLOAT
+ NANOARROW_TYPE_DOUBLE
+ NANOARROW_TYPE_STRING
+ NANOARROW_TYPE_BINARY
+ NANOARROW_TYPE_FIXED_SIZE_BINARY
+ NANOARROW_TYPE_DATE32
+ NANOARROW_TYPE_DATE64
+ NANOARROW_TYPE_TIMESTAMP
+ NANOARROW_TYPE_TIME32
+ NANOARROW_TYPE_TIME64
+ NANOARROW_TYPE_INTERVAL_MONTHS
+ NANOARROW_TYPE_INTERVAL_DAY_TIME
+ NANOARROW_TYPE_DECIMAL128
+ NANOARROW_TYPE_DECIMAL256
+ NANOARROW_TYPE_LIST
+ NANOARROW_TYPE_STRUCT
+ NANOARROW_TYPE_SPARSE_UNION
+ NANOARROW_TYPE_DENSE_UNION
+ NANOARROW_TYPE_DICTIONARY
+ NANOARROW_TYPE_MAP
+ NANOARROW_TYPE_EXTENSION
+ NANOARROW_TYPE_FIXED_SIZE_LIST
+ NANOARROW_TYPE_DURATION
+ NANOARROW_TYPE_LARGE_STRING
+ NANOARROW_TYPE_LARGE_BINARY
+ NANOARROW_TYPE_LARGE_LIST
+ NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
+
+ enum ArrowBufferType:
+ NANOARROW_BUFFER_TYPE_NONE
+ NANOARROW_BUFFER_TYPE_VALIDITY
+ NANOARROW_BUFFER_TYPE_TYPE_ID
+ NANOARROW_BUFFER_TYPE_UNION_OFFSET
+ NANOARROW_BUFFER_TYPE_DATA_OFFSET
+ NANOARROW_BUFFER_TYPE_DATA
+
+ struct ArrowError:
+ pass
+
+ const char* ArrowErrorMessage(ArrowError* error)
+
+ struct ArrowLayout:
+ ArrowBufferType buffer_type[3]
+ int64_t element_size_bits[3]
+ int64_t child_size_elements
+
+ cdef union buffer_data:
+ const void* data
+ const int8_t* as_int8
+ const uint8_t* as_uint8
+
+ struct ArrowBufferView:
+ buffer_data data
+ int64_t n_bytes
+
+ struct ArrowBuffer:
+ uint8_t* data
+ int64_t size_bytes
+
+ struct ArrowBitmap:
+ ArrowBuffer buffer
+ int64_t size_bits
+
+ struct ArrowArrayView:
+ ArrowArray* array
+ ArrowType storage_type
+ ArrowLayout layout
+ ArrowBufferView buffer_views[3]
+ int64_t n_children
+ ArrowArrayView** children
+
+ ArrowErrorCode ArrowArrayViewInitFromSchema(ArrowArrayView* array_view, ArrowSchema* schema, ArrowError* error)
+ ArrowErrorCode ArrowArrayViewSetArray(ArrowArrayView* array_view, ArrowArray* array, ArrowError* error)
+ int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to)
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
new file mode 100644
index 0000000..fd76534
--- /dev/null
+++ b/python/tests/test_nanoarrow.py
@@ -0,0 +1,27 @@
+import numpy as np
+import pyarrow as pa
+
+import nanoarrow
+
+import pytest
+
+
+def test_as_numpy_array():
+
+ arr = pa.array([1, 2, 3])
+ result = nanoarrow.as_numpy_array(arr)
+ expected = arr.to_numpy()
+ np.testing.assert_array_equal(result, expected)
+
+ arr = pa.array([1, 2, 3], pa.uint8())
+ result = nanoarrow.as_numpy_array(arr)
+ expected = arr.to_numpy()
+ np.testing.assert_array_equal(result, expected)
+
+ arr = pa.array([1, 2, None])
+ with pytest.raises(ValueError, match="Cannot convert array with nulls"):
+ nanoarrow.as_numpy_array(arr)
+
+ arr = pa.array([[1], [2, 3]])
+ with pytest.raises(TypeError, match="Cannot convert a non-primitive
array"):
+ nanoarrow.as_numpy_array(arr)