This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit e8a93ca1125659e0eaff353d92d351d25f104a4b
Author: Antoine Pitrou <anto...@python.org>
AuthorDate: Mon Feb 12 15:25:18 2018 -0500

    ARROW-1021: [Python] Add documentation for C++ pyarrow API
    
    Author: Wes McKinney <wes.mckin...@twosigma.com>
    Author: Antoine Pitrou <anto...@python.org>
    
    Closes #1576 from pitrou/ARROW-1021-document-pyarrow-cpp-api and squashes 
the following commits:
    
    61f95852 [Wes McKinney] Include get_libraries, get_library_dirs in docs, 
enable test to pass on Linux in local dev setup
    3fced81d [Wes McKinney] Add get_libraries, get_library_dirs functions, get 
Cython test case passing on Windows
    d82ead24 [Antoine Pitrou] ARROW-1021: [Python] Add documentation for C++ 
pyarrow API
    
    Change-Id: I0160fb23569d1e2ba549b1b75486bc41001eebbc
---
 .gitignore                                         |   1 +
 ci/msvc-build.bat                                  |  12 +-
 python/doc/Makefile                                |   2 +-
 python/doc/source/api.rst                          |  12 +
 python/doc/source/extending.rst                    | 327 ++++++++++++++++++++-
 python/pyarrow/__init__.py                         |  30 ++
 python/pyarrow/lib.pxd                             |  27 +-
 .../pyarrow/tests/pyarrow_cython_example.pyx       |  25 +-
 python/pyarrow/tests/test_cython.py                | 100 +++++++
 9 files changed, 504 insertions(+), 32 deletions(-)

diff --git a/.gitignore b/.gitignore
index c38694e..f5b235a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,7 @@
 *.o
 *.py[ocd]
 *.so
+*.so.*
 *.dylib
 .build_cache_dir
 MANIFEST
diff --git a/ci/msvc-build.bat b/ci/msvc-build.bat
index 58dfc2a1..069ddf0 100644
--- a/ci/msvc-build.bat
+++ b/ci/msvc-build.bat
@@ -137,14 +137,20 @@ cmake -G "%GENERATOR%" ^
 cmake --build . --target INSTALL --config %CONFIGURATION% || exit /B
 popd
 
-@rem Build and import pyarrow
+@rem Build and install pyarrow
 @rem parquet-cpp has some additional runtime dependencies that we need to 
figure out
 @rem see PARQUET-1018
 
 pushd python
 
 set PYARROW_CXXFLAGS=/WX
-python setup.py build_ext --inplace --with-parquet --bundle-arrow-cpp 
bdist_wheel  || exit /B
-py.test pyarrow -r sxX --durations=15 -v -s --parquet || exit /B
+python setup.py build_ext --with-parquet --bundle-arrow-cpp ^
+    install -q --single-version-externally-managed --record=record.text ^
+    bdist_wheel || exit /B
+
+@rem Test directly from installed location
+
+SET PYARROW_PATH=%CONDA_PREFIX%\Lib\site-packages\pyarrow
+py.test -r sxX --durations=15 -v %PYARROW_PATH% --parquet || exit /B
 
 popd
diff --git a/python/doc/Makefile b/python/doc/Makefile
index 1b9f707..eacb124 100644
--- a/python/doc/Makefile
+++ b/python/doc/Makefile
@@ -20,7 +20,7 @@
 #
 
 # You can set these variables from the command line.
-SPHINXOPTS    =
+SPHINXOPTS    = -j4
 SPHINXBUILD   = sphinx-build
 PAPER         =
 BUILDDIR      = _build
diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index 2d3e39c..a71e92b 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -342,3 +342,15 @@ Apache Parquet
    read_schema
    write_metadata
    write_table
+
+.. currentmodule:: pyarrow
+
+Using with C extensions
+-----------------------
+
+.. autosummary::
+   :toctree: generated/
+
+   get_include
+   get_libraries
+   get_library_dirs
diff --git a/python/doc/source/extending.rst b/python/doc/source/extending.rst
index 2d6caed..4a35898 100644
--- a/python/doc/source/extending.rst
+++ b/python/doc/source/extending.rst
@@ -15,19 +15,332 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
-.. currentmodule:: pyarrow
+.. currentmodule:: pyarrow.lib
 .. _extending:
 
-Building C++ and Cython Extensions using pyarrow
-================================================
+Using pyarrow from C++ and Cython Code
+======================================
 
-pyarrow features both a Cython and C++ API. We intend to fully document the
-details of how to do this.
+pyarrow features both a Cython and C++ API.
 
-The Arrow C++ header files are bundled with a pyarrow installation. To get the
-absolute path to this directory (like ``numpy.get_include()``), use:
+C++ API
+-------
+
+.. default-domain:: cpp
+
+The Arrow C++ header files are bundled with a pyarrow installation.
+To get the absolute path to this directory (like ``numpy.get_include()``), use:
 
 .. code-block:: python
 
    import pyarrow as pa
    pa.get_include()
+
+Assuming the path above is on your compiler's include path, the pyarrow API
+can be included using the following directive:
+
+.. code-block:: cpp
+
+   #include <arrow/python/pyarrow.h>
+
+This will not include other parts of the Arrow API, which you will need
+to include yourself (for example ``arrow/api.h``).
+
+When building C extensions that use the Arrow C++ libraries, you must add
+appropriate linker flags. We have provided functions ``pyarrow.get_libraries``
+and ``pyarrow.get_library_dirs`` which return a list of library names and
+likely library install locations (if you installed pyarrow with pip or
+conda). These must be included when declaring your C extensions with distutils
+(see below).
+
+Initializing the API
+~~~~~~~~~~~~~~~~~~~~
+
+.. function:: int import_pyarrow()
+
+   Initialize inner pointers of the pyarrow API.  On success, 0 is
+   returned.  Otherwise, -1 is returned and a Python exception is set.
+
+   It is mandatory to call this function before calling any other function
+   in the pyarrow C++ API.  Failing to do so will likely lead to crashes.
+
+Wrapping and Unwrapping
+~~~~~~~~~~~~~~~~~~~~~~~
+
+pyarrow provides the following functions to go back and forth between
+Python wrappers (as exposed by the pyarrow Python API) and the underlying
+C++ objects.
+
+.. function:: bool is_array(PyObject* obj)
+
+   Return whether *obj* wraps an Arrow C++ :class:`Array` pointer;
+   in other words, whether *obj* is a :py:class:`pyarrow.Array` instance.
+
+.. function:: bool is_buffer(PyObject* obj)
+
+   Return whether *obj* wraps an Arrow C++ :class:`Buffer` pointer;
+   in other words, whether *obj* is a :py:class:`pyarrow.Buffer` instance.
+
+.. function:: bool is_column(PyObject* obj)
+
+   Return whether *obj* wraps an Arrow C++ :class:`Column` pointer;
+   in other words, whether *obj* is a :py:class:`pyarrow.Column` instance.
+
+.. function:: bool is_data_type(PyObject* obj)
+
+   Return whether *obj* wraps an Arrow C++ :class:`DataType` pointer;
+   in other words, whether *obj* is a :py:class:`pyarrow.DataType` instance.
+
+.. function:: bool is_field(PyObject* obj)
+
+   Return whether *obj* wraps an Arrow C++ :class:`Field` pointer;
+   in other words, whether *obj* is a :py:class:`pyarrow.Field` instance.
+
+.. function:: bool is_record_batch(PyObject* obj)
+
+   Return whether *obj* wraps an Arrow C++ :class:`RecordBatch` pointer;
+   in other words, whether *obj* is a :py:class:`pyarrow.RecordBatch` instance.
+
+.. function:: bool is_schema(PyObject* obj)
+
+   Return whether *obj* wraps an Arrow C++ :class:`Schema` pointer;
+   in other words, whether *obj* is a :py:class:`pyarrow.Schema` instance.
+
+.. function:: bool is_table(PyObject* obj)
+
+   Return whether *obj* wraps an Arrow C++ :class:`Table` pointer;
+   in other words, whether *obj* is a :py:class:`pyarrow.Table` instance.
+
+.. function:: bool is_tensor(PyObject* obj)
+
+   Return whether *obj* wraps an Arrow C++ :class:`Tensor` pointer;
+   in other words, whether *obj* is a :py:class:`pyarrow.Tensor` instance.
+
+The following functions expect a pyarrow object, unwrap the underlying
+Arrow C++ API pointer, and put it in the *out* parameter.  The returned
+:class:`Status` object must be inspected first to know whether any error
+occurred.  If successful, *out* is guaranteed to be non-NULL.
+
+.. function:: Status unwrap_array(PyObject* obj, std::shared_ptr<Array>* out)
+
+   Unwrap the Arrow C++ :class:`Array` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_buffer(PyObject* obj, std::shared_ptr<Buffer>* out)
+
+   Unwrap the Arrow C++ :class:`Buffer` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_column(PyObject* obj, std::shared_ptr<Column>* out)
+
+   Unwrap the Arrow C++ :class:`Column` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_data_type(PyObject* obj, std::shared_ptr<DataType>* out)
+
+   Unwrap the Arrow C++ :class:`DataType` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_field(PyObject* obj, std::shared_ptr<Field>* out)
+
+   Unwrap the Arrow C++ :class:`Field` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_record_batch(PyObject* obj, std::shared_ptr<RecordBatch>* out)
+
+   Unwrap the Arrow C++ :class:`RecordBatch` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_schema(PyObject* obj, std::shared_ptr<Schema>* out)
+
+   Unwrap the Arrow C++ :class:`Schema` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_table(PyObject* obj, std::shared_ptr<Table>* out)
+
+   Unwrap the Arrow C++ :class:`Table` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_tensor(PyObject* obj, std::shared_ptr<Tensor>* out)
+
+   Unwrap the Arrow C++ :class:`Tensor` pointer from *obj* and put it in *out*.
+
+The following functions take an Arrow C++ API pointer and wrap it in a
+pyarrow object of the corresponding type.  A new reference is returned.
+On error, NULL is returned and a Python exception is set.
+
+.. function:: PyObject* wrap_array(const std::shared_ptr<Array>& array)
+
+   Wrap the Arrow C++ *array* in a :py:class:`pyarrow.Array` instance.
+
+.. function:: PyObject* wrap_buffer(const std::shared_ptr<Buffer>& buffer)
+
+   Wrap the Arrow C++ *buffer* in a :py:class:`pyarrow.Buffer` instance.
+
+.. function:: PyObject* wrap_column(const std::shared_ptr<Column>& column)
+
+   Wrap the Arrow C++ *column* in a :py:class:`pyarrow.Column` instance.
+
+.. function:: PyObject* wrap_data_type(const std::shared_ptr<DataType>& data_type)
+
+   Wrap the Arrow C++ *data_type* in a :py:class:`pyarrow.DataType` instance.
+
+.. function:: PyObject* wrap_field(const std::shared_ptr<Field>& field)
+
+   Wrap the Arrow C++ *field* in a :py:class:`pyarrow.Field` instance.
+
+.. function:: PyObject* wrap_record_batch(const std::shared_ptr<RecordBatch>& batch)
+
+   Wrap the Arrow C++ record *batch* in a :py:class:`pyarrow.RecordBatch` instance.
+
+.. function:: PyObject* wrap_schema(const std::shared_ptr<Schema>& schema)
+
+   Wrap the Arrow C++ *schema* in a :py:class:`pyarrow.Schema` instance.
+
+.. function:: PyObject* wrap_table(const std::shared_ptr<Table>& table)
+
+   Wrap the Arrow C++ *table* in a :py:class:`pyarrow.Table` instance.
+
+.. function:: PyObject* wrap_tensor(const std::shared_ptr<Tensor>& tensor)
+
+   Wrap the Arrow C++ *tensor* in a :py:class:`pyarrow.Tensor` instance.
+
+
+Cython API
+----------
+
+.. default-domain:: py
+
+The Cython API more or less mirrors the C++ API, but the calling convention
+can be different as required by Cython.  In Cython, you don't need to
+initialize the API as that will be handled automatically by the ``cimport``
+directive.
+
+.. note::
+   Classes from the Arrow C++ API are renamed when exposed in Cython, to
+   avoid name clashes with the corresponding Python classes.  For example,
+   C++ Arrow arrays have the ``CArray`` type and ``Array`` is the
+   corresponding Python wrapper class.
+
+Wrapping and Unwrapping
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The following functions expect a pyarrow object, unwrap the underlying
+Arrow C++ API pointer, and return it.  NULL is returned (without setting
+an exception) if the input is not of the right type.
+
+.. function:: pyarrow_unwrap_array(obj) -> shared_ptr[CArray]
+
+   Unwrap the Arrow C++ :cpp:class:`Array` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_batch(obj) -> shared_ptr[CRecordBatch]
+
+   Unwrap the Arrow C++ :cpp:class:`RecordBatch` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_buffer(obj) -> shared_ptr[CBuffer]
+
+   Unwrap the Arrow C++ :cpp:class:`Buffer` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_column(obj) -> shared_ptr[CColumn]
+
+   Unwrap the Arrow C++ :cpp:class:`Column` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_data_type(obj) -> shared_ptr[CDataType]
+
+   Unwrap the Arrow C++ :cpp:class:`DataType` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_field(obj) -> shared_ptr[CField]
+
+   Unwrap the Arrow C++ :cpp:class:`Field` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_schema(obj) -> shared_ptr[CSchema]
+
+   Unwrap the Arrow C++ :cpp:class:`Schema` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_table(obj) -> shared_ptr[CTable]
+
+   Unwrap the Arrow C++ :cpp:class:`Table` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_tensor(obj) -> shared_ptr[CTensor]
+
+   Unwrap the Arrow C++ :cpp:class:`Tensor` pointer from *obj*.
+
+The following functions take an Arrow C++ API pointer and wrap it in a
+pyarrow object of the corresponding type.  An exception is raised on error.
+
+.. function:: pyarrow_wrap_array(const shared_ptr[CArray]& array) -> object
+
+   Wrap the Arrow C++ *array* in a Python :class:`pyarrow.Array` instance.
+
+.. function:: pyarrow_wrap_batch(const shared_ptr[CRecordBatch]& batch) -> object
+
+   Wrap the Arrow C++ record *batch* in a Python :class:`pyarrow.RecordBatch` instance.
+
+.. function:: pyarrow_wrap_buffer(const shared_ptr[CBuffer]& buffer) -> object
+
+   Wrap the Arrow C++ *buffer* in a Python :class:`pyarrow.Buffer` instance.
+
+.. function:: pyarrow_wrap_column(const shared_ptr[CColumn]& column) -> object
+
+   Wrap the Arrow C++ *column* in a Python :class:`pyarrow.Column` instance.
+
+.. function:: pyarrow_wrap_data_type(const shared_ptr[CDataType]& data_type) -> object
+
+   Wrap the Arrow C++ *data_type* in a Python :class:`pyarrow.DataType` instance.
+
+.. function:: pyarrow_wrap_field(const shared_ptr[CField]& field) -> object
+
+   Wrap the Arrow C++ *field* in a Python :class:`pyarrow.Field` instance.
+
+.. function:: pyarrow_wrap_resizable_buffer(const shared_ptr[CResizableBuffer]& buffer) -> object
+
+   Wrap the Arrow C++ resizable *buffer* in a Python :class:`pyarrow.ResizableBuffer` instance.
+
+.. function:: pyarrow_wrap_schema(const shared_ptr[CSchema]& schema) -> object
+
+   Wrap the Arrow C++ *schema* in a Python :class:`pyarrow.Schema` instance.
+
+.. function:: pyarrow_wrap_table(const shared_ptr[CTable]& table) -> object
+
+   Wrap the Arrow C++ *table* in a Python :class:`pyarrow.Table` instance.
+
+.. function:: pyarrow_wrap_tensor(const shared_ptr[CTensor]& tensor) -> object
+
+   Wrap the Arrow C++ *tensor* in a Python :class:`pyarrow.Tensor` instance.
+
+Example
+~~~~~~~
+
+The following Cython module shows how to unwrap a Python object and call
+the underlying C++ object's API.
+
+.. code-block:: python
+
+   # distutils: language=c++
+
+   from pyarrow.lib cimport *
+
+   def get_array_length(obj):
+       # Just an example function accessing both the pyarrow Cython API
+       # and the Arrow C++ API
+       cdef shared_ptr[CArray] arr = pyarrow_unwrap_array(obj)
+       if arr.get() == NULL:
+           raise TypeError("not an array")
+       return arr.get().length()
+
+To build this module, you will need a slightly customized ``setup.py`` file
+(this is assuming the file above is named ``example.pyx``):
+
+.. code-block:: python
+
+    from distutils.core import setup
+    from Cython.Build import cythonize
+
+    import numpy as np
+
+    import pyarrow as pa
+
+    ext_modules = cythonize("example.pyx")
+
+    for ext in ext_modules:
+        # The Numpy C headers are currently required
+        ext.include_dirs.append(np.get_include())
+        ext.include_dirs.append(pa.get_include())
+        ext.libraries.extend(pa.get_libraries())
+        ext.library_dirs.extend(pa.get_library_dirs())
+
+    setup(
+        ext_modules=ext_modules,
+    )
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 8b3cba9..d95954e 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -165,3 +165,33 @@ def get_include():
     """
     import os
     return os.path.join(os.path.dirname(__file__), 'include')
+
+
+def get_libraries():
+    """
+    Return the list of library names to pass as the `libraries` argument when
+    building C or Cython extensions that use pyarrow
+    """
+    return ['arrow_python']
+
+
+def get_library_dirs():
+    """
+    Return list of directories likely to contain Arrow C++ libraries for
+    linking C or Cython extensions using pyarrow
+    """
+    import os
+    import sys
+    package_cwd = os.path.dirname(__file__)
+    library_dirs = [package_cwd]
+
+    if sys.platform == 'win32':
+        # TODO(wesm): Is this necessary, or does setuptools within a conda
+        # installation add Library\lib to the linker path for MSVC?
+        # Layout is <prefix>\Lib\site-packages\pyarrow: go up to <prefix>
+        lib_dir = os.path.dirname(os.path.dirname(package_cwd))
+        python_base_install = os.path.dirname(lib_dir)
+        library_dirs.append(os.path.join(python_base_install,
+                                         'Library', 'lib'))
+
+    return library_dirs
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 161562c..b1433ec 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -347,16 +347,29 @@ cdef class NativeFile:
 cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader)
 cdef get_writer(object source, shared_ptr[OutputStream]* writer)
 
+cdef dict box_metadata(const CKeyValueMetadata* sp_metadata)
+
+# Public Cython API for 3rd party code
+
+cdef public object pyarrow_wrap_array(const shared_ptr[CArray]& sp_array)
+# XXX pyarrow.h calls it `wrap_record_batch`
+cdef public object pyarrow_wrap_batch(const shared_ptr[CRecordBatch]& cbatch)
 cdef public object pyarrow_wrap_buffer(const shared_ptr[CBuffer]& buf)
-cdef public object pyarrow_wrap_resizable_buffer(
-    const shared_ptr[CResizableBuffer]& buf)
+cdef public object pyarrow_wrap_column(const shared_ptr[CColumn]& ccolumn)
 cdef public object pyarrow_wrap_data_type(const shared_ptr[CDataType]& type)
 cdef public object pyarrow_wrap_field(const shared_ptr[CField]& field)
+cdef public object pyarrow_wrap_resizable_buffer(
+    const shared_ptr[CResizableBuffer]& buf)
 cdef public object pyarrow_wrap_schema(const shared_ptr[CSchema]& type)
-cdef public object pyarrow_wrap_array(const shared_ptr[CArray]& sp_array)
-cdef public object pyarrow_wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
-cdef public object pyarrow_wrap_column(const shared_ptr[CColumn]& ccolumn)
 cdef public object pyarrow_wrap_table(const shared_ptr[CTable]& ctable)
-cdef public object pyarrow_wrap_batch(const shared_ptr[CRecordBatch]& cbatch)
+cdef public object pyarrow_wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
 
-cdef dict box_metadata(const CKeyValueMetadata* sp_metadata)
+cdef public shared_ptr[CArray] pyarrow_unwrap_array(object array)
+cdef public shared_ptr[CRecordBatch] pyarrow_unwrap_batch(object batch)
+cdef public shared_ptr[CBuffer] pyarrow_unwrap_buffer(object buffer)
+cdef public shared_ptr[CColumn] pyarrow_unwrap_column(object column)
+cdef public shared_ptr[CDataType] pyarrow_unwrap_data_type(object data_type)
+cdef public shared_ptr[CField] pyarrow_unwrap_field(object field)
+cdef public shared_ptr[CSchema] pyarrow_unwrap_schema(object schema)
+cdef public shared_ptr[CTable] pyarrow_unwrap_table(object table)
+cdef public shared_ptr[CTensor] pyarrow_unwrap_tensor(object tensor)
diff --git a/.gitignore b/python/pyarrow/tests/pyarrow_cython_example.pyx
similarity index 70%
copy from .gitignore
copy to python/pyarrow/tests/pyarrow_cython_example.pyx
index c38694e..b5e5406 100644
--- a/.gitignore
+++ b/python/pyarrow/tests/pyarrow_cython_example.pyx
@@ -15,18 +15,15 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# Compiled source
-*.a
-*.dll
-*.o
-*.py[ocd]
-*.so
-*.dylib
-.build_cache_dir
-MANIFEST
+# distutils: language=c++
 
-cpp/.idea/
-python/.eggs/
-.vscode
-.idea/
-.pytest_cache/
+from pyarrow.lib cimport *
+
+
+def get_array_length(obj):
+    # Example: unwrap a pyarrow object with the Cython API, then call the
+    # Arrow C++ API on the result; a NULL pointer is reported as TypeError
+    cdef shared_ptr[CArray] arr = pyarrow_unwrap_array(obj)
+    if arr.get() == NULL:
+        raise TypeError("not an array")
+    return arr.get().length()
diff --git a/python/pyarrow/tests/test_cython.py 
b/python/pyarrow/tests/test_cython.py
new file mode 100644
index 0000000..51b1a48
--- /dev/null
+++ b/python/pyarrow/tests/test_cython.py
@@ -0,0 +1,100 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import shutil
+import subprocess
+import sys
+
+import pytest
+
+import pyarrow as pa
+
+
+here = os.path.dirname(os.path.abspath(__file__))
+# setup.py source text; the leading "if 1:" lets the indented body parse
+setup_template = """if 1:
+    from distutils.core import setup
+    from Cython.Build import cythonize
+
+    import numpy as np
+
+    import pyarrow as pa
+
+    ext_modules = cythonize({pyx_file!r})
+    compiler_opts = {compiler_opts!r}
+    custom_ld_path = {test_ld_path!r}
+
+    for ext in ext_modules:
+        # XXX required for numpy/numpyconfig.h,
+        # included from arrow/python/api.h
+        ext.include_dirs.append(np.get_include())
+        ext.include_dirs.append(pa.get_include())
+        ext.libraries.extend(pa.get_libraries())
+        ext.library_dirs.extend(pa.get_library_dirs())
+        if custom_ld_path:
+            ext.library_dirs.append(custom_ld_path)
+        ext.extra_compile_args.extend(compiler_opts)
+
+    setup(
+        ext_modules=ext_modules,
+    )
+"""
+
+
+def test_cython_api(tmpdir):
+    """
+    Build the example Cython extension against pyarrow and call into it.
+    """
+    pytest.importorskip('Cython')
+
+    # Optional extra linker path; None when neither variable is set
+    ld_path_default = (os.path.join(os.environ['ARROW_HOME'], 'lib')
+                       if 'ARROW_HOME' in os.environ else None)
+    test_ld_path = os.environ.get('PYARROW_TEST_LD_PATH', ld_path_default)
+
+    with tmpdir.as_cwd():
+        # Set up temporary workspace
+        pyx_file = 'pyarrow_cython_example.pyx'
+        shutil.copyfile(os.path.join(here, pyx_file),
+                        os.path.join(str(tmpdir), pyx_file))
+        # Create setup.py file
+        if os.name == 'posix':
+            compiler_opts = ['-std=c++11']
+        else:
+            compiler_opts = []
+        setup_code = setup_template.format(pyx_file=pyx_file,
+                                           compiler_opts=compiler_opts,
+                                           test_ld_path=test_ld_path)
+        with open('setup.py', 'w') as f:
+            f.write(setup_code)
+
+        # Compile extension module
+        subprocess.check_call([sys.executable, 'setup.py',
+                               'build_ext', '--inplace'])
+
+        # Check basic functionality
+        orig_path = sys.path[:]
+        sys.path.insert(0, str(tmpdir))
+        try:
+            mod = __import__('pyarrow_cython_example')
+            arr = pa.array([1, 2, 3])
+            assert mod.get_array_length(arr) == 3
+            with pytest.raises(TypeError, match="not an array"):
+                mod.get_array_length(None)
+        finally:
+            sys.path = orig_path

-- 
To stop receiving notification emails like this one, please contact
w...@apache.org.

Reply via email to