This is an automated email from the ASF dual-hosted git repository.
raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new f1b21f1945 GH-36411: [Python] Use scikit-build-core as build backend
for PyArrow and get rid of setup.py (#49259)
f1b21f1945 is described below
commit f1b21f1945de743b6526440458222c0442acde03
Author: Raúl Cumplido <[email protected]>
AuthorDate: Mon Mar 9 09:47:21 2026 +0100
GH-36411: [Python] Use scikit-build-core as build backend for PyArrow and
get rid of setup.py (#49259)
### Rationale for this change
Move our PyArrow build backend from setuptools and a custom setup.py to
scikit-build-core, which is a build backend for CMake-related projects.
### What changes are included in this PR?
Move from setuptools to scikit-build-core and remove PyArrow setup.py.
Update some of the build requirements and minor fixes.
A custom build backend has also been created to wrap scikit-build-core
in order to fix problems with license files in monorepos.
pyproject.toml metadata validation expects license files to exist before
the build backend is exercised; that's why we create symlinks. Our thin build
backend replaces those symlinks with regular file copies so that the license and
notice files contain the actual contents and are added as part of the sdist.
Remove flags that are no longer used (they were only part of setup.py) and
document and validate how the same flags have to be used now.
### Are these changes tested?
Yes, all Python CI tests, wheel and sdist builds are successful.
### Are there any user-facing changes?
Yes, users building PyArrow will now require the new build dependencies to
exercise the build and, depending on the flags used, they might need to use
the newly documented way of passing those flags.
* GitHub Issue: #36411
Lead-authored-by: Raúl Cumplido <[email protected]>
Co-authored-by: Joris Van den Bossche <[email protected]>
Co-authored-by: Rok Mihevc <[email protected]>
Signed-off-by: Raúl Cumplido <[email protected]>
---
.env | 4 +-
.github/workflows/dev.yml | 2 +-
ci/conda_env_python.txt | 2 +-
ci/scripts/python_build.bat | 5 +-
ci/scripts/python_build.sh | 11 +-
ci/scripts/python_sdist_build.sh | 2 +-
ci/scripts/python_sdist_test.sh | 5 +-
ci/scripts/python_wheel_macos_build.sh | 11 +-
ci/scripts/python_wheel_windows_build.bat | 9 +-
ci/scripts/python_wheel_xlinux_build.sh | 10 +-
dev/release/02-source-test.rb | 2 +-
dev/release/verify-release-candidate.bat | 6 +-
dev/release/verify-release-candidate.sh | 2 +-
dev/tasks/python-wheels/github.osx.yml | 1 -
docs/source/developers/documentation.rst | 3 +-
.../developers/guide/step_by_step/building.rst | 2 +-
docs/source/developers/python/building.rst | 72 +--
docs/source/developers/python/development.rst | 3 +-
python/.gitignore | 9 +-
python/CMakeLists.txt | 22 +-
python/LICENSE.txt | 1 +
python/NOTICE.txt | 1 +
python/_build_backend/__init__.py | 68 +++
python/examples/minimal_build/build_conda.sh | 5 +-
python/examples/minimal_build/build_venv.sh | 5 +-
.../src/arrow/python/config_internal.h.cmake | 4 +-
python/pyproject.toml | 29 +-
python/requirements-build.txt | 3 +-
python/requirements-wheel-build.txt | 2 +-
python/setup.py | 483 ---------------------
30 files changed, 198 insertions(+), 586 deletions(-)
diff --git a/.env b/.env
index b2b5b5eb3b..5d136890f6 100644
--- a/.env
+++ b/.env
@@ -99,8 +99,8 @@ VCPKG="66c0373dc7fca549e5803087b9487edfe3aca0a1" #
2026.01.16 Release
# ci/docker/python-*-windows-*.dockerfile or the vcpkg config.
# This is a workaround for our CI problem that "archery docker build" doesn't
# use pulled built images in dev/tasks/python-wheels/github.windows.yml.
-PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2026-02-25
-PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2026-02-25
+PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2026-03-04
+PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2026-03-04
# Use conanio/${CONAN_BASE}:{CONAN_VERSION} for "docker compose run --rm
conan".
# See https://github.com/conan-io/conan-docker-tools#readme and
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index 59171ddcaa..d8ee21761a 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -103,7 +103,7 @@ jobs:
shell: bash
run: |
gem install test-unit openssl
- pip install "cython>=3.1" setuptools pytest requests setuptools-scm
+ pip install build "cython>=3.1" pytest requests scikit-build-core
setuptools-scm
- name: Run Release Test
shell: bash
run: |
diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt
index 33ac193f86..dd16d66b72 100644
--- a/ci/conda_env_python.txt
+++ b/ci/conda_env_python.txt
@@ -29,5 +29,5 @@ numpy>=1.16.6
pytest
pytest-faulthandler
s3fs>=2023.10.0
-setuptools>=77
+scikit-build-core
setuptools_scm>=8
diff --git a/ci/scripts/python_build.bat b/ci/scripts/python_build.bat
index 06f5a63722..10d10bda6a 100644
--- a/ci/scripts/python_build.bat
+++ b/ci/scripts/python_build.bat
@@ -111,10 +111,7 @@ echo "=== CCACHE Stats after build ==="
ccache -sv
echo "=== Building Python ==="
-set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE%
-set PYARROW_BUILD_VERBOSE=1
set PYARROW_BUNDLE_ARROW_CPP=ON
-set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR%
set PYARROW_WITH_ACERO=%ARROW_ACERO%
set PYARROW_WITH_AZURE=%ARROW_AZURE%
set PYARROW_WITH_DATASET=%ARROW_DATASET%
@@ -137,6 +134,6 @@ pushd %SOURCE_DIR%\python
%PYTHON_CMD% -m pip install -r requirements-build.txt || exit /B 1
@REM Build PyArrow
-%PYTHON_CMD% -m pip install --no-deps --no-build-isolation -vv . || exit /B 1
+%PYTHON_CMD% -m pip install --no-deps --no-build-isolation -vv -C
build.verbose=true . || exit /B 1
popd
diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh
index 36dc35a2de..f8c1af3982 100755
--- a/ci/scripts/python_build.sh
+++ b/ci/scripts/python_build.sh
@@ -59,9 +59,8 @@ if [ -n "${CONDA_PREFIX}" ]; then
conda list
fi
-export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
-export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
-
+export CMAKE_BUILD_PARALLEL_LEVEL=${n_jobs}
+export CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
export PYARROW_WITH_ACERO=${ARROW_ACERO:-OFF}
export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF}
export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
@@ -76,8 +75,6 @@ export
PYARROW_WITH_PARQUET_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON}
export PYARROW_WITH_S3=${ARROW_S3:-OFF}
export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF}
-export PYARROW_PARALLEL=${n_jobs}
-
: "${CMAKE_PREFIX_PATH:=${ARROW_HOME}}"
export CMAKE_PREFIX_PATH
export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
@@ -90,11 +87,9 @@ export
DYLD_LIBRARY_PATH=${ARROW_HOME}/lib${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_P
rm -rf "${python_build_dir}"
cp -aL "${source_dir}" "${python_build_dir}"
pushd "${python_build_dir}"
-# - Cannot call setup.py as it may install in the wrong directory
-# on Debian/Ubuntu (ARROW-15243).
# - Cannot use build isolation as we want to use specific dependency versions
# (e.g. Numpy, Pandas) on some CI jobs.
-${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv .
+${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv -C
cmake.build-type="${CMAKE_BUILD_TYPE:-Debug}" .
popd
if [ "${BUILD_DOCS_PYTHON}" == "ON" ]; then
diff --git a/ci/scripts/python_sdist_build.sh b/ci/scripts/python_sdist_build.sh
index dfb9951843..4f7437c423 100755
--- a/ci/scripts/python_sdist_build.sh
+++ b/ci/scripts/python_sdist_build.sh
@@ -23,5 +23,5 @@ source_dir=${1}/python
pushd "${source_dir}"
export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION:-}
-${PYTHON:-python} setup.py sdist
+${PYTHON:-python} -m build --sdist
popd
diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh
index 98a938d970..eca8e0542e 100755
--- a/ci/scripts/python_sdist_test.sh
+++ b/ci/scripts/python_sdist_test.sh
@@ -25,8 +25,7 @@ export ARROW_SOURCE_DIR=${arrow_dir}
export ARROW_TEST_DATA=${arrow_dir}/testing/data
export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
-export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
-export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
+export CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
export PYARROW_WITH_ACERO=${ARROW_ACERO:-ON}
export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF}
export PYARROW_WITH_S3=${ARROW_S3:-OFF}
@@ -64,7 +63,7 @@ if [ -n "${ARROW_PYTHON_VENV:-}" ]; then
. "${ARROW_PYTHON_VENV}/bin/activate"
fi
-${PYTHON:-python} -m pip install "${sdist}"
+${PYTHON:-python} -m pip install -C
cmake.build-type="${CMAKE_BUILD_TYPE:-Debug}" "${sdist}"
# shellcheck disable=SC2086
pytest -r s ${PYTEST_ARGS:-} --pyargs pyarrow
diff --git a/ci/scripts/python_wheel_macos_build.sh
b/ci/scripts/python_wheel_macos_build.sh
index 94f3e7ba89..1571cd57f2 100755
--- a/ci/scripts/python_wheel_macos_build.sh
+++ b/ci/scripts/python_wheel_macos_build.sh
@@ -146,9 +146,7 @@ cmake --build . --target install
popd
echo "=== (${PYTHON_VERSION}) Building wheel ==="
-export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-export PYARROW_BUNDLE_ARROW_CPP=1
-export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR}
+export PYARROW_BUNDLE_ARROW_CPP=ON
export PYARROW_WITH_ACERO=${ARROW_ACERO}
export PYARROW_WITH_AZURE=${ARROW_AZURE}
export PYARROW_WITH_DATASET=${ARROW_DATASET}
@@ -161,7 +159,6 @@ export PYARROW_WITH_PARQUET=${ARROW_PARQUET}
export PYARROW_WITH_PARQUET_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION}
export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT}
export PYARROW_WITH_S3=${ARROW_S3}
-export
PYARROW_CMAKE_OPTIONS="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}
-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}"
export ARROW_HOME=${build_dir}/install
# PyArrow build configuration
export CMAKE_PREFIX_PATH=${build_dir}/install
@@ -169,7 +166,11 @@ export CMAKE_PREFIX_PATH=${build_dir}/install
export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION}
pushd ${source_dir}/python
-python -m build --sdist --wheel . --no-isolation
+python -m build --sdist --wheel . --no-isolation \
+ -C build.verbose=true \
+ -C cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \
+ -C cmake.args="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}" \
+ -C cmake.args="-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}"
popd
echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ==="
diff --git a/ci/scripts/python_wheel_windows_build.bat
b/ci/scripts/python_wheel_windows_build.bat
index d69a6aac54..14e3e5a629 100644
--- a/ci/scripts/python_wheel_windows_build.bat
+++ b/ci/scripts/python_wheel_windows_build.bat
@@ -115,11 +115,7 @@ cmake --build . --config %CMAKE_BUILD_TYPE% --target
install || exit /B 1
popd
echo "=== (%PYTHON%) Building wheel ==="
-set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE%
-set PYARROW_BUILD_VERBOSE=1
set PYARROW_BUNDLE_ARROW_CPP=ON
-set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR%
-set
PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%"
set PYARROW_WITH_ACERO=%ARROW_ACERO%
set PYARROW_WITH_AZURE=%ARROW_AZURE%
set PYARROW_WITH_DATASET=%ARROW_DATASET%
@@ -138,7 +134,10 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist
pushd C:\arrow\python
@REM Build wheel
-%PYTHON_CMD% -m build --sdist --wheel . --no-isolation || exit /B 1
+%PYTHON_CMD% -m build --sdist --wheel . --no-isolation -vv ^
+ -C build.verbose=true ^
+ -C cmake.build-type=%CMAKE_BUILD_TYPE% ^
+ -C
cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%"
|| exit /B 1
@REM Repair the wheel with delvewheel
@REM
diff --git a/ci/scripts/python_wheel_xlinux_build.sh
b/ci/scripts/python_wheel_xlinux_build.sh
index 013c09765f..960fe5bad6 100755
--- a/ci/scripts/python_wheel_xlinux_build.sh
+++ b/ci/scripts/python_wheel_xlinux_build.sh
@@ -154,10 +154,7 @@ popd
check_arrow_visibility
echo "=== (${PYTHON_VERSION}) Building wheel ==="
-export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-export PYARROW_BUNDLE_ARROW_CPP=1
-export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR}
-export
PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}"
+export PYARROW_BUNDLE_ARROW_CPP=ON
export PYARROW_WITH_ACERO=${ARROW_ACERO}
export PYARROW_WITH_AZURE=${ARROW_AZURE}
export PYARROW_WITH_DATASET=${ARROW_DATASET}
@@ -175,7 +172,10 @@ export ARROW_HOME=/tmp/arrow-dist
export CMAKE_PREFIX_PATH=/tmp/arrow-dist
pushd /arrow/python
-python -m build --sdist --wheel . --no-isolation
+python -m build --sdist --wheel . --no-isolation \
+ -C build.verbose=true \
+ -C cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \
+ -C
cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}"
echo "=== Strip symbols from wheel ==="
mkdir -p dist/temp-fix-wheel
diff --git a/dev/release/02-source-test.rb b/dev/release/02-source-test.rb
index 5bd7c71770..fdd1db9c60 100644
--- a/dev/release/02-source-test.rb
+++ b/dev/release/02-source-test.rb
@@ -64,7 +64,7 @@ class SourceTest < Test::Unit::TestCase
def test_python_version
source
Dir.chdir("#{@tag_name_no_rc}/python") do
- sh("python3", "setup.py", "sdist")
+ sh("python", "-m", "build", "--sdist")
if on_release_branch?
pyarrow_source_archive = "dist/pyarrow-#{@release_version}.tar.gz"
else
diff --git a/dev/release/verify-release-candidate.bat
b/dev/release/verify-release-candidate.bat
index 50dfc06e69..c69dab58e7 100644
--- a/dev/release/verify-release-candidate.bat
+++ b/dev/release/verify-release-candidate.bat
@@ -132,15 +132,17 @@ popd
@rem Build and import pyarrow
pushd !ARROW_SOURCE!\python
+pip install build || exit /B 1
pip install -r requirements-test.txt || exit /B 1
-set PYARROW_CMAKE_GENERATOR=%GENERATOR%
+set CMAKE_GENERATOR=%GENERATOR%
set PYARROW_WITH_FLIGHT=1
set PYARROW_WITH_PARQUET=1
set PYARROW_WITH_PARQUET_ENCRYPTION=1
set PYARROW_WITH_DATASET=1
set PYARROW_TEST_CYTHON=OFF
-python setup.py build_ext --inplace --bundle-arrow-cpp bdist_wheel || exit /B 1
+set PYARROW_BUNDLE_ARROW_CPP=ON
+python -m build --sdist --wheel . --no-isolation || exit /B 1
pytest pyarrow -v -s --enable-parquet || exit /B 1
popd
diff --git a/dev/release/verify-release-candidate.sh
b/dev/release/verify-release-candidate.sh
index 325c4342e6..f91b8de474 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -545,7 +545,7 @@ test_python() {
CMAKE_PREFIX_PATH="${CONDA_BACKUP_CMAKE_PREFIX_PATH}:${CMAKE_PREFIX_PATH}"
fi
- export PYARROW_PARALLEL=$NPROC
+ export CMAKE_BUILD_PARALLEL_LEVEL=$NPROC
export PYARROW_WITH_DATASET=1
export PYARROW_WITH_HDFS=1
export PYARROW_WITH_ORC=1
diff --git a/dev/tasks/python-wheels/github.osx.yml
b/dev/tasks/python-wheels/github.osx.yml
index ab17d11da1..3de8c3c021 100644
--- a/dev/tasks/python-wheels/github.osx.yml
+++ b/dev/tasks/python-wheels/github.osx.yml
@@ -23,7 +23,6 @@
CMAKE_BUILD_TYPE: release
CXX: "clang++"
MACOSX_DEPLOYMENT_TARGET: "{{ macos_deployment_target }}"
- PYARROW_BUILD_VERBOSE: 1
PYARROW_VERSION: "{{ arrow.no_rc_version }}"
PYTHON_VERSION: "{{ python_version }}"
PYTHON_ABI_TAG: "{{ python_abi_tag }}"
diff --git a/docs/source/developers/documentation.rst
b/docs/source/developers/documentation.rst
index 5f0ebbdb7d..3cdb2f23b0 100644
--- a/docs/source/developers/documentation.rst
+++ b/docs/source/developers/documentation.rst
@@ -71,8 +71,9 @@ These two steps are mandatory and must be executed in order.
this step requires that ``pyarrow`` library is installed
in your python environment. One way to accomplish
this is to follow the build instructions at :ref:`python-development`
- and then run ``python setup.py install`` in arrow/python
+ and then run ``pip install --no-build-isolation .`` in arrow/python
(it is best to do this in a dedicated conda/virtual environment).
+ Add ``-vv`` to the pip command to get output of the build process.
You can still build the documentation without ``pyarrow``
library installed but note that Python part of the documentation
diff --git a/docs/source/developers/guide/step_by_step/building.rst
b/docs/source/developers/guide/step_by_step/building.rst
index 5317cf06c7..a8eb1c46c2 100644
--- a/docs/source/developers/guide/step_by_step/building.rst
+++ b/docs/source/developers/guide/step_by_step/building.rst
@@ -155,7 +155,7 @@ Building other Arrow libraries
.. code:: console
- $ python setup.py build_ext --inplace
+ $ pip install --no-build-isolation --editable -vv .
**Recompiling C++**
diff --git a/docs/source/developers/python/building.rst
b/docs/source/developers/python/building.rst
index deb6076e44..fc2d1a2629 100644
--- a/docs/source/developers/python/building.rst
+++ b/docs/source/developers/python/building.rst
@@ -349,7 +349,7 @@ Optional build components
There are several optional components that can be enabled or disabled by
setting
specific flags to ``ON`` or ``OFF``, respectively. See the list of
-:ref:`python-dev-env-variables` below.
+:ref:`python-dev-components` below.
You may choose between different kinds of C++ build types:
@@ -366,7 +366,7 @@ You may choose between different kinds of C++ build types:
For any other C++ build challenges, see :ref:`cpp-development`.
In case you may need to rebuild the C++ part due to errors in the process it is
-advisable to delete the build folder, see :ref:`python-dev-env-variables`.
+advisable to delete the build folder, see :ref:`stale_artifacts`.
If the build has passed successfully and you need to rebuild due to latest pull
from git main, then this step is not needed.
@@ -378,7 +378,7 @@ Build PyArrow
If you did build one of the optional components in C++, the equivalent
components
will be enabled by default for building pyarrow. This default can be overridden
by setting the corresponding ``PYARROW_WITH_$COMPONENT`` environment variable
-to 0 or 1, see :ref:`python-dev-env-variables` below.
+to 0 or 1, see :ref:`python-dev-components` below.
To build PyArrow run:
@@ -391,7 +391,7 @@ To build PyArrow run:
.. code-block::
$ pushd arrow/python
- $ python setup.py build_ext --inplace
+ $ pip install --no-build-isolation --editable -vv .
$ popd
.. tab-item:: Windows
@@ -400,7 +400,7 @@ To build PyArrow run:
.. code-block::
$ pushd arrow\python
- $ python setup.py build_ext --inplace
+ $ pip install --no-build-isolation --editable -vv .
$ popd
.. note::
@@ -428,14 +428,14 @@ To build PyArrow run:
.. code-block::
- $ set PYARROW_BUNDLE_ARROW_CPP=1
- $ python setup.py build_ext --inplace
+ $ set PYARROW_BUNDLE_ARROW_CPP=ON
+ $ pip install --no-build-isolation --editable -vv .
Note that bundled Arrow C++ libraries will not be automatically
updated when rebuilding Arrow C++.
To set the number of threads used to compile PyArrow's C++/Cython components,
-set the ``PYARROW_PARALLEL`` environment variable.
+set the ``CMAKE_BUILD_PARALLEL_LEVEL`` environment variable.
If you build PyArrow but then make changes to the Arrow C++ or PyArrow code,
you can end up with stale build artifacts. This can lead to
@@ -444,9 +444,8 @@ artifacts before rebuilding. See
:ref:`python-dev-env-variables`.
By default, PyArrow will be built in release mode even if Arrow C++ has been
built in debug mode. To create a debug build of PyArrow, run
-``export PYARROW_BUILD_TYPE=debug`` prior to running ``python setup.py
-build_ext --inplace`` above. A ``relwithdebinfo`` build can be created
-similarly.
+``pip install --no-build-isolation -vv -C cmake.build-type=Debug .``.
+A ``relwithdebinfo`` build can be created similarly.
Self-Contained Wheel
^^^^^^^^^^^^^^^^^^^^
@@ -457,13 +456,13 @@ libraries). This ensures that all necessary native
libraries are bundled inside
the wheel, so users can install it without needing to have Arrow or Parquet
installed separately on their system.
-To do this, pass the ``--bundle-arrow-cpp`` option to the build command:
+To do this, set the ``PYARROW_BUNDLE_ARROW_CPP`` environment variable before
building ``pyarrow``:
.. code-block::
- $ pip install wheel # if not installed
- $ python setup.py build_ext --build-type=$ARROW_BUILD_TYPE \
- --bundle-arrow-cpp bdist_wheel
+ $ export PYARROW_BUNDLE_ARROW_CPP=ON
+ $ pip install build wheel # if not installed
+ $ python -m build --sdist --wheel . --no-isolation
This option is typically only needed for releases or distribution scenarios,
not for local development.
@@ -542,8 +541,8 @@ described in development section.
.. _python-dev-env-variables:
-Relevant components and environment variables
-=============================================
+Relevant environment variables and build options
+================================================
List of relevant environment variables that can be used to build
PyArrow are:
@@ -555,14 +554,11 @@ PyArrow are:
* - PyArrow environment variable
- Description
- Default value
- * - ``PYARROW_BUILD_TYPE``
- - Build type for PyArrow (release, debug or relwithdebinfo), sets
``CMAKE_BUILD_TYPE``
- - ``release``
- * - ``PYARROW_CMAKE_GENERATOR``
- - Example: ``'Visual Studio 17 2022 Win64'``
+ * - ``CMAKE_BUILD_PARALLEL_LEVEL``
+ - Number of processes used to compile PyArrow’s C++/Cython components
- ``''``
- * - ``PYARROW_CMAKE_OPTIONS``
- - Extra CMake and Arrow options (ex. ``"-DARROW_SIMD_LEVEL=NONE
-DCMAKE_OSX_ARCHITECTURES=x86_64;arm64"``)
+ * - ``CMAKE_GENERATOR``
+ - Example: ``'Visual Studio 17 2022 Win64'``
- ``''``
* - ``PYARROW_CXXFLAGS``
- Extra C++ compiler flags
@@ -576,12 +572,28 @@ PyArrow are:
* - ``PYARROW_BUNDLE_CYTHON_CPP``
- Bundle the C++ files generated by Cython
- ``0`` (``OFF``)
- * - ``PYARROW_BUILD_VERBOSE``
- - Enable verbose output from Makefile builds
- - ``0`` (``OFF``)
- * - ``PYARROW_PARALLEL``
- - Number of processes used to compile PyArrow’s C++/Cython components
- - ``''``
+
+To set the build type (e.g. ``Debug``, ``Release``, ``RelWithDebInfo``), pass
+``-C cmake.build-type=Debug`` to ``pip install`` or to ``python -m build``.
+
+For extra CMake arguments you can use the ``-C cmake.args=``
+argument when building PyArrow. For example, to build a version of PyArrow
+with ``ARROW_SIMD_LEVEL=NONE``, you can run
+``pip install --no-build-isolation -vv -C cmake.args="-DARROW_SIMD_LEVEL=NONE"
.``.
+
+On PyArrow 24.0.0 we migrated our Python build backend from setuptools to
+scikit-build-core, which is a CMake-based build system. Previous versions used
+``PYARROW_BUILD_TYPE`` and ``PYARROW_CMAKE_OPTIONS`` environment variables
+to customize the CMake invocation. This is no longer supported.
+Instead, use the ``-C cmake.build-type=<build_type>`` and ``-C
cmake.args=-D<OPTION>=<VALUE>`` option as described above.
+
+To enable verbose output from the build tool, pass
+``-C build.verbose=true`` to ``pip install`` or to ``python -m build``.
+
+.. _python-dev-components:
+
+Relevant components
+===================
The components being disabled or enabled when building PyArrow is by default
based on how Arrow C++ is build (i.e. it follows the ``ARROW_$COMPONENT``
flags).
diff --git a/docs/source/developers/python/development.rst
b/docs/source/developers/python/development.rst
index 5529ad25a2..5757b76187 100644
--- a/docs/source/developers/python/development.rst
+++ b/docs/source/developers/python/development.rst
@@ -222,7 +222,8 @@ Debug build
Since PyArrow depends on the Arrow C++ libraries, debugging can
frequently involve crossing between Python and C++ shared libraries.
For the best experience, make sure you've built both Arrow C++
-(``-DCMAKE_BUILD_TYPE=Debug``) and PyArrow (``export
PYARROW_BUILD_TYPE=debug``)
+(``-DCMAKE_BUILD_TYPE=Debug``) and PyArrow
+(``pip install --no-build-isolation -C cmake.build-type=Debug .``)
in debug mode.
Using gdb on Linux
diff --git a/python/.gitignore b/python/.gitignore
index ce97ba4af6..de51d21c9f 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -4,7 +4,6 @@ CMakeCache.txt
CTestTestfile.cmake
Makefile
cmake_install.cmake
-build/
Testing/
# Python stuff
@@ -24,9 +23,9 @@ cython_debug
# Bundled headers
pyarrow/include
-# setup.py working directory
+# build directory
build
-# setup.py dist directory
+# dist directory
dist
# Coverage
.coverage
@@ -44,7 +43,3 @@ manylinux1/arrow
nm_arrow.log
visible_symbols.log
-# the purpose of the custom SDist class in setup.py is to include these files
-# in the sdist tarball, but we don't want to track duplicates
-LICENSE.txt
-NOTICE.txt
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index f99225284a..0630e0cff7 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -282,6 +282,11 @@ message(STATUS "Found NumPy version:
${Python3_NumPy_VERSION}")
message(STATUS "NumPy include dir: ${NUMPY_INCLUDE_DIRS}")
include(UseCython)
+
+if(CYTHON_VERSION VERSION_LESS "3.1")
+ message(FATAL_ERROR "Please update your Cython version. Supported Cython >=
3.1, found ${CYTHON_VERSION}"
+ )
+endif()
message(STATUS "Found Cython version: ${CYTHON_VERSION}")
# Arrow C++ and set default PyArrow build options
@@ -352,7 +357,19 @@ set(PYARROW_CPP_ROOT_DIR pyarrow/src)
set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python)
# Write out compile-time configuration constants
-string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_PYBUILD_TYPE)
+if(CMAKE_BUILD_TYPE)
+ string(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_PYBUILD_TYPE)
+else()
+ # For multi-config generators (XCode and Visual Studio),
+ # CMAKE_BUILD_TYPE is not set at configure time.
+ # scikit-build-core does the right thing with cmake.build-type and
+ # adds the corresponding --config but does not populate CMAKE_BUILD_TYPE
+ # for those. On this specific case, we set the default to "RELEASE".
+ # The actual build type is injected through target compile definitions
+ # for multi-config generators.
+ set(UPPERCASE_PYBUILD_TYPE "RELEASE")
+endif()
+
configure_file("${PYARROW_CPP_SOURCE_DIR}/config_internal.h.cmake"
"${PYARROW_CPP_SOURCE_DIR}/config_internal.h" ESCAPE_QUOTES)
@@ -501,6 +518,9 @@ else()
endif()
target_link_libraries(arrow_python PUBLIC Python3::NumPy)
target_compile_definitions(arrow_python PRIVATE ARROW_PYTHON_EXPORTING)
+if(CMAKE_CONFIGURATION_TYPES)
+ target_compile_definitions(arrow_python PRIVATE
PYARROW_BUILD_TYPE="$<CONFIG>")
+endif()
set_target_properties(arrow_python PROPERTIES VERSION
"${PYARROW_FULL_SO_VERSION}"
SOVERSION
"${PYARROW_SO_VERSION}")
install(TARGETS arrow_python
diff --git a/python/LICENSE.txt b/python/LICENSE.txt
new file mode 120000
index 0000000000..4ab43736a8
--- /dev/null
+++ b/python/LICENSE.txt
@@ -0,0 +1 @@
+../LICENSE.txt
\ No newline at end of file
diff --git a/python/NOTICE.txt b/python/NOTICE.txt
new file mode 120000
index 0000000000..eb9f24e040
--- /dev/null
+++ b/python/NOTICE.txt
@@ -0,0 +1 @@
+../NOTICE.txt
\ No newline at end of file
diff --git a/python/_build_backend/__init__.py
b/python/_build_backend/__init__.py
new file mode 100644
index 0000000000..6e5c328a69
--- /dev/null
+++ b/python/_build_backend/__init__.py
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Build backend wrapper that resolves license symlinks before delegating
+to scikit-build-core.
+
+Arrow's LICENSE.txt and NOTICE.txt live at the repository root, one level
+above python/. They are symlinked into python/ so that license-files in
+pyproject.toml can reference them otherwise project metadata fails validation.
+This is done before any build backend is invoked that's why symlinks are
necessary.
+But when building sdist tarballs symlinks are not copied and we end up with
+broken LICENSE.txt and NOTICE.txt.
+
+This custom build backend replaces the symlinks with actual file copies before
+scikit_build_core.build.build_sdist so that the sdist contains the real file
content.
+The symlinks are restored afterwards to keep the git working tree clean.
+"""
+
+from contextlib import contextmanager
+import os
+from pathlib import Path
+import shutil
+import sys
+
+from scikit_build_core.build import * # noqa: F401,F403
+from scikit_build_core.build import build_sdist as scikit_build_sdist
+
+LICENSE_FILES = ("LICENSE.txt", "NOTICE.txt")
+PYTHON_DIR = Path(__file__).resolve().parent.parent
+
+
+@contextmanager
+def prepare_licenses():
+ # Temporarily copy the files so they are included on sdist.
+ for name in LICENSE_FILES:
+ parent_license = PYTHON_DIR.parent / name
+ pyarrow_license = PYTHON_DIR / name
+ pyarrow_license.unlink(missing_ok=True)
+ shutil.copy2(parent_license, pyarrow_license)
+ try:
+ yield
+ finally:
+ if sys.platform != "win32":
+ # Copy back the original symlinks so git status is clean.
+ for name in LICENSE_FILES:
+ filepath = PYTHON_DIR / name
+ os.unlink(filepath)
+ os.symlink(f"../{name}", filepath)
+
+
+def build_sdist(sdist_directory, config_settings=None):
+ with prepare_licenses():
+ return scikit_build_sdist(sdist_directory, config_settings)
diff --git a/python/examples/minimal_build/build_conda.sh
b/python/examples/minimal_build/build_conda.sh
index 0b731638cd..5b39b93b2f 100755
--- a/python/examples/minimal_build/build_conda.sh
+++ b/python/examples/minimal_build/build_conda.sh
@@ -94,11 +94,10 @@ pushd $ARROW_ROOT/python
rm -rf build/ # remove any pesky preexisting build directory
export
CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}}
-export PYARROW_BUILD_TYPE=Debug
-export PYARROW_CMAKE_GENERATOR=Ninja
+export CMAKE_GENERATOR=Ninja
# Use the same command that we use on python_build.sh
-python -m pip install --no-deps --no-build-isolation -vv .
+python -m pip install --no-deps --no-build-isolation -vv -C
cmake.build-type=Debug .
popd
pytest -vv -r s ${PYTEST_ARGS} --pyargs pyarrow
diff --git a/python/examples/minimal_build/build_venv.sh
b/python/examples/minimal_build/build_venv.sh
index f462c4e9b9..53c2810efc 100755
--- a/python/examples/minimal_build/build_venv.sh
+++ b/python/examples/minimal_build/build_venv.sh
@@ -66,11 +66,10 @@ pushd $ARROW_ROOT/python
rm -rf build/ # remove any pesky preexisting build directory
export
CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}}
-export PYARROW_BUILD_TYPE=Debug
-export PYARROW_CMAKE_GENERATOR=Ninja
+export CMAKE_GENERATOR=Ninja
# Use the same command that we use on python_build.sh
-python -m pip install --no-deps --no-build-isolation -vv .
+python -m pip install --no-deps --no-build-isolation -vv -C
cmake.build-type=Debug .
popd
diff --git a/python/pyarrow/src/arrow/python/config_internal.h.cmake
b/python/pyarrow/src/arrow/python/config_internal.h.cmake
index e8a6e78c48..f76edccb69 100644
--- a/python/pyarrow/src/arrow/python/config_internal.h.cmake
+++ b/python/pyarrow/src/arrow/python/config_internal.h.cmake
@@ -15,4 +15,6 @@
// specific language governing permissions and limitations
// under the License.
-#define PYARROW_BUILD_TYPE "@UPPERCASE_PYBUILD_TYPE@"
\ No newline at end of file
+#ifndef PYARROW_BUILD_TYPE
+#define PYARROW_BUILD_TYPE "@UPPERCASE_PYBUILD_TYPE@"
+#endif
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 217dba81b8..14aa37ed04 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -17,16 +17,17 @@
[build-system]
requires = [
+ "scikit-build-core",
"cython >= 3.1",
# Needed for build-time stub docstring extraction
"libcst>=1.8.6",
"numpy>=1.25",
- # configuring setuptools_scm in pyproject.toml requires
- # versions released after 2022
"setuptools_scm[toml]>=8",
- "setuptools>=77",
]
-build-backend = "setuptools.build_meta"
+# We use a really simple build backend wrapper over scikit-build-core
+# to solve licenses to work around links not being included in sdists.
+build-backend = "_build_backend"
+backend-path = ["."]
[project]
name = "pyarrow"
@@ -81,16 +82,18 @@ exclude = [
'\._.*$',
]
-[tool.setuptools]
-zip-safe=false
-include-package-data=true
+[tool.scikit-build]
+cmake.build-type = "Release"
+metadata.version.provider = "scikit_build_core.metadata.setuptools_scm"
+sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/"]
+wheel.packages = ["pyarrow"]
+wheel.install-dir = "pyarrow"
-[tool.setuptools.packages.find]
-include = ["pyarrow"]
-namespaces = false
-
-[tool.setuptools.package-data]
-pyarrow = ["*.pxd", "*.pyi", "*.pyx", "includes/*.pxd", "py.typed"]
+[tool.scikit-build.cmake.define]
+PYARROW_BUNDLE_ARROW_CPP = {env = "PYARROW_BUNDLE_ARROW_CPP", default = "OFF"}
+PYARROW_BUNDLE_CYTHON_CPP = {env = "PYARROW_BUNDLE_CYTHON_CPP", default = "OFF"}
+PYARROW_GENERATE_COVERAGE = {env = "PYARROW_GENERATE_COVERAGE", default = "OFF"}
+PYARROW_CXXFLAGS = {env = "PYARROW_CXXFLAGS", default = ""}
[tool.setuptools_scm]
root = '..'
diff --git a/python/requirements-build.txt b/python/requirements-build.txt
index c3b7aa48eb..fdd3e68a1b 100644
--- a/python/requirements-build.txt
+++ b/python/requirements-build.txt
@@ -1,5 +1,6 @@
+build
cython>=3.1
libcst>=1.8.6
numpy>=1.25
+scikit-build-core
setuptools_scm>=8
-setuptools>=77
diff --git a/python/requirements-wheel-build.txt b/python/requirements-wheel-build.txt
index 6a2c622124..a3ac139677 100644
--- a/python/requirements-wheel-build.txt
+++ b/python/requirements-wheel-build.txt
@@ -3,6 +3,6 @@ cython>=3.1
# Needed for build-time stub docstring extraction
libcst>=1.8.6
numpy>=2.0.0
+scikit-build-core
setuptools_scm
-setuptools>=77
wheel
diff --git a/python/setup.py b/python/setup.py
deleted file mode 100755
index 02e7cb4614..0000000000
--- a/python/setup.py
+++ /dev/null
@@ -1,483 +0,0 @@
-#!/usr/bin/env python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import contextlib
-import os
-import os.path
-from os.path import join as pjoin
-import re
-import shlex
-import shutil
-import sys
-import warnings
-
-if sys.version_info >= (3, 10):
- import sysconfig
-else:
- # Get correct EXT_SUFFIX on Windows (https://bugs.python.org/issue39825)
- from distutils import sysconfig
-
-from setuptools import setup, Extension, Distribution
-from setuptools.command.sdist import sdist
-
-from Cython.Distutils import build_ext as _build_ext
-import Cython
-
-# Check if we're running 64-bit Python
-is_64_bit = sys.maxsize > 2**32
-
-# We can't use sys.platform in a cross-compiling situation
-# as here it may be set to the host not target platform
-is_emscripten = (
- sysconfig.get_config_var("SOABI")
- and sysconfig.get_config_var("SOABI").find("emscripten") != -1
-)
-
-
-if Cython.__version__ < '3.1':
- raise Exception(
- 'Please update your Cython version. Supported Cython >= 3.1')
-
-setup_dir = os.path.abspath(os.path.dirname(__file__))
-
-ext_suffix = sysconfig.get_config_var('EXT_SUFFIX')
-
-
[email protected]
-def changed_dir(dirname):
- oldcwd = os.getcwd()
- os.chdir(dirname)
- try:
- yield
- finally:
- os.chdir(oldcwd)
-
-
-def strtobool(val):
- """Convert a string representation of truth to true (1) or false (0).
-
- True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
- are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if
- 'val' is anything else.
- """
- # Copied from distutils
- val = val.lower()
- if val in ('y', 'yes', 't', 'true', 'on', '1'):
- return 1
- elif val in ('n', 'no', 'f', 'false', 'off', '0'):
- return 0
- else:
- raise ValueError("invalid truth value %r" % (val,))
-
-
-MSG_DEPR_SETUP_BUILD_FLAGS = """
- !!
-
- ***********************************************************************
- The '{}' flag is being passed to setup.py, but this is
- deprecated.
-
- If a certain component is available in Arrow C++, it will automatically
- be enabled for the PyArrow build as well. If you want to force the
- build of a certain component, you can still use the
- PYARROW_WITH_$COMPONENT environment variable.
- ***********************************************************************
-
- !!
-"""
-
-
-class build_ext(_build_ext):
- _found_names = ()
-
- def build_extensions(self):
- import numpy
- numpy_incl = numpy.get_include()
-
- self.extensions = [ext for ext in self.extensions
- if ext.name != '__dummy__']
-
- for ext in self.extensions:
- if (hasattr(ext, 'include_dirs') and
- numpy_incl not in ext.include_dirs):
- ext.include_dirs.append(numpy_incl)
- _build_ext.build_extensions(self)
-
- def run(self):
- self._run_cmake()
- self._update_stubs()
- _build_ext.run(self)
-
- def _update_stubs(self):
- """Copy stubs to build directory, then inject docstrings into the
copies."""
- if is_emscripten:
- # stubs are not supported in Emscripten build
- return
- stubs_dir = pjoin(setup_dir, 'pyarrow-stubs')
- if not os.path.exists(stubs_dir):
- return
-
- build_cmd = self.get_finalized_command('build')
- build_lib = os.path.abspath(build_cmd.build_lib)
-
- # Copy clean stubs to build directory first
- self._copy_stubs(stubs_dir, build_lib)
-
- # Inject docstrings into the build copies (not the source stubs).
- # We pass build_lib as stubs_dir since it mirrors the pyarrow-stubs/
- # directory structure (both contain a pyarrow/ subdirectory with .pyi
- # files), so the namespace resolution logic works identically.
- import importlib.util
- spec = importlib.util.spec_from_file_location(
- "update_stub_docstrings",
- pjoin(setup_dir, 'scripts', 'update_stub_docstrings.py'))
- mod = importlib.util.module_from_spec(spec)
- spec.loader.exec_module(mod)
- mod.add_docstrings_from_build(build_lib, build_lib)
-
- def _copy_stubs(self, stubs_dir, build_lib):
- """Copy .pyi stub files to the build directory."""
- src_dir = pjoin(stubs_dir, 'pyarrow')
- dest_dir = pjoin(build_lib, 'pyarrow')
-
- if not os.path.exists(src_dir):
- return
-
- print(f"-- Copying stubs: {src_dir} -> {dest_dir}")
- for root, dirs, files in os.walk(src_dir):
- for fname in files:
- if fname.endswith('.pyi'):
- src = pjoin(root, fname)
- rel_path = os.path.relpath(src, src_dir)
- dest = pjoin(dest_dir, rel_path)
- os.makedirs(os.path.dirname(dest), exist_ok=True)
- shutil.copy2(src, dest)
-
- # adapted from cmake_build_ext in dynd-python
- # github.com/libdynd/dynd-python
-
- description = "Build the C-extensions for arrow"
- user_options = ([('cmake-generator=', None, 'CMake generator'),
- ('extra-cmake-args=', None, 'extra arguments for CMake'),
- ('build-type=', None,
- 'build type (debug or release), default release'),
- ('boost-namespace=', None,
- 'namespace of boost (default: boost)'),
- ('with-cuda', None, 'build the Cuda extension'),
- ('with-flight', None, 'build the Flight extension'),
- ('with-substrait', None, 'build the Substrait extension'),
- ('with-acero', None, 'build the Acero Engine extension'),
- ('with-dataset', None, 'build the Dataset extension'),
- ('with-parquet', None, 'build the Parquet extension'),
- ('with-parquet-encryption', None,
- 'build the Parquet encryption extension'),
- ('with-azure', None,
- 'build the Azure Blob Storage extension'),
- ('with-gcs', None,
- 'build the Google Cloud Storage (GCS) extension'),
- ('with-s3', None, 'build the Amazon S3 extension'),
- ('with-static-parquet', None, 'link parquet statically'),
- ('with-static-boost', None, 'link boost statically'),
- ('with-orc', None, 'build the ORC extension'),
- ('with-gandiva', None, 'build the Gandiva extension'),
- ('generate-coverage', None,
- 'enable Cython code coverage'),
- ('bundle-boost', None,
- 'bundle the (shared) Boost libraries'),
- ('bundle-cython-cpp', None,
- 'bundle generated Cython C++ code '
- '(used for code coverage)'),
- ('bundle-arrow-cpp', None,
- 'bundle the Arrow C++ libraries'),
- ('bundle-arrow-cpp-headers', None,
- 'bundle the Arrow C++ headers')] +
- _build_ext.user_options)
-
- def initialize_options(self):
- _build_ext.initialize_options(self)
- self.cmake_generator = os.environ.get('PYARROW_CMAKE_GENERATOR')
- if not self.cmake_generator and sys.platform == 'win32':
- self.cmake_generator = 'Visual Studio 15 2017 Win64'
- self.extra_cmake_args = os.environ.get('PYARROW_CMAKE_OPTIONS', '')
- self.build_type = os.environ.get('PYARROW_BUILD_TYPE',
- 'release').lower()
-
- self.cmake_cxxflags = os.environ.get('PYARROW_CXXFLAGS', '')
-
- if sys.platform == 'win32':
- # Cannot do debug builds in Windows unless Python itself is a debug
- # build
- if not hasattr(sys, 'gettotalrefcount'):
- self.build_type = 'release'
-
- self.with_azure = None
- self.with_gcs = None
- self.with_s3 = None
- self.with_hdfs = None
- self.with_cuda = None
- self.with_substrait = None
- self.with_flight = None
- self.with_acero = None
- self.with_dataset = None
- self.with_parquet = None
- self.with_parquet_encryption = None
- self.with_orc = None
- self.with_gandiva = None
-
- self.generate_coverage = strtobool(
- os.environ.get('PYARROW_GENERATE_COVERAGE', '0'))
- self.bundle_arrow_cpp = strtobool(
- os.environ.get('PYARROW_BUNDLE_ARROW_CPP', '0'))
- self.bundle_cython_cpp = strtobool(
- os.environ.get('PYARROW_BUNDLE_CYTHON_CPP', '0'))
-
- CYTHON_MODULE_NAMES = [
- 'lib',
- '_fs',
- '_csv',
- '_json',
- '_compute',
- '_cuda',
- '_flight',
- '_dataset',
- '_dataset_orc',
- '_dataset_parquet',
- '_acero',
- '_feather',
- '_parquet',
- '_parquet_encryption',
- '_pyarrow_cpp_tests',
- '_orc',
- '_azurefs',
- '_gcsfs',
- '_s3fs',
- '_substrait',
- '_hdfs',
- 'gandiva']
-
- def _run_cmake(self):
- # check if build_type is correctly passed / set
- if self.build_type.lower() not in ('release', 'debug',
- 'relwithdebinfo'):
- raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to "
- "be 'release', 'debug' or 'relwithdebinfo'")
-
- # The directory containing this setup.py
- source = os.path.dirname(os.path.abspath(__file__))
-
- # The staging directory for the module being built
- build_cmd = self.get_finalized_command('build')
- saved_cwd = os.getcwd()
- build_temp = pjoin(saved_cwd, build_cmd.build_temp)
- build_lib = pjoin(saved_cwd, build_cmd.build_lib)
-
- if not os.path.isdir(build_temp):
- self.mkpath(build_temp)
-
- if self.inplace:
- # a bit hacky
- build_lib = saved_cwd
-
- install_prefix = pjoin(build_lib, "pyarrow")
-
- # Change to the build directory
- with changed_dir(build_temp):
- # Detect if we built elsewhere
- if os.path.isfile('CMakeCache.txt'):
- cachefile = open('CMakeCache.txt', 'r')
- cachedir = re.search('CMAKE_CACHEFILE_DIR:INTERNAL=(.*)',
- cachefile.read()).group(1)
- cachefile.close()
- if (cachedir != build_temp):
- build_base = pjoin(saved_cwd, build_cmd.build_base)
- print(f"-- Skipping build. Temp build {build_temp} does "
- f"not match cached dir {cachedir}")
- print("---- For a clean build you might want to delete "
- f"{build_base}.")
- return
-
- cmake_options = [
- f'-DCMAKE_INSTALL_PREFIX={install_prefix}',
- f'-DPYTHON_EXECUTABLE={sys.executable}',
- f'-DPython3_EXECUTABLE={sys.executable}',
- f'-DPYARROW_CXXFLAGS={self.cmake_cxxflags}',
- ]
-
- def append_cmake_bool(value, varname):
- cmake_options.append(f'-D{varname}={"on" if value else "off"}')
-
- def append_cmake_component(flag, varname):
- # only pass this to cmake if the user pass the --with-component
- # flag to setup.py build_ext
- if flag is not None:
- flag_name = (
- "--with-"
- +
varname.removeprefix("PYARROW_").lower().replace("_", "-"))
- warnings.warn(
- MSG_DEPR_SETUP_BUILD_FLAGS.format(flag_name),
- UserWarning, stacklevel=2
- )
- append_cmake_bool(flag, varname)
-
- if self.cmake_generator:
- cmake_options += ['-G', self.cmake_generator]
-
- append_cmake_component(self.with_cuda, 'PYARROW_CUDA')
- append_cmake_component(self.with_substrait, 'PYARROW_SUBSTRAIT')
- append_cmake_component(self.with_flight, 'PYARROW_FLIGHT')
- append_cmake_component(self.with_gandiva, 'PYARROW_GANDIVA')
- append_cmake_component(self.with_acero, 'PYARROW_ACERO')
- append_cmake_component(self.with_dataset, 'PYARROW_DATASET')
- append_cmake_component(self.with_orc, 'PYARROW_ORC')
- append_cmake_component(self.with_parquet, 'PYARROW_PARQUET')
- append_cmake_component(self.with_parquet_encryption,
- 'PYARROW_PARQUET_ENCRYPTION')
- append_cmake_component(self.with_azure, 'PYARROW_AZURE')
- append_cmake_component(self.with_gcs, 'PYARROW_GCS')
- append_cmake_component(self.with_s3, 'PYARROW_S3')
- append_cmake_component(self.with_hdfs, 'PYARROW_HDFS')
-
- append_cmake_bool(self.bundle_arrow_cpp,
- 'PYARROW_BUNDLE_ARROW_CPP')
- append_cmake_bool(self.bundle_cython_cpp,
- 'PYARROW_BUNDLE_CYTHON_CPP')
- append_cmake_bool(self.generate_coverage,
- 'PYARROW_GENERATE_COVERAGE')
-
- cmake_options.append(
- f'-DCMAKE_BUILD_TYPE={self.build_type.lower()}')
-
- extra_cmake_args = shlex.split(self.extra_cmake_args)
-
- build_tool_args = []
- if sys.platform == 'win32':
- if not is_64_bit:
- raise RuntimeError('Not supported on 32-bit Windows')
- else:
- build_tool_args.append('--')
- if os.environ.get('PYARROW_BUILD_VERBOSE', '0') == '1':
- cmake_options.append('-DCMAKE_VERBOSE_MAKEFILE=ON')
- parallel = os.environ.get('PYARROW_PARALLEL')
- if parallel:
- build_tool_args.append(f'-j{parallel}')
-
- # Generate the build files
- if is_emscripten:
- print("-- Running emcmake cmake for PyArrow on Emscripten")
- self.spawn(['emcmake', 'cmake'] + extra_cmake_args +
- cmake_options + [source])
- else:
- print("-- Running cmake for PyArrow")
- self.spawn(['cmake'] + extra_cmake_args + cmake_options +
[source])
-
- print("-- Finished cmake for PyArrow")
-
- print("-- Running cmake --build for PyArrow")
- self.spawn(['cmake', '--build', '.', '--config', self.build_type] +
- build_tool_args)
- print("-- Finished cmake --build for PyArrow")
-
- print("-- Running cmake --build --target install for PyArrow")
- self.spawn(['cmake', '--build', '.', '--config', self.build_type] +
- ['--target', 'install'] + build_tool_args)
- print("-- Finished cmake --build --target install for PyArrow")
-
- self._found_names = []
- for name in self.CYTHON_MODULE_NAMES:
- built_path = pjoin(install_prefix, name + ext_suffix)
- if os.path.exists(built_path):
- self._found_names.append(name)
-
- def _get_build_dir(self):
- # Get the package directory from build_py
- build_py = self.get_finalized_command('build_py')
- return build_py.get_package_dir('pyarrow')
-
- def _get_cmake_ext_path(self, name):
- # This is the name of the arrow C-extension
- filename = name + ext_suffix
- return pjoin(self._get_build_dir(), filename)
-
- def get_ext_generated_cpp_source(self, name):
- if sys.platform == 'win32':
- head, tail = os.path.split(name)
- return pjoin(head, tail + ".cpp")
- else:
- return pjoin(name + ".cpp")
-
- def get_ext_built_api_header(self, name):
- if sys.platform == 'win32':
- head, tail = os.path.split(name)
- return pjoin(head, tail + "_api.h")
- else:
- return pjoin(name + "_api.h")
-
- def get_names(self):
- return self._found_names
-
- def get_outputs(self):
- # Just the C extensions
- # regular_exts = _build_ext.get_outputs(self)
- return [self._get_cmake_ext_path(name)
- for name in self.get_names()]
-
-
-class BinaryDistribution(Distribution):
- def has_ext_modules(foo):
- return True
-
-
-class CopyLicenseSdist(sdist):
- """Custom sdist command that copies license files from parent directory."""
-
- def make_release_tree(self, base_dir, files):
- # Call parent to do the normal work
- super().make_release_tree(base_dir, files)
-
- # Define source (parent dir) and destination (sdist root) for license
files
- license_files = [
- ("LICENSE.txt", "../LICENSE.txt"),
- ("NOTICE.txt", "../NOTICE.txt"),
- ]
-
- for dest_name, src_path in license_files:
- src_full = os.path.join(os.path.dirname(__file__), src_path)
- dest_full = os.path.join(base_dir, dest_name)
-
- # Remove any existing file/symlink at destination
- if os.path.exists(dest_full) or os.path.islink(dest_full):
- os.unlink(dest_full)
-
- if not os.path.exists(src_full):
- msg = f"Required license file not found: {src_full}"
- raise FileNotFoundError(msg)
-
- shutil.copy2(src_full, dest_full)
- print(f"Copied {src_path} to {dest_name} in sdist")
-
-
-setup(
- distclass=BinaryDistribution,
- # Dummy extension to trigger build_ext
- ext_modules=[Extension('__dummy__', sources=[])],
- cmdclass={
- 'build_ext': build_ext,
- 'sdist': CopyLicenseSdist,
- },
-)