This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 4c05a3b4ea GH-32619: [Python][Docs] Include options for PyArrow build
explicitly (#34463)
4c05a3b4ea is described below
commit 4c05a3b4ea433961e1c546050f4f34356aacded6
Author: Alenka Frim <[email protected]>
AuthorDate: Fri Mar 10 08:24:03 2023 +0100
GH-32619: [Python][Docs] Include options for PyArrow build explicitly
(#34463)
### What changes are included in this PR?
A list of the relevant build options for PyArrow is added to the Python developer docs.
This PR also removes these flags from PyArrow:
- `PYARROW_BUNDLE_BOOST`
- `PYARROW_BUNDLE_ARROW_CPP_HEADERS`
- `PYARROW_WITH_STATIC_PARQUET`
- `PYARROW_PARQUET_USE_SHARED`
* Closes: #32619
Lead-authored-by: Alenka Frim <[email protected]>
Co-authored-by: Alenka Frim <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
ci/appveyor-cpp-build.bat | 1 -
ci/scripts/python_wheel_windows_build.bat | 1 -
dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh | 1 -
docs/source/developers/python.rst | 77 ++++++++++++++++++++++
python/CMakeLists.txt | 6 +-
python/setup.py | 20 ------
6 files changed, 78 insertions(+), 28 deletions(-)
diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat
index 0faac67a14..b34d00e85a 100644
--- a/ci/appveyor-cpp-build.bat
+++ b/ci/appveyor-cpp-build.bat
@@ -106,7 +106,6 @@ popd
pushd python
-set PYARROW_BUNDLE_BOOST=OFF
set PYARROW_CMAKE_GENERATOR=%GENERATOR%
set PYARROW_CXXFLAGS=%ARROW_CXXFLAGS%
set PYARROW_PARALLEL=2
diff --git a/ci/scripts/python_wheel_windows_build.bat
b/ci/scripts/python_wheel_windows_build.bat
index d137cd8a98..6a5c994d08 100644
--- a/ci/scripts/python_wheel_windows_build.bat
+++ b/ci/scripts/python_wheel_windows_build.bat
@@ -103,7 +103,6 @@ popd
echo "=== (%PYTHON_VERSION%) Building wheel ==="
set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE%
set PYARROW_BUNDLE_ARROW_CPP=ON
-set PYARROW_BUNDLE_BOOST=OFF
set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR%
set PYARROW_INSTALL_TESTS=ON
set PYARROW_WITH_DATASET=%ARROW_DATASET%
diff --git a/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh
b/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh
index 692ffb7806..718617ce17 100644
--- a/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh
+++ b/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh
@@ -8,7 +8,6 @@ export ARROW_HOME=$PREFIX
export PARQUET_HOME=$PREFIX
export SETUPTOOLS_SCM_PRETEND_VERSION=$PKG_VERSION
export PYARROW_BUILD_TYPE=release
-export PYARROW_BUNDLE_ARROW_CPP_HEADERS=0
export PYARROW_WITH_DATASET=1
export PYARROW_WITH_FLIGHT=1
export PYARROW_WITH_GANDIVA=1
diff --git a/docs/source/developers/python.rst
b/docs/source/developers/python.rst
index 8df095f7c3..495c5db991 100644
--- a/docs/source/developers/python.rst
+++ b/docs/source/developers/python.rst
@@ -586,6 +586,83 @@ Caveats
The Plasma component is not supported on Windows.
+Relevant components and environment variables
+=============================================
+
+The relevant Arrow CMake flags and the corresponding environment variables
+to use when building PyArrow are:
+
+.. list-table::
+ :widths: 30 30
+ :header-rows: 1
+
+ * - Arrow flags/options
+ - Corresponding environment variables for PyArrow
+ * - ``CMAKE_BUILD_TYPE``
+ - ``PYARROW_BUILD_TYPE`` (release, debug or relwithdebinfo)
+ * - ``ARROW_GCS``
+ - ``PYARROW_WITH_GCS``
+ * - ``ARROW_S3``
+ - ``PYARROW_WITH_S3``
+ * - ``ARROW_HDFS``
+ - ``PYARROW_WITH_HDFS``
+ * - ``ARROW_CUDA``
+ - ``PYARROW_WITH_CUDA``
+ * - ``ARROW_SUBSTRAIT``
+ - ``PYARROW_WITH_SUBSTRAIT``
+ * - ``ARROW_FLIGHT``
+ - ``PYARROW_WITH_FLIGHT``
+ * - ``ARROW_DATASET``
+ - ``PYARROW_WITH_DATASET``
+ * - ``ARROW_PARQUET``
+ - ``PYARROW_WITH_PARQUET``
+ * - ``PARQUET_REQUIRE_ENCRYPTION``
+ - ``PYARROW_WITH_PARQUET_ENCRYPTION``
+ * - ``ARROW_TENSORFLOW``
+ - ``PYARROW_WITH_TENSORFLOW``
+ * - ``ARROW_ORC``
+ - ``PYARROW_WITH_ORC``
+ * - ``ARROW_GANDIVA``
+ - ``PYARROW_WITH_GANDIVA``
+
+Other relevant environment variables that can be used when building
+PyArrow are:
+
+.. list-table::
+ :widths: 20 20 20
+ :header-rows: 1
+
+ * - PyArrow environment variable
+ - Description
+ - Default value
+ * - ``PYARROW_CMAKE_GENERATOR``
+ - Example: ``'Visual Studio 15 2017 Win64'``
+ - ``''``
+ * - ``PYARROW_CMAKE_OPTIONS``
+ - Extra CMake and Arrow options (ex. ``"-DARROW_SIMD_LEVEL=NONE -DCMAKE_OSX_ARCHITECTURES=x86_64;arm64"``)
+ - ``''``
+ * - ``PYARROW_CXXFLAGS``
+ - Extra C++ compiler flags
+ - ``''``
+ * - ``PYARROW_GENERATE_COVERAGE``
+ - Set the ``Xlinetrace`` flag to true for the Cython compiler
+ - ``false``
+ * - ``PYARROW_BUNDLE_ARROW_CPP``
+ - Bundle the Arrow C++ libraries
+ - ``0`` (``OFF``)
+ * - ``PYARROW_BUNDLE_CYTHON_CPP``
+ - Bundle the C++ files generated by Cython
+ - ``0`` (``OFF``)
+ * - ``PYARROW_INSTALL_TESTS``
+ - Include the tests in the Python package
+ - ``1`` (``ON``)
+ * - ``PYARROW_BUILD_VERBOSE``
+ - Enable verbose output from Makefile builds
+ - ``0`` (``OFF``)
+ * - ``PYARROW_PARALLEL``
+ - Number of processes used to compile PyArrow’s C++/Cython components
+ - ``''``
+
Deleting stale build artifacts
==============================
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 8f846348f3..580857d50c 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -116,16 +116,12 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL
"${CMAKE_CURRENT_SOURCE_DIR}")
option(PYARROW_BUILD_DATASET "Build the PyArrow Dataset integration" OFF)
option(PYARROW_BUILD_GANDIVA "Build the PyArrow Gandiva integration" OFF)
option(PYARROW_BUILD_PARQUET "Build the PyArrow Parquet integration" OFF)
- option(PYARROW_PARQUET_USE_SHARED "Rely on parquet shared libraries where relevant" ON)
option(PYARROW_BUILD_PARQUET_ENCRYPTION
"Build the PyArrow Parquet encryption integration" OFF)
- option(PYARROW_BOOST_USE_SHARED
- "Rely on Boost shared libraries on linking static parquet" ON)
option(PYARROW_BUILD_PLASMA "Build the PyArrow Plasma integration" OFF)
option(PYARROW_USE_TENSORFLOW "Build PyArrow with TensorFlow support" OFF)
option(PYARROW_BUILD_ORC "Build the PyArrow ORC integration" OFF)
option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF)
- option(PYARROW_BUNDLE_BOOST "Bundle the Boost libraries when we bundle Arrow C++" OFF)
option(PYARROW_BUNDLE_CYTHON_CPP "Bundle the C++ files generated by Cython"
OFF)
option(PYARROW_BUNDLE_PLASMA_EXECUTABLE "Bundle the plasma-store-server" ON)
option(PYARROW_GENERATE_COVERAGE "Build with Cython code coverage enabled"
OFF)
@@ -586,7 +582,7 @@ if(PYARROW_BUILD_PARQUET)
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
- if(PYARROW_PARQUET_USE_SHARED)
+ if(ARROW_BUILD_SHARED)
if(PYARROW_BUNDLE_ARROW_CPP)
bundle_arrow_lib(${PARQUET_SHARED_LIB} SO_VERSION ${PARQUET_SO_VERSION})
if(MSVC)
diff --git a/python/setup.py b/python/setup.py
index 7598133206..8d3b56cc51 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -144,8 +144,6 @@ class build_ext(_build_ext):
self.extra_cmake_args = os.environ.get('PYARROW_CMAKE_OPTIONS', '')
self.build_type = os.environ.get('PYARROW_BUILD_TYPE',
'release').lower()
- self.boost_namespace = os.environ.get('PYARROW_BOOST_NAMESPACE',
- 'boost')
self.cmake_cxxflags = os.environ.get('PYARROW_CXXFLAGS', '')
@@ -171,12 +169,8 @@ class build_ext(_build_ext):
os.environ.get('PYARROW_WITH_DATASET', '0'))
self.with_parquet = strtobool(
os.environ.get('PYARROW_WITH_PARQUET', '0'))
- self.with_static_parquet = strtobool(
- os.environ.get('PYARROW_WITH_STATIC_PARQUET', '0'))
self.with_parquet_encryption = strtobool(
os.environ.get('PYARROW_WITH_PARQUET_ENCRYPTION', '0'))
- self.with_static_boost = strtobool(
- os.environ.get('PYARROW_WITH_STATIC_BOOST', '0'))
self.with_plasma = strtobool(
os.environ.get('PYARROW_WITH_PLASMA', '0'))
self.with_tensorflow = strtobool(
@@ -191,10 +185,6 @@ class build_ext(_build_ext):
os.environ.get('PYARROW_BUNDLE_ARROW_CPP', '0'))
self.bundle_cython_cpp = strtobool(
os.environ.get('PYARROW_BUNDLE_CYTHON_CPP', '0'))
- self.bundle_boost = strtobool(
- os.environ.get('PYARROW_BUNDLE_BOOST', '0'))
- self.bundle_arrow_cpp_headers = strtobool(
- os.environ.get('PYARROW_BUNDLE_ARROW_CPP_HEADERS', '1'))
self.bundle_plasma_executable = strtobool(
os.environ.get('PYARROW_BUNDLE_PLASMA_EXECUTABLE', '1'))
@@ -298,26 +288,16 @@ class build_ext(_build_ext):
append_cmake_bool(self.with_tensorflow, 'PYARROW_USE_TENSORFLOW')
append_cmake_bool(self.bundle_arrow_cpp,
'PYARROW_BUNDLE_ARROW_CPP')
- append_cmake_bool(self.bundle_boost,
- 'PYARROW_BUNDLE_BOOST')
append_cmake_bool(self.bundle_cython_cpp,
'PYARROW_BUNDLE_CYTHON_CPP')
append_cmake_bool(self.bundle_plasma_executable,
'PYARROW_BUNDLE_PLASMA_EXECUTABLE')
append_cmake_bool(self.generate_coverage,
'PYARROW_GENERATE_COVERAGE')
- append_cmake_bool(not self.with_static_boost,
- 'PYARROW_BOOST_USE_SHARED')
- append_cmake_bool(not self.with_static_parquet,
- 'PYARROW_PARQUET_USE_SHARED')
cmake_options.append(
f'-DCMAKE_BUILD_TYPE={self.build_type.lower()}')
- if self.boost_namespace != 'boost':
- cmake_options.append(
- f'-DBoost_NAMESPACE={self.boost_namespace}')
-
extra_cmake_args = shlex.split(self.extra_cmake_args)
build_tool_args = []