This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 4b7d699ab9 GH-37500: [CI][C++] Disable Dataset and Substrait by default (#37501)
4b7d699ab9 is described below
commit 4b7d699ab9eeb87996b647ec4a3f2034f9108cfd
Author: Antoine Pitrou <[email protected]>
AuthorDate: Fri Sep 1 09:58:27 2023 +0200
GH-37500: [CI][C++] Disable Dataset and Substrait by default (#37501)
### Rationale for this change
Datasets and Substrait are heavy subcomponents (as they have non-trivial
dependencies), so they should not be enabled unintentionally in all CI builds.
### What changes are included in this PR?
Disable Dataset and Substrait in `ci/scripts/cpp_build.sh`, enable them
explicitly in relevant builds.
Disable more subcomponents in CI builds that don't need them, especially
"minimal" builds.
### Are these changes tested?
Yes, by existing CI jobs.
### Are there any user-facing changes?
No.
* Closes: #37500
Lead-authored-by: Antoine Pitrou <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
.github/workflows/cpp.yml | 3 +++
.github/workflows/ruby.yml | 2 ++
ci/docker/alpine-linux-3.16-cpp.dockerfile | 1 +
ci/docker/conda-cpp.dockerfile | 1 +
ci/docker/conda-integration.dockerfile | 1 +
ci/docker/conda-python-dask.dockerfile | 12 ++++++++++++
ci/docker/conda-python-substrait.dockerfile | 11 +++++++----
ci/docker/conda-python.dockerfile | 1 +
ci/docker/debian-11-cpp.dockerfile | 1 +
ci/docker/fedora-35-cpp.dockerfile | 1 +
ci/docker/ubuntu-20.04-cpp.dockerfile | 1 +
ci/docker/ubuntu-22.04-cpp.dockerfile | 1 +
ci/scripts/cpp_build.sh | 4 ++--
ci/scripts/integration_substrait.sh | 2 +-
ci/scripts/python_build.sh | 2 +-
cpp/examples/minimal_build/run_static.sh | 15 ---------------
docker-compose.yml | 4 ++++
python/examples/minimal_build/build_conda.sh | 25 ++++++++++---------------
python/examples/minimal_build/build_venv.sh | 12 +++++-------
19 files changed, 55 insertions(+), 45 deletions(-)
diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index 41032fc1b0..7dd825c72f 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -167,6 +167,7 @@ jobs:
ARROW_ORC: ON
ARROW_PARQUET: ON
ARROW_S3: ON
+ ARROW_SUBSTRAIT: ON
ARROW_WITH_BROTLI: ON
ARROW_WITH_BZ2: ON
ARROW_WITH_LZ4: ON
@@ -257,6 +258,7 @@ jobs:
ARROW_ORC: ON
ARROW_PARQUET: ON
ARROW_SIMD_LEVEL: ${{ matrix.simd-level }}
+ ARROW_SUBSTRAIT: ON
ARROW_USE_GLOG: OFF
ARROW_VERBOSE_THIRDPARTY_BUILD: OFF
ARROW_WITH_BROTLI: OFF
@@ -358,6 +360,7 @@ jobs:
ARROW_JEMALLOC: OFF
ARROW_PARQUET: ON
ARROW_S3: ON
+ ARROW_SUBSTRAIT: ON
ARROW_USE_GLOG: OFF
ARROW_VERBOSE_THIRDPARTY_BUILD: OFF
ARROW_WITH_BROTLI: ON
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index 1e74497557..af4f34fee2 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -121,6 +121,7 @@ jobs:
ARROW_BUILD_STATIC: OFF
ARROW_BUILD_TESTS: OFF
ARROW_BUILD_UTILITIES: OFF
+ ARROW_DATASET: ON
ARROW_FLIGHT: ON
ARROW_FLIGHT_SQL: ON
ARROW_GANDIVA: ON
@@ -206,6 +207,7 @@ jobs:
ARROW_BUILD_TESTS: OFF
ARROW_BUILD_UTILITIES: OFF
ARROW_BUILD_TYPE: release
+ ARROW_DATASET: ON
ARROW_FLIGHT: ON
ARROW_FLIGHT_SQL: ON
ARROW_GANDIVA: ON
diff --git a/ci/docker/alpine-linux-3.16-cpp.dockerfile b/ci/docker/alpine-linux-3.16-cpp.dockerfile
index f269fa548c..8828e717a5 100644
--- a/ci/docker/alpine-linux-3.16-cpp.dockerfile
+++ b/ci/docker/alpine-linux-3.16-cpp.dockerfile
@@ -85,6 +85,7 @@ ENV ARROW_ACERO=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
+ ARROW_SUBSTRAIT=ON \
ARROW_USE_CCACHE=ON \
ARROW_WITH_BROTLI=ON \
ARROW_WITH_BZ2=ON \
diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile
index 2997983ca3..b635e5e934 100644
--- a/ci/docker/conda-cpp.dockerfile
+++ b/ci/docker/conda-cpp.dockerfile
@@ -61,6 +61,7 @@ ENV ARROW_ACERO=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
+ ARROW_SUBSTRAIT=ON \
ARROW_USE_CCACHE=ON \
ARROW_WITH_BROTLI=ON \
ARROW_WITH_BZ2=ON \
diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile
index d6fad57b9f..43d7e7ab0b 100644
--- a/ci/docker/conda-integration.dockerfile
+++ b/ci/docker/conda-integration.dockerfile
@@ -57,6 +57,7 @@ ENV DOTNET_ROOT=/opt/dotnet \
RUN curl -sSL https://dot.net/v1/dotnet-install.sh | bash /dev/stdin -Channel 7.0 -InstallDir /opt/dotnet
ENV ARROW_ACERO=OFF \
+ ARROW_AZURE=OFF \
ARROW_BUILD_INTEGRATION=ON \
ARROW_BUILD_STATIC=OFF \
ARROW_BUILD_TESTS=OFF \
diff --git a/ci/docker/conda-python-dask.dockerfile b/ci/docker/conda-python-dask.dockerfile
index 400106f189..4484011081 100644
--- a/ci/docker/conda-python-dask.dockerfile
+++ b/ci/docker/conda-python-dask.dockerfile
@@ -23,3 +23,15 @@ FROM ${repo}:${arch}-conda-python-${python}
ARG dask=latest
COPY ci/scripts/install_dask.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_dask.sh ${dask}
+
+ENV ARROW_ACERO=OFF \
+ ARROW_COMPUTE=ON \
+ ARROW_CSV=ON \
+ ARROW_DATASET=ON \
+ ARROW_FLIGHT=OFF \
+ ARROW_FLIGHT_SQL=OFF \
+ ARROW_FILESYSTEM=ON \
+ ARROW_GANDIVA=OFF \
+ ARROW_ORC=ON \
+ ARROW_SUBSTRAIT=OFF \
+ ARROW_TENSORFLOW=OFF
diff --git a/ci/docker/conda-python-substrait.dockerfile b/ci/docker/conda-python-substrait.dockerfile
index 33f6957a86..191795f253 100644
--- a/ci/docker/conda-python-substrait.dockerfile
+++ b/ci/docker/conda-python-substrait.dockerfile
@@ -36,13 +36,16 @@ RUN mamba install -q -y \
ARG substrait=latest
COPY ci/scripts/install_substrait_consumer.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_substrait_consumer.sh
+
ENV ARROW_ACERO=ON \
- ARROW_BUILD_TESTS=ON \
ARROW_COMPUTE=ON \
ARROW_CSV=ON \
ARROW_DATASET=ON \
ARROW_FILESYSTEM=ON \
+ ARROW_FLIGHT=OFF \
+ ARROW_FLIGHT_SQL=OFF \
+ ARROW_GANDIVA=OFF \
ARROW_JSON=ON \
- ARROW_SUBSTRAIT=ON
-
-RUN /arrow/ci/scripts/install_substrait_consumer.sh
+ ARROW_SUBSTRAIT=ON \
+ ARROW_TESTING=OFF
diff --git a/ci/docker/conda-python.dockerfile b/ci/docker/conda-python.dockerfile
index 21e57228f4..ca0ceee5f9 100644
--- a/ci/docker/conda-python.dockerfile
+++ b/ci/docker/conda-python.dockerfile
@@ -46,5 +46,6 @@ ENV ARROW_ACERO=ON \
ARROW_GDB=ON \
ARROW_HDFS=ON \
ARROW_JSON=ON \
+ ARROW_SUBSTRAIT=OFF \
ARROW_TENSORFLOW=ON \
ARROW_USE_GLOG=OFF
diff --git a/ci/docker/debian-11-cpp.dockerfile b/ci/docker/debian-11-cpp.dockerfile
index 00adc6bd6b..4682405462 100644
--- a/ci/docker/debian-11-cpp.dockerfile
+++ b/ci/docker/debian-11-cpp.dockerfile
@@ -108,6 +108,7 @@ ENV absl_SOURCE=BUNDLED \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
+ ARROW_SUBSTRAIT=ON \
ARROW_USE_CCACHE=ON \
ARROW_WITH_BROTLI=ON \
ARROW_WITH_BZ2=ON \
diff --git a/ci/docker/fedora-35-cpp.dockerfile b/ci/docker/fedora-35-cpp.dockerfile
index 668e35b443..aefa25663b 100644
--- a/ci/docker/fedora-35-cpp.dockerfile
+++ b/ci/docker/fedora-35-cpp.dockerfile
@@ -87,6 +87,7 @@ ENV absl_SOURCE=BUNDLED \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
+ ARROW_SUBSTRAIT=ON \
ARROW_USE_CCACHE=ON \
ARROW_WITH_BROTLI=ON \
ARROW_WITH_BZ2=ON \
diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile
index 08dda6cf50..1e0a1e4807 100644
--- a/ci/docker/ubuntu-20.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-20.04-cpp.dockerfile
@@ -161,6 +161,7 @@ ENV absl_SOURCE=BUNDLED \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
+ ARROW_SUBSTRAIT=ON \
ARROW_USE_ASAN=OFF \
ARROW_USE_CCACHE=ON \
ARROW_USE_UBSAN=OFF \
diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile
index dedeedd979..fffafe2b05 100644
--- a/ci/docker/ubuntu-22.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-22.04-cpp.dockerfile
@@ -185,6 +185,7 @@ ENV absl_SOURCE=BUNDLED \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
+ ARROW_SUBSTRAIT=ON \
ARROW_USE_ASAN=OFF \
ARROW_USE_CCACHE=ON \
ARROW_USE_UBSAN=OFF \
diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh
index a420cb64cb..1f5596e2a5 100755
--- a/ci/scripts/cpp_build.sh
+++ b/ci/scripts/cpp_build.sh
@@ -104,7 +104,7 @@ cmake \
-DARROW_C_FLAGS_DEBUG="${ARROW_C_FLAGS_DEBUG:-}" \
-DARROW_C_FLAGS_RELEASE="${ARROW_C_FLAGS_RELEASE:-}" \
-DARROW_C_FLAGS_RELWITHDEBINFO="${ARROW_C_FLAGS_RELWITHDEBINFO:-}" \
- -DARROW_DATASET=${ARROW_DATASET:-ON} \
+ -DARROW_DATASET=${ARROW_DATASET:-OFF} \
-DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
-DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \
-DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \
@@ -129,7 +129,7 @@ cmake \
-DARROW_S3=${ARROW_S3:-OFF} \
-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \
-DARROW_SKYHOOK=${ARROW_SKYHOOK:-OFF} \
- -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-ON} \
+ -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \
-DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \
-DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \
-DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \
diff --git a/ci/scripts/integration_substrait.sh b/ci/scripts/integration_substrait.sh
index ce4c68ceb7..f7208ae113 100755
--- a/ci/scripts/integration_substrait.sh
+++ b/ci/scripts/integration_substrait.sh
@@ -28,4 +28,4 @@ python -c "from substrait_consumer.consumers import AceroConsumer"
echo "Executing pytest"
cd consumer-testing
-pytest substrait_consumer/tests/functional/extension_functions/test_boolean_functions.py --producer IsthmusProducer --consumer AceroConsumer
+pytest -r s substrait_consumer/tests/functional/extension_functions/test_boolean_functions.py --producer IsthmusProducer --consumer AceroConsumer
diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh
index b5b5b75b96..c0a27e6e70 100755
--- a/ci/scripts/python_build.sh
+++ b/ci/scripts/python_build.sh
@@ -54,7 +54,7 @@ fi
export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
-export PYARROW_WITH_ACERO=${ARROW_ACERO:-ON}
+export PYARROW_WITH_ACERO=${ARROW_ACERO:-OFF}
export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
export PYARROW_WITH_DATASET=${ARROW_DATASET:-ON}
export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF}
diff --git a/cpp/examples/minimal_build/run_static.sh b/cpp/examples/minimal_build/run_static.sh
index 5b6afbd67a..189f59a007 100755
--- a/cpp/examples/minimal_build/run_static.sh
+++ b/cpp/examples/minimal_build/run_static.sh
@@ -39,24 +39,9 @@ NPROC=$(nproc)
cmake $ARROW_DIR/cpp \
-DARROW_BUILD_SHARED=OFF \
-DARROW_BUILD_STATIC=ON \
- -DARROW_COMPUTE=ON \
-DARROW_CSV=ON \
- -DARROW_DATASET=ON \
-DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE} \
-DARROW_DEPENDENCY_USE_SHARED=OFF \
- -DARROW_FILESYSTEM=ON \
- -DARROW_HDFS=ON \
- -DARROW_JEMALLOC=ON \
- -DARROW_JSON=ON \
- -DARROW_ORC=ON \
- -DARROW_PARQUET=ON \
- -DARROW_WITH_BROTLI=ON \
- -DARROW_WITH_BZ2=ON \
- -DARROW_WITH_LZ4=ON \
- -DARROW_WITH_SNAPPY=ON \
- -DARROW_WITH_ZLIB=ON \
- -DARROW_WITH_ZSTD=ON \
- -DORC_SOURCE=BUNDLED \
-Dxsimd_SOURCE=BUNDLED \
$ARROW_CMAKE_OPTIONS
diff --git a/docker-compose.yml b/docker-compose.yml
index c193a7d11e..8947eadd70 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -497,6 +497,8 @@ services:
environment:
<<: [*common, *ccache, *sccache, *cpp]
ARROW_BUILD_UTILITIES: "OFF"
+ ARROW_ACERO: "OFF"
+ ARROW_AZURE: "OFF"
ARROW_COMPUTE: "OFF"
ARROW_CSV: "OFF"
ARROW_DATASET: "OFF"
@@ -1326,8 +1328,10 @@ services:
shm_size: *shm-size
environment:
<<: [*common, *ccache]
+ ARROW_ACERO: "OFF"
ARROW_DATASET: "OFF"
ARROW_FLIGHT: "OFF"
+ ARROW_FLIGHT_SQL: "OFF"
ARROW_GANDIVA: "OFF"
ARROW_JAVA_CDATA: "ON"
ARROW_ORC: "OFF"
diff --git a/python/examples/minimal_build/build_conda.sh b/python/examples/minimal_build/build_conda.sh
index a2a1fcbf94..cd0030ac5f 100755
--- a/python/examples/minimal_build/build_conda.sh
+++ b/python/examples/minimal_build/build_conda.sh
@@ -34,22 +34,19 @@ git config --global --add safe.directory $ARROW_ROOT
# Run these only once
function setup_miniconda() {
- MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"
+ MINICONDA_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh"
wget -O miniconda.sh $MINICONDA_URL
bash miniconda.sh -b -p $MINICONDA
rm -f miniconda.sh
LOCAL_PATH=$PATH
export PATH="$MINICONDA/bin:$PATH"
- conda update -y -q conda
- conda config --set auto_update_conda false
- conda info -a
+ mamba info -a
conda config --set show_channel_urls True
- conda config --add channels https://repo.anaconda.com/pkgs/free
- conda config --add channels conda-forge
+ conda config --show channels
- conda create -y -n pyarrow-$PYTHON -c conda-forge \
+ mamba create -y -n pyarrow-$PYTHON \
--file arrow/ci/conda_env_unix.txt \
--file arrow/ci/conda_env_cpp.txt \
--file arrow/ci/conda_env_python.txt \
@@ -63,7 +60,7 @@ function setup_miniconda() {
setup_miniconda
#----------------------------------------------------------------------
-# Activate conda in bash and activate conda environment
+# Activate mamba in bash and activate mamba environment
. $MINICONDA/etc/profile.d/conda.sh
conda activate pyarrow-$PYTHON
@@ -79,13 +76,11 @@ cmake -GNinja \
-DCMAKE_BUILD_TYPE=DEBUG \
-DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
-DCMAKE_INSTALL_LIBDIR=lib \
- -DARROW_WITH_BZ2=ON \
- -DARROW_WITH_ZLIB=ON \
- -DARROW_WITH_ZSTD=ON \
- -DARROW_WITH_LZ4=ON \
- -DARROW_WITH_SNAPPY=ON \
- -DARROW_WITH_BROTLI=ON \
- -DARROW_PYTHON=ON \
+ -DCMAKE_UNITY_BUILD=ON \
+ -DARROW_COMPUTE=ON \
+ -DARROW_CSV=ON \
+ -DARROW_FILESYSTEM=ON \
+ -DARROW_JSON=ON \
$ARROW_ROOT/cpp
ninja install
diff --git a/python/examples/minimal_build/build_venv.sh b/python/examples/minimal_build/build_venv.sh
index 2f1bc4ed30..d0432049f7 100755
--- a/python/examples/minimal_build/build_venv.sh
+++ b/python/examples/minimal_build/build_venv.sh
@@ -46,14 +46,12 @@ cmake -GNinja \
-DCMAKE_BUILD_TYPE=DEBUG \
-DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
-DCMAKE_INSTALL_LIBDIR=lib \
+ -DCMAKE_UNITY_BUILD=ON \
-DARROW_BUILD_STATIC=OFF \
- -DARROW_WITH_BZ2=ON \
- -DARROW_WITH_ZLIB=ON \
- -DARROW_WITH_ZSTD=ON \
- -DARROW_WITH_LZ4=ON \
- -DARROW_WITH_SNAPPY=ON \
- -DARROW_WITH_BROTLI=ON \
- -DARROW_PYTHON=ON \
+ -DARROW_COMPUTE=ON \
+ -DARROW_CSV=ON \
+ -DARROW_FILESYSTEM=ON \
+ -DARROW_JSON=ON \
$ARROW_ROOT/cpp
ninja install