This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 4b7d699ab9 GH-37500: [CI][C++] Disable Dataset and Substrait by default (#37501)
4b7d699ab9 is described below

commit 4b7d699ab9eeb87996b647ec4a3f2034f9108cfd
Author: Antoine Pitrou <[email protected]>
AuthorDate: Fri Sep 1 09:58:27 2023 +0200

    GH-37500: [CI][C++] Disable Dataset and Substrait by default (#37501)
    
    ### Rationale for this change
    
    Datasets and Substrait are heavy subcomponents (as they have non-trivial dependencies), so they should not be enabled unintentionally in all CI builds.
    
    ### What changes are included in this PR?
    
    Disable Dataset and Substrait by default in `ci/scripts/cpp_build.sh`, and enable them explicitly in the relevant builds.
    
    Disable more subcomponents in CI builds that don't need them, especially 
"minimal" builds.
    
    ### Are these changes tested?
    
    Yes, by existing CI jobs.
    
    ### Are there any user-facing changes?
    
    No.
    
    * Closes: #37500
    
    Lead-authored-by: Antoine Pitrou <[email protected]>
    Co-authored-by: Sutou Kouhei <[email protected]>
    Co-authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 .github/workflows/cpp.yml                    |  3 +++
 .github/workflows/ruby.yml                   |  2 ++
 ci/docker/alpine-linux-3.16-cpp.dockerfile   |  1 +
 ci/docker/conda-cpp.dockerfile               |  1 +
 ci/docker/conda-integration.dockerfile       |  1 +
 ci/docker/conda-python-dask.dockerfile       | 12 ++++++++++++
 ci/docker/conda-python-substrait.dockerfile  | 11 +++++++----
 ci/docker/conda-python.dockerfile            |  1 +
 ci/docker/debian-11-cpp.dockerfile           |  1 +
 ci/docker/fedora-35-cpp.dockerfile           |  1 +
 ci/docker/ubuntu-20.04-cpp.dockerfile        |  1 +
 ci/docker/ubuntu-22.04-cpp.dockerfile        |  1 +
 ci/scripts/cpp_build.sh                      |  4 ++--
 ci/scripts/integration_substrait.sh          |  2 +-
 ci/scripts/python_build.sh                   |  2 +-
 cpp/examples/minimal_build/run_static.sh     | 15 ---------------
 docker-compose.yml                           |  4 ++++
 python/examples/minimal_build/build_conda.sh | 25 ++++++++++---------------
 python/examples/minimal_build/build_venv.sh  | 12 +++++-------
 19 files changed, 55 insertions(+), 45 deletions(-)

diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index 41032fc1b0..7dd825c72f 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -167,6 +167,7 @@ jobs:
       ARROW_ORC: ON
       ARROW_PARQUET: ON
       ARROW_S3: ON
+      ARROW_SUBSTRAIT: ON
       ARROW_WITH_BROTLI: ON
       ARROW_WITH_BZ2: ON
       ARROW_WITH_LZ4: ON
@@ -257,6 +258,7 @@ jobs:
       ARROW_ORC: ON
       ARROW_PARQUET: ON
       ARROW_SIMD_LEVEL: ${{ matrix.simd-level }}
+      ARROW_SUBSTRAIT: ON
       ARROW_USE_GLOG: OFF
       ARROW_VERBOSE_THIRDPARTY_BUILD: OFF
       ARROW_WITH_BROTLI: OFF
@@ -358,6 +360,7 @@ jobs:
       ARROW_JEMALLOC: OFF
       ARROW_PARQUET: ON
       ARROW_S3: ON
+      ARROW_SUBSTRAIT: ON
       ARROW_USE_GLOG: OFF
       ARROW_VERBOSE_THIRDPARTY_BUILD: OFF
       ARROW_WITH_BROTLI: ON
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index 1e74497557..af4f34fee2 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -121,6 +121,7 @@ jobs:
       ARROW_BUILD_STATIC: OFF
       ARROW_BUILD_TESTS: OFF
       ARROW_BUILD_UTILITIES: OFF
+      ARROW_DATASET: ON
       ARROW_FLIGHT: ON
       ARROW_FLIGHT_SQL: ON
       ARROW_GANDIVA: ON
@@ -206,6 +207,7 @@ jobs:
       ARROW_BUILD_TESTS: OFF
       ARROW_BUILD_UTILITIES: OFF
       ARROW_BUILD_TYPE: release
+      ARROW_DATASET: ON
       ARROW_FLIGHT: ON
       ARROW_FLIGHT_SQL: ON
       ARROW_GANDIVA: ON
diff --git a/ci/docker/alpine-linux-3.16-cpp.dockerfile 
b/ci/docker/alpine-linux-3.16-cpp.dockerfile
index f269fa548c..8828e717a5 100644
--- a/ci/docker/alpine-linux-3.16-cpp.dockerfile
+++ b/ci/docker/alpine-linux-3.16-cpp.dockerfile
@@ -85,6 +85,7 @@ ENV ARROW_ACERO=ON \
     ARROW_ORC=ON \
     ARROW_PARQUET=ON \
     ARROW_S3=ON \
+    ARROW_SUBSTRAIT=ON \
     ARROW_USE_CCACHE=ON \
     ARROW_WITH_BROTLI=ON \
     ARROW_WITH_BZ2=ON \
diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile
index 2997983ca3..b635e5e934 100644
--- a/ci/docker/conda-cpp.dockerfile
+++ b/ci/docker/conda-cpp.dockerfile
@@ -61,6 +61,7 @@ ENV ARROW_ACERO=ON \
     ARROW_ORC=ON \
     ARROW_PARQUET=ON \
     ARROW_S3=ON \
+    ARROW_SUBSTRAIT=ON \
     ARROW_USE_CCACHE=ON \
     ARROW_WITH_BROTLI=ON \
     ARROW_WITH_BZ2=ON \
diff --git a/ci/docker/conda-integration.dockerfile 
b/ci/docker/conda-integration.dockerfile
index d6fad57b9f..43d7e7ab0b 100644
--- a/ci/docker/conda-integration.dockerfile
+++ b/ci/docker/conda-integration.dockerfile
@@ -57,6 +57,7 @@ ENV DOTNET_ROOT=/opt/dotnet \
 RUN curl -sSL https://dot.net/v1/dotnet-install.sh | bash /dev/stdin -Channel 
7.0 -InstallDir /opt/dotnet
 
 ENV ARROW_ACERO=OFF \
+    ARROW_AZURE=OFF \
     ARROW_BUILD_INTEGRATION=ON \
     ARROW_BUILD_STATIC=OFF \
     ARROW_BUILD_TESTS=OFF \
diff --git a/ci/docker/conda-python-dask.dockerfile 
b/ci/docker/conda-python-dask.dockerfile
index 400106f189..4484011081 100644
--- a/ci/docker/conda-python-dask.dockerfile
+++ b/ci/docker/conda-python-dask.dockerfile
@@ -23,3 +23,15 @@ FROM ${repo}:${arch}-conda-python-${python}
 ARG dask=latest
 COPY ci/scripts/install_dask.sh /arrow/ci/scripts/
 RUN /arrow/ci/scripts/install_dask.sh ${dask}
+
+ENV ARROW_ACERO=OFF \
+    ARROW_COMPUTE=ON \
+    ARROW_CSV=ON \
+    ARROW_DATASET=ON \
+    ARROW_FLIGHT=OFF \
+    ARROW_FLIGHT_SQL=OFF \
+    ARROW_FILESYSTEM=ON \
+    ARROW_GANDIVA=OFF \
+    ARROW_ORC=ON \
+    ARROW_SUBSTRAIT=OFF \
+    ARROW_TENSORFLOW=OFF
diff --git a/ci/docker/conda-python-substrait.dockerfile 
b/ci/docker/conda-python-substrait.dockerfile
index 33f6957a86..191795f253 100644
--- a/ci/docker/conda-python-substrait.dockerfile
+++ b/ci/docker/conda-python-substrait.dockerfile
@@ -36,13 +36,16 @@ RUN mamba install -q -y \
 ARG substrait=latest
 COPY ci/scripts/install_substrait_consumer.sh /arrow/ci/scripts/
 
+RUN /arrow/ci/scripts/install_substrait_consumer.sh
+
 ENV ARROW_ACERO=ON \
-    ARROW_BUILD_TESTS=ON \
     ARROW_COMPUTE=ON \
     ARROW_CSV=ON \
     ARROW_DATASET=ON \
     ARROW_FILESYSTEM=ON \
+    ARROW_FLIGHT=OFF \
+    ARROW_FLIGHT_SQL=OFF \
+    ARROW_GANDIVA=OFF \
     ARROW_JSON=ON \
-    ARROW_SUBSTRAIT=ON
-
-RUN /arrow/ci/scripts/install_substrait_consumer.sh
+    ARROW_SUBSTRAIT=ON \
+    ARROW_TESTING=OFF
diff --git a/ci/docker/conda-python.dockerfile 
b/ci/docker/conda-python.dockerfile
index 21e57228f4..ca0ceee5f9 100644
--- a/ci/docker/conda-python.dockerfile
+++ b/ci/docker/conda-python.dockerfile
@@ -46,5 +46,6 @@ ENV ARROW_ACERO=ON \
     ARROW_GDB=ON \
     ARROW_HDFS=ON \
     ARROW_JSON=ON \
+    ARROW_SUBSTRAIT=OFF \
     ARROW_TENSORFLOW=ON \
     ARROW_USE_GLOG=OFF
diff --git a/ci/docker/debian-11-cpp.dockerfile 
b/ci/docker/debian-11-cpp.dockerfile
index 00adc6bd6b..4682405462 100644
--- a/ci/docker/debian-11-cpp.dockerfile
+++ b/ci/docker/debian-11-cpp.dockerfile
@@ -108,6 +108,7 @@ ENV absl_SOURCE=BUNDLED \
     ARROW_ORC=ON \
     ARROW_PARQUET=ON \
     ARROW_S3=ON \
+    ARROW_SUBSTRAIT=ON \
     ARROW_USE_CCACHE=ON \
     ARROW_WITH_BROTLI=ON \
     ARROW_WITH_BZ2=ON \
diff --git a/ci/docker/fedora-35-cpp.dockerfile 
b/ci/docker/fedora-35-cpp.dockerfile
index 668e35b443..aefa25663b 100644
--- a/ci/docker/fedora-35-cpp.dockerfile
+++ b/ci/docker/fedora-35-cpp.dockerfile
@@ -87,6 +87,7 @@ ENV absl_SOURCE=BUNDLED \
     ARROW_ORC=ON \
     ARROW_PARQUET=ON \
     ARROW_S3=ON \
+    ARROW_SUBSTRAIT=ON \
     ARROW_USE_CCACHE=ON \
     ARROW_WITH_BROTLI=ON \
     ARROW_WITH_BZ2=ON \
diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile 
b/ci/docker/ubuntu-20.04-cpp.dockerfile
index 08dda6cf50..1e0a1e4807 100644
--- a/ci/docker/ubuntu-20.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-20.04-cpp.dockerfile
@@ -161,6 +161,7 @@ ENV absl_SOURCE=BUNDLED \
     ARROW_ORC=ON \
     ARROW_PARQUET=ON \
     ARROW_S3=ON \
+    ARROW_SUBSTRAIT=ON \
     ARROW_USE_ASAN=OFF \
     ARROW_USE_CCACHE=ON \
     ARROW_USE_UBSAN=OFF \
diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile 
b/ci/docker/ubuntu-22.04-cpp.dockerfile
index dedeedd979..fffafe2b05 100644
--- a/ci/docker/ubuntu-22.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-22.04-cpp.dockerfile
@@ -185,6 +185,7 @@ ENV absl_SOURCE=BUNDLED \
     ARROW_ORC=ON \
     ARROW_PARQUET=ON \
     ARROW_S3=ON \
+    ARROW_SUBSTRAIT=ON \
     ARROW_USE_ASAN=OFF \
     ARROW_USE_CCACHE=ON \
     ARROW_USE_UBSAN=OFF \
diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh
index a420cb64cb..1f5596e2a5 100755
--- a/ci/scripts/cpp_build.sh
+++ b/ci/scripts/cpp_build.sh
@@ -104,7 +104,7 @@ cmake \
   -DARROW_C_FLAGS_DEBUG="${ARROW_C_FLAGS_DEBUG:-}" \
   -DARROW_C_FLAGS_RELEASE="${ARROW_C_FLAGS_RELEASE:-}" \
   -DARROW_C_FLAGS_RELWITHDEBINFO="${ARROW_C_FLAGS_RELWITHDEBINFO:-}" \
-  -DARROW_DATASET=${ARROW_DATASET:-ON} \
+  -DARROW_DATASET=${ARROW_DATASET:-OFF} \
   -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
   -DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \
   -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \
@@ -129,7 +129,7 @@ cmake \
   -DARROW_S3=${ARROW_S3:-OFF} \
   -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \
   -DARROW_SKYHOOK=${ARROW_SKYHOOK:-OFF} \
-  -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-ON} \
+  -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \
   -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \
   -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \
   -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \
diff --git a/ci/scripts/integration_substrait.sh 
b/ci/scripts/integration_substrait.sh
index ce4c68ceb7..f7208ae113 100755
--- a/ci/scripts/integration_substrait.sh
+++ b/ci/scripts/integration_substrait.sh
@@ -28,4 +28,4 @@ python -c "from substrait_consumer.consumers import 
AceroConsumer"
 
 echo "Executing pytest"
 cd consumer-testing
-pytest 
substrait_consumer/tests/functional/extension_functions/test_boolean_functions.py
 --producer IsthmusProducer --consumer AceroConsumer
+pytest -r s 
substrait_consumer/tests/functional/extension_functions/test_boolean_functions.py
 --producer IsthmusProducer --consumer AceroConsumer
diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh
index b5b5b75b96..c0a27e6e70 100755
--- a/ci/scripts/python_build.sh
+++ b/ci/scripts/python_build.sh
@@ -54,7 +54,7 @@ fi
 export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
 export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
 
-export PYARROW_WITH_ACERO=${ARROW_ACERO:-ON}
+export PYARROW_WITH_ACERO=${ARROW_ACERO:-OFF}
 export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
 export PYARROW_WITH_DATASET=${ARROW_DATASET:-ON}
 export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF}
diff --git a/cpp/examples/minimal_build/run_static.sh 
b/cpp/examples/minimal_build/run_static.sh
index 5b6afbd67a..189f59a007 100755
--- a/cpp/examples/minimal_build/run_static.sh
+++ b/cpp/examples/minimal_build/run_static.sh
@@ -39,24 +39,9 @@ NPROC=$(nproc)
 cmake $ARROW_DIR/cpp \
     -DARROW_BUILD_SHARED=OFF \
     -DARROW_BUILD_STATIC=ON \
-    -DARROW_COMPUTE=ON \
     -DARROW_CSV=ON \
-    -DARROW_DATASET=ON \
     -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE} \
     -DARROW_DEPENDENCY_USE_SHARED=OFF \
-    -DARROW_FILESYSTEM=ON \
-    -DARROW_HDFS=ON \
-    -DARROW_JEMALLOC=ON \
-    -DARROW_JSON=ON \
-    -DARROW_ORC=ON \
-    -DARROW_PARQUET=ON \
-    -DARROW_WITH_BROTLI=ON \
-    -DARROW_WITH_BZ2=ON \
-    -DARROW_WITH_LZ4=ON \
-    -DARROW_WITH_SNAPPY=ON \
-    -DARROW_WITH_ZLIB=ON \
-    -DARROW_WITH_ZSTD=ON \
-    -DORC_SOURCE=BUNDLED \
     -Dxsimd_SOURCE=BUNDLED \
     $ARROW_CMAKE_OPTIONS
 
diff --git a/docker-compose.yml b/docker-compose.yml
index c193a7d11e..8947eadd70 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -497,6 +497,8 @@ services:
     environment:
       <<: [*common, *ccache, *sccache, *cpp]
       ARROW_BUILD_UTILITIES: "OFF"
+      ARROW_ACERO: "OFF"
+      ARROW_AZURE: "OFF"
       ARROW_COMPUTE: "OFF"
       ARROW_CSV: "OFF"
       ARROW_DATASET: "OFF"
@@ -1326,8 +1328,10 @@ services:
     shm_size: *shm-size
     environment:
       <<: [*common, *ccache]
+      ARROW_ACERO: "OFF"
       ARROW_DATASET: "OFF"
       ARROW_FLIGHT: "OFF"
+      ARROW_FLIGHT_SQL: "OFF"
       ARROW_GANDIVA: "OFF"
       ARROW_JAVA_CDATA: "ON"
       ARROW_ORC: "OFF"
diff --git a/python/examples/minimal_build/build_conda.sh 
b/python/examples/minimal_build/build_conda.sh
index a2a1fcbf94..cd0030ac5f 100755
--- a/python/examples/minimal_build/build_conda.sh
+++ b/python/examples/minimal_build/build_conda.sh
@@ -34,22 +34,19 @@ git config --global --add safe.directory $ARROW_ROOT
 # Run these only once
 
 function setup_miniconda() {
-  
MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh";
+  
MINICONDA_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh";
   wget -O miniconda.sh $MINICONDA_URL
   bash miniconda.sh -b -p $MINICONDA
   rm -f miniconda.sh
   LOCAL_PATH=$PATH
   export PATH="$MINICONDA/bin:$PATH"
 
-  conda update -y -q conda
-  conda config --set auto_update_conda false
-  conda info -a
+  mamba info -a
 
   conda config --set show_channel_urls True
-  conda config --add channels https://repo.anaconda.com/pkgs/free
-  conda config --add channels conda-forge
+  conda config --show channels
 
-  conda create -y -n pyarrow-$PYTHON -c conda-forge \
+  mamba create -y -n pyarrow-$PYTHON \
         --file arrow/ci/conda_env_unix.txt \
         --file arrow/ci/conda_env_cpp.txt \
         --file arrow/ci/conda_env_python.txt \
@@ -63,7 +60,7 @@ function setup_miniconda() {
 setup_miniconda
 
 #----------------------------------------------------------------------
-# Activate conda in bash and activate conda environment
+# Activate mamba in bash and activate mamba environment
 
 . $MINICONDA/etc/profile.d/conda.sh
 conda activate pyarrow-$PYTHON
@@ -79,13 +76,11 @@ cmake -GNinja \
       -DCMAKE_BUILD_TYPE=DEBUG \
       -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
       -DCMAKE_INSTALL_LIBDIR=lib \
-      -DARROW_WITH_BZ2=ON \
-      -DARROW_WITH_ZLIB=ON \
-      -DARROW_WITH_ZSTD=ON \
-      -DARROW_WITH_LZ4=ON \
-      -DARROW_WITH_SNAPPY=ON \
-      -DARROW_WITH_BROTLI=ON \
-      -DARROW_PYTHON=ON \
+      -DCMAKE_UNITY_BUILD=ON \
+      -DARROW_COMPUTE=ON \
+      -DARROW_CSV=ON \
+      -DARROW_FILESYSTEM=ON \
+      -DARROW_JSON=ON \
       $ARROW_ROOT/cpp
 
 ninja install
diff --git a/python/examples/minimal_build/build_venv.sh 
b/python/examples/minimal_build/build_venv.sh
index 2f1bc4ed30..d0432049f7 100755
--- a/python/examples/minimal_build/build_venv.sh
+++ b/python/examples/minimal_build/build_venv.sh
@@ -46,14 +46,12 @@ cmake -GNinja \
       -DCMAKE_BUILD_TYPE=DEBUG \
       -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
       -DCMAKE_INSTALL_LIBDIR=lib \
+      -DCMAKE_UNITY_BUILD=ON \
       -DARROW_BUILD_STATIC=OFF \
-      -DARROW_WITH_BZ2=ON \
-      -DARROW_WITH_ZLIB=ON \
-      -DARROW_WITH_ZSTD=ON \
-      -DARROW_WITH_LZ4=ON \
-      -DARROW_WITH_SNAPPY=ON \
-      -DARROW_WITH_BROTLI=ON \
-      -DARROW_PYTHON=ON \
+      -DARROW_COMPUTE=ON \
+      -DARROW_CSV=ON \
+      -DARROW_FILESYSTEM=ON \
+      -DARROW_JSON=ON \
       $ARROW_ROOT/cpp
 
 ninja install

Reply via email to