This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 4487be0276 GH-34615: [CI][C++] Add CI job for basic format support
without ARROW_COMPUTE (#34617)
4487be0276 is described below
commit 4487be02763ff19c14c9d56809907284e51b513d
Author: Ben Harkins <[email protected]>
AuthorDate: Wed Mar 22 20:31:32 2023 -0400
GH-34615: [CI][C++] Add CI job for basic format support without
ARROW_COMPUTE (#34617)
This adds a crossbow job for `ARROW_IPC`, `ARROW_PARQUET`, and `ARROW_CSV`,
based on a minimal Ubuntu image.
The job primarily aims to test the core Arrow library + basic format
support without the full kernel registry provided by `ARROW_COMPUTE`. Note that
`ARROW_JSON` is implicitly enabled as well, since it's a dependency of
`ARROW_TESTING`.
* Closes: #34615
* Closes: #34655
Authored-by: benibus <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ci/docker/ubuntu-20.04-cpp-minimal.dockerfile | 1 +
ci/docker/ubuntu-22.04-cpp-minimal.dockerfile | 1 +
cpp/src/arrow/compute/CMakeLists.txt | 47 +++++++++++----------
cpp/src/arrow/compute/exec/CMakeLists.txt | 59 +++++----------------------
cpp/src/arrow/compute/kernels/CMakeLists.txt | 36 +++++++---------
dev/tasks/tasks.yml | 9 ++++
docker-compose.yml | 43 +++++++++++++++++++
7 files changed, 104 insertions(+), 92 deletions(-)
diff --git a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
index ca2be2873d..b7ddcd6fa7 100644
--- a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
+++ b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
@@ -33,6 +33,7 @@ RUN apt-get update -y -q && \
libssl-dev \
libcurl4-openssl-dev \
python3-pip \
+ tzdata \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists*
diff --git a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile
b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile
index f0dc76c65f..e3006d9ec3 100644
--- a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile
+++ b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile
@@ -33,6 +33,7 @@ RUN apt-get update -y -q && \
libssl-dev \
libcurl4-openssl-dev \
python3-pip \
+ tzdata \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists*
diff --git a/cpp/src/arrow/compute/CMakeLists.txt
b/cpp/src/arrow/compute/CMakeLists.txt
index 3a7c30d68d..f459dd2cc7 100644
--- a/cpp/src/arrow/compute/CMakeLists.txt
+++ b/cpp/src/arrow/compute/CMakeLists.txt
@@ -26,7 +26,14 @@ arrow_add_pkg_config("arrow-compute")
# Unit tests
#
-# The following kernels are always present:
+set(ARROW_COMPUTE_TEST_PREFIX "arrow-compute")
+set(ARROW_COMPUTE_TEST_LABELS "arrow_compute")
+set(ARROW_COMPUTE_TEST_ARGS PREFIX ${ARROW_COMPUTE_TEST_PREFIX} LABELS
+ ${ARROW_COMPUTE_TEST_LABELS})
+
+# This will only add the test if ARROW_COMPUTE is enabled, meaning the full kernel registry is available.
+#
+# The following kernels are always present in default builds:
# - array_filter
# - array_take
# - cast
@@ -38,10 +45,12 @@ arrow_add_pkg_config("arrow-compute")
# - unique
# - value_counts
#
-# Tests that use additional kernels should specify REQUIRE_ALL_KERNELS to avoid
-# being included in minimal builds. See: GH-34388
+# Also see: GH-34388, GH-34615
function(ADD_ARROW_COMPUTE_TEST REL_TEST_NAME)
- set(options REQUIRE_ALL_KERNELS)
+ if(NOT ARROW_COMPUTE)
+ return()
+ endif()
+
set(one_value_args PREFIX)
set(multi_value_args LABELS)
cmake_parse_arguments(ARG
@@ -50,25 +59,19 @@ function(ADD_ARROW_COMPUTE_TEST REL_TEST_NAME)
"${multi_value_args}"
${ARGN})
- if(ARG_REQUIRE_ALL_KERNELS AND (NOT ARROW_COMPUTE))
- return()
- endif()
-
if(ARG_PREFIX)
set(PREFIX ${ARG_PREFIX})
else()
- set(PREFIX "arrow-compute")
+ set(PREFIX ${ARROW_COMPUTE_TEST_PREFIX})
endif()
if(ARG_LABELS)
set(LABELS ${ARG_LABELS})
else()
- set(LABELS "arrow_compute")
+ set(LABELS ${ARROW_COMPUTE_TEST_LABELS})
endif()
add_arrow_test(${REL_TEST_NAME}
- EXTRA_LINK_LIBS
- ${ARROW_DATASET_TEST_LINK_LIBS}
PREFIX
${PREFIX}
LABELS
@@ -76,15 +79,17 @@ function(ADD_ARROW_COMPUTE_TEST REL_TEST_NAME)
${ARG_UNPARSED_ARGUMENTS})
endfunction()
-add_arrow_compute_test(internals_test
- SOURCES
- function_test.cc
- exec_test.cc
- kernel_test.cc
- light_array_test.cc
- registry_test.cc
- key_hash_test.cc
- expression_test.cc)
+add_arrow_test(internals_test
+ ${ARROW_COMPUTE_TEST_ARGS}
+ SOURCES
+ function_test.cc
+ exec_test.cc
+ kernel_test.cc
+ light_array_test.cc
+ registry_test.cc
+ key_hash_test.cc)
+
+add_arrow_compute_test(expression_test SOURCES expression_test.cc)
add_arrow_benchmark(function_benchmark PREFIX "arrow-compute")
diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt
b/cpp/src/arrow/compute/exec/CMakeLists.txt
index c7853b2f67..61cf1903b6 100644
--- a/cpp/src/arrow/compute/exec/CMakeLists.txt
+++ b/cpp/src/arrow/compute/exec/CMakeLists.txt
@@ -17,63 +17,24 @@
arrow_install_all_headers("arrow/compute/exec")
-add_arrow_compute_test(subtree_test
- REQUIRE_ALL_KERNELS
- PREFIX
- "arrow-compute"
- SOURCES
- subtree_test.cc)
+add_arrow_compute_test(subtree_test SOURCES subtree_test.cc)
add_arrow_compute_test(plan_test
- REQUIRE_ALL_KERNELS
- PREFIX
- "arrow-compute"
SOURCES
plan_test.cc
test_nodes_test.cc
test_nodes.cc)
-add_arrow_compute_test(fetch_node_test
- REQUIRE_ALL_KERNELS
- PREFIX
- "arrow-compute"
- SOURCES
- fetch_node_test.cc
- test_nodes.cc)
-add_arrow_compute_test(hash_join_node_test
- REQUIRE_ALL_KERNELS
- PREFIX
- "arrow-compute"
- SOURCES
- hash_join_node_test.cc
+add_arrow_compute_test(fetch_node_test SOURCES fetch_node_test.cc
test_nodes.cc)
+add_arrow_compute_test(hash_join_node_test SOURCES hash_join_node_test.cc
bloom_filter_test.cc)
-add_arrow_compute_test(order_by_node_test
- PREFIX
- "arrow-compute"
- SOURCES
- order_by_node_test.cc
+add_arrow_compute_test(order_by_node_test SOURCES order_by_node_test.cc
test_nodes.cc)
+add_arrow_compute_test(pivot_longer_node_test SOURCES pivot_longer_node_test.cc
test_nodes.cc)
-add_arrow_compute_test(pivot_longer_node_test
- PREFIX
- "arrow-compute"
- SOURCES
- pivot_longer_node_test.cc
- test_nodes.cc)
-add_arrow_compute_test(asof_join_node_test
- REQUIRE_ALL_KERNELS
- PREFIX
- "arrow-compute"
- SOURCES
- asof_join_node_test.cc
- test_nodes.cc)
-add_arrow_compute_test(tpch_node_test PREFIX "arrow-compute")
-add_arrow_compute_test(union_node_test PREFIX "arrow-compute")
-add_arrow_compute_test(groupby_test REQUIRE_ALL_KERNELS PREFIX "arrow-compute")
-add_arrow_compute_test(util_test
- PREFIX
- "arrow-compute"
- SOURCES
- util_test.cc
- task_util_test.cc)
+add_arrow_compute_test(asof_join_node_test SOURCES asof_join_node_test.cc
test_nodes.cc)
+add_arrow_compute_test(tpch_node_test)
+add_arrow_compute_test(union_node_test)
+add_arrow_compute_test(groupby_test)
+add_arrow_compute_test(util_test SOURCES util_test.cc task_util_test.cc)
add_arrow_benchmark(expression_benchmark PREFIX "arrow-compute")
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt
b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 6cadbd1d20..0c5a3b7d78 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -16,32 +16,32 @@
# under the License.
# ----------------------------------------------------------------------
-# Scalar kernels
+# Tests that don't require the full kernel library
+
+add_arrow_test(scalar_cast_test
+ ${ARROW_COMPUTE_TEST_ARGS}
+ SOURCES
+ scalar_cast_test.cc
+ test_util.cc)
+
+add_arrow_test(kernel_utility_test ${ARROW_COMPUTE_TEST_ARGS} SOURCES
+ codegen_internal_test.cc)
-add_arrow_compute_test(scalar_cast_test SOURCES scalar_cast_test.cc
test_util.cc)
+# ----------------------------------------------------------------------
+# Scalar kernels
add_arrow_compute_test(scalar_type_test
- REQUIRE_ALL_KERNELS
SOURCES
scalar_boolean_test.cc
scalar_nested_test.cc
scalar_string_test.cc
test_util.cc)
-add_arrow_compute_test(scalar_if_else_test
- REQUIRE_ALL_KERNELS
- SOURCES
- scalar_if_else_test.cc
- test_util.cc)
+add_arrow_compute_test(scalar_if_else_test SOURCES scalar_if_else_test.cc
test_util.cc)
-add_arrow_compute_test(scalar_temporal_test
- REQUIRE_ALL_KERNELS
- SOURCES
- scalar_temporal_test.cc
- test_util.cc)
+add_arrow_compute_test(scalar_temporal_test SOURCES scalar_temporal_test.cc
test_util.cc)
add_arrow_compute_test(scalar_math_test
- REQUIRE_ALL_KERNELS
SOURCES
scalar_arithmetic_test.cc
scalar_compare_test.cc
@@ -49,7 +49,6 @@ add_arrow_compute_test(scalar_math_test
test_util.cc)
add_arrow_compute_test(scalar_utility_test
- REQUIRE_ALL_KERNELS
SOURCES
scalar_random_test.cc
scalar_set_lookup_test.cc
@@ -71,7 +70,6 @@ add_arrow_benchmark(scalar_temporal_benchmark PREFIX
"arrow-compute")
# Vector kernels
add_arrow_compute_test(vector_test
- REQUIRE_ALL_KERNELS
SOURCES
vector_cumulative_ops_test.cc
vector_hash_test.cc
@@ -96,14 +94,8 @@ add_arrow_benchmark(vector_selection_benchmark PREFIX
"arrow-compute")
# Aggregates
add_arrow_compute_test(aggregate_test
- REQUIRE_ALL_KERNELS
SOURCES
aggregate_test.cc
hash_aggregate_test.cc
test_util.cc)
add_arrow_benchmark(aggregate_benchmark PREFIX "arrow-compute")
-
-# ----------------------------------------------------------------------
-# Utilities
-
-add_arrow_compute_test(kernel_utility_test SOURCES codegen_internal_test.cc)
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index f387fdbf94..4f7af3134f 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -1255,6 +1255,15 @@ tasks:
UBUNTU: 20.04
image: ubuntu-cpp-thread-sanitizer
+ test-ubuntu-20.04-cpp-minimal-with-formats:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ UBUNTU: 20.04
+ flags: "-e ARROW_CSV=ON -e ARROW_PARQUET=ON"
+ image: ubuntu-cpp-minimal
+
{% for python_version in ["3.7", "3.8", "3.9", "3.10", "3.11"] %}
test-conda-python-{{ python_version }}:
ci: github
diff --git a/docker-compose.yml b/docker-compose.yml
index 38a22e23d6..0acfec2ac3 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -143,6 +143,7 @@ x-hierarchy:
- ubuntu-r
- ubuntu-r-only-r
- ubuntu-cpp-bundled
+ - ubuntu-cpp-minimal
- ubuntu-cuda-cpp:
- ubuntu-cuda-python
- ubuntu-csharp
@@ -467,6 +468,48 @@ services:
volumes: *ubuntu-volumes
command: *cpp-command
+ ubuntu-cpp-minimal:
+ # Arrow build with minimal components/dependencies
+ image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp-minimal
+ build:
+ context: .
+ dockerfile: ci/docker/ubuntu-${UBUNTU}-cpp-minimal.dockerfile
+ cache_from:
+ - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp-minimal
+ args:
+ arch: ${ARCH}
+ base: "${ARCH}/ubuntu:${UBUNTU}"
+ llvm: ${LLVM}
+ shm_size: *shm-size
+ ulimits: *ulimits
+ environment:
+ <<: [*ccache, *sccache]
+ ARROW_BUILD_UTILITIES: "OFF"
+ ARROW_COMPUTE: "OFF"
+ ARROW_CSV: "OFF"
+ ARROW_DATASET: "OFF"
+ ARROW_FILESYSTEM: "OFF"
+ ARROW_FLIGHT: "OFF"
+ ARROW_GANDIVA: "OFF"
+ ARROW_GCS: "OFF"
+ ARROW_HDFS: "OFF"
+ ARROW_ORC: "OFF"
+ ARROW_PARQUET: "OFF"
+ ARROW_PLASMA: "OFF"
+ ARROW_S3: "OFF"
+ ARROW_SUBSTRAIT: "OFF"
+ ARROW_WITH_BROTLI: "OFF"
+ ARROW_WITH_BZ2: "OFF"
+ ARROW_WITH_LZ4: "OFF"
+ ARROW_WITH_SNAPPY: "OFF"
+ ARROW_WITH_ZLIB: "OFF"
+ ARROW_WITH_ZSTD: "OFF"
+ PARQUET_BUILD_EXAMPLES: "OFF"
+ PARQUET_BUILD_EXECUTABLES: "OFF"
+ PARQUET_REQUIRE_ENCRYPTION: "OFF"
+ volumes: *ubuntu-volumes
+ command: *cpp-command
+
ubuntu-cuda-cpp:
# Usage:
# docker-compose build cuda-cpp