This is an automated email from the ASF dual-hosted git repository. chengchengjin pushed a commit to branch cudf in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
commit 558f4a471821b35429bde6b3accbd05edac45b09 Author: Chengcheng Jin <[email protected]> AuthorDate: Fri Apr 4 15:15:04 2025 +0100 support cudf --- cpp/CMakeLists.txt | 5 +++ cpp/compile.sh | 9 +++- cpp/velox/CMakeLists.txt | 9 ++++ cpp/velox/compute/VeloxBackend.cc | 11 +++++ cpp/velox/config/VeloxConfig.h | 6 +++ dev/builddeps-veloxbe.sh | 10 ++++- dev/docker/cudf/Dockerfile | 18 ++++++++ dev/start_cudf.sh | 22 ++++++++++ docs/get-started/VeloxGPU.md | 48 ++++++++++++++++++++++ ep/build-velox/src/build_velox.sh | 9 ++++ .../org/apache/gluten/config/GlutenConfig.scala | 10 ++++- 11 files changed, 153 insertions(+), 4 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5cc1d4ee4b..1eaa578d7c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -56,6 +56,7 @@ option(ENABLE_S3 "Enable S3" OFF) option(ENABLE_HDFS "Enable HDFS" OFF) option(ENABLE_ORC "Enable ORC" OFF) option(ENABLE_ABFS "Enable ABFS" OFF) +option(ENABLE_GPU "Enable GPU" OFF) set(root_directory ${PROJECT_BINARY_DIR}) get_filename_component(GLUTEN_HOME ${CMAKE_SOURCE_DIR} DIRECTORY) @@ -236,6 +237,10 @@ if(ENABLE_IAA) add_definitions(-DGLUTEN_ENABLE_IAA) endif() +if(ENABLE_GPU) + add_definitions(-DGLUTEN_ENABLE_GPU) +endif() + # Subdirectories add_subdirectory(core) diff --git a/cpp/compile.sh b/cpp/compile.sh index 7006f69a3a..5f3c63db1b 100755 --- a/cpp/compile.sh +++ b/cpp/compile.sh @@ -28,6 +28,7 @@ ENABLE_GCS=OFF ENABLE_S3=OFF ENABLE_HDFS=OFF ENABLE_ABFS=OFF +ENABLE_GPU=OFF VELOX_HOME= # set default number of threads as cpu cores minus 2 if [[ "$(uname)" == "Darwin" ]]; then @@ -97,6 +98,10 @@ for arg in "$@"; do ENABLE_HDFS=("${arg#*=}") shift # Remove argument name from processing ;; + --enable_gpu=*) + ENABLE_GPU=("${arg#*=}") + shift # Remove argument name from processing + ;; *) OTHER_ARGUMENTS+=("$1") shift # Remove generic argument from processing @@ -128,6 +133,7 @@ echo "ENABLE_GCS=${ENABLE_GCS}" echo "ENABLE_S3=${ENABLE_S3}" echo "ENABLE_HDFS=${ENABLE_HDFS}" 
echo "ENABLE_ABFS=${ENABLE_ABFS}" +echo "ENABLE_GPU=${ENABLE_GPU}" if [ -d build ]; then rm -r build @@ -147,5 +153,6 @@ cmake .. \ -DENABLE_GCS=${ENABLE_GCS} \ -DENABLE_S3=${ENABLE_S3} \ -DENABLE_HDFS=${ENABLE_HDFS} \ - -DENABLE_ABFS=${ENABLE_ABFS} + -DENABLE_ABFS=${ENABLE_ABFS} \ + -DENABLE_GPU=${ENABLE_GPU} make -j$NPROC diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt index 2c4d0cae7f..75895f97e8 100644 --- a/cpp/velox/CMakeLists.txt +++ b/cpp/velox/CMakeLists.txt @@ -340,6 +340,15 @@ if(BUILD_EXAMPLES) add_subdirectory(udf/examples) endif() +if(ENABLE_GPU) + import_library( + facebook::velox::velox_cudf_exec + ${VELOX_BUILD_PATH}/velox/experimental/cudf/exec/libvelox_cudf_exec.a) + + target_link_libraries(velox PUBLIC facebook::velox::velox_cudf_exec) + target_link_libraries(velox PRIVATE ${VELOX_BUILD_PATH}/_deps/cudf-build/libcudf.so) +endif() + add_custom_command( TARGET velox POST_BUILD diff --git a/cpp/velox/compute/VeloxBackend.cc b/cpp/velox/compute/VeloxBackend.cc index 44313fbb39..2ce0df3c23 100644 --- a/cpp/velox/compute/VeloxBackend.cc +++ b/cpp/velox/compute/VeloxBackend.cc @@ -30,6 +30,9 @@ #ifdef GLUTEN_ENABLE_IAA #include "utils/qpl/QplCodec.h" #endif +#ifdef GLUTEN_ENABLE_GPU +#include "velox/experimental/cudf/exec/ToCudf.h" +#endif #include "compute/VeloxRuntime.h" #include "config/VeloxConfig.h" #include "jni/JniFileSystem.h" @@ -154,6 +157,14 @@ void VeloxBackend::init( velox::filesystems::registerAbfsFileSystem(); #endif +#ifdef GLUTEN_ENABLE_GPU + FLAGS_velox_cudf_debug = backendConf_->get<bool>(kDebugModeEnabled, false); + if (backendConf_->get<bool>(kCudfEnabled, kCudfEnabledDefault)) { + velox::cudf_velox::registerCudf(); + } + +#endif + initJolFilesystem(); initCache(); initConnector(); diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h index cb3f891072..7f86508d12 100644 --- a/cpp/velox/config/VeloxConfig.h +++ b/cpp/velox/config/VeloxConfig.h @@ -139,6 +139,12 @@ const uint32_t 
kGlogVerboseLevelMaximum = 99; const std::string kGlogSeverityLevel = "spark.gluten.sql.columnar.backend.velox.glogSeverityLevel"; const uint32_t kGlogSeverityLevelDefault = 1; +// cudf +#ifdef GLUTEN_ENABLE_GPU +const std::string kCudfEnabled = "spark.gluten.sql.columnar.cudf"; +const bool kCudfEnabledDefault = "true"; +#endif + // Query trace /// Enable query tracing flag. const std::string kQueryTraceEnabled = "spark.gluten.sql.columnar.backend.velox.queryTraceEnabled"; diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index 81a5f69584..85f566c954 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -39,6 +39,7 @@ ENABLE_S3=OFF ENABLE_HDFS=OFF ENABLE_ABFS=OFF ENABLE_VCPKG=OFF +ENABLE_GPU=OFF RUN_SETUP_SCRIPT=ON VELOX_REPO="" VELOX_BRANCH="" @@ -116,6 +117,10 @@ do ENABLE_VCPKG=("${arg#*=}") shift # Remove argument name from processing ;; + --enable_gpu=*) + ENABLE_GPU=("${arg#*=}") + shift # Remove argument name from processing + ;; --run_setup_script=*) RUN_SETUP_SCRIPT=("${arg#*=}") shift # Remove argument name from processing @@ -204,7 +209,7 @@ function build_velox { cd $GLUTEN_DIR/ep/build-velox/src # When BUILD_TESTS is on for gluten cpp, we need turn on VELOX_BUILD_TEST_UTILS via build_test_utils. 
./build_velox.sh --enable_s3=$ENABLE_S3 --enable_gcs=$ENABLE_GCS --build_type=$BUILD_TYPE --enable_hdfs=$ENABLE_HDFS \ - --enable_abfs=$ENABLE_ABFS --build_test_utils=$BUILD_TESTS \ + --enable_abfs=$ENABLE_ABFS --enable_gpu=$ENABLE_GPU --build_test_utils=$BUILD_TESTS \ --build_tests=$BUILD_VELOX_TESTS --build_benchmarks=$BUILD_VELOX_BENCHMARKS --num_threads=$NUM_THREADS \ --velox_home=$VELOX_HOME } @@ -229,7 +234,8 @@ function build_gluten_cpp { -DENABLE_GCS=$ENABLE_GCS \ -DENABLE_S3=$ENABLE_S3 \ -DENABLE_HDFS=$ENABLE_HDFS \ - -DENABLE_ABFS=$ENABLE_ABFS" + -DENABLE_ABFS=$ENABLE_ABFS \ + -DENABLE_GPU=$ENABLE_GPU" if [ $OS == 'Darwin' ]; then if [ -n "$INSTALL_PREFIX" ]; then diff --git a/dev/docker/cudf/Dockerfile b/dev/docker/cudf/Dockerfile new file mode 100644 index 0000000000..f8a598dcfe --- /dev/null +++ b/dev/docker/cudf/Dockerfile @@ -0,0 +1,18 @@ +FROM ghcr.io/facebookincubator/velox-dev:adapters +RUN yum install -y sudo patch maven perl +# Once the base docker image is updated, cmake no longer needs to be installed here +RUN pip install cmake==3.30.4 && ln -s /usr/local/bin/cmake /usr/bin + +RUN git clone -b cudf --depth=1 https://github.com/jinchengchenghh/gluten /opt/gluten + +# Install spark to folder /opt +RUN cd /opt/gluten/.github/workflows/util/ && ./install_spark_resources.sh 3.4 +ENV SPARK_HOME=/opt/spark-3.4.4-bin-hadoop3 +ENV PATH=$SPARK_HOME/bin:$PATH +ENV CUDA_ARCHITECTURES=70 + +WORKDIR /opt/gluten +RUN ./dev/buildbundle-veloxbe.sh--run_setup_script=ON --build_arrow=ON --spark_version=3.4 --enable_gpu=ON && \ + touch cudf.sql && echo "select o_orderkey from orders order by o_orderkey;" > cudf.sql + +# You can try the data in folder backends-velox/src/test/resources/tpch-data-parquet diff --git a/dev/start_cudf.sh b/dev/start_cudf.sh new file mode 100644 index 0000000000..ba79157a76 --- /dev/null +++ b/dev/start_cudf.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Execute this script on the host machine +set -eu + +wget 
https://developer.download.nvidia.com/compute/cuda/12.8.1/local_installers/cuda-repo-amzn2023-12-8-local-12.8.1_570.124.06-1.x86_64.rpm +sudo rpm -i cuda-repo-amzn2023-12-8-local-12.8.1_570.124.06-1.x86_64.rpm +sudo dnf clean all +sudo dnf -y install cuda-toolkit-12-8 +sudo dnf -y module install nvidia-driver:open-dkms + +sudo yum install -y docker + +curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \ + sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo +sudo dnf install -y nvidia-container-toolkit +sudo nvidia-ctk runtime configure --runtime=docker +sudo systemctl restart docker +# A reboot may be needed here after installing the CUDA driver +# Run the GPU example +sudo docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi +# Then run this command to start the Gluten container +sudo docker run --name gpu_gluten_container --gpus all -itd apache/gluten:centos-9-jdk8-cudf diff --git a/docs/get-started/VeloxGPU.md b/docs/get-started/VeloxGPU.md new file mode 100644 index 0000000000..440d1d8032 --- /dev/null +++ b/docs/get-started/VeloxGPU.md @@ -0,0 +1,48 @@ +--- +layout: page +title: Velox GPU +nav_order: 9 +parent: Getting-Started +--- + +# GPU Support in Velox Backend + +This is an experimental feature in Velox, and therefore in Gluten as well. Currently it only supports the OrderBy operator. + +Velox has several GPU support implementations; Gluten only enables cudf. + +## GPU environment + +It requires installing CUDA 12.8.1, the NVIDIA driver, and the nvidia-container-toolkit. + +Refer to [start_cudf.sh](https://github.com/apache/incubator-gluten/tree/main/dev/start_cudf.sh) +to set up the ``host`` environment and start the container. + +> You may need to reboot after installing the GPU driver. + +## GPU implementation + +The implementation invokes the [CUDF](https://docs.rapids.ai/api/cudf/stable/libcudf_docs/) API to support the Velox operators. 
+ +Given a Velox PlanNode, it is converted to either a GPU operator or a CPU operator depending on the +config `spark.gluten.sql.columnar.cudf`, which decides whether the cudf driver adapter is registered. + +In addition, setting `spark.gluten.debug.enabled` to true prints the operator replacement information. + +## Docker images +This docker image contains Spark at `$SPARK_HOME` and Gluten at /opt/gluten; give it a try if you are interested. +Gluten has been built with Spark 3.4. +``` +docker pull apache/gluten:centos-9-jdk8-cudf +docker run --name gpu_gluten_container --gpus all -itd apache/gluten:centos-9-jdk8-cudf +``` + +# Branch +The [PR](https://github.com/facebookincubator/velox/pull/12735/) has not been merged to +facebookincubator/velox, so use the fixed Gluten branch `cudf` and the corresponding oap-project velox branch `cudf`. + +# Relevant links + +CUDA Toolkit 12.8: https://developer.nvidia.com/cuda-downloads?target_os=Linux +NVIDIA Container Toolkit: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html +cuDF documentation: https://docs.rapids.ai/api/libcudf/legacy/namespacecudf diff --git a/ep/build-velox/src/build_velox.sh b/ep/build-velox/src/build_velox.sh index a9460cb799..c79cfc2c1a 100755 --- a/ep/build-velox/src/build_velox.sh +++ b/ep/build-velox/src/build_velox.sh @@ -26,6 +26,8 @@ ENABLE_GCS=OFF ENABLE_HDFS=OFF # Enable ABFS connector. ENABLE_ABFS=OFF + +ENABLE_GPU=OFF # CMake build type for Velox. BUILD_TYPE=release # May be deprecated in Gluten build. @@ -64,6 +66,10 @@ for arg in "$@"; do ENABLE_ABFS=("${arg#*=}") shift # Remove argument name from processing ;; + --enable_gpu=*) + ENABLE_GPU=("${arg#*=}") + shift # Remove argument name from processing + ;; --build_type=*) BUILD_TYPE=("${arg#*=}") shift # Remove argument name from processing @@ -121,6 +127,9 @@ function compile { echo "ENABLE_BENCHMARK is ON. Disabling Tests, GCS and ABFS connectors if enabled." 
COMPILE_OPTION="$COMPILE_OPTION -DVELOX_ENABLE_BENCHMARKS=ON" fi + if [ $ENABLE_GPU == "ON" ]; then + COMPILE_OPTION="$COMPILE_OPTION -DVELOX_ENABLE_GPU=ON -DVELOX_ENABLE_CUDF=ON" + fi if [ -n "${GLUTEN_VCPKG_ENABLED:-}" ]; then COMPILE_OPTION="$COMPILE_OPTION -DVELOX_GFLAGS_TYPE=static" fi diff --git a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala index f9920e6732..855dd631b9 100644 --- a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala +++ b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala @@ -612,7 +612,8 @@ object GlutenConfig { SESSION_LOCAL_TIMEZONE.key, DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key, SPARK_REDACTION_REGEX, - LEGACY_TIME_PARSER_POLICY.key + LEGACY_TIME_PARSER_POLICY.key, + COLUMNAR_CUDF_ENABLED.key ) nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava) @@ -1719,4 +1720,11 @@ object GlutenConfig { .booleanConf .createWithDefault(true) + val COLUMNAR_CUDF_ENABLED = + buildConf("spark.gluten.sql.columnar.cudf") + .internal() + .doc("Enable or disable cudf support.") + .booleanConf + .createWithDefault(true) + } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
