This is an automated email from the ASF dual-hosted git repository.

liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new ad5b468ad [GLUTEN-7224][CH]Update doc for compiling ch backend #7225
ad5b468ad is described below

commit ad5b468adf199c15ae9a53ecee407da5b434e001
Author: lgbo <[email protected]>
AuthorDate: Fri Sep 13 14:13:13 2024 +0800

    [GLUTEN-7224][CH]Update doc for compiling ch backend #7225
    
    What changes were proposed in this pull request?
    (Please fill in changes proposed in this fix)
    
    Fixes: #7224
    
    How was this patch tested?
    (Please explain how this patch was tested. E.g. unit tests, integration 
tests, manual tests)
    
    unit tests
    
    (If this patch involves UI changes, please attach a screenshot; otherwise, 
remove this)
---
 cpp-ch/local-engine/docker/build.sh         |  69 ++++++++++++++++-
 cpp-ch/local-engine/docker/image/Dockerfile |  24 +++---
 cpp-ch/local-engine/docker/image/build.sh   |  22 +++++-
 docs/get-started/ClickHouse.md              | 114 +++++++++++++++++++---------
 4 files changed, 178 insertions(+), 51 deletions(-)

diff --git a/cpp-ch/local-engine/docker/build.sh 
b/cpp-ch/local-engine/docker/build.sh
index 8bc61284e..e88ef08e3 100755
--- a/cpp-ch/local-engine/docker/build.sh
+++ b/cpp-ch/local-engine/docker/build.sh
@@ -12,4 +12,71 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-sudo docker run  --rm --volume="$2":/output --volume="$1":/clickhouse 
--volume=/tmp/.cache:/ccache -e ENABLE_EMBEDDED_COMPILER=ON libchbuilder:1.0
\ No newline at end of file
+
+# Build libch.so inside the libch_builder docker image; run with -h for usage.
+opts=$(getopt -o c:g:b:o:h --long clickhouse:,gluten:,build:,output:,help -- "$@") || exit 1
+# getopt prints a normalized, shell-quoted option list; load it back into "$@".
+eval set -- "$opts"
+
+while true; do
+    case "$1" in
+        -g | --gluten )
+            GLUTEN_DIR=$2
+            shift 2
+            ;;
+        -c | --clickhouse )
+            CLICKHOUSE_DIR=$2
+            shift 2
+            ;;
+        -b | --build )
+            BUILD_DIR=$2
+            shift 2
+            ;;
+        -o | --output )
+            OUTPUT_DIR=$2
+            shift 2  # drop the option and its value, like the other arms
+            ;;
+        -h | --help )
+            echo "Usage: build.sh -g <gluten_root_dir> -c <clickhouse_root_dir> [-b <build_dir>] [-o <output_dir>]"
+            exit 0
+            ;;
+        -- )
+            shift
+            break
+            ;;
+        * )
+            break;
+            ;;
+    esac
+done
+
+if [ -z "$GLUTEN_DIR" ]; then
+    echo "Miss gluten source root directory"
+    exit 1
+fi
+
+if [ -z "$CLICKHOUSE_DIR" ]; then
+    echo "Miss clickhouse source root directory"
+    exit 1
+fi
+
+CURRENT_DIR=$(pwd)
+if [ -z "$BUILD_DIR" ]; then
+    mkdir -p build
+    BUILD_DIR=${CURRENT_DIR}/build
+    echo "Will use ${BUILD_DIR} as the build directory"
+fi
+
+if [ -z "$OUTPUT_DIR" ]; then
+    mkdir -p output
+    OUTPUT_DIR=${CURRENT_DIR}/output
+    echo "Will use ${OUTPUT_DIR} as the output directory"
+fi
+
+docker run --rm \
+    -v "$GLUTEN_DIR":/gluten \
+    -v "$CLICKHOUSE_DIR":/clickhouse \
+    -v /tmp/.cache:/ccache \
+    -v "$BUILD_DIR":/build \
+    -v "$OUTPUT_DIR":/output \
+    libch_builder
diff --git a/cpp-ch/local-engine/docker/image/Dockerfile 
b/cpp-ch/local-engine/docker/image/Dockerfile
index 4355999e9..4941d20aa 100644
--- a/cpp-ch/local-engine/docker/image/Dockerfile
+++ b/cpp-ch/local-engine/docker/image/Dockerfile
@@ -1,12 +1,12 @@
 # rebuild in #33610
-# docker build -t clickhouse/binary-builder .
+# docker build -t libch_builder .
 FROM ubuntu:20.04
 
 # ARG for quick switch to a given ubuntu mirror
 ARG apt_archive="http://mirrors.aliyun.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
 
-ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=16
+ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=17
 
 RUN apt-get update \
     && apt-get install \
@@ -36,35 +36,35 @@ RUN apt-get update \
 RUN curl -s https://apt.kitware.com/keys/kitware-archive-latest.asc | \
         gpg --dearmor - > /etc/apt/trusted.gpg.d/kitware.gpg && \
     echo "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" >> 
/etc/apt/sources.list
+
 # initial packages
 RUN apt-get update \
     && apt-get install \
         bash \
         openjdk-8-jdk\
-    #        build-essential \
         ccache \
         clang-${LLVM_VERSION} \
-#        clang-tidy-${LLVM_VERSION} \
         cmake \
         fakeroot \
-#        gdb \
         git \
-#        gperf \
         lld-${LLVM_VERSION} \
         llvm-${LLVM_VERSION} \
-#        llvm-${LLVM_VERSION}-dev \
-#        moreutils \
         ninja-build \
-#        pigz \
-#        rename \
         software-properties-common \
         tzdata \
         --yes --no-install-recommends \
+#       clang-tidy-${LLVM_VERSION} \
+#       gperf \
+#       gdb \
+#       llvm-${LLVM_VERSION}-dev \
+#       moreutils \
+#       build-essential \
+#       pigz \
+#       rename \
     && apt-get clean
 
 # This symlink required by gcc to find lld compiler
 RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
-
 ENV RUSTUP_HOME=/rust/rustup
 ENV CARGO_HOME=/rust/cargo
 ENV PATH="/rust/cargo/env:${PATH}"
@@ -82,4 +82,4 @@ ENV CXX=clang++-${LLVM_VERSION}
 
 ADD ./build.sh /build.sh
 RUN chmod +x /build.sh
-CMD ["bash", "-c", "/build.sh 2>&1"]
\ No newline at end of file
+CMD ["bash", "-c", "/build.sh 2>&1"]
diff --git a/cpp-ch/local-engine/docker/image/build.sh 
b/cpp-ch/local-engine/docker/image/build.sh
index 22d62aa04..6de678def 100644
--- a/cpp-ch/local-engine/docker/image/build.sh
+++ b/cpp-ch/local-engine/docker/image/build.sh
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 mkdir -p /build && cd /build || exit
 export CCACHE_DIR=/ccache
 export CCACHE_BASEDIR=/build
@@ -19,7 +20,22 @@ export CCACHE_NOHASHDIR=true
 export CCACHE_COMPILERCHECK=content
 export CCACHE_MAXSIZE=15G
 
-cmake -G Ninja  "-DCMAKE_C_COMPILER=$CC" "-DCMAKE_CXX_COMPILER=$CXX" 
"-DCMAKE_BUILD_TYPE=Release" "-DENABLE_PROTOBUF=1" 
"-DENABLE_EMBEDDED_COMPILER=$ENABLE_EMBEDDED_COMPILER" "-DENABLE_TESTS=OFF" 
"-DWERROR=OFF" "-DENABLE_JEMALLOC=1" "-DENABLE_MULTITARGET_CODE=ON" /clickhouse
-ninja ch
+# link the local-engine lib to a subdir of clickhouse (drop any stale link first)
+rm -f /clickhouse/utils/extern-local-engine
+ln -s /gluten/cpp-ch/local-engine /clickhouse/utils/extern-local-engine
+
+cmake -G Ninja  "-DCMAKE_C_COMPILER=$CC" "-DCMAKE_CXX_COMPILER=$CXX" \
+          "-DCMAKE_BUILD_TYPE=Release" \
+          "-DENABLE_PROTOBUF=1" \
+          "-DENABLE_EMBEDDED_COMPILER=$ENABLE_EMBEDDED_COMPILER" \
+          "-DENABLE_TESTS=OFF" \
+          "-DWERROR=OFF" \
+          "-DENABLE_JEMALLOC=1" \
+          "-DENABLE_MULTITARGET_CODE=ON" \
+          "-DENABLE_GWP_ASAN=OFF" \
+          "-DENABLE_EXTERN_LOCAL_ENGINE=ON" \
+          "-DENABLE_THINLTO=false" \
+          /clickhouse
+ninja
 
-cp /build/utils/local-engine/libch.so "/output/libch_$(date +%Y%m%d).so"
\ No newline at end of file
+cp /build/utils/extern-local-engine/libch.so "/output/libch_$(date +%Y%m%d).so"
diff --git a/docs/get-started/ClickHouse.md b/docs/get-started/ClickHouse.md
index a4b3174d1..b12607d96 100644
--- a/docs/get-started/ClickHouse.md
+++ b/docs/get-started/ClickHouse.md
@@ -27,49 +27,90 @@ The architecture of the ClickHouse backend is shown below:
 
 In general, we use IDEA for Gluten development and CLion for ClickHouse 
backend development on **Ubuntu 20**.
 
-#### Prerequisites
+#### Prerequisite
 
-Install the software required for compilation, run `sudo 
./ep/build-clickhouse/src/install_ubuntu.sh`.
-Under the hood, it will install the following software:
+##### For compiling clickhouse backend
+
+The following software is required:
 - Clang 18.0
 - cmake 3.20 or higher version
 - ninja-build 1.8.2
 
+You can run `sudo $gluten_root/ep/build-clickhouse/src/install_ubuntu.sh` to set up the requirements. We also provide a [docker file](../../cpp-ch/local-engine/docker/image/Dockerfile) that you can use to build your own image:
+```shell
+cd $gluten_root/cpp-ch/local-engine/docker/image/
+docker build . -t libch_builder
+```
+
 You can also refer to 
[How-to-Build-ClickHouse-on-Linux](https://clickhouse.com/docs/en/development/build/).
+
+##### For compiling gluten
 You need to install the following software manually:
 - Java 8
 - Maven 3.6.3 or higher version
 - Spark 3.2.2 or Spark 3.3.1
 
 Then, get Gluten code:
-```
+```shell
     git clone https://github.com/apache/incubator-gluten.git
 ```
 
 #### Setup ClickHouse backend development environment
 
-If you don't care about development environment, you can skip this part.
+##### Compile Clickhouse backend
 
-Otherwise, do:
+###### clone repos
 
-1. clone Kyligence/ClickHouse repo
-    ```
-    cd /to/some/place/
+clone Kyligence/ClickHouse repo
+    ```shell
     git clone --recursive --shallow-submodules -b clickhouse_backend 
https://github.com/Kyligence/ClickHouse.git
     ```
+    checkout to the latest branch
+    ```shell
+    latest_branch=$(cat $gluten_root/cpp-ch/clickhouse.version  | grep 
CH_BRANCH | cut -d= -f2)
+    git checkout -b $latest_branch origin/$latest_branch
+    git submodule sync --recursive
+    git submodule update --init --recursive
+    ```
 
-2. Configure cpp-ch
-    ${GLUTEN_SOURCE}/cpp-ch can be treated as an add-on of Kyligence/Clickhouse
 
-    First, initialize some configuration for this add-on:
+clone gluten repo
+  ```shell
+  git clone https://github.com/apache/incubator-gluten.git
+  ```
+
+
+##### build
+
+There are several ways to build the backend library.
+1. Build it directly
+
+If you have set up all the requirements, you can use the following command to build it directly.
+
+```bash
+cd $gluten_root
+bash ./ep/build-clickhouse/src/build_clickhouse.sh
+```
+
+
+This will download Clickhouse for you and build everything.
+The target file is 
`$gluten_root/cpp-ch/build/utils/extern-local-engine/libch.so`.
+
+
+2. Use docker
+You can use [docker file](../../cpp-ch/local-engine/docker/image/Dockerfile) 
to build a docker image
+```shell
+cd $gluten_root/cpp-ch/local-engine/docker/image/
+docker build . -t libch_builder
+
+cd $gluten_root/cpp-ch/local-engine/docker
+./build.sh -g <gluten_root> -c <clickhouse_root> [-b <build_directory>] [-o 
<output_directory>] libch_builder
+```
+`build_directory` is the directory used to store the intermediate files from compiling. It defaults to `<current_dir>/build` if you don't provide it.
 
-    ```shell
-    export GLUTEN_SOURCE=/path/to/gluten
-    export CH_SOURCE_DIR=/path/to/ClickHouse
-    cmake -G Ninja -S ${GLUTEN_SOURCE}/cpp-ch -B 
${GLUTEN_SOURCE}/cpp-ch/build_ch -DCH_SOURCE_DIR=${CH_SOURCE_DIR} 
"-DCMAKE_C_COMPILER=$(command -v clang-18)" "-DCMAKE_CXX_COMPILER=$(command -v 
clang++-18)" "-DCMAKE_BUILD_TYPE=RelWithDebInfo"
-    ```
 
-    Next, you need to compile Kyligence/Clickhouse. There are two options:
+`output_directory` is the directory used to store the final output `libch.so`. It defaults to `<current_dir>/output` if you don't provide it.
+
 
 3. (Option 1) Use CLion
 
@@ -94,28 +135,31 @@ Otherwise, do:
    
       If it builds with Debug mode successfully, there is a library file 
called 'libchd.so' in path 
'${CH_SOURCE_DIR}/cmake-build-debug/utils/extern-local-engine/'.
 
-4. (Option 2) Use command line
-    ```
-    cmake --build ${GLUTEN_SOURCE}/cpp-ch/build_ch --target build_ch
-   ```
-   If it builds successfully, there is a library file called 'libch.so' in 
path '${GLUTEN_SOURCE}/cpp-ch/build/utils/extern-local-engine/'.
-   
-### Directly Compile ClickHouse backend
-
-In case you don't want a develop environment, you can use the following 
command to compile ClickHouse backend directly:
+4. Build it as a submodule of Clickhouse
 
+```shell
+ln -s $gluten_root/cpp-ch/local-engine 
$clickhouse_root/utils/extern-local-engine
+mkdir -p $clickhouse_root/build
+cd $clickhouse_root/build
+cmake -G Ninja  "-DCMAKE_C_COMPILER=$CC" "-DCMAKE_CXX_COMPILER=$CXX" \
+          "-DCMAKE_BUILD_TYPE=Release" \
+          "-DENABLE_PROTOBUF=1" \
+          "-DENABLE_EMBEDDED_COMPILER=$ENABLE_EMBEDDED_COMPILER" \
+          "-DENABLE_TESTS=OFF" \
+          "-DWERROR=OFF" \
+          "-DENABLE_JEMALLOC=1" \
+          "-DENABLE_MULTITARGET_CODE=ON" \
+          "-DENABLE_GWP_ASAN=OFF" \
+          "-DENABLE_EXTERN_LOCAL_ENGINE=ON" \
+          "-DENABLE_THINLTO=false" \
+          ..
+ninja
 ```
-git clone https://github.com/apache/incubator-gluten.git
-cd incubator-gluten
-bash ./ep/build-clickhouse/src/build_clickhouse.sh
-```
-
-This will download Clickhouse for you and build everything.
-The target file is 
`/path/to/gluten/cpp-ch/build/utils/extern-local-engine/libch.so`.
+The result is in `$clickhouse_root/build/utils/extern-local-engine/libch.so`.
 
 
 
-### Compile Gluten
+#### Compile Gluten
 
 The prerequisites are the same as the one mentioned above. Compile Gluten with 
ClickHouse backend through maven:
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to