This is an automated email from the ASF dual-hosted git repository.
liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new ad5b468ad [GLUTEN-7224][CH]Update doc for compiling ch backend #7225
ad5b468ad is described below
commit ad5b468adf199c15ae9a53ecee407da5b434e001
Author: lgbo <[email protected]>
AuthorDate: Fri Sep 13 14:13:13 2024 +0800
[GLUTEN-7224][CH]Update doc for compiling ch backend #7225
What changes were proposed in this pull request?
(Please fill in changes proposed in this fix)
Fixes: #7224
How was this patch tested?
(Please explain how this patch was tested. E.g. unit tests, integration
tests, manual tests)
unit tests
(If this patch involves UI changes, please attach a screenshot; otherwise,
remove this)
---
cpp-ch/local-engine/docker/build.sh | 69 ++++++++++++++++-
cpp-ch/local-engine/docker/image/Dockerfile | 24 +++---
cpp-ch/local-engine/docker/image/build.sh | 22 +++++-
docs/get-started/ClickHouse.md | 114 +++++++++++++++++++---------
4 files changed, 178 insertions(+), 51 deletions(-)
diff --git a/cpp-ch/local-engine/docker/build.sh
b/cpp-ch/local-engine/docker/build.sh
index 8bc61284e..e88ef08e3 100755
--- a/cpp-ch/local-engine/docker/build.sh
+++ b/cpp-ch/local-engine/docker/build.sh
@@ -12,4 +12,71 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-sudo docker run --rm --volume="$2":/output --volume="$1":/clickhouse
--volume=/tmp/.cache:/ccache -e ENABLE_EMBEDDED_COMPILER=ON libchbuilder:1.0
\ No newline at end of file
+
+opts=$(getopt -o c:g:b:o:h --long clickhouse:,gluten:,build:,output:,help --
"$@")
+
+eval set -- "$opts"
+
+while true; do
+ case "$1" in
+ -g | --gluten )
+ GLUTEN_DIR=$2
+ shift 2
+ ;;
+ -c | --clickhouse )
+ CLICKHOUSE_DIR=$2
+ shift 2
+ ;;
+ -b | --build )
+ BUILD_DIR=$2
+ shift 2
+ ;;
+ -o | --output )
+ OUTPUT_DIR=$2
+ shift
+ ;;
+ -h | --help )
+ echo "Usage: build.sh -g <gluten_root_dir> -c
<clickhouse_root_dir> [-b <build_dir>] [-o <output_dir>]"
+ shift
+ ;;
+ -- )
+ shift
+ break
+ ;;
+ * )
+ break;
+ ;;
+ esac
+done
+
+if [ -z "$GLUTEN_DIR" ]; then
+ echo "Miss gluten source root directory"
+ exit 1
+fi
+
+if [ -z "$CLICKHOUSE_DIR" ]; then
+ echo "Miss clickhouse source root directory"
+ exit 1
+fi
+
+CURRENT_DIR=$(pwd)
+if [ -z "$BUILD_DIR" ]; then
+ mkdir -p build
+ BUILD_DIR=${CURRENT_DIR}/build
+ echo "Will use ${BUILD_DIR} as the build directory"
+fi
+
+if [ -z "$OUTPUT_DIR" ]; then
+ mkdir -p output
+ OUTPUT_DIR=${CURRENT_DIR}/build
+ echo "Will use ${OUTPUT_DIR} as the output directory"
+fi
+
+
+docker run --rm \
+ -v $GLUTEN_DIR:/gluten \
+ -v $CLICKHOUSE_DIR:/clickhouse \
+ -v /tmp/.cache:/ccache \
+ -v $BUILD_DIR:/build \
+ -v $OUTPUT_DIR:/output \
+ libch_builder
diff --git a/cpp-ch/local-engine/docker/image/Dockerfile
b/cpp-ch/local-engine/docker/image/Dockerfile
index 4355999e9..4941d20aa 100644
--- a/cpp-ch/local-engine/docker/image/Dockerfile
+++ b/cpp-ch/local-engine/docker/image/Dockerfile
@@ -1,12 +1,12 @@
# rebuild in #33610
-# docker build -t clickhouse/binary-builder .
+# docker build -t libch_builder .
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://mirrors.aliyun.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
-ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=16
+ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=17
RUN apt-get update \
&& apt-get install \
@@ -36,35 +36,35 @@ RUN apt-get update \
RUN curl -s https://apt.kitware.com/keys/kitware-archive-latest.asc | \
gpg --dearmor - > /etc/apt/trusted.gpg.d/kitware.gpg && \
echo "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" >>
/etc/apt/sources.list
+
# initial packages
RUN apt-get update \
&& apt-get install \
bash \
openjdk-8-jdk\
- # build-essential \
ccache \
clang-${LLVM_VERSION} \
-# clang-tidy-${LLVM_VERSION} \
cmake \
fakeroot \
-# gdb \
git \
-# gperf \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
-# llvm-${LLVM_VERSION}-dev \
-# moreutils \
ninja-build \
-# pigz \
-# rename \
software-properties-common \
tzdata \
--yes --no-install-recommends \
+# clang-tidy-${LLVM_VERSION} \
+# gperf \
+# gdb \
+# llvm-${LLVM_VERSION}-dev \
+# moreutils \
+# build-essential \
+# pigz \
+# rename \
&& apt-get clean
# This symlink required by gcc to find lld compiler
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
-
ENV RUSTUP_HOME=/rust/rustup
ENV CARGO_HOME=/rust/cargo
ENV PATH="/rust/cargo/env:${PATH}"
@@ -82,4 +82,4 @@ ENV CXX=clang++-${LLVM_VERSION}
ADD ./build.sh /build.sh
RUN chmod +x /build.sh
-CMD ["bash", "-c", "/build.sh 2>&1"]
\ No newline at end of file
+CMD ["bash", "-c", "/build.sh 2>&1"]
diff --git a/cpp-ch/local-engine/docker/image/build.sh
b/cpp-ch/local-engine/docker/image/build.sh
index 22d62aa04..6de678def 100644
--- a/cpp-ch/local-engine/docker/image/build.sh
+++ b/cpp-ch/local-engine/docker/image/build.sh
@@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
mkdir -p /build && cd /build || exit
export CCACHE_DIR=/ccache
export CCACHE_BASEDIR=/build
@@ -19,7 +20,22 @@ export CCACHE_NOHASHDIR=true
export CCACHE_COMPILERCHECK=content
export CCACHE_MAXSIZE=15G
-cmake -G Ninja "-DCMAKE_C_COMPILER=$CC" "-DCMAKE_CXX_COMPILER=$CXX"
"-DCMAKE_BUILD_TYPE=Release" "-DENABLE_PROTOBUF=1"
"-DENABLE_EMBEDDED_COMPILER=$ENABLE_EMBEDDED_COMPILER" "-DENABLE_TESTS=OFF"
"-DWERROR=OFF" "-DENABLE_JEMALLOC=1" "-DENABLE_MULTITARGET_CODE=ON" /clickhouse
-ninja ch
+# link the local-engine lib to a subdir of clickhouse
+rm -f /clickhosue/util/extern-local-engine
+ln -s /gluten/cpp-ch/local-engine /clickhouse/utils/extern-local-engine
+
+cmake -G Ninja "-DCMAKE_C_COMPILER=$CC" "-DCMAKE_CXX_COMPILER=$CXX" \
+ "-DCMAKE_BUILD_TYPE=Release" \
+ "-DENABLE_PROTOBUF=1" \
+ "-DENABLE_EMBEDDED_COMPILER=$ENABLE_EMBEDDED_COMPILER" \
+ "-DENABLE_TESTS=OFF" \
+ "-DWERROR=OFF" \
+ "-DENABLE_JEMALLOC=1" \
+ "-DENABLE_MULTITARGET_CODE=ON" \
+ "-DENABLE_GWP_ASAN=OFF" \
+ "-DENABLE_EXTERN_LOCAL_ENGINE=ON" \
+ "-DENABLE_THINLTO=false" \
+ /clickhouse
+ninja
-cp /build/utils/local-engine/libch.so "/output/libch_$(date +%Y%m%d).so"
\ No newline at end of file
+cp /build/utils/extern-local-engine/libch.so "/output/libch_$(date +%Y%m%d).so"
diff --git a/docs/get-started/ClickHouse.md b/docs/get-started/ClickHouse.md
index a4b3174d1..b12607d96 100644
--- a/docs/get-started/ClickHouse.md
+++ b/docs/get-started/ClickHouse.md
@@ -27,49 +27,90 @@ The architecture of the ClickHouse backend is shown below:
In general, we use IDEA for Gluten development and CLion for ClickHouse
backend development on **Ubuntu 20**.
-#### Prerequisites
+#### Prerequisite
-Install the software required for compilation, run `sudo
./ep/build-clickhouse/src/install_ubuntu.sh`.
-Under the hood, it will install the following software:
+##### For compiling clickhouse backend
+
+The following software is required:
- Clang 18.0
- cmake 3.20 or higher version
- ninja-build 1.8.2
+You can run `sudo $gluten_root/ep/build-clickhouse/src/install_ubuntu.sh` to
set up the requirements. We also provide a [docker
file](../../cpp-ch/local-engine/docker/image/Dockerfile) that you can use to build your
own image:
+```shell
+cd $gluten_root/cpp-ch/local-engine/docker/image/
+docker build . -t libch_builder
+```
+
You can also refer to
[How-to-Build-ClickHouse-on-Linux](https://clickhouse.com/docs/en/development/build/).
+
+##### For compiling gluten
You need to install the following software manually:
- Java 8
- Maven 3.6.3 or higher version
- Spark 3.2.2 or Spark 3.3.1
Then, get Gluten code:
-```
+```shell
git clone https://github.com/apache/incubator-gluten.git
```
#### Setup ClickHouse backend development environment
-If you don't care about development environment, you can skip this part.
+##### Compile Clickhouse backend
-Otherwise, do:
+###### clone repos
-1. clone Kyligence/ClickHouse repo
- ```
- cd /to/some/place/
+clone Kyligence/ClickHouse repo
+ ```shell
git clone --recursive --shallow-submodules -b clickhouse_backend
https://github.com/Kyligence/ClickHouse.git
```
+ check out the latest branch
+ ```shell
+ latest_branch=$(cat $gluten_root/cpp-ch/clickhouse.version | grep
CH_BRANCH | cut -d= -f2)
+ git checkout -b $latest_branch origin/$latest_branch
+ git submodule sync --recursive
+ git submodule update --init --recursive
+ ```
-2. Configure cpp-ch
- ${GLUTEN_SOURCE}/cpp-ch can be treated as an add-on of Kyligence/Clickhouse
- First, initialize some configuration for this add-on:
+clone gluten repo
+ ```shell
+ git clone https://github.com/apache/incubator-gluten.git
+ ```
+
+
+##### build
+
+There are several ways to build the backend library.
+1. Build it directly
+
+If you have set up all requirements, you can use the following command to build it
directly.
+
+```bash
+cd $gluten_root
+bash ./ep/build-clickhouse/src/build_clickhouse.sh
+```
+
+
+This will download Clickhouse for you and build everything.
+The target file is
`$gluten_root/cpp-ch/build/utils/extern-local-engine/libch.so`.
+
+
+2. Use docker
+You can use [docker file](../../cpp-ch/local-engine/docker/image/Dockerfile)
to build a docker image
+```shell
+cd $gluten_root/cpp-ch/local-engine/docker/image/
+docker build . -t libch_builder
+
+cd $gluten_root/cpp-ch/local-engine/docker
+./build.sh -g <gluten_root> -c <clickhouse_root> [-b <build_directory>] [-o
<output_directory>] libch_builder
+```
+`build_directory` is the directory used to store the intermediate files from
compiling. It defaults to `<current_dir>/build` if you don't
provide it.
- ```shell
- export GLUTEN_SOURCE=/path/to/gluten
- export CH_SOURCE_DIR=/path/to/ClickHouse
- cmake -G Ninja -S ${GLUTEN_SOURCE}/cpp-ch -B
${GLUTEN_SOURCE}/cpp-ch/build_ch -DCH_SOURCE_DIR=${CH_SOURCE_DIR}
"-DCMAKE_C_COMPILER=$(command -v clang-18)" "-DCMAKE_CXX_COMPILER=$(command -v
clang++-18)" "-DCMAKE_BUILD_TYPE=RelWithDebInfo"
- ```
- Next, you need to compile Kyligence/Clickhouse. There are two options:
+`output_directory` is the directory used to store the final output `libch.so`. It
defaults to `<current_dir>/output` if you don't provide it.
+
3. (Option 1) Use CLion
@@ -94,28 +135,31 @@ Otherwise, do:
If it builds with Debug mode successfully, there is a library file
called 'libchd.so' in path
'${CH_SOURCE_DIR}/cmake-build-debug/utils/extern-local-engine/'.
-4. (Option 2) Use command line
- ```
- cmake --build ${GLUTEN_SOURCE}/cpp-ch/build_ch --target build_ch
- ```
- If it builds successfully, there is a library file called 'libch.so' in
path '${GLUTEN_SOURCE}/cpp-ch/build/utils/extern-local-engine/'.
-
-### Directly Compile ClickHouse backend
-
-In case you don't want a develop environment, you can use the following
command to compile ClickHouse backend directly:
+4. Build it as a submodule of Clickhouse
+```shell
+ln -s $gluten_root/cpp-ch/local-engine
$clickhouse_root/utils/extern-local-engine
+mkdir -p $clickhouse_root/build
+cd $clickhouse_root/build
+cmake -G Ninja "-DCMAKE_C_COMPILER=$CC" "-DCMAKE_CXX_COMPILER=$CXX" \
+ "-DCMAKE_BUILD_TYPE=Release" \
+ "-DENABLE_PROTOBUF=1" \
+ "-DENABLE_EMBEDDED_COMPILER=$ENABLE_EMBEDDED_COMPILER" \
+ "-DENABLE_TESTS=OFF" \
+ "-DWERROR=OFF" \
+ "-DENABLE_JEMALLOC=1" \
+ "-DENABLE_MULTITARGET_CODE=ON" \
+ "-DENABLE_GWP_ASAN=OFF" \
+ "-DENABLE_EXTERN_LOCAL_ENGINE=ON" \
+ "-DENABLE_THINLTO=false" \
+ ..
+ninja
```
-git clone https://github.com/apache/incubator-gluten.git
-cd incubator-gluten
-bash ./ep/build-clickhouse/src/build_clickhouse.sh
-```
-
-This will download Clickhouse for you and build everything.
-The target file is
`/path/to/gluten/cpp-ch/build/utils/extern-local-engine/libch.so`.
+The result is in `$clickhouse_root/build/utils/extern-local-engine/libch.so`.
-### Compile Gluten
+#### Compile Gluten
The prerequisites are the same as the one mentioned above. Compile Gluten with
ClickHouse backend through maven:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]