This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new be110ffaf6 [thirdparty](clucene) add clucene deps for doris inverted
index (#15807)
be110ffaf6 is described below
commit be110ffaf62ef8beef8b1ddca24c8895eef886a8
Author: airborne12 <[email protected]>
AuthorDate: Thu Jan 12 21:59:19 2023 +0800
[thirdparty](clucene) add clucene deps for doris inverted index (#15807)
As part of the Inverted Index DSIP, we'd like to contribute our inverted
index implementation step by step.
First, we need to add CLucene to the Doris third-party libraries, because
the inverted index implementation is based on the
Lucene API and index file format. We have also added our own features and
performance improvements on top of CLucene, so we
need to maintain the repository ourselves.
---
build.sh | 1 +
thirdparty/build-thirdparty.sh | 29 +++++++++++++++++++++++++++++
thirdparty/vars.sh | 7 +++++++
3 files changed, 37 insertions(+)
diff --git a/build.sh b/build.sh
index 6360acb6fd..fb2e0b86cb 100755
--- a/build.sh
+++ b/build.sh
@@ -575,6 +575,7 @@ EOF
copy_common_files "${DORIS_OUTPUT}/be/"
mkdir -p "${DORIS_OUTPUT}/be/log"
mkdir -p "${DORIS_OUTPUT}/be/storage"
+ cp -r -p "${DORIS_THIRDPARTY}/installed/share/dict" "${DORIS_OUTPUT}/be/"
fi
if [[ "${BUILD_BROKER}" -eq 1 ]]; then
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index ac0ce174d4..3798c54979 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -146,6 +146,7 @@ if [[ "${CC}" == *gcc ]]; then
warning_stringop_truncation='-Wno-stringop-truncation'
warning_class_memaccess='-Wno-class-memaccess'
warning_array_parameter='-Wno-array-parameter'
+ warning_narrowing='-Wno-narrowing'
boost_toolset='gcc'
elif [[ "${CC}" == *clang ]]; then
warning_uninitialized='-Wno-uninitialized'
@@ -156,6 +157,7 @@ elif [[ "${CC}" == *clang ]]; then
warning_reserved_identifier='-Wno-reserved-identifier'
warning_suggest_override='-Wno-suggest-override
-Wno-suggest-destructor-override'
warning_option_ignored='-Wno-option-ignored'
+ warning_narrowing='-Wno-c++11-narrowing'
boost_toolset='clang'
libhdfs_cxx17='-std=c++1z'
@@ -1545,12 +1547,39 @@ build_concurrentqueue() {
cp ./*.h "${TP_INSTALL_DIR}/include/"
}
+#clucene
+build_clucene() {
+ if [[ -z ${USE_AVX2} ]]; then
+ USE_AVX2=1
+ fi
+ if [[ -z ${BUILD_TYPE} ]]; then
+ BUILD_TYPE=Release
+ fi
+ check_if_source_exist "${CLUCENE_SOURCE}"
+ cd "${TP_SOURCE_DIR}/${CLUCENE_SOURCE}"
+ mkdir -p "${BUILD_DIR}" && cd "${BUILD_DIR}"
+ rm -rf CMakeCache.txt CMakeFiles/
+
+ ${CMAKE_CMD} -G "${GENERATOR}" -DCMAKE_INSTALL_PREFIX="${TP_INSTALL_DIR}"
-DBUILD_STATIC_LIBRARIES=ON \
+ -DBUILD_SHARED_LIBRARIES=OFF
-DCMAKE_CXX_FLAGS="-fno-omit-frame-pointer ${warning_narrowing}" \
+ -DUSE_STAT64=0 -DUSE_AVX2="${USE_AVX2}"
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" -DBUILD_CONTRIBS_LIB=ON ..
+ ${BUILD_SYSTEM} -j "${PARALLEL}"
+ ${BUILD_SYSTEM} install
+
+ cd "${TP_SOURCE_DIR}/${CLUCENE_SOURCE}"
+ if [[ ! -d "${TP_INSTALL_DIR}"/share ]]; then
+ mkdir -p "${TP_INSTALL_DIR}"/share
+ fi
+ cp -rf src/contribs-lib/CLucene/analysis/jieba/dict
"${TP_INSTALL_DIR}"/share/
+}
+
if [[ "$(uname -s)" == 'Darwin' ]]; then
echo 'build for Darwin'
build_binutils
build_gettext
fi
+build_clucene
build_libunixodbc
build_openssl
build_libevent
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 1da6c6155c..64f64a1980 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -54,6 +54,12 @@ export TP_JAR_DIR="${TP_INSTALL_DIR}/lib/jar"
# of all thirdparties
#####################################################
+#clucene
+CLUCENE_DOWNLOAD="https://github.com/apache/doris-thirdparty/archive/refs/tags/libclucene-v2.4.4.zip"
+CLUCENE_NAME="doris-thirdparty-libclucene-v2.4.4.zip"
+CLUCENE_SOURCE="doris-thirdparty-libclucene-v2.4.4"
+CLUCENE_MD5SUM="431b4c2dc5c75df5e114da72a2e9b74a"
+
# libevent
LIBEVENT_DOWNLOAD="https://github.com/libevent/libevent/archive/release-2.1.12-stable.tar.gz"
LIBEVENT_NAME=libevent-release-2.1.12-stable.tar.gz
@@ -443,6 +449,7 @@ CONCURRENTQUEUE_MD5SUM="118e5bb661b567634647312991e10222"
# all thirdparties which need to be downloaded is set in array TP_ARCHIVES
export TP_ARCHIVES=(
+ 'CLUCENE'
'LIBEVENT'
'OPENSSL'
'THRIFT'
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]