This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f32deb18e9 [Update](build) change clucene from thirdparty to git module (#19352)
f32deb18e9 is described below
commit f32deb18e9832ccbd8193fc6822c2616faa2d58e
Author: airborne12 <[email protected]>
AuthorDate: Fri May 19 08:25:51 2023 +0800
[Update](build) change clucene from thirdparty to git module (#19352)
---
.gitmodules | 4 ++
be/CMakeLists.txt | 60 ++++++++++++++++------
be/src/clucene | 1 +
.../rowset/segment_v2/inverted_index_reader.cpp | 4 +-
build.sh | 35 ++++++++-----
thirdparty/build-thirdparty.sh | 46 -----------------
thirdparty/vars.sh | 7 ---
7 files changed, 72 insertions(+), 85 deletions(-)
diff --git a/.gitmodules b/.gitmodules
index 06213cbb75..9fe51bfd1d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -29,3 +29,7 @@
path = be/src/apache-orc
url = https://github.com/apache/doris-thirdparty.git
branch = orc
+[submodule "be/src/clucene"]
+ path = be/src/clucene
+ url = https://github.com/apache/doris-thirdparty.git
+ branch = clucene
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 182837f1a0..67ad424da5 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -148,18 +148,6 @@ endif()
set(GPERFTOOLS_HOME "${THIRDPARTY_DIR}/gperftools")
# Set all libraries
-add_library(ic STATIC IMPORTED)
-set_target_properties(ic PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libic.a)
-
-add_library(clucene-core STATIC IMPORTED)
-set_target_properties(clucene-core PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libclucene-core-static.a)
-
-add_library(clucene-shared STATIC IMPORTED)
-set_target_properties(clucene-shared PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libclucene-shared-static.a)
-
-add_library(clucene-contribs-lib STATIC IMPORTED)
-set_target_properties(clucene-contribs-lib PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libclucene-contribs-lib.a)
-
add_library(gflags STATIC IMPORTED)
set_target_properties(gflags PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libgflags.a)
@@ -449,6 +437,39 @@ SET(ZSTD_INCLUDE_DIR "$ENV{DORIS_THIRDPARTY}/installed/include/zstd")
add_subdirectory(${SRC_DIR}/apache-orc EXCLUDE_FROM_ALL)
target_compile_options(orc PRIVATE -Wno-implicit-fallthrough -w)
+set(BUILD_STATIC_LIBRARIES ON)
+set(BUILD_SHARED_LIBRARIES OFF)
+set(BUILD_CONTRIBS_LIB ON)
+set(BOOST_ROOT "$ENV{DORIS_THIRDPARTY}/installed")
+set(ZLIB_ROOT "$ENV{DORIS_THIRDPARTY}/installed")
+set(Roaring_ROOT "$ENV{DORIS_THIRDPARTY}/installed")
+set(USE_STAT64 0)
+
+if (USE_BTHREAD_SCANNER)
+ set(USE_BTHREAD ON)
+else()
+ set(USE_BTHREAD OFF)
+endif()
+
+
+add_subdirectory(${SRC_DIR}/clucene EXCLUDE_FROM_ALL)
+
+if (COMPILER_CLANG)
+ target_compile_options(clucene-core-static PRIVATE -fno-omit-frame-pointer -Wno-c++11-narrowing -w -Wall )
+ target_compile_options(clucene-shared-static PRIVATE -fno-omit-frame-pointer -Wno-c++11-narrowing -w -Wall )
+ target_compile_options(clucene-contribs-lib PRIVATE -fno-omit-frame-pointer -Wno-c++11-narrowing -w -Wall )
+ target_compile_options(ic PRIVATE -fno-omit-frame-pointer -Wno-c++11-narrowing -w -Wall )
+else ()
+ target_compile_options(clucene-core-static PRIVATE -fno-omit-frame-pointer -Wno-narrowing -w -Wall )
+ target_compile_options(clucene-shared-static PRIVATE -fno-omit-frame-pointer -Wno-narrowing -w -Wall )
+ target_compile_options(clucene-contribs-lib PRIVATE -fno-omit-frame-pointer -Wno-narrowing -w -Wall )
+ target_compile_options(ic PRIVATE -fno-omit-frame-pointer -Wno-narrowing -w -Wall )
+endif()
+
+install(DIRECTORY
+ ${SRC_DIR}/clucene/src/contribs-lib/CLucene/analysis/jieba/dict
+ DESTINATION ${OUTPUT_DIR})
+
# Check if functions are supported in this platform. All flags will generated
# in gensrc/build/common/env_config.h.
# You can check funcion here which depends on platform. Don't forget add this
@@ -645,6 +666,13 @@ include_directories(
${CMAKE_CURRENT_BINARY_DIR}/src/apache-orc/c++/include
)
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/src/clucene/src/shared
+ ${SRC_DIR}/clucene/src/core
+ ${SRC_DIR}/clucene/src/shared
+ ${SRC_DIR}/clucene/src/contribs-lib
+)
+
include_directories(
${SRC_DIR}/
${TEST_DIR}/
@@ -732,10 +760,6 @@ find_package(absl)
# When adding new dependencies, If you don’t know if it can run on all platforms,
# add it here first.
set(COMMON_THIRDPARTY
- ic
- clucene-core
- clucene-shared
- clucene-contribs-lib
backtrace
rocksdb
cyrus-sasl
@@ -858,6 +882,10 @@ if (WITH_MYSQL)
endif()
set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} orc)
+set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ic)
+set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} clucene-core-static)
+set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} clucene-shared-static)
+set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} clucene-contribs-lib)
set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ${WL_END_GROUP})
diff --git a/be/src/clucene b/be/src/clucene
new file mode 160000
index 0000000000..76cd035119
--- /dev/null
+++ b/be/src/clucene
@@ -0,0 +1 @@
+Subproject commit 76cd03511903916ab076ab0ef6f3779ab4e7476e
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index ae7b34bad6..48b399e8eb 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -38,10 +38,10 @@
#include <math.h>
#include <string.h>
-#include <CLucene/util/croaring/roaring.hh>
#include <algorithm>
#include <filesystem>
#include <ostream>
+#include <roaring/roaring.hh>
#include <set>
#include "common/config.h"
@@ -687,7 +687,7 @@ void InvertedIndexVisitor::visit(std::vector<char>& doc_id, std::vector<uint8_t>
visit(roaring::Roaring::read(doc_id.data(), false));
}
-void InvertedIndexVisitor::visit(Roaring* doc_id, std::vector<uint8_t>& packed_value) {
+void InvertedIndexVisitor::visit(roaring::Roaring* doc_id, std::vector<uint8_t>& packed_value) {
if (!matches(packed_value.data())) {
return;
}
diff --git a/build.sh b/build.sh
index 8b5373aab0..aa2f8fa4d8 100755
--- a/build.sh
+++ b/build.sh
@@ -252,19 +252,26 @@ if [[ ! -f "${DORIS_THIRDPARTY}/installed/lib/libbacktrace.a" ]]; then
fi
fi
-echo "Update apache-orc ..."
-set +e
-cd "${DORIS_HOME}"
-echo "Update apache-orc submodule ..."
-git submodule update --init --recursive be/src/apache-orc
-exit_code=$?
-set -e
-if [[ "${exit_code}" -ne 0 ]]; then
- echo "Update apache-orc submodule failed, start to download and extract apache-orc package ..."
- rm -rf "${DORIS_HOME}/be/src/apache-orc"
- mkdir -p "${DORIS_HOME}/be/src/apache-orc"
- curl -L https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz | tar -xz -C "${DORIS_HOME}/be/src/apache-orc" --strip-components=1
-fi
+update_submodule() {
+ local submodule_path=$1
+ local submodule_name=$2
+ local archive_url=$3
+
+ set +e
+ cd "${DORIS_HOME}"
+ echo "Update ${submodule_name} submodule ..."
+ git submodule update --init --recursive "${submodule_path}"
+ exit_code=$?
+ set -e
+ if [[ "${exit_code}" -ne 0 ]]; then
+ echo "Update ${submodule_name} submodule failed, start to download and extract apache-orc package ..."
+ mkdir -p "${DORIS_HOME}/${submodule_path}"
+ curl -L "${archive_url}" | tar -xz -C "${DORIS_HOME}/${submodule_path}" --strip-components=1
+ fi
+}
+
+update_submodule "be/src/apache-orc" "apache-orc" "https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz"
+update_submodule "be/src/clucene" "clucene" "https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene.tar.gz"
if [[ "${CLEAN}" -eq 1 && "${BUILD_BE}" -eq 0 && "${BUILD_FE}" -eq 0 && "${BUILD_SPARK_DPP}" -eq 0 ]]; then
clean_gensrc
@@ -573,6 +580,7 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
cp -r -p "${DORIS_HOME}/be/output/bin"/* "${DORIS_OUTPUT}/be/bin"/
cp -r -p "${DORIS_HOME}/be/output/conf"/* "${DORIS_OUTPUT}/be/conf"/
+ cp -r -p "${DORIS_HOME}/be/output/dict" "${DORIS_OUTPUT}/be/"
if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" "${DORIS_OUTPUT}/be/lib/"
@@ -618,7 +626,6 @@ EOF
copy_common_files "${DORIS_OUTPUT}/be/"
mkdir -p "${DORIS_OUTPUT}/be/log"
mkdir -p "${DORIS_OUTPUT}/be/storage"
- cp -r -p "${DORIS_THIRDPARTY}/installed/share/dict" "${DORIS_OUTPUT}/be/"
fi
if [[ "${BUILD_BROKER}" -eq 1 ]]; then
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index c3ef10319e..0e07b3a5b7 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -1571,51 +1571,6 @@ build_fast_float() {
cp -r ./include/fast_float "${TP_INSTALL_DIR}/include/"
}
-#clucene
-build_clucene() {
- if [[ "$(uname -m)" == 'x86_64' ]]; then
- USE_AVX2="${USE_AVX2:-1}"
- else
- USE_AVX2="${USE_AVX2:-0}"
- fi
- if [[ -z "${USE_BTHREAD_SCANNER}" ]]; then
- USE_BTHREAD_SCANNER='OFF'
- fi
- if [[ ${USE_BTHREAD_SCANNER} == "ON" ]]; then
- USE_BTHREAD=1
- else
- USE_BTHREAD=0
- fi
-
- check_if_source_exist "${CLUCENE_SOURCE}"
- cd "${TP_SOURCE_DIR}/${CLUCENE_SOURCE}"
-
- mkdir -p "${BUILD_DIR}"
- cd "${BUILD_DIR}"
- rm -rf CMakeCache.txt CMakeFiles/
-
- ${CMAKE_CMD} -G "${GENERATOR}" \
- -DCMAKE_INSTALL_PREFIX="${TP_INSTALL_DIR}" \
- -DBUILD_STATIC_LIBRARIES=ON \
- -DBUILD_SHARED_LIBRARIES=OFF \
- -DBOOST_ROOT="${TP_INSTALL_DIR}" \
- -DZLIB_ROOT="${TP_INSTALL_DIR}" \
- -DCMAKE_CXX_FLAGS="-g -fno-omit-frame-pointer ${warning_narrowing}" \
- -DUSE_STAT64=0 \
- -DUSE_AVX2="${USE_AVX2}" \
- -DUSE_BTHREAD="${USE_BTHREAD}" \
- -DCMAKE_BUILD_TYPE=Release \
- -DBUILD_CONTRIBS_LIB=ON ..
- ${BUILD_SYSTEM} -j "${PARALLEL}"
- ${BUILD_SYSTEM} install
-
- cd "${TP_SOURCE_DIR}/${CLUCENE_SOURCE}"
- if [[ ! -d "${TP_INSTALL_DIR}"/share ]]; then
- mkdir -p "${TP_INSTALL_DIR}"/share
- fi
- cp -rf src/contribs-lib/CLucene/analysis/jieba/dict "${TP_INSTALL_DIR}"/share/
-}
-
# hadoop_libs_x86
build_hadoop_libs_x86() {
check_if_source_exist "${HADOOP_LIBS_X86_SOURCE}"
@@ -1685,7 +1640,6 @@ if [[ "${#packages[@]}" -eq 0 ]]; then
xxhash
concurrentqueue
fast_float
- clucene
)
if [[ "$(uname -s)" == 'Darwin' ]]; then
read -r -a packages <<<"binutils gettext ${packages[*]}"
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 3ced85e07b..05846629c9 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -54,12 +54,6 @@ export TP_JAR_DIR="${TP_INSTALL_DIR}/lib/jar"
# of all thirdparties
#####################################################
-#clucene
-CLUCENE_DOWNLOAD="https://github.com/apache/doris-thirdparty/archive/refs/tags/libclucene-v2.4.12.tar.gz"
-CLUCENE_NAME="doris-thirdparty-libclucene-v2.4.12.tar.gz"
-CLUCENE_SOURCE="doris-thirdparty-libclucene-v2.4.12"
-CLUCENE_MD5SUM="171035c1d4c9fe3d7307f04dd76ab3e3"
-
# libevent
LIBEVENT_DOWNLOAD="https://github.com/libevent/libevent/archive/release-2.1.12-stable.tar.gz"
LIBEVENT_NAME=libevent-release-2.1.12-stable.tar.gz
@@ -466,7 +460,6 @@ HADOOP_LIBS_X86_MD5SUM="96117450170487f007ffeca5ddf62f7e"
# all thirdparties which need to be downloaded is set in array TP_ARCHIVES
export TP_ARCHIVES=(
- 'CLUCENE'
'LIBEVENT'
'OPENSSL'
'THRIFT'
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]