This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f32deb18e9 [Update](build) change clucene from thirdparty to git 
module (#19352)
f32deb18e9 is described below

commit f32deb18e9832ccbd8193fc6822c2616faa2d58e
Author: airborne12 <[email protected]>
AuthorDate: Fri May 19 08:25:51 2023 +0800

    [Update](build) change clucene from thirdparty to git module (#19352)
---
 .gitmodules                                        |  4 ++
 be/CMakeLists.txt                                  | 60 ++++++++++++++++------
 be/src/clucene                                     |  1 +
 .../rowset/segment_v2/inverted_index_reader.cpp    |  4 +-
 build.sh                                           | 35 ++++++++-----
 thirdparty/build-thirdparty.sh                     | 46 -----------------
 thirdparty/vars.sh                                 |  7 ---
 7 files changed, 72 insertions(+), 85 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 06213cbb75..9fe51bfd1d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -29,3 +29,7 @@
        path = be/src/apache-orc
        url = https://github.com/apache/doris-thirdparty.git
        branch = orc
+[submodule "be/src/clucene"]
+       path = be/src/clucene
+       url = https://github.com/apache/doris-thirdparty.git
+       branch = clucene
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 182837f1a0..67ad424da5 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -148,18 +148,6 @@ endif()
 set(GPERFTOOLS_HOME "${THIRDPARTY_DIR}/gperftools")
 
 # Set all libraries
-add_library(ic STATIC IMPORTED)
-set_target_properties(ic PROPERTIES IMPORTED_LOCATION 
${THIRDPARTY_DIR}/lib/libic.a)
-
-add_library(clucene-core STATIC IMPORTED)
-set_target_properties(clucene-core PROPERTIES IMPORTED_LOCATION 
${THIRDPARTY_DIR}/lib/libclucene-core-static.a)
-
-add_library(clucene-shared STATIC IMPORTED)
-set_target_properties(clucene-shared PROPERTIES IMPORTED_LOCATION 
${THIRDPARTY_DIR}/lib/libclucene-shared-static.a)
-
-add_library(clucene-contribs-lib STATIC IMPORTED)
-set_target_properties(clucene-contribs-lib PROPERTIES IMPORTED_LOCATION 
${THIRDPARTY_DIR}/lib/libclucene-contribs-lib.a)
-
 add_library(gflags STATIC IMPORTED)
 set_target_properties(gflags PROPERTIES IMPORTED_LOCATION 
${THIRDPARTY_DIR}/lib/libgflags.a)
 
@@ -449,6 +437,39 @@ SET(ZSTD_INCLUDE_DIR 
"$ENV{DORIS_THIRDPARTY}/installed/include/zstd")
 add_subdirectory(${SRC_DIR}/apache-orc EXCLUDE_FROM_ALL)
 target_compile_options(orc PRIVATE -Wno-implicit-fallthrough -w)
 
+set(BUILD_STATIC_LIBRARIES ON)
+set(BUILD_SHARED_LIBRARIES OFF)
+set(BUILD_CONTRIBS_LIB ON)
+set(BOOST_ROOT "$ENV{DORIS_THIRDPARTY}/installed")
+set(ZLIB_ROOT "$ENV{DORIS_THIRDPARTY}/installed")
+set(Roaring_ROOT "$ENV{DORIS_THIRDPARTY}/installed")
+set(USE_STAT64 0)
+
+if (USE_BTHREAD_SCANNER)
+    set(USE_BTHREAD ON)
+else()
+    set(USE_BTHREAD OFF)
+endif()
+
+
+add_subdirectory(${SRC_DIR}/clucene EXCLUDE_FROM_ALL)
+
+if (COMPILER_CLANG)
+       target_compile_options(clucene-core-static PRIVATE 
-fno-omit-frame-pointer -Wno-c++11-narrowing -w -Wall )
+       target_compile_options(clucene-shared-static PRIVATE 
-fno-omit-frame-pointer -Wno-c++11-narrowing -w -Wall )
+       target_compile_options(clucene-contribs-lib PRIVATE 
-fno-omit-frame-pointer -Wno-c++11-narrowing -w -Wall )
+       target_compile_options(ic PRIVATE -fno-omit-frame-pointer 
-Wno-c++11-narrowing -w -Wall )
+else ()
+       target_compile_options(clucene-core-static PRIVATE 
-fno-omit-frame-pointer -Wno-narrowing -w -Wall )
+       target_compile_options(clucene-shared-static PRIVATE 
-fno-omit-frame-pointer -Wno-narrowing -w -Wall )
+       target_compile_options(clucene-contribs-lib PRIVATE 
-fno-omit-frame-pointer -Wno-narrowing -w -Wall )
+       target_compile_options(ic PRIVATE -fno-omit-frame-pointer 
-Wno-narrowing -w -Wall )
+endif()
+
+install(DIRECTORY
+    ${SRC_DIR}/clucene/src/contribs-lib/CLucene/analysis/jieba/dict 
+    DESTINATION ${OUTPUT_DIR})
+
 # Check if functions are supported on this platform. All flags will be generated
 # in gensrc/build/common/env_config.h.
 # You can check functions here which depend on the platform. Don't forget to add this
@@ -645,6 +666,13 @@ include_directories(
     ${CMAKE_CURRENT_BINARY_DIR}/src/apache-orc/c++/include
 )
 
+include_directories(
+    ${CMAKE_CURRENT_BINARY_DIR}/src/clucene/src/shared
+    ${SRC_DIR}/clucene/src/core
+    ${SRC_DIR}/clucene/src/shared
+    ${SRC_DIR}/clucene/src/contribs-lib
+)
+
 include_directories(
     ${SRC_DIR}/
     ${TEST_DIR}/
@@ -732,10 +760,6 @@ find_package(absl)
 # When adding new dependencies, If you don’t know if it can run on all 
platforms,
 # add it here first.
 set(COMMON_THIRDPARTY
-    ic
-    clucene-core
-    clucene-shared
-    clucene-contribs-lib
     backtrace
     rocksdb
     cyrus-sasl
@@ -858,6 +882,10 @@ if (WITH_MYSQL)
 endif()
 
 set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} orc)
+set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ic)
+set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} clucene-core-static)
+set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} clucene-shared-static)
+set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} clucene-contribs-lib)
 
 set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ${WL_END_GROUP})
 
diff --git a/be/src/clucene b/be/src/clucene
new file mode 160000
index 0000000000..76cd035119
--- /dev/null
+++ b/be/src/clucene
@@ -0,0 +1 @@
+Subproject commit 76cd03511903916ab076ab0ef6f3779ab4e7476e
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index ae7b34bad6..48b399e8eb 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -38,10 +38,10 @@
 #include <math.h>
 #include <string.h>
 
-#include <CLucene/util/croaring/roaring.hh>
 #include <algorithm>
 #include <filesystem>
 #include <ostream>
+#include <roaring/roaring.hh>
 #include <set>
 
 #include "common/config.h"
@@ -687,7 +687,7 @@ void InvertedIndexVisitor::visit(std::vector<char>& doc_id, 
std::vector<uint8_t>
     visit(roaring::Roaring::read(doc_id.data(), false));
 }
 
-void InvertedIndexVisitor::visit(Roaring* doc_id, std::vector<uint8_t>& 
packed_value) {
+void InvertedIndexVisitor::visit(roaring::Roaring* doc_id, 
std::vector<uint8_t>& packed_value) {
     if (!matches(packed_value.data())) {
         return;
     }
diff --git a/build.sh b/build.sh
index 8b5373aab0..aa2f8fa4d8 100755
--- a/build.sh
+++ b/build.sh
@@ -252,19 +252,26 @@ if [[ ! -f 
"${DORIS_THIRDPARTY}/installed/lib/libbacktrace.a" ]]; then
     fi
 fi
 
-echo "Update apache-orc ..."
-set +e
-cd "${DORIS_HOME}"
-echo "Update apache-orc submodule ..."
-git submodule update --init --recursive be/src/apache-orc
-exit_code=$?
-set -e
-if [[ "${exit_code}" -ne 0 ]]; then
-    echo "Update apache-orc submodule failed, start to download and extract 
apache-orc package ..."
-    rm -rf "${DORIS_HOME}/be/src/apache-orc"
-    mkdir -p "${DORIS_HOME}/be/src/apache-orc"
-    curl -L 
https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz | tar 
-xz -C "${DORIS_HOME}/be/src/apache-orc" --strip-components=1
-fi
+update_submodule() { # fetch one git submodule; on failure, fall back to extracting an archive
+    local submodule_path=$1 # submodule path relative to ${DORIS_HOME}
+    local submodule_name=$2 # name used in the log messages
+    local archive_url=$3    # .tar.gz downloaded when the git update fails
+
+    set +e # a failing submodule update must not abort the script; it is handled below
+    cd "${DORIS_HOME}"
+    echo "Update ${submodule_name} submodule ..."
+    git submodule update --init --recursive "${submodule_path}"
+    exit_code=$?
+    set -e
+    if [[ "${exit_code}" -ne 0 ]]; then
+        echo "Update ${submodule_name} submodule failed, start to download and extract ${submodule_name} package ..."
+        mkdir -p "${DORIS_HOME}/${submodule_path}"
+        curl -L "${archive_url}" | tar -xz -C "${DORIS_HOME}/${submodule_path}" --strip-components=1 # drop the archive's top-level directory
+    fi
+}
+
+update_submodule "be/src/apache-orc" "apache-orc" "https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz"
+update_submodule "be/src/clucene" "clucene" "https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene.tar.gz"
 
 if [[ "${CLEAN}" -eq 1 && "${BUILD_BE}" -eq 0 && "${BUILD_FE}" -eq 0 && 
"${BUILD_SPARK_DPP}" -eq 0 ]]; then
     clean_gensrc
@@ -573,6 +580,7 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
 
     cp -r -p "${DORIS_HOME}/be/output/bin"/* "${DORIS_OUTPUT}/be/bin"/
     cp -r -p "${DORIS_HOME}/be/output/conf"/* "${DORIS_OUTPUT}/be/conf"/
+    cp -r -p "${DORIS_HOME}/be/output/dict" "${DORIS_OUTPUT}/be/"
 
     if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
         cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" 
"${DORIS_OUTPUT}/be/lib/"
@@ -618,7 +626,6 @@ EOF
     copy_common_files "${DORIS_OUTPUT}/be/"
     mkdir -p "${DORIS_OUTPUT}/be/log"
     mkdir -p "${DORIS_OUTPUT}/be/storage"
-    cp -r -p "${DORIS_THIRDPARTY}/installed/share/dict" "${DORIS_OUTPUT}/be/"
 fi
 
 if [[ "${BUILD_BROKER}" -eq 1 ]]; then
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index c3ef10319e..0e07b3a5b7 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -1571,51 +1571,6 @@ build_fast_float() {
     cp -r ./include/fast_float "${TP_INSTALL_DIR}/include/"
 }
 
-#clucene
-build_clucene() {
-    if [[ "$(uname -m)" == 'x86_64' ]]; then
-        USE_AVX2="${USE_AVX2:-1}"
-    else
-        USE_AVX2="${USE_AVX2:-0}"
-    fi
-    if [[ -z "${USE_BTHREAD_SCANNER}" ]]; then
-        USE_BTHREAD_SCANNER='OFF'
-    fi
-    if [[ ${USE_BTHREAD_SCANNER} == "ON" ]]; then
-        USE_BTHREAD=1
-    else
-        USE_BTHREAD=0
-    fi
-
-    check_if_source_exist "${CLUCENE_SOURCE}"
-    cd "${TP_SOURCE_DIR}/${CLUCENE_SOURCE}"
-
-    mkdir -p "${BUILD_DIR}"
-    cd "${BUILD_DIR}"
-    rm -rf CMakeCache.txt CMakeFiles/
-
-    ${CMAKE_CMD} -G "${GENERATOR}" \
-        -DCMAKE_INSTALL_PREFIX="${TP_INSTALL_DIR}" \
-        -DBUILD_STATIC_LIBRARIES=ON \
-        -DBUILD_SHARED_LIBRARIES=OFF \
-        -DBOOST_ROOT="${TP_INSTALL_DIR}" \
-        -DZLIB_ROOT="${TP_INSTALL_DIR}" \
-        -DCMAKE_CXX_FLAGS="-g -fno-omit-frame-pointer ${warning_narrowing}" \
-        -DUSE_STAT64=0 \
-        -DUSE_AVX2="${USE_AVX2}" \
-        -DUSE_BTHREAD="${USE_BTHREAD}" \
-        -DCMAKE_BUILD_TYPE=Release \
-        -DBUILD_CONTRIBS_LIB=ON ..
-    ${BUILD_SYSTEM} -j "${PARALLEL}"
-    ${BUILD_SYSTEM} install
-
-    cd "${TP_SOURCE_DIR}/${CLUCENE_SOURCE}"
-    if [[ ! -d "${TP_INSTALL_DIR}"/share ]]; then
-        mkdir -p "${TP_INSTALL_DIR}"/share
-    fi
-    cp -rf src/contribs-lib/CLucene/analysis/jieba/dict 
"${TP_INSTALL_DIR}"/share/
-}
-
 # hadoop_libs_x86
 build_hadoop_libs_x86() {
     check_if_source_exist "${HADOOP_LIBS_X86_SOURCE}"
@@ -1685,7 +1640,6 @@ if [[ "${#packages[@]}" -eq 0 ]]; then
         xxhash
         concurrentqueue
         fast_float
-        clucene
     )
     if [[ "$(uname -s)" == 'Darwin' ]]; then
         read -r -a packages <<<"binutils gettext ${packages[*]}"
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 3ced85e07b..05846629c9 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -54,12 +54,6 @@ export TP_JAR_DIR="${TP_INSTALL_DIR}/lib/jar"
 # of all thirdparties
 #####################################################
 
-#clucene
-CLUCENE_DOWNLOAD="https://github.com/apache/doris-thirdparty/archive/refs/tags/libclucene-v2.4.12.tar.gz";
-CLUCENE_NAME="doris-thirdparty-libclucene-v2.4.12.tar.gz"
-CLUCENE_SOURCE="doris-thirdparty-libclucene-v2.4.12"
-CLUCENE_MD5SUM="171035c1d4c9fe3d7307f04dd76ab3e3"
-
 # libevent
 
LIBEVENT_DOWNLOAD="https://github.com/libevent/libevent/archive/release-2.1.12-stable.tar.gz";
 LIBEVENT_NAME=libevent-release-2.1.12-stable.tar.gz
@@ -466,7 +460,6 @@ HADOOP_LIBS_X86_MD5SUM="96117450170487f007ffeca5ddf62f7e"
 
 # all thirdparties which need to be downloaded is set in array TP_ARCHIVES
 export TP_ARCHIVES=(
-    'CLUCENE'
     'LIBEVENT'
     'OPENSSL'
     'THRIFT'


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to