This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8740049402c [third-party](faiss) Enable FAISS integration in Doris.
(#49644)
8740049402c is described below
commit 8740049402c77282e8950fdc8f2144452f2003bc
Author: zhiqiang <[email protected]>
AuthorDate: Tue Apr 1 09:01:46 2025 +0800
[third-party](faiss) Enable FAISS integration in Doris. (#49644)
### What problem does this PR solve?
Enable FAISS integration in Doris.
FAISS depends on OpenMP, BLAS and LAPACK.
OpenMP is distributed with gcc/llvm.
OpenBLAS supplies both the BLAS and LAPACK implementations, so we introduce OpenBLAS as a new third-party library.
If you are using ldb-toolchain and its version is older than
https://github.com/amosbird/ldb_toolchain_gen/releases/tag/v0.24, gcc
should be used to compile openblas and faiss, since libomp.a is missing.
Build new thirdparty:
```
sh build-thirdparty.sh openblas
sh build-thirdparty.sh faiss
```
Run `export ENABLE_BUILD_FAISS=ON` before invoking build.sh to make Doris link with faiss.
---
be/CMakeLists.txt | 3 ++
be/cmake/thirdparty.cmake | 5 +++
build.sh | 8 +++++
thirdparty/build-thirdparty.sh | 60 +++++++++++++++++++++++++++++++
thirdparty/download-thirdparty.sh | 13 +++++++
thirdparty/patches/faiss-1.10.0.patch | 66 +++++++++++++++++++++++++++++++++++
thirdparty/vars.sh | 15 ++++++++
7 files changed, 170 insertions(+)
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index a77c796b381..e7dc2961a4b 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -181,6 +181,9 @@ endif()
set(GPERFTOOLS_HOME "${THIRDPARTY_DIR}/gperftools")
+option(BUILD_FAISS "Link doris with faiss for vector similarity search" OFF)
+message(STATUS "build faiss: ${BUILD_FAISS}")
+
include (cmake/thirdparty.cmake)
find_program(THRIFT_COMPILER thrift ${CMAKE_SOURCE_DIR}/bin)
diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake
index a165c4ab203..1250e8ab1f5 100644
--- a/be/cmake/thirdparty.cmake
+++ b/be/cmake/thirdparty.cmake
@@ -175,3 +175,8 @@ endif()
add_thirdparty(icuuc LIB64)
add_thirdparty(icui18n LIB64)
add_thirdparty(icudata LIB64)
+
+if (BUILD_FAISS)
+ add_thirdparty(openblas LIB64)
+ add_thirdparty(faiss LIB64)
+endif()
diff --git a/build.sh b/build.sh
index 3774803c533..3fbdc4df6c1 100755
--- a/build.sh
+++ b/build.sh
@@ -70,6 +70,7 @@ Usage: $0 <options>
DISABLE_BE_JAVA_EXTENSIONS If set DISABLE_BE_JAVA_EXTENSIONS=ON, we will
do not build binary with java-udf,hudi-scanner,jdbc-scanner and so on Default
is OFF.
DISABLE_JAVA_CHECK_STYLE If set DISABLE_JAVA_CHECK_STYLE=ON, it will
skip style check of java code in FE.
DISABLE_BUILD_AZURE If set DISABLE_BUILD_AZURE=ON, it will not
build azure into BE.
+ ENABLE_BUILD_FAISS If set ENABLE_BUILD_FAISS=ON, it will link BE with
faiss.
Eg.
$0 build all
@@ -173,6 +174,7 @@ PARAMETER_COUNT="$#"
PARAMETER_FLAG=0
DENABLE_CLANG_COVERAGE='OFF'
BUILD_AZURE='ON'
+BUILD_FAISS='OFF'
BUILD_UI=1
if [[ "$#" == 1 ]]; then
# default
@@ -472,6 +474,10 @@ if [[ -n "${DISABLE_BUILD_AZURE}" ]]; then
BUILD_AZURE='OFF'
fi
+if [[ -n "${ENABLE_BUILD_FAISS}" ]]; then
+ BUILD_FAISS='ON'
+fi
+
if [[ -z "${ENABLE_INJECTION_POINT}" ]]; then
ENABLE_INJECTION_POINT='OFF'
fi
@@ -640,6 +646,7 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then
-DENABLE_CLANG_COVERAGE="${DENABLE_CLANG_COVERAGE}" \
-DDORIS_JAVA_HOME="${JAVA_HOME}" \
-DBUILD_AZURE="${BUILD_AZURE}" \
+ -DBUILD_FAISS="${BUILD_FAISS}" \
"${DORIS_HOME}/be"
if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
@@ -681,6 +688,7 @@ if [[ "${BUILD_CLOUD}" -eq 1 ]]; then
-DEXTRA_CXX_FLAGS="${EXTRA_CXX_FLAGS}" \
-DBUILD_AZURE="${BUILD_AZURE}" \
-DBUILD_CHECK_META="${BUILD_CHECK_META:-OFF}" \
+ -DBUILD_FAISS="${BUILD_FAISS}" \
"${DORIS_HOME}/cloud/"
"${BUILD_SYSTEM}" -j "${PARALLEL}"
"${BUILD_SYSTEM}" install
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index b409349eec3..cdd5bec050c 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -1885,6 +1885,66 @@ build_pugixml() {
cp "${TP_SOURCE_DIR}/${PUGIXML_SOURCE}/src/pugiconfig.hpp"
"${TP_INSTALL_DIR}/include/"
}
+build_openblas() {
+ check_if_source_exist "${OPENBLAS_SOURCE}"
+ cd "${TP_SOURCE_DIR}/${OPENBLAS_SOURCE}"
+
+ rm -rf "${BUILD_DIR}"
+ mkdir -p "${BUILD_DIR}"
+ cd "${BUILD_DIR}"
+ OPENBLAS_CMAKE_OPTIONS=(
+ "-DCMAKE_PREFIX_PATH=${TP_INSTALL_DIR}"
+ "-DCMAKE_INSTALL_PREFIX=${TP_INSTALL_DIR}"
+ "-DCMAKE_BUILD_TYPE=Release"
+ "-DBUILD_WITHOUT_LAPACK=OFF"
+ "-DNO_SHARED=TRUE"
+ "-DNO_AVX512=TRUE"
+ "-DC_LAPACK=TRUE"
+ "-DUSE_OPENMP=TRUE"
+ "-DBUILD_STATIC_LIBS=ON"
+ "-DNOFORTRAN=TRUE"
+ "-DBUILD_TESTING=OFF"
+ "-DBUILD_RELAPACK=ON"
+ "-DBUILD_BENCHMARKS=OFF"
+ )
+
+ echo "Building openblas at $(pwd) with cmake parameters:
${OPENBLAS_CMAKE_OPTIONS[*]}"
+
+ "${CMAKE_CMD}" -G "${GENERATOR}" "${OPENBLAS_CMAKE_OPTIONS[@]}" ..
+ "${BUILD_SYSTEM}" -j "${PARALLEL}"
+ "${BUILD_SYSTEM}" install
+}
+
+build_faiss() {
+ check_if_source_exist "${FAISS_SOURCE}"
+ echo "Building faiss ${FAISS_SOURCE}"
+ cd "${TP_SOURCE_DIR}"
+ # if faiss dir not exists, create a symlink to faiss source dir
+ # this symlink is necessary since faiss source code must be compiled in a
directory named faiss.
+ if [[ ! -d "${TP_SOURCE_DIR}/faiss" ]]; then
+ ln -s "${FAISS_SOURCE}" faiss
+ fi
+ cd "${TP_SOURCE_DIR}/faiss"
+
+ rm -rf "${BUILD_DIR}"
+ mkdir -p "${BUILD_DIR}"
+ cd "${BUILD_DIR}"
+
+ FAISS_CMAKE_OPTIONS=(
+ "-DDORIS_THIRD_LIB_INSTALL_DIR=${TP_INSTALL_DIR}"
+ "-DCMAKE_INSTALL_PREFIX=${TP_INSTALL_DIR}"
+ "-DCMAKE_BUILD_TYPE=Release"
+ "-DFAISS_ENABLE_GPU=OFF"
+ "-DFAISS_ENABLE_PYTHON=OFF"
+ )
+
+ echo "Building faiss at $(pwd) with cmake parameters:
${FAISS_CMAKE_OPTIONS[*]}"
+
+ "${CMAKE_CMD}" -G "${GENERATOR}" "${FAISS_CMAKE_OPTIONS[@]}" ..
+ "${BUILD_SYSTEM}" -j "${PARALLEL}"
+ "${BUILD_SYSTEM}" install
+}
+
if [[ "${#packages[@]}" -eq 0 ]]; then
packages=(
jindofs
diff --git a/thirdparty/download-thirdparty.sh
b/thirdparty/download-thirdparty.sh
index b80048025f7..89c04f6fea9 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -590,5 +590,18 @@ if [[ " ${TP_ARCHIVES[*]} " =~ " THRIFT " ]]; then
echo "Finished patching ${THRIFT_SOURCE}"
fi
+# patch faiss cmake so that we can use openblas
+if [[ " ${TP_ARCHIVES[*]} " =~ " FAISS " ]]; then
+ if [[ "${FAISS_SOURCE}" = "faiss-1.10.0" ]]; then
+ cd "${TP_SOURCE_DIR}/${FAISS_SOURCE}"
+ if [[ ! -f "${PATCHED_MARK}" ]]; then
+ patch -p2 <"${TP_PATCH_DIR}/faiss-1.10.0.patch"
+ touch "${PATCHED_MARK}"
+ fi
+ cd -
+ fi
+ echo "Finished patching ${FAISS_SOURCE}"
+fi
+
# vim: ts=4 sw=4 ts=4 tw=100:
diff --git a/thirdparty/patches/faiss-1.10.0.patch
b/thirdparty/patches/faiss-1.10.0.patch
new file mode 100644
index 00000000000..8279f4d71a1
--- /dev/null
+++ b/thirdparty/patches/faiss-1.10.0.patch
@@ -0,0 +1,66 @@
+--- src/faiss-1.10.0/faiss/CMakeLists.txt 2025-02-01 05:52:00.000000000
+0800
++++ src/faiss-1.10.0/faiss/CMakeLists.txt.new 2025-03-28 19:45:37.513624103
+0800
+@@ -381,19 +381,51 @@
+ target_link_libraries(faiss_avx512 PRIVATE ${MKL_LIBRARIES})
+ target_link_libraries(faiss_avx512_spr PRIVATE ${MKL_LIBRARIES})
+ else()
+- find_package(BLAS REQUIRED)
+- target_link_libraries(faiss PRIVATE ${BLAS_LIBRARIES})
+- target_link_libraries(faiss_avx2 PRIVATE ${BLAS_LIBRARIES})
+- target_link_libraries(faiss_avx512 PRIVATE ${BLAS_LIBRARIES})
+- target_link_libraries(faiss_avx512_spr PRIVATE ${BLAS_LIBRARIES})
+- target_link_libraries(faiss_sve PRIVATE ${BLAS_LIBRARIES})
++ # Look for the OpenBLAS static library in DORIS_THIRD_LIB_INSTALL_DIR first
++ if(DEFINED DORIS_THIRD_LIB_INSTALL_DIR)
++ set(OpenBLAS_ROOT ${DORIS_THIRD_LIB_INSTALL_DIR})
++
++ # Check if libopenblas exists in DORIS_THIRD_LIB_INSTALL_DIR
++ if(EXISTS "${DORIS_THIRD_LIB_INSTALL_DIR}/lib/libopenblas.a")
++ set(OpenBLAS_LIB "${DORIS_THIRD_LIB_INSTALL_DIR}/lib/libopenblas.a")
++ endif()
++ # Warn if OpenBLAS_LIB is not found; the BLAS/LAPACK fallback below is used
++ if(NOT OpenBLAS_LIB)
++ message(WARNING "OpenBLAS not found in DORIS_THIRD_LIB_INSTALL_DIR:
${DORIS_THIRD_LIB_INSTALL_DIR}")
++ endif()
+
+- find_package(LAPACK REQUIRED)
+- target_link_libraries(faiss PRIVATE ${LAPACK_LIBRARIES})
+- target_link_libraries(faiss_avx2 PRIVATE ${LAPACK_LIBRARIES})
+- target_link_libraries(faiss_avx512 PRIVATE ${LAPACK_LIBRARIES})
+- target_link_libraries(faiss_avx512_spr PRIVATE ${LAPACK_LIBRARIES})
+- target_link_libraries(faiss_sve PRIVATE ${LAPACK_LIBRARIES})
++ if(OpenBLAS_LIB)
++ set(OpenBLAS_LIBRARIES ${OpenBLAS_LIB})
++ set(OpenBLAS_FOUND TRUE)
++ message(STATUS "Found OpenBLAS in DORIS_THIRD_LIB_INSTALL_DIR:
${OpenBLAS_LIB}")
++ endif()
++ else()
++ message(WARNING "DORIS_THIRD_LIB_INSTALL_DIR is not defined. Please set
it to the directory where OpenBLAS is installed.")
++ endif()
++
++ if(OpenBLAS_FOUND)
++ message(STATUS "Using OpenBLAS: ${OpenBLAS_LIBRARIES}")
++ target_link_libraries(faiss PRIVATE ${OpenBLAS_LIBRARIES})
++ target_link_libraries(faiss_avx2 PRIVATE ${OpenBLAS_LIBRARIES})
++ target_link_libraries(faiss_avx512 PRIVATE ${OpenBLAS_LIBRARIES})
++ target_link_libraries(faiss_avx512_spr PRIVATE ${OpenBLAS_LIBRARIES})
++ target_link_libraries(faiss_sve PRIVATE ${OpenBLAS_LIBRARIES})
++ else()
++ # Fall back to separate BLAS and LAPACK if OpenBLAS is not found
++ find_package(BLAS REQUIRED)
++ target_link_libraries(faiss PRIVATE ${BLAS_LIBRARIES})
++ target_link_libraries(faiss_avx2 PRIVATE ${BLAS_LIBRARIES})
++ target_link_libraries(faiss_avx512 PRIVATE ${BLAS_LIBRARIES})
++ target_link_libraries(faiss_avx512_spr PRIVATE ${BLAS_LIBRARIES})
++ target_link_libraries(faiss_sve PRIVATE ${BLAS_LIBRARIES})
++
++ find_package(LAPACK REQUIRED)
++ target_link_libraries(faiss PRIVATE ${LAPACK_LIBRARIES})
++ target_link_libraries(faiss_avx2 PRIVATE ${LAPACK_LIBRARIES})
++ target_link_libraries(faiss_avx512 PRIVATE ${LAPACK_LIBRARIES})
++ target_link_libraries(faiss_avx512_spr PRIVATE ${LAPACK_LIBRARIES})
++ target_link_libraries(faiss_sve PRIVATE ${LAPACK_LIBRARIES})
++ endif()
+ endif()
+
+ install(TARGETS faiss
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 33d34782861..ce4d3c370fb 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -538,6 +538,19 @@ PUGIXML_NAME=pugixml-1.15.tar.gz
PUGIXML_SOURCE=pugixml-1.15
PUGIXML_MD5SUM="3b894c29455eb33a40b165c6e2de5895"
+# openblas
+OPENBLAS_DOWNLOAD="https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.29/OpenBLAS-0.3.29.tar.gz"
+OPENBLAS_NAME="OpenBLAS-0.3.29.tar.gz"
+OPENBLAS_SOURCE="OpenBLAS-0.3.29"
+OPENBLAS_MD5SUM="853a0c5c0747c5943e7ef4bbb793162d"
+
+# faiss
+FAISS_DOWNLOAD="https://github.com/facebookresearch/faiss/archive/refs/tags/v1.10.0.tar.gz"
+FAISS_NAME="faiss-1.10.0.tar.gz"
+FAISS_SOURCE="faiss-1.10.0"
+FAISS_MD5SUM="f31edf2492808b27cc963d0ab316a205"
+
+
# all thirdparties which need to be downloaded is set in array TP_ARCHIVES
export TP_ARCHIVES=(
'LIBEVENT'
@@ -618,6 +631,8 @@ export TP_ARCHIVES=(
'ICU'
'JINDOFS'
'PUGIXML'
+ 'OPENBLAS'
+ 'FAISS'
)
if [[ "$(uname -s)" == 'Darwin' ]]; then
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]