This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 084ed46a158 [Exec](func) Support-simd-cal-knn-distance (#55572)
084ed46a158 is described below
commit 084ed46a158f379523b4729cc532dd94cd3773e2
Author: Xin Li <[email protected]>
AuthorDate: Tue Sep 2 23:24:21 2025 +0800
[Exec](func) Support-simd-cal-knn-distance (#55572)
---
be/src/vec/CMakeLists.txt | 1 +
.../functions/array/function_array_distance.cpp | 17 ++++++
.../vec/functions/array/function_array_distance.h | 68 +++++-----------------
3 files changed, 34 insertions(+), 52 deletions(-)
diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index e905d597a9c..0d2a1dd8ce9 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -27,6 +27,7 @@ add_library(Vec STATIC
${VEC_FILES}
)
+target_link_libraries(Vec PUBLIC faiss)
pch_reuse(Vec)
if (${BUILD_TASK_EXECUTOR_SIMULATOR} STREQUAL "ON")
diff --git a/be/src/vec/functions/array/function_array_distance.cpp b/be/src/vec/functions/array/function_array_distance.cpp
index 0bf33654812..0d7b40f2700 100644
--- a/be/src/vec/functions/array/function_array_distance.cpp
+++ b/be/src/vec/functions/array/function_array_distance.cpp
@@ -21,6 +21,23 @@
namespace doris::vectorized {
+FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
+// Cosine distance between two float vectors of length d: 1 - (x . y) / sqrt(|x|^2 * |y|^2).
+// Returns 2.0f when either squared norm is zero, which also covers d == 0.
+float CosineDistance::distance(const float* x, const float* y, size_t d) {
+    float dot_prod = 0;
+    float squared_x = 0;
+    float squared_y = 0;
+    // A single pass accumulates the dot product and both squared norms.
+    for (size_t i = 0; i < d; ++i) {
+        dot_prod += x[i] * y[i];
+        squared_x += x[i] * x[i];
+        squared_y += y[i] * y[i];
+    }
+    // division by zero check
+    if (squared_x == 0 || squared_y == 0) {
+        return 2.0f;
+    }
+    // std::sqrt picks the float overload (as L2Distance does); an unqualified sqrt may
+    // resolve to the double C function and silently promote the whole expression.
+    return 1 - dot_prod / std::sqrt(squared_x * squared_y);
+}
+FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+
void register_function_array_distance(SimpleFunctionFactory& factory) {
factory.register_function<FunctionArrayDistance<L1Distance>>();
factory.register_function<FunctionArrayDistance<L2Distance>>();
diff --git a/be/src/vec/functions/array/function_array_distance.h b/be/src/vec/functions/array/function_array_distance.h
index c1f10f693ac..f2c8c08dab5 100644
--- a/be/src/vec/functions/array/function_array_distance.h
+++ b/be/src/vec/functions/array/function_array_distance.h
@@ -17,6 +17,8 @@
#pragma once
+#include <faiss/impl/platform_macros.h>
+#include <faiss/utils/distances.h>
#include <gen_cpp/Types_types.h>
#include "vec/columns/column.h"
@@ -36,53 +38,31 @@ namespace doris::vectorized {
// L1 distance implementation used as FunctionArrayDistance<L1Distance>.
// The added distance() passes both d-element float buffers to faiss::fvec_L1; the removed
// lines are the former per-element State/accumulate/finalize form (sum of fabs(x - y)).
class L1Distance {
public:
static constexpr auto name = "l1_distance";
- struct State {
- float sum = 0;
- };
- static void accumulate(State& state, float x, float y) { state.sum +=
fabs(x - y); }
- static float finalize(const State& state) { return state.sum; }
+ static float distance(const float* x, const float* y, size_t d) {
+ return faiss::fvec_L1(x, y, d);
+ }
};
// L2 distance implementation used as FunctionArrayDistance<L2Distance>.
// The added distance() takes std::sqrt of the value returned by faiss::fvec_L2sqr(x, y, d);
// the removed lines summed (x - y) * (x - y) per element and applied sqrt in finalize().
class L2Distance {
public:
static constexpr auto name = "l2_distance";
- struct State {
- float sum = 0;
- };
- static void accumulate(State& state, float x, float y) { state.sum += (x -
y) * (x - y); }
- static float finalize(const State& state) { return sqrt(state.sum); }
+ static float distance(const float* x, const float* y, size_t d) {
+ return std::sqrt(faiss::fvec_L2sqr(x, y, d));
+ }
};
// Inner-product implementation; same static interface as the other distance classes.
// The added distance() returns faiss::fvec_inner_product(x, y, d) unchanged; the removed
// lines accumulated x * y per element through a State object.
class InnerProduct {
public:
static constexpr auto name = "inner_product";
- struct State {
- float sum = 0;
- };
- static void accumulate(State& state, float x, float y) { state.sum += x *
y; }
- static float finalize(const State& state) { return state.sum; }
+ static float distance(const float* x, const float* y, size_t d) {
+ return faiss::fvec_inner_product(x, y, d);
+ }
};
// Cosine distance implementation; same static interface as the other distance classes.
// distance() is only declared here: the same commit adds its definition to
// function_array_distance.cpp. The removed lines are the former inline
// State/accumulate/finalize form, which returned 2.0F when either squared norm was zero.
class CosineDistance {
public:
static constexpr auto name = "cosine_distance";
- struct State {
- float dot_prod = 0;
- float squared_x = 0;
- float squared_y = 0;
- };
- static void accumulate(State& state, float x, float y) {
- state.dot_prod += x * y;
- state.squared_x += x * x;
- state.squared_y += y * y;
- }
- static float finalize(const State& state) {
- // division by zero check
- if (state.squared_x == 0 || state.squared_y == 0) [[unlikely]] {
- return 2.0F;
- }
- return 1 - state.dot_prod / sqrt(state.squared_x * state.squared_y);
- }
+ static float distance(const float* x, const float* y, size_t d);
};
class L2DistanceApproximate : public L2Distance {
@@ -108,7 +88,7 @@ public:
// Reports a fixed argument count of 2.
size_t get_number_of_arguments() const override { return 2; }
// Result type of the function; `arguments` is not consulted.
// The added line constructs DataTypeFloat32 where the removed line constructed DataType.
DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
- return std::make_shared<DataType>();
+ return std::make_shared<DataTypeFloat32>();
}
Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
@@ -159,25 +139,9 @@ public:
"function {} have different input element sizes of
array: {} and {}",
get_name(), size1, size2);
}
-
- typename DistanceImpl::State st;
- for (ssize_t pos = offsets1[row - 1]; pos < offsets1[row]; ++pos) {
- // Calculate corresponding position in the second array
- ssize_t pos2 = offsets2[row - 1] + (pos - offsets1[row - 1]);
-
- if ((arr1.nested_nullmap_data &&
arr1.nested_nullmap_data[pos]) ||
- (arr2.nested_nullmap_data &&
arr2.nested_nullmap_data[pos2])) [[unlikely]] {
- return Status::RuntimeError(
- "function {} does not support arrays containing
null, null found at "
- "index {}",
- get_name(), pos);
- }
-
- DistanceImpl::accumulate(st, nested_col1->get_element(pos),
- nested_col2->get_element(pos2));
- }
-
- dst_data[row] = DistanceImpl::finalize(st);
+ // Each nested column is addressed through its own offsets: only the per-row sizes
+ // are checked to match above, not the absolute offsets of the two arrays.
+ dst_data[row] = DistanceImpl::distance(
+ nested_col1->get_data().data() + offsets1[row - 1],
+ nested_col2->get_data().data() + offsets2[row - 1], size1);
}
block.replace_by_position(result, std::move(dst));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]