This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 084ed46a158 [Exec](func) Support-simd-cal-knn-distance (#55572)
084ed46a158 is described below

commit 084ed46a158f379523b4729cc532dd94cd3773e2
Author: Xin Li <[email protected]>
AuthorDate: Tue Sep 2 23:24:21 2025 +0800

    [Exec](func) Support-simd-cal-knn-distance (#55572)
---
 be/src/vec/CMakeLists.txt                          |  1 +
 .../functions/array/function_array_distance.cpp    | 17 ++++++
 .../vec/functions/array/function_array_distance.h  | 68 +++++-----------------
 3 files changed, 34 insertions(+), 52 deletions(-)

diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index e905d597a9c..0d2a1dd8ce9 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -27,6 +27,7 @@ add_library(Vec STATIC
         ${VEC_FILES}
 )
 
+target_link_libraries(Vec PUBLIC faiss)
 pch_reuse(Vec)
 
 if (${BUILD_TASK_EXECUTOR_SIMULATOR} STREQUAL "ON")
diff --git a/be/src/vec/functions/array/function_array_distance.cpp b/be/src/vec/functions/array/function_array_distance.cpp
index 0bf33654812..0d7b40f2700 100644
--- a/be/src/vec/functions/array/function_array_distance.cpp
+++ b/be/src/vec/functions/array/function_array_distance.cpp
@@ -21,6 +21,23 @@
 
 namespace doris::vectorized {
 
+FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
+float CosineDistance::distance(const float* x, const float* y, size_t d) {
+    float dot_prod = 0;
+    float squared_x = 0;
+    float squared_y = 0;
+    for (size_t i = 0; i < d; ++i) {
+        dot_prod += x[i] * y[i];
+        squared_x += x[i] * x[i];
+        squared_y += y[i] * y[i];
+    }
+    if (squared_x == 0 or squared_y == 0) {
+        return 2.0f;
+    }
+    return 1 - dot_prod / sqrt(squared_x * squared_y);
+}
+FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+
 void register_function_array_distance(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionArrayDistance<L1Distance>>();
     factory.register_function<FunctionArrayDistance<L2Distance>>();
diff --git a/be/src/vec/functions/array/function_array_distance.h b/be/src/vec/functions/array/function_array_distance.h
index c1f10f693ac..f2c8c08dab5 100644
--- a/be/src/vec/functions/array/function_array_distance.h
+++ b/be/src/vec/functions/array/function_array_distance.h
@@ -17,6 +17,8 @@
 
 #pragma once
 
+#include <faiss/impl/platform_macros.h>
+#include <faiss/utils/distances.h>
 #include <gen_cpp/Types_types.h>
 
 #include "vec/columns/column.h"
@@ -36,53 +38,31 @@ namespace doris::vectorized {
 class L1Distance {
 public:
     static constexpr auto name = "l1_distance";
-    struct State {
-        float sum = 0;
-    };
-    static void accumulate(State& state, float x, float y) { state.sum += fabs(x - y); }
-    static float finalize(const State& state) { return state.sum; }
+    static float distance(const float* x, const float* y, size_t d) {
+        return faiss::fvec_L1(x, y, d);
+    }
 };
 
 class L2Distance {
 public:
     static constexpr auto name = "l2_distance";
-    struct State {
-        float sum = 0;
-    };
-    static void accumulate(State& state, float x, float y) { state.sum += (x - y) * (x - y); }
-    static float finalize(const State& state) { return sqrt(state.sum); }
+    static float distance(const float* x, const float* y, size_t d) {
+        return std::sqrt(faiss::fvec_L2sqr(x, y, d));
+    }
 };
 
 class InnerProduct {
 public:
     static constexpr auto name = "inner_product";
-    struct State {
-        float sum = 0;
-    };
-    static void accumulate(State& state, float x, float y) { state.sum += x * y; }
-    static float finalize(const State& state) { return state.sum; }
+    static float distance(const float* x, const float* y, size_t d) {
+        return faiss::fvec_inner_product(x, y, d);
+    }
 };
 
 class CosineDistance {
 public:
     static constexpr auto name = "cosine_distance";
-    struct State {
-        float dot_prod = 0;
-        float squared_x = 0;
-        float squared_y = 0;
-    };
-    static void accumulate(State& state, float x, float y) {
-        state.dot_prod += x * y;
-        state.squared_x += x * x;
-        state.squared_y += y * y;
-    }
-    static float finalize(const State& state) {
-        // division by zero check
-        if (state.squared_x == 0 || state.squared_y == 0) [[unlikely]] {
-            return 2.0F;
-        }
-        return 1 - state.dot_prod / sqrt(state.squared_x * state.squared_y);
-    }
+    static float distance(const float* x, const float* y, size_t d);
 };
 
 class L2DistanceApproximate : public L2Distance {
@@ -108,7 +88,7 @@ public:
     size_t get_number_of_arguments() const override { return 2; }
 
     DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
-        return std::make_shared<DataType>();
+        return std::make_shared<DataTypeFloat32>();
     }
 
     Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
@@ -159,25 +139,9 @@ public:
                         "function {} have different input element sizes of array: {} and {}",
                         get_name(), size1, size2);
             }
-
-            typename DistanceImpl::State st;
-            for (ssize_t pos = offsets1[row - 1]; pos < offsets1[row]; ++pos) {
-                // Calculate corresponding position in the second array
-                ssize_t pos2 = offsets2[row - 1] + (pos - offsets1[row - 1]);
-
-                if ((arr1.nested_nullmap_data && arr1.nested_nullmap_data[pos]) ||
-                    (arr2.nested_nullmap_data && arr2.nested_nullmap_data[pos2])) [[unlikely]] {
-                    return Status::RuntimeError(
-                            "function {} does not support arrays containing null, null found at "
-                            "index {}",
-                            get_name(), pos);
-                }
-
-                DistanceImpl::accumulate(st, nested_col1->get_element(pos),
-                                         nested_col2->get_element(pos2));
-            }
-
-            dst_data[row] = DistanceImpl::finalize(st);
+            dst_data[row] = DistanceImpl::distance(
+                    nested_col1->get_data().data() + offsets1[row - 1],
+                    nested_col2->get_data().data() + offsets1[row - 1], size1);
         }
 
         block.replace_by_position(result, std::move(dst));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to