zhiqiang-hhhh commented on code in PR #58130:
URL: https://github.com/apache/doris/pull/58130#discussion_r2563490934


##########
fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java:
##########
@@ -3094,6 +3095,11 @@ public boolean isEnableESParallelScroll() {
                     "Whether to use a bounded priority queue to optimize HNSW 
search performance"})
     public boolean hnswBoundedQueue = true;
 
+    @VariableMgr.VarAttr(name = IVF_NPROBE, needForward = true,
+            description = {"IVF 索引的 nprobe 参数,控制搜索时访问的聚类数量",
+                    "IVF index nprobe parameter, controls the number of 
clusters to search"})
+    public int ivfNprobe = 1;

Review Comment:
   The default value is too small. Set it to a value that gives good recall 
on the SIFT 128D 1M dataset.



##########
be/src/vec/runtime/vector_search_user_params.h:
##########
@@ -26,10 +26,11 @@ struct VectorSearchUserParams {
     int hnsw_ef_search = 32;
     bool hnsw_check_relative_distance = true;
     bool hnsw_bounded_queue = true;
+    int ivf_nprobe = 1;

Review Comment:
   The default value is too small.



##########
be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.cpp:
##########
@@ -340,6 +346,62 @@ void FaissVectorIndex::build(const FaissBuildParameter& 
params) {
         hnsw_index->hnsw.efConstruction = params.ef_construction;
 
         _index = std::move(hnsw_index);
+    } else if (params.index_type == FaissBuildParameter::IndexType::IVF) {
+        set_type(AnnIndexType::IVF);
+        std::unique_ptr<faiss::Index> ivf_index;
+        if (params.metric_type == FaissBuildParameter::MetricType::L2) {
+            _quantizer = std::make_unique<faiss::IndexFlat>(params.dim, 
faiss::METRIC_L2);

Review Comment:
   What is _quantizer used for?



##########
be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.h:
##########
@@ -271,7 +276,8 @@ class FaissVectorIndex : public VectorIndex {
 
 private:
     std::unique_ptr<faiss::Index> _index = nullptr; ///< Underlying FAISS 
index instance
-    FaissBuildParameter _params;                    ///< Build parameters for 
the index
+    std::unique_ptr<faiss::Index> _quantizer = nullptr;

Review Comment:
   What is this used for?



##########
gensrc/thrift/PaloInternalService.thrift:
##########
@@ -416,6 +416,8 @@ struct TQueryOptions {
   // Default 0 means use config::iceberg_sink_max_file_size
   178: optional i64 iceberg_write_target_file_size_bytes = 0;
 
+  179: optional i32 ivf_nprobe = 1;

Review Comment:
   The default value is too small.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to