airborne12 commented on code in PR #60358:
URL: https://github.com/apache/doris/pull/60358#discussion_r2877768346


##########
be/src/olap/rowset/segment_v2/ann_index/ann_index_reader.cpp:
##########
@@ -87,23 +89,30 @@ Status AnnIndexReader::load_index(io::IOContext* io_ctx) {
             _vector_index->set_type(_index_type);
             RETURN_IF_ERROR(_vector_index->load(compound_dir->get()));
         } catch (CLuceneError& err) {
+            LOG_ERROR("Failed to load ann index: {}", err.what());
             return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
                     "CLuceneError occur when open ann idx file, error msg: 
{}", err.what());
         }
         return Status::OK();
     });
 }
 
-Status AnnIndexReader::query(io::IOContext* io_ctx, AnnTopNParam* param, 
AnnIndexStats* stats) {
+bool AnnIndexReader::try_load_index(io::IOContext* io_ctx) {
 #ifndef BE_TEST
-    {
-        SCOPED_TIMER(&(stats->load_index_costs_ns));
-        RETURN_IF_ERROR(load_index(io_ctx));
-        double load_costs_ms = 
static_cast<double>(stats->load_index_costs_ns.value()) / 1000.0;
-        DorisMetrics::instance()->ann_index_load_costs_ms->increment(
-                static_cast<int64_t>(load_costs_ms));
+    Status st = load_index(io_ctx);

Review Comment:
   @zhiqiang-hhhh, please check this.



##########
be/src/olap/rowset/segment_v2/ann_index/ann_index_writer.cpp:
##########
@@ -151,16 +152,50 @@ int64_t AnnIndexColumnWriter::size() const {
 }
 
 Status AnnIndexColumnWriter::finish() {
+    vectorized::Int64 min_train_rows = _vector_index->get_min_train_rows();
+
+    // Check if we have enough rows to train the index
     // train/add the remaining data
-    if (!_float_array.empty()) {
+    if (_float_array.empty()) {
+        if (_need_save_index) {
+            return _vector_index->save(_dir.get());
+        } else {
+            // No data was added at all. This can happen if the segment has 0 
rows
+            // or all rows were filtered out. We need to delete the directory 
entry
+            // to avoid writing an empty/invalid index file.
+            LOG_INFO("No data to train/add for ANN index. Skipping index 
building.");
+            return _index_file_writer->delete_index(_index_meta);
+        }
+    } else {
         DCHECK(_float_array.size() % _vector_index->get_dimension() == 0);
         vectorized::Int64 num_rows = _float_array.size() / 
_vector_index->get_dimension();
-        RETURN_IF_ERROR(_vector_index->train(num_rows, _float_array.data()));
-        RETURN_IF_ERROR(_vector_index->add(num_rows, _float_array.data()));
-        _float_array.clear();
+
+        if (num_rows >= min_train_rows) {
+            RETURN_IF_ERROR(_vector_index->train(num_rows, 
_float_array.data()));
+            RETURN_IF_ERROR(_vector_index->add(num_rows, _float_array.data()));
+            return _vector_index->save(_dir.get());
+        } else {
+            // It happens to have not enough data to train.
+            // If we have data to add before, we still need to save the index.
+            if (_need_save_index) {
+                RETURN_IF_ERROR(_vector_index->add(num_rows, 
_float_array.data()));

Review Comment:
   @zhiqiang-hhhh, please check this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to