zhiqiang-hhhh commented on code in PR #60358:
URL: https://github.com/apache/doris/pull/60358#discussion_r2915435510


##########
be/src/olap/rowset/segment_v2/ann_index/ann_index_writer.cpp:
##########
@@ -151,16 +152,50 @@ int64_t AnnIndexColumnWriter::size() const {
 }
 
 Status AnnIndexColumnWriter::finish() {
+    vectorized::Int64 min_train_rows = _vector_index->get_min_train_rows();
+
+    // Check if we have enough rows to train the index
     // train/add the remaining data
-    if (!_float_array.empty()) {
+    if (_float_array.empty()) {
+        if (_need_save_index) {
+            return _vector_index->save(_dir.get());
+        } else {
+            // No data was added at all. This can happen if the segment has 0 rows
+            // or all rows were filtered out. We need to delete the directory entry
+            // to avoid writing an empty/invalid index file.
+            LOG_INFO("No data to train/add for ANN index. Skipping index building.");
+            return _index_file_writer->delete_index(_index_meta);
+        }
+    } else {
         DCHECK(_float_array.size() % _vector_index->get_dimension() == 0);
         vectorized::Int64 num_rows = _float_array.size() / _vector_index->get_dimension();
-        RETURN_IF_ERROR(_vector_index->train(num_rows, _float_array.data()));
-        RETURN_IF_ERROR(_vector_index->add(num_rows, _float_array.data()));
-        _float_array.clear();
+
+        if (num_rows >= min_train_rows) {
+            RETURN_IF_ERROR(_vector_index->train(num_rows, _float_array.data()));
+            RETURN_IF_ERROR(_vector_index->add(num_rows, _float_array.data()));
+            return _vector_index->save(_dir.get());
+        } else {
+            // It happens to have not enough data to train.
+            // If we have data to add before, we still need to save the index.
+            if (_need_save_index) {
+                RETURN_IF_ERROR(_vector_index->add(num_rows, _float_array.data()));

Review Comment:
   > [@zhiqiang-hhhh](https://github.com/zhiqiang-hhhh) need check this
   
   A comment has been added.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to