This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch clucene-2.0
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene-2.0 by this push:
     new 2a20b0d72e [fix](index compaction) Support merge null_bitmap during 
index compaction (#178) (#180)
2a20b0d72e is described below

commit 2a20b0d72e6d05cb7025137823f874ff062bdebf
Author: qiye <[email protected]>
AuthorDate: Wed Jan 24 19:50:49 2024 +0800

    [fix](index compaction) Support merge null_bitmap during index compaction 
(#178) (#180)
    
    Support merge null_bitmap during index compaction.
    We read the null_bitmap files of the source indices and write them to new ones 
according to the doc id mapping in the translation vector.
    
    fix cl_test compile error
---
 src/core/CLucene/index/IndexWriter.cpp |  95 ++++++++-
 src/core/CLucene/index/IndexWriter.h   |  16 ++
 src/core/CLucene/util/stringUtil.h     |   1 +
 src/test/CMakeLists.txt                |   1 +
 src/test/index/TestIndexCompaction.cpp | 344 +++++++++++++++++++++++++++++++++
 src/test/search/TestSearchRange.cpp    |   2 +-
 src/test/test.h                        |   1 +
 src/test/tests.cpp                     |   1 +
 8 files changed, 458 insertions(+), 3 deletions(-)

diff --git a/src/core/CLucene/index/IndexWriter.cpp 
b/src/core/CLucene/index/IndexWriter.cpp
index 71cf4f2cac..e30abf5107 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -40,6 +40,13 @@
 #include <memory>
 #include <assert.h>
 #include <iostream>
+#include <roaring/roaring.hh>
+
+#define FINALLY_CLOSE_OUTPUT(x)       \
+    try {                             \
+        if (x != nullptr) x->close(); \
+    } catch (...) {                   \
+    }
 
 CL_NS_USE(store)
 CL_NS_USE(util)
@@ -50,6 +57,7 @@ CL_NS_DEF(index)
 
 int64_t IndexWriter::WRITE_LOCK_TIMEOUT = 1000;
 const char *IndexWriter::WRITE_LOCK_NAME = "write.lock";
+const char *IndexWriter::NULL_BITMAP_FILE_NAME = "null_bitmap";
 std::ostream *IndexWriter::defaultInfoStream = NULL;
 
 const int32_t IndexWriter::MERGE_READ_BUFFER_SIZE = 4096;
@@ -1255,18 +1263,43 @@ void 
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
     int numIndices = src_dirs.size();
 
     //Set of IndexReaders
-    if (infoStream != NULL) {
+    if (infoStream != nullptr) {
         message(string("src index dir size: ") + Misc::toString(numIndices));
     }
+
+    // first level vector index is src_index_id
+    // second level vector index is src_doc_id
+    std::vector<std::vector<uint32_t>> srcNullBitmapValues(numIndices);
+    IndexInput* null_bitmap_in = nullptr;
     for (int32_t i = 0; i < numIndices; i++) {
         // One index dir may have more than one segment, so we change the code 
to open all segments by using IndexReader::open
         // To keep the number of readers consistent with the number of src 
dirs.
         // Using IndexWriter::segmentInfos will be incorrect when there are 
more than one segment in one index dir
         IndexReader* reader = lucene::index::IndexReader::open(src_dirs[i], 
MERGE_READ_BUFFER_SIZE, false);
         readers.push_back(reader);
-        if (infoStream != NULL) {
+        if (infoStream != nullptr) {
             message(src_dirs[i]->toString());
         }
+
+        // read null_bitmap and store values in srcBitmapValues
+        try {
+            if (src_dirs[i]->fileExists(NULL_BITMAP_FILE_NAME)) {
+                // get null_bitmap index input
+                null_bitmap_in = src_dirs[i]->openInput(NULL_BITMAP_FILE_NAME);
+                size_t null_bitmap_size = null_bitmap_in->length();
+                std::string buf;
+                buf.resize(null_bitmap_size);
+                
null_bitmap_in->readBytes(reinterpret_cast<uint8_t*>(const_cast<char*>(buf.data())),
 null_bitmap_size);
+                auto null_bitmap = roaring::Roaring::read(buf.data(), false);
+                null_bitmap.runOptimize();
+                for (unsigned int v : null_bitmap) {
+                    srcNullBitmapValues[i].emplace_back(v);
+                }
+                FINALLY_CLOSE_OUTPUT(null_bitmap_in);
+            }
+        } catch (CLuceneError &e) {
+            FINALLY_CLOSE_OUTPUT(null_bitmap_in);
+        }
     }
     assert(readers.size() == numIndices);
 
@@ -1302,6 +1335,7 @@ void 
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
     docStoreSegment.clear();
 
     std::vector<lucene::index::IndexWriter *> destIndexWriterList;
+    std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
     try {
         /// merge fields
         mergeFields(hasProx);
@@ -1345,10 +1379,17 @@ void 
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
             skipInterval = termInfosWriter->skipInterval;
             maxSkipLevels = termInfosWriter->maxSkipLevels;
             skipListWriterList.push_back(_CLNEW 
DefaultSkipListWriter(skipInterval, maxSkipLevels, (int) dest_index_docs[j], 
freqOutputList[j], proxOutputList[j]));
+
+            // create null_bitmap index output
+            auto* null_bitmap_out = 
dest_dir->createOutput(NULL_BITMAP_FILE_NAME);
+            nullBitmapIndexOutputList.push_back(null_bitmap_out);
         }
 
         /// merge terms
         mergeTerms(hasProx);
+
+        /// merge null_bitmap
+        mergeNullBitmap(srcNullBitmapValues, nullBitmapIndexOutputList);
     } catch (CLuceneError &e) {
         throw e;
     }
@@ -1387,6 +1428,13 @@ void 
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
                     _CLDELETE(r);
                 }
             } readers.clear(););
+            for (auto* null_bitmap_out
+                 : nullBitmapIndexOutputList) {
+                if (null_bitmap_out != nullptr) {
+                    null_bitmap_out->close();
+                    _CLDELETE(null_bitmap_out);
+                }
+            } nullBitmapIndexOutputList.clear();
 
     // update segment infos of dest index_writer in memory
     // close dest index writer
@@ -1818,6 +1866,49 @@ void IndexWriter::mergeTerms(bool hasProx) {
     }
 }
 
+void IndexWriter::mergeNullBitmap(std::vector<std::vector<uint32_t>> 
srcNullBitmapValues, std::vector<lucene::store::IndexOutput *> 
nullBitmapIndexOutputList) {
+    // first level vector index is dest_index_id
+    // second level vector index is dest_doc_id
+    std::vector<std::vector<uint32_t>> destNullBitmapValues(numDestIndexes);
+
+    // iterate srcNullBitmapValues to construct destNullBitmapValues
+    for (size_t i = 0; i < srcNullBitmapValues.size(); ++i) {
+        std::vector<uint32_t> &indexSrcBitmapValues = srcNullBitmapValues[i];
+        if (indexSrcBitmapValues.empty()) {
+            // empty indicates there is no null_bitmap file in this index
+            continue;
+        }
+        for (const auto& srcDocId : indexSrcBitmapValues) {
+            auto destIdx = _trans_vec[i][srcDocId].first;
+            auto destDocId = _trans_vec[i][srcDocId].second;
+            // <UINT32_MAX, UINT32_MAX> indicates current row not exist in 
Doris dest segment.
+            // So we ignore this doc here.
+            if (destIdx == UINT32_MAX || destDocId == UINT32_MAX) {
+                continue;
+            }
+            destNullBitmapValues[destIdx].emplace_back(destDocId);
+        }
+    }
+
+    // construct null_bitmap and write null_bitmap to dest index
+    for (size_t i = 0; i < destNullBitmapValues.size(); ++i) {
+        roaring::Roaring null_bitmap;
+        for (const auto& v : destNullBitmapValues[i]) {
+            null_bitmap.add(v);
+        }
+        // write null_bitmap file
+        auto* nullBitmapIndexOutput = nullBitmapIndexOutputList[i];
+        null_bitmap.runOptimize();
+        size_t size = null_bitmap.getSizeInBytes(false);
+        if (size > 0) {
+            std::string buf;
+            buf.resize(size);
+            null_bitmap.write(reinterpret_cast<char*>(buf.data()), false);
+            
nullBitmapIndexOutput->writeBytes(reinterpret_cast<uint8_t*>(buf.data()), size);
+        }
+    }
+}
+
 void 
IndexWriter::addIndexesNoOptimize(CL_NS(util)::ArrayBase<CL_NS(store)::Directory
 *> &dirs) {
     ensureOpen();
 
diff --git a/src/core/CLucene/index/IndexWriter.h 
b/src/core/CLucene/index/IndexWriter.h
index 719ce0e5dc..7cfb67d2ca 100644
--- a/src/core/CLucene/index/IndexWriter.h
+++ b/src/core/CLucene/index/IndexWriter.h
@@ -325,10 +325,21 @@ public:
     void writeFields(lucene::store::Directory* d, std::string segment);
     // merge terms and write files
     void mergeTerms(bool hasProx);
+    // merge null_bitmap
+    void mergeNullBitmap(std::vector<std::vector<uint32_t>> srcBitmapValues, 
std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList);
 
     // Compare current index with the other
     void compareIndexes(lucene::store::Directory* other);
 
+    // only for tests
+    void setNumDestIndexes(int32_t num_dest_indexes) {
+        numDestIndexes = num_dest_indexes;
+    }
+    // only for tests
+    void setTransVec(std::vector<std::vector<std::pair<uint32_t, uint32_t>>> 
trans_vec) {
+      _trans_vec = std::move(trans_vec);
+    }
+
        // Release the write lock, if needed.
        SegmentInfos* segmentInfos;
 
@@ -414,6 +425,11 @@ public:
    */
   static const char* WRITE_LOCK_NAME; //"write.lock";
 
+  /**
+   * Name of the null bitmap in the index.
+   */
+  static const char* NULL_BITMAP_FILE_NAME; //"null_bitmap";
+
   /**
    * @deprecated
    * @see LogMergePolicy#DEFAULT_MERGE_FACTOR
diff --git a/src/core/CLucene/util/stringUtil.h 
b/src/core/CLucene/util/stringUtil.h
index 070ebe9c24..4a022e3e24 100644
--- a/src/core/CLucene/util/stringUtil.h
+++ b/src/core/CLucene/util/stringUtil.h
@@ -12,6 +12,7 @@
 #endif
 
 #include <cstring>
+#include <assert.h>
 #include "SSEUtil.h"
 
 template <typename T>
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index b06f7f7d20..736a5f7ea4 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -85,6 +85,7 @@ SET(test_files ./tests.cpp
         ./search/spans/TestSpanExplanations.h
         ./search/spans/TestSpanExplanationsOfNonMatches.cpp
         ./search/spans/TestSpanExplanationsOfNonMatches.h
+        ./index/TestIndexCompaction.cpp
         ./index/TestIndexModifier.cpp
         ./index/TestIndexWriter.cpp
         ./index/TestIndexModifier.cpp
diff --git a/src/test/index/TestIndexCompaction.cpp 
b/src/test/index/TestIndexCompaction.cpp
new file mode 100644
index 0000000000..1d49c59788
--- /dev/null
+++ b/src/test/index/TestIndexCompaction.cpp
@@ -0,0 +1,344 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License
+
+#include <cstdint>
+#include <iostream>
+#include <utility>
+#include <vector>
+#include "CLucene/debug/mem.h"
+#include "test.h"
+#include "CLucene/debug/error.h"
+#include "CLucene/index/IndexWriter.h"
+#include "CLucene/store/IndexInput.h"
+#include "CLucene/store/IndexOutput.h"
+#include "roaring/roaring.hh"
+
+void _setupSourceNullBitmapValues(std::vector<std::vector<uint32_t>> 
&srcNullBitmapValues) {
+    srcNullBitmapValues.push_back(std::vector<uint32_t>{1, 2, 3});
+    srcNullBitmapValues.push_back(std::vector<uint32_t>{2, 3, 4});
+    srcNullBitmapValues.push_back(std::vector<uint32_t>{3, 4, 5});
+}
+
+void _setupTransVec(std::vector<std::vector<std::pair<uint32_t, uint32_t>>>& 
trans_vec) {
+
+    trans_vec.resize(3);
+    for (int i = 0; i < 3; i++) {
+        trans_vec[i].resize(6);
+    }
+    
+    trans_vec[0][0] = std::pair<uint32_t, uint32_t>{0, 1};
+    trans_vec[0][1] = std::pair<uint32_t, uint32_t>{0, 2};
+    trans_vec[0][2] = std::pair<uint32_t, uint32_t>{0, 5};
+    trans_vec[0][3] = std::pair<uint32_t, uint32_t>{0, 7};
+    trans_vec[0][4] = std::pair<uint32_t, uint32_t>{0, 3};
+    trans_vec[0][5] = std::pair<uint32_t, uint32_t>{0, 8};
+    trans_vec[1][0] = std::pair<uint32_t, uint32_t>{0, 4};
+    trans_vec[1][1] = std::pair<uint32_t, uint32_t>{0, 6};
+    trans_vec[1][2] = std::pair<uint32_t, uint32_t>{UINT32_MAX, UINT32_MAX};
+    trans_vec[1][3] = std::pair<uint32_t, uint32_t>{1, 1};
+    trans_vec[1][4] = std::pair<uint32_t, uint32_t>{1, 2};
+    trans_vec[1][5] = std::pair<uint32_t, uint32_t>{1, 9};
+    trans_vec[2][0] = std::pair<uint32_t, uint32_t>{1, 3};
+    trans_vec[2][1] = std::pair<uint32_t, uint32_t>{1, 4};
+    trans_vec[2][2] = std::pair<uint32_t, uint32_t>{1, 5};
+    trans_vec[2][3] = std::pair<uint32_t, uint32_t>{1, 6};
+    trans_vec[2][4] = std::pair<uint32_t, uint32_t>{1, 7};
+    trans_vec[2][5] = std::pair<uint32_t, uint32_t>{1, 8};
+}
+
+uint64_t _getNullBitmapCardinality(RAMDirectory& dir) {
+    IndexInput* null_bitmap_in = nullptr;
+    CLuceneError error;
+    dir.openInput(IndexWriter::NULL_BITMAP_FILE_NAME, null_bitmap_in, error);
+    if (error.number() != 0) {
+        return 0;
+    }
+    size_t null_bitmap_size = null_bitmap_in->length();
+    std::string buf;
+    buf.resize(null_bitmap_size);
+    
null_bitmap_in->readBytes(reinterpret_cast<uint8_t*>(const_cast<char*>(buf.data())),
 null_bitmap_size);
+    auto null_bitmap = roaring::Roaring::read(buf.data(), false);
+    null_bitmap.runOptimize();
+
+    // close resources
+    null_bitmap_in->close();
+    _CLLDELETE(null_bitmap_in);
+
+    return null_bitmap.cardinality();
+}
+
+// src segments -> dest segments
+//           3  -> 2
+// docs      18 -> 17
+// 1,2,3,4,5,6
+// 1,2,3,4,5,6  -> 1,2,3,4,5,6,7,8 
+// 1,2,3,4,5,6     1,2,3,4,5,6,7,8,9
+//
+// null values
+// 1,2,3
+// 2,3,4        -> 2,5,7
+// 3,4,5           1,2,6,7,8
+void TestMergeNullBitmapWriteNullBitmap(CuTest *tc) {
+    lucene::analysis::SimpleAnalyzer<char> analyzer;
+    RAMDirectory dir;
+    auto* index_writer = _CLNEW lucene::index::IndexWriter(&dir, &analyzer, 
true);
+    std::vector<std::vector<uint32_t>> srcNullBitmapValues;
+    std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
+
+    _setupSourceNullBitmapValues(srcNullBitmapValues);
+
+    // setup _trans_vec
+    // translation vec
+    // <<dest_idx_num, dest_docId>>
+    // the first level vector: index indicates src segment.
+    // the second level vector: index indicates row id of source segment,
+    // value indicates row id of destination segment.
+    // <UINT32_MAX, UINT32_MAX> indicates current row not exist.
+    std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec;
+    _setupTransVec(trans_vec);
+
+    RAMDirectory dest_dir1;
+    RAMDirectory dest_dir2;
+    auto* dest_output_index1 = 
dest_dir1.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    auto* dest_output_index2 = 
dest_dir2.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    nullBitmapIndexOutputList.push_back(dest_output_index1);
+    nullBitmapIndexOutputList.push_back(dest_output_index2);
+
+    try {
+        index_writer->setNumDestIndexes(2);
+        index_writer->setTransVec(trans_vec);
+        index_writer->mergeNullBitmap(srcNullBitmapValues, 
nullBitmapIndexOutputList);
+    } catch (const std::exception& ex) {
+        std::cout << "Caught exception: " << ex.what() << std::endl;
+    } catch (...) {
+        std::cout << "merge null bitmap failed" << std::endl;
+        return;
+    }
+    dest_output_index1->close();
+    dest_output_index2->close();
+    _CLLDELETE(dest_output_index1);
+    _CLLDELETE(dest_output_index2);
+    nullBitmapIndexOutputList.clear();
+    index_writer->close();
+    _CLDELETE(index_writer);
+
+    // check cardinality
+    uint64_t source_cardinality = 0;
+    for (const auto& vec : srcNullBitmapValues) {
+        source_cardinality += vec.size();
+    }
+    auto dest_cardinality1 = _getNullBitmapCardinality(dest_dir1);
+    auto dest_cardinality2 = _getNullBitmapCardinality(dest_dir2);
+    auto dest_cardinality = dest_cardinality1 + dest_cardinality2;
+    
+    // 9 = 8 + 1
+    CLUCENE_ASSERT(source_cardinality == (dest_cardinality + 1));
+    
+    // release resources
+    dest_dir1.close();
+    dest_dir2.close();
+    dir.close();
+}
+
+void TestMergeNullBitmapEmptySrc(CuTest *tc) {
+    lucene::analysis::SimpleAnalyzer<char> analyzer;
+    RAMDirectory dir;
+    auto* index_writer = _CLNEW lucene::index::IndexWriter(&dir, &analyzer, 
true);
+    // empty source bitmap values
+    std::vector<std::vector<uint32_t>> srcNullBitmapValues;
+    std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
+
+    std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec;
+    _setupTransVec(trans_vec);
+
+    RAMDirectory dest_dir1;
+    RAMDirectory dest_dir2;
+    auto* dest_output_index1 = 
dest_dir1.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    auto* dest_output_index2 = 
dest_dir2.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    nullBitmapIndexOutputList.push_back(dest_output_index1);
+    nullBitmapIndexOutputList.push_back(dest_output_index2);
+
+    try {
+        index_writer->setNumDestIndexes(2);
+        index_writer->setTransVec(trans_vec);
+        index_writer->mergeNullBitmap(srcNullBitmapValues, 
nullBitmapIndexOutputList);
+    } catch (const std::exception& ex) {
+        std::cout << "Caught exception: " << ex.what() << std::endl;
+    } catch (...) {
+        std::cout << "merge null bitmap failed" << std::endl;
+        return;
+    }
+    dest_output_index1->close();
+    dest_output_index2->close();
+    _CLLDELETE(dest_output_index1);
+    _CLLDELETE(dest_output_index2);
+    nullBitmapIndexOutputList.clear();
+    index_writer->close();
+    _CLDELETE(index_writer);
+
+    // check cardinality
+    uint64_t source_cardinality = 0;
+    for (const auto& vec : srcNullBitmapValues) {
+        source_cardinality += vec.size();
+    }
+    auto dest_cardinality1 = _getNullBitmapCardinality(dest_dir1);
+    auto dest_cardinality2 = _getNullBitmapCardinality(dest_dir2);
+    auto dest_cardinality = dest_cardinality1 + dest_cardinality2;
+    
+    // 0 = 0
+    CLUCENE_ASSERT(source_cardinality == dest_cardinality);
+    
+    // release resources
+    dest_dir1.close();
+    dest_dir2.close();
+    dir.close();
+}
+
+void TestMergeNullBitmapEmptyIndexSrcBitmapValues(CuTest *tc) {
+    lucene::analysis::SimpleAnalyzer<char> analyzer;
+    RAMDirectory dir;
+    auto* index_writer = _CLNEW lucene::index::IndexWriter(&dir, &analyzer, 
true);
+    // empty source bitmap values for every index
+    std::vector<std::vector<uint32_t>> srcNullBitmapValues;
+    srcNullBitmapValues.push_back(std::vector<uint32_t>());
+    srcNullBitmapValues.push_back(std::vector<uint32_t>());
+    srcNullBitmapValues.push_back(std::vector<uint32_t>());
+
+    std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
+
+    std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec;
+    _setupTransVec(trans_vec);
+
+    RAMDirectory dest_dir1;
+    RAMDirectory dest_dir2;
+    auto* dest_output_index1 = 
dest_dir1.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    auto* dest_output_index2 = 
dest_dir2.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    nullBitmapIndexOutputList.push_back(dest_output_index1);
+    nullBitmapIndexOutputList.push_back(dest_output_index2);
+
+    try {
+        index_writer->setNumDestIndexes(2);
+        index_writer->setTransVec(trans_vec);
+        index_writer->mergeNullBitmap(srcNullBitmapValues, 
nullBitmapIndexOutputList);
+    } catch (const std::exception& ex) {
+        std::cout << "Caught exception: " << ex.what() << std::endl;
+    } catch (...) {
+        std::cout << "merge null bitmap failed" << std::endl;
+        return;
+    }
+    dest_output_index1->close();
+    dest_output_index2->close();
+    _CLLDELETE(dest_output_index1);
+    _CLLDELETE(dest_output_index2);
+    nullBitmapIndexOutputList.clear();
+    index_writer->close();
+    _CLDELETE(index_writer);
+
+    // check cardinality
+    uint64_t source_cardinality = 0;
+    for (const auto& vec : srcNullBitmapValues) {
+        source_cardinality += vec.size();
+    }
+    auto dest_cardinality1 = _getNullBitmapCardinality(dest_dir1);
+    auto dest_cardinality2 = _getNullBitmapCardinality(dest_dir2);
+    auto dest_cardinality = dest_cardinality1 + dest_cardinality2;
+    
+    // 0 = 0
+    CLUCENE_ASSERT(source_cardinality == dest_cardinality);
+    
+    // release resources
+    dest_dir1.close();
+    dest_dir2.close();
+    dir.close();
+}
+
+void TestMergeNullBitmapIgnoreDoc(CuTest *tc) {
+    lucene::analysis::SimpleAnalyzer<char> analyzer;
+    RAMDirectory dir;
+    auto* index_writer = _CLNEW lucene::index::IndexWriter(&dir, &analyzer, 
true);
+    std::vector<std::vector<uint32_t>> srcNullBitmapValues;
+    _setupSourceNullBitmapValues(srcNullBitmapValues);
+
+    std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
+
+    // all docs in src index are ignored
+    std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec;
+    trans_vec.resize(srcNullBitmapValues.size());
+    for (int i = 0; i < trans_vec.size(); i++) {
+        trans_vec[i].resize(6);
+    }
+    for (int i = 0; i < srcNullBitmapValues.size(); i++) {
+        for (int j = 0; j < 6; j++) {
+            trans_vec[i][j] = std::pair<uint32_t, uint32_t>{UINT32_MAX, 
UINT32_MAX};
+        }
+    }
+
+    RAMDirectory dest_dir1;
+    RAMDirectory dest_dir2;
+    auto* dest_output_index1 = 
dest_dir1.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    auto* dest_output_index2 = 
dest_dir2.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    nullBitmapIndexOutputList.push_back(dest_output_index1);
+    nullBitmapIndexOutputList.push_back(dest_output_index2);
+
+    try {
+        index_writer->setNumDestIndexes(2);
+        index_writer->setTransVec(trans_vec);
+        index_writer->mergeNullBitmap(srcNullBitmapValues, 
nullBitmapIndexOutputList);
+    } catch (const std::exception& ex) {
+        std::cout << "Caught exception: " << ex.what() << std::endl;
+    } catch (...) {
+        std::cout << "merge null bitmap failed" << std::endl;
+        return;
+    }
+    dest_output_index1->close();
+    dest_output_index2->close();
+    _CLLDELETE(dest_output_index1);
+    _CLLDELETE(dest_output_index2);
+    nullBitmapIndexOutputList.clear();
+    index_writer->close();
+    _CLDELETE(index_writer);
+
+    // check cardinality
+    uint64_t source_cardinality = 0;
+    for (const auto& vec : srcNullBitmapValues) {
+        source_cardinality += vec.size();
+    }
+    auto dest_cardinality1 = _getNullBitmapCardinality(dest_dir1);
+    auto dest_cardinality2 = _getNullBitmapCardinality(dest_dir2);
+    auto dest_cardinality = dest_cardinality1 + dest_cardinality2;
+
+    // 9 = 0 + 9
+    CLUCENE_ASSERT(source_cardinality == dest_cardinality + 
source_cardinality);
+    
+    // release resources
+    dest_dir1.close();
+    dest_dir2.close();
+    dir.close();
+}
+
+
+
+CuSuite* testIndexCompaction() {
+    CuSuite* suite = CuSuiteNew(_T("CLucene Index Compaction Test"));
+
+    SUITE_ADD_TEST(suite, TestMergeNullBitmapWriteNullBitmap);
+    SUITE_ADD_TEST(suite, TestMergeNullBitmapEmptySrc);
+    SUITE_ADD_TEST(suite, TestMergeNullBitmapEmptyIndexSrcBitmapValues);
+    SUITE_ADD_TEST(suite, TestMergeNullBitmapIgnoreDoc);
+
+    return suite;
+}
\ No newline at end of file
diff --git a/src/test/search/TestSearchRange.cpp 
b/src/test/search/TestSearchRange.cpp
index 82a4e17037..a94fb386d5 100644
--- a/src/test/search/TestSearchRange.cpp
+++ b/src/test/search/TestSearchRange.cpp
@@ -1087,7 +1087,7 @@ static void testSearchEqual(CuTest* tc) {
           searcher._search(query, [&result2](DocRange* docRange) {
             if (docRange->type_ == DocRangeType::kMany) {
               result2.addMany(docRange->doc_many_size_,
-                              docRange->doc_many.data());
+                              docRange->doc_many->data());
             } else if (docRange->type_ == DocRangeType::kRange) {
               result2.addRange(docRange->doc_range.first,
                                docRange->doc_range.second);
diff --git a/src/test/test.h b/src/test/test.h
index da1fde00e7..a6d72732e7 100644
--- a/src/test/test.h
+++ b/src/test/test.h
@@ -83,6 +83,7 @@ CuSuite *teststandard95(void);
 CuSuite *testStrConvert(void);
 CuSuite *testSearchRange(void);
 CuSuite *testMultiPhraseQuery(void);
+CuSuite *testIndexCompaction(void);
 
 #ifdef TEST_CONTRIB_LIBS
 //CuSuite *testGermanAnalyzer(void);
diff --git a/src/test/tests.cpp b/src/test/tests.cpp
index 5ca803b9b0..282794fe43 100644
--- a/src/test/tests.cpp
+++ b/src/test/tests.cpp
@@ -49,6 +49,7 @@ unittest tests[] = {
         {"strconvert", testStrConvert},
         {"searchRange", testSearchRange},
         {"MultiPhraseQuery", testMultiPhraseQuery},
+        {"IndexCompaction", testIndexCompaction},
 #ifdef TEST_CONTRIB_LIBS
         {"chinese", testchinese},
 #endif


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to