This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch clucene-2.0
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene-2.0 by this push:
new 2a20b0d72e [fix](index compaction) Support merge null_bitmap during
index compaction (#178) (#180)
2a20b0d72e is described below
commit 2a20b0d72e6d05cb7025137823f874ff062bdebf
Author: qiye <[email protected]>
AuthorDate: Wed Jan 24 19:50:49 2024 +0800
[fix](index compaction) Support merge null_bitmap during index compaction
(#178) (#180)
Support merging null_bitmap during index compaction.
We read the null_bitmap files of the source indices and write them to new
ones according to the doc id mapping in the translation vector.
Also fix the cl_test compile error.
---
src/core/CLucene/index/IndexWriter.cpp | 95 ++++++++-
src/core/CLucene/index/IndexWriter.h | 16 ++
src/core/CLucene/util/stringUtil.h | 1 +
src/test/CMakeLists.txt | 1 +
src/test/index/TestIndexCompaction.cpp | 344 +++++++++++++++++++++++++++++++++
src/test/search/TestSearchRange.cpp | 2 +-
src/test/test.h | 1 +
src/test/tests.cpp | 1 +
8 files changed, 458 insertions(+), 3 deletions(-)
diff --git a/src/core/CLucene/index/IndexWriter.cpp
b/src/core/CLucene/index/IndexWriter.cpp
index 71cf4f2cac..e30abf5107 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -40,6 +40,13 @@
#include <memory>
#include <assert.h>
#include <iostream>
+#include <roaring/roaring.hh>
+
+#define FINALLY_CLOSE_OUTPUT(x) \
+ try { \
+ if (x != nullptr) x->close(); \
+ } catch (...) { \
+ }
CL_NS_USE(store)
CL_NS_USE(util)
@@ -50,6 +57,7 @@ CL_NS_DEF(index)
int64_t IndexWriter::WRITE_LOCK_TIMEOUT = 1000;
const char *IndexWriter::WRITE_LOCK_NAME = "write.lock";
+const char *IndexWriter::NULL_BITMAP_FILE_NAME = "null_bitmap";
std::ostream *IndexWriter::defaultInfoStream = NULL;
const int32_t IndexWriter::MERGE_READ_BUFFER_SIZE = 4096;
@@ -1255,18 +1263,43 @@ void
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
int numIndices = src_dirs.size();
//Set of IndexReaders
- if (infoStream != NULL) {
+ if (infoStream != nullptr) {
message(string("src index dir size: ") + Misc::toString(numIndices));
}
+
+ // first level vector index is src_index_id
+ // second level vector index is src_doc_id
+ std::vector<std::vector<uint32_t>> srcNullBitmapValues(numIndices);
+ IndexInput* null_bitmap_in = nullptr;
for (int32_t i = 0; i < numIndices; i++) {
// One index dir may have more than one segment, so we change the code
to open all segments by using IndexReader::open
// To keep the number of readers consistent with the number of src
dirs.
// Using IndexWriter::segmentInfos will be incorrect when there are
more than one segment in one index dir
IndexReader* reader = lucene::index::IndexReader::open(src_dirs[i],
MERGE_READ_BUFFER_SIZE, false);
readers.push_back(reader);
- if (infoStream != NULL) {
+ if (infoStream != nullptr) {
message(src_dirs[i]->toString());
}
+
+ // read null_bitmap and store values in srcBitmapValues
+ try {
+ if (src_dirs[i]->fileExists(NULL_BITMAP_FILE_NAME)) {
+ // get null_bitmap index input
+ null_bitmap_in = src_dirs[i]->openInput(NULL_BITMAP_FILE_NAME);
+ size_t null_bitmap_size = null_bitmap_in->length();
+ std::string buf;
+ buf.resize(null_bitmap_size);
+
null_bitmap_in->readBytes(reinterpret_cast<uint8_t*>(const_cast<char*>(buf.data())),
null_bitmap_size);
+ auto null_bitmap = roaring::Roaring::read(buf.data(), false);
+ null_bitmap.runOptimize();
+ for (unsigned int v : null_bitmap) {
+ srcNullBitmapValues[i].emplace_back(v);
+ }
+ FINALLY_CLOSE_OUTPUT(null_bitmap_in);
+ }
+ } catch (CLuceneError &e) {
+ FINALLY_CLOSE_OUTPUT(null_bitmap_in);
+ }
}
assert(readers.size() == numIndices);
@@ -1302,6 +1335,7 @@ void
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
docStoreSegment.clear();
std::vector<lucene::index::IndexWriter *> destIndexWriterList;
+ std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
try {
/// merge fields
mergeFields(hasProx);
@@ -1345,10 +1379,17 @@ void
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
skipInterval = termInfosWriter->skipInterval;
maxSkipLevels = termInfosWriter->maxSkipLevels;
skipListWriterList.push_back(_CLNEW
DefaultSkipListWriter(skipInterval, maxSkipLevels, (int) dest_index_docs[j],
freqOutputList[j], proxOutputList[j]));
+
+ // create null_bitmap index output
+ auto* null_bitmap_out =
dest_dir->createOutput(NULL_BITMAP_FILE_NAME);
+ nullBitmapIndexOutputList.push_back(null_bitmap_out);
}
/// merge terms
mergeTerms(hasProx);
+
+ /// merge null_bitmap
+ mergeNullBitmap(srcNullBitmapValues, nullBitmapIndexOutputList);
} catch (CLuceneError &e) {
throw e;
}
@@ -1387,6 +1428,13 @@ void
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
_CLDELETE(r);
}
} readers.clear(););
+ for (auto* null_bitmap_out
+ : nullBitmapIndexOutputList) {
+ if (null_bitmap_out != nullptr) {
+ null_bitmap_out->close();
+ _CLDELETE(null_bitmap_out);
+ }
+ } nullBitmapIndexOutputList.clear();
// update segment infos of dest index_writer in memory
// close dest index writer
@@ -1818,6 +1866,49 @@ void IndexWriter::mergeTerms(bool hasProx) {
}
}
+void IndexWriter::mergeNullBitmap(std::vector<std::vector<uint32_t>>
srcNullBitmapValues, std::vector<lucene::store::IndexOutput *>
nullBitmapIndexOutputList) {
+ // first level vector index is dest_index_id
+ // second level vector index is dest_doc_id
+ std::vector<std::vector<uint32_t>> destNullBitmapValues(numDestIndexes);
+
+ // iterate srcNullBitmapValues to construct destNullBitmapValues
+ for (size_t i = 0; i < srcNullBitmapValues.size(); ++i) {
+ std::vector<uint32_t> &indexSrcBitmapValues = srcNullBitmapValues[i];
+ if (indexSrcBitmapValues.empty()) {
+ // empty indicates there is no null_bitmap file in this index
+ continue;
+ }
+ for (const auto& srcDocId : indexSrcBitmapValues) {
+ auto destIdx = _trans_vec[i][srcDocId].first;
+ auto destDocId = _trans_vec[i][srcDocId].second;
+ // <UINT32_MAX, UINT32_MAX> indicates current row not exist in
Doris dest segment.
+ // So we ignore this doc here.
+ if (destIdx == UINT32_MAX || destDocId == UINT32_MAX) {
+ continue;
+ }
+ destNullBitmapValues[destIdx].emplace_back(destDocId);
+ }
+ }
+
+ // construct null_bitmap and write null_bitmap to dest index
+ for (size_t i = 0; i < destNullBitmapValues.size(); ++i) {
+ roaring::Roaring null_bitmap;
+ for (const auto& v : destNullBitmapValues[i]) {
+ null_bitmap.add(v);
+ }
+ // write null_bitmap file
+ auto* nullBitmapIndexOutput = nullBitmapIndexOutputList[i];
+ null_bitmap.runOptimize();
+ size_t size = null_bitmap.getSizeInBytes(false);
+ if (size > 0) {
+ std::string buf;
+ buf.resize(size);
+ null_bitmap.write(reinterpret_cast<char*>(buf.data()), false);
+
nullBitmapIndexOutput->writeBytes(reinterpret_cast<uint8_t*>(buf.data()), size);
+ }
+ }
+}
+
void
IndexWriter::addIndexesNoOptimize(CL_NS(util)::ArrayBase<CL_NS(store)::Directory
*> &dirs) {
ensureOpen();
diff --git a/src/core/CLucene/index/IndexWriter.h
b/src/core/CLucene/index/IndexWriter.h
index 719ce0e5dc..7cfb67d2ca 100644
--- a/src/core/CLucene/index/IndexWriter.h
+++ b/src/core/CLucene/index/IndexWriter.h
@@ -325,10 +325,21 @@ public:
void writeFields(lucene::store::Directory* d, std::string segment);
// merge terms and write files
void mergeTerms(bool hasProx);
+ // merge null_bitmap
+ void mergeNullBitmap(std::vector<std::vector<uint32_t>> srcBitmapValues,
std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList);
// Compare current index with the other
void compareIndexes(lucene::store::Directory* other);
+ // only for tests
+ void setNumDestIndexes(int32_t num_dest_indexes) {
+ numDestIndexes = num_dest_indexes;
+ }
+ // only for tests
+ void setTransVec(std::vector<std::vector<std::pair<uint32_t, uint32_t>>>
trans_vec) {
+ _trans_vec = std::move(trans_vec);
+ }
+
// Release the write lock, if needed.
SegmentInfos* segmentInfos;
@@ -414,6 +425,11 @@ public:
*/
static const char* WRITE_LOCK_NAME; //"write.lock";
+ /**
+ * Name of the null bitmap in the index.
+ */
+ static const char* NULL_BITMAP_FILE_NAME; //"null_bitmap";
+
/**
* @deprecated
* @see LogMergePolicy#DEFAULT_MERGE_FACTOR
diff --git a/src/core/CLucene/util/stringUtil.h
b/src/core/CLucene/util/stringUtil.h
index 070ebe9c24..4a022e3e24 100644
--- a/src/core/CLucene/util/stringUtil.h
+++ b/src/core/CLucene/util/stringUtil.h
@@ -12,6 +12,7 @@
#endif
#include <cstring>
+#include <assert.h>
#include "SSEUtil.h"
template <typename T>
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index b06f7f7d20..736a5f7ea4 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -85,6 +85,7 @@ SET(test_files ./tests.cpp
./search/spans/TestSpanExplanations.h
./search/spans/TestSpanExplanationsOfNonMatches.cpp
./search/spans/TestSpanExplanationsOfNonMatches.h
+ ./index/TestIndexCompaction.cpp
./index/TestIndexModifier.cpp
./index/TestIndexWriter.cpp
./index/TestIndexModifier.cpp
diff --git a/src/test/index/TestIndexCompaction.cpp
b/src/test/index/TestIndexCompaction.cpp
new file mode 100644
index 0000000000..1d49c59788
--- /dev/null
+++ b/src/test/index/TestIndexCompaction.cpp
@@ -0,0 +1,344 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License
+
+#include <cstdint>
+#include <iostream>
+#include <utility>
+#include <vector>
+#include "CLucene/debug/mem.h"
+#include "test.h"
+#include "CLucene/debug/error.h"
+#include "CLucene/index/IndexWriter.h"
+#include "CLucene/store/IndexInput.h"
+#include "CLucene/store/IndexOutput.h"
+#include "roaring/roaring.hh"
+
+void _setupSourceNullBitmapValues(std::vector<std::vector<uint32_t>>
&srcNullBitmapValues) {
+ srcNullBitmapValues.push_back(std::vector<uint32_t>{1, 2, 3});
+ srcNullBitmapValues.push_back(std::vector<uint32_t>{2, 3, 4});
+ srcNullBitmapValues.push_back(std::vector<uint32_t>{3, 4, 5});
+}
+
+void _setupTransVec(std::vector<std::vector<std::pair<uint32_t, uint32_t>>>&
trans_vec) {
+
+ trans_vec.resize(3);
+ for (int i = 0; i < 3; i++) {
+ trans_vec[i].resize(6);
+ }
+
+ trans_vec[0][0] = std::pair<uint32_t, uint32_t>{0, 1};
+ trans_vec[0][1] = std::pair<uint32_t, uint32_t>{0, 2};
+ trans_vec[0][2] = std::pair<uint32_t, uint32_t>{0, 5};
+ trans_vec[0][3] = std::pair<uint32_t, uint32_t>{0, 7};
+ trans_vec[0][4] = std::pair<uint32_t, uint32_t>{0, 3};
+ trans_vec[0][5] = std::pair<uint32_t, uint32_t>{0, 8};
+ trans_vec[1][0] = std::pair<uint32_t, uint32_t>{0, 4};
+ trans_vec[1][1] = std::pair<uint32_t, uint32_t>{0, 6};
+ trans_vec[1][2] = std::pair<uint32_t, uint32_t>{UINT32_MAX, UINT32_MAX};
+ trans_vec[1][3] = std::pair<uint32_t, uint32_t>{1, 1};
+ trans_vec[1][4] = std::pair<uint32_t, uint32_t>{1, 2};
+ trans_vec[1][5] = std::pair<uint32_t, uint32_t>{1, 9};
+ trans_vec[2][0] = std::pair<uint32_t, uint32_t>{1, 3};
+ trans_vec[2][1] = std::pair<uint32_t, uint32_t>{1, 4};
+ trans_vec[2][2] = std::pair<uint32_t, uint32_t>{1, 5};
+ trans_vec[2][3] = std::pair<uint32_t, uint32_t>{1, 6};
+ trans_vec[2][4] = std::pair<uint32_t, uint32_t>{1, 7};
+ trans_vec[2][5] = std::pair<uint32_t, uint32_t>{1, 8};
+}
+
+uint64_t _getNullBitmapCardinality(RAMDirectory& dir) {
+ IndexInput* null_bitmap_in = nullptr;
+ CLuceneError error;
+ dir.openInput(IndexWriter::NULL_BITMAP_FILE_NAME, null_bitmap_in, error);
+ if (error.number() != 0) {
+ return 0;
+ }
+ size_t null_bitmap_size = null_bitmap_in->length();
+ std::string buf;
+ buf.resize(null_bitmap_size);
+
null_bitmap_in->readBytes(reinterpret_cast<uint8_t*>(const_cast<char*>(buf.data())),
null_bitmap_size);
+ auto null_bitmap = roaring::Roaring::read(buf.data(), false);
+ null_bitmap.runOptimize();
+
+ // close resources
+ null_bitmap_in->close();
+ _CLLDELETE(null_bitmap_in);
+
+ return null_bitmap.cardinality();
+}
+
+// src segments -> dest segments
+// 3 -> 2
+// docs 18 -> 17
+// 1,2,3,4,5,6
+// 1,2,3,4,5,6 -> 1,2,3,4,5,6,7,8
+// 1,2,3,4,5,6 1,2,3,4,5,6,7,8,9
+//
+// null values
+// 1,2,3
+// 2,3,4 -> 2,5,7
+// 3,4,5 1,2,6,7,8
+void TestMergeNullBitmapWriteNullBitmap(CuTest *tc) {
+ lucene::analysis::SimpleAnalyzer<char> analyzer;
+ RAMDirectory dir;
+ auto* index_writer = _CLNEW lucene::index::IndexWriter(&dir, &analyzer,
true);
+ std::vector<std::vector<uint32_t>> srcNullBitmapValues;
+ std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
+
+ _setupSourceNullBitmapValues(srcNullBitmapValues);
+
+ // setup _trans_vec
+ // translation vec
+ // <<dest_idx_num, dest_docId>>
+ // the first level vector: index indicates src segment.
+ // the second level vector: index indicates row id of source segment,
+ // value indicates row id of destination segment.
+ // <UINT32_MAX, UINT32_MAX> indicates current row not exist.
+ std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec;
+ _setupTransVec(trans_vec);
+
+ RAMDirectory dest_dir1;
+ RAMDirectory dest_dir2;
+ auto* dest_output_index1 =
dest_dir1.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+ auto* dest_output_index2 =
dest_dir2.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+ nullBitmapIndexOutputList.push_back(dest_output_index1);
+ nullBitmapIndexOutputList.push_back(dest_output_index2);
+
+ try {
+ index_writer->setNumDestIndexes(2);
+ index_writer->setTransVec(trans_vec);
+ index_writer->mergeNullBitmap(srcNullBitmapValues,
nullBitmapIndexOutputList);
+ } catch (const std::exception& ex) {
+ std::cout << "Caught exception: " << ex.what() << std::endl;
+ } catch (...) {
+ std::cout << "merge null bitmap failed" << std::endl;
+ return;
+ }
+ dest_output_index1->close();
+ dest_output_index2->close();
+ _CLLDELETE(dest_output_index1);
+ _CLLDELETE(dest_output_index2);
+ nullBitmapIndexOutputList.clear();
+ index_writer->close();
+ _CLDELETE(index_writer);
+
+ // check cardinality
+ uint64_t source_cardinality = 0;
+ for (const auto& vec : srcNullBitmapValues) {
+ source_cardinality += vec.size();
+ }
+ auto dest_cardinality1 = _getNullBitmapCardinality(dest_dir1);
+ auto dest_cardinality2 = _getNullBitmapCardinality(dest_dir2);
+ auto dest_cardinality = dest_cardinality1 + dest_cardinality2;
+
+ // 9 = 8 + 1
+ CLUCENE_ASSERT(source_cardinality == (dest_cardinality + 1));
+
+ // release resources
+ dest_dir1.close();
+ dest_dir2.close();
+ dir.close();
+}
+
+void TestMergeNullBitmapEmptySrc(CuTest *tc) {
+ lucene::analysis::SimpleAnalyzer<char> analyzer;
+ RAMDirectory dir;
+ auto* index_writer = _CLNEW lucene::index::IndexWriter(&dir, &analyzer,
true);
+ // empty source bitmap values
+ std::vector<std::vector<uint32_t>> srcNullBitmapValues;
+ std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
+
+ std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec;
+ _setupTransVec(trans_vec);
+
+ RAMDirectory dest_dir1;
+ RAMDirectory dest_dir2;
+ auto* dest_output_index1 =
dest_dir1.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+ auto* dest_output_index2 =
dest_dir2.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+ nullBitmapIndexOutputList.push_back(dest_output_index1);
+ nullBitmapIndexOutputList.push_back(dest_output_index2);
+
+ try {
+ index_writer->setNumDestIndexes(2);
+ index_writer->setTransVec(trans_vec);
+ index_writer->mergeNullBitmap(srcNullBitmapValues,
nullBitmapIndexOutputList);
+ } catch (const std::exception& ex) {
+ std::cout << "Caught exception: " << ex.what() << std::endl;
+ } catch (...) {
+ std::cout << "merge null bitmap failed" << std::endl;
+ return;
+ }
+ dest_output_index1->close();
+ dest_output_index2->close();
+ _CLLDELETE(dest_output_index1);
+ _CLLDELETE(dest_output_index2);
+ nullBitmapIndexOutputList.clear();
+ index_writer->close();
+ _CLDELETE(index_writer);
+
+ // check cardinality
+ uint64_t source_cardinality = 0;
+ for (const auto& vec : srcNullBitmapValues) {
+ source_cardinality += vec.size();
+ }
+ auto dest_cardinality1 = _getNullBitmapCardinality(dest_dir1);
+ auto dest_cardinality2 = _getNullBitmapCardinality(dest_dir2);
+ auto dest_cardinality = dest_cardinality1 + dest_cardinality2;
+
+ // 0 = 0
+ CLUCENE_ASSERT(source_cardinality == dest_cardinality);
+
+ // release resources
+ dest_dir1.close();
+ dest_dir2.close();
+ dir.close();
+}
+
+void TestMergeNullBitmapEmptyIndexSrcBitmapValues(CuTest *tc) {
+ lucene::analysis::SimpleAnalyzer<char> analyzer;
+ RAMDirectory dir;
+ auto* index_writer = _CLNEW lucene::index::IndexWriter(&dir, &analyzer,
true);
+ // empty source bitmap values for every index
+ std::vector<std::vector<uint32_t>> srcNullBitmapValues;
+ srcNullBitmapValues.push_back(std::vector<uint32_t>());
+ srcNullBitmapValues.push_back(std::vector<uint32_t>());
+ srcNullBitmapValues.push_back(std::vector<uint32_t>());
+
+ std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
+
+ std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec;
+ _setupTransVec(trans_vec);
+
+ RAMDirectory dest_dir1;
+ RAMDirectory dest_dir2;
+ auto* dest_output_index1 =
dest_dir1.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+ auto* dest_output_index2 =
dest_dir2.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+ nullBitmapIndexOutputList.push_back(dest_output_index1);
+ nullBitmapIndexOutputList.push_back(dest_output_index2);
+
+ try {
+ index_writer->setNumDestIndexes(2);
+ index_writer->setTransVec(trans_vec);
+ index_writer->mergeNullBitmap(srcNullBitmapValues,
nullBitmapIndexOutputList);
+ } catch (const std::exception& ex) {
+ std::cout << "Caught exception: " << ex.what() << std::endl;
+ } catch (...) {
+ std::cout << "merge null bitmap failed" << std::endl;
+ return;
+ }
+ dest_output_index1->close();
+ dest_output_index2->close();
+ _CLLDELETE(dest_output_index1);
+ _CLLDELETE(dest_output_index2);
+ nullBitmapIndexOutputList.clear();
+ index_writer->close();
+ _CLDELETE(index_writer);
+
+ // check cardinality
+ uint64_t source_cardinality = 0;
+ for (const auto& vec : srcNullBitmapValues) {
+ source_cardinality += vec.size();
+ }
+ auto dest_cardinality1 = _getNullBitmapCardinality(dest_dir1);
+ auto dest_cardinality2 = _getNullBitmapCardinality(dest_dir2);
+ auto dest_cardinality = dest_cardinality1 + dest_cardinality2;
+
+ // 0 = 0
+ CLUCENE_ASSERT(source_cardinality == dest_cardinality);
+
+ // release resources
+ dest_dir1.close();
+ dest_dir2.close();
+ dir.close();
+}
+
+void TestMergeNullBitmapIgnoreDoc(CuTest *tc) {
+ lucene::analysis::SimpleAnalyzer<char> analyzer;
+ RAMDirectory dir;
+ auto* index_writer = _CLNEW lucene::index::IndexWriter(&dir, &analyzer,
true);
+ std::vector<std::vector<uint32_t>> srcNullBitmapValues;
+ _setupSourceNullBitmapValues(srcNullBitmapValues);
+
+ std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
+
+ // all docs in src index are ignored
+ std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec;
+ trans_vec.resize(srcNullBitmapValues.size());
+ for (int i = 0; i < trans_vec.size(); i++) {
+ trans_vec[i].resize(6);
+ }
+ for (int i = 0; i < srcNullBitmapValues.size(); i++) {
+ for (int j = 0; j < 6; j++) {
+ trans_vec[i][j] = std::pair<uint32_t, uint32_t>{UINT32_MAX,
UINT32_MAX};
+ }
+ }
+
+ RAMDirectory dest_dir1;
+ RAMDirectory dest_dir2;
+ auto* dest_output_index1 =
dest_dir1.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+ auto* dest_output_index2 =
dest_dir2.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+ nullBitmapIndexOutputList.push_back(dest_output_index1);
+ nullBitmapIndexOutputList.push_back(dest_output_index2);
+
+ try {
+ index_writer->setNumDestIndexes(2);
+ index_writer->setTransVec(trans_vec);
+ index_writer->mergeNullBitmap(srcNullBitmapValues,
nullBitmapIndexOutputList);
+ } catch (const std::exception& ex) {
+ std::cout << "Caught exception: " << ex.what() << std::endl;
+ } catch (...) {
+ std::cout << "merge null bitmap failed" << std::endl;
+ return;
+ }
+ dest_output_index1->close();
+ dest_output_index2->close();
+ _CLLDELETE(dest_output_index1);
+ _CLLDELETE(dest_output_index2);
+ nullBitmapIndexOutputList.clear();
+ index_writer->close();
+ _CLDELETE(index_writer);
+
+ // check cardinality
+ uint64_t source_cardinality = 0;
+ for (const auto& vec : srcNullBitmapValues) {
+ source_cardinality += vec.size();
+ }
+ auto dest_cardinality1 = _getNullBitmapCardinality(dest_dir1);
+ auto dest_cardinality2 = _getNullBitmapCardinality(dest_dir2);
+ auto dest_cardinality = dest_cardinality1 + dest_cardinality2;
+
+ // 9 = 0 + 9
+ CLUCENE_ASSERT(source_cardinality == dest_cardinality +
source_cardinality);
+
+ // release resources
+ dest_dir1.close();
+ dest_dir2.close();
+ dir.close();
+}
+
+
+
+CuSuite* testIndexCompaction() {
+ CuSuite* suite = CuSuiteNew(_T("CLucene Index Compaction Test"));
+
+ SUITE_ADD_TEST(suite, TestMergeNullBitmapWriteNullBitmap);
+ SUITE_ADD_TEST(suite, TestMergeNullBitmapEmptySrc);
+ SUITE_ADD_TEST(suite, TestMergeNullBitmapEmptyIndexSrcBitmapValues);
+ SUITE_ADD_TEST(suite, TestMergeNullBitmapIgnoreDoc);
+
+ return suite;
+}
\ No newline at end of file
diff --git a/src/test/search/TestSearchRange.cpp
b/src/test/search/TestSearchRange.cpp
index 82a4e17037..a94fb386d5 100644
--- a/src/test/search/TestSearchRange.cpp
+++ b/src/test/search/TestSearchRange.cpp
@@ -1087,7 +1087,7 @@ static void testSearchEqual(CuTest* tc) {
searcher._search(query, [&result2](DocRange* docRange) {
if (docRange->type_ == DocRangeType::kMany) {
result2.addMany(docRange->doc_many_size_,
- docRange->doc_many.data());
+ docRange->doc_many->data());
} else if (docRange->type_ == DocRangeType::kRange) {
result2.addRange(docRange->doc_range.first,
docRange->doc_range.second);
diff --git a/src/test/test.h b/src/test/test.h
index da1fde00e7..a6d72732e7 100644
--- a/src/test/test.h
+++ b/src/test/test.h
@@ -83,6 +83,7 @@ CuSuite *teststandard95(void);
CuSuite *testStrConvert(void);
CuSuite *testSearchRange(void);
CuSuite *testMultiPhraseQuery(void);
+CuSuite *testIndexCompaction(void);
#ifdef TEST_CONTRIB_LIBS
//CuSuite *testGermanAnalyzer(void);
diff --git a/src/test/tests.cpp b/src/test/tests.cpp
index 5ca803b9b0..282794fe43 100644
--- a/src/test/tests.cpp
+++ b/src/test/tests.cpp
@@ -49,6 +49,7 @@ unittest tests[] = {
{"strconvert", testStrConvert},
{"searchRange", testSearchRange},
{"MultiPhraseQuery", testMultiPhraseQuery},
+ {"IndexCompaction", testIndexCompaction},
#ifdef TEST_CONTRIB_LIBS
{"chinese", testchinese},
#endif
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]