This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch clucene-2.0
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene-2.0 by this push:
new a071cc5d20 [Pick] (ut) make unittest working for clucene-2.0 (#190)
a071cc5d20 is described below
commit a071cc5d20a1f5a703c20d9be1b5f185040bd976
Author: airborne12 <[email protected]>
AuthorDate: Fri Feb 2 18:25:11 2024 +0800
[Pick] (ut) make unittest working for clucene-2.0 (#190)
* [Update](unitest) make unitest work for clucene (#160)
* [unitest](tokenizer) fix chinese tokenizer unitest (#164)
* [Fix](test) fix unitest error
---
src/test/contribs-lib/analysis/testChinese.cpp | 5 +
src/test/util/TestBKD.cpp | 560 +++++++++----------------
src/test/util/TestBKD.h | 21 +-
3 files changed, 204 insertions(+), 382 deletions(-)
diff --git a/src/test/contribs-lib/analysis/testChinese.cpp b/src/test/contribs-lib/analysis/testChinese.cpp
index ab36589523..ac31599bd5 100644
--- a/src/test/contribs-lib/analysis/testChinese.cpp
+++ b/src/test/contribs-lib/analysis/testChinese.cpp
@@ -362,6 +362,7 @@ void testSimpleJiebaTokenizer2(CuTest* tc) {
a.setLanguage(_T("chinese"));
a.setStem(false);
a.setMode(lucene::analysis::AnalyzerMode::Default);
+ a.initDict("./dict");
ts = a.tokenStream(_T("contents"), stringReader.get());
CLUCENE_ASSERT(ts->next(&t) != NULL);
@@ -393,6 +394,7 @@ void testSimpleJiebaTokenizer3(CuTest* tc) {
a.setLanguage(_T("chinese"));
a.setStem(false);
a.setMode(lucene::analysis::AnalyzerMode::Default);
+ a.initDict("./dict");
ts = a.tokenStream(_T("contents"), stringReader.get());
CLUCENE_ASSERT(ts->next(&t) != NULL);
@@ -412,6 +414,7 @@ void testSimpleJiebaTokenizer4(CuTest* tc) {
//test with chinese
a.setLanguage(_T("chinese"));
a.setStem(false);
+ a.initDict("./dict");
ts = a.tokenStream(_T("contents"), stringReader.get());
CLUCENE_ASSERT(ts->next(&t) != NULL);
@@ -509,6 +512,7 @@ void testJiebaMatch(CuTest* tc) {
auto analyzer =
std::make_unique<lucene::analysis::LanguageBasedAnalyzer>();
analyzer->setLanguage(L"chinese");
analyzer->setMode(lucene::analysis::AnalyzerMode::Default);
+ analyzer->initDict("./dict");
IndexWriter w(&dir, analyzer.get(), true);
w.setUseCompoundFile(false);
@@ -596,6 +600,7 @@ void testJiebaMatch2(CuTest* tc) {
auto analyzer =
std::make_unique<lucene::analysis::LanguageBasedAnalyzer>();
analyzer->setLanguage(L"chinese");
analyzer->setMode(lucene::analysis::AnalyzerMode::Default);
+ analyzer->initDict("./dict");
IndexWriter w(&dir, analyzer.get(), true);
w.setUseCompoundFile(false);
diff --git a/src/test/util/TestBKD.cpp b/src/test/util/TestBKD.cpp
index 330f72f739..eba9c451dc 100644
--- a/src/test/util/TestBKD.cpp
+++ b/src/test/util/TestBKD.cpp
@@ -36,30 +36,25 @@ void TestVisitor1::visit(int docID) {
}
}
-int TestVisitor1::matches(uint8_t* packedValue) {
+bool TestVisitor1::matches(uint8_t *packedValue) {
std::vector<uint8_t> result(4);
std::copy(packedValue, packedValue + 4, result.begin());
int x = NumericUtils::sortableBytesToInt(result, 0);
if (x >= queryMin && x <= queryMax) {
- return 0;
- }
- if (x < queryMin) {
- return -1;
- }
- if (x > queryMax) {
- return 1;
+ return true;
}
+ return false;
}
void TestVisitor1::visit(roaring::Roaring *docID, std::vector<uint8_t>
&packedValue) {
- if (matches(packedValue.data()) != 0) {
+ if (!matches(packedValue.data())) {
return;
}
visit(*docID);
}
void TestVisitor1::visit(bkd::bkd_docid_set_iterator *iter,
std::vector<uint8_t> &packedValue) {
- if (matches(packedValue.data()) != 0) {
+ if (!matches(packedValue.data())) {
return;
}
int32_t docID = iter->docid_set->nextDoc();
@@ -69,7 +64,8 @@ void TestVisitor1::visit(bkd::bkd_docid_set_iterator *iter,
std::vector<uint8_t>
}
}
-int TestVisitor1::visit(int docID, std::vector<uint8_t>& packedValue) {
+void TestVisitor1::visit(
+ int docID, std::vector<uint8_t> &packedValue) {
int x = NumericUtils::sortableBytesToInt(packedValue, 0);
if (0) {
wcout << L"visit docID=" << docID << L" x=" << x << endl;
@@ -77,29 +73,17 @@ int TestVisitor1::visit(int docID, std::vector<uint8_t>&
packedValue) {
if (x >= queryMin && x <= queryMax) {
//wcout << L"visit docID=" << docID << L" x=" << x << endl;
hits->set(docID);
- return 0;
- }
- if (x < queryMin) {
- return -1;
}
- if (x > queryMax) {
- return 1;
- }
- return 0;
-}
-
-lucene::util::bkd::relation TestVisitor1::compare_prefix(std::vector<uint8_t>&
prefix) {
- return lucene::util::bkd::relation::CELL_CROSSES_QUERY;
}
-lucene::util::bkd::relation TestVisitor1::compare(std::vector<uint8_t>&
minPacked,
- std::vector<uint8_t>&
maxPacked) {
+lucene::util::bkd::relation TestVisitor1::compare(
+ std::vector<uint8_t> &minPacked, std::vector<uint8_t> &maxPacked) {
int min = NumericUtils::sortableBytesToInt(minPacked, 0);
int max = NumericUtils::sortableBytesToInt(maxPacked, 0);
assert(max >= min);
if (0) {
- wcout << L"compare: min=" << min << L" max=" << max << L" vs
queryMin=" << queryMin
- << L" queryMax=" << queryMax << endl;
+ wcout << L"compare: min=" << min << L" max=" << max << L" vs queryMin="
+ << queryMin << L" queryMax=" << queryMax << endl;
}
if (max < queryMin || min > queryMax) {
@@ -111,262 +95,104 @@ lucene::util::bkd::relation
TestVisitor1::compare(std::vector<uint8_t>& minPacke
}
}
-template <predicate QT>
-TestVisitor<QT>::TestVisitor(const uint8_t* qMin, const uint8_t* qMax, BitSet*
h) {
+TestVisitor::TestVisitor(const uint8_t *qMin, const uint8_t *qMax,
+ BitSet *h, predicate p) {
queryMin = qMin;
queryMax = qMax;
hits = h;
+ pred = p;
}
-template <predicate QT>
-int TestVisitor<QT>::matches(uint8_t* packed_value) {
- bool all_greater_than_max = true;
- bool all_within_range = true;
-
+bool TestVisitor::matches(uint8_t *packedValue) {
for (int dim = 0; dim < reader->num_data_dims_; dim++) {
int offset = dim * reader->bytes_per_dim_;
-
- auto result_max = lucene::util::FutureArrays::CompareUnsigned(
- packed_value, offset, offset + reader->bytes_per_dim_,
queryMax, offset,
- offset + reader->bytes_per_dim_);
-
- auto result_min = lucene::util::FutureArrays::CompareUnsigned(
- packed_value, offset, offset + reader->bytes_per_dim_,
queryMin, offset,
- offset + reader->bytes_per_dim_);
-
- all_greater_than_max &= (result_max > 0);
- all_within_range &= (result_min > 0 && result_max < 0);
-
- if (!all_greater_than_max && !all_within_range) {
- return -1;
- }
- }
-
- if (all_greater_than_max) {
- return 1;
- } else if (all_within_range) {
- return 0;
- } else {
- return -1;
- }
-}
-
-template <>
-int TestVisitor<predicate::EQ>::matches(uint8_t* packed_value) {
- // if query type is equal, query_min == query_max
- if (reader->num_data_dims_ == 1) {
- return std::memcmp(packed_value, queryMin, reader->bytes_per_dim_);
- } else {
- // if all dim value > matched value, then return > 0, otherwise return
< 0
- int return_result = 0;
- for (int dim = 0; dim < reader->num_data_dims_; dim++) {
- int offset = dim * reader->bytes_per_dim_;
- auto result = lucene::util::FutureArrays::CompareUnsigned(
- packed_value, offset, offset + reader->bytes_per_dim_,
queryMin, offset,
- offset + reader->bytes_per_dim_);
- if (result < 0) {
- return -1;
- } else if (result > 0) {
- return_result = 1;
+ if (pred == L) {
+ if (lucene::util::FutureArrays::CompareUnsigned(
+ packedValue, offset, offset + reader->bytes_per_dim_,
queryMax, offset,
+ offset + reader->bytes_per_dim_) >= 0) {
+ // Doc's value is too high, in this dimension
+ return false;
}
- }
- return return_result;
- }
-}
-
-template <>
-int TestVisitor<predicate::L>::matches(uint8_t* packed_value) {
- if (reader->num_data_dims_ == 1) {
- auto result = std::memcmp(packed_value, queryMax,
reader->bytes_per_dim_);
- if (result >= 0) {
- return 1;
- }
- return 0;
- } else {
- bool all_greater_or_equal = true;
- bool all_lesser = true;
-
- for (int dim = 0; dim < reader->num_data_dims_; dim++) {
- int offset = dim * reader->bytes_per_dim_;
- auto result = lucene::util::FutureArrays::CompareUnsigned(
- packed_value, offset, offset + reader->bytes_per_dim_,
queryMax, offset,
- offset + reader->bytes_per_dim_);
-
- all_greater_or_equal &=
- (result >= 0); // Remains true only if all results
are greater or equal
- all_lesser &= (result < 0); // Remains true only if all results
are lesser
- }
-
- // Return 1 if all values are greater or equal, 0 if all are lesser,
otherwise -1
- return all_greater_or_equal ? 1 : (all_lesser ? 0 : -1);
- }
-}
-
-template <>
-int TestVisitor<predicate::LE>::matches(uint8_t* packed_value) {
- if (reader->num_data_dims_ == 1) {
- auto result = std::memcmp(packed_value, queryMax,
reader->bytes_per_dim_);
- if (result > 0) {
- return 1;
- }
- return 0;
- } else {
- bool all_greater = true;
- bool all_lesser_or_equal = true;
-
- for (int dim = 0; dim < reader->num_data_dims_; dim++) {
- int offset = dim * reader->bytes_per_dim_;
- auto result = lucene::util::FutureArrays::CompareUnsigned(
- packed_value, offset, offset + reader->bytes_per_dim_,
queryMax, offset,
- offset + reader->bytes_per_dim_);
-
- all_greater &= (result > 0); // Remains true only if all results
are greater
- all_lesser_or_equal &=
- (result <= 0); // Remains true only if all results are
lesser or equal
- }
-
- // Return 1 if all values are greater or equal, 0 if all are lesser,
otherwise -1
- return all_greater ? 1 : (all_lesser_or_equal ? 0 : -1);
- }
-}
-
-template <>
-int TestVisitor<predicate::G>::matches(uint8_t* packed_value) {
- if (reader->num_data_dims_ == 1) {
- auto result = std::memcmp(packed_value, queryMin,
reader->bytes_per_dim_);
- if (result <= 0) {
- return -1;
- }
- return 0;
- } else {
- for (int dim = 0; dim < reader->num_data_dims_; dim++) {
- int offset = dim * reader->bytes_per_dim_;
- auto result = lucene::util::FutureArrays::CompareUnsigned(
- packed_value, offset, offset + reader->bytes_per_dim_,
queryMin, offset,
- offset + reader->bytes_per_dim_);
- if (result <= 0) {
- return -1;
+ } else if (pred == G) {
+ if (lucene::util::FutureArrays::CompareUnsigned(
+ packedValue, offset, offset + reader->bytes_per_dim_,
queryMin, offset,
+ offset + reader->bytes_per_dim_) <= 0) {
+ // Doc's value is too low, in this dimension
+ return false;
}
- }
- return 0;
- }
-}
-
-template <>
-int TestVisitor<predicate::GE>::matches(uint8_t* packed_value) {
- if (reader->num_data_dims_ == 1) {
- auto result = std::memcmp(packed_value, queryMin,
reader->bytes_per_dim_);
- if (result < 0) {
- return -1;
- }
- return 0;
- } else {
- for (int dim = 0; dim < reader->num_data_dims_; dim++) {
- int offset = dim * reader->bytes_per_dim_;
- auto result = lucene::util::FutureArrays::CompareUnsigned(
- packed_value, offset, offset + reader->bytes_per_dim_,
queryMin, offset,
- offset + reader->bytes_per_dim_);
- if (result < 0) {
- return -1;
+ } else {
+ if (lucene::util::FutureArrays::CompareUnsigned(
+ packedValue, offset, offset + reader->bytes_per_dim_,
queryMin, offset,
+ offset + reader->bytes_per_dim_) < 0) {
+ // Doc's value is too low, in this dimension
+ return false;
+ }
+ if (lucene::util::FutureArrays::CompareUnsigned(
+ packedValue, offset, offset + reader->bytes_per_dim_,
queryMax, offset,
+ offset + reader->bytes_per_dim_) > 0) {
+ // Doc's value is too high, in this dimension
+ return false;
}
}
- return 0;
}
+ return true;
}
-template <predicate QT>
-void TestVisitor<QT>::visit(int rowID) {
+void TestVisitor::visit(int rowID) {
hits->set(rowID);
if (0) {
std::wcout << L"visit docID=" << rowID << std::endl;
}
}
-template <predicate QT>
-int TestVisitor<QT>::visit(int rowID, std::vector<uint8_t>& packedValue) {
+void TestVisitor::visit(int rowID, std::vector<uint8_t> &packedValue) {
if (0) {
int x = lucene::util::NumericUtils::sortableBytesToLong(packedValue,
0);
std::wcout << L"visit docID=" << rowID << L" x=" << x << std::endl;
}
- auto result = matches(packedValue.data());
- if (result != 0) {
- return result;
+ if (matches(packedValue.data())) {
+ hits->set(rowID);
}
- hits->set(rowID);
- return 0;
}
-template <>
-lucene::util::bkd::relation
TestVisitor<predicate::L>::compare(std::vector<uint8_t>& min_packed,
-
std::vector<uint8_t>& max_packed) {
+lucene::util::bkd::relation TestVisitor::compare(std::vector<uint8_t>
&minPacked,
+ std::vector<uint8_t>
&maxPacked) {
bool crosses = false;
- for (int dim = 0; dim < reader->num_data_dims_; dim++) {
- int offset = dim * reader->bytes_per_dim_;
- if (lucene::util::FutureArrays::CompareUnsigned(
- min_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMax, offset,
- offset + reader->bytes_per_dim_) >= 0) {
- return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
- }
- crosses |= lucene::util::FutureArrays::CompareUnsigned(
- min_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMin,
- offset, offset + reader->bytes_per_dim_) <= 0 ||
- lucene::util::FutureArrays::CompareUnsigned(
- max_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMax,
- offset, offset + reader->bytes_per_dim_) >= 0;
- }
- if (crosses) {
- return lucene::util::bkd::relation::CELL_CROSSES_QUERY;
- } else {
- return lucene::util::bkd::relation::CELL_INSIDE_QUERY;
- }
-}
-template <>
-lucene::util::bkd::relation
TestVisitor<predicate::G>::compare(std::vector<uint8_t>& min_packed,
-
std::vector<uint8_t>& max_packed) {
- bool crosses = false;
for (int dim = 0; dim < reader->num_data_dims_; dim++) {
int offset = dim * reader->bytes_per_dim_;
- if (lucene::util::FutureArrays::CompareUnsigned(
- max_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMin, offset,
- offset + reader->bytes_per_dim_) <= 0) {
- return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
+
+ if (pred == L) {
+ if (lucene::util::FutureArrays::CompareUnsigned(
+ minPacked.data(), offset, offset +
reader->bytes_per_dim_, queryMax, offset,
+ offset + reader->bytes_per_dim_) >= 0) {
+ return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
+ }
+ } else if (pred == G) {
+ if (lucene::util::FutureArrays::CompareUnsigned(
+ maxPacked.data(), offset, offset +
reader->bytes_per_dim_, queryMin, offset,
+ offset + reader->bytes_per_dim_) <= 0) {
+ return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
+ }
+ } else {
+ if (lucene::util::FutureArrays::CompareUnsigned(
+ minPacked.data(), offset, offset +
reader->bytes_per_dim_, queryMax, offset,
+ offset + reader->bytes_per_dim_) > 0 ||
+ lucene::util::FutureArrays::CompareUnsigned(
+ maxPacked.data(), offset, offset +
reader->bytes_per_dim_, queryMin, offset,
+ offset + reader->bytes_per_dim_) < 0) {
+ return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
+ }
}
+
crosses |= lucene::util::FutureArrays::CompareUnsigned(
- min_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMin,
+ minPacked.data(), offset, offset +
reader->bytes_per_dim_, queryMin,
offset, offset + reader->bytes_per_dim_) <= 0 ||
lucene::util::FutureArrays::CompareUnsigned(
- max_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMax,
+ maxPacked.data(), offset, offset +
reader->bytes_per_dim_, queryMax,
offset, offset + reader->bytes_per_dim_) >= 0;
}
- if (crosses) {
- return lucene::util::bkd::relation::CELL_CROSSES_QUERY;
- } else {
- return lucene::util::bkd::relation::CELL_INSIDE_QUERY;
- }
-}
-template <predicate QT>
-lucene::util::bkd::relation TestVisitor<QT>::compare(std::vector<uint8_t>&
min_packed,
- std::vector<uint8_t>&
max_packed) {
- bool crosses = false;
- for (int dim = 0; dim < reader->num_data_dims_; dim++) {
- int offset = dim * reader->bytes_per_dim_;
- if (lucene::util::FutureArrays::CompareUnsigned(
- min_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMax, offset,
- offset + reader->bytes_per_dim_) > 0 ||
- lucene::util::FutureArrays::CompareUnsigned(
- max_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMin, offset,
- offset + reader->bytes_per_dim_) < 0) {
- return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
- }
- crosses |= lucene::util::FutureArrays::CompareUnsigned(
- min_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMin,
- offset, offset + reader->bytes_per_dim_) < 0 ||
- lucene::util::FutureArrays::CompareUnsigned(
- max_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMax,
- offset, offset + reader->bytes_per_dim_) > 0;
- }
if (crosses) {
return lucene::util::bkd::relation::CELL_CROSSES_QUERY;
} else {
@@ -374,23 +200,6 @@ lucene::util::bkd::relation
TestVisitor<QT>::compare(std::vector<uint8_t>& min_p
}
}
-template <predicate QT>
-lucene::util::bkd::relation
TestVisitor<QT>::compare_prefix(std::vector<uint8_t>& prefix) {
- if (lucene::util::FutureArrays::CompareUnsigned(prefix.data(), 0,
prefix.size(), queryMax, 0,
- prefix.size()) > 0 ||
- lucene::util::FutureArrays::CompareUnsigned(prefix.data(), 0,
prefix.size(), queryMin, 0,
- prefix.size()) < 0) {
- return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
- }
- if (lucene::util::FutureArrays::CompareUnsigned(prefix.data(), 0,
prefix.size(), queryMin, 0,
- prefix.size()) > 0 &&
- lucene::util::FutureArrays::CompareUnsigned(prefix.data(), 0,
prefix.size(), queryMax, 0,
- prefix.size()) < 0) {
- return lucene::util::bkd::relation::CELL_INSIDE_QUERY;
- }
- return lucene::util::bkd::relation::CELL_CROSSES_QUERY;
-}
-
Directory *getDirectory(int numPoints) {
Directory *dir;
if (numPoints > 100000) {
@@ -470,7 +279,7 @@ void testSameInts1DRead(CuTest *tc) {
r->intersect(v.get());
} catch (CLuceneError &r) {
//printf("something wrong in read\n");
- printf("clucene error in testSameInts1DRead: %s\n", r.what());
+ printf("clucene error: %s\n", r.what());
}
for (int docID = 0; docID < N; docID++) {
bool expected = docID >= queryMin && docID <= queryMax;
@@ -502,7 +311,7 @@ void testSameInts1DRead(CuTest *tc) {
void testBug1Write(CuTest *tc) {
const int N = 8;
- Directory *dir(FSDirectory::getDirectory("testBug1"));
+ Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
shared_ptr<bkd::bkd_writer> w =
make_shared<bkd::bkd_writer>(N, 1, 1, 4, 4, 100.0f, N, true);
w->docs_seen_ = N;
@@ -522,9 +331,9 @@ void testBug1Write(CuTest *tc) {
int64_t indexFP;
{
- std::unique_ptr<IndexOutput> out(dir->createOutput("bkd"));
- std::unique_ptr<IndexOutput> meta_out(dir->createOutput("bkd_meta"));
- std::unique_ptr<IndexOutput> index_out(dir->createOutput("bkd_index"));
+ std::unique_ptr<IndexOutput> out(dir->createOutput("bkd3"));
+ std::unique_ptr<IndexOutput> meta_out(dir->createOutput("bkd3_meta"));
+ std::unique_ptr<IndexOutput>
index_out(dir->createOutput("bkd3_index"));
try {
indexFP = w->finish(out.get(), index_out.get());
w->meta_finish(meta_out.get(), indexFP, 0);
@@ -539,15 +348,13 @@ void testBug1Write(CuTest *tc) {
void testBug1Read(CuTest *tc) {
uint64_t str = Misc::currentTimeMillis();
- auto *dir = FSDirectory::getDirectory("testBug1");
+ Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
{
- auto closeDirectory = true;
- auto bkd_reader =
- std::make_shared<lucene::util::bkd::bkd_reader>(dir,
closeDirectory);
- if (!bkd_reader->open()) {
- printf("can not open bkd file\n");
- exit(1);
- }
+ auto in_ = std::unique_ptr<IndexInput>(dir->openInput("bkd3"));
+ auto meta_in_ =
std::unique_ptr<IndexInput>(dir->openInput("bkd3_meta"));
+ auto index_in_ =
std::unique_ptr<IndexInput>(dir->openInput("bkd3_index"));
+
+ shared_ptr<bkd::bkd_reader> r =
make_shared<bkd::bkd_reader>(in_.release());
// Simple 1D range query:
int value = 0;
auto result = std::make_unique<BitSet>(10);
@@ -557,24 +364,29 @@ void testBug1Read(CuTest *tc) {
const auto *max = reinterpret_cast<const uint8_t
*>(value_bytes.data());
const auto *min = reinterpret_cast<const uint8_t
*>(value_bytes.data());
- auto v = std::make_unique<TestVisitor<EQ>>(min, max, result.get());
+ auto v = std::make_unique<TestVisitor>(min, max, result.get(), EQ);
try {
- v->setReader(bkd_reader);
- bkd_reader->intersect(v.get());
+ v->setReader(r);
+ r->read_meta(meta_in_.get());
+ //auto type = r->read_type();
+ CuAssertEquals(tc, 0, r->type);
+ r->read_index(index_in_.get());
+ r->intersect(v.get());
} catch (CLuceneError &r) {
//printf("something wrong in read\n");
- printf("clucene error in testBug1Read: %s\n", r.what());
+ printf("clucene error: %s\n", r.what());
}
//printf("hits count=%d\n", result->count());
CuAssertEquals(tc, result->count(), 6);
//printf("\nFirst search time taken: %d ms\n\n", (int32_t)
(Misc::currentTimeMillis() - str));
}
- _CLLDECDELETE(dir)
+ dir->close();
+ _CLDECDELETE(dir);
}
void testLowCardinalInts1DWrite(CuTest *tc) {
const int N = 1024 * 1024;
- Directory *dir(FSDirectory::getDirectory("testLowCardinalInts1D"));
+ Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
shared_ptr<bkd::bkd_writer> w =
make_shared<bkd::bkd_writer>(N, 1, 1, 4, 512, 100.0f, N, true);
w->docs_seen_ = N;
@@ -592,9 +404,9 @@ void testLowCardinalInts1DWrite(CuTest *tc) {
// equivalent: ORIGINAL LINE: try (org.apache.lucene.store.IndexOutput out
=
// dir.createOutput("bkd", org.apache.lucene.store.IOContext.DEFAULT))
{
- std::unique_ptr<IndexOutput> out(dir->createOutput("bkd"));
- std::unique_ptr<IndexOutput> meta_out(dir->createOutput("bkd_meta"));
- std::unique_ptr<IndexOutput> index_out(dir->createOutput("bkd_index"));
+ std::unique_ptr<IndexOutput> out(dir->createOutput("bkd2"));
+ std::unique_ptr<IndexOutput> meta_out(dir->createOutput("bkd2_meta"));
+ std::unique_ptr<IndexOutput>
index_out(dir->createOutput("bkd2_index"));
//auto metaOffset = w->MetaInit(out.get());
try {
@@ -612,68 +424,72 @@ void testLowCardinalInts1DWrite(CuTest *tc) {
void testLowCardinalInts1DRead2(CuTest *tc) {
uint64_t str = Misc::currentTimeMillis();
const int N = 1024 * 1024;
- Directory *dir = FSDirectory::getDirectory("testLowCardinalInts1D");
+ Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
{
+ auto in_ = std::unique_ptr<IndexInput>(dir->openInput("bkd2"));
+ auto meta_in_ =
std::unique_ptr<IndexInput>(dir->openInput("bkd2_meta"));
+ auto index_in_ =
std::unique_ptr<IndexInput>(dir->openInput("bkd2_index"));
- auto closeDirectory = true;
- auto bkd_reader =
- std::make_shared<lucene::util::bkd::bkd_reader>(dir,
closeDirectory);
- if (!bkd_reader->open()) {
- printf("can not open bkd file\n");
- exit(1);
- }
+ shared_ptr<bkd::bkd_reader> r =
make_shared<bkd::bkd_reader>(in_.release());
// Simple 1D range query:
constexpr int queryMin = 0; //std::numeric_limits<int>::min();
constexpr int queryMax = 100;//std::numeric_limits<int>::max();
auto hits = std::make_shared<BitSet>(N);
auto v = std::make_unique<TestVisitor1>(queryMin, queryMax, hits);
try {
- bkd_reader->intersect(v.get());
+ r->read_meta(meta_in_.get());
+ //auto type = r->read_type();
+ CuAssertEquals(tc, 0, r->type);
+ r->read_index(index_in_.get());
+ r->intersect(v.get());
} catch (CLuceneError &r) {
//printf("something wrong in read\n");
- printf("clucene error in testLowCardinalInts1DRead2: %s\n",
r.what());
+ printf("clucene error: %s\n", r.what());
}
//printf("hits count=%d\n", hits->count());
CuAssertEquals(tc, hits->count(), 12928);
//printf("\nFirst search time taken: %d ms\n\n", (int32_t)
(Misc::currentTimeMillis() - str));
- _CLLDECDELETE(dir)
}
+ dir->close();
+ _CLDECDELETE(dir);
}
void testLowCardinalInts1DRead(CuTest *tc) {
uint64_t str = Misc::currentTimeMillis();
const int N = 1024 * 1024;
- Directory *dir = FSDirectory::getDirectory("testLowCardinalInts1D");
+ Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
{
- auto closeDirectory = true;
- auto bkd_reader =
- std::make_shared<lucene::util::bkd::bkd_reader>(dir,
closeDirectory);
- if (!bkd_reader->open()) {
- printf("can not open bkd file\n");
- exit(1);
- }
+ auto in_ = std::unique_ptr<IndexInput>(dir->openInput("bkd2"));
+ auto meta_in_ =
std::unique_ptr<IndexInput>(dir->openInput("bkd2_meta"));
+ auto index_in_ =
std::unique_ptr<IndexInput>(dir->openInput("bkd2_index"));
+ shared_ptr<bkd::bkd_reader> r =
make_shared<bkd::bkd_reader>(in_.release());
// Simple 1D range query:
constexpr int queryMin = 0;//std::numeric_limits<int>::min();
constexpr int queryMax = 1;//std::numeric_limits<int>::max();
auto hits = std::make_shared<BitSet>(N);
auto v = std::make_unique<TestVisitor1>(queryMin, queryMax, hits);
try {
- bkd_reader->intersect(v.get());
+ r->read_meta(meta_in_.get());
+ //auto type = r->read_type();
+ CuAssertEquals(tc, 0, r->type);
+ r->read_index(index_in_.get());
+ r->intersect(v.get());
} catch (CLuceneError &r) {
//printf("something wrong in read\n");
- printf("clucene error in testLowCardinalInts1DRead: %s\n",
r.what());
+ printf("clucene error: %s\n", r.what());
}
//printf("hits count=%d\n", hits->count());
CuAssertEquals(tc, hits->count(), 256);
//printf("\nFirst search time taken: %d ms\n\n", (int32_t)
(Misc::currentTimeMillis() - str));
- _CLLDECDELETE(dir)
}
+ dir->close();
+ _CLDECDELETE(dir);
}
void testBasicsInts1DWrite(CuTest *tc) {
const int N = 1024 * 1024;
- Directory *dir(FSDirectory::getDirectory("testBasicsInts1D"));
+ Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
shared_ptr<bkd::bkd_writer> w =
make_shared<bkd::bkd_writer>(N, 1, 1, 4, 512, 100.0f, N, true);
w->docs_seen_ = N;
@@ -710,56 +526,54 @@ void testBasicsInts1DWrite(CuTest *tc) {
void testBasicsInts1DRead(CuTest *tc) {
uint64_t str = Misc::currentTimeMillis();
const int N = 1024 * 1024;
- Directory *dir = FSDirectory::getDirectory("testBasicsInts1D");
- {
- auto closeDirectory = true;
- auto bkd_reader =
- std::make_shared<lucene::util::bkd::bkd_reader>(dir,
closeDirectory);
- if (!bkd_reader->open()) {
- printf("can not open bkd file\n");
- exit(1);
- }
-
- // Simple 1D range query:
- constexpr int queryMin = 1024;
- constexpr int queryMax = std::numeric_limits<int>::max();
- auto hits = std::make_shared<BitSet>(N);
- auto v = std::make_unique<TestVisitor1>(queryMin, queryMax, hits);
- try {
- bkd_reader->intersect(v.get());
- } catch (CLuceneError &r) {
- //printf("something wrong in read\n");
- printf("clucene error in testBasicsInts1DRead: %s\n", r.what());
- }
- for (int docID = 0; docID < N; docID++) {
- bool expected = docID >= queryMin && docID <= queryMax;
- bool actual = hits->get(N - docID - 1);
- if (expected != actual) {
- wcout << docID << " " << expected << " " << actual;
- }
- CuAssertEquals(tc, expected, actual);
-
- //assertEquals(L"docID=" + to_wstring(docID), expected, actual);
- }
- //printf("\nFirst search time taken: %d ms\n\n", (int32_t)
(Misc::currentTimeMillis() - str));
- auto hits1 = std::make_shared<BitSet>(N);
- auto v1 = std::make_unique<TestVisitor1>(queryMin, queryMax, hits1);
- str = Misc::currentTimeMillis();
-
- bkd_reader->intersect(v1.get());
- for (int docID = 0; docID < N; docID++) {
- bool expected = docID >= queryMin && docID <= queryMax;
- bool actual = hits1->get(N - docID - 1);
- if (expected != actual) {
- wcout << "failed to equal: " << docID << " " << expected << "
" << actual;
- }
- CuAssertEquals(tc, expected, actual);
- //assertEquals(L"docID=" + to_wstring(docID), expected, actual);
+ auto dir =
std::unique_ptr<Directory>(FSDirectory::getDirectory("TestBKDTree"));
+ std::unique_ptr<IndexInput> in_ =
std::unique_ptr<IndexInput>(dir->openInput("bkd"));;
+ auto meta_in_ = std::unique_ptr<IndexInput>(dir->openInput("bkd_meta"));
+ auto index_in_ = std::unique_ptr<IndexInput>(dir->openInput("bkd_index"));
+ shared_ptr<bkd::bkd_reader> r =
make_shared<bkd::bkd_reader>(in_.release());
+ // Simple 1D range query:
+ constexpr int queryMin = 1024;
+ constexpr int queryMax = std::numeric_limits<int>::max();
+ auto hits = std::make_shared<BitSet>(N);
+ auto v = std::make_unique<TestVisitor1>(queryMin, queryMax, hits);
+ try {
+ r->read_meta(meta_in_.get());
+ //auto type = r->read_type();
+ CuAssertEquals(tc, 0, r->type);
+ r->read_index(index_in_.get());
+ r->intersect(v.get());
+ } catch (CLuceneError &r) {
+ //printf("something wrong in read\n");
+ printf("clucene error: %s\n", r.what());
+ }
+ for (int docID = 0; docID < N; docID++) {
+ bool expected = docID >= queryMin && docID <= queryMax;
+ bool actual = hits->get(N - docID - 1);
+ if (expected != actual) {
+ wcout << docID << " " << expected << " " << actual;
+ }
+ CuAssertEquals(tc, expected, actual);
+
+ //assertEquals(L"docID=" + to_wstring(docID), expected, actual);
+ }
+ //printf("\nFirst search time taken: %d ms\n\n", (int32_t)
(Misc::currentTimeMillis() - str));
+ auto hits1 = std::make_shared<BitSet>(N);
+ auto v1 = std::make_unique<TestVisitor1>(queryMin, queryMax, hits1);
+ str = Misc::currentTimeMillis();
+
+ r->intersect(v1.get());
+ for (int docID = 0; docID < N; docID++) {
+ bool expected = docID >= queryMin && docID <= queryMax;
+ bool actual = hits1->get(N - docID - 1);
+ if (expected != actual) {
+ wcout << "failed to equal: " << docID << " " << expected << " " <<
actual;
}
- //printf("\nSecond search time taken: %d ms\n\n", (int32_t)
(Misc::currentTimeMillis() - str));
+ CuAssertEquals(tc, expected, actual);
+ //assertEquals(L"docID=" + to_wstring(docID), expected, actual);
}
- //dir->close();
- _CLDECDELETE(dir);
+ //printf("\nSecond search time taken: %d ms\n\n", (int32_t)
(Misc::currentTimeMillis() - str));
+ dir->close();
+ //_CLDECDELETE(dir);
}
void testHttplogsRead(CuTest *tc) {
@@ -785,7 +599,7 @@ void testHttplogsRead(CuTest *tc) {
const auto *max = reinterpret_cast<const uint8_t *>(scratch2.data());
const auto *min = reinterpret_cast<const uint8_t *>(scratch.data());
- auto v = std::make_unique<TestVisitor<G>>(min, max, result.get());
+ auto v = std::make_unique<TestVisitor>(min, max, result.get(), G);
v->setReader(r);
try {
str = Misc::currentTimeMillis();
@@ -798,7 +612,7 @@ void testHttplogsRead(CuTest *tc) {
//printf("\nsearch time taken: %d ms\n\n", (int32_t)
(Misc::currentTimeMillis() - str));
} catch (CLuceneError &r) {
//printf("something wrong in read\n");
- printf("clucene error in testHttplogsRead: %s\n", r.what());
+ printf("clucene error: %s\n", r.what());
}
//printf("result size = %d\n", result->count());
CuAssertEquals(tc, result->count(), 8445);
@@ -886,7 +700,7 @@ void testSame(CuTest *tc) {
{
//std::shared_ptr<Directory> dir{getDirectory(10001)};
const int N = 1024 * 1024;
- Directory *dir(FSDirectory::getDirectory("testSame"));
+ Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
shared_ptr<bkd::bkd_writer> w =
make_shared<bkd::bkd_writer>(N, 1, 1, 4, 512, 100.0f, N, true);
@@ -914,13 +728,11 @@ void testSame(CuTest *tc) {
// equivalent: ORIGINAL LINE: try (org.apache.lucene.store.IndexInput
in =
// dir.openInput("bkd", org.apache.lucene.store.IOContext.DEFAULT))
{
- auto closeDirectory = true;
- auto bkd_reader =
- std::make_shared<lucene::util::bkd::bkd_reader>(dir,
closeDirectory);
- if (!bkd_reader->open()) {
- printf("can not open bkd file\n");
- exit(1);
- }
+ auto in_ = std::unique_ptr<IndexInput>(dir->openInput("bkd"));
+ auto meta_in_=
std::unique_ptr<IndexInput>(dir->openInput("bkd_meta"));
+ auto index_in_ =
std::unique_ptr<IndexInput>(dir->openInput("bkd_index"));
+ //in_->seek(indexFP);
+ shared_ptr<bkd::bkd_reader> r =
make_shared<bkd::bkd_reader>(in_.release());
// Simple 1D range query:
constexpr int queryMin = 100;
@@ -929,7 +741,11 @@ void testSame(CuTest *tc) {
//std::shared_ptr<BitSet> hits;
auto hits = std::make_shared<BitSet>(N);
auto v = std::make_unique<TestVisitor1>(queryMin, queryMax, hits);
- bkd_reader->intersect(v.get());
+ r->read_meta(meta_in_.get());
+ //auto type = r->read_type();
+ CuAssertEquals(tc, 0, r->type);
+ r->read_index(index_in_.get());
+ r->intersect(v.get());
for (int docID = 0; docID < N; docID++) {
bool expected = (100 >= queryMin && 100 <= queryMax);
@@ -941,7 +757,7 @@ void testSame(CuTest *tc) {
//assertEquals(L"docID=" + to_wstring(docID), expected,
actual);
}
}
- //dir->close();
+ dir->close();
_CLDECDELETE(dir);
}
}
@@ -953,7 +769,7 @@ void
equal_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) {
const auto *max = reinterpret_cast<const uint8_t *>(&value);
const auto *min = reinterpret_cast<const uint8_t *>(&value);
- auto v = std::make_unique<TestVisitor<EQ>>(min, max, result.get());
+ auto v = std::make_unique<TestVisitor>(min, max, result.get(), EQ);
v->setReader(r);
r->intersect(v.get());
printf("count: %d\n", result->count());
@@ -976,7 +792,7 @@ void less_equal_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) {
}
const auto *max = reinterpret_cast<const uint8_t *>(&value);
- auto v = std::make_unique<TestVisitor<LE>>(min.data(), max, result.get());
+ auto v = std::make_unique<TestVisitor>(min.data(), max, result.get(), LE);
v->setReader(r);
r->intersect(v.get());
printf("\ncount: %d\n", result->count());
@@ -1002,7 +818,7 @@ void less_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) {
}
const auto *max = reinterpret_cast<const uint8_t *>(&value);
- auto v = std::make_unique<TestVisitor<L>>(min.data(), max, result.get());
+ auto v = std::make_unique<TestVisitor>(min.data(), max, result.get(), L);
v->setReader(r);
r->intersect(v.get());
printf("count: %d\n", result->count());
@@ -1024,7 +840,7 @@ void greater_equal_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) {
}
const auto *min = reinterpret_cast<const uint8_t *>(&value);
- auto v = std::make_unique<TestVisitor<GE>>(min, max.data(), result.get());
+ auto v = std::make_unique<TestVisitor>(min, max.data(), result.get(), GE);
v->setReader(r);
r->intersect(v.get());
printf("count: %d\n", result->count());
@@ -1046,7 +862,7 @@ void greater_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) {
}
const auto *min = reinterpret_cast<const uint8_t *>(&value);
- auto v = std::make_unique<TestVisitor<G>>(min, max.data(), result.get());
+ auto v = std::make_unique<TestVisitor>(min, max.data(), result.get(), G);
v->setReader(r);
r->intersect(v.get());
printf("count: %d\n", result->count());
diff --git a/src/test/util/TestBKD.h b/src/test/util/TestBKD.h
index 327c70fe82..a66f252ad4 100644
--- a/src/test/util/TestBKD.h
+++ b/src/test/util/TestBKD.h
@@ -26,20 +26,19 @@ public:
}
}
void visit(std::vector<char>& docID, std::vector<uint8_t> &packedValue)
override {
- if (matches(packedValue.data()) != 0) {
+ if (!matches(packedValue.data())) {
return;
}
visit(roaring::Roaring::read(docID.data(), false));
}
void visit(roaring::Roaring *docID, std::vector<uint8_t> &packedValue)
override;
+ void visit(int docID, std::vector<uint8_t> &packedValue) override;
void visit(lucene::util::bkd::bkd_docid_set_iterator *iter,
std::vector<uint8_t> &packedValue) override;
- int visit(int docid, std::vector<uint8_t> &packedValue) override;
- int matches(uint8_t *packedValue);
+ bool matches(uint8_t *packedValue);
lucene::util::bkd::relation compare(std::vector<uint8_t> &minPacked,
std::vector<uint8_t> &maxPacked)
override;
- lucene::util::bkd::relation compare_prefix(std::vector<uint8_t> &prefix) override;
};
enum predicate {
@@ -50,17 +49,20 @@ enum predicate {
EQ
};
-template <predicate QT>
class TestVisitor : public lucene::util::bkd::bkd_reader::intersect_visitor {
private:
const uint8_t *queryMin;
const uint8_t *queryMax;
+ //int queryMin = 0;
+ //int queryMax = 0;
lucene::util::BitSet *hits;
+ //std::shared_ptr<lucene::util::BitSet> hits;
std::shared_ptr<lucene::util::bkd::bkd_reader> reader;
+ predicate pred;
public:
- TestVisitor(const uint8_t *queryMin, const uint8_t *queryMax, lucene::util::BitSet *hits);
- ~TestVisitor() override = default;
+ TestVisitor(const uint8_t *queryMin, const uint8_t *queryMax, lucene::util::BitSet *hits, predicate p);
+ virtual ~TestVisitor() = default;
void setReader(std::shared_ptr<lucene::util::bkd::bkd_reader> &r) { reader = r; };
@@ -99,10 +101,9 @@ public:
docID = iter->docid_set->nextDoc();
}
};
- int matches(uint8_t *packedValue);
- lucene::util::bkd::relation compare_prefix(std::vector<uint8_t> &prefix) override;
+ bool matches(uint8_t *packedValue);
- int visit(int rowID, std::vector<uint8_t> &packedValue) override;
+ void visit(int rowID, std::vector<uint8_t> &packedValue) override;
lucene::util::bkd::relation compare(std::vector<uint8_t> &minPacked,
std::vector<uint8_t> &maxPacked)
override;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]