This is an automated email from the ASF dual-hosted git repository.

adonisling pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene by this push:
     new 3f91661  [test] Add testcase for document add (#57)
3f91661 is described below

commit 3f91661ca51a3e0e2e32de196847f3fe2be53395
Author: airborne12 <[email protected]>
AuthorDate: Mon Apr 17 15:42:20 2023 +0800

    [test] Add testcase for document add (#57)
---
 src/test/document/TestDocument.cpp | 46 ++++++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/src/test/document/TestDocument.cpp b/src/test/document/TestDocument.cpp
index b90f67d..93d52e8 100644
--- a/src/test/document/TestDocument.cpp
+++ b/src/test/document/TestDocument.cpp
@@ -407,23 +407,55 @@ void TestSetFieldBench(CuTest *tc) {
     });
 }
 
+string generateRandomString(int length) {
+    static const char alphanum[] =
+            "0123456789"
+            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+            "abcdefghijklmnopqrstuvwxyz";
+    string randomString;
+    random_device rd;
+    mt19937 gen(rd());
+    uniform_int_distribution<> dis(0, sizeof(alphanum) - 2);
+    for (int i = 0; i < length; ++i) {
+        randomString += alphanum[dis(gen)];
+    }
+    return randomString;
+}
+
+const int32_t MAX_FIELD_LEN = 0x7FFFFFFFL;
+const int32_t MAX_BUFFER_DOCS = 100000000;
+const int32_t MERGE_FACTOR = 100000000;
 void TestAddDocument(CuTest *tc) {
     RAMDirectory dir;
     SimpleAnalyzer<char> sanalyzer;
     IndexWriter w(&dir, NULL, true);
     w.setUseCompoundFile(false);
+    w.setMaxBufferedDocs(MAX_BUFFER_DOCS);
+    w.setRAMBufferSizeMB(256);
+    w.setMaxFieldLength(MAX_FIELD_LEN);
+    w.setMergeFactor(MERGE_FACTOR);
     w.setDocumentWriter(_CLNEW SDocumentsWriter<char>(w.getDirectory(), &w));
     Document doc;
     auto field_name = lucene::util::Misc::_charToWide("f3");
-    auto value1 = "value1";
-    auto stringReader = _CLNEW lucene::util::SStringReader<char>(
-            value1, strlen(value1), false);
-    auto stream = sanalyzer.reusableTokenStream(field_name, stringReader);
-
     auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | Field::STORE_NO);
-    field->setValue(stream);
     doc.add(*field);
-    w.addDocument(&doc, &sanalyzer);
+
+    for (int i = 0; i <= 2000000; i++) {
+        std::string value1 = "value1";
+        if (i > 0)
+            value1 = generateRandomString(2000);
+        auto stringReader = _CLNEW lucene::util::SStringReader<char>(
+                value1.c_str(), strlen(value1.c_str()), false);
+        auto stream = sanalyzer.reusableTokenStream(field_name, stringReader);
+
+        field->setValue(stream);
+        w.addDocument(&doc, &sanalyzer);
+    }
+    IndexSearcher searcher(&dir);
+    Term *t2 = _CLNEW Term(_T("f3"), _T("value1"));
+    auto *query2 = _CLNEW TermQuery(t2);
+    Hits *hits2 = searcher.search(query2);
+    CLUCENE_ASSERT(1 == hits2->length());
     doc.clear();
     w.close();
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to