This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene by this push:
     new 7ee46851ae support multi add different field for one doc (#200)
7ee46851ae is described below

commit 7ee46851aee2f89df1abd7bc9408e6a7ace729b9
Author: amory <[email protected]>
AuthorDate: Mon Mar 18 10:25:36 2024 +0800

    support multi add different field for one doc (#200)
---
 src/core/CLucene/document/Document.cpp     |  7 +++++++
 src/core/CLucene/document/Document.h       |  7 ++++++-
 src/core/CLucene/index/SDocumentWriter.cpp | 25 +++++++++++++++++++++++++
 src/core/CLucene/index/SDocumentWriter.h   |  2 ++
 4 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/src/core/CLucene/document/Document.cpp b/src/core/CLucene/document/Document.cpp
index 69ba2af120..d3da684b15 100644
--- a/src/core/CLucene/document/Document.cpp
+++ b/src/core/CLucene/document/Document.cpp
@@ -85,6 +85,13 @@ CL_NS_DEF(document)
       return boost;
    }
 
+   void Document::setNeedResetFieldData(bool needResetFieldData) {
+       this->needResetFieldData = needResetFieldData;
+   }
+
+        bool Document::getNeedResetFieldData() const {
+         return needResetFieldData;
+        }
 
         Field* Document::getField(const TCHAR* name) const{
     CND_PRECONDITION(name != NULL, "name is NULL");
diff --git a/src/core/CLucene/document/Document.h b/src/core/CLucene/document/Document.h
index 400ec59122..2dc59cb993 100644
--- a/src/core/CLucene/document/Document.h
+++ b/src/core/CLucene/document/Document.h
@@ -35,6 +35,7 @@ public:
 private:
        FieldsType* _fields;
        float_t boost;
+        bool needResetFieldData = false;
 public:
        /** Constructs a new document with no fields. */
        Document();
@@ -66,7 +67,11 @@ public:
        *
        * @see #setBoost(float_t)
        */
-       float_t getBoost() const;
+        float_t getBoost() const;
+
+        void setNeedResetFieldData(bool needResetFieldData);
+
+        bool getNeedResetFieldData() const;
 
        /**
        * <p>Adds a field to a document.  Several fields may be added with
diff --git a/src/core/CLucene/index/SDocumentWriter.cpp b/src/core/CLucene/index/SDocumentWriter.cpp
index c757ba1a42..fa0f992349 100644
--- a/src/core/CLucene/index/SDocumentWriter.cpp
+++ b/src/core/CLucene/index/SDocumentWriter.cpp
@@ -126,6 +126,29 @@ SDocumentsWriter<T>::ThreadState::~ThreadState() {
         _CLDELETE(allFieldDataArray.values[i]);
 }
 
+template<typename T>
+void SDocumentsWriter<T>::ThreadState::resetCurrentFieldData(Document *doc) {
+    const Document::FieldsType &docFields = *doc->getFields();
+    const int32_t numDocFields = docFields.size();
+
+    if (FieldData* fp = fieldDataArray.values[0]; fp && numDocFields > 0) {
+        numFieldData = 1;
+        // reset fp for new fields
+        fp->fieldCount = 0;
+        fp->docFields.deleteValues();
+        fp->docFields.resize(1);
+        for (int32_t i = 0; i < numDocFields; i++) {
+            Field *field = docFields[i];
+            if (fp->fieldCount == fp->docFields.length) {
+                fp->docFields.resize(fp->docFields.length * 2);
+            }
+
+            fp->docFields.values[fp->fieldCount++] = field;
+        }
+    }
+    return;
+}
+
 template<typename T>
 typename SDocumentsWriter<T>::ThreadState *SDocumentsWriter<T>::getThreadState(Document *doc) {
     if (threadState == nullptr) {
@@ -135,6 +158,8 @@ typename SDocumentsWriter<T>::ThreadState *SDocumentsWriter<T>::getThreadState(D
     if (segment.empty()) {
         segment = writer->newSegmentName();
         threadState->init(doc, nextDocID);
+    } else if (doc->getNeedResetFieldData()) {
+        threadState->resetCurrentFieldData(doc);
     }
 
     threadState->docID = nextDocID;
diff --git a/src/core/CLucene/index/SDocumentWriter.h b/src/core/CLucene/index/SDocumentWriter.h
index b1217ba18b..3dd9818663 100644
--- a/src/core/CLucene/index/SDocumentWriter.h
+++ b/src/core/CLucene/index/SDocumentWriter.h
@@ -390,6 +390,8 @@ public:
         /** Tokenizes the fields of a document into Postings */
         void processDocument(CL_NS(analysis)::Analyzer *sanalyzer);
 
+        void resetCurrentFieldData(CL_NS(document)::Document *doc);
+
         /** If there are fields we've seen but did not see again
       *  in the last run, then free them up.  Also reduce
       *  postings hash size. */


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to