This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
new 7ee46851ae support multi add different field for one doc (#200)
7ee46851ae is described below
commit 7ee46851aee2f89df1abd7bc9408e6a7ace729b9
Author: amory <[email protected]>
AuthorDate: Mon Mar 18 10:25:36 2024 +0800
support multi add different field for one doc (#200)
---
src/core/CLucene/document/Document.cpp | 7 +++++++
src/core/CLucene/document/Document.h | 7 ++++++-
src/core/CLucene/index/SDocumentWriter.cpp | 25 +++++++++++++++++++++++++
src/core/CLucene/index/SDocumentWriter.h | 2 ++
4 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/src/core/CLucene/document/Document.cpp b/src/core/CLucene/document/Document.cpp
index 69ba2af120..d3da684b15 100644
--- a/src/core/CLucene/document/Document.cpp
+++ b/src/core/CLucene/document/Document.cpp
@@ -85,6 +85,13 @@ CL_NS_DEF(document)
return boost;
}
+ void Document::setNeedResetFieldData(bool needResetFieldData) { // Setter for this document's needResetFieldData flag.
+ this->needResetFieldData = needResetFieldData; // `this->` disambiguates the member from the same-named parameter.
+ } // SDocumentsWriter<T>::getThreadState reads the flag back via getNeedResetFieldData().
+
+ bool Document::getNeedResetFieldData() const { // Const accessor for the flag stored by setNeedResetFieldData().
+ return needResetFieldData; // The member is declared with an in-class default of false in Document.h.
+ }
Field* Document::getField(const TCHAR* name) const{
CND_PRECONDITION(name != NULL, "name is NULL");
diff --git a/src/core/CLucene/document/Document.h b/src/core/CLucene/document/Document.h
index 400ec59122..2dc59cb993 100644
--- a/src/core/CLucene/document/Document.h
+++ b/src/core/CLucene/document/Document.h
@@ -35,6 +35,7 @@ public:
private:
FieldsType* _fields;
float_t boost;
+ bool needResetFieldData = false;
public:
/** Constructs a new document with no fields. */
Document();
@@ -66,7 +67,11 @@ public:
*
* @see #setBoost(float_t)
*/
- float_t getBoost() const;
+ float_t getBoost() const;
+
+ void setNeedResetFieldData(bool needResetFieldData);
+
+ bool getNeedResetFieldData() const;
/**
* <p>Adds a field to a document. Several fields may be added with
diff --git a/src/core/CLucene/index/SDocumentWriter.cpp b/src/core/CLucene/index/SDocumentWriter.cpp
index c757ba1a42..fa0f992349 100644
--- a/src/core/CLucene/index/SDocumentWriter.cpp
+++ b/src/core/CLucene/index/SDocumentWriter.cpp
@@ -126,6 +126,29 @@ SDocumentsWriter<T>::ThreadState::~ThreadState() {
_CLDELETE(allFieldDataArray.values[i]);
}
+template<typename T>
+void SDocumentsWriter<T>::ThreadState::resetCurrentFieldData(Document *doc) {
+    // Rebinds the first FieldData slot to the fields currently held by `doc`.
+    const Document::FieldsType &incoming = *doc->getFields();
+    const int32_t incomingCount = incoming.size();
+    FieldData *slot = fieldDataArray.values[0];
+    if (slot == nullptr || incomingCount <= 0) {
+        return; // no first slot, or the document has no fields: leave state untouched
+    }
+
+    // Start over with one active FieldData whose field list is resized to 1.
+    numFieldData = 1;
+    slot->fieldCount = 0;
+    slot->docFields.deleteValues();
+    slot->docFields.resize(1);
+    for (int32_t idx = 0; idx < incomingCount; ++idx) {
+        if (slot->fieldCount == slot->docFields.length) {
+            slot->docFields.resize(slot->docFields.length * 2); // full: double the length
+        }
+        slot->docFields.values[slot->fieldCount++] = incoming[idx];
+    }
+}
+
template<typename T>
typename SDocumentsWriter<T>::ThreadState
*SDocumentsWriter<T>::getThreadState(Document *doc) {
if (threadState == nullptr) {
@@ -135,6 +158,8 @@ typename SDocumentsWriter<T>::ThreadState *SDocumentsWriter<T>::getThreadState(D
if (segment.empty()) {
segment = writer->newSegmentName();
threadState->init(doc, nextDocID);
+ } else if (doc->getNeedResetFieldData()) {
+ threadState->resetCurrentFieldData(doc);
}
threadState->docID = nextDocID;
diff --git a/src/core/CLucene/index/SDocumentWriter.h b/src/core/CLucene/index/SDocumentWriter.h
index b1217ba18b..3dd9818663 100644
--- a/src/core/CLucene/index/SDocumentWriter.h
+++ b/src/core/CLucene/index/SDocumentWriter.h
@@ -390,6 +390,8 @@ public:
/** Tokenizes the fields of a document into Postings */
void processDocument(CL_NS(analysis)::Analyzer *sanalyzer);
+ void resetCurrentFieldData(CL_NS(document)::Document *doc);
+
/** If there are fields we've seen but did not see again
* in the last run, then free them up. Also reduce
* postings hash size. */
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]