This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
new 1936207ae4 [Feature](term info) add ram used function for term info reader (#216)
1936207ae4 is described below
commit 1936207ae4345e45fcb2b4e6b1ed1a318f8c1b1b
Author: airborne12 <[email protected]>
AuthorDate: Tue May 21 12:49:24 2024 +0800
[Feature](term info) add ram used function for term info reader (#216)
---
src/core/CLucene/index/IndexReader.cpp | 4 ++++
src/core/CLucene/index/IndexReader.h | 1 +
src/core/CLucene/index/TermInfosReader.cpp | 26 +++++++++++++++++++-------
src/core/CLucene/index/_MultiSegmentReader.h | 8 ++++++++
src/core/CLucene/index/_SegmentHeader.h | 4 ++++
src/core/CLucene/index/_TermInfosReader.h | 10 ++++++++--
6 files changed, 44 insertions(+), 9 deletions(-)
diff --git a/src/core/CLucene/index/IndexReader.cpp b/src/core/CLucene/index/IndexReader.cpp
index a63b8d1ad8..5b9f8ad262 100644
--- a/src/core/CLucene/index/IndexReader.cpp
+++ b/src/core/CLucene/index/IndexReader.cpp
@@ -194,6 +194,10 @@ CL_NS_DEF(index)
_CLTHROWA(CL_ERR_UnsupportedOperation, "This reader does not support this method.");
}
+ int64_t IndexReader::getTermInfosRAMUsed() const {
+ _CLTHROWA(CL_ERR_UnsupportedOperation, "This reader does not support this method.");
+ }
+
bool IndexReader::isCurrent() {
_CLTHROWA(CL_ERR_UnsupportedOperation, "This reader does not support this method.");
}
diff --git a/src/core/CLucene/index/IndexReader.h b/src/core/CLucene/index/IndexReader.h
index 060c0545ff..4307a0d933 100644
--- a/src/core/CLucene/index/IndexReader.h
+++ b/src/core/CLucene/index/IndexReader.h
@@ -380,6 +380,7 @@ public:
* current indexDivisor.
* @see #setTermInfosIndexDivisor */
int32_t getTermInfosIndexDivisor();
+ virtual int64_t getTermInfosRAMUsed() const;
/**
* Check whether this IndexReader is still using the
diff --git a/src/core/CLucene/index/TermInfosReader.cpp b/src/core/CLucene/index/TermInfosReader.cpp
index 6cf8b42fe4..9044d1d36a 100644
--- a/src/core/CLucene/index/TermInfosReader.cpp
+++ b/src/core/CLucene/index/TermInfosReader.cpp
@@ -129,18 +129,23 @@ void TermInfosReader::close() {
}
#endif
}
+ numBytesUsed -= (sizeof(Term) * indexTermsLength + sizeof(TermInfo) * indexTermsLength + sizeof(int64_t) * indexTermsLength);
//Delete the arrays
- if (indexTerms) {
+ if (indexTerms != nullptr) {
+ for (int32_t i = 0; i < indexTermsLength; ++i) {
+ numBytesUsed -= indexTerms[i].textLength();
+ }
delete[] indexTerms;
indexTerms = NULL;
}
- if (indexInfos) {
+ if (indexInfos != nullptr) {
+ numBytesUsed -= sizeof(TermInfo) * indexTermsLength;
_CLDELETE_ARRAY(indexInfos);
indexInfos = NULL;
}
-
//Delete the arrays
- if (indexPointers) {
+ if (indexPointers != NULL) {
+ numBytesUsed -= sizeof(int64_t) * indexTermsLength;
_CLDELETE_ARRAY(indexPointers);
indexPointers = NULL;
}
@@ -324,9 +329,6 @@ void TermInfosReader::ensureIndexIsRead() {
SCOPED_LOCK_MUTEX(THIS_LOCK)
if (indexIsRead) return;
-
- //https://jira.qianxin-inc.cn/browse/XHBUG-2921
- //https://jira.qianxin-inc.cn/browse/XHBUG-3053
if (indexEnum == NULL) _CLTHROWA(CL_ERR_NullPointer, "indexEnum is NULL");
try {
@@ -360,6 +362,16 @@ void TermInfosReader::ensureIndexIsRead() {
if (!indexEnum->next()) break;
}
indexIsRead = true;
+ numBytesUsed = sizeof(Term) * indexTermsLength + sizeof(TermInfo) * indexTermsLength + sizeof(int64_t) * indexTermsLength;
+ for (int32_t i = 0; i < indexTermsLength; ++i) {
+ numBytesUsed += indexTerms[i].textLength();
+ }
+ if (indexInfos != NULL) {
+ numBytesUsed += sizeof(TermInfo) * indexTermsLength;
+ }
+ if (indexPointers != NULL) {
+ numBytesUsed += sizeof(int64_t) * indexTermsLength;
+ }
}
_CLFINALLY(indexEnum->close();
//Close and delete the IndexInput is. The close is done by the destructor.
diff --git a/src/core/CLucene/index/_MultiSegmentReader.h b/src/core/CLucene/index/_MultiSegmentReader.h
index ad600824fa..c5f8deeea2 100644
--- a/src/core/CLucene/index/_MultiSegmentReader.h
+++ b/src/core/CLucene/index/_MultiSegmentReader.h
@@ -123,6 +123,14 @@ public:
const char* getObjectName() const;
IndexVersion getIndexVersion() override;
+
+ int64_t getTermInfosRAMUsed() const override {
+ int64_t size = 0;
+ for (size_t i = 0; i < subReaders->length; i++) {
+ size += (*subReaders)[i]->getTermInfosRAMUsed();
+ }
+ return size;
+ }
};
diff --git a/src/core/CLucene/index/_SegmentHeader.h b/src/core/CLucene/index/_SegmentHeader.h
index c1f01e7cec..6bf7d1819b 100644
--- a/src/core/CLucene/index/_SegmentHeader.h
+++ b/src/core/CLucene/index/_SegmentHeader.h
@@ -321,6 +321,10 @@ class SegmentReader: public DirectoryIndexReader {
void initialize(SegmentInfo* si, int32_t readBufferSize, bool doOpenStores,
bool doingReopen);
+ int64_t getTermInfosRAMUsed() const override {
+ return tis->getRAMUsed();
+ }
+
/**
* Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
* @return TermVectorsReader
diff --git a/src/core/CLucene/index/_TermInfosReader.h b/src/core/CLucene/index/_TermInfosReader.h
index c2b41aca6c..a9a993795e 100644
--- a/src/core/CLucene/index/_TermInfosReader.h
+++ b/src/core/CLucene/index/_TermInfosReader.h
@@ -47,7 +47,9 @@ CL_NS_DEF(index)
int32_t indexDivisor;
int32_t totalIndexInterval;
- DEFINE_MUTEX(THIS_LOCK)
+ int64_t numBytesUsed;
+
+ DEFINE_MUTEX(THIS_LOCK)
public:
/**
@@ -102,7 +104,11 @@ CL_NS_DEF(index)
/** Returns the TermInfo for a Term in the set, or null. */
TermInfo* get(const Term* term);
- private:
+
+ int64_t getRAMUsed() const {
+ return numBytesUsed;
+ }
+ private:
/** Reads the term info index file or .tti file. */
void ensureIndexIsRead();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]