This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene by this push:
     new fd453665 [feature](analysis) add tokenizer CharFilter preprocessing interface (#118)
fd453665 is described below

commit fd453665055c65b94892d13a93ac47180afd72bb
Author: zzzxl <[email protected]>
AuthorDate: Fri Sep 8 16:14:54 2023 +0800

    [feature](analysis) add tokenizer CharFilter preprocessing interface (#118)
---
 src/core/CLucene/analysis/CharFilter.h        | 37 +++++++++++++++++++++++++++
 src/core/CLucene/index/MultiSegmentReader.cpp | 10 +++++---
 src/core/CLucene/util/CLStreams.h             |  8 ++++--
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/src/core/CLucene/analysis/CharFilter.h b/src/core/CLucene/analysis/CharFilter.h
new file mode 100644
index 00000000..b0238893
--- /dev/null
+++ b/src/core/CLucene/analysis/CharFilter.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <memory>
+
+#include "CLucene/util/CLStreams.h"
+
+namespace lucene::analysis {
+
+class CharFilter : public lucene::util::Reader {
+public:
+    CharFilter(lucene::util::Reader* input) : input_(input) {}
+
+    virtual ~CharFilter() {
+        if (input_) {
+            delete input_;
+            input_ = nullptr;
+        }
+    }
+
+    int64_t position() override {
+        _CLTHROWA(CL_ERR_UnsupportedOperation,
+                  "UnsupportedOperationException CharFilter::position");
+    }
+
+    int64_t skip(int64_t ntoskip) override {
+        _CLTHROWA(CL_ERR_UnsupportedOperation, "UnsupportedOperationException CharFilter::skip");
+    }
+
+    size_t size() override {
+        _CLTHROWA(CL_ERR_UnsupportedOperation, "UnsupportedOperationException CharFilter::size");
+    }
+
+protected:
+    lucene::util::Reader* input_ = nullptr;
+};
+
+} // namespace lucene::analysis
\ No newline at end of file
diff --git a/src/core/CLucene/index/MultiSegmentReader.cpp b/src/core/CLucene/index/MultiSegmentReader.cpp
index d4e8c8ea..e093cd61 100644
--- a/src/core/CLucene/index/MultiSegmentReader.cpp
+++ b/src/core/CLucene/index/MultiSegmentReader.cpp
@@ -592,11 +592,13 @@ void MultiTermDocs::seek( Term* tterm) {
        pointer = 0;
        current = NULL;
 
-       for (int32_t i = 0; i < readerTermDocs->length; i++) {
-               termDocs(i);
+       if (readerTermDocs) {
+               for (int32_t i = 0; i < readerTermDocs->length; i++) {
+                       termDocs(i);
+               }
+               base = starts[pointer];
+               current = termDocs(pointer++);
        }
-       base = starts[pointer];
-       current = termDocs(pointer++);
 }
 
 bool MultiTermDocs::next() {
diff --git a/src/core/CLucene/util/CLStreams.h b/src/core/CLucene/util/CLStreams.h
index 4fb00eee..82bb1274 100644
--- a/src/core/CLucene/util/CLStreams.h
+++ b/src/core/CLucene/util/CLStreams.h
@@ -27,6 +27,10 @@ class CLUCENE_EXPORT CLStream: public IReader{
 public:
        virtual ~CLStream(){}
 
+    virtual void init(const void *_value, int32_t _length, bool copyData) {
+        _CLTHROWA(CL_ERR_UnsupportedOperation, "UnsupportedOperationException: CLStream::init");
+    }
+
        inline int read(){
                const T* buffer;
                const int32_t nread = read((const void**)&buffer,1, 1);
@@ -191,7 +195,7 @@ public:
         this->buffer_size = 0;
         this->init(_value, _length, copyData);
     }
-    void init(const T *_value, int32_t _length, bool copyData = true){
+    void init(const void *_value, int32_t _length, bool copyData = true) override {
         const size_t length = _length;
         this->pos = 0;
         if (copyData) {
@@ -209,7 +213,7 @@ public:
             if (ownValue && this->value != NULL) {
                 _CLDELETE_LARRAY((T *) this->value);
             }
-            this->value = _value;
+            this->value = (T *)_value;
             this->buffer_size = 0;
         }
         this->m_size = length;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to