This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new fc9f34fedc lucene `IndexOutOfBounds` bugfix, and use NRTCachingDirectory for realtime segment (#13308)
fc9f34fedc is described below

commit fc9f34fedcb3e3a351c1772141582951fdfecf33
Author: Christopher Peck <[email protected]>
AuthorDate: Tue Jun 4 09:56:32 2024 -0700

    lucene `IndexOutOfBounds` bugfix, and use NRTCachingDirectory for realtime segment (#13308)
---
 .../creator/impl/text/LuceneTextIndexCreator.java     | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java
index 1e9581980d..7a60e4b5c5 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java
@@ -40,6 +40,7 @@ import org.apache.lucene.index.NoMergeScheduler;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.NRTCachingDirectory;
 import 
org.apache.pinot.segment.local.realtime.impl.invertedindex.RealtimeLuceneTextIndex;
 import 
org.apache.pinot.segment.local.segment.creator.impl.SegmentColumnarIndexCreator;
 import 
org.apache.pinot.segment.local.segment.index.text.AbstractTextIndexCreator;
@@ -139,8 +140,15 @@ public class LuceneTextIndexCreator extends 
AbstractTextIndexCreator {
       // merge segments in the background, which is problematic because the 
lucene index directory's
       // contents is copied to create the immutable segment. If a background 
merge occurs during this
       // copy, a FileNotFoundException will be triggered and segment build 
will fail.
+      //
+      // Also, for the realtime segment, we set the OpenMode to CREATE to 
ensure that any existing artifacts
+      // will be overwritten. This is necessary because the realtime segment 
can be created multiple times
+      // during a server crash and restart scenario. If the existing artifacts 
are appended to, the realtime
+      // query results will be accurate, but after segment conversion the 
mapping file generated will be loaded
+      // for only the first numDocs lucene docIds, which can cause 
IndexOutOfBounds errors.
       if (!_commitOnClose) {
         indexWriterConfig.setMergeScheduler(NoMergeScheduler.INSTANCE);
+        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
       }
 
       if (_reuseMutableIndex) {
@@ -150,7 +158,16 @@ public class LuceneTextIndexCreator extends 
AbstractTextIndexCreator {
         return;
       }
 
-      _indexDirectory = FSDirectory.open(_indexFile.toPath());
+      if (_commitOnClose) {
+        _indexDirectory = FSDirectory.open(_indexFile.toPath());
+      } else {
+        // For realtime index, use NRTCachingDirectory to reduce the number of 
open files. This buffers the
+        // flushes triggered by the near real-time refresh and writes them to 
disk when the buffer is full,
+        // reducing the number of small writes.
+        _indexDirectory =
+            new NRTCachingDirectory(FSDirectory.open(_indexFile.toPath()), 
config.getLuceneMaxBufferSizeMB(),
+                config.getLuceneMaxBufferSizeMB());
+      }
       _indexWriter = new IndexWriter(_indexDirectory, indexWriterConfig);
     } catch (ReflectiveOperationException e) {
       throw new RuntimeException(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to