Hi. I noticed that after upgrading from Lucene 8.5.x to Lucene >= 8.6,
search became slower (for example, TopScoreDocCollector became 20-30% slower,
and in Elasticsearch about 50% slower).
While testing, I realized that this started with LUCENE-9257 (commit
e7a61ea). Is this a bug or a feature? Could a setting be added for isOffHeap,
so that the developer explicitly makes this choice?
I have attached a file with a simple demo showing that search is slower.
Run it on commits e7a61ea and 90aced5, and you will notice that the speed
drops by about 30%.
package org.apache.lucene.demo;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.LeafReaderContext;
import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
/**
 * Micro-benchmark attached to a mailing-list report that search became slower
 * after LUCENE-9257 (moving the terms index off-heap).
 *
 * <p>Builds a fresh 1,000,000-document index under {@code fs_test/}, prints
 * the on-heap RAM reported by each segment's codec readers, then times
 * 100,000 repeated searches and 100,000 rounds of stored-field retrieval so
 * the two commits (e7a61ea vs. 90aced5) can be compared.
 */
public class SpeedLucene {

    public static void main(String[] args) throws IOException, ParseException {
        System.out.println("-Start-");
        // 0. create the analyzer
        StandardAnalyzer analyzer = new StandardAnalyzer();
        // 1. create the index; wipe any index left over from a previous run so
        // every run measures the same segment layout
        deleteDirectory(new File("fs_test"));
        Directory index = new NIOFSDirectory(Paths.get("fs_test/"));
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        // try-with-resources guarantees the writer is closed (and the index
        // committed) even if indexing throws
        try (IndexWriter w = new IndexWriter(index, config)) {
            for (int x = 0; x < 1000000; x++) {
                addDoc(w, "Lucene in Action " + x, "1" + x);
            }
        }
        // 2. query
        Query q = new QueryParser("title", analyzer).parse("lucene");
        // 3. search
        int hitsPerPage = 10;
        try (IndexReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // 3.1 segment info: sum what each codec reader keeps on heap, to
            // make the before/after-LUCENE-9257 difference visible
            int numDocs = 0;
            int numDeletedDocs = 0;
            long sizeInBytes = 0;
            for (LeafReaderContext readerContext : reader.leaves()) {
                final SegmentReader segmentReader = (SegmentReader) readerContext.reader();
                SegmentCommitInfo info = segmentReader.getSegmentInfo();
                numDocs += segmentReader.numDocs();
                numDeletedDocs += segmentReader.numDeletedDocs();
                long ramBytesUsed = segmentReader.getPostingsReader().ramBytesUsed();
                System.out.println("Codec " + info.info.getCodec());
                System.out.println("Postings ram " + ramBytesUsed + " byte");
                if (segmentReader.getNormsReader() != null) {
                    ramBytesUsed += segmentReader.getNormsReader().ramBytesUsed();
                    System.out.println("Norms ram " + segmentReader.getNormsReader().ramBytesUsed() + " byte");
                }
                if (segmentReader.getDocValuesReader() != null) {
                    ramBytesUsed += segmentReader.getDocValuesReader().ramBytesUsed();
                    System.out.println("DocValues ram " + segmentReader.getDocValuesReader().ramBytesUsed() + " byte");
                }
                if (segmentReader.getFieldsReader() != null) {
                    ramBytesUsed += segmentReader.getFieldsReader().ramBytesUsed();
                    System.out.println("Fields ram " + segmentReader.getFieldsReader().ramBytesUsed() + " byte");
                }
                if (segmentReader.getTermVectorsReader() != null) {
                    ramBytesUsed += segmentReader.getTermVectorsReader().ramBytesUsed();
                    System.out.println("TermVectors ram " + segmentReader.getTermVectorsReader().ramBytesUsed() + " byte");
                }
                if (segmentReader.getPointsReader() != null) {
                    ramBytesUsed += segmentReader.getPointsReader().ramBytesUsed();
                    System.out.println("Points ram " + segmentReader.getPointsReader().ramBytesUsed() + " byte");
                }
                System.out.println("---");
                sizeInBytes += ramBytesUsed;
            }
            // these were computed but never reported in the original demo
            System.out.println("numDocs " + numDocs + " numDeletedDocs " + numDeletedDocs);
            System.out.println("sizeInBytes " + sizeInBytes + " ");
            // NOTE(review): the same collector instance is reused for all
            // 100,000 searches; TopScoreDocCollector is not designed for reuse
            // and its priority queue persists across calls. Kept as-is to
            // preserve the benchmark's original measured behavior.
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, hitsPerPage);
            long startTime = System.currentTimeMillis();
            for (int x = 0; x < 100000; x++) {
                searcher.search(q, collector);
            }
            System.out.println("Time searcher " + (System.currentTimeMillis() - startTime));
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            // 4. display results: time repeated stored-field (document) lookups
            startTime = System.currentTimeMillis();
            for (int x = 0; x < 100000; x++) {
                for (int i = 0; i < hits.length; ++i) {
                    int docId = hits[i].doc;
                    Document d = searcher.doc(docId);
                    //System.out.println((i + 1) + ". " + d.get("isbn") + "\t" + d.get("title"));
                }
            }
            System.out.println("Time get doc " + (System.currentTimeMillis() - startTime));
        }
        // release the directory's file handles and the analyzer's resources
        // (both were leaked in the original demo)
        index.close();
        analyzer.close();
    }

    /**
     * Adds one document with an analyzed {@code title} field and a
     * non-analyzed {@code isbn} key field; both are stored.
     */
    private static void addDoc(IndexWriter w, String title, String isbn) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("title", title, Field.Store.YES));
        doc.add(new StringField("isbn", isbn, Field.Store.YES));
        w.addDocument(doc);
    }

    /**
     * Recursively deletes {@code directoryToBeDeleted}.
     *
     * @return true if the top-level file/directory was deleted; failures on
     *     children are silently ignored (best-effort cleanup for a demo)
     */
    private static boolean deleteDirectory(File directoryToBeDeleted) {
        File[] allContents = directoryToBeDeleted.listFiles();
        if (allContents != null) {
            for (File file : allContents) {
                deleteDirectory(file);
            }
        }
        return directoryToBeDeleted.delete();
    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]