Hi, I'd like to add the attached class to Lucene's core. It makes life easier for people who need to add and delete documents from an index by hiding all the IndexReader/IndexWriter stuff. Anybody needing full performance or best performance with threads can still use IndexReader/IndexWriter directly.
What do you think? If this gets accepted, it also needs a better name. Regards Daniel -- http://www.danielnaber.de
package org.apache.lucene.index;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * A class to delete and add documents to an index without the
 * need to care about the implementation detail that adding is done
 * via IndexWriter and deletion is done via IndexReader.
 *
 * <p>Note that you cannot create more than one <code>Index</code> object
 * on the same directory at the same time.
 *
 * <p>Although an instance of this class can be used from more than one
 * thread, you will not get good performance. Use IndexReader and IndexWriter
 * directly for that (you will need to care about synchronization yourself
 * then).
 *
 * <p>While you can freely mix calls to add() and delete() using this class,
 * you should batch your calls for best performance. For example, if you
 * want to update 20 documents, you should first delete all those documents,
 * then add all the new documents.
 *
 * <p>Internally at most one of the writer and the reader is held at any time;
 * every public operation synchronizes on the index directory and switches to
 * the handle it needs, closing the other one first.
 *
 * @author Daniel Naber
 */
public class Index {

  /** Writer used for adding and optimizing; <code>null</code> while the reader is active. */
  IndexWriter indexWriter = null;

  /** Reader used for deleting; <code>null</code> while the writer is active. */
  IndexReader indexReader = null;

  /** The index directory; also the monitor every operation synchronizes on. */
  Directory directory;

  /** Default analyzer, used whenever a writer is (re)opened. */
  Analyzer analyzer;

  /** <code>true</code> between successful construction and {@link #close()}. */
  boolean open = false;

  /**
   * Open an index with write access.
   *
   * @param directory the index directory
   * @param analyzer the analyzer to use for adding new documents
   * @param create <code>true</code> to create the index or overwrite the existing one;
   *  <code>false</code> to append to the existing index
   * @throws IOException if the underlying IndexWriter cannot be created
   */
  public Index(Directory directory, Analyzer analyzer, boolean create) throws IOException {
    init(directory, analyzer, create);
  }

  /**
   * Open an index with write access.
   *
   * @param dirName the index directory
   * @param analyzer the analyzer to use for adding new documents
   * @param create <code>true</code> to create the index or overwrite the existing one;
   *  <code>false</code> to append to the existing index
   * @throws IOException if the directory or the underlying IndexWriter cannot be opened
   */
  public Index(String dirName, Analyzer analyzer, boolean create) throws IOException {
    Directory dir = FSDirectory.getDirectory(dirName, create);
    init(dir, analyzer, create);
  }

  /**
   * Open an index with write access.
   *
   * @param file the index directory
   * @param analyzer the analyzer to use for adding new documents
   * @param create <code>true</code> to create the index or overwrite the existing one;
   *  <code>false</code> to append to the existing index
   * @throws IOException if the directory or the underlying IndexWriter cannot be opened
   */
  public Index(File file, Analyzer analyzer, boolean create) throws IOException {
    Directory dir = FSDirectory.getDirectory(file, create);
    init(dir, analyzer, create);
  }

  /**
   * Shared constructor logic: remembers directory and analyzer, opens the
   * initial IndexWriter and marks this index as open.
   */
  private void init(Directory directory, Analyzer analyzer, boolean create) throws IOException {
    this.directory = directory;
    synchronized (this.directory) {
      this.analyzer = analyzer;
      indexWriter = new IndexWriter(directory, analyzer, create);
      open = true;
    }
  }

  /**
   * Throws if this index has been closed. Callers must hold the directory monitor.
   *
   * @throws IllegalStateException if the index is closed
   */
  private void assureOpen() {
    if (!open) {
      throw new IllegalStateException("Index is closed");
    }
  }

  /**
   * Adds a document, analyzing it with <code>docAnalyzer</code> or, if that is
   * <code>null</code>, with the analyzer given at construction time.
   *
   * @see IndexWriter#addDocument(Document, Analyzer)
   * @throws IllegalStateException if the index is closed
   */
  public void addDocument(Document doc, Analyzer docAnalyzer) throws IOException {
    synchronized (directory) {
      assureOpen();
      createIndexWriter();
      if (docAnalyzer != null) {
        indexWriter.addDocument(doc, docAnalyzer);
      } else {
        indexWriter.addDocument(doc);
      }
    }
  }

  /**
   * Adds a document using the analyzer given at construction time.
   *
   * @see IndexWriter#addDocument(Document)
   * @throws IllegalStateException if the index is closed
   */
  public void addDocument(Document doc) throws IOException {
    addDocument(doc, null);
  }

  /**
   * Makes sure a writer is available, closing the reader first if one is open.
   * Callers must hold the directory monitor.
   */
  private void createIndexWriter() throws IOException {
    if (indexWriter == null) {
      if (indexReader != null) {
        indexReader.close();
        indexReader = null;
      }
      indexWriter = new IndexWriter(directory, analyzer, false);
    }
  }

  /**
   * Makes sure a reader is available, closing the writer first if one is open.
   * Callers must hold the directory monitor.
   */
  private void createIndexReader() throws IOException {
    if (indexReader == null) {
      if (indexWriter != null) {
        indexWriter.close();
        indexWriter = null;
      }
      indexReader = IndexReader.open(directory);
    }
  }

  /**
   * Deletes all documents containing <code>term</code>.
   *
   * @see IndexReader#delete(Term)
   * @throws IllegalStateException if the index is closed
   */
  public void delete(Term term) throws IOException {
    synchronized (directory) {
      assureOpen();
      createIndexReader();
      indexReader.delete(term);
    }
  }

  /**
   * Deletes the document with the given document number.
   *
   * @see IndexReader#delete(int)
   * @throws IllegalStateException if the index is closed
   */
  public void delete(int docNum) throws IOException {
    synchronized (directory) {
      assureOpen();
      createIndexReader();
      indexReader.delete(docNum);
    }
  }

  /**
   * Returns the document count as reported by whichever handle is currently
   * active: <code>IndexWriter.docCount()</code> or <code>IndexReader.numDocs()</code>.
   *
   * @see IndexWriter#docCount()
   * @throws IllegalStateException if the index is closed, or if neither a writer
   *  nor a reader is available because an earlier switch between them failed
   */
  public int docCount() {
    synchronized (directory) {
      assureOpen();
      if (indexWriter != null) {
        return indexWriter.docCount();
      }
      if (indexReader != null) {
        return indexReader.numDocs();
      }
      // Both handles are null only when a previous reader/writer switch threw after
      // closing one handle and before opening the other. Report that state explicitly
      // instead of failing with a NullPointerException.
      throw new IllegalStateException("Index has neither an open writer nor an open reader");
    }
  }

  /**
   * Merges all segments together.
   *
   * @see IndexWriter#optimize()
   * @throws IllegalStateException if the index is closed
   */
  public void optimize() throws IOException {
    synchronized (directory) {
      assureOpen();
      createIndexWriter();
      indexWriter.optimize();
    }
  }

  /**
   * Close this index, writing all pending changes to disk.
   *
   * @throws IllegalStateException if the index has been closed before already
   */
  public void close() throws IOException {
    synchronized (directory) {
      if (!open) {
        throw new IllegalStateException("Index is closed already");
      }
      if (indexWriter != null) {
        indexWriter.close();
        indexWriter = null;
      } else if (indexReader != null) {
        // The null check matters: after a failed reader/writer switch neither handle
        // is set, and close() must still be able to mark the index as closed.
        indexReader.close();
        indexReader = null;
      }
      open = false;
    }
  }

  //TODO: implement from reader: isDeleted, hasDeletions
  //TODO: implement from writer: setXYZ...

}
package org.apache.lucene.index;

import java.io.File;
import java.io.IOException;
import java.util.Random;
import java.util.Stack;

import junit.framework.TestCase;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Stress test for {@link Index}: two threads randomly add, delete and optimize
 * on one shared Index instance; afterwards the document count must equal
 * (documents added) - (documents deleted).
 */
public class TestIndex extends TestCase {

  /** Number of random operations each worker thread performs. */
  private static final int ITERATIONS = 250;

  /** Next document id to hand out; guarded by the <code>idStack</code> monitor. */
  private int id = 0;

  /** Ids of documents that were added and not yet deleted; shared by all workers. */
  private Stack idStack = new Stack();

  // Constant seed. Note that the interleaving of the two worker threads is still
  // up to the scheduler, so individual runs are not bit-for-bit reproducible.
  private Random random = new Random(101);

  public void testIndex() throws IOException {
    testIndexInternal(0);
    testIndexInternal(10);
    testIndexInternal(50);
  }

  /**
   * Runs two worker threads against a freshly created index and checks the
   * resulting document count.
   *
   * @param maxWait upper bound (exclusive) in milliseconds for the random pause
   *  after each operation; 0 disables pausing
   */
  private void testIndexInternal(int maxWait) throws IOException {
    boolean create = true;
    // Use the platform temp directory instead of a hard-coded "/tmp" path.
    File indexDir = new File(System.getProperty("java.io.tmpdir"), "testindex");
    Directory rd = FSDirectory.getDirectory(indexDir, create);
    Index index = new Index(rd, new StandardAnalyzer(), create);

    // Reset the shared id state exactly once per run, BEFORE any worker starts.
    // Resetting it from the worker constructor would wipe the state of a worker
    // that is already running and lead to duplicate document ids.
    synchronized (idStack) {
      id = 0;
      idStack.clear();
    }

    System.out.println("START");
    IndexThread thread1 = new IndexThread(index, maxWait);
    thread1.start();
    IndexThread thread2 = new IndexThread(index, maxWait);
    thread2.start();
    waitFor(thread1);
    waitFor(thread2);

    System.out.println("\nfinal optimize....");
    index.optimize();
    System.out.println("index size=" + index.docCount());
    int added = thread1.added + thread2.added;
    System.out.println("added=" + added);
    int deleted = thread1.deleted + thread2.deleted;
    System.out.println("deleted=" + deleted);
    System.out.println("expected index size=" + (added - deleted));
    assertEquals(added - deleted, index.docCount());
    index.close();

    // A second close() must be rejected.
    try {
      index.close();
      fail();
    } catch (IllegalStateException e) {
      // expected exception
    }
  }

  /**
   * Blocks until <code>thread</code> has terminated. An interrupt does not abort
   * the wait; it is remembered and restored on the calling thread afterwards.
   */
  private static void waitFor(Thread thread) {
    boolean interrupted = false;
    while (true) {
      try {
        thread.join();
        break;
      } catch (InterruptedException e) {
        interrupted = true;
      }
    }
    if (interrupted) {
      Thread.currentThread().interrupt();
    }
  }

  /** Hands out the next unique document id. */
  private String nextId() {
    synchronized (idStack) {
      return Integer.toString(id++);
    }
  }

  /**
   * Removes and returns the most recently recorded document id, or
   * <code>null</code> if none is available. The emptiness check and the pop
   * happen under one lock so that two workers cannot race for the last element.
   */
  private String popId() {
    synchronized (idStack) {
      if (idStack.isEmpty()) {
        return null;
      }
      return (String) idStack.pop();
    }
  }

  /** Worker that performs a random mix of optimize, add and delete calls. */
  private class IndexThread extends Thread {

    private int maxWait = 10;
    private Index index;
    private int added = 0;
    private int deleted = 0;

    IndexThread(Index index, int maxWait) {
      this.index = index;
      this.maxWait = maxWait;
    }

    public void run() {
      try {
        for (int i = 0; i < ITERATIONS; i++) {
          if (random.nextInt(101) < 5) {
            System.out.println("--- optimize... ---");
            index.optimize();
          } else if (random.nextInt(101) < 70) {
            Document doc = getDocument();
            System.out.println("add doc id=" + doc.get("id"));
            index.addDocument(doc);
            // Publish the id only after the document is in the index, so no
            // other worker can try to delete it before it exists.
            idStack.push(doc.get("id"));
            added++;
          } else {
            // Delete the last document added and take it off the id stack so
            // that it won't be removed twice. With no ids available there is
            // nothing to delete in this iteration.
            String delId = popId();
            if (delId != null) {
              System.out.println("delete doc id = " + delId);
              index.delete(new Term("id", delId));
              deleted++;
            }
          }
          if (maxWait > 0) {
            try {
              int rand = random.nextInt(maxWait);
              System.out.println("waiting " + rand + "ms");
              Thread.sleep(rand);
            } catch (InterruptedException e) {
              e.printStackTrace();
            }
          }
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    /** Builds a document with a unique "id" field and two random "content" fields. */
    private Document getDocument() {
      Document doc = new Document();
      doc.add(new Field("id", nextId(), Field.Store.YES, Field.Index.UN_TOKENIZED));
      // add random stuff:
      doc.add(new Field("content", Integer.toString(random.nextInt(1000)),
          Field.Store.YES, Field.Index.TOKENIZED));
      doc.add(new Field("content", Integer.toString(random.nextInt(1000)),
          Field.Store.YES, Field.Index.TOKENIZED));
      return doc;
    }
  }

}
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]