Thanks for the code and performance metric, Robert. Have you had any issues with the deleted segments as Doug has been describing?
----- Original Message ---- From: Robert Engels <[EMAIL PROTECTED]> To: java-dev@lucene.apache.org; jason rutherglen <[EMAIL PROTECTED]> Sent: Monday, May 1, 2006 11:49:41 AM Subject: RE: GData, updateable IndexSearcher Attached. It uses subclasses and instanceof which is sort of "hackish" - to do it correctly requires changes to the base classes. -----Original Message----- From: jason rutherglen [mailto:[EMAIL PROTECTED] Sent: Monday, May 01, 2006 1:43 PM To: java-dev@lucene.apache.org Subject: Re: GData, updateable IndexSearcher Can you post your code? ----- Original Message ---- From: Robert Engels <[EMAIL PROTECTED]> To: java-dev@lucene.apache.org; jason rutherglen <[EMAIL PROTECTED]> Sent: Monday, May 1, 2006 11:33:06 AM Subject: RE: GData, updateable IndexSearcher fyi, using my reopen() implementation (which rereads the deletions) on a 135mb index, with 5000 iterations open & close time using new reader = 585609 open & close time using reopen = 27422 Almost 20x faster. Important in a highly interactive/incremental updating index. -----Original Message----- From: jason rutherglen [mailto:[EMAIL PROTECTED] Sent: Monday, May 01, 2006 1:24 PM To: java-dev@lucene.apache.org Subject: Re: GData, updateable IndexSearcher I wanted to post a quick hack to see if it is along the correct lines. A few of the questions regard whether to reuse existing MultiReaders or simply strip out only the SegmentReaders. I do a compare on the segment name and made it public. Thanks! 
/**
 * Builds a reader over the segments currently listed for the index behind
 * {@code indexReader}, reusing the already-open {@link SegmentReader}s of the
 * given {@link MultiReader} for every segment that still exists.
 *
 * <p>Sub-readers are assembled in the order of the freshly read
 * {@link SegmentInfos}. Segments that are no longer listed (e.g. merged away)
 * are left out of the result instead of being carried along inside the old
 * reader, which would expose their documents a second time through the merged
 * segment.
 *
 * <p>NOTE(review): reused segment readers keep whatever deletions they loaded
 * when they were first opened; nothing here re-reads them. Readers of dropped
 * segments are not closed by this method, because the old {@code indexReader}
 * is left usable - confirm who is expected to close them.
 *
 * <p>Relies on {@code MultiReader.getSubReaders()} and
 * {@code SegmentReader.segment} being accessible (the poster made them public).
 *
 * @param indexReader the reader to refresh; must be a {@link MultiReader}
 * @return a {@link SegmentReader} when the index has exactly one segment,
 *         otherwise a new {@link MultiReader}
 * @throws IOException if the segment infos or a new segment cannot be read
 * @throws RuntimeException if {@code indexReader} is not a {@link MultiReader}
 */
public static IndexReader reopen(IndexReader indexReader) throws IOException {
  if (!(indexReader instanceof MultiReader)) {
    throw new RuntimeException("indexReader not supported at this time");
  }
  MultiReader multiReader = (MultiReader) indexReader;

  SegmentInfos segmentInfos = new SegmentInfos();
  segmentInfos.read(indexReader.directory());
  if (segmentInfos.size() == 1) {
    // index is optimized
    return SegmentReader.get(segmentInfos, segmentInfos.info(0), false);
  }

  // Index the currently open segment readers by segment name.
  Map<String,SegmentReader> existingSegmentMap = new HashMap<String,SegmentReader>();
  getSegmentReaders(multiReader.getSubReaders(), existingSegmentMap);

  // One sub-reader per current segment, in segment order: reuse when the
  // segment is already open, open it otherwise.
  IndexReader[] readers = new IndexReader[segmentInfos.size()];
  for (int i = 0; i < readers.length; i++) {
    SegmentInfo segmentInfo = segmentInfos.info(i);
    SegmentReader existing = existingSegmentMap.get(segmentInfo.name);
    readers[i] = (existing != null) ? existing : SegmentReader.get(segmentInfo);
  }
  return new MultiReader(indexReader.directory(), segmentInfos, false, readers);
}

/**
 * Collects every {@link SegmentReader} reachable from {@code indexReaders}
 * into {@code map}, keyed by segment name. Nested {@link MultiReader}s are
 * descended into recursively; readers of any other type are ignored.
 *
 * @param indexReaders the readers to scan
 * @param map receives segment name to segment reader entries
 */
public static void getSegmentReaders(IndexReader[] indexReaders, Map<String,SegmentReader> map) {
  for (int x = 0; x < indexReaders.length; x++) {
    IndexReader reader = indexReaders[x];
    if (reader instanceof MultiReader) {
      getSegmentReaders(((MultiReader) reader).getSubReaders(), map);
    } else if (reader instanceof SegmentReader) {
      SegmentReader segmentReader = (SegmentReader) reader;
      map.put(segmentReader.segment, segmentReader);
    }
  }
}

-----Inline Attachment Follows-----

package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.store.Directory;

/**
 * Overridden to allow retrieval of the contained IndexReaders, which enables
 * IndexReaderUtils.reopen().
 */
public class MyMultiReader extends MultiReader {
  /** The sub-reader array handed to the constructor (not copied). */
  private final IndexReader[] readers;

  /**
   * Creates a multi reader over {@code subReaders}; {@code true} is passed to
   * the superclass constructor as its third (close-directory) argument.
   */
  public MyMultiReader(Directory directory, SegmentInfos infos, IndexReader[] subReaders) throws IOException {
    super(directory, infos, true, subReaders);
    readers = subReaders;
  }

  /** Returns the sub-reader array this reader was constructed with. */
  public IndexReader[] getReaders() {
    return readers;
  }

  /** Delegates to the superclass; redeclared here as {@code public}. */
  public void doCommit() throws IOException {
    super.doCommit();
  }
}

-----Inline Attachment Follows-----

package org.apache.lucene.index;

import java.io.IOException;
import java.util.*;

import org.apache.lucene.store.*;

/**
 * Static helpers for opening an index and cheaply re-opening it by reusing
 * the segment readers whose segments are still part of the index.
 */
public class IndexReaderUtils {
  /**
   * Maps each segment reader opened through this class to the SegmentInfo it
   * was opened for. Weak keys, so entries do not keep readers reachable.
   */
  private static Map segments = new WeakHashMap();

  static {
    // must use String class name, otherwise instantiation order will not allow the override to work
    System.setProperty("org.apache.lucene.SegmentReader.class", "org.apache.lucene.index.MySegmentReader");
  }

  /**
   * Reopens the IndexReader, possibly reusing the segments for greater
   * efficiency. The original IndexReader instance is closed, and the
   * reference is no longer valid.
   *
   * <p>A reader that is not a {@link MyMultiReader} cannot be reused: fresh
   * segment readers are opened and the original is closed. For a
   * {@link MyMultiReader}, sub-readers whose segment name is still listed are
   * kept (their deletions are re-read via {@link MySegmentReader#reopen()});
   * all other sub-readers are closed.
   *
   * <p>NOTE(review): the non-{@link MyMultiReader} path reads the segment
   * infos without taking the commit lock, unlike the other paths - confirm
   * this is intended.
   *
   * @param ir the reader to reopen
   * @return the new IndexReader
   * @throws IOException if reading the index or closing an old reader fails
   */
  public static synchronized IndexReader reopen(IndexReader ir) throws IOException {
    final Directory directory = ir.directory();

    if (!(ir instanceof MyMultiReader)) {
      SegmentInfos infos = new SegmentInfos();
      infos.read(directory);
      IndexReader[] readers = new IndexReader[infos.size()];
      for (int i = 0; i < infos.size(); i++) {
        // Register each reader so the next reopen() can recognise and reuse it.
        readers[i] = openSegment((SegmentInfo) infos.get(i));
      }
      IndexReader fresh = new MyMultiReader(directory, infos, readers);
      // Nothing of the original is reused; close it as the contract promises.
      // Done last so a failure above leaves the caller's reader intact.
      ir.close();
      return fresh;
    }

    final IndexReader[] oldreaders = ((MyMultiReader) ir).getReaders();
    final boolean[] stayopen = new boolean[oldreaders.length];

    synchronized (directory) { // in- & inter-process sync
      return (IndexReader) new Lock.With(
          directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
          IndexWriter.COMMIT_LOCK_TIMEOUT) {
        public Object doBody() throws IOException {
          SegmentInfos infos = new SegmentInfos();
          infos.read(directory);

          if (infos.size() == 1) { // index is optimized
            IndexReader single = MySegmentReader.get(infos.info(0));
            // None of the old sub-readers is carried over; release them all.
            for (int i = 0; i < oldreaders.length; i++) {
              oldreaders[i].close();
            }
            return single;
          }

          IndexReader[] readers = new IndexReader[infos.size()];
          for (int i = 0; i < infos.size(); i++) {
            SegmentInfo newsi = (SegmentInfo) infos.get(i);
            for (int j = 0; j < oldreaders.length; j++) {
              // Only readers registered by this class have an entry; anything
              // else is treated as not reusable and closed below.
              SegmentInfo si = (SegmentInfo) segments.get(oldreaders[j]);
              if (si != null && si.name.equals(newsi.name)) {
                ((MySegmentReader) oldreaders[j]).reopen();
                readers[i] = oldreaders[j];
                stayopen[j] = true;
                break; // segment names are matched one-to-one
              }
            }
            if (readers[i] == null) {
              readers[i] = openSegment(newsi);
            }
          }

          for (int i = 0; i < stayopen.length; i++) {
            if (!stayopen[i]) {
              oldreaders[i].close();
            }
          }
          return new MyMultiReader(directory, infos, readers);
        }
      }.run();
    }
  }

  /**
   * Opens the index stored at {@code path}.
   *
   * @param path file-system location of the index
   * @return a reader over the index
   * @throws IOException if the index cannot be read
   */
  public static synchronized IndexReader open(String path) throws IOException {
    Directory d = FSDirectory.getDirectory(path, false);
    return open(d, true);
  }

  /**
   * Opens {@code directory} under the commit lock. Returns a single
   * {@link MySegmentReader} when the index has exactly one segment, otherwise
   * a {@link MyMultiReader} whose sub-readers are registered for later reuse.
   *
   * <p>NOTE(review): {@code closeDirectory} is not used by this method.
   */
  private static IndexReader open(final Directory directory, final boolean closeDirectory) throws IOException {
    synchronized (directory) { // in- & inter-process sync
      return (IndexReader) new Lock.With(
          directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
          IndexWriter.COMMIT_LOCK_TIMEOUT) {
        public Object doBody() throws IOException {
          SegmentInfos infos = new SegmentInfos();
          infos.read(directory);
          if (infos.size() == 1) { // index is optimized
            return MySegmentReader.get(infos.info(0));
          }
          IndexReader[] readers = new IndexReader[infos.size()];
          for (int i = 0; i < infos.size(); i++) {
            readers[i] = openSegment(infos.info(i));
          }
          return new MyMultiReader(directory, infos, readers);
        }
      }.run();
    }
  }

  /** Opens a reader for {@code si} and records the pairing in {@link #segments}. */
  private static IndexReader openSegment(SegmentInfo si) throws IOException {
    IndexReader reader = MySegmentReader.get(si);
    segments.put(reader, si);
    return reader;
  }
}

-----Inline Attachment Follows-----

package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.util.BitVector;

/**
 * SegmentReader that remembers the SegmentInfo it was opened for, so that its
 * deletions can be re-read without reopening the whole segment.
 */
public class MySegmentReader extends SegmentReader {
  /** The segment this reader was opened for; assigned by {@link #get(SegmentInfo)}. */
  SegmentInfo si;

  public MySegmentReader() {
  }

  /**
   * Re-reads the deleted-documents bit vector from the segment's ".del" file
   * when {@code hasDeletions(si)} reports one.
   *
   * <p>NOTE(review): when no deletions are reported, the previously loaded
   * {@code deletedDocs} is left untouched, so deletions undone elsewhere would
   * presumably remain visible here - confirm whether that case matters.
   *
   * @throws IOException if the deletions file cannot be read
   */
  public void reopen() throws IOException {
    if (hasDeletions(si)) {
      deletedDocs = new BitVector(directory(), si.name + ".del");
    }
  }

  /**
   * Opens a reader for {@code si} through {@code SegmentReader.get} and
   * remembers {@code si} on it. The cast assumes {@code SegmentReader.get}
   * hands back a MySegmentReader (see the system property set by
   * IndexReaderUtils).
   *
   * @param si the segment to open
   * @return the opened reader
   * @throws IOException if the segment cannot be opened
   */
  public static SegmentReader get(SegmentInfo si) throws IOException {
    MySegmentReader reader = (MySegmentReader) SegmentReader.get(si);
    reader.si = si;
    return reader;
  }
}

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]