cutting 2004/02/19 10:28:59 Modified: . CHANGES.txt src/java/org/apache/lucene/index FilterIndexReader.java IndexReader.java SegmentReader.java Added: src/java/org/apache/lucene/index MultiReader.java Removed: src/java/org/apache/lucene/index SegmentsReader.java Log: Added MultiReader, an IndexReader that combines multiple other IndexReaders. Revision Changes Path 1.73 +3 -1 jakarta-lucene/CHANGES.txt Index: CHANGES.txt =================================================================== RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v retrieving revision 1.72 retrieving revision 1.73 diff -u -r1.72 -r1.73 --- CHANGES.txt 17 Feb 2004 19:00:31 -0000 1.72 +++ CHANGES.txt 19 Feb 2004 18:28:59 -0000 1.73 @@ -51,6 +51,8 @@ one that delegates through that of the Searcher. (Julien Nioche via Cutting) + 9. Added MultiReader, an IndexReader that combines multiple other + IndexReaders. (Cutting) 1.3 final 1.6 +3 -0 jakarta-lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Index: FilterIndexReader.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/FilterIndexReader.java,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- FilterIndexReader.java 15 Dec 2003 23:04:42 -0000 1.5 +++ FilterIndexReader.java 19 Feb 2004 18:28:59 -0000 1.6 @@ -128,6 +128,9 @@ public void undeleteAll() throws IOException { in.undeleteAll(); } public byte[] norms(String f) throws IOException { return in.norms(f); } + public void norms(String f, byte[] bytes, int offset) throws IOException { + in.norms(f, bytes, offset); + } public void setNorm(int d, String f, byte b) throws IOException { in.setNorm(d,f,b); } 1.25 +11 -3 jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java Index: IndexReader.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java,v retrieving revision 
1.24 retrieving revision 1.25 diff -u -r1.24 -r1.25 --- IndexReader.java 15 Dec 2003 23:04:42 -0000 1.24 +++ IndexReader.java 19 Feb 2004 18:28:59 -0000 1.25 @@ -117,10 +117,10 @@ if (infos.size() == 1) { // index is optimized return new SegmentReader(infos, infos.info(0), true); } else { - SegmentReader[] readers = new SegmentReader[infos.size()]; + IndexReader[] readers = new IndexReader[infos.size()]; for (int i = 0; i < infos.size(); i++) readers[i] = new SegmentReader(infos, infos.info(i), i==infos.size()-1); - return new SegmentsReader(infos, directory, readers); + return new MultiReader(directory, readers); } } }.run(); @@ -271,6 +271,14 @@ * @see Field#setBoost(float) */ public abstract byte[] norms(String field) throws IOException; + + /** Reads the byte-encoded normalization factor for the named field of every + * document. This is used by the search code to score documents. + * + * @see Field#setBoost(float) + */ + public abstract void norms(String field, byte[] bytes, int offset) + throws IOException; /** Expert: Resets the normalization factor for the named field of the named * document. The norm represents the product of the field's [EMAIL PROTECTED] 1.18 +2 -2 jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java Index: SegmentReader.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java,v retrieving revision 1.17 retrieving revision 1.18 diff -u -r1.17 -r1.18 --- SegmentReader.java 15 Dec 2003 23:04:42 -0000 1.17 +++ SegmentReader.java 19 Feb 2004 18:28:59 -0000 1.18 @@ -357,7 +357,7 @@ } /** Read norms into a pre-allocated array. 
*/ - synchronized void norms(String field, byte[] bytes, int offset) + public synchronized void norms(String field, byte[] bytes, int offset) throws IOException { Norm norm = (Norm)norms.get(field); 1.1 jakarta-lucene/src/java/org/apache/lucene/index/MultiReader.java Index: MultiReader.java =================================================================== package org.apache.lucene.index; /* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache Lucene" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact [EMAIL PROTECTED] * * 5. Products derived from this software may not be called "Apache", * "Apache Lucene", nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. 
* * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */ import java.io.IOException; import java.util.Collection; import java.util.HashSet; import java.util.Hashtable; import java.util.Iterator; import java.util.Set; import org.apache.lucene.document.Document; import org.apache.lucene.store.Directory; /** An IndexReader which reads multiple indexes, appending their content. * * @version $Id: MultiReader.java,v 1.1 2004/02/19 18:28:59 cutting Exp $ */ public class MultiReader extends IndexReader { private IndexReader[] readers; private int[] starts; // 1st docno for each segment private Hashtable normsCache = new Hashtable(); private int maxDoc = 0; private int numDocs = -1; private boolean hasDeletions = false; /** Construct reading the named set of readers. */ public MultiReader(IndexReader[] readers) throws IOException { this(readers.length == 0 ? null : readers[0].directory(), readers); } /** Construct reading the named set of readers. 
*/ public MultiReader(Directory directory, IndexReader[] readers) throws IOException { super(directory); this.readers = readers; starts = new int[readers.length + 1]; // build starts array for (int i = 0; i < readers.length; i++) { starts[i] = maxDoc; maxDoc += readers[i].maxDoc(); // compute maxDocs if (readers[i].hasDeletions()) hasDeletions = true; } starts[readers.length] = maxDoc; } public synchronized int numDocs() { if (numDocs == -1) { // check cache int n = 0; // cache miss--recompute for (int i = 0; i < readers.length; i++) n += readers[i].numDocs(); // sum from readers numDocs = n; } return numDocs; } public int maxDoc() { return maxDoc; } public Document document(int n) throws IOException { int i = readerIndex(n); // find segment num return readers[i].document(n - starts[i]); // dispatch to segment reader } public boolean isDeleted(int n) { int i = readerIndex(n); // find segment num return readers[i].isDeleted(n - starts[i]); // dispatch to segment reader } public boolean hasDeletions() { return hasDeletions; } protected synchronized void doDelete(int n) throws IOException { numDocs = -1; // invalidate cache int i = readerIndex(n); // find segment num readers[i].doDelete(n - starts[i]); // dispatch to segment reader hasDeletions = true; } public void undeleteAll() throws IOException { for (int i = 0; i < readers.length; i++) readers[i].undeleteAll(); hasDeletions = false; } private int readerIndex(int n) { // find reader for doc n: int lo = 0; // search starts array int hi = readers.length - 1; // for first element less while (hi >= lo) { int mid = (lo + hi) >> 1; int midValue = starts[mid]; if (n < midValue) hi = mid - 1; else if (n > midValue) lo = mid + 1; else { // found a match while (mid+1 < readers.length && starts[mid+1] == midValue) { mid++; // scan to last match } return mid; } } return hi; } public synchronized byte[] norms(String field) throws IOException { byte[] bytes = (byte[])normsCache.get(field); if (bytes != null) return bytes; // 
cache hit bytes = new byte[maxDoc()]; for (int i = 0; i < readers.length; i++) readers[i].norms(field, bytes, starts[i]); normsCache.put(field, bytes); // update cache return bytes; } public synchronized void norms(String field, byte[] result, int offset) throws IOException { byte[] bytes = (byte[])normsCache.get(field); if (bytes != null) // cache hit System.arraycopy(bytes, 0, result, offset, maxDoc()); for (int i = 0; i < readers.length; i++) // read from segments readers[i].norms(field, result, offset + starts[i]); } public synchronized void setNorm(int n, String field, byte value) throws IOException { normsCache.remove(field); // clear cache int i = readerIndex(n); // find segment num readers[i].setNorm(n-starts[i], field, value); // dispatch } public TermEnum terms() throws IOException { return new MultiTermEnum(readers, starts, null); } public TermEnum terms(Term term) throws IOException { return new MultiTermEnum(readers, starts, term); } public int docFreq(Term t) throws IOException { int total = 0; // sum freqs in segments for (int i = 0; i < readers.length; i++) total += readers[i].docFreq(t); return total; } public TermDocs termDocs() throws IOException { return new MultiTermDocs(readers, starts); } public TermPositions termPositions() throws IOException { return new MultiTermPositions(readers, starts); } protected synchronized void doClose() throws IOException { for (int i = 0; i < readers.length; i++) readers[i].close(); } /** * @see IndexReader#getFieldNames() */ public Collection getFieldNames() throws IOException { // maintain a unique set of field names Set fieldSet = new HashSet(); for (int i = 0; i < readers.length; i++) { IndexReader reader = readers[i]; Collection names = reader.getFieldNames(); // iterate through the field names and add them to the set for (Iterator iterator = names.iterator(); iterator.hasNext();) { String s = (String) iterator.next(); fieldSet.add(s); } } return fieldSet; } /** * @see IndexReader#getFieldNames(boolean) */ 
  public Collection getFieldNames(boolean indexed) throws IOException {
    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < readers.length; i++) {
      IndexReader reader = readers[i];
      Collection names = reader.getFieldNames(indexed);
      fieldSet.addAll(names);
    }
    return fieldSet;
  }
}

/** Merges the term enumerations of several readers into a single ordered
 * enumeration, summing document frequencies across readers when several
 * are positioned on the same term. */
class MultiTermEnum extends TermEnum {
  private SegmentMergeQueue queue;   // sub-enums, prioritized by current term

  private Term term;                 // term currently positioned on
  private int docFreq;               // summed docFreq for that term

  public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
    throws IOException {
    queue = new SegmentMergeQueue(readers.length);
    for (int i = 0; i < readers.length; i++) {
      IndexReader reader = readers[i];
      SegmentTermEnum termEnum;

      if (t != null) {
        termEnum = (SegmentTermEnum)reader.terms(t);
      } else
        termEnum = (SegmentTermEnum)reader.terms();

      SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
      // a seeked enum (t != null) is already positioned; an unseeked one
      // must be advanced once before it has a current term
      if (t == null ? smi.next() : termEnum.term() != null)
        queue.put(smi);              // initialize queue
      else
        smi.close();                 // empty enum: nothing to merge
    }

    if (t != null && queue.size() > 0) {
      next();                        // position on first term at or after t
    }
  }

  public boolean next() throws IOException {
    SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
    if (top == null) {
      term = null;                   // all sub-enums exhausted
      return false;
    }

    term = top.term;
    docFreq = 0;

    // pop and advance every sub-enum positioned on this same term
    while (top != null && term.compareTo(top.term) == 0) {
      queue.pop();
      docFreq += top.termEnum.docFreq();    // increment freq
      if (top.next())
        queue.put(top);              // restore queue
      else
        top.close();                 // done with a segment
      top = (SegmentMergeInfo)queue.top();
    }
    return true;
  }

  public Term term() {
    return term;
  }

  public int docFreq() {
    return docFreq;
  }

  public void close() throws IOException {
    queue.close();
  }
}

/** Concatenates the TermDocs of several readers, mapping each segment's
 * doc numbers into the composite numbering by adding the segment's start. */
class MultiTermDocs implements TermDocs {
  protected IndexReader[] readers;
  protected int[] starts;
  protected Term term;               // term of the most recent seek()

  protected int base = 0;            // doc-number base of current segment
  protected int pointer = 0;         // index of next segment to open

  private SegmentTermDocs[] segTermDocs;  // lazily created, cached per segment
  protected SegmentTermDocs current;      // == segTermDocs[pointer]

  public MultiTermDocs(IndexReader[] r, int[] s) {
    readers = r;
    starts = s;

    segTermDocs = new SegmentTermDocs[r.length];
  }

  public int doc() {
    return base + current.doc;
  }

  public int freq() {
    return current.freq;
  }

  public void seek(Term term) {
    // reset to the first segment; sub-enumerators are re-seeked lazily
    // in termDocs(int) when each segment is reached
    this.term = term;
    this.base = 0;
    this.pointer = 0;
    this.current = null;
  }

  public void seek(TermEnum termEnum) throws IOException {
    seek(termEnum.term());
  }

  public boolean next() throws IOException {
    if (current != null && current.next()) {
      return true;
    } else if (pointer < readers.length) {
      // current segment exhausted (or none open): advance to the next one
      base = starts[pointer];
      current = termDocs(pointer++);
      return next();                 // recurse: new segment may be empty
    } else
      return false;
  }

  /** Optimized implementation. */
  public int read(final int[] docs, final int[] freqs)
    throws IOException {
    while (true) {
      while (current == null) {
        if (pointer < readers.length) {      // try next segment
          base = starts[pointer];
          current = termDocs(pointer++);
        } else {
          return 0;
        }
      }
      int end = current.read(docs, freqs);
      if (end == 0) {                // none left in segment
        current = null;
      } else {                       // got some
        final int b = base;          // adjust doc numbers
        for (int i = 0; i < end; i++)
          docs[i] += b;
        return end;
      }
    }
  }

  /** As yet unoptimized implementation.
   */
  public boolean skipTo(int target) throws IOException {
    do {
      if (!next())
        return false;
    } while (target > doc());
    return true;
  }

  /** Return the (cached) sub-enumerator for segment i, seeked to term;
   * null if seek() has not been called yet. */
  private SegmentTermDocs termDocs(int i) throws IOException {
    if (term == null)
      return null;
    SegmentTermDocs result = segTermDocs[i];
    if (result == null)
      result = segTermDocs[i] = termDocs(readers[i]);  // create and cache
    result.seek(term);
    return result;
  }

  /** Factory hook: overridden by MultiTermPositions. */
  protected SegmentTermDocs termDocs(IndexReader reader)
    throws IOException {
    return (SegmentTermDocs)reader.termDocs();
  }

  public void close() throws IOException {
    for (int i = 0; i < segTermDocs.length; i++) {
      if (segTermDocs[i] != null)
        segTermDocs[i].close();
    }
  }
}

/** A MultiTermDocs that also exposes positions, by delegating to each
 * segment reader's TermPositions. */
class MultiTermPositions extends MultiTermDocs implements TermPositions {
  public MultiTermPositions(IndexReader[] r, int[] s) {
    super(r,s);
  }

  protected SegmentTermDocs termDocs(IndexReader reader)
    throws IOException {
    return (SegmentTermDocs)reader.termPositions();
  }

  public int nextPosition() throws IOException {
    return ((SegmentTermPositions)current).nextPosition();
  }
}
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]