cutting 2004/02/09 14:03:42 Modified: src/java/org/apache/lucene/search/spans NearSpans.java SpanOrQuery.java SpanTermQuery.java src/test/org/apache/lucene/search TestBasics.java Removed: src/java/org/apache/lucene/search/spans SpanQueue.java Log: Fixed a few bugs in span searching. Revision Changes Path 1.3 +49 -21 jakarta-lucene/src/java/org/apache/lucene/search/spans/NearSpans.java Index: NearSpans.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/spans/NearSpans.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- NearSpans.java 2 Feb 2004 13:27:52 -0000 1.2 +++ NearSpans.java 9 Feb 2004 22:03:42 -0000 1.3 @@ -23,6 +23,7 @@ import java.util.Iterator; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.PriorityQueue; class NearSpans implements Spans { private SpanNearQuery query; @@ -36,22 +37,48 @@ private int totalLength; // sum of current lengths - private SpanQueue queue; // sorted queue of spans + private CellQueue queue; // sorted queue of spans private SpansCell max; // max element in queue private boolean more = true; // true iff not done private boolean firstTime = true; // true before first next() - private boolean queueStale = false; // true if queue not sorted - private boolean listStale = true; // true if list not sorted + private class CellQueue extends PriorityQueue { + public CellQueue(int size) { + initialize(size); + } + + protected final boolean lessThan(Object o1, Object o2) { + SpansCell spans1 = (SpansCell)o1; + SpansCell spans2 = (SpansCell)o2; + if (spans1.doc() == spans2.doc()) { + if (spans1.start() == spans2.start()) { + if (spans1.end() == spans2.end()) { + return spans1.index > spans2.index; + } else { + return spans1.end() < spans2.end(); + } + } else { + return spans1.start() < spans2.start(); + } + } else { + return spans1.doc() < spans2.doc(); + } + } + } + /** Wraps a Spans, and can be used to form a linked list. */ private class SpansCell implements Spans { private Spans spans; private SpansCell next; private int length = -1; + private int index; - public SpansCell(Spans spans) { this.spans = spans; } + public SpansCell(Spans spans, int index) { + this.spans = spans; + this.index = index; + } public boolean next() throws IOException { if (length != -1) // subtract old length @@ -93,7 +120,7 @@ public int start() { return spans.start(); } public int end() { return spans.end(); } - public String toString() { return spans.toString(); } + public String toString() { return spans.toString() + "#" + index; } } public NearSpans(SpanNearQuery query, IndexReader reader) @@ -103,10 +130,10 @@ this.inOrder = query.isInOrder(); SpanQuery[] clauses = query.getClauses(); // initialize spans & list - queue = new SpanQueue(clauses.length); + queue = new CellQueue(clauses.length); for (int i = 0; i < clauses.length; i++) { SpansCell cell = // construct clause spans - new SpansCell(clauses[i].getSpans(reader)); + new SpansCell(clauses[i].getSpans(reader), i); ordered.add(cell); // add to ordered } } @@ -114,18 +141,21 @@ public boolean next() throws IOException { if (firstTime) { initList(true); - listToQueue(); // initialize queue + listToQueue(); // initialize queue firstTime = false; - } else { - more = last.next(); // trigger scan - queueStale = true; + } else if (more) { + more = min().next(); // trigger further scanning + if (more) + queue.adjustTop(); // maintain queue } while (more) { - if (listStale) { // maintain list + boolean queueStale = false; + + if (min().doc() != max.doc()) { // maintain list queueToList(); - listStale = false; + queueStale = true; } // skip to doc w/ all clauses @@ -152,13 +182,8 @@ } more = min().next(); // trigger further scanning - - if (more) { + if (more) queue.adjustTop(); // maintain queue - if (min().doc() != max.doc()) { - listStale = true; // maintain list - } - } } return false; // no more matches } @@ -175,7 +200,6 @@ if (more) { listToQueue(); - listStale = true; if (min().doc() == max.doc()) { // at a match? int matchLength = max.end() - min().start(); @@ -183,6 +207,7 @@ return true; } } + return next(); // no, scan } @@ -195,7 +220,10 @@ public int start() { return min().start(); } public int end() { return max.end(); } - public String toString() { return "spans(" + query.toString() + ")"; } + public String toString() { + return "spans("+query.toString()+")@"+ + (firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END")); + } private void initList(boolean next) throws IOException { for (int i = 0; more && i < ordered.size(); i++) { 1.3 +48 -9 jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java Index: SpanOrQuery.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- SpanOrQuery.java 2 Feb 2004 13:27:52 -0000 1.2 +++ SpanOrQuery.java 9 Feb 2004 22:03:42 -0000 1.3 @@ -24,6 +24,7 @@ import java.util.Iterator; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.PriorityQueue; /** Matches the union of its clauses.*/ public class SpanOrQuery extends SpanQuery { @@ -78,6 +79,27 @@ return buffer.toString(); } + private class SpanQueue extends PriorityQueue { + public SpanQueue(int size) { + initialize(size); + } + + protected final boolean lessThan(Object o1, Object o2) { + Spans spans1 = (Spans)o1; + Spans spans2 = (Spans)o2; + if (spans1.doc() == spans2.doc()) { + if (spans1.start() == spans2.start()) { + return spans1.end() < spans2.end(); + } else { + return spans1.start() < spans2.start(); + } + } else { + return spans1.doc() < spans2.doc(); + } + } + } + + public Spans getSpans(final IndexReader reader) throws IOException { if (clauses.size() == 1) // optimize 1-clause case return ((SpanQuery)clauses.get(0)).getSpans(reader); @@ -101,6 +123,8 @@ Spans spans = (Spans)all.get(i); if (spans.next()) { // move to first entry queue.put(spans); // build queue + } else { + all.remove(i--); } } firstTime = false; @@ -111,26 +135,39 @@ return false; } - if (top().next()) { // move to next + if (top().next()) { // move to next queue.adjustTop(); return true; } - queue.pop(); // exhausted a clause + all.remove(queue.pop()); // exhausted a clause + return queue.size() != 0; } private Spans top() { return (Spans)queue.top(); } public boolean skipTo(int target) throws IOException { - queue.clear(); // clear the queue - for (int i = 0; i < all.size(); i++) { - Spans spans = (Spans)all.get(i); - if (spans.skipTo(target)) { // skip each spans in all - queue.put(spans); // rebuild queue + if (firstTime) { + for (int i = 0; i < all.size(); i++) { + Spans spans = (Spans)all.get(i); + if (spans.skipTo(target)) { // skip each spans in all + queue.put(spans); // build queue + } else { + all.remove(i--); + } + } + firstTime = false; + } else { + while (queue.size() != 0 && top().doc() < target) { + if (top().skipTo(target)) { + queue.adjustTop(); + } else { + all.remove(queue.pop()); + } } } - firstTime = false; + return queue.size() != 0; } @@ -139,7 +176,9 @@ public int end() { return top().end(); } public String toString() { - return "spans(" + SpanOrQuery.this.toString() + ")"; + return "spans("+SpanOrQuery.this+")@"+ + (firstTime?"START" + :(queue.size()>0?(doc()+":"+start()+"-"+end()):"END")); } }; 1.3 +9 -4 jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java Index: SpanTermQuery.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- SpanTermQuery.java 2 Feb 2004 13:27:52 -0000 1.2 +++ SpanTermQuery.java 9 Feb 2004 22:03:42 -0000 1.3 @@ -54,15 +54,17 @@ return new Spans() { private TermPositions positions = reader.termPositions(term); - private int doc; + private int doc = -1; private int freq; private int count; private int position; public boolean next() throws IOException { if (count == freq) { - if (!positions.next()) + if (!positions.next()) { + doc = Integer.MAX_VALUE; return false; + } doc = positions.doc(); freq = positions.freq(); count = 0; @@ -73,8 +75,10 @@ } public boolean skipTo(int target) throws IOException { - if (!positions.skipTo(target)) + if (!positions.skipTo(target)) { + doc = Integer.MAX_VALUE; return false; + } doc = positions.doc(); freq = positions.freq(); @@ -91,7 +95,8 @@ public int end() { return position + 1; } public String toString() { - return "spans(" + SpanTermQuery.this.toString() + ")"; + return "spans(" + SpanTermQuery.this.toString() + ")@"+ + (doc==-1?"START":(doc==Integer.MAX_VALUE)?"END":doc+"-"+position); } }; 1.3 +24 -0 jakarta-lucene/src/test/org/apache/lucene/search/TestBasics.java Index: TestBasics.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/search/TestBasics.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- TestBasics.java 30 Jan 2004 22:10:00 -0000 1.2 +++ TestBasics.java 9 Feb 2004 22:03:42 -0000 1.3 @@ -256,6 +256,30 @@ //System.out.println(searcher.explain(query, 333)); } + public void testSpanNearOr() throws Exception { + + SpanTermQuery t1 = new SpanTermQuery(new Term("field","six")); + SpanTermQuery t3 = new SpanTermQuery(new Term("field","seven")); + + SpanTermQuery t5 = new SpanTermQuery(new Term("field","seven")); + SpanTermQuery t6 = new SpanTermQuery(new Term("field","six")); + + SpanOrQuery to1 = new SpanOrQuery(new SpanQuery[] {t1, t3}); + SpanOrQuery to2 = new SpanOrQuery(new SpanQuery[] {t5, t6}); + + SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {to1, to2}, + 10, true); + + checkHits(query, new int[] + {606, 607, 626, 627, 636, 637, 646, 647, + 656, 657, 666, 667, 676, 677, 686, 687, 696, 697, + 706, 707, 726, 727, 736, 737, 746, 747, + 756, 757, 766, 767, 776, 777, 786, 787, 796, 797}); + } + + + + private void checkHits(Query query, int[] results) throws IOException { Hits hits = searcher.search(query);
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]