goller      2004/10/06 09:19:52

  Modified:    src/java/org/apache/lucene/queryParser QueryParser.jj
                        QueryParser.java
               src/java/org/apache/lucene/search FuzzyQuery.java
                        FuzzyTermEnum.java FilteredTermEnum.java
               src/test/org/apache/lucene/search TestFuzzyQuery.java
               src/test/org/apache/lucene/queryParser TestQueryParser.java
  Log:
  FuzzyQuery is now using a default prefix length of 2 in
  order to become usable for big indices. Furthermore, it
  no longer throws TooManyClausesException.
  
  Revision  Changes    Path
  1.51      +24 -5     
jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj
  
  Index: QueryParser.jj
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v
  retrieving revision 1.50
  retrieving revision 1.51
  diff -u -r1.50 -r1.51
  --- QueryParser.jj    15 Sep 2004 13:37:26 -0000      1.50
  +++ QueryParser.jj    6 Oct 2004 16:19:51 -0000       1.51
  @@ -97,6 +97,7 @@
     String field;
     int phraseSlop = 0;
     float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
  +  int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
     Locale locale = Locale.getDefault();
   
     /** Parses a query string, returning a {@link 
org.apache.lucene.search.Query}.
  @@ -154,17 +155,35 @@
     }
     
      /**
  -   * Get the default minimal similarity for fuzzy queries.
  +   * Get the minimal similarity for fuzzy queries.
      */
     public float getFuzzyMinSim() {
         return fuzzyMinSim;
     }
  +  
     /**
  -   *Set the default minimum similarity for fuzzy queries.
  +   * Set the minimum similarity for fuzzy queries.
  +   * Default is 0.5f.
      */
     public void setFuzzyMinSim(float fuzzyMinSim) {
         this.fuzzyMinSim = fuzzyMinSim;
     }
  +  
  +   /**
  +   * Get the prefix length for fuzzy queries. 
  +   * @return Returns the fuzzyPrefixLength.
  +   */
  +  public int getFuzzyPrefixLength() {
  +    return fuzzyPrefixLength;
  +  }
  +  
  +  /**
  +   * Set the prefix length for fuzzy queries. Default is 2.
  +   * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
  +   */
  +  public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
  +    this.fuzzyPrefixLength = fuzzyPrefixLength;
  +  }
   
     /**
      * Sets the default slop for phrases.  If zero, then exact phrase matches
  @@ -441,8 +460,8 @@
       Term t = new Term(field, termStr);
       return new PrefixQuery(t);
     }
  -
  -  /**
  +  
  +   /**
      * Factory method for generating a query (similar to
      * ({@link #getWildcardQuery}). Called when parser parses
      * an input term token that has the fuzzy suffix (~) appended.
  @@ -456,7 +475,7 @@
     protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) 
throws ParseException
     {
       Term t = new Term(field, termStr);
  -    return new FuzzyQuery(t, minSimilarity);
  +    return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
     }
   
     /**
  
  
  
  1.18      +23 -4     
jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.java
  
  Index: QueryParser.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.java,v
  retrieving revision 1.17
  retrieving revision 1.18
  diff -u -r1.17 -r1.18
  --- QueryParser.java  15 Sep 2004 13:37:26 -0000      1.17
  +++ QueryParser.java  6 Oct 2004 16:19:51 -0000       1.18
  @@ -74,6 +74,7 @@
     String field;
     int phraseSlop = 0;
     float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
  +  int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
     Locale locale = Locale.getDefault();
   
     /** Parses a query string, returning a {@link 
org.apache.lucene.search.Query}.
  @@ -131,18 +132,36 @@
     }
   
      /**
  -   * Get the default minimal similarity for fuzzy queries.
  +   * Get the minimal similarity for fuzzy queries.
      */
     public float getFuzzyMinSim() {
         return fuzzyMinSim;
     }
  +
     /**
  -   *Set the default minimum similarity for fuzzy queries.
  +   * Set the minimum similarity for fuzzy queries.
  +   * Default is 0.5f.
      */
     public void setFuzzyMinSim(float fuzzyMinSim) {
         this.fuzzyMinSim = fuzzyMinSim;
     }
   
  +   /**
  +   * Get the prefix length for fuzzy queries. 
  +   * @return Returns the fuzzyPrefixLength.
  +   */
  +  public int getFuzzyPrefixLength() {
  +    return fuzzyPrefixLength;
  +  }
  +
  +  /**
  +   * Set the prefix length for fuzzy queries. Default is 2.
  +   * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
  +   */
  +  public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
  +    this.fuzzyPrefixLength = fuzzyPrefixLength;
  +  }
  +
     /**
      * Sets the default slop for phrases.  If zero, then exact phrase matches
      * are required.  Default value is zero.
  @@ -419,7 +438,7 @@
       return new PrefixQuery(t);
     }
   
  -  /**
  +   /**
      * Factory method for generating a query (similar to
      * ({@link #getWildcardQuery}). Called when parser parses
      * an input term token that has the fuzzy suffix (~) appended.
  @@ -433,7 +452,7 @@
     protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) 
throws ParseException
     {
       Term t = new Term(field, termStr);
  -    return new FuzzyQuery(t, minSimilarity);
  +    return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
     }
   
     /**
  
  
  
  1.8       +75 -7     jakarta-lucene/src/java/org/apache/lucene/search/FuzzyQuery.java
  
  Index: FuzzyQuery.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FuzzyQuery.java,v
  retrieving revision 1.7
  retrieving revision 1.8
  diff -u -r1.7 -r1.8
  --- FuzzyQuery.java   15 Sep 2004 19:44:01 -0000      1.7
  +++ FuzzyQuery.java   6 Oct 2004 16:19:51 -0000       1.8
  @@ -18,6 +18,8 @@
   
   import org.apache.lucene.index.IndexReader;
   import org.apache.lucene.index.Term;
  +import org.apache.lucene.util.PriorityQueue;
  +
   import java.io.IOException;
   
   /** Implements the fuzzy search query. The similiarity measurement
  @@ -26,6 +28,8 @@
   public final class FuzzyQuery extends MultiTermQuery {
     
     public final static float defaultMinSimilarity = 0.5f;
  +  public final static int defaultPrefixLength = 2;
  +  
     private float minimumSimilarity;
     private int prefixLength;
     
  @@ -48,16 +52,14 @@
     public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) throws 
IllegalArgumentException {
       super(term);
       
  -    if (minimumSimilarity > 1.0f)
  -      throw new IllegalArgumentException("minimumSimilarity > 1");
  +    if (minimumSimilarity >= 1.0f)
  +      throw new IllegalArgumentException("minimumSimilarity >= 1");
       else if (minimumSimilarity < 0.0f)
         throw new IllegalArgumentException("minimumSimilarity < 0");
  -    this.minimumSimilarity = minimumSimilarity;
       
  +    this.minimumSimilarity = minimumSimilarity;
       if(prefixLength < 0)
           throw new IllegalArgumentException("prefixLength < 0");
  -    else if(prefixLength >= term.text().length())
  -        throw new IllegalArgumentException("prefixLength >= term.text().length()");
       this.prefixLength = prefixLength;
     }
     
  @@ -65,14 +67,14 @@
      * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 
minimumSimilarity, 0)}.
      */
     public FuzzyQuery(Term term, float minimumSimilarity) throws 
IllegalArgumentException {
  -      this(term, minimumSimilarity, 0);
  +      this(term, minimumSimilarity, defaultPrefixLength);
     }
   
     /**
      * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}.
      */
     public FuzzyQuery(Term term) {
  -    this(term, defaultMinSimilarity, 0);
  +    this(term, defaultMinSimilarity, defaultPrefixLength);
     }
     
     /**
  @@ -95,8 +97,74 @@
     protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
       return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
     }
  +  
  +  public Query rewrite(IndexReader reader) throws IOException {
  +    FilteredTermEnum enumerator = getEnum(reader);
  +    int maxClauseCount = BooleanQuery.getMaxClauseCount();
  +    ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
  +    
  +    try {
  +      do {
  +        float minScore = 0.0f;
  +        float score = 0.0f;
  +        Term t = enumerator.term();
  +        if (t != null) {
  +          score = enumerator.difference();
  +          // terms come in alphabetical order, therefore if queue is full and score
  +          // not bigger than minScore, we can skip
  +          if(stQueue.size() < maxClauseCount || score > minScore){
  +            stQueue.insert(new ScoreTerm(t, score));
  +            minScore = ((ScoreTerm)stQueue.top()).score; // maintain minScore
  +          }
  +        }
  +      } while (enumerator.next());
  +    } finally {
  +      enumerator.close();
  +    }
  +    
  +    BooleanQuery query = new BooleanQuery();
  +    int size = stQueue.size();
  +    for(int i = 0; i < size; i++){
  +      ScoreTerm st = (ScoreTerm) stQueue.pop();
  +      TermQuery tq = new TermQuery(st.term);      // found a match
  +      tq.setBoost(getBoost() * st.score); // set the boost
  +      query.add(tq, BooleanClause.Occur.SHOULD);          // add to query
  +    }
  +
  +    return query;
  +  }
       
     public String toString(String field) {
       return super.toString(field) + '~' + Float.toString(minimumSimilarity);
  +  }
  +  
  +  private static class ScoreTerm{
  +    public Term term;
  +    public float score;
  +    
  +    public ScoreTerm(Term term, float score){
  +      this.term = term;
  +      this.score = score;
  +    }
  +  }
  +  
  +  private static class ScoreTermQueue extends PriorityQueue {
  +    
  +    public ScoreTermQueue(int size){
  +      initialize(size);
  +    }
  +    
  +    /* (non-Javadoc)
  +     * @see org.apache.lucene.util.PriorityQueue#lessThan(java.lang.Object, 
java.lang.Object)
  +     */
  +    protected boolean lessThan(Object a, Object b) {
  +      ScoreTerm termA = (ScoreTerm)a;
  +      ScoreTerm termB = (ScoreTerm)b;
  +      if (termA.score == termB.score)
  +        return termA.term.compareTo(termB.term) > 0;
  +      else
  +        return termA.score < termB.score;
  +    }
  +    
     }
   }
  
  
  
  1.9       +26 -14    
jakarta-lucene/src/java/org/apache/lucene/search/FuzzyTermEnum.java
  
  Index: FuzzyTermEnum.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FuzzyTermEnum.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- FuzzyTermEnum.java        14 Sep 2004 13:45:15 -0000      1.8
  +++ FuzzyTermEnum.java        6 Oct 2004 16:19:51 -0000       1.9
  @@ -25,7 +25,7 @@
     <p>Term enumerations are always ordered by Term.compareTo().  Each term in
     the enumeration is greater than all that precede it.  */
   public final class FuzzyTermEnum extends FilteredTermEnum {
  -    double distance;
  +    float similarity;
       boolean endEnum = false;
   
       Term searchTerm = null;
  @@ -35,7 +35,7 @@
       String prefix = "";
       int prefixLength = 0;
       float minimumSimilarity;
  -    double scale_factor;
  +    float scale_factor;
       
       
       /**
  @@ -47,7 +47,7 @@
        * @see #FuzzyTermEnum(IndexReader, Term, float, int)
        */
       public FuzzyTermEnum(IndexReader reader, Term term) throws IOException {
  -      this(reader, term, FuzzyQuery.defaultMinSimilarity, 0);
  +      this(reader, term, FuzzyQuery.defaultMinSimilarity, 
FuzzyQuery.defaultPrefixLength);
       }
       
       /**
  @@ -60,7 +60,7 @@
        * @see #FuzzyTermEnum(IndexReader, Term, float, int)
        */
       public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity) throws 
IOException {
  -      this(reader, term, minSimilarity, 0);
  +      this(reader, term, minSimilarity, FuzzyQuery.defaultPrefixLength);
       }
       
       /**
  @@ -76,18 +76,30 @@
        */
       public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int 
prefixLength) throws IOException {
           super();
  +        
  +        if (minimumSimilarity >= 1.0f)
  +          throw new IllegalArgumentException("minimumSimilarity >= 1");
  +        else if (minimumSimilarity < 0.0f)
  +          throw new IllegalArgumentException("minimumSimilarity < 0");
  +        
           minimumSimilarity = minSimilarity;
           scale_factor = 1.0f / (1.0f - minimumSimilarity);
           searchTerm = term;
           field = searchTerm.field();
           text = searchTerm.text();
           textlen = text.length();
  -        if(prefixLength > 0 && prefixLength < textlen){
  -            this.prefixLength = prefixLength;
  -            prefix = text.substring(0, prefixLength);
  -            text = text.substring(prefixLength);
  -            textlen = text.length();
  -        }
  +        
  +        if(prefixLength < 0)
  +          throw new IllegalArgumentException("prefixLength < 0");
  +        
  +        if(prefixLength > textlen)
  +          prefixLength = textlen;
  +        
  +        this.prefixLength = prefixLength;
  +        prefix = text.substring(0, prefixLength);
  +        text = text.substring(prefixLength);
  +        textlen = text.length();
  +        
           setEnum(reader.terms(new Term(searchTerm.field(), prefix)));
       }
       
  @@ -101,15 +113,15 @@
               String target = termText.substring(prefixLength);
               int targetlen = target.length();
               int dist = editDistance(text, target, textlen, targetlen);
  -            distance = 1 - ((double)dist / (double)Math.min(textlen, targetlen));
  -            return (distance > minimumSimilarity);
  +            similarity = 1 - ((float)dist / (float) (prefixLength + 
Math.min(textlen, targetlen)));
  +            return (similarity > minimumSimilarity);
           }
           endEnum = true;
           return false;
       }
       
  -    protected final float difference() {
  -        return (float)((distance - minimumSimilarity) * scale_factor);
  +    public final float difference() {
  +        return (float)((similarity - minimumSimilarity) * scale_factor);
       }
       
       public final boolean endEnum() {
  
  
  
  1.7       +1 -1      
jakarta-lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java
  
  Index: FilteredTermEnum.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- FilteredTermEnum.java     6 Aug 2004 20:19:12 -0000       1.6
  +++ FilteredTermEnum.java     6 Oct 2004 16:19:51 -0000       1.7
  @@ -34,7 +34,7 @@
       protected abstract boolean termCompare(Term term);
       
       /** Equality measure on the term */
  -    protected abstract float difference();
  +    public abstract float difference();
   
       /** Indiciates the end of the enumeration has been reached */
       protected abstract boolean endEnum();
  
  
  
  1.4       +117 -14   
jakarta-lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java
  
  Index: TestFuzzyQuery.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- TestFuzzyQuery.java       6 Sep 2004 22:01:49 -0000       1.3
  +++ TestFuzzyQuery.java       6 Oct 2004 16:19:52 -0000       1.4
  @@ -47,20 +47,40 @@
       writer.close();
       IndexSearcher searcher = new IndexSearcher(directory);
   
  -    FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"));   
  +    FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), 
FuzzyQuery.defaultMinSimilarity, 0);   
       Hits hits = searcher.search(query);
       assertEquals(3, hits.length());
  +    
  +    // same with prefix
  +    query = new FuzzyQuery(new Term("field", "aaaaa"), 
FuzzyQuery.defaultMinSimilarity, 1);   
  +    hits = searcher.search(query);
  +    assertEquals(3, hits.length());
  +    query = new FuzzyQuery(new Term("field", "aaaaa"), 
FuzzyQuery.defaultMinSimilarity, 2);   
  +    hits = searcher.search(query);
  +    assertEquals(3, hits.length());
  +    query = new FuzzyQuery(new Term("field", "aaaaa"), 
FuzzyQuery.defaultMinSimilarity, 3);   
  +    hits = searcher.search(query);
  +    assertEquals(3, hits.length());
  +    query = new FuzzyQuery(new Term("field", "aaaaa"), 
FuzzyQuery.defaultMinSimilarity, 4);   
  +    hits = searcher.search(query);
  +    assertEquals(2, hits.length());
  +    query = new FuzzyQuery(new Term("field", "aaaaa"), 
FuzzyQuery.defaultMinSimilarity, 5);   
  +    hits = searcher.search(query);
  +    assertEquals(1, hits.length());
  +    query = new FuzzyQuery(new Term("field", "aaaaa"), 
FuzzyQuery.defaultMinSimilarity, 6);   
  +    hits = searcher.search(query);
  +    assertEquals(1, hits.length());
   
       // not similar enough:
  -    query = new FuzzyQuery(new Term("field", "xxxxx"));      
  +    query = new FuzzyQuery(new Term("field", "xxxxx"), 
FuzzyQuery.defaultMinSimilarity, 0);          
       hits = searcher.search(query);
       assertEquals(0, hits.length());
  -    query = new FuzzyQuery(new Term("field", "aaccc"));   // edit distance to 
"aaaaa" = 3
  +    query = new FuzzyQuery(new Term("field", "aaccc"), 
FuzzyQuery.defaultMinSimilarity, 0);   // edit distance to "aaaaa" = 3
       hits = searcher.search(query);
       assertEquals(0, hits.length());
   
       // query identical to a word in the index:
  -    query = new FuzzyQuery(new Term("field", "aaaaa"));   
  +    query = new FuzzyQuery(new Term("field", "aaaaa"), 
FuzzyQuery.defaultMinSimilarity, 0);   
       hits = searcher.search(query);
       assertEquals(3, hits.length());
       assertEquals(hits.doc(0).get("field"), ("aaaaa"));
  @@ -69,20 +89,71 @@
       assertEquals(hits.doc(2).get("field"), ("aaabb"));
   
       // query similar to a word in the index:
  -    query = new FuzzyQuery(new Term("field", "aaaac"));   
  +    query = new FuzzyQuery(new Term("field", "aaaac"), 
FuzzyQuery.defaultMinSimilarity, 0);   
  +    hits = searcher.search(query);
  +    assertEquals(3, hits.length());
  +    assertEquals(hits.doc(0).get("field"), ("aaaaa"));
  +    assertEquals(hits.doc(1).get("field"), ("aaaab"));
  +    assertEquals(hits.doc(2).get("field"), ("aaabb"));
  +    
  +    // now with prefix
  +    query = new FuzzyQuery(new Term("field", "aaaac"), 
FuzzyQuery.defaultMinSimilarity, 1);   
       hits = searcher.search(query);
       assertEquals(3, hits.length());
       assertEquals(hits.doc(0).get("field"), ("aaaaa"));
       assertEquals(hits.doc(1).get("field"), ("aaaab"));
       assertEquals(hits.doc(2).get("field"), ("aaabb"));
  +    query = new FuzzyQuery(new Term("field", "aaaac"), 
FuzzyQuery.defaultMinSimilarity, 2);   
  +    hits = searcher.search(query);
  +    assertEquals(3, hits.length());
  +    assertEquals(hits.doc(0).get("field"), ("aaaaa"));
  +    assertEquals(hits.doc(1).get("field"), ("aaaab"));
  +    assertEquals(hits.doc(2).get("field"), ("aaabb"));
  +    query = new FuzzyQuery(new Term("field", "aaaac"), 
FuzzyQuery.defaultMinSimilarity, 3);   
  +    hits = searcher.search(query);
  +    assertEquals(3, hits.length());
  +    assertEquals(hits.doc(0).get("field"), ("aaaaa"));
  +    assertEquals(hits.doc(1).get("field"), ("aaaab"));
  +    assertEquals(hits.doc(2).get("field"), ("aaabb"));
  +    query = new FuzzyQuery(new Term("field", "aaaac"), 
FuzzyQuery.defaultMinSimilarity, 4);   
  +    hits = searcher.search(query);
  +    assertEquals(2, hits.length());
  +    assertEquals(hits.doc(0).get("field"), ("aaaaa"));
  +    assertEquals(hits.doc(1).get("field"), ("aaaab"));
  +    query = new FuzzyQuery(new Term("field", "aaaac"), 
FuzzyQuery.defaultMinSimilarity, 5);   
  +    hits = searcher.search(query);
  +    assertEquals(0, hits.length());
  +    
   
  -    query = new FuzzyQuery(new Term("field", "ddddX"));   
  +    query = new FuzzyQuery(new Term("field", "ddddX"), 
FuzzyQuery.defaultMinSimilarity, 0);   
  +    hits = searcher.search(query);
  +    assertEquals(1, hits.length());
  +    assertEquals(hits.doc(0).get("field"), ("ddddd"));
  +    
  +    // now with prefix
  +    query = new FuzzyQuery(new Term("field", "ddddX"), 
FuzzyQuery.defaultMinSimilarity, 1);   
  +    hits = searcher.search(query);
  +    assertEquals(1, hits.length());
  +    assertEquals(hits.doc(0).get("field"), ("ddddd"));
  +    query = new FuzzyQuery(new Term("field", "ddddX"), 
FuzzyQuery.defaultMinSimilarity, 2);   
  +    hits = searcher.search(query);
  +    assertEquals(1, hits.length());
  +    assertEquals(hits.doc(0).get("field"), ("ddddd"));
  +    query = new FuzzyQuery(new Term("field", "ddddX"), 
FuzzyQuery.defaultMinSimilarity, 3);   
       hits = searcher.search(query);
       assertEquals(1, hits.length());
       assertEquals(hits.doc(0).get("field"), ("ddddd"));
  +    query = new FuzzyQuery(new Term("field", "ddddX"), 
FuzzyQuery.defaultMinSimilarity, 4);   
  +    hits = searcher.search(query);
  +    assertEquals(1, hits.length());
  +    assertEquals(hits.doc(0).get("field"), ("ddddd"));
  +    query = new FuzzyQuery(new Term("field", "ddddX"), 
FuzzyQuery.defaultMinSimilarity, 5);   
  +    hits = searcher.search(query);
  +    assertEquals(0, hits.length());
  +    
   
       // different field = no match:
  -    query = new FuzzyQuery(new Term("anotherfield", "ddddX"));   
  +    query = new FuzzyQuery(new Term("anotherfield", "ddddX"), 
FuzzyQuery.defaultMinSimilarity, 0);   
       hits = searcher.search(query);
       assertEquals(0, hits.length());
   
  @@ -101,31 +172,63 @@
   
       FuzzyQuery query;
       // not similar enough:
  -    query = new FuzzyQuery(new Term("field", "xxxxx"));   
  +    query = new FuzzyQuery(new Term("field", "xxxxx"), 
FuzzyQuery.defaultMinSimilarity, 0);   
       Hits hits = searcher.search(query);
       assertEquals(0, hits.length());
       // edit distance to "aaaaaaa" = 3, this matches because the string is longer 
than
       // in testDefaultFuzziness so a bigger difference is allowed:
  -    query = new FuzzyQuery(new Term("field", "aaaaccc"));   
  +    query = new FuzzyQuery(new Term("field", "aaaaccc"), 
FuzzyQuery.defaultMinSimilarity, 0);   
  +    hits = searcher.search(query);
  +    assertEquals(1, hits.length());
  +    assertEquals(hits.doc(0).get("field"), ("aaaaaaa"));
  +    
  +    // now with prefix
  +    query = new FuzzyQuery(new Term("field", "aaaaccc"), 
FuzzyQuery.defaultMinSimilarity, 1);   
       hits = searcher.search(query);
       assertEquals(1, hits.length());
       assertEquals(hits.doc(0).get("field"), ("aaaaaaa"));
  +    query = new FuzzyQuery(new Term("field", "aaaaccc"), 
FuzzyQuery.defaultMinSimilarity, 4);   
  +    hits = searcher.search(query);
  +    assertEquals(1, hits.length());
  +    assertEquals(hits.doc(0).get("field"), ("aaaaaaa"));
  +    query = new FuzzyQuery(new Term("field", "aaaaccc"), 
FuzzyQuery.defaultMinSimilarity, 5);   
  +    hits = searcher.search(query);
  +    assertEquals(0, hits.length());
   
       // no match, more than half of the characters is wrong:
  -    query = new FuzzyQuery(new Term("field", "aaacccc"));   
  +    query = new FuzzyQuery(new Term("field", "aaacccc"), 
FuzzyQuery.defaultMinSimilarity, 0);   
  +    hits = searcher.search(query);
  +    assertEquals(0, hits.length());
  +    
  +    // now with prefix
  +    query = new FuzzyQuery(new Term("field", "aaacccc"), 
FuzzyQuery.defaultMinSimilarity, 2);   
       hits = searcher.search(query);
       assertEquals(0, hits.length());
   
       // "student" and "stellent" are indeed similar to "segment" by default:
  -    query = new FuzzyQuery(new Term("field", "student"));   
  +    query = new FuzzyQuery(new Term("field", "student"), 
FuzzyQuery.defaultMinSimilarity, 0);   
       hits = searcher.search(query);
       assertEquals(1, hits.length());
  -    query = new FuzzyQuery(new Term("field", "stellent"));   
  +    query = new FuzzyQuery(new Term("field", "stellent"), 
FuzzyQuery.defaultMinSimilarity, 0);   
       hits = searcher.search(query);
       assertEquals(1, hits.length());
  -
  +    
  +    // now with prefix
  +    query = new FuzzyQuery(new Term("field", "student"), 
FuzzyQuery.defaultMinSimilarity, 1);   
  +    hits = searcher.search(query);
  +    assertEquals(1, hits.length());
  +    query = new FuzzyQuery(new Term("field", "stellent"), 
FuzzyQuery.defaultMinSimilarity, 1);   
  +    hits = searcher.search(query);
  +    assertEquals(1, hits.length());
  +    query = new FuzzyQuery(new Term("field", "student"), 
FuzzyQuery.defaultMinSimilarity, 2);   
  +    hits = searcher.search(query);
  +    assertEquals(0, hits.length());
  +    query = new FuzzyQuery(new Term("field", "stellent"), 
FuzzyQuery.defaultMinSimilarity, 2);   
  +    hits = searcher.search(query);
  +    assertEquals(0, hits.length());
  +    
       // "student" doesn't match anymore thanks to increased minimum similarity:
  -    query = new FuzzyQuery(new Term("field", "student"), 0.6f);   
  +    query = new FuzzyQuery(new Term("field", "student"), 0.6f, 0);   
       hits = searcher.search(query);
       assertEquals(0, hits.length());
   
  
  
  
  1.32      +2 -2      
jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
  
  Index: TestQueryParser.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java,v
  retrieving revision 1.31
  retrieving revision 1.32
  diff -u -r1.31 -r1.32
  --- TestQueryParser.java      15 Sep 2004 19:45:02 -0000      1.31
  +++ TestQueryParser.java      6 Oct 2004 16:19:52 -0000       1.32
  @@ -248,10 +248,10 @@
       assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
       FuzzyQuery fq = (FuzzyQuery)getQuery("term~0.7", null);
       assertEquals(0.7f, fq.getMinSimilarity(), 0.1f);
  -    assertEquals(0, fq.getPrefixLength());
  +    assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
       fq = (FuzzyQuery)getQuery("term~", null);
       assertEquals(0.5f, fq.getMinSimilarity(), 0.1f);
  -    assertEquals(0, fq.getPrefixLength());
  +    assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
       try {
         getQuery("term~1.1", null);   // value > 1, throws exception
         fail();
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to