dnaber      2004/11/13 07:11:26

  Modified:    src/java/org/apache/lucene/queryParser QueryParser.java
                        QueryParser.jj
               .        CHANGES.txt
  Log:
  make QueryParser work with analyzers that return more than one token per position
  PR: 23307
  Submitted by: Pierrick Brihaye
  
  Revision  Changes    Path
  1.21      +52 -11    jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.java
  
  Index: QueryParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.java,v
  retrieving revision 1.20
  retrieving revision 1.21
  diff -u -r1.20 -r1.21
  --- QueryParser.java  15 Oct 2004 19:47:43 -0000      1.20
  +++ QueryParser.java  13 Nov 2004 15:11:26 -0000      1.21
  @@ -292,10 +292,11 @@
       // Use the analyzer to get all the tokens, and then build a TermQuery,
       // PhraseQuery, or nothing based on the term count
   
  -    TokenStream source = analyzer.tokenStream(field,
  -                                              new StringReader(queryText));
  +    TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
       Vector v = new Vector();
       org.apache.lucene.analysis.Token t;
  +    int positionCount = 0;
  +    boolean severalTokensAtSamePosition = false;
   
       while (true) {
         try {
  @@ -306,7 +307,11 @@
         }
         if (t == null)
           break;
  -      v.addElement(t.termText());
  +      v.addElement(t);
  +      if (t.getPositionIncrement() == 1)
  +        positionCount++;
  +      else
  +        severalTokensAtSamePosition = true;
       }
       try {
         source.close();
  @@ -317,15 +322,48 @@
   
       if (v.size() == 0)
         return null;
  -    else if (v.size() == 1)
  -      return new TermQuery(new Term(field, (String) v.elementAt(0)));
  -    else {
  -      PhraseQuery q = new PhraseQuery();
  -      q.setSlop(phraseSlop);
  -      for (int i=0; i<v.size(); i++) {
  -        q.add(new Term(field, (String) v.elementAt(i)));
  +    else if (v.size() == 1) {
  +      t = (org.apache.lucene.analysis.Token) v.elementAt(0);
  +      return new TermQuery(new Term(field, t.termText()));
  +    } else {
  +      if (severalTokensAtSamePosition) {
  +        if (positionCount == 1) {
  +          // no phrase query:
  +          BooleanQuery q = new BooleanQuery();
  +          for (int i = 0; i < v.size(); i++) {
  +            t = (org.apache.lucene.analysis.Token) v.elementAt(i);
  +            TermQuery currentQuery = new TermQuery(
  +                new Term(field, t.termText()));
  +            q.add(currentQuery, BooleanClause.Occur.SHOULD);
  +          }
  +          return q;
  +        }
  +        else {
  +          // phrase query:
  +          MultiPhraseQuery mpq = new MultiPhraseQuery();
  +          List multiTerms = new ArrayList();
  +          for (int i = 0; i < v.size(); i++) {
  +            t = (org.apache.lucene.analysis.Token) v.elementAt(i);
  +            if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
  +              mpq.add((Term[])multiTerms.toArray(new Term[0]));
  +              multiTerms.clear();
  +            }
  +            multiTerms.add(new Term(field, t.termText()));
  +          }
  +          mpq.add((Term[])multiTerms.toArray(new Term[0]));
  +          return mpq;
  +        }
  +      }
  +      else {
  +        PhraseQuery q = new PhraseQuery();
  +        q.setSlop(phraseSlop);
  +        for (int i = 0; i < v.size(); i++) {
  +          q.add(new Term(field, ((org.apache.lucene.analysis.Token)
  +              v.elementAt(i)).termText()));
  +
  +        }
  +        return q;
         }
  -      return q;
       }
     }
   
  @@ -356,6 +394,9 @@
   
       if (query instanceof PhraseQuery) {
         ((PhraseQuery) query).setSlop(slop);
  +    }
  +    if (query instanceof MultiPhraseQuery) {
  +      ((MultiPhraseQuery) query).setSlop(slop);
       }
   
       return query;
  
  
  
  1.55      +52 -11    jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj
  
  Index: QueryParser.jj
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v
  retrieving revision 1.54
  retrieving revision 1.55
  diff -u -r1.54 -r1.55
  --- QueryParser.jj    15 Oct 2004 19:47:43 -0000      1.54
  +++ QueryParser.jj    13 Nov 2004 15:11:26 -0000      1.55
  @@ -315,10 +315,11 @@
       // Use the analyzer to get all the tokens, and then build a TermQuery,
       // PhraseQuery, or nothing based on the term count
   
  -    TokenStream source = analyzer.tokenStream(field,
  -                                              new StringReader(queryText));
  +    TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
       Vector v = new Vector();
       org.apache.lucene.analysis.Token t;
  +    int positionCount = 0;
  +    boolean severalTokensAtSamePosition = false;
   
       while (true) {
         try {
  @@ -329,7 +330,11 @@
         }
         if (t == null)
           break;
  -      v.addElement(t.termText());
  +      v.addElement(t);
  +      if (t.getPositionIncrement() == 1)
  +        positionCount++;
  +      else
  +        severalTokensAtSamePosition = true;
       }
       try {
         source.close();
  @@ -340,15 +345,48 @@
   
       if (v.size() == 0)
         return null;
  -    else if (v.size() == 1)
  -      return new TermQuery(new Term(field, (String) v.elementAt(0)));
  -    else {
  -      PhraseQuery q = new PhraseQuery();
  -      q.setSlop(phraseSlop);
  -      for (int i=0; i<v.size(); i++) {
  -        q.add(new Term(field, (String) v.elementAt(i)));
  +    else if (v.size() == 1) {
  +      t = (org.apache.lucene.analysis.Token) v.elementAt(0);
  +      return new TermQuery(new Term(field, t.termText()));
  +    } else {
  +      if (severalTokensAtSamePosition) {
  +        if (positionCount == 1) {
  +          // no phrase query:
  +          BooleanQuery q = new BooleanQuery();
  +          for (int i = 0; i < v.size(); i++) {
  +            t = (org.apache.lucene.analysis.Token) v.elementAt(i);
  +            TermQuery currentQuery = new TermQuery(
  +                new Term(field, t.termText()));
  +            q.add(currentQuery, BooleanClause.Occur.SHOULD);
  +          }
  +          return q;
  +        }
  +        else {
  +          // phrase query:
  +          MultiPhraseQuery mpq = new MultiPhraseQuery();
  +          List multiTerms = new ArrayList();
  +          for (int i = 0; i < v.size(); i++) {
  +            t = (org.apache.lucene.analysis.Token) v.elementAt(i);
  +            if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
  +              mpq.add((Term[])multiTerms.toArray(new Term[0]));
  +              multiTerms.clear();
  +            }
  +            multiTerms.add(new Term(field, t.termText()));
  +          }
  +          mpq.add((Term[])multiTerms.toArray(new Term[0]));
  +          return mpq;
  +        }
  +      }
  +      else {
  +        PhraseQuery q = new PhraseQuery();
  +        q.setSlop(phraseSlop);
  +        for (int i = 0; i < v.size(); i++) {
  +          q.add(new Term(field, ((org.apache.lucene.analysis.Token) 
  +              v.elementAt(i)).termText()));
  +
  +        }
  +        return q;
         }
  -      return q;
       }
     }
     
  @@ -379,6 +417,9 @@
   
       if (query instanceof PhraseQuery) {
         ((PhraseQuery) query).setSlop(slop);
  +    }
  +    if (query instanceof MultiPhraseQuery) {
  +      ((MultiPhraseQuery) query).setSlop(slop);
       }
   
       return query;
  
  
  
  1.121     +9 -2      jakarta-lucene/CHANGES.txt
  
  Index: CHANGES.txt
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v
  retrieving revision 1.120
  retrieving revision 1.121
  diff -u -r1.120 -r1.121
  --- CHANGES.txt       7 Nov 2004 23:31:16 -0000       1.120
  +++ CHANGES.txt       13 Nov 2004 15:11:26 -0000      1.121
  @@ -12,7 +12,7 @@
    2. FuzzyQuery now takes an additional parameter that specifies the
       minimum similarity that is required for a term to match the query.
       The QueryParser syntax for this is term~x, where x is a floating 
  -    point number between 0 and 1 (a bigger number means that a higher
  +    point number >= 0 and < 1 (a bigger number means that a higher
       similarity is required). Furthermore, a prefix can be specified
       for FuzzyQuerys so that only those terms are considered similar that 
       start with this prefix. This can speed up FuzzyQuery greatly.
  @@ -112,6 +112,13 @@
   24. Optimize fuzzy queries so the standard fuzzy queries with a prefix 
       of 0 now run 20-50% faster (Patch #31882).
       (Jonathan Hager via Daniel Naber)
  +
  +25. QueryParser now correctly works with Analyzers that can return more 
  +    than one token per position. For example, a query "+fast +car"
  +    would be parsed as "+fast +(car automobile)" if the Analyzer
  +    returns "car" and "automobile" at the same position whenever it 
  +    finds "car" (Patch #23307).
  +    (Pierrick Brihaye, Daniel Naber)
   
   
   1.4.1
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to