Author: cutting
Date: Mon Mar 7 11:26:27 2005
New Revision: 156438
URL: http://svn.apache.org/viewcvs?view=rev&rev=156438
Log:
Patch #33472. Disable coord() in automatically generated queries.
Added:
lucene/java/trunk/src/java/org/apache/lucene/search/SimilarityDelegator.java
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/queryParser/MultiFieldQueryParser.java
lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
lucene/java/trunk/src/java/org/apache/lucene/search/BooleanQuery.java
lucene/java/trunk/src/java/org/apache/lucene/search/FuzzyQuery.java
lucene/java/trunk/src/java/org/apache/lucene/search/MultiPhraseQuery.java
lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQuery.java
lucene/java/trunk/src/java/org/apache/lucene/search/PhrasePrefixQuery.java
lucene/java/trunk/src/java/org/apache/lucene/search/PrefixQuery.java
lucene/java/trunk/src/java/org/apache/lucene/search/Query.java
lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java
Modified: lucene/java/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?view=diff&r1=156437&r2=156438
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Mar 7 11:26:27 2005
@@ -12,7 +12,7 @@
go into the rewritten query and thus the exception is avoided.
(Christoph)
- 2. Changed system property from "org.apache.lucene.lockdir" to
+ 2. Changed system property from "org.apache.lucene.lockdir" to
"org.apache.lucene.lockDir", so that its casing follows the existing
pattern used in other Lucene system properties. (Bernhard)
@@ -116,6 +116,15 @@
5. Small bug in skipTo of ConjunctionScorer that caused NullPointerException
if skipTo() was called without prior call to next() fixed. (Christoph)
+ 6. Disable Similarity.coord() in the scoring of most automatically
+ generated boolean queries. The coord() score factor is
+ appropriate when clauses are independently specified by a user,
+ but is usually not appropriate when clauses are generated
+ automatically, e.g., by a fuzzy, wildcard or range query. Matches
+ on such automatically generated queries are no longer penalized
+ for not matching all terms. (Doug Cutting, Patch #33472)
+
+
Optimizations
1. Disk usage (peak requirements during indexing and optimization)
Modified:
lucene/java/trunk/src/java/org/apache/lucene/queryParser/MultiFieldQueryParser.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/queryParser/MultiFieldQueryParser.java?view=diff&r1=156437&r2=156438
==============================================================================
---
lucene/java/trunk/src/java/org/apache/lucene/queryParser/MultiFieldQueryParser.java
(original)
+++
lucene/java/trunk/src/java/org/apache/lucene/queryParser/MultiFieldQueryParser.java
Mon Mar 7 11:26:27 2005
@@ -63,7 +63,7 @@
for (int i = 0; i < fields.length; i++)
clauses.add(new BooleanClause(super.getFieldQuery(fields[i],
queryText),
BooleanClause.Occur.SHOULD));
- return getBooleanQuery(clauses);
+ return getBooleanQuery(clauses, true);
}
return super.getFieldQuery(field, queryText);
}
@@ -95,7 +95,7 @@
clauses.add(new BooleanClause(super.getFuzzyQuery(fields[i], termStr,
minSimilarity),
BooleanClause.Occur.SHOULD));
}
- return getBooleanQuery(clauses);
+ return getBooleanQuery(clauses, true);
}
return super.getFuzzyQuery(field, termStr, minSimilarity);
}
@@ -108,7 +108,7 @@
clauses.add(new BooleanClause(super.getPrefixQuery(fields[i], termStr),
BooleanClause.Occur.SHOULD));
}
- return getBooleanQuery(clauses);
+ return getBooleanQuery(clauses, true);
}
return super.getPrefixQuery(field, termStr);
}
@@ -128,7 +128,7 @@
clauses.add(new BooleanClause(super.getRangeQuery(fields[i], part1,
part2, inclusive),
BooleanClause.Occur.SHOULD));
}
- return getBooleanQuery(clauses);
+ return getBooleanQuery(clauses, true);
}
return super.getRangeQuery(field, part1, part2, inclusive);
}
Modified:
lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java?view=diff&r1=156437&r2=156438
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
Mon Mar 7 11:26:27 2005
@@ -393,7 +393,7 @@
if (severalTokensAtSamePosition) {
if (positionCount == 1) {
// no phrase query:
- BooleanQuery q = new BooleanQuery();
+ BooleanQuery q = new BooleanQuery(true);
for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
TermQuery currentQuery = new TermQuery(
@@ -521,9 +521,27 @@
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
- protected Query getBooleanQuery(Vector clauses) throws ParseException
+ protected Query getBooleanQuery(Vector clauses) throws ParseException {
+ return getBooleanQuery(clauses, false); // disableCoord=false: coord scoring is not disabled
+ }
+ /**
+ * Factory method for generating query, given a set of clauses.
+ * By default creates a boolean query composed of clauses passed in.
+ *
+ * Can be overridden by extending classes, to modify query being
+ * returned.
+ *
+ * @param clauses Vector that contains {@link BooleanClause} instances
+ * to join.
+ * @param disableCoord true if coord scoring should be disabled.
+ *
+ * @return Resulting {@link Query} object.
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
+ throws ParseException
{
- BooleanQuery query = new BooleanQuery();
+ BooleanQuery query = new BooleanQuery(disableCoord);
for (int i = 0; i < clauses.size(); i++) {
query.add((BooleanClause)clauses.elementAt(i));
}
Modified:
lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj?view=diff&r1=156437&r2=156438
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj Mon
Mar 7 11:26:27 2005
@@ -416,7 +416,7 @@
if (severalTokensAtSamePosition) {
if (positionCount == 1) {
// no phrase query:
- BooleanQuery q = new BooleanQuery();
+ BooleanQuery q = new BooleanQuery(true);
for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
TermQuery currentQuery = new TermQuery(
@@ -544,9 +544,27 @@
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
- protected Query getBooleanQuery(Vector clauses) throws ParseException
+ protected Query getBooleanQuery(Vector clauses) throws ParseException {
+ return getBooleanQuery(clauses, false); // disableCoord=false: coord scoring is not disabled
+ }
+ /**
+ * Factory method for generating query, given a set of clauses.
+ * By default creates a boolean query composed of clauses passed in.
+ *
+ * Can be overridden by extending classes, to modify query being
+ * returned.
+ *
+ * @param clauses Vector that contains {@link BooleanClause} instances
+ * to join.
+ * @param disableCoord true if coord scoring should be disabled.
+ *
+ * @return Resulting {@link Query} object.
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
+ throws ParseException
{
- BooleanQuery query = new BooleanQuery();
+ BooleanQuery query = new BooleanQuery(disableCoord);
for (int i = 0; i < clauses.size(); i++) {
query.add((BooleanClause)clauses.elementAt(i));
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/BooleanQuery.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/BooleanQuery.java?view=diff&r1=156437&r2=156438
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/BooleanQuery.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/BooleanQuery.java Mon
Mar 7 11:26:27 2005
@@ -63,9 +63,43 @@
}
private Vector clauses = new Vector();
+ private boolean disableCoord;
/** Constructs an empty boolean query. */
public BooleanQuery() {}
+
+ /** Constructs an empty boolean query.
+ *
+ * {@link Similarity#coord(int,int)} may be disabled in scoring, as
+ * appropriate. For example, this score factor does not make sense for most
+ * automatically generated queries, like {@link WildcardQuery} and {@link
+ * FuzzyQuery}.
+ *
+ * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.
+ */
+ public BooleanQuery(boolean disableCoord) {
+ this.disableCoord = disableCoord;
+ }
+
+ /** Returns true iff {@link Similarity#coord(int,int)} is disabled in
+ * scoring for this query instance.
+ * @see #BooleanQuery(boolean)
+ */
+ public boolean isCoordDisabled() { return disableCoord; }
+
+ // Implement coord disabling: when disableCoord is set, wrap the inherited
+ // Similarity so that coord() always returns 1.0f. Inherit javadoc.
+ public Similarity getSimilarity(Searcher searcher) {
+ Similarity result = super.getSimilarity(searcher);
+ if (disableCoord) { // disable coord as requested
+ result = new SimilarityDelegator(result) {
+ public float coord(int overlap, int maxOverlap) {
+ return 1.0f;
+ }
+ };
+ }
+ return result;
+ }
/** Adds a clause to a boolean query. Clauses may be:
* <ul>
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FuzzyQuery.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/FuzzyQuery.java?view=diff&r1=156437&r2=156438
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FuzzyQuery.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FuzzyQuery.java Mon Mar
7 11:26:27 2005
@@ -122,7 +122,7 @@
enumerator.close();
}
- BooleanQuery query = new BooleanQuery();
+ BooleanQuery query = new BooleanQuery(true);
int size = stQueue.size();
for(int i = 0; i < size; i++){
ScoreTerm st = (ScoreTerm) stQueue.pop();
Modified:
lucene/java/trunk/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/MultiPhraseQuery.java?view=diff&r1=156437&r2=156438
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/MultiPhraseQuery.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/MultiPhraseQuery.java
Mon Mar 7 11:26:27 2005
@@ -228,7 +228,7 @@
public Query rewrite(IndexReader reader) {
if (termArrays.size() == 1) { // optimize one-term case
Term[] terms = (Term[])termArrays.get(0);
- BooleanQuery boq = new BooleanQuery();
+ BooleanQuery boq = new BooleanQuery(true);
for (int i=0; i<terms.length; i++) {
boq.add(new TermQuery(terms[i]), BooleanClause.Occur.SHOULD);
}
Modified:
lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQuery.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQuery.java?view=diff&r1=156437&r2=156438
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQuery.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQuery.java Mon
Mar 7 11:26:27 2005
@@ -51,7 +51,7 @@
public Query rewrite(IndexReader reader) throws IOException {
FilteredTermEnum enumerator = getEnum(reader);
- BooleanQuery query = new BooleanQuery();
+ BooleanQuery query = new BooleanQuery(true);
try {
do {
Term t = enumerator.term();
Modified:
lucene/java/trunk/src/java/org/apache/lucene/search/PhrasePrefixQuery.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/PhrasePrefixQuery.java?view=diff&r1=156437&r2=156438
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/PhrasePrefixQuery.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/PhrasePrefixQuery.java
Mon Mar 7 11:26:27 2005
@@ -229,7 +229,7 @@
protected Weight createWeight(Searcher searcher) {
if (termArrays.size() == 1) { // optimize one-term case
Term[] terms = (Term[])termArrays.get(0);
- BooleanQuery boq = new BooleanQuery();
+ BooleanQuery boq = new BooleanQuery(true);
for (int i=0; i<terms.length; i++) {
boq.add(new TermQuery(terms[i]), BooleanClause.Occur.SHOULD);
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/PrefixQuery.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/PrefixQuery.java?view=diff&r1=156437&r2=156438
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/PrefixQuery.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/PrefixQuery.java Mon
Mar 7 11:26:27 2005
@@ -35,7 +35,7 @@
public Term getPrefix() { return prefix; }
public Query rewrite(IndexReader reader) throws IOException {
- BooleanQuery query = new BooleanQuery();
+ BooleanQuery query = new BooleanQuery(true);
TermEnum enumerator = reader.terms(prefix);
try {
String prefixText = prefix.text();
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/Query.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/Query.java?view=diff&r1=156437&r2=156438
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/Query.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/Query.java Mon Mar 7
11:26:27 2005
@@ -125,7 +125,9 @@
}
}
- BooleanQuery result = new BooleanQuery();
+ boolean coordDisabled =
+ queries.length==0? false : ((BooleanQuery)queries[0]).isCoordDisabled();
+ BooleanQuery result = new BooleanQuery(coordDisabled);
Iterator i = allClauses.iterator();
while (i.hasNext()) {
result.add((BooleanClause)i.next());
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java?view=diff&r1=156437&r2=156438
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java Mon Mar
7 11:26:27 2005
@@ -64,7 +64,7 @@
public Query rewrite(IndexReader reader) throws IOException {
- BooleanQuery query = new BooleanQuery();
+ BooleanQuery query = new BooleanQuery(true);
TermEnum enumerator = reader.terms(lowerTerm);
try {
Added:
lucene/java/trunk/src/java/org/apache/lucene/search/SimilarityDelegator.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/SimilarityDelegator.java?view=auto&rev=156438
==============================================================================
---
lucene/java/trunk/src/java/org/apache/lucene/search/SimilarityDelegator.java
(added)
+++
lucene/java/trunk/src/java/org/apache/lucene/search/SimilarityDelegator.java
Mon Mar 7 11:26:27 2005
@@ -0,0 +1,58 @@
+package org.apache.lucene.search;
+
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Expert: Delegating scoring implementation. Useful in {@link
+ * Query#getSimilarity(Searcher)} implementations, to override only certain
+ * methods of a Searcher's Similarity implementation. */
+public class SimilarityDelegator extends Similarity {
+
+ private Similarity delegee; // target that every method below forwards to
+
+ /** Construct a {@link Similarity} that delegates all methods to another.
+ *
+ * @param delegee the Similarity implementation to delegate to
+ */
+ public SimilarityDelegator(Similarity delegee) {
+ this.delegee = delegee;
+ }
+
+ public float lengthNorm(String fieldName, int numTerms) {
+ return delegee.lengthNorm(fieldName, numTerms);
+ }
+
+ public float queryNorm(float sumOfSquaredWeights) {
+ return delegee.queryNorm(sumOfSquaredWeights);
+ }
+
+ public float tf(float freq) {
+ return delegee.tf(freq);
+ }
+
+ public float sloppyFreq(int distance) {
+ return delegee.sloppyFreq(distance);
+ }
+
+ public float idf(int docFreq, int numDocs) {
+ return delegee.idf(docFreq, numDocs);
+ }
+
+ public float coord(int overlap, int maxOverlap) {
+ return delegee.coord(overlap, maxOverlap);
+ }
+
+}