Don't we need a note about the introduction of the new attribute in the schema version notes?
> 1.3: removed optional field compress feature Koji -- http://www.rondhuit.com/en/ (10/07/26 0:13), [email protected] wrote:
Author: yonik Date: Sun Jul 25 15:13:05 2010 New Revision: 979049 URL: http://svn.apache.org/viewvc?rev=979049&view=rev Log: SOLR-2015: add boolean attribute autoGeneratePhraseQueries to TextField Added: lucene/dev/trunk/solr/src/test/org/apache/solr/search/TestSolrQueryParser.java Modified: lucene/dev/trunk/solr/CHANGES.txt lucene/dev/trunk/solr/example/solr/conf/schema.xml lucene/dev/trunk/solr/src/java/org/apache/solr/schema/TextField.java lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema12.xml Modified: lucene/dev/trunk/solr/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=979049&r1=979048&r2=979049&view=diff ============================================================================== --- lucene/dev/trunk/solr/CHANGES.txt (original) +++ lucene/dev/trunk/solr/CHANGES.txt Sun Jul 25 15:13:05 2010 @@ -203,6 +203,15 @@ New Features http://wiki.apache.org/solr/SpatialSearch and the example. Refactored some items in Lucene spatial. Removed SpatialTileField as the underlying CartesianTier is broken beyond repair and is going to be moved. (gsingers) +* SOLR-2015: Add a boolean attribute autoGeneratePhraseQueries to TextField. + autoGeneratePhraseQueries="true" (the default) causes the query parser to + generate phrase queries if multiple tokens are generated from a single + non-quoted analysis string. For example WordDelimiterFilter splitting text:pdp-11 + will cause the parser to generate text:"pdp 11" rather than (text:PDP OR text:11). + Note that autoGeneratePhraseQueries="true" tends to not work well for non whitespace + delimited languages. 
(yonik) + + Optimizations ---------------------- Modified: lucene/dev/trunk/solr/example/solr/conf/schema.xml URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/conf/schema.xml?rev=979049&r1=979048&r2=979049&view=diff ============================================================================== --- lucene/dev/trunk/solr/example/solr/conf/schema.xml (original) +++ lucene/dev/trunk/solr/example/solr/conf/schema.xml Sun Jul 25 15:13:05 2010 @@ -213,8 +213,12 @@ words on case-change, alpha numeric boundaries, and non-alphanumeric chars, so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi". Synonyms and stopwords are customized by external files, and stemming is enabled. + The attribute autoGeneratePhraseQueries="true" (the default) causes words that get split to + form phrase queries. For example, WordDelimiterFilter splitting text:pdp-11 will cause the parser + to generate text:"pdp 11" rather than (text:PDP OR text:11). + NOTE: autoGeneratePhraseQueries="true" tends to not work well for non whitespace delimited languages. 
--> -<fieldType name="text" class="solr.TextField" positionIncrementGap="100"> +<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <analyzer type="index"> <tokenizer class="solr.WhitespaceTokenizerFactory"/> <!-- in this example, we will only use synonyms at query time Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/schema/TextField.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/schema/TextField.java?rev=979049&r1=979048&r2=979049&view=diff ============================================================================== --- lucene/dev/trunk/solr/src/java/org/apache/solr/schema/TextField.java (original) +++ lucene/dev/trunk/solr/src/java/org/apache/solr/schema/TextField.java Sun Jul 25 15:13:05 2010 @@ -46,13 +46,21 @@ import java.io.StringReader; * @version $Id$ */ public class TextField extends FieldType { + protected boolean autoGeneratePhraseQueries = true; + protected void init(IndexSchema schema, Map<String,String> args) { properties |= TOKENIZED; if (schema.getVersion()> 1.1f) properties&= ~OMIT_TF_POSITIONS; - + String autoGeneratePhraseQueriesStr = args.remove("autoGeneratePhraseQueries"); + if (autoGeneratePhraseQueriesStr != null) + autoGeneratePhraseQueries = Boolean.parseBoolean(autoGeneratePhraseQueriesStr); super.init(schema, args); } + public boolean getAutoGeneratePhraseQueries() { + return autoGeneratePhraseQueries; + } + public SortField getSortField(SchemaField field, boolean reverse) { return getStringSort(field, reverse); } Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java?rev=979049&r1=979048&r2=979049&view=diff ============================================================================== --- lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java (original) +++ 
lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java Sun Jul 25 15:13:05 2010 @@ -142,11 +142,15 @@ public class SolrQueryParser extends Que return parser.subQuery(queryText, null).getQuery(); } } - //Intercept poly fields, as they get expanded by default to an OR clause of - SchemaField sf = schema.getField(field); - //TODO: is there anyway to avoid this instance of check? - if (sf != null&& !(sf.getType() instanceof TextField)){//we have a poly field, deal with it specially by delegating to the FieldType - return sf.getType().getFieldQuery(parser, sf, queryText); + SchemaField sf = schema.getFieldOrNull(field); + if (sf != null) { + FieldType ft = sf.getType(); + // delegate to type for everything except TextField + if (ft instanceof TextField) { + return super.getFieldQuery(field, queryText, quoted || ((TextField)ft).getAutoGeneratePhraseQueries()); + } else { + return sf.getType().getFieldQuery(parser, sf, queryText); + } } // default to a normal field query Added: lucene/dev/trunk/solr/src/test/org/apache/solr/search/TestSolrQueryParser.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/search/TestSolrQueryParser.java?rev=979049&view=auto ============================================================================== --- lucene/dev/trunk/solr/src/test/org/apache/solr/search/TestSolrQueryParser.java (added) +++ lucene/dev/trunk/solr/src/test/org/apache/solr/search/TestSolrQueryParser.java Sun Jul 25 15:13:05 2010 @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search; + +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; +import org.junit.After; +import org.junit.Test; + + +public class TestSolrQueryParser extends SolrTestCaseJ4 { + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig.xml", "schema12.xml"); + createIndex(); + } + + public static void createIndex() { + String v; + v="how now brown cow"; + assertU(adoc("id","1", "text",v, "text_np",v)); + v="now cow"; + assertU(adoc("id","2", "text",v, "text_np",v)); + assertU(commit()); + } + + @Test + public void testPhrase() { + // should generate a phrase of "now cow" and match only one doc + assertQ(req("q","text:now-cow", "indent","true") + ,"//*[@numFound='1']" + ); + // should generate a query of (now OR cow) and match both docs + assertQ(req("q","text_np:now-cow", "indent","true") + ,"//*[@numFound='2']" + ); + } + +} Modified: lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema12.xml URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema12.xml?rev=979049&r1=979048&r2=979049&view=diff ============================================================================== --- lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema12.xml (original) +++ lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema12.xml Sun Jul 25 15:13:05 2010 @@ -145,6 +145,35 @@ </fieldType> +<!-- field type that doesn't generate phrases from unquoted multiple tokens per analysis unit --> +<fieldType name="text_np" class="solr.TextField" 
positionIncrementGap="100" autoGeneratePhraseQueries="false"> +<analyzer type="index"> +<tokenizer class="solr.WhitespaceTokenizerFactory"/> +<filter class="solr.StopFilterFactory" + ignoreCase="true" + words="stopwords.txt" + enablePositionIncrements="true" + /> +<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> +<filter class="solr.LowerCaseFilterFactory"/> +<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> +<filter class="solr.PorterStemFilterFactory"/> +</analyzer> +<analyzer type="query"> +<tokenizer class="solr.WhitespaceTokenizerFactory"/> +<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> +<filter class="solr.StopFilterFactory" + ignoreCase="true" + words="stopwords.txt" + enablePositionIncrements="true" + /> +<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> +<filter class="solr.LowerCaseFilterFactory"/> +<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> +<filter class="solr.PorterStemFilterFactory"/> +</analyzer> +</fieldType> + <fieldtype name="nametext" class="solr.TextField"> <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/> </fieldtype> @@ -403,6 +432,8 @@ <field name="weight" type="float" indexed="true" stored="true"/> <field name="bday" type="date" indexed="true" stored="true"/> +<field name="text_np" type="text_np" indexed="true" stored="false"/> + <field name="title_stemmed" type="text" indexed="true" stored="false"/> <field name="title_lettertok" type="lettertok" indexed="true" stored="false"/>
--------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
