Author: yonik
Date: Sat Nov 29 08:38:08 2008
New Revision: 721687
URL: http://svn.apache.org/viewvc?rev=721687&view=rev
Log:
SOLR-879: enable position increments in QP, fix example schemas
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/contrib/javascript/example/testsolr/solr/conf/schema.xml
lucene/solr/trunk/contrib/velocity/src/main/solr/conf/schema.xml
lucene/solr/trunk/example/solr/conf/schema.xml
lucene/solr/trunk/src/java/org/apache/solr/search/SolrQueryParser.java
Modified: lucene/solr/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=721687&r1=721686&r2=721687&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Sat Nov 29 08:38:08 2008
@@ -135,6 +135,11 @@
11. SOLR-872: Better error message for incorrect copyField destination (Noble
Paul via shalin)
+12. SOLR-879: Enable position increments in the query parser and fix the
+ example schema to enable position increments for the stop filter in
+ both the index and query analyzers to fix the bug with phrase queries
+ with stopwords. (yonik)
+
Other Changes
----------------------
Modified:
lucene/solr/trunk/contrib/javascript/example/testsolr/solr/conf/schema.xml
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/javascript/example/testsolr/solr/conf/schema.xml?rev=721687&r1=721686&r2=721687&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/javascript/example/testsolr/solr/conf/schema.xml
(original)
+++ lucene/solr/trunk/contrib/javascript/example/testsolr/solr/conf/schema.xml
Sat Nov 29 08:38:08 2008
@@ -1,4 +1,42 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ This is the Solr schema file. This file should be named "schema.xml" and
+ should be in the conf directory under the solr home
+ (i.e. ./solr/conf/schema.xml by default)
+ or located where the classloader for the Solr webapp can find it.
+
+ This example schema is the recommended starting point for users.
+ It should be kept correct and concise, usable out-of-the-box.
+
+ For more information, on how to customize this file, please see
+ http://wiki.apache.org/solr/SchemaXml
+-->
+
<schema name="example" version="1.1">
+ <!-- attribute "name" is the name of this schema and is only used for
display purposes.
+ Applications should change this to reflect the nature of the search
collection.
+ version="1.1" is Solr's version number for the schema syntax and
semantics. It should
+ not normally be changed by applications.
+ 1.0: multiValued attribute did not exist, all fields are multiValued by
nature
+ 1.1: multiValued attribute introduced, false by default -->
+
<types>
<!-- field type definitions. The "name" attribute is
just a label to be used by field definitions. The "class"
@@ -127,8 +165,8 @@
<filter class="solr.SynonymFilterFactory"
synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
<!-- Case insensitive stop word removal.
- enablePositionIncrements=true ensures that a 'gap' is left to
- allow for accurate phrase queries.
+ add enablePositionIncrements=true in both the index and query
+ analyzers to leave a 'gap' for more accurate phrase queries.
-->
<filter class="solr.StopFilterFactory"
ignoreCase="true"
@@ -143,7 +181,11 @@
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt"/>
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="stopwords.txt"
+ enablePositionIncrements="true"
+ />
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1"
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"
splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory"
protected="protwords.txt"/>
@@ -166,7 +208,6 @@
</analyzer>
</fieldType>
-
<!--
Setup simple analysis for spell checking
-->
@@ -178,6 +219,16 @@
</analyzer>
</fieldType>
+ <!-- charFilter + "CharStream aware" WhitespaceTokenizer -->
+ <!--
+ <fieldType name="textCharNorm" class="solr.TextField"
positionIncrementGap="100" >
+ <analyzer>
+ <charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt"/>
+ <tokenizer class="solr.CharStreamAwareWhitespaceTokenizerFactory"/>
+ </analyzer>
+ </fieldType>
+ -->
+
<!-- This is an example of using the KeywordTokenizer along
With various TokenFilterFactories to produce a sortable field
that does not include some properties of the source text
@@ -210,6 +261,14 @@
/>
</analyzer>
</fieldType>
+
+ <fieldtype name="phonetic" stored="false" indexed="true"
class="solr.TextField" >
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
+ </analyzer>
+ </fieldtype>
+
<!-- since fields of this type are by default not stored or indexed, any
data added to
them will be ignored outright
@@ -220,32 +279,134 @@
<fields>
+ <!-- Valid attributes for fields:
+ name: mandatory - the name for the field
+ type: mandatory - the name of a previously defined type from the <types>
section
+ indexed: true if this field should be indexed (searchable or sortable)
+ stored: true if this field should be retrievable
+ compressed: [false] if this field should be stored using gzip compression
+ (this will only apply if the field type is compressible; among
+ the standard field types, only TextField and StrField are)
+ multiValued: true if this field may contain multiple values per document
+ omitNorms: (expert) set to true to omit the norms associated with
+ this field (this disables length normalization and index-time
+ boosting for the field, and saves some memory). Only full-text
+ fields or fields that need an index-time boost need norms.
+ termVectors: [false] set to true to store the term vector for a given
field.
+ When using MoreLikeThis, fields used for similarity should be stored
for
+ best performance.
+ termPositions: Store position information with the term vector. This
will increase storage costs.
+ termOffsets: Store offset information with the term vector. This will
increase storage costs.
+ -->
+
<field name="id" type="string" indexed="true" stored="true" required="true"
/>
- <field name="title" type="text" indexed="true" stored="true"/>
- <field name="text" type="text" indexed="true" stored="true"/>
- <field name="date" type="date" indexed="true" stored="true"/>
- <field name="dateline" type="text" indexed="true" stored="true"/>
- <field name="places" type="string" indexed="true" stored="true"
multiValued="true" omitNorms="true" termVectors="true" />
- <field name="countryCodes" type="string" indexed="true" stored="true"
multiValued="true" omitNorms="true" termVectors="true" />
- <field name="topics" type="string" indexed="true" stored="true"
multiValued="true" omitNorms="true" termVectors="true" />
- <field name="organisations" type="string" indexed="true" stored="true"
multiValued="true" omitNorms="true" termVectors="true" />
- <field name="exchanges" type="string" indexed="true" stored="true"
multiValued="true" omitNorms="true" termVectors="true" />
- <field name="companies" type="string" indexed="true" stored="true"
multiValued="true" omitNorms="true" termVectors="true" />
- <field name="allText" type="text" indexed="true" stored="true"
multiValued="true" omitNorms="true" termVectors="true" />
- </fields>
+ <field name="sku" type="textTight" indexed="true" stored="true"
omitNorms="true"/>
+ <field name="name" type="text" indexed="true" stored="true"/>
+ <field name="nameSort" type="string" indexed="true" stored="false"/>
+ <field name="alphaNameSort" type="alphaOnlySort" indexed="true"
stored="false"/>
+ <field name="manu" type="text" indexed="true" stored="true"
omitNorms="true"/>
+ <field name="cat" type="text_ws" indexed="true" stored="true"
multiValued="true" omitNorms="true" termVectors="true" />
+ <field name="features" type="text" indexed="true" stored="true"
multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
+ <field name="includes" type="text" indexed="true" stored="true"/>
+
+ <field name="weight" type="sfloat" indexed="true" stored="true"/>
+ <field name="price" type="sfloat" indexed="true" stored="true"/>
+ <!-- "default" values can be specified for fields, indicating which
+ value should be used if no value is specified when adding a document.
+ -->
+ <field name="popularity" type="sint" indexed="true" stored="true"
default="0"/>
+ <field name="inStock" type="boolean" indexed="true" stored="true"/>
- <copyField source="title" dest="allText"/>
- <copyField source="text" dest="allText"/>
- <copyField source="places" dest="allText"/>
- <copyField source="topics" dest="allText"/>
- <copyField source="companies" dest="allText"/>
- <copyField source="exchanges" dest="allText"/>
+ <!-- Some sample docs exist solely to demonstrate the spellchecker
+ functionality; this is the only field they contain.
+ Typically you might build the spellchecker off a "catchall" type field
+ containing all of the text in each document
+ -->
+ <field name="word" type="string" indexed="true" stored="true"/>
+
+
+ <!-- catchall field, containing all other searchable text fields
(implemented
+ via copyField further on in this schema) -->
+ <field name="text" type="text" indexed="true" stored="false"
multiValued="true"/>
+
+ <!-- non-tokenized version of manufacturer to make it easier to sort or
group
+ results by manufacturer. copied from "manu" via copyField -->
+ <field name="manu_exact" type="string" indexed="true" stored="false"/>
+
+ <!-- Here, default is used to create a "timestamp" field indicating
+ when each document was indexed.
+ -->
+ <field name="timestamp" type="date" indexed="true" stored="true"
default="NOW" multiValued="false"/>
+
+ <field name="spell" type="textSpell" indexed="true" stored="true"
multiValued="true"/>
+ <!-- Dynamic field definitions. If a field name is not found, dynamicFields
+ will be used if the name matches any of the patterns.
+ RESTRICTION: the glob-like pattern in the name attribute must have
+ a "*" only at the start or the end.
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i,
z_i)
+ Longer patterns will be matched first. if equal size patterns
+ both match, the first appearing in the schema will be used. -->
+ <dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
+ <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
+ <dynamicField name="*_l" type="slong" indexed="true" stored="true"/>
+ <dynamicField name="*_t" type="text" indexed="true" stored="true"/>
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
+ <dynamicField name="*_f" type="sfloat" indexed="true" stored="true"/>
+ <dynamicField name="*_d" type="sdouble" indexed="true" stored="true"/>
+ <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
+
+ <dynamicField name="random*" type="random" />
+
+ <!-- uncomment the following to ignore any fields that don't already match
an existing
+ field name or dynamic field, rather than reporting them as an error.
+ alternately, change the type="ignored" to some other type e.g. "text"
if you want
+ unknown fields indexed and/or stored by default -->
+ <!--dynamicField name="*" type="ignored" /-->
+
+ </fields>
+ <!-- Field to use to determine and enforce document uniqueness.
+ Unless this field is marked with required="false", it will be a required
field
+ -->
<uniqueKey>id</uniqueKey>
+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->
<defaultSearchField>text</defaultSearchField>
+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
<solrQueryParser defaultOperator="OR"/>
+ <!-- copyField commands copy one field to another at the time a document
+ is added to the index. It's used either to index the same field
differently,
+ or to add multiple fields to the same field for easier/faster
searching. -->
+ <copyField source="id" dest="sku"/>
+
+ <copyField source="incubationdate_dt" dest="incubationdate_s"/>
+ <copyField source="cat" dest="text"/>
+ <copyField source="name" dest="text"/>
+ <copyField source="name" dest="nameSort"/>
+ <copyField source="name" dest="alphaNameSort"/>
+ <copyField source="manu" dest="text"/>
+ <copyField source="features" dest="text"/>
+ <copyField source="includes" dest="text"/>
+
+ <copyField source="manu" dest="manu_exact"/>
+
+ <copyField source="name" dest="spell"/>
+
+ <!-- Similarity is the scoring routine for each document vs. a query.
+ A custom similarity may be specified here, but the default is fine
+ for most applications. -->
+ <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
+ <!-- ... OR ...
+ Specify a SimilarityFactory class name implementation
+ allowing parameters to be used.
+ -->
+ <!--
+ <similarity class="com.example.solr.CustomSimilarityFactory">
+ <str name="paramkey">param value</str>
+ </similarity>
+ -->
+
</schema>
Modified: lucene/solr/trunk/contrib/velocity/src/main/solr/conf/schema.xml
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/velocity/src/main/solr/conf/schema.xml?rev=721687&r1=721686&r2=721687&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/velocity/src/main/solr/conf/schema.xml (original)
+++ lucene/solr/trunk/contrib/velocity/src/main/solr/conf/schema.xml Sat Nov 29
08:38:08 2008
@@ -165,8 +165,8 @@
<filter class="solr.SynonymFilterFactory"
synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
<!-- Case insensitive stop word removal.
- enablePositionIncrements=true ensures that a 'gap' is left to
- allow for accurate phrase queries.
+ add enablePositionIncrements=true in both the index and query
+ analyzers to leave a 'gap' for more accurate phrase queries.
-->
<filter class="solr.StopFilterFactory"
ignoreCase="true"
@@ -181,7 +181,11 @@
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt"/>
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="stopwords.txt"
+ enablePositionIncrements="true"
+ />
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1"
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"
splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory"
protected="protwords.txt"/>
@@ -215,6 +219,16 @@
</analyzer>
</fieldType>
+ <!-- charFilter + "CharStream aware" WhitespaceTokenizer -->
+ <!--
+ <fieldType name="textCharNorm" class="solr.TextField"
positionIncrementGap="100" >
+ <analyzer>
+ <charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt"/>
+ <tokenizer class="solr.CharStreamAwareWhitespaceTokenizerFactory"/>
+ </analyzer>
+ </fieldType>
+ -->
+
<!-- This is an example of using the KeywordTokenizer along
With various TokenFilterFactories to produce a sortable field
that does not include some properties of the source text
Modified: lucene/solr/trunk/example/solr/conf/schema.xml
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/example/solr/conf/schema.xml?rev=721687&r1=721686&r2=721687&view=diff
==============================================================================
--- lucene/solr/trunk/example/solr/conf/schema.xml (original)
+++ lucene/solr/trunk/example/solr/conf/schema.xml Sat Nov 29 08:38:08 2008
@@ -165,8 +165,8 @@
<filter class="solr.SynonymFilterFactory"
synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
<!-- Case insensitive stop word removal.
- enablePositionIncrements=true ensures that a 'gap' is left to
- allow for accurate phrase queries.
+ add enablePositionIncrements=true in both the index and query
+ analyzers to leave a 'gap' for more accurate phrase queries.
-->
<filter class="solr.StopFilterFactory"
ignoreCase="true"
@@ -181,7 +181,11 @@
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt"/>
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="stopwords.txt"
+ enablePositionIncrements="true"
+ />
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1"
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"
splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory"
protected="protwords.txt"/>
Modified: lucene/solr/trunk/src/java/org/apache/solr/search/SolrQueryParser.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/SolrQueryParser.java?rev=721687&r1=721686&r2=721687&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/SolrQueryParser.java
(original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/SolrQueryParser.java Sat
Nov 29 08:38:08 2008
@@ -73,6 +73,7 @@
this.parser = null;
this.defaultField = defaultField;
setLowercaseExpandedTerms(false);
+ setEnablePositionIncrements(true);
}
public SolrQueryParser(QParser parser, String defaultField) {
@@ -85,6 +86,7 @@
this.parser = parser;
this.defaultField = defaultField;
setLowercaseExpandedTerms(false);
+ setEnablePositionIncrements(true);
}
private void checkNullField(String field) throws SolrException {