schema.xml

yonik Tue, 20 Oct 2009 16:51:29 -0700

Author: yonik
Date: Tue Oct 20 23:50:59 2009
New Revision: 827841

URL: http://svn.apache.org/viewvc?rev=827841&view=rev
Log:
use example schema for clustering example


Modified:
    lucene/solr/trunk/contrib/clustering/example/conf/schema.xml

Modified: lucene/solr/trunk/contrib/clustering/example/conf/schema.xml
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/example/conf/schema.xml?rev=827841&r1=827840&r2=827841&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/clustering/example/conf/schema.xml (original)
+++ lucene/solr/trunk/contrib/clustering/example/conf/schema.xml Tue Oct 20 
23:50:59 2009
@@ -16,10 +16,10 @@
  limitations under the License.
 -->
 
-<!--
+<!--  
  This is the Solr schema file. This file should be named "schema.xml" and
  should be in the conf directory under the solr home
- (i.e. ./solr/conf/schema.xml by default)
+ (i.e. ./solr/conf/schema.xml by default) 
  or located where the classloader for the Solr webapp can find it.
 
  This example schema is the recommended starting point for users.
@@ -27,15 +27,33 @@
 
  For more information, on how to customize this file, please see
  http://wiki.apache.org/solr/SchemaXml
+
+ PERFORMANCE NOTE: this schema includes many optional features and should not
+ be used for benchmarking.  To improve performance one could
+  - set stored="false" for all fields possible (esp large fields) when you
+    only need to search on the field but don't need to return the original
+    value.
+  - set indexed="false" if you don't need to search on the field, but only
+    return the field as a result of searching on other indexed fields.
+  - remove all unneeded copyField statements
+  - for best index size and searching performance, set "indexed" to false
+    for all general text fields, use copyField to copy them to the
+    catchall "text" field, and use that for searching.
+  - For maximum indexing performance, use the StreamingUpdateSolrServer
+    java client.
+  - Remember to run the JVM in server mode, and use a higher logging level
+    that avoids logging every request
 -->
 
-<schema name="example" version="1.1">
+<schema name="example" version="1.2">
   <!-- attribute "name" is the name of this schema and is only used for 
display purposes.
        Applications should change this to reflect the nature of the search 
collection.
-       version="1.1" is Solr's version number for the schema syntax and 
semantics.  It should
+       version="1.2" is Solr's version number for the schema syntax and 
semantics.  It should
        not normally be changed by applications.
        1.0: multiValued attribute did not exist, all fields are multiValued by 
nature
-       1.1: multiValued attribute introduced, false by default -->
+       1.1: multiValued attribute introduced, false by default 
+       1.2: omitTermFreqAndPositions attribute introduced, true by default 
except for text fields.
+     -->
 
   <types>
     <!-- field type definitions. The "name" attribute is
@@ -46,7 +64,7 @@
        org.apache.solr.analysis package.
     -->
 
-    <!-- The StrField type is not analyzed, but indexed/stored verbatim.
+    <!-- The StrField type is not analyzed, but indexed/stored verbatim.  
        - StrField and TextField support an optional compressThreshold which
        limits compression (if enabled in the derived fields) to values which
        exceed a certain size (in characters).
@@ -55,9 +73,12 @@
 
     <!-- boolean type: "true" or "false" -->
     <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" 
omitNorms="true"/>
+    <!--Binary data type. The data should be sent/retrieved as Base64 
encoded Strings -->
+    <fieldtype name="binary" class="solr.BinaryField"/>
 
     <!-- The optional sortMissingLast and sortMissingFirst attributes are
          currently supported on types that are sorted internally as strings.
+              This includes 
"string","boolean","sint","slong","sfloat","sdouble","pdate"
        - If sortMissingLast="true", then a sort on this field will cause 
documents
          without the field to come after documents with the field,
          regardless of the requested sort order (asc or desc).
@@ -67,31 +88,34 @@
        - If sortMissingLast="false" and sortMissingFirst="false" (the default),
          then default lucene sorting will be used which places docs without the
          field first in an ascending sort and last in a descending sort.
-    -->
-
+    -->    
 
-    <!-- numeric field types that store and index the text
-         value verbatim (and hence don't support range queries, since the
-         lexicographic ordering isn't equal to the numeric ordering) -->
-    <fieldType name="integer" class="solr.IntField" omitNorms="true"/>
-    <fieldType name="long" class="solr.LongField" omitNorms="true"/>
-    <fieldType name="float" class="solr.FloatField" omitNorms="true"/>
-    <fieldType name="double" class="solr.DoubleField" omitNorms="true"/>
-
-
-    <!-- Numeric field types that manipulate the value into
-         a string value that isn't human-readable in its internal form,
-         but with a lexicographic ordering the same as the numeric ordering,
-         so that range queries work correctly. -->
-    <fieldType name="sint" class="solr.SortableIntField" 
sortMissingLast="true" omitNorms="true"/>
-    <fieldType name="slong" class="solr.SortableLongField" 
sortMissingLast="true" omitNorms="true"/>
-    <fieldType name="sfloat" class="solr.SortableFloatField" 
sortMissingLast="true" omitNorms="true"/>
-    <fieldType name="sdouble" class="solr.SortableDoubleField" 
sortMissingLast="true" omitNorms="true"/>
+    <!--
+      Default numeric field types. For faster range queries, consider the 
tint/tfloat/tlong/tdouble types.
+    -->
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" 
omitNorms="true" positionIncrementGap="0"/>
 
+    <!--
+     Numeric field types that index each value at various levels of precision
+     to accelerate range queries when the number of values between the range
+     endpoints is large. See the javadoc for NumericRangeQuery for internal
+     implementation details.
+
+     Smaller precisionStep values (specified in bits) will lead to more tokens
+     indexed per value, slightly larger index size, and faster range queries.
+     A precisionStep of 0 disables indexing at different precision levels.
+    -->
+    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
 
     <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, 
and
          is a more restricted form of the canonical representation of dateTime
-         http://www.w3.org/TR/xmlschema-2/#dateTime
+         http://www.w3.org/TR/xmlschema-2/#dateTime    
          The trailing "Z" designates UTC time and is mandatory.
          Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
          All other components are mandatory.
@@ -106,20 +130,57 @@
                NOW/DAY+6MONTHS+3DAYS
                   ... 6 months and 3 days in the future from the start of
                       the current day
-
+                      
          Consult the DateField javadocs for more information.
+
+         Note: For faster range queries, consider the tdate type
       -->
-    <fieldType name="date" class="solr.DateField" sortMissingLast="true" 
omitNorms="true"/>
+    <fieldType name="date" class="solr.TrieDateField" omitNorms="true" 
precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- A Trie based date field for faster date range queries and date 
faceting. -->
+    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" 
precisionStep="6" positionIncrementGap="0"/>
+
+
+    <!--
+      Note:
+      These should only be used for compatibility with existing indexes 
(created with older Solr versions)
+      or if "sortMissingFirst" or "sortMissingLast" functionality is needed. 
Use Trie based fields instead.
+
+      Plain numeric field types that store and index the text
+      value verbatim (and hence don't support range queries, since the
+      lexicographic ordering isn't equal to the numeric ordering)
+    -->
+    <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
+    <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
+    <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
+    <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
+    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" 
omitNorms="true"/>
+
+
+    <!--
+      Note:
+      These should only be used for compatibility with existing indexes 
(created with older Solr versions)
+      or if "sortMissingFirst" or "sortMissingLast" functionality is needed. 
Use Trie based fields instead.
+
+      Numeric field types that manipulate the value into
+      a string value that isn't human-readable in its internal form,
+      but with a lexicographic ordering the same as the numeric ordering,
+      so that range queries work correctly.
+    -->
+    <fieldType name="sint" class="solr.SortableIntField" 
sortMissingLast="true" omitNorms="true"/>
+    <fieldType name="slong" class="solr.SortableLongField" 
sortMissingLast="true" omitNorms="true"/>
+    <fieldType name="sfloat" class="solr.SortableFloatField" 
sortMissingLast="true" omitNorms="true"/>
+    <fieldType name="sdouble" class="solr.SortableDoubleField" 
sortMissingLast="true" omitNorms="true"/>
 
 
     <!-- The "RandomSortField" is not used to store or search any
          data.  You can declare fields of this type it in your schema
-         to generate psuedo-random orderings of your docs for sorting
-         purposes.  The ordering is generated based on the field name
+         to generate pseudo-random orderings of your docs for sorting 
+         purposes.  The ordering is generated based on the field name 
          and the version of the index, As long as the index version
          remains unchanged, and the same field name is reused,
-         the ordering of the docs will be consistent.
-         If you want differend psuedo-random orderings of documents,
+         the ordering of the docs will be consistent.  
+         If you want different pseudo-random orderings of documents,
          for the same version of the index, use a dynamicField and
          change the name
      -->
@@ -155,8 +216,6 @@
         words on case-change, alpha numeric boundaries, and non-alphanumeric 
chars,
         so that a query of "wifi" or "wi fi" could match a document containing 
"Wi-Fi".
         Synonyms and stopwords are customized by external files, and stemming 
is enabled.
-        Duplicate tokens at the same position (which may result from Stemmed 
Synonyms or
-        WordDelim parts) are removed.
         -->
     <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
       <analyzer type="index">
@@ -165,8 +224,8 @@
         <filter class="solr.SynonymFilterFactory" 
synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
         -->
         <!-- Case insensitive stop word removal.
-             enablePositionIncrements=true ensures that a 'gap' is left to
-             allow for accurate phrase queries.
+          add enablePositionIncrements=true in both the index and query
+          analyzers to leave a 'gap' for more accurate phrase queries.
         -->
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
@@ -175,17 +234,19 @@
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" 
splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory" 
protected="protwords.txt"/>
-        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" 
protected="protwords.txt"/>
       </analyzer>
       <analyzer type="query">
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
ignoreCase="true" expand="true"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                enablePositionIncrements="true"
+                />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" 
splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory" 
protected="protwords.txt"/>
-        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" 
protected="protwords.txt"/>
       </analyzer>
     </fieldType>
 
@@ -199,21 +260,70 @@
         <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt"/>
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" 
generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory" 
protected="protwords.txt"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" 
protected="protwords.txt"/>
+        <!-- this filter can remove any duplicate tokens that appear at the 
same position - sometimes
+             possible with WordDelimiterFilter in conjunction with stemming. -->
         <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
       </analyzer>
     </fieldType>
 
+
+    <!-- A general unstemmed text field - good if one does not know the 
language of the field -->
+    <fieldType name="textgen" class="solr.TextField" 
positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" 
splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" 
splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+
+    <!-- A general unstemmed text field that indexes tokens normally and also
+         reversed (via ReversedWildcardFilterFactory), to enable more 
efficient 
+        leading wildcard queries. -->
+    <fieldType name="text_rev" class="solr.TextField" 
positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" 
splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
+           maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" 
splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- charFilter + WhitespaceTokenizer  -->
     <!--
-     Setup simple analysis for spell checking
-     -->
-    <fieldType name="textSpell" class="solr.TextField" 
positionIncrementGap="100" >
+    <fieldType name="textCharNorm" class="solr.TextField" 
positionIncrementGap="100" >
       <analyzer>
-        <tokenizer class="solr.StandardTokenizerFactory"/>
-        <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+        <charFilter class="solr.MappingCharFilterFactory" 
mapping="mapping-ISOLatin1Accent.txt"/>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
       </analyzer>
     </fieldType>
+    -->
 
     <!-- This is an example of using the KeywordTokenizer along
          With various TokenFilterFactories to produce a sortable field
@@ -233,13 +343,13 @@
         <filter class="solr.TrimFilterFactory" />
         <!-- The PatternReplaceFilter gives you the flexibility to use
              Java Regular expression to replace any sequence of characters
-             matching a pattern with an arbitrary replacement string,
-             which may include back refrences to portions of the orriginal
+             matching a pattern with an arbitrary replacement string, 
+             which may include back references to portions of the original
              string matched by the pattern.
-
+             
              See the Java Regular Expression documentation for more
-             infomation on pattern and replacement string syntax.
-
+             information on pattern and replacement string syntax.
+             
              
http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
           -->
         <filter class="solr.PatternReplaceFilterFactory"
@@ -247,11 +357,44 @@
         />
       </analyzer>
     </fieldType>
+    
+    <fieldtype name="phonetic" stored="false" indexed="true" 
class="solr.TextField" >
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
+      </analyzer>
+    </fieldtype>
+
+    <fieldtype name="payloads" stored="false" indexed="true" 
class="solr.TextField" >
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <!--
+        The DelimitedPayloadTokenFilter can put payloads on tokens... for 
example,
+        a token of "foo|1.4"  would be indexed as "foo" with a payload of 1.4f
+        Attributes of the DelimitedPayloadTokenFilterFactory : 
+         "delimiter" - a one character delimiter. Default is | (pipe)
+        "encoder" - how to encode the following value into a payload
+           float -> org.apache.lucene.analysis.payloads.FloatEncoder,
+           integer -> o.a.l.a.p.IntegerEncoder
+           identity -> o.a.l.a.p.IdentityEncoder
+            Fully Qualified class name implementing PayloadEncoder, Encoder 
must have a no arg constructor.
+         -->
+        <filter class="solr.DelimitedPayloadTokenFilterFactory" 
encoder="float"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- lowercases the entire field value, keeping it as a single token.  -->
+    <fieldType name="lowercase" class="solr.TextField" 
positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory" />
+      </analyzer>
+    </fieldType>
 
-    <!-- since fields of this type are by default not stored or indexed, any 
data added to
-         them will be ignored outright
-     -->
-    <fieldtype name="ignored" stored="false" indexed="false" 
class="solr.StrField" />
+
+    <!-- since fields of this type are by default not stored or indexed,
+         any data added to them will be ignored outright.  --> 
+    <fieldtype name="ignored" stored="false" indexed="false" 
multiValued="true" class="solr.StrField" /> 
 
  </types>
 
@@ -259,7 +402,8 @@
  <fields>
    <!-- Valid attributes for fields:
      name: mandatory - the name for the field
-     type: mandatory - the name of a previously defined type from the <types> 
section
+     type: mandatory - the name of a previously defined type from the 
+       <types> section
      indexed: true if this field should be indexed (searchable or sortable)
      stored: true if this field should be retrievable
      compressed: [false] if this field should be stored using gzip compression
@@ -270,51 +414,72 @@
        this field (this disables length normalization and index-time
        boosting for the field, and saves some memory).  Only full-text
        fields or fields that need an index-time boost need norms.
-     termVectors: [false] set to true to store the term vector for a given 
field.
-       When using MoreLikeThis, fields used for similarity should be stored for
-       best performance.
+     termVectors: [false] set to true to store the term vector for a
+       given field.
+       When using MoreLikeThis, fields used for similarity should be
+       stored for best performance.
+     termPositions: Store position information with the term vector.  
+       This will increase storage costs.
+     termOffsets: Store offset information with the term vector. This 
+       will increase storage costs.
+     default: a value that should be used if no value is specified
+       when adding a document.
    -->
 
-   <field name="id" type="string" indexed="true" stored="true" required="true" 
/>
+   <field name="id" type="string" indexed="true" stored="true" required="true" 
/> 
    <field name="sku" type="textTight" indexed="true" stored="true" 
omitNorms="true"/>
-   <field name="name" type="text" indexed="true" stored="true"/>
-   <field name="nameSort" type="string" indexed="true" stored="false"/>
+   <field name="name" type="textgen" indexed="true" stored="true"/>
    <field name="alphaNameSort" type="alphaOnlySort" indexed="true" 
stored="false"/>
-   <field name="manu" type="text" indexed="true" stored="true" 
omitNorms="true"/>
-   <field name="cat" type="text_ws" indexed="true" stored="true" 
multiValued="true" omitNorms="true" termVectors="true" />
+   <field name="manu" type="textgen" indexed="true" stored="true" 
omitNorms="true"/>
+   <field name="cat" type="text_ws" indexed="true" stored="true" 
multiValued="true" omitNorms="true" />
    <field name="features" type="text" indexed="true" stored="true" 
multiValued="true"/>
-   <field name="includes" type="text" indexed="true" stored="true"/>
+   <field name="includes" type="text" indexed="true" stored="true" 
termVectors="true" termPositions="true" termOffsets="true" />
 
-   <field name="weight" type="sfloat" indexed="true" stored="true"/>
-   <field name="price"  type="sfloat" indexed="true" stored="true"/>
-   <!-- "default" values can be specified for fields, indicating which
-        value should be used if no value is specified when adding a document.
-     -->
-   <field name="popularity" type="sint" indexed="true" stored="true" 
default="0"/>
-   <field name="inStock" type="boolean" indexed="true" stored="true"/>
+   <field name="weight" type="float" indexed="true" stored="true"/>
+   <field name="price"  type="float" indexed="true" stored="true"/>
+   <field name="popularity" type="int" indexed="true" stored="true" />
+   <field name="inStock" type="boolean" indexed="true" stored="true" />
+
+
+   <!-- Common metadata fields, named specifically to match up with
+     SolrCell metadata when parsing rich documents such as Word, PDF.
+     Some fields are multiValued only because Tika currently may return
+     multiple values for them.
+   -->
+   <field name="title" type="text" indexed="true" stored="true" 
multiValued="true"/>
+   <field name="subject" type="text" indexed="true" stored="true"/>
+   <field name="description" type="text" indexed="true" stored="true"/>
+   <field name="comments" type="text" indexed="true" stored="true"/>
+   <field name="author" type="textgen" indexed="true" stored="true"/>
+   <field name="keywords" type="textgen" indexed="true" stored="true"/>
+   <field name="category" type="textgen" indexed="true" stored="true"/>
+   <field name="content_type" type="string" indexed="true" stored="true" 
multiValued="true"/>
+   <field name="last_modified" type="date" indexed="true" stored="true"/>
+   <field name="links" type="string" indexed="true" stored="true" 
multiValued="true"/>
 
-   <!-- Some sample docs exists solely to demonstrate the spellchecker
-        functionality, this is the only field they container.
-        Typically you might build the spellchecker of "catchall" type field
-        containing all of the text in each document
-     -->
-   <field name="word" type="string" indexed="true" stored="true"/>
 
-   
    <!-- catchall field, containing all other searchable text fields 
(implemented
         via copyField further on in this schema  -->
    <field name="text" type="text" indexed="true" stored="false" 
multiValued="true"/>
 
+   <!-- catchall text field that indexes tokens both normally and in reverse 
for efficient
+        leading wildcard queries. -->
+   <field name="text_rev" type="text_rev" indexed="true" stored="false" 
multiValued="true"/>
+
    <!-- non-tokenized version of manufacturer to make it easier to sort or 
group
         results by manufacturer.  copied from "manu" via copyField -->
    <field name="manu_exact" type="string" indexed="true" stored="false"/>
 
-   <!-- Here, default is used to create a "timestamp" field indicating
-        When each document was indexed.
+   <field name="payloads" type="payloads" indexed="true" stored="true"/>
+
+   <!-- Uncommenting the following will create a "timestamp" field using
+        a default value of "NOW" to indicate when each document was indexed.
      -->
+   <!--
    <field name="timestamp" type="date" indexed="true" stored="true" 
default="NOW" multiValued="false"/>
+     -->
+   
 
-   <field name="spell" type="textSpell" indexed="true" stored="true" 
multiValued="true"/>
    <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
         will be used if the name matches any of the patterns.
         RESTRICTION: the glob-like pattern in the name attribute must have
@@ -322,23 +487,38 @@
         EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, 
z_i)
         Longer patterns will be matched first.  if equal size patterns
         both match, the first appearing in the schema will be used.  -->
-   <dynamicField name="*_i"  type="sint"    indexed="true"  stored="true"/>
+   <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
    <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
-   <dynamicField name="*_l"  type="slong"   indexed="true"  stored="true"/>
+   <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
    <dynamicField name="*_t"  type="text"    indexed="true"  stored="true"/>
    <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
-   <dynamicField name="*_f"  type="sfloat"  indexed="true"  stored="true"/>
-   <dynamicField name="*_d"  type="sdouble" indexed="true"  stored="true"/>
+   <dynamicField name="*_f"  type="float"  indexed="true"  stored="true"/>
+   <dynamicField name="*_d"  type="double" indexed="true"  stored="true"/>
    <dynamicField name="*_dt" type="date"    indexed="true"  stored="true"/>
 
-   <dynamicField name="random*" type="random" />
-
-   <!-- for this example, ignore anything we aren't expecting -->
-   <dynamicField name="*" type="ignored" multiValued="true" />
-
+   <!-- some trie-coded dynamic fields for faster range queries -->
+   <dynamicField name="*_ti" type="tint"    indexed="true"  stored="true"/>
+   <dynamicField name="*_tl" type="tlong"   indexed="true"  stored="true"/>
+   <dynamicField name="*_tf" type="tfloat"  indexed="true"  stored="true"/>
+   <dynamicField name="*_td" type="tdouble" indexed="true"  stored="true"/>
+   <dynamicField name="*_tdt" type="tdate"  indexed="true"  stored="true"/>
+
+   <dynamicField name="*_pi"  type="pint"    indexed="true"  stored="true"/>
+
+   <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
+   <dynamicField name="attr_*" type="textgen" indexed="true" stored="true" 
multiValued="true"/>
+
+   <dynamicField name="random_*" type="random" />
+
+   <!-- uncomment the following to ignore any fields that don't already match 
an existing 
+        field name or dynamic field, rather than reporting them as an error. 
+        alternately, change the type="ignored" to some other type e.g. "text" 
if you want 
+        unknown fields indexed and/or stored by default --> 
+   <!--dynamicField name="*" type="ignored" multiValued="true" /-->
+   
  </fields>
 
- <!-- Field to use to determine and enforce document uniqueness.
+ <!-- Field to use to determine and enforce document uniqueness. 
       Unless this field is marked with required="false", it will be a required 
field
    -->
  <uniqueKey>id</uniqueKey>
@@ -352,20 +532,24 @@
   <!-- copyField commands copy one field to another at the time a document
         is added to the index.  It's used either to index the same field 
differently,
         or to add multiple fields to the same field for easier/faster 
searching.  -->
-   <copyField source="id" dest="sku"/>
 
-   <copyField source="incubationdate_dt" dest="incubationdate_s"/>
    <copyField source="cat" dest="text"/>
    <copyField source="name" dest="text"/>
-   <copyField source="name" dest="nameSort"/>
-   <copyField source="name" dest="alphaNameSort"/>
    <copyField source="manu" dest="text"/>
    <copyField source="features" dest="text"/>
    <copyField source="includes" dest="text"/>
-
    <copyField source="manu" dest="manu_exact"/>
-
-   <copyField source="name" dest="spell"/>
+       
+   <!-- Above, multiple source fields are copied to the [text] field. 
+         Another way to map multiple source fields to the same 
+         destination field is to use the dynamic field syntax. 
+         copyField also supports a maxChars to copy setting.  -->
+          
+   <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
+
+   <!-- copy name to alphaNameSort, a field designed for sorting by name -->
+   <!-- <copyField source="name" dest="alphaNameSort"/> -->
+ 
 
  <!-- Similarity is the scoring routine for each document vs. a query.
       A custom similarity may be specified here, but the default is fine

svn commit: r827841 - /lucene/solr/trunk/contrib/clustering/example/conf/schema.xml

Reply via email to