Build: https://builds.apache.org/job/Lucene-Solr-Tests-master/3663/
1 tests failed.
FAILED:
org.apache.solr.cloud.TestConfigSetsAPI.testUserAndTestDefaultConfigsetsAreSame
Error Message:
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/solr/core/src/test-files/solr/configsets/_default/conf/managed-schema
contents doesn't match expected
(/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/solr/server/solr/configsets/_default/conf/managed-schema)
expected:<... <tokenizer [name="whitespace"/> </analyzer>
</fieldType> <!-- A general text field that has reasonable, generic
cross-language defaults: it tokenizes with StandardTokenizer, removes
stop words from case-insensitive "stopwords.txt" (empty by default),
and down cases. At query time only, it also applies synonyms. -->
<fieldType name="text_general" class="solr.TextField"
positionIncrementGap="100" multiValued="true"> <analyzer type="index">
<tokenizer name="standard"/> <filter name="stop" ignoreCase="true"
words="stopwords.txt" /> <!-- in this example, we will only use
synonyms at query time <filter name="synonymGraph"
synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter name="flattenGraph"/> --> <filter name="lowercase"/>
</analyzer> <analyzer type="query"> <tokenizer
name="standard"/> <filter name="stop" ignoreCase="true"
words="stopwords.txt" /> <filter name="synonymGraph"
synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter
name="lowercase"/> </analyzer> </fieldType> <!--
SortableTextField generaly functions exactly like TextField, except
that it supports, and by default uses, docValues for sorting (or faceting)
on the first 1024 characters of the original field values (which is
configurable). This makes it a bit more useful then
TextField in many situations, but the trade-off is that it takes up
more space on disk; which is why it's not used in place of TextField
for every fieldType in this _default schema. --> <dynamicField
name="*_t_sort" type="text_gen_sort" indexed="true" stored="true"
multiValued="false"/> <dynamicField name="*_txt_sort" type="text_gen_sort"
indexed="true" stored="true"/> <fieldType name="text_gen_sort"
class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index"> <tokenizer name="standard"/>
<filter name="stop" ignoreCase="true" words="stopwords.txt" /> <filter
name="lowercase"/> </analyzer> <analyzer type="query">
<tokenizer name="standard"/> <filter name="stop" ignoreCase="true"
words="stopwords.txt" /> <filter name="synonymGraph"
synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter
name="lowercase"/> </analyzer> </fieldType> <!-- A text field
with defaults appropriate for English: it tokenizes with StandardTokenizer,
removes English stop words (lang/stopwords_en.txt), down cases, protects
words from protwords.txt, and finally applies Porter's stemming. The
query time analyzer also applies synonyms from synonyms.txt. -->
<dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/>
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <tokenizer name="standard"/> <!--
in this example, we will only use synonyms at query time <filter
name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true"
expand="false"/> <filter name="flattenGraph"/> --> <!--
Case insensitive stop word removal. --> <filter name="stop"
ignoreCase="true" words="lang/stopwords_en.txt"
/> <filter name="lowercase"/> <filter
name="englishPossessive"/> <filter name="keywordMarker"
protected="protwords.txt"/> <!-- Optionally you may want to use this
less aggressive stemmer instead of PorterStemFilterFactory: <filter
name="englishMinimalStem"/> --> <filter name="porterStem"/>
</analyzer> <analyzer type="query"> <tokenizer name="standard"/>
<filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true"
expand="true"/> <filter name="stop" ignoreCase="true"
words="lang/stopwords_en.txt" /> <filter
name="lowercase"/> <filter name="englishPossessive"/> <filter
name="keywordMarker" protected="protwords.txt"/> <!-- Optionally you
may want to use this less aggressive stemmer instead of
PorterStemFilterFactory: <filter name="englishMinimalStem"/> -->
<filter name="porterStem"/> </analyzer> </fieldType>
<!-- A text field with defaults appropriate for English, plus
aggressive word-splitting and autophrase features enabled. This field
is just like text_en, except it adds WordDelimiterGraphFilter to
enable splitting and matching of words on case-change, alpha numeric
boundaries, and non-alphanumeric chars. This means certain compound
word cases will work, for example query "wi fi" will match
document "WiFi" or "wi-fi". --> <dynamicField name="*_txt_en_split"
type="text_en_splitting" indexed="true" stored="true"/> <fieldType
name="text_en_splitting" class="solr.TextField" positionIncrementGap="100"
autoGeneratePhraseQueries="true"> <analyzer type="index">
<tokenizer name="whitespace"/> <!-- in this example, we will only use
synonyms at query time <filter name="synonymGraph"
synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> -->
<!-- Case insensitive stop word removal. --> <filter
name="stop" ignoreCase="true"
words="lang/stopwords_en.txt" /> <filter
name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1"
catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter name="lowercase"/> <filter name="keywordMarker"
protected="protwords.txt"/> <filter name="porterStem"/> <filter
name="flattenGraph" /> </analyzer> <analyzer type="query">
<tokenizer name="whitespace"/> <filter name="synonymGraph"
synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter
name="stop" ignoreCase="true"
words="lang/stopwords_en.txt" /> <filter
name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1"
catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter name="lowercase"/> <filter name="keywordMarker"
protected="protwords.txt"/> <filter name="porterStem"/>
</analyzer> </fieldType> <!-- Less flexible matching, but less false
matches. Probably not ideal for product names, but may be good for
SKUs. Can insert dashes in the wrong place and still match. -->
<dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight"
indexed="true" stored="true"/> <fieldType name="text_en_splitting_tight"
class="solr.TextField" positionIncrementGap="100"
autoGeneratePhraseQueries="true"> <analyzer type="index">
<tokenizer name="whitespace"/> <filter name="synonymGraph"
synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/> <filter
name="wordDelimiterGraph" generateWordParts="0" generateNumberParts="0"
catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter
name="lowercase"/> <filter name="keywordMarker"
protected="protwords.txt"/> <filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the same
position - sometimes possible with WordDelimiterGraphFilter in
conjuncton with stemming. --> <filter name="removeDuplicates"/>
<filter name="flattenGraph" /> </analyzer> <analyzer type="query">
<tokenizer name="whitespace"/> <filter name="synonymGraph"
synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/> <filter
name="wordDelimiterGraph" generateWordParts="0" generateNumberParts="0"
catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter
name="lowercase"/> <filter name="keywordMarker"
protected="protwords.txt"/> <filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the same
position - sometimes possible with WordDelimiterGraphFilter in
conjuncton with stemming. --> <filter name="removeDuplicates"/>
</analyzer> </fieldType> <!-- Just like text_general except it
reverses the characters of each token, to enable more efficient leading
wildcard queries. --> <dynamicField name="*_txt_rev"
type="text_general_rev" indexed="true" stored="true"/> <fieldType
name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <tokenizer name="standard"/> <filter
name="stop" ignoreCase="true" words="stopwords.txt" /> <filter
name="lowercase"/> <filter name="reversedWildcard" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2"
maxFractionAsterisk="0.33"/> </analyzer> <analyzer type="query">
<tokenizer name="standard"/> <filter name="synonymGraph"
synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter
name="stop" ignoreCase="true" words="stopwords.txt" /> <filter
name="lowercase"/> </analyzer> </fieldType> <dynamicField
name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/>
<fieldType name="phonetic_en" stored="false" indexed="true"
class="solr.TextField" > <analyzer> <tokenizer name="standard"/>
<filter name="doubleMetaphone" inject="false"/> </analyzer>
</fieldType> <!-- lowercases the entire field value, keeping it as a
single token. --> <dynamicField name="*_s_lower" type="lowercase"
indexed="true" stored="true"/> <fieldType name="lowercase"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="keyword"/> <filter name="lowercase" />
</analyzer> </fieldType> <!-- Example of using
PathHierarchyTokenizerFactory at index time, so queries for paths match
documents at that path, or in descendent paths --> <dynamicField
name="*_descendent_path" type="descendent_path" indexed="true"
stored="true"/> <fieldType name="descendent_path" class="solr.TextField">
<analyzer type="index"> <tokenizer name="pathHierarchy"
delimiter="/" /> </analyzer> <analyzer type="query">
<tokenizer name="keyword" /> </analyzer> </fieldType> <!--
Example of using PathHierarchyTokenizerFactory at query time, so queries
for paths match documents at that path, or in ancestor paths -->
<dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true"
stored="true"/> <fieldType name="ancestor_path" class="solr.TextField">
<analyzer type="index"> <tokenizer name="keyword" />
</analyzer> <analyzer type="query"> <tokenizer
name="pathHierarchy" delimiter="/" /> </analyzer> </fieldType>
<!-- This point type indexes the coordinates as separate fields (subFields)
If subFieldType is defined, it references a type, and a dynamic field
definition is created matching *___<typename>. Alternately, if
subFieldSuffix is defined, that is used to create the subFields. Example:
if subFieldType="double", then the coordinates would be indexed in
fields myloc_0___double,myloc_1___double. Example: if subFieldSuffix="_d"
then the coordinates would be indexed in fields myloc_0_d,myloc_1_d
The subFields are an implementation detail of the fieldType, and end
users normally should not need to know about them. --> <dynamicField
name="*_point" type="point" indexed="true" stored="true"/> <fieldType
name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
<!-- A specialized field for geospatial search filters and distance sorting.
--> <fieldType name="location" class="solr.LatLonPointSpatialField"
docValues="true"/> <!-- A geospatial field type that supports multiValued
and polygon shapes. For more information about this and other spatial
fields see: http://lucene.apache.org/solr/guide/spatial-search.html
--> <fieldType name="location_rpt"
class="solr.SpatialRecursivePrefixTreeFieldType" geo="true"
distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" /> <!--
Payloaded field types --> <fieldType name="delimited_payloads_float"
stored="false" indexed="true" class="solr.TextField"> <analyzer>
<tokenizer name="whitespace"/> <filter name="delimitedPayload"
encoder="float"/> </analyzer> </fieldType> <fieldType
name="delimited_payloads_int" stored="false" indexed="true"
class="solr.TextField"> <analyzer> <tokenizer name="whitespace"/>
<filter name="delimitedPayload" encoder="integer"/> </analyzer>
</fieldType> <fieldType name="delimited_payloads_string" stored="false"
indexed="true" class="solr.TextField"> <analyzer> <tokenizer
name="whitespace"/> <filter name="delimitedPayload"
encoder="identity"/> </analyzer> </fieldType> <!-- some examples
for different languages (generally ordered by ISO code) --> <!-- Arabic
--> <dynamicField name="*_txt_ar" type="text_ar" indexed="true"
stored="true"/> <fieldType name="text_ar" class="solr.TextField"
positionIncrementGap="100"> <analyzer> <tokenizer
name="standard"/> <!-- for any non-arabic --> <filter
name="lowercase"/> <filter name="stop" ignoreCase="true"
words="lang/stopwords_ar.txt" /> <!-- normalizes ﻯ to ﻱ, etc -->
<filter name="arabicNormalization"/> <filter name="arabicStem"/>
</analyzer> </fieldType> <!-- Bulgarian --> <dynamicField
name="*_txt_bg" type="text_bg" indexed="true" stored="true"/> <fieldType
name="text_bg" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer name="standard"/> <filter
name="lowercase"/> <filter name="stop" ignoreCase="true"
words="lang/stopwords_bg.txt" /> <filter name="bulgarianStem"/>
</analyzer> </fieldType> <!-- Catalan --> <dynamicField
name="*_txt_ca" type="text_ca" indexed="true" stored="true"/> <fieldType
name="text_ca" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer name="standard"/> <!-- removes l', etc
--> <filter name="elision" ignoreCase="true"
articles="lang/contractions_ca.txt"/> <filter name="lowercase"/>
<filter name="stop" ignoreCase="true" words="lang/stopwords_ca.txt" />
<filter name="snowballPorter" language="Catalan"/> </analyzer>
</fieldType> <!-- CJK bigram (see text_ja for a Japanese configuration
using morphological analysis) --> <dynamicField name="*_txt_cjk"
type="text_cjk" indexed="true" stored="true"/> <fieldType name="text_cjk"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <!-- normalize width before bigram, as
e.g. half-width dakuten combine --> <filter name="CJKWidth"/>
<!-- for any non-CJK --> <filter name="lowercase"/> <filter
name="CJKBigram"/> </analyzer> </fieldType> <!-- Czech -->
<dynamicField name="*_txt_cz" type="text_cz" indexed="true" stored="true"/>
<fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer name="standard"/> <filter
name="lowercase"/> <filter name="stop" ignoreCase="true"
words="lang/stopwords_cz.txt" /> <filter name="czechStem"/>
</analyzer> </fieldType> <!-- Danish --> <dynamicField
name="*_txt_da" type="text_da" indexed="true" stored="true"/> <fieldType
name="text_da" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer name="standard"/> <filter
name="lowercase"/> <filter name="stop" ignoreCase="true"
words="lang/stopwords_da.txt" format="snowball" /> <filter
name="snowballPorter" language="Danish"/> </analyzer> </fieldType>
<!-- German --> <dynamicField name="*_txt_de" type="text_de"
indexed="true" stored="true"/> <fieldType name="text_de"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <filter name="lowercase"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball"
/> <filter name="germanNormalization"/> <filter
name="germanLightStem"/> <!-- less aggressive: <filter
name="germanMinimalStem"/> --> <!-- more aggressive: <filter
name="snowballPorter" language="German2"/> --> </analyzer>
</fieldType> <!-- Greek --> <dynamicField name="*_txt_el"
type="text_el" indexed="true" stored="true"/> <fieldType name="text_el"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <!-- greek specific lowercase for sigma
--> <filter name="greekLowercase"/> <filter name="stop"
ignoreCase="false" words="lang/stopwords_el.txt" /> <filter
name="greekStem"/> </analyzer> </fieldType> <!-- Spanish -->
<dynamicField name="*_txt_es" type="text_es" indexed="true"
stored="true"/> <fieldType name="text_es" class="solr.TextField"
positionIncrementGap="100"> <analyzer> <tokenizer
name="standard"/> <filter name="lowercase"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball"
/> <filter name="spanishLightStem"/> <!-- more aggressive:
<filter name="snowballPorter" language="Spanish"/> --> </analyzer>
</fieldType> <!-- Estonian --> <dynamicField name="*_txt_et"
type="text_et" indexed="true" stored="true"/> <fieldType name="text_et"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <filter name="lowercase"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_et.txt" /> <filter
name="snowballPorter" language="Estonian"/> </analyzer> </fieldType>
<!-- Basque --> <dynamicField name="*_txt_eu" type="text_eu"
indexed="true" stored="true"/> <fieldType name="text_eu"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <filter name="lowercase"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_eu.txt" /> <filter
name="snowballPorter" language="Basque"/> </analyzer> </fieldType>
<!-- Persian --> <dynamicField name="*_txt_fa" type="text_fa"
indexed="true" stored="true"/> <fieldType name="text_fa"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<!-- for ZWNJ --> <charFilter name="persian"/> <tokenizer
name="standard"/> <filter name="lowercase"/> <filter
name="arabicNormalization"/> <filter name="persianNormalization"/>
<filter name="stop" ignoreCase="true" words="lang/stopwords_fa.txt" />
</analyzer> </fieldType> <!-- Finnish --> <dynamicField
name="*_txt_fi" type="text_fi" indexed="true" stored="true"/> <fieldType
name="text_fi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer name="standard"/> <filter
name="lowercase"/> <filter name="stop" ignoreCase="true"
words="lang/stopwords_fi.txt" format="snowball" /> <filter
name="snowballPorter" language="Finnish"/> <!-- less aggressive:
<filter name="finnishLightStem"/> --> </analyzer> </fieldType>
<!-- French --> <dynamicField name="*_txt_fr" type="text_fr"
indexed="true" stored="true"/> <fieldType name="text_fr"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <!-- removes l', etc --> <filter
name="elision" ignoreCase="true" articles="lang/contractions_fr.txt"/>
<filter name="lowercase"/> <filter name="stop" ignoreCase="true"
words="lang/stopwords_fr.txt" format="snowball" /> <filter
name="frenchLightStem"/> <!-- less aggressive: <filter
name="frenchMinimalStem"/> --> <!-- more aggressive: <filter
name="snowballPorter" language="French"/> --> </analyzer>
</fieldType> <!-- Irish --> <dynamicField name="*_txt_ga"
type="text_ga" indexed="true" stored="true"/> <fieldType name="text_ga"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <!-- removes d', etc --> <filter
name="elision" ignoreCase="true" articles="lang/contractions_ga.txt"/>
<!-- removes n-, etc. position increments is intentionally false! -->
<filter name="stop" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
<filter name="irishLowercase"/> <filter name="stop" ignoreCase="true"
words="lang/stopwords_ga.txt"/> <filter name="snowballPorter"
language="Irish"/> </analyzer> </fieldType> <!-- Galician
--> <dynamicField name="*_txt_gl" type="text_gl" indexed="true"
stored="true"/> <fieldType name="text_gl" class="solr.TextField"
positionIncrementGap="100"> <analyzer> <tokenizer
name="standard"/> <filter name="lowercase"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_gl.txt" /> <filter
name="galicianStem"/> <!-- less aggressive: <filter
name="galicianMinimalStem"/> --> </analyzer> </fieldType>
<!-- Hindi --> <dynamicField name="*_txt_hi" type="text_hi" indexed="true"
stored="true"/> <fieldType name="text_hi" class="solr.TextField"
positionIncrementGap="100"> <analyzer> <tokenizer
name="standard"/> <filter name="lowercase"/> <!-- normalizes
unicode representation --> <filter name="indicNormalization"/>
<!-- normalizes variation in spelling --> <filter
name="hindiNormalization"/> <filter name="stop" ignoreCase="true"
words="lang/stopwords_hi.txt" /> <filter name="hindiStem"/>
</analyzer> </fieldType> <!-- Hungarian --> <dynamicField
name="*_txt_hu" type="text_hu" indexed="true" stored="true"/> <fieldType
name="text_hu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer name="standard"/> <filter
name="lowercase"/> <filter name="stop" ignoreCase="true"
words="lang/stopwords_hu.txt" format="snowball" /> <filter
name="snowballPorter" language="Hungarian"/> <!-- less aggressive:
<filter name="hungarianLightStem"/> --> </analyzer> </fieldType>
<!-- Armenian --> <dynamicField name="*_txt_hy" type="text_hy"
indexed="true" stored="true"/> <fieldType name="text_hy"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <filter name="lowercase"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_hy.txt" /> <filter
name="snowballPorter" language="Armenian"/> </analyzer> </fieldType>
<!-- Indonesian --> <dynamicField name="*_txt_id" type="text_id"
indexed="true" stored="true"/> <fieldType name="text_id"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <filter name="lowercase"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_id.txt" /> <!-- for
a less aggressive approach (only inflectional suffixes), set stemDerivational
to false --> <filter name="indonesianStem" stemDerivational="true"/>
</analyzer> </fieldType> <!-- Italian --> <dynamicField
name="*_txt_it" type="text_it" indexed="true" stored="true"/> <fieldType
name="text_it" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer name="standard"/> <!-- removes l', etc
--> <filter name="elision" ignoreCase="true"
articles="lang/contractions_it.txt"/> <filter name="lowercase"/>
<filter name="stop" ignoreCase="true" words="lang/stopwords_it.txt"
format="snowball" /> <filter name="italianLightStem"/> <!--
more aggressive: <filter name="snowballPorter" language="Italian"/> -->
</analyzer> </fieldType> <!-- Japanese using morphological
analysis (see text_cjk for a configuration using bigramming) NOTE: If
you want to optimize search for precision, use default operator AND in your
request handler config (q.op) Use OR if you would like to optimize for
recall (default). --> <dynamicField name="*_txt_ja" type="text_ja"
indexed="true" stored="true"/> <fieldType name="text_ja"
class="solr.TextField" positionIncrementGap="100"
autoGeneratePhraseQueries="false"> <analyzer> <!-- Kuromoji
Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
Kuromoji has a search mode (default) that does segmentation useful for search.
A heuristic is used to segment compounds into its parts and the
compound itself is kept as synonym. Valid values for attribute mode
are: normal: regular segmentation search:
segmentation useful for search with synonyms compounds (default)
extended: same as search mode, but unigrams unknown words (experimental)
For some applications it might be good to use search mode for indexing and
normal mode for queries to reduce recall and prevent parts of
compounds from being matched and highlighted. Use <analyzer
type="index"> and <analyzer type="query"> for this and mode normal in query.
Kuromoji also has a convenient user dictionary feature that allows
overriding the statistical model with your own entries for
segmentation, part-of-speech tags and readings without a need to
specify weights. Notice that user dictionaries have not been subject to
extensive testing. User dictionary attributes are:
userDictionary: user dictionary filename
userDictionaryEncoding: user dictionary encoding (default is UTF-8)
See lang/userdict_ja.txt for a sample user dictionary file.
Punctuation characters are discarded by default. Use
discardPunctuation="false" to keep them. --> <tokenizer
name="japanese" mode="search"/> <!--<tokenizer name="japanese"
mode="search" userDictionary="lang/userdict_ja.txt"/>--> <!-- Reduces
inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
<filter name="japaneseBaseForm"/> <!-- Removes tokens with certain
part-of-speech tags --> <filter name="japanesePartOfSpeechStop"
tags="lang/stoptags_ja.txt" /> <!-- Normalizes full-width romaji to
half-width and half-width kana to full-width (Unicode NFKC subset) -->
<filter name="cjkWidth"/> <!-- Removes common tokens typically not
useful for search, but have a negative effect on ranking --> <filter
name="stop" ignoreCase="true" words="lang/stopwords_ja.txt" /> <!--
Normalizes common katakana spelling variations by removing any last long sound
character (U+30FC) --> <filter name="japaneseKatakanaStem"
minimumLength="4"/> <!-- Lower-cases romaji characters -->
<filter name="lowercase"/> </analyzer> </fieldType> <!--
Korean morphological analysis --> <dynamicField name="*_txt_ko"
type="text_ko" indexed="true" stored="true"/> <fieldType name="text_ko"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<!-- Nori Korean morphological analyzer/tokenizer (KoreanTokenizer)
The Korean (nori) analyzer integrates Lucene nori analysis module into Solr.
It uses the mecab-ko-dic dictionary to perform morphological analysis of
Korean texts. This dictionary was built with MeCab, it defines a
format for the features adapted for the Korean language.
Nori also has a convenient user dictionary feature that allows
overriding the statistical model with your own entries for
segmentation, part-of-speech tags and readings without a need to
specify weights. Notice that user dictionaries have not been subject to
extensive testing. The tokenizer supports multiple schema
attributes: * userDictionary: User dictionary path. *
userDictionaryEncoding: User dictionary encoding. * decompoundMode:
Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'.
* outputUnknownUnigrams: If true outputs unigrams for unknown words.
--> <tokenizer name="korean" decompoundMode="discard"
outputUnknownUnigrams="false"/> <!-- Removes some part of speech stuff
like EOMI (Pos.E), you can add a parameter 'tags', listing the tags
to remove. By default it removes: E, IC, J, MAG, MAJ, MM, SP, SSC,
SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV This is basically an
equivalent to stemming. --> <filter
name="koreanPartOfSpeechStop" /> <!-- Replaces term text with the
Hangul transcription of Hanja characters, if applicable: --> <filter
name="koreanReadingForm" /> <filter name="lowercase" />
</analyzer> </fieldType> <!-- Latvian --> <dynamicField
name="*_txt_lv" type="text_lv" indexed="true" stored="true"/> <fieldType
name="text_lv" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer name="standard"/> <filter
name="lowercase"/> <filter name="stop" ignoreCase="true"
words="lang/stopwords_lv.txt" /> <filter name="latvianStem"/>
</analyzer> </fieldType> <!-- Dutch --> <dynamicField
name="*_txt_nl" type="text_nl" indexed="true" stored="true"/> <fieldType
name="text_nl" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer name="standard"/> <filter
name="lowercase"/> <filter name="stop" ignoreCase="true"
words="lang/stopwords_nl.txt" format="snowball" /> <filter
name="stemmerOverride" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
<filter name="snowballPorter" language="Dutch"/> </analyzer>
</fieldType> <!-- Norwegian --> <dynamicField name="*_txt_no"
type="text_no" indexed="true" stored="true"/> <fieldType name="text_no"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <filter name="lowercase"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball"
/> <filter name="snowballPorter" language="Norwegian"/> <!--
less aggressive: <filter name="norwegianLightStem"/> --> <!--
singular/plural: <filter name="norwegianMinimalStem"/> --> </analyzer>
</fieldType> <!-- Portuguese --> <dynamicField name="*_txt_pt"
type="text_pt" indexed="true" stored="true"/> <fieldType name="text_pt"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <filter name="lowercase"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball"
/> <filter name="portugueseLightStem"/> <!-- less aggressive:
<filter name="portugueseMinimalStem"/> --> <!-- more aggressive:
<filter name="snowballPorter" language="Portuguese"/> --> <!-- most
aggressive: <filter name="portugueseStem"/> --> </analyzer>
</fieldType> <!-- Romanian --> <dynamicField name="*_txt_ro"
type="text_ro" indexed="true" stored="true"/> <fieldType name="text_ro"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <filter name="lowercase"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_ro.txt" /> <filter
name="snowballPorter" language="Romanian"/> </analyzer> </fieldType>
<!-- Russian --> <dynamicField name="*_txt_ru" type="text_ru"
indexed="true" stored="true"/> <fieldType name="text_ru"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <filter name="lowercase"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball"
/> <filter name="snowballPorter" language="Russian"/> <!-- less
aggressive: <filter name="russianLightStem"/> --> </analyzer>
</fieldType> <!-- Swedish --> <dynamicField name="*_txt_sv"
type="text_sv" indexed="true" stored="true"/> <fieldType name="text_sv"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="standard"/> <filter name="lowercase"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball"
/> <filter name="snowballPorter" language="Swedish"/> <!-- less
aggressive: <filter name="swedishLightStem"/> --> </analyzer>
</fieldType> <!-- Thai --> <dynamicField name="*_txt_th"
type="text_th" indexed="true" stored="true"/> <fieldType name="text_th"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer name="thai"/> <filter name="lowercase"/> <filter
name="stop" ignoreCase="true" words="lang/stopwords_th.txt" />
</analyzer> </fieldType> <!-- Turkish --> <dynamicField
name="*_txt_tr" type="text_tr" indexed="true" stored="true"/> <fieldType
name="text_tr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer name="standard"/> <filter
name="turkishLowercase"/> <filter name="stop" ignoreCase="false"
words="lang/stopwords_tr.txt" /> <filter name="snowballPorter]"
language="Turkish"...> but was:<... <tokenizer
[class="solr.WhitespaceTokenizerFactory"/> </analyzer> </fieldType>
<!-- A general text field that has reasonable, generic
cross-language defaults: it tokenizes with StandardTokenizer, removes
stop words from case-insensitive "stopwords.txt" (empty by default),
and down cases. At query time only, it also applies synonyms. -->
<fieldType name="text_general" class="solr.TextField"
positionIncrementGap="100" multiValued="true"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<!-- in this example, we will only use synonyms at query time <filter
class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt"
ignoreCase="true" expand="false"/> <filter
class="solr.FlattenGraphFilterFactory"/> --> <filter
class="solr.LowerCaseFilterFactory"/> </analyzer> <analyzer
type="query"> <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"
/> <filter class="solr.SynonymGraphFilterFactory"
synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter
class="solr.LowerCaseFilterFactory"/> </analyzer> </fieldType>
<!-- SortableTextField generaly functions exactly like TextField,
except that it supports, and by default uses, docValues for sorting (or
faceting) on the first 1024 characters of the original field values
(which is configurable). This makes it a bit more useful
then TextField in many situations, but the trade-off is that it takes
up more space on disk; which is why it's not used in place of TextField
for every fieldType in this _default schema. --> <dynamicField
name="*_t_sort" type="text_gen_sort" indexed="true" stored="true"
multiValued="false"/> <dynamicField name="*_txt_sort" type="text_gen_sort"
indexed="true" stored="true"/> <fieldType name="text_gen_sort"
class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index"> <tokenizer
class="solr.StandardTokenizerFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> </analyzer>
<analyzer type="query"> <tokenizer
class="solr.StandardTokenizerFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/> <filter
class="solr.LowerCaseFilterFactory"/> </analyzer> </fieldType>
<!-- A text field with defaults appropriate for English: it tokenizes with
StandardTokenizer, removes English stop words (lang/stopwords_en.txt),
down cases, protects words from protwords.txt, and finally applies
Porter's stemming. The query time analyzer also applies synonyms from
synonyms.txt. --> <dynamicField name="*_txt_en" type="text_en"
indexed="true" stored="true"/> <fieldType name="text_en"
class="solr.TextField" positionIncrementGap="100"> <analyzer
type="index"> <tokenizer class="solr.StandardTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time <filter
class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt"
ignoreCase="true" expand="false"/> <filter
class="solr.FlattenGraphFilterFactory"/> --> <!-- Case
insensitive stop word removal. --> <filter
class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt" /> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.EnglishPossessiveFilterFactory"/> <filter
class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of
PorterStemFilterFactory: <filter
class="solr.EnglishMinimalStemFilterFactory"/> --> <filter
class="solr.PorterStemFilterFactory"/> </analyzer> <analyzer
type="query"> <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/> <filter
class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt" /> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.EnglishPossessiveFilterFactory"/> <filter
class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of
PorterStemFilterFactory: <filter
class="solr.EnglishMinimalStemFilterFactory"/> --> <filter
class="solr.PorterStemFilterFactory"/> </analyzer> </fieldType>
<!-- A text field with defaults appropriate for English, plus
aggressive word-splitting and autophrase features enabled. This field
is just like text_en, except it adds WordDelimiterGraphFilter to
enable splitting and matching of words on case-change, alpha numeric
boundaries, and non-alphanumeric chars. This means certain compound
word cases will work, for example query "wi fi" will match
document "WiFi" or "wi-fi". --> <dynamicField name="*_txt_en_split"
type="text_en_splitting" indexed="true" stored="true"/> <fieldType
name="text_en_splitting" class="solr.TextField" positionIncrementGap="100"
autoGeneratePhraseQueries="true"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <!-- in this
example, we will only use synonyms at query time <filter
class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt"
ignoreCase="true" expand="false"/> --> <!-- Case insensitive
stop word removal. --> <filter class="solr.StopFilterFactory"
ignoreCase="true" words="lang/stopwords_en.txt"
/> <filter class="solr.WordDelimiterGraphFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter
class="solr.FlattenGraphFilterFactory" /> </analyzer> <analyzer
type="query"> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/> <filter
class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt" /> <filter
class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1"
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"
splitOnCaseChange="1"/> <filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/> <filter
class="solr.PorterStemFilterFactory"/> </analyzer> </fieldType>
<!-- Less flexible matching, but less false matches. Probably not ideal for
product names, but may be good for SKUs. Can insert dashes in the
wrong place and still match. --> <dynamicField name="*_txt_en_split_tight"
type="text_en_splitting_tight" indexed="true" stored="true"/> <fieldType
name="text_en_splitting_tight" class="solr.TextField"
positionIncrementGap="100" autoGeneratePhraseQueries="true"> <analyzer
type="index"> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="false"/> <filter
class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt"/> <filter
class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0"
generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter
class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <!-- this filter
can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjuncton with stemming.
--> <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory" /> </analyzer>
<analyzer type="query"> <tokenizer
class="solr.WhitespaceTokenizerFactory"/> <filter
class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="false"/> <filter
class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt"/> <filter
class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0"
generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter
class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <!-- this filter
can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjuncton with stemming.
--> <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer> </fieldType> <!-- Just like text_general except it
reverses the characters of each token, to enable more efficient leading
wildcard queries. --> <dynamicField name="*_txt_rev"
type="text_general_rev" indexed="true" stored="true"/> <fieldType
name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <tokenizer
class="solr.StandardTokenizerFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> <filter
class="solr.ReversedWildcardFilterFactory" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer> <analyzer type="query"> <tokenizer
class="solr.StandardTokenizerFactory"/> <filter
class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> </analyzer>
</fieldType> <dynamicField name="*_phon_en" type="phonetic_en"
indexed="true" stored="true"/> <fieldType name="phonetic_en"
stored="false" indexed="true" class="solr.TextField" > <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <filter
class="solr.DoubleMetaphoneFilterFactory" inject="false"/> </analyzer>
</fieldType> <!-- lowercases the entire field value, keeping it as a
single token. --> <dynamicField name="*_s_lower" type="lowercase"
indexed="true" stored="true"/> <fieldType name="lowercase"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory" /> </analyzer> </fieldType>
<!-- Example of using PathHierarchyTokenizerFactory at index time, so
queries for paths match documents at that path, or in descendent paths
--> <dynamicField name="*_descendent_path" type="descendent_path"
indexed="true" stored="true"/> <fieldType name="descendent_path"
class="solr.TextField"> <analyzer type="index"> <tokenizer
class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> </analyzer>
<analyzer type="query"> <tokenizer
class="solr.KeywordTokenizerFactory" /> </analyzer> </fieldType>
<!-- Example of using PathHierarchyTokenizerFactory at query time, so
queries for paths match documents at that path, or in ancestor paths -->
<dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true"
stored="true"/> <fieldType name="ancestor_path" class="solr.TextField">
<analyzer type="index"> <tokenizer
class="solr.KeywordTokenizerFactory" /> </analyzer> <analyzer
type="query"> <tokenizer class="solr.PathHierarchyTokenizerFactory"
delimiter="/" /> </analyzer> </fieldType> <!-- This point type
indexes the coordinates as separate fields (subFields) If subFieldType is
defined, it references a type, and a dynamic field definition is created
matching *___<typename>. Alternately, if subFieldSuffix is defined,
that is used to create the subFields. Example: if subFieldType="double",
then the coordinates would be indexed in fields
myloc_0___double,myloc_1___double. Example: if subFieldSuffix="_d" then
the coordinates would be indexed in fields myloc_0_d,myloc_1_d
The subFields are an implementation detail of the fieldType, and end
users normally should not need to know about them. --> <dynamicField
name="*_point" type="point" indexed="true" stored="true"/> <fieldType
name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
<!-- A specialized field for geospatial search filters and distance sorting.
--> <fieldType name="location" class="solr.LatLonPointSpatialField"
docValues="true"/> <!-- A geospatial field type that supports multiValued
and polygon shapes. For more information about this and other spatial
fields see: http://lucene.apache.org/solr/guide/spatial-search.html
--> <fieldType name="location_rpt"
class="solr.SpatialRecursivePrefixTreeFieldType" geo="true"
distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" /> <!--
Payloaded field types --> <fieldType name="delimited_payloads_float"
stored="false" indexed="true" class="solr.TextField"> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter
class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
</analyzer> </fieldType> <fieldType name="delimited_payloads_int"
stored="false" indexed="true" class="solr.TextField"> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter
class="solr.DelimitedPayloadTokenFilterFactory" encoder="integer"/>
</analyzer> </fieldType> <fieldType name="delimited_payloads_string"
stored="false" indexed="true" class="solr.TextField"> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter
class="solr.DelimitedPayloadTokenFilterFactory" encoder="identity"/>
</analyzer> </fieldType> <!-- some examples for different languages
(generally ordered by ISO code) --> <!-- Arabic --> <dynamicField
name="*_txt_ar" type="text_ar" indexed="true" stored="true"/> <fieldType
name="text_ar" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer class="solr.StandardTokenizerFactory"/>
<!-- for any non-arabic --> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt"
/> <!-- normalizes ﻯ to ﻱ, etc --> <filter
class="solr.ArabicNormalizationFilterFactory"/> <filter
class="solr.ArabicStemFilterFactory"/> </analyzer> </fieldType>
<!-- Bulgarian --> <dynamicField name="*_txt_bg" type="text_bg"
indexed="true" stored="true"/> <fieldType name="text_bg"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt"
/> <filter class="solr.BulgarianStemFilterFactory"/>
</analyzer> </fieldType> <!-- Catalan --> <dynamicField
name="*_txt_ca" type="text_ca" indexed="true" stored="true"/> <fieldType
name="text_ca" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer class="solr.StandardTokenizerFactory"/>
<!-- removes l', etc --> <filter class="solr.ElisionFilterFactory"
ignoreCase="true" articles="lang/contractions_ca.txt"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt"
/> <filter class="solr.SnowballPorterFilterFactory"
language="Catalan"/> </analyzer> </fieldType> <!--
CJK bigram (see text_ja for a Japanese configuration using morphological
analysis) --> <dynamicField name="*_txt_cjk" type="text_cjk"
indexed="true" stored="true"/> <fieldType name="text_cjk"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <!-- normalize width
before bigram, as e.g. half-width dakuten combine --> <filter
class="solr.CJKWidthFilterFactory"/> <!-- for any non-CJK -->
<filter class="solr.LowerCaseFilterFactory"/> <filter
class="solr.CJKBigramFilterFactory"/> </analyzer> </fieldType>
<!-- Czech --> <dynamicField name="*_txt_cz" type="text_cz" indexed="true"
stored="true"/> <fieldType name="text_cz" class="solr.TextField"
positionIncrementGap="100"> <analyzer> <tokenizer
class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt"
/> <filter class="solr.CzechStemFilterFactory"/>
</analyzer> </fieldType> <!-- Danish --> <dynamicField
name="*_txt_da" type="text_da" indexed="true" stored="true"/> <fieldType
name="text_da" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt"
format="snowball" /> <filter class="solr.SnowballPorterFilterFactory"
language="Danish"/> </analyzer> </fieldType> <!--
German --> <dynamicField name="*_txt_de" type="text_de" indexed="true"
stored="true"/> <fieldType name="text_de" class="solr.TextField"
positionIncrementGap="100"> <analyzer> <tokenizer
class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt"
format="snowball" /> <filter
class="solr.GermanNormalizationFilterFactory"/> <filter
class="solr.GermanLightStemFilterFactory"/> <!-- less aggressive:
<filter class="solr.GermanMinimalStemFilterFactory"/> --> <!-- more
aggressive: <filter class="solr.SnowballPorterFilterFactory"
language="German2"/> --> </analyzer> </fieldType> <!-- Greek
--> <dynamicField name="*_txt_el" type="text_el" indexed="true"
stored="true"/> <fieldType name="text_el" class="solr.TextField"
positionIncrementGap="100"> <analyzer> <tokenizer
class="solr.StandardTokenizerFactory"/> <!-- greek specific lowercase
for sigma --> <filter class="solr.GreekLowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="false"
words="lang/stopwords_el.txt" /> <filter
class="solr.GreekStemFilterFactory"/> </analyzer> </fieldType>
<!-- Spanish --> <dynamicField name="*_txt_es" type="text_es"
indexed="true" stored="true"/> <fieldType name="text_es"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt"
format="snowball" /> <filter
class="solr.SpanishLightStemFilterFactory"/> <!-- more aggressive:
<filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
</analyzer> </fieldType> <!-- Estonian --> <dynamicField
name="*_txt_et" type="text_et" indexed="true" stored="true"/> <fieldType
name="text_et" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_et.txt"
/> <filter class="solr.SnowballPorterFilterFactory"
language="Estonian"/> </analyzer> </fieldType> <!-- Basque -->
<dynamicField name="*_txt_eu" type="text_eu" indexed="true" stored="true"/>
<fieldType name="text_eu" class="solr.TextField"
positionIncrementGap="100"> <analyzer> <tokenizer
class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt"
/> <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
</analyzer> </fieldType> <!-- Persian --> <dynamicField
name="*_txt_fa" type="text_fa" indexed="true" stored="true"/> <fieldType
name="text_fa" class="solr.TextField" positionIncrementGap="100">
<analyzer> <!-- for ZWNJ --> <charFilter
class="solr.PersianCharFilterFactory"/> <tokenizer
class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.ArabicNormalizationFilterFactory"/> <filter
class="solr.PersianNormalizationFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt"
/> </analyzer> </fieldType> <!-- Finnish -->
<dynamicField name="*_txt_fi" type="text_fi" indexed="true" stored="true"/>
<fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt"
format="snowball" /> <filter class="solr.SnowballPorterFilterFactory"
language="Finnish"/> <!-- less aggressive: <filter
class="solr.FinnishLightStemFilterFactory"/> --> </analyzer>
</fieldType> <!-- French --> <dynamicField name="*_txt_fr"
type="text_fr" indexed="true" stored="true"/> <fieldType name="text_fr"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <!-- removes l', etc
--> <filter class="solr.ElisionFilterFactory" ignoreCase="true"
articles="lang/contractions_fr.txt"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt"
format="snowball" /> <filter
class="solr.FrenchLightStemFilterFactory"/> <!-- less aggressive:
<filter class="solr.FrenchMinimalStemFilterFactory"/> --> <!-- more
aggressive: <filter class="solr.SnowballPorterFilterFactory"
language="French"/> --> </analyzer> </fieldType> <!-- Irish
--> <dynamicField name="*_txt_ga" type="text_ga" indexed="true"
stored="true"/> <fieldType name="text_ga" class="solr.TextField"
positionIncrementGap="100"> <analyzer> <tokenizer
class="solr.StandardTokenizerFactory"/> <!-- removes d', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true"
articles="lang/contractions_ga.txt"/> <!-- removes n-, etc. position
increments is intentionally false! --> <filter
class="solr.StopFilterFactory" ignoreCase="true"
words="lang/hyphenations_ga.txt"/> <filter
class="solr.IrishLowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_ga.txt"/> <filter
class="solr.SnowballPorterFilterFactory" language="Irish"/> </analyzer>
</fieldType> <!-- Galician --> <dynamicField name="*_txt_gl"
type="text_gl" indexed="true" stored="true"/> <fieldType name="text_gl"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt"
/> <filter class="solr.GalicianStemFilterFactory"/> <!-- less
aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
</analyzer> </fieldType> <!-- Hindi --> <dynamicField
name="*_txt_hi" type="text_hi" indexed="true" stored="true"/> <fieldType
name="text_hi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/> <!-- normalizes unicode
representation --> <filter
class="solr.IndicNormalizationFilterFactory"/> <!-- normalizes
variation in spelling --> <filter
class="solr.HindiNormalizationFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt"
/> <filter class="solr.HindiStemFilterFactory"/> </analyzer>
</fieldType> <!-- Hungarian --> <dynamicField name="*_txt_hu"
type="text_hu" indexed="true" stored="true"/> <fieldType name="text_hu"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt"
format="snowball" /> <filter class="solr.SnowballPorterFilterFactory"
language="Hungarian"/> <!-- less aggressive: <filter
class="solr.HungarianLightStemFilterFactory"/> --> </analyzer>
</fieldType> <!-- Armenian --> <dynamicField name="*_txt_hy"
type="text_hy" indexed="true" stored="true"/> <fieldType name="text_hy"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt"
/> <filter class="solr.SnowballPorterFilterFactory"
language="Armenian"/> </analyzer> </fieldType> <!--
Indonesian --> <dynamicField name="*_txt_id" type="text_id" indexed="true"
stored="true"/> <fieldType name="text_id" class="solr.TextField"
positionIncrementGap="100"> <analyzer> <tokenizer
class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt"
/> <!-- for a less aggressive approach (only inflectional suffixes),
set stemDerivational to false --> <filter
class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
</analyzer> </fieldType> <!-- Italian --> <dynamicField
name="*_txt_it" type="text_it" indexed="true" stored="true"/> <fieldType
name="text_it" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer class="solr.StandardTokenizerFactory"/>
<!-- removes l', etc --> <filter class="solr.ElisionFilterFactory"
ignoreCase="true" articles="lang/contractions_it.txt"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt"
format="snowball" /> <filter
class="solr.ItalianLightStemFilterFactory"/> <!-- more aggressive:
<filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
</analyzer> </fieldType> <!-- Japanese using morphological
analysis (see text_cjk for a configuration using bigramming) NOTE: If
you want to optimize search for precision, use default operator AND in your
request handler config (q.op) Use OR if you would like to optimize for
recall (default). --> <dynamicField name="*_txt_ja" type="text_ja"
indexed="true" stored="true"/> <fieldType name="text_ja"
class="solr.TextField" positionIncrementGap="100"
autoGeneratePhraseQueries="false"> <analyzer> <!-- Kuromoji
Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
Kuromoji has a search mode (default) that does segmentation useful for search.
A heuristic is used to segment compounds into its parts and the
compound itself is kept as synonym. Valid values for attribute mode
are: normal: regular segmentation search:
segmentation useful for search with synonyms compounds (default)
extended: same as search mode, but unigrams unknown words (experimental)
For some applications it might be good to use search mode for indexing and
normal mode for queries to reduce recall and prevent parts of
compounds from being matched and highlighted. Use <analyzer
type="index"> and <analyzer type="query"> for this and mode normal in query.
Kuromoji also has a convenient user dictionary feature that allows
overriding the statistical model with your own entries for
segmentation, part-of-speech tags and readings without a need to
specify weights. Notice that user dictionaries have not been subject to
extensive testing. User dictionary attributes are:
userDictionary: user dictionary filename
userDictionaryEncoding: user dictionary encoding (default is UTF-8)
See lang/userdict_ja.txt for a sample user dictionary file.
Punctuation characters are discarded by default. Use
discardPunctuation="false" to keep them. --> <tokenizer
class="solr.JapaneseTokenizerFactory" mode="search"/> <!--<tokenizer
class="solr.JapaneseTokenizerFactory" mode="search"
userDictionary="lang/userdict_ja.txt"/>--> <!-- Reduces inflected verbs
and adjectives to their base/dictionary forms (辞書形) --> <filter
class="solr.JapaneseBaseFormFilterFactory"/> <!-- Removes tokens with
certain part-of-speech tags --> <filter
class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt"
/> <!-- Normalizes full-width romaji to half-width and half-width kana
to full-width (Unicode NFKC subset) --> <filter
class="solr.CJKWidthFilterFactory"/> <!-- Removes common tokens
typically not useful for search, but have a negative effect on ranking -->
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_ja.txt" /> <!-- Normalizes common katakana
spelling variations by removing any last long sound character (U+30FC) -->
<filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
<!-- Lower-cases romaji characters --> <filter
class="solr.LowerCaseFilterFactory"/> </analyzer> </fieldType>
<!-- Korean morphological analysis --> <dynamicField name="*_txt_ko"
type="text_ko" indexed="true" stored="true"/> <fieldType name="text_ko"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<!-- Nori Korean morphological analyzer/tokenizer (KoreanTokenizer)
The Korean (nori) analyzer integrates Lucene nori analysis module into Solr.
It uses the mecab-ko-dic dictionary to perform morphological analysis of
Korean texts. This dictionary was built with MeCab, it defines a
format for the features adapted for the Korean language.
Nori also has a convenient user dictionary feature that allows
overriding the statistical model with your own entries for
segmentation, part-of-speech tags and readings without a need to
specify weights. Notice that user dictionaries have not been subject to
extensive testing. The tokenizer supports multiple schema
attributes: * userDictionary: User dictionary path. *
userDictionaryEncoding: User dictionary encoding. * decompoundMode:
Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'.
* outputUnknownUnigrams: If true outputs unigrams for unknown words.
--> <tokenizer class="solr.KoreanTokenizerFactory"
decompoundMode="discard" outputUnknownUnigrams="false"/> <!-- Removes
some part of speech stuff like EOMI (Pos.E), you can add a parameter 'tags',
listing the tags to remove. By default it removes: E, IC, J,
MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
This is basically an equivalent to stemming. --> <filter
class="solr.KoreanPartOfSpeechStopFilterFactory" /> <!-- Replaces term
text with the Hangul transcription of Hanja characters, if applicable: -->
<filter class="solr.KoreanReadingFormFilterFactory" /> <filter
class="solr.LowerCaseFilterFactory" /> </analyzer> </fieldType>
<!-- Latvian --> <dynamicField name="*_txt_lv" type="text_lv"
indexed="true" stored="true"/> <fieldType name="text_lv"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt"
/> <filter class="solr.LatvianStemFilterFactory"/> </analyzer>
</fieldType> <!-- Dutch --> <dynamicField name="*_txt_nl"
type="text_nl" indexed="true" stored="true"/> <fieldType name="text_nl"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt"
format="snowball" /> <filter class="solr.StemmerOverrideFilterFactory"
dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> <filter
class="solr.SnowballPorterFilterFactory" language="Dutch"/> </analyzer>
</fieldType> <!-- Norwegian --> <dynamicField name="*_txt_no"
type="text_no" indexed="true" stored="true"/> <fieldType name="text_no"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt"
format="snowball" /> <filter class="solr.SnowballPorterFilterFactory"
language="Norwegian"/> <!-- less aggressive: <filter
class="solr.NorwegianLightStemFilterFactory"/> --> <!--
singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
</analyzer> </fieldType> <!-- Portuguese --> <dynamicField
name="*_txt_pt" type="text_pt" indexed="true" stored="true"/> <fieldType
name="text_pt" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt"
format="snowball" /> <filter
class="solr.PortugueseLightStemFilterFactory"/> <!-- less aggressive:
<filter class="solr.PortugueseMinimalStemFilterFactory"/> --> <!-- more
aggressive: <filter class="solr.SnowballPorterFilterFactory"
language="Portuguese"/> --> <!-- most aggressive: <filter
class="solr.PortugueseStemFilterFactory"/> --> </analyzer>
</fieldType> <!-- Romanian --> <dynamicField name="*_txt_ro"
type="text_ro" indexed="true" stored="true"/> <fieldType name="text_ro"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt"
/> <filter class="solr.SnowballPorterFilterFactory"
language="Romanian"/> </analyzer> </fieldType> <!-- Russian
--> <dynamicField name="*_txt_ru" type="text_ru" indexed="true"
stored="true"/> <fieldType name="text_ru" class="solr.TextField"
positionIncrementGap="100"> <analyzer> <tokenizer
class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt"
format="snowball" /> <filter class="solr.SnowballPorterFilterFactory"
language="Russian"/> <!-- less aggressive: <filter
class="solr.RussianLightStemFilterFactory"/> --> </analyzer>
</fieldType> <!-- Swedish --> <dynamicField name="*_txt_sv"
type="text_sv" indexed="true" stored="true"/> <fieldType name="text_sv"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt"
format="snowball" /> <filter class="solr.SnowballPorterFilterFactory"
language="Swedish"/> <!-- less aggressive: <filter
class="solr.SwedishLightStemFilterFactory"/> --> </analyzer>
</fieldType> <!-- Thai --> <dynamicField name="*_txt_th"
type="text_th" indexed="true" stored="true"/> <fieldType name="text_th"
class="solr.TextField" positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.ThaiTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt"
/> </analyzer> </fieldType> <!-- Turkish -->
<dynamicField name="*_txt_tr" type="text_tr" indexed="true" stored="true"/>
<fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.TurkishLowerCaseFilterFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt"
/> <filter class="solr.SnowballPorterFilterFactory]"
language="Turkish"...>
Stack Trace:
org.junit.ComparisonFailure:
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/solr/core/src/test-files/solr/configsets/_default/conf/managed-schema
contents doesn't match expected
(/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/solr/server/solr/configsets/_default/conf/managed-schema)
expected:<...
<tokenizer [name="whitespace"/>
</analyzer>
</fieldType>
<!-- A general text field that has reasonable, generic
cross-language defaults: it tokenizes with StandardTokenizer,
removes stop words from case-insensitive "stopwords.txt"
(empty by default), and down cases. At query time only, it
also applies synonyms.
-->
<fieldType name="text_general" class="solr.TextField"
positionIncrementGap="100" multiValued="true">
<analyzer type="index">
<tokenizer name="standard"/>
<filter name="stop" ignoreCase="true" words="stopwords.txt" />
<!-- in this example, we will only use synonyms at query time
<filter name="synonymGraph" synonyms="index_synonyms.txt"
ignoreCase="true" expand="false"/>
<filter name="flattenGraph"/>
-->
<filter name="lowercase"/>
</analyzer>
<analyzer type="query">
<tokenizer name="standard"/>
<filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true"
expand="true"/>
<filter name="lowercase"/>
</analyzer>
</fieldType>
<!-- SortableTextField generaly functions exactly like TextField,
except that it supports, and by default uses, docValues for sorting
(or faceting)
on the first 1024 characters of the original field values (which is
configurable).
This makes it a bit more useful then TextField in many situations, but
the trade-off
is that it takes up more space on disk; which is why it's not used in
place of TextField
for every fieldType in this _default schema.
-->
<dynamicField name="*_t_sort" type="text_gen_sort" indexed="true"
stored="true" multiValued="false"/>
<dynamicField name="*_txt_sort" type="text_gen_sort" indexed="true"
stored="true"/>
<fieldType name="text_gen_sort" class="solr.SortableTextField"
positionIncrementGap="100" multiValued="true">
<analyzer type="index">
<tokenizer name="standard"/>
<filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter name="lowercase"/>
</analyzer>
<analyzer type="query">
<tokenizer name="standard"/>
<filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true"
expand="true"/>
<filter name="lowercase"/>
</analyzer>
</fieldType>
<!-- A text field with defaults appropriate for English: it tokenizes with
StandardTokenizer,
removes English stop words (lang/stopwords_en.txt), down cases,
protects words from protwords.txt, and
finally applies Porter's stemming. The query time analyzer also
applies synonyms from synonyms.txt. -->
<dynamicField name="*_txt_en" type="text_en" indexed="true"
stored="true"/>
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer name="standard"/>
<!-- in this example, we will only use synonyms at query time
<filter name="synonymGraph" synonyms="index_synonyms.txt"
ignoreCase="true" expand="false"/>
<filter name="flattenGraph"/>
-->
<!-- Case insensitive stop word removal.
-->
<filter name="stop"
ignoreCase="true"
words="lang/stopwords_en.txt"
/>
<filter name="lowercase"/>
<filter name="englishPossessive"/>
<filter name="keywordMarker" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer
instead of PorterStemFilterFactory:
<filter name="englishMinimalStem"/>
-->
<filter name="porterStem"/>
</analyzer>
<analyzer type="query">
<tokenizer name="standard"/>
<filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true"
expand="true"/>
<filter name="stop"
ignoreCase="true"
words="lang/stopwords_en.txt"
/>
<filter name="lowercase"/>
<filter name="englishPossessive"/>
<filter name="keywordMarker" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer
instead of PorterStemFilterFactory:
<filter name="englishMinimalStem"/>
-->
<filter name="porterStem"/>
</analyzer>
</fieldType>
<!-- A text field with defaults appropriate for English, plus
aggressive word-splitting and autophrase features enabled.
This field is just like text_en, except it adds
WordDelimiterGraphFilter to enable splitting and matching of
words on case-change, alpha numeric boundaries, and
non-alphanumeric chars. This means certain compound word
cases will work, for example query "wi fi" will match
document "WiFi" or "wi-fi".
-->
<dynamicField name="*_txt_en_split" type="text_en_splitting"
indexed="true" stored="true"/>
<fieldType name="text_en_splitting" class="solr.TextField"
positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer name="whitespace"/>
<!-- in this example, we will only use synonyms at query time
<filter name="synonymGraph" synonyms="index_synonyms.txt"
ignoreCase="true" expand="false"/>
-->
<!-- Case insensitive stop word removal.
-->
<filter name="stop"
ignoreCase="true"
words="lang/stopwords_en.txt"
/>
<filter name="wordDelimiterGraph" generateWordParts="1"
generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"
splitOnCaseChange="1"/>
<filter name="lowercase"/>
<filter name="keywordMarker" protected="protwords.txt"/>
<filter name="porterStem"/>
<filter name="flattenGraph" />
</analyzer>
<analyzer type="query">
<tokenizer name="whitespace"/>
<filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true"
expand="true"/>
<filter name="stop"
ignoreCase="true"
words="lang/stopwords_en.txt"
/>
<filter name="wordDelimiterGraph" generateWordParts="1"
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"
splitOnCaseChange="1"/>
<filter name="lowercase"/>
<filter name="keywordMarker" protected="protwords.txt"/>
<filter name="porterStem"/>
</analyzer>
</fieldType>
<!-- Less flexible matching, but less false matches. Probably not ideal
for product names,
but may be good for SKUs. Can insert dashes in the wrong place and
still match. -->
<dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight"
indexed="true" stored="true"/>
<fieldType name="text_en_splitting_tight" class="solr.TextField"
positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer name="whitespace"/>
<filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true"
expand="false"/>
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter name="wordDelimiterGraph" generateWordParts="0"
generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter name="lowercase"/>
<filter name="keywordMarker" protected="protwords.txt"/>
<filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the
same position - sometimes
possible with WordDelimiterGraphFilter in conjuncton with
stemming. -->
<filter name="removeDuplicates"/>
<filter name="flattenGraph" />
</analyzer>
<analyzer type="query">
<tokenizer name="whitespace"/>
<filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true"
expand="false"/>
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter name="wordDelimiterGraph" generateWordParts="0"
generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter name="lowercase"/>
<filter name="keywordMarker" protected="protwords.txt"/>
<filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the
same position - sometimes
possible with WordDelimiterGraphFilter in conjuncton with
stemming. -->
<filter name="removeDuplicates"/>
</analyzer>
</fieldType>
<!-- Just like text_general except it reverses the characters of
each token, to enable more efficient leading wildcard queries.
-->
<dynamicField name="*_txt_rev" type="text_general_rev" indexed="true"
stored="true"/>
<fieldType name="text_general_rev" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
<tokenizer name="standard"/>
<filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter name="lowercase"/>
<filter name="reversedWildcard" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2"
maxFractionAsterisk="0.33"/>
</analyzer>
<analyzer type="query">
<tokenizer name="standard"/>
<filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true"
expand="true"/>
<filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter name="lowercase"/>
</analyzer>
</fieldType>
<dynamicField name="*_phon_en" type="phonetic_en" indexed="true"
stored="true"/>
<fieldType name="phonetic_en" stored="false" indexed="true"
class="solr.TextField" >
<analyzer>
<tokenizer name="standard"/>
<filter name="doubleMetaphone" inject="false"/>
</analyzer>
</fieldType>
<!-- lowercases the entire field value, keeping it as a single token. -->
<dynamicField name="*_s_lower" type="lowercase" indexed="true"
stored="true"/>
<fieldType name="lowercase" class="solr.TextField"
positionIncrementGap="100">
<analyzer>
<tokenizer name="keyword"/>
<filter name="lowercase" />
</analyzer>
</fieldType>
<!--
Example of using PathHierarchyTokenizerFactory at index time, so
queries for paths match documents at that path, or in descendent paths
-->
<dynamicField name="*_descendent_path" type="descendent_path"
indexed="true" stored="true"/>
<fieldType name="descendent_path" class="solr.TextField">
<analyzer type="index">
<tokenizer name="pathHierarchy" delimiter="/" />
</analyzer>
<analyzer type="query">
<tokenizer name="keyword" />
</analyzer>
</fieldType>
<!--
Example of using PathHierarchyTokenizerFactory at query time, so
queries for paths match documents at that path, or in ancestor paths
-->
<dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true"
stored="true"/>
<fieldType name="ancestor_path" class="solr.TextField">
<analyzer type="index">
<tokenizer name="keyword" />
</analyzer>
<analyzer type="query">
<tokenizer name="pathHierarchy" delimiter="/" />
</analyzer>
</fieldType>
<!-- This point type indexes the coordinates as separate fields (subFields)
If subFieldType is defined, it references a type, and a dynamic field
definition is created matching *___<typename>. Alternately, if
subFieldSuffix is defined, that is used to create the subFields.
Example: if subFieldType="double", then the coordinates would be
indexed in fields myloc_0___double,myloc_1___double.
Example: if subFieldSuffix="_d" then the coordinates would be indexed
in fields myloc_0_d,myloc_1_d
The subFields are an implementation detail of the fieldType, and end
users normally should not need to know about them.
-->
<dynamicField name="*_point" type="point" indexed="true" stored="true"/>
<fieldType name="point" class="solr.PointType" dimension="2"
subFieldSuffix="_d"/>
<!-- A specialized field for geospatial search filters and distance
sorting. -->
<fieldType name="location" class="solr.LatLonPointSpatialField"
docValues="true"/>
<!-- A geospatial field type that supports multiValued and polygon shapes.
For more information about this and other spatial fields see:
http://lucene.apache.org/solr/guide/spatial-search.html
-->
<fieldType name="location_rpt"
class="solr.SpatialRecursivePrefixTreeFieldType"
geo="true" distErrPct="0.025" maxDistErr="0.001"
distanceUnits="kilometers" />
<!-- Payloaded field types -->
<fieldType name="delimited_payloads_float" stored="false" indexed="true"
class="solr.TextField">
<analyzer>
<tokenizer name="whitespace"/>
<filter name="delimitedPayload" encoder="float"/>
</analyzer>
</fieldType>
<fieldType name="delimited_payloads_int" stored="false" indexed="true"
class="solr.TextField">
<analyzer>
<tokenizer name="whitespace"/>
<filter name="delimitedPayload" encoder="integer"/>
</analyzer>
</fieldType>
<fieldType name="delimited_payloads_string" stored="false" indexed="true"
class="solr.TextField">
<analyzer>
<tokenizer name="whitespace"/>
<filter name="delimitedPayload" encoder="identity"/>
</analyzer>
</fieldType>
<!-- some examples for different languages (generally ordered by ISO code)
-->
<!-- Arabic -->
<dynamicField name="*_txt_ar" type="text_ar" indexed="true"
stored="true"/>
<fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer name="standard"/>
<!-- for any non-arabic -->
<filter name="lowercase"/>
<filter name="stop" ignoreCase="true" words="lang/stopwords_ar.txt" />
<!-- normalizes ﻯ to ﻱ, etc -->
<filter name="arabicNormalization"/>
<filter name="arabicStem"/>
</analyzer>
</fieldType>
<!-- Bulgarian -->
<dynamicField name="*_txt_bg" type="text_bg" indexed="true"
stored="true"/>
<fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer n
[...truncated too long message...]
e/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
ivy-availability-check:
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0.
-ivy-fail-disallowed-ivy-version:
ivy-fail:
ivy-fail:
ivy-configure:
[ivy:configure] :: loading settings :: file =
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/lucene/top-level-ivy-settings.xml
resolve:
jar-checksums:
[mkdir] Created dir:
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/solr/null848668938
[copy] Copying 249 files to
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/solr/null848668938
[delete] Deleting directory
/home/jenkins/jenkins-slave/workspace/Lucene-Solr-Tests-master/solr/null848668938
check-working-copy:
[ivy:cachepath] :: resolving dependencies :: #;working@lucene1-us-west
[ivy:cachepath] confs: [default]
[ivy:cachepath] found
org.eclipse.jgit#org.eclipse.jgit;5.3.0.201903130848-r in public
[ivy:cachepath] found com.jcraft#jsch;0.1.54 in public
[ivy:cachepath] found com.jcraft#jzlib;1.1.1 in public
[ivy:cachepath] found com.googlecode.javaewah#JavaEWAH;1.1.6 in public
[ivy:cachepath] found org.slf4j#slf4j-api;1.7.2 in public
[ivy:cachepath] found org.bouncycastle#bcpg-jdk15on;1.60 in public
[ivy:cachepath] found org.bouncycastle#bcprov-jdk15on;1.60 in public
[ivy:cachepath] found org.bouncycastle#bcpkix-jdk15on;1.60 in public
[ivy:cachepath] found org.slf4j#slf4j-nop;1.7.2 in public
[ivy:cachepath] :: resolution report :: resolve 39ms :: artifacts dl 2ms
---------------------------------------------------------------------
| | modules || artifacts |
| conf | number| search|dwnlded|evicted|| number|dwnlded|
---------------------------------------------------------------------
| default | 9 | 0 | 0 | 0 || 9 | 0 |
---------------------------------------------------------------------
[wc-checker] Initializing working copy...
[wc-checker] Checking working copy status...
-jenkins-base:
BUILD SUCCESSFUL
Total time: 106 minutes 16 seconds
Archiving artifacts
java.lang.InterruptedException: no matches found within 10000
at hudson.FilePath$ValidateAntFileMask.hasMatch(FilePath.java:2847)
at hudson.FilePath$ValidateAntFileMask.invoke(FilePath.java:2726)
at hudson.FilePath$ValidateAntFileMask.invoke(FilePath.java:2707)
at hudson.FilePath$FileCallableWrapper.call(FilePath.java:3086)
Also: hudson.remoting.Channel$CallSiteStackTrace: Remote call to lucene
at
hudson.remoting.Channel.attachCallSiteStackTrace(Channel.java:1741)
at
hudson.remoting.UserRequest$ExceptionResponse.retrieve(UserRequest.java:357)
at hudson.remoting.Channel.call(Channel.java:955)
at hudson.FilePath.act(FilePath.java:1072)
at hudson.FilePath.act(FilePath.java:1061)
at hudson.FilePath.validateAntFileMask(FilePath.java:2705)
at
hudson.tasks.ArtifactArchiver.perform(ArtifactArchiver.java:243)
at
hudson.tasks.BuildStepCompatibilityLayer.perform(BuildStepCompatibilityLayer.java:81)
at
hudson.tasks.BuildStepMonitor$1.perform(BuildStepMonitor.java:20)
at
hudson.model.AbstractBuild$AbstractBuildExecution.perform(AbstractBuild.java:744)
at
hudson.model.AbstractBuild$AbstractBuildExecution.performAllBuildSteps(AbstractBuild.java:690)
at hudson.model.Build$BuildExecution.post2(Build.java:186)
at
hudson.model.AbstractBuild$AbstractBuildExecution.post(AbstractBuild.java:635)
at hudson.model.Run.execute(Run.java:1835)
at hudson.model.FreeStyleBuild.run(FreeStyleBuild.java:43)
at
hudson.model.ResourceController.execute(ResourceController.java:97)
at hudson.model.Executor.run(Executor.java:429)
Caused: hudson.FilePath$TunneledInterruptedException
at hudson.FilePath$FileCallableWrapper.call(FilePath.java:3088)
at hudson.remoting.UserRequest.perform(UserRequest.java:212)
at hudson.remoting.UserRequest.perform(UserRequest.java:54)
at hudson.remoting.Request$2.run(Request.java:369)
at
hudson.remoting.InterceptingExecutorService$1.call(InterceptingExecutorService.java:72)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:744)
Caused: java.lang.InterruptedException: java.lang.InterruptedException: no
matches found within 10000
at hudson.FilePath.act(FilePath.java:1074)
at hudson.FilePath.act(FilePath.java:1061)
at hudson.FilePath.validateAntFileMask(FilePath.java:2705)
at hudson.tasks.ArtifactArchiver.perform(ArtifactArchiver.java:243)
at
hudson.tasks.BuildStepCompatibilityLayer.perform(BuildStepCompatibilityLayer.java:81)
at hudson.tasks.BuildStepMonitor$1.perform(BuildStepMonitor.java:20)
at
hudson.model.AbstractBuild$AbstractBuildExecution.perform(AbstractBuild.java:744)
at
hudson.model.AbstractBuild$AbstractBuildExecution.performAllBuildSteps(AbstractBuild.java:690)
at hudson.model.Build$BuildExecution.post2(Build.java:186)
at
hudson.model.AbstractBuild$AbstractBuildExecution.post(AbstractBuild.java:635)
at hudson.model.Run.execute(Run.java:1835)
at hudson.model.FreeStyleBuild.run(FreeStyleBuild.java:43)
at hudson.model.ResourceController.execute(ResourceController.java:97)
at hudson.model.Executor.run(Executor.java:429)
No artifacts found that match the file pattern
"**/*.events,heapdumps/**,**/hs_err_pid*". Configuration error?
Recording test results
Build step 'Publish JUnit test result report' changed build result to UNSTABLE
Email was triggered for: Unstable (Test Failures)
Sending email for trigger: Unstable (Test Failures)
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org