OODT-925 Update schema.xml field definitions for SOLR 5.X
Project: http://git-wip-us.apache.org/repos/asf/oodt/repo Commit: http://git-wip-us.apache.org/repos/asf/oodt/commit/6aadf678 Tree: http://git-wip-us.apache.org/repos/asf/oodt/tree/6aadf678 Diff: http://git-wip-us.apache.org/repos/asf/oodt/diff/6aadf678 Branch: refs/heads/avrorpc Commit: 6aadf6780d1dca8d5cc0b08637ba6ffacdb17b86 Parents: 91d0baf Author: Lewis John McGibbney <lewis.j.mcgibb...@jpl.nasa.gov> Authored: Mon Mar 28 08:09:58 2016 -0700 Committer: Lewis John McGibbney <lewis.j.mcgibb...@jpl.nasa.gov> Committed: Mon Mar 28 08:09:58 2016 -0700 ---------------------------------------------------------------------- filemgr/src/main/resources/schema.xml | 428 +---------------------------- 1 file changed, 7 insertions(+), 421 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/oodt/blob/6aadf678/filemgr/src/main/resources/schema.xml ---------------------------------------------------------------------- diff --git a/filemgr/src/main/resources/schema.xml b/filemgr/src/main/resources/schema.xml index d2a6975..128c17a 100644 --- a/filemgr/src/main/resources/schema.xml +++ b/filemgr/src/main/resources/schema.xml @@ -137,7 +137,7 @@ <dynamicField name="*" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true" /> <!-- catch-all text fields for full free-text query --> - <copyField source="*" dest="text" /> + <copyField source="*" dest="text"/> <copyField source="*" dest="text_rev" /> <!-- Main body of document extracted by SolrCell. @@ -277,21 +277,6 @@ <!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings --> <fieldtype name="binary" class="solr.BinaryField"/> - <!-- - Note: - These should only be used for compatibility with existing indexes (created with lucene or older Solr versions). - Use Trie based fields instead. 
As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last - - Plain numeric field types that store and index the text - value verbatim (and hence don't correctly support range queries, since the - lexicographic ordering isn't equal to the numeric ordering) - --> - <fieldType name="pint" class="solr.IntField"/> - <fieldType name="plong" class="solr.LongField"/> - <fieldType name="pfloat" class="solr.FloatField"/> - <fieldType name="pdouble" class="solr.DoubleField"/> - <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/> - <!-- The "RandomSortField" is not used to store or search any data. You can declare fields of this type it in your schema to generate pseudo-random orderings of your docs for sorting @@ -340,7 +325,7 @@ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> <analyzer type="index"> <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <!-- in this example, we will only use synonyms at query time <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> --> @@ -348,7 +333,7 @@ </analyzer> <analyzer type="query"> <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter class="solr.LowerCaseFilterFactory"/> </analyzer> @@ -365,14 +350,10 @@ <!-- in this example, we will only use synonyms at query time <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> --> - <!-- Case insensitive stop word removal. 
- add enablePositionIncrements=true in both the index and query - analyzers to leave a 'gap' for more accurate phrase queries. - --> + <!-- Case insensitive stop word removal. --> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" - enablePositionIncrements="true" /> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.EnglishPossessiveFilterFactory"/> @@ -388,7 +369,6 @@ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" - enablePositionIncrements="true" /> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.EnglishPossessiveFilterFactory"/> @@ -415,14 +395,10 @@ <!-- in this example, we will only use synonyms at query time <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> --> - <!-- Case insensitive stop word removal. - add enablePositionIncrements=true in both the index and query - analyzers to leave a 'gap' for more accurate phrase queries. - --> + <!-- Case insensitive stop word removal. 
--> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" - enablePositionIncrements="true" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> <filter class="solr.LowerCaseFilterFactory"/> @@ -435,7 +411,6 @@ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" - enablePositionIncrements="true" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> <filter class="solr.LowerCaseFilterFactory"/> @@ -466,7 +441,7 @@ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> <analyzer type="index"> <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> @@ -474,7 +449,7 @@ <analyzer type="query"> <tokenizer class="solr.StandardTokenizerFactory"/> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter class="solr.LowerCaseFilterFactory"/> </analyzer> </fieldType> @@ -619,395 +594,6 @@ refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60) --> <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" /> - - - - <!-- some 
examples for different languages (generally ordered by ISO code) --> - - <!-- Arabic --> - <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- for any non-arabic --> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" enablePositionIncrements="true"/> - <!-- normalizes ﻯ to ﻱ, etc --> - <filter class="solr.ArabicNormalizationFilterFactory"/> - <filter class="solr.ArabicStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Bulgarian --> - <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" enablePositionIncrements="true"/> - <filter class="solr.BulgarianStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Catalan --> - <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- removes l', etc --> - <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" enablePositionIncrements="true"/> - <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> - </analyzer> - </fieldType> - - <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) --> - <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- normalize width before bigram, as e.g.
half-width dakuten combine --> - <filter class="solr.CJKWidthFilterFactory"/> - <!-- for any non-CJK --> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.CJKBigramFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Czech --> - <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" enablePositionIncrements="true"/> - <filter class="solr.CzechStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Danish --> - <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" enablePositionIncrements="true"/> - <filter class="solr.SnowballPorterFilterFactory" language="Danish"/> - </analyzer> - </fieldType> - - <!-- German --> - <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" enablePositionIncrements="true"/> - <filter class="solr.GermanNormalizationFilterFactory"/> - <filter class="solr.GermanLightStemFilterFactory"/> - <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> - <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> - </analyzer> - </fieldType> - - <!-- Greek --> - <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- greek specific lowercase for sigma --> - <filter 
class="solr.GreekLowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" enablePositionIncrements="true"/> - <filter class="solr.GreekStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Spanish --> - <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" enablePositionIncrements="true"/> - <filter class="solr.SpanishLightStemFilterFactory"/> - <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> - </analyzer> - </fieldType> - - <!-- Basque --> - <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" enablePositionIncrements="true"/> - <filter class="solr.SnowballPorterFilterFactory" language="Basque"/> - </analyzer> - </fieldType> - - <!-- Persian --> - <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <!-- for ZWNJ --> - <charFilter class="solr.PersianCharFilterFactory"/> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.ArabicNormalizationFilterFactory"/> - <filter class="solr.PersianNormalizationFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" enablePositionIncrements="true"/> - </analyzer> - </fieldType> - - <!-- Finnish --> - <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter 
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" enablePositionIncrements="true"/> - <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> - <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- French --> - <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- removes l', etc --> - <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" enablePositionIncrements="true"/> - <filter class="solr.FrenchLightStemFilterFactory"/> - <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> --> - <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> --> - </analyzer> - </fieldType> - - <!-- Irish --> - <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- removes d', etc --> - <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/> - <!-- removes n-, etc. position increments is intentionally false! 
--> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/> - <filter class="solr.IrishLowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/> - <filter class="solr.SnowballPorterFilterFactory" language="Irish"/> - </analyzer> - </fieldType> - - <!-- Galician --> - <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" enablePositionIncrements="true"/> - <filter class="solr.GalicianStemFilterFactory"/> - <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- Hindi --> - <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <!-- normalizes unicode representation --> - <filter class="solr.IndicNormalizationFilterFactory"/> - <!-- normalizes variation in spelling --> - <filter class="solr.HindiNormalizationFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" enablePositionIncrements="true"/> - <filter class="solr.HindiStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Hungarian --> - <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" enablePositionIncrements="true"/> - <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> - <!-- less aggressive: <filter 
class="solr.HungarianLightStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- Armenian --> - <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" enablePositionIncrements="true"/> - <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> - </analyzer> - </fieldType> - - <!-- Indonesian --> - <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" enablePositionIncrements="true"/> - <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false --> - <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> - </analyzer> - </fieldType> - - <!-- Italian --> - <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- removes l', etc --> - <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" enablePositionIncrements="true"/> - <filter class="solr.ItalianLightStemFilterFactory"/> - <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> --> - </analyzer> - </fieldType> - - <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming) - - NOTE: If you want to optimize search for precision, use default operator AND in your query - parser config with <solrQueryParser defaultOperator="AND"/> further 
down in this file. Use - OR if you would like to optimize for recall (default). - --> - <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false"> - <analyzer> - <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer) - - Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic - is used to segment compounds into its parts and the compound itself is kept as synonym. - - Valid values for attribute mode are: - normal: regular segmentation - search: segmentation useful for search with synonyms compounds (default) - extended: same as search mode, but unigrams unknown words (experimental) - - For some applications it might be good to use search mode for indexing and normal mode for - queries to reduce recall and prevent parts of compounds from being matched and highlighted. - Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query. - - Kuromoji also has a convenient user dictionary feature that allows overriding the statistical - model with your own entries for segmentation, part-of-speech tags and readings without a need - to specify weights. Notice that user dictionaries have not been subject to extensive testing. - - User dictionary attributes are: - userDictionary: user dictionary filename - userDictionaryEncoding: user dictionary encoding (default is UTF-8) - - See lang/userdict_ja.txt for a sample user dictionary file. - - Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them. - - See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support. 
- --> - <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> - <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>--> - <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) --> - <filter class="solr.JapaneseBaseFormFilterFactory"/> - <!-- Removes tokens with certain part-of-speech tags --> - <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/> - <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) --> - <filter class="solr.CJKWidthFilterFactory"/> - <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking --> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" /> - <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) --> - <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> - <!-- Lower-cases romaji characters --> - <filter class="solr.LowerCaseFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Latvian --> - <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" enablePositionIncrements="true"/> - <filter class="solr.LatvianStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Dutch --> - <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" enablePositionIncrements="true"/> - <filter
class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> - <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> - </analyzer> - </fieldType> - - <!-- Norwegian --> - <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" enablePositionIncrements="true"/> - <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> - <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> --> - <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- Portuguese --> - <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" enablePositionIncrements="true"/> - <filter class="solr.PortugueseLightStemFilterFactory"/> - <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> - <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> - <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- Romanian --> - <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" enablePositionIncrements="true"/> - <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> - </analyzer> - </fieldType> - - <!-- Russian --> - 
<fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" enablePositionIncrements="true"/> - <filter class="solr.SnowballPorterFilterFactory" language="Russian"/> - <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- Swedish --> - <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" enablePositionIncrements="true"/> - <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> - <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- Thai --> - <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.ThaiWordFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" enablePositionIncrements="true"/> - </analyzer> - </fieldType> - - <!-- Turkish --> - <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.TurkishLowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" enablePositionIncrements="true"/> - <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> - </analyzer> - </fieldType> </types>