I think you can use ASCIIFoldingFilter
http://lucene.apache.org/core/6_2_0/analyzers-common/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.html
by inserting its factory in your schema.
http://lucene.apache.org/core/6_2_0/analyzers-common/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.html
I would suggest making a separate field for this so that exact matches can be
boosted.
On 10/29/17 10:56 AM, Robert Brown wrote:
Hi,
I have a text field in my index containing extended characters, which I'd like
to match against when searching without the extended characters.
e.g. field contains "Ensō" which I want to match when searching for just
"enso".
My current config for that field (type) is given below:
<!-- "text" field type: whitespace tokenization followed by stop-word removal,
     word-delimiter splitting, lowercasing and Porter stemming. Synonym
     expansion is configured on the index analyzer only. -->
<fieldType name="text"
           class="solr.TextField"
           positionIncrementGap="100"
           autoGeneratePhraseQueries="true">

  <!-- Analysis chain applied when documents are indexed. -->
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="index_synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <!-- Case insensitive stop word removal -->
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_en.txt"/>
    <!-- see https://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters -->
    <filter class="solr.WordDelimiterFilterFactory"
            splitOnCaseChange="1"
            splitOnNumerics="1"
            stemEnglishPossessive="1"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="1"
            preserveOriginal="1"
            protected="lang/protwords_en.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Disabled: keyword protection before stemming.
    <filter class="solr.KeywordMarkerFilterFactory"
            protected="protwords.txt"/>
    -->
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>

  <!-- Analysis chain applied to query text; same filters as the index chain
       except that the synonym filter is left commented out. -->
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!-- we deal with synonyms at index time only
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    -->
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_en.txt"/>
    <!-- see https://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters -->
    <filter class="solr.WordDelimiterFilterFactory"
            splitOnCaseChange="1"
            splitOnNumerics="1"
            stemEnglishPossessive="1"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="1"
            preserveOriginal="1"
            protected="lang/protwords_en.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Disabled: keyword protection before stemming.
    <filter class="solr.KeywordMarkerFilterFactory"
            protected="protwords.txt"/>
    -->
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>

</fieldType>
</schema>
----
Kuro