I'm trying to index some data which often includes domain names. I'd like to remove the .com TLD, so I have modified the text_en field type by adding a PatternReplaceFilterFactory filter. However, it doesn't appear to be working as a search for "text:(mydomain.com)" matches records but "text:(mydomain)" does not.

<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="([-a-z])\.com" replacement="$1"/>
      <filter class="solr.EnglishPossessiveFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="([-a-z])\.com" replacement="$1"/>
      <filter class="solr.EnglishPossessiveFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
  </fieldType>

The actual field definitions are as follows:

<field name="companyName" type="text_en" indexed="true" stored="true" required="true" /> <field name="jobTitle" type="text_en" indexed="true" stored="true" required="true" /> <field name="text" type="text_general" indexed="true" stored="false" />

  <copyField source="companyName" dest="text" />
  <copyField source="jobTitle"    dest="text" />

Reply via email to