My problem was that I specified the per-field similarity class INSIDE the analyzer instead of outside it.

<fieldType ...>
  <analyzer> ... </analyzer>
  <similarity class="..."/>
</fieldType>

On 09/24/2012 02:56 PM, Carrie Coy wrote:
I'm trying to configure per-field similarity to disregard term frequency (omitTf) in a 'title' field. I'm trying to follow the example docs without success: my custom similarity doesn't seem to have any effect on 'tf'. Is the NoTfSimilarity class below written correctly? Any advice is much appreciated.

my schema.xml:

<field name="title" type="text_custom_sim" indexed="true" stored="true" omitNorms="true" termVectors="true" />

<similarity class="solr.SchemaSimilarityFactory"/>
<fieldType name="text_custom_sim" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<similarity class="com.ssww.NoTfSimilarityFactory" />
         .....
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<similarity class="com.ssww.NoTfSimilarityFactory" />
         .....
</analyzer>


NoTfSimilarityFactory.java:

   package com.ssww;

   import org.apache.lucene.search.similarities.Similarity;
   import org.apache.solr.schema.SimilarityFactory;

   /**
    * Solr similarity factory that supplies a {@link NoTfSimilarity}.
    *
    * <p>Intended to be referenced from a {@code <similarity class="..."/>}
    * element in schema.xml.
    */
   public class NoTfSimilarityFactory extends SimilarityFactory {

      /**
       * Creates the similarity handed back to Solr.
       *
       * @return a newly constructed {@code NoTfSimilarity}
       */
      @Override
      public Similarity getSimilarity() {
        final Similarity similarity = new NoTfSimilarity();
        return similarity;
      }
   }


NoTfSimilarity.java:

   package com.ssww;
   import org.apache.lucene.search.similarities.DefaultSimilarity;

   public final class NoTfSimilarity extends DefaultSimilarity {
        public float tf(int i) {
             return 1;
        }

   }

These two files are in a jar in the lib directory of this core. Here are the results of a search for "paint" with custom and default similarity:

Indexed with per-field NoTfSimilarity:

284.5441 = (MATCH) boost(+(title:paint^8.0 | search_keywords:paint | shingle_text:paint^2.0 | description:paint^0.5 | nosyn:paint^5.0 | bullets:paint^0.5) () () () () () (),scale(int(page_views),1.0,3.0)), product of:
  280.5598 = (MATCH) sum of:
    280.5598 = (MATCH) max of:
      280.5598 = (MATCH) weight(title:paint^8.0 in 48) [], result of:
        280.5598 = score(doc=48,freq=2.0 = termFreq=2.0
), product of:
          39.83825 = queryWeight, product of:
            8.0 = boost
            4.979781 = idf(docFreq=187, maxDocs=10059)
            1.0 = queryNorm
          7.042474 = fieldWeight in 48, product of:
            1.4142135 = tf(freq=2.0), with freq of:
              2.0 = termFreq=2.0
            4.979781 = idf(docFreq=187, maxDocs=10059)
            1.0 = fieldNorm(doc=48)
      18.217428 = (MATCH) weight(search_keywords:paint in 48) [], result of:
        18.217428 = score(doc=48,freq=1.0 = termFreq=1.0
), product of:
          4.268188 = queryWeight, product of:
            4.268188 = idf(docFreq=382, maxDocs=10059)
            1.0 = queryNorm
          4.268188 = fieldWeight in 48, product of:
            1.0 = tf(freq=1.0), with freq of:
              1.0 = termFreq=1.0
            4.268188 = idf(docFreq=382, maxDocs=10059)
            1.0 = fieldNorm(doc=48)
      7.725952 = (MATCH) weight(description:paint^0.5 in 48) [], result of:
        7.725952 = score(doc=48,freq=2.0 = termFreq=2.0
), product of:
          1.6527361 = queryWeight, product of:
            0.5 = boost
            3.3054721 = idf(docFreq=1002, maxDocs=10059)
            1.0 = queryNorm
          4.6746435 = fieldWeight in 48, product of:
            1.4142135 = tf(freq=2.0), with freq of:
              2.0 = termFreq=2.0
            3.3054721 = idf(docFreq=1002, maxDocs=10059)
            1.0 = fieldNorm(doc=48)
      106.50396 = (MATCH) weight(nosyn:paint^5.0 in 48) [], result of:
        106.50396 = score(doc=48,freq=4.0 = termFreq=4.0
), product of:
          16.317472 = queryWeight, product of:
            5.0 = boost
            3.2634945 = idf(docFreq=1045, maxDocs=10059)
            1.0 = queryNorm
          6.526989 = fieldWeight in 48, product of:
            2.0 = tf(freq=4.0), with freq of:
              4.0 = termFreq=4.0
            3.2634945 = idf(docFreq=1045, maxDocs=10059)
            1.0 = fieldNorm(doc=48)
  1.0142012 = scale(int(page_views)=18,toMin=1.0,toMax=3.0,fromMin=0.0,fromMax=2535.0)


Indexed with DefaultSimilarity:

7.630908 = (MATCH) boost(+(title:paint^8.0 | search_keywords:paint | shingle_text:paint^2.0 | description:paint^0.5 | nosyn:paint^5.0 | bullets:paint^0.5) () () () () () (),scale(int(page_views),1.0,3.0)), product of:
  7.524058 = (MATCH) sum of:
    7.524058 = (MATCH) max of:
      7.524058 = (MATCH) weight(title:paint^8.0 in 3504) [DefaultSimilarity], result of:
        7.524058 = fieldWeight in 3504, product of:
          1.4142135 = tf(freq=2.0), with freq of:
            2.0 = termFreq=2.0
          5.3203125 = idf(docFreq=197, maxDocs=14892)
          1.0 = fieldNorm(doc=3504)
      0.5091842 = (MATCH) weight(search_keywords:paint in 3504) [DefaultSimilarity], result of:
        0.5091842 = score(doc=3504,freq=1.0 = termFreq=1.0
), product of:
          0.10937647 = queryWeight, product of:
            4.655336 = idf(docFreq=384, maxDocs=14892)
            0.02349486 = queryNorm
          4.655336 = fieldWeight in 3504, product of:
            1.0 = tf(freq=1.0), with freq of:
              1.0 = termFreq=1.0
            4.655336 = idf(docFreq=384, maxDocs=14892)
            1.0 = fieldNorm(doc=3504)
      0.20965372 = (MATCH) weight(description:paint^0.5 in 3504) [DefaultSimilarity], result of:
        0.20965372 = score(doc=3504,freq=2.0 = termFreq=2.0
), product of:
          0.04173162 = queryWeight, product of:
            0.5 = boost
            3.5524042 = idf(docFreq=1159, maxDocs=14892)
            0.02349486 = queryNorm
          5.023858 = fieldWeight in 3504, product of:
            1.4142135 = tf(freq=2.0), with freq of:
              2.0 = termFreq=2.0
            3.5524042 = idf(docFreq=1159, maxDocs=14892)
            1.0 = fieldNorm(doc=3504)
      2.8990223 = (MATCH) weight(nosyn:paint^5.0 in 3504) [DefaultSimilarity], result of:
        2.8990223 = score(doc=3504,freq=4.0 = termFreq=4.0
), product of:
          0.41265035 = queryWeight, product of:
            5.0 = boost
            3.5126863 = idf(docFreq=1206, maxDocs=14892)
            0.02349486 = queryNorm
          7.0253725 = fieldWeight in 3504, product of:
            2.0 = tf(freq=4.0), with freq of:
              4.0 = termFreq=4.0
            3.5126863 = idf(docFreq=1206, maxDocs=14892)
            1.0 = fieldNorm(doc=3504)
  1.0142012 = scale(int(page_views)=18,toMin=1.0,toMax=3.0,fromMin=0.0,fromMax=2535.0)




Reply via email to