Hi there,
I have a question about the way Lucene computes the length norm (field norm)
for its documents.
My documents are indexed using Solr.
These are the documents that were indexed (ignore 'score', which is not part of
the document itself):
<doc>
<float name="score">1.00711</float>
<str name="_id">ejn01:2560000000075596</str>
<str name="title">Journal of neurology research</str>
</doc>
<doc>
<float name="score">1.00711</float>
<str name="_id">ejn01:954925518616</str>
<str name="title">Journal of neurology</str>
</doc>
The field "title" has the following definition in schema.xml:
<fieldType name="utf8text" class="solr.TextField" positionIncrementGap="100"
omitNorms="false">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="1024"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt"
format="solr" ignoreCase="false" expand="true"
tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="1024"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt"
format="solr" ignoreCase="false" expand="true"
tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
If I use the query "journal of neurology", both documents have the same score,
although the second document is a more exact match. Supplying a phrase query
does not fix the issue. I also see that the computed fieldNorm is 0.5 for both
documents. Does this have something to do with the loss of precision when
storing the length norm into one byte?
These are all the supplied parameters (defaults in solrconfig.xml):
<str name="lowercaseOperators">false</str>
<str name="mm">-10%</str>
<str name="pf">author^3 title^2</str>
<str name="sort">score desc</str>
<arr name="bq">
<str>source:ser01^10</str>
<str>source:ejn01^10</str>
<str>(*:* -type:article)^999</str>
</arr>
<str name="echoParams">all</str>
<str name="df">all</str>
<str name="tie">0</str>
<str name="qf">
author^15 title^10 subject^1 summary^1 library^1 location^1 publisher^1
place_published^1 issn^1 isbn^1
</str>
<str name="q.alt">*:*</str>
<str name="ps">2</str>
<str name="defType">edismax</str>
<str name="q">journal of neurology</str>
<str name="echoParams">all</str>
<str name="sort">score desc</str>
Looking at the computation of the score, I see no difference at all between
them (see below).
Any idea why the fieldNorm is the same for both documents?
Thanks in advance!
Greetings,
Nicolas
<str name="ejn01:2560000000075596">
1.0071099 = (MATCH) sum of:
0.0053001107 = (MATCH) sum of:
0.0017667036 = (MATCH) max of:
0.0017667036 = (MATCH) weight(title:journal^10.0 in 0), product of:
0.005943145 = queryWeight(title:journal^10.0), product of:
10.0 = boost
0.5945349 = idf(docFreq=2, maxDocs=2)
9.996294E-4 = queryNorm
0.29726744 = (MATCH) fieldWeight(title:journal in 0), product of:
1.0 = tf(termFreq(title:journal)=1)
0.5945349 = idf(docFreq=2, maxDocs=2)
0.5 = fieldNorm(field=title, doc=0)
0.0017667036 = (MATCH) max of:
0.0017667036 = (MATCH) weight(title:of^10.0 in 0), product of:
0.005943145 = queryWeight(title:of^10.0), product of:
10.0 = boost
0.5945349 = idf(docFreq=2, maxDocs=2)
9.996294E-4 = queryNorm
0.29726744 = (MATCH) fieldWeight(title:of in 0), product of:
1.0 = tf(termFreq(title:of)=1)
0.5945349 = idf(docFreq=2, maxDocs=2)
0.5 = fieldNorm(field=title, doc=0)
0.0017667036 = (MATCH) max of:
0.0017667036 = (MATCH) weight(title:neurology^10.0 in 0), product of:
0.005943145 = queryWeight(title:neurology^10.0), product of:
10.0 = boost
0.5945349 = idf(docFreq=2, maxDocs=2)
9.996294E-4 = queryNorm
0.29726744 = (MATCH) fieldWeight(title:neurology in 0), product of:
1.0 = tf(termFreq(title:neurology)=1)
0.5945349 = idf(docFreq=2, maxDocs=2)
0.5 = fieldNorm(field=title, doc=0)
0.0031800664 = (MATCH) max of:
0.0031800664 = (MATCH) weight(title:"journal of neurology"~2^2.0 in 0),
product of:
0.0035658872 = queryWeight(title:"journal of neurology"~2^2.0), product
of:
2.0 = boost
1.7836046 = idf(title: journal=2 of=2 neurology=2)
9.996294E-4 = queryNorm
0.8918023 = fieldWeight(title:"journal of neurology" in 0), product of:
1.0 = tf(phraseFreq=1.0)
1.7836046 = idf(title: journal=2 of=2 neurology=2)
0.5 = fieldNorm(field=title, doc=0)
0.99862975 = (MATCH) sum of:
0.99862975 = (MATCH) MatchAllDocsQuery, product of:
0.99862975 = queryNorm
</str>
<str name="ejn01:954925518616">
1.0071099 = (MATCH) sum of:
0.0053001107 = (MATCH) sum of:
0.0017667036 = (MATCH) max of:
0.0017667036 = (MATCH) weight(title:journal^10.0 in 1), product of:
0.005943145 = queryWeight(title:journal^10.0), product of:
10.0 = boost
0.5945349 = idf(docFreq=2, maxDocs=2)
9.996294E-4 = queryNorm
0.29726744 = (MATCH) fieldWeight(title:journal in 1), product of:
1.0 = tf(termFreq(title:journal)=1)
0.5945349 = idf(docFreq=2, maxDocs=2)
0.5 = fieldNorm(field=title, doc=1)
0.0017667036 = (MATCH) max of:
0.0017667036 = (MATCH) weight(title:of^10.0 in 1), product of:
0.005943145 = queryWeight(title:of^10.0), product of:
10.0 = boost
0.5945349 = idf(docFreq=2, maxDocs=2)
9.996294E-4 = queryNorm
0.29726744 = (MATCH) fieldWeight(title:of in 1), product of:
1.0 = tf(termFreq(title:of)=1)
0.5945349 = idf(docFreq=2, maxDocs=2)
0.5 = fieldNorm(field=title, doc=1)
0.0017667036 = (MATCH) max of:
0.0017667036 = (MATCH) weight(title:neurology^10.0 in 1), product of:
0.005943145 = queryWeight(title:neurology^10.0), product of:
10.0 = boost
0.5945349 = idf(docFreq=2, maxDocs=2)
9.996294E-4 = queryNorm
0.29726744 = (MATCH) fieldWeight(title:neurology in 1), product of:
1.0 = tf(termFreq(title:neurology)=1)
0.5945349 = idf(docFreq=2, maxDocs=2)
0.5 = fieldNorm(field=title, doc=1)
0.0031800664 = (MATCH) max of:
0.0031800664 = (MATCH) weight(title:"journal of neurology"~2^2.0 in 1),
product of:
0.0035658872 = queryWeight(title:"journal of neurology"~2^2.0), product
of:
2.0 = boost
1.7836046 = idf(title: journal=2 of=2 neurology=2)
9.996294E-4 = queryNorm
0.8918023 = fieldWeight(title:"journal of neurology" in 1), product of:
1.0 = tf(phraseFreq=1.0)
1.7836046 = idf(title: journal=2 of=2 neurology=2)
0.5 = fieldNorm(field=title, doc=1)
0.99862975 = (MATCH) sum of:
0.99862975 = (MATCH) MatchAllDocsQuery, product of:
0.99862975 = queryNorm
</str>