Repository: lucenenet
Updated Branches:
  refs/heads/master 1197b1aed -> 666de32b0
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2a1541c1/src/Lucene.Net/Search/Similarities/Similarity.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Search/Similarities/Similarity.cs b/src/Lucene.Net/Search/Similarities/Similarity.cs index 95b3c01..44cd6e0 100644 --- a/src/Lucene.Net/Search/Similarities/Similarity.cs +++ b/src/Lucene.Net/Search/Similarities/Similarity.cs @@ -30,71 +30,72 @@ namespace Lucene.Net.Search.Similarities /// <summary> /// Similarity defines the components of Lucene scoring. - /// <p> + /// <para/> /// Expert: Scoring API. - /// <p> - /// this is a low-level API, you should only extend this API if you want to implement + /// <para/> + /// This is a low-level API, you should only extend this API if you want to implement /// an information retrieval <i>model</i>. If you are instead looking for a convenient way /// to alter Lucene's scoring, consider extending a higher-level implementation - /// such as <seealso cref="TFIDFSimilarity"/>, which implements the vector space model with this API, or - /// just tweaking the default implementation: <seealso cref="DefaultSimilarity"/>. - /// <p> + /// such as <see cref="TFIDFSimilarity"/>, which implements the vector space model with this API, or + /// just tweaking the default implementation: <see cref="DefaultSimilarity"/>. + /// <para/> /// Similarity determines how Lucene weights terms, and Lucene interacts with /// this class at both <a href="#indextime">index-time</a> and /// <a href="#querytime">query-time</a>. - /// <p> + /// <para/> /// <a name="indextime"/> - /// At indexing time, the indexer calls <seealso cref="#computeNorm(FieldInvertState)"/>, allowing - /// the Similarity implementation to set a per-document value for the field that will - /// be later accessible via <seealso cref="AtomicReader#getNormValues(String)"/>. Lucene makes no assumption + /// At indexing time, the indexer calls <see cref="ComputeNorm(FieldInvertState)"/>, allowing + /// the <see cref="Similarity"/> implementation to set a per-document value for the field that will + /// be later accessible via <see cref="Index.AtomicReader.GetNormValues(string)"/>. Lucene makes no assumption /// about what is in this norm, but it is most useful for encoding length normalization /// information. - /// <p> + /// <para/> /// Implementations should carefully consider how the normalization is encoded: while - /// Lucene's classical <seealso cref="TFIDFSimilarity"/> encodes a combination of index-time boost - /// and length normalization information with <seealso cref="SmallFloat"/> into a single byte, this + /// Lucene's classical <see cref="TFIDFSimilarity"/> encodes a combination of index-time boost + /// and length normalization information with <see cref="Util.SmallSingle"/> into a single byte, this /// might not be suitable for all purposes. - /// <p> + /// <para/> /// Many formulas require the use of average document length, which can be computed via a - /// combination of <seealso cref="CollectionStatistics#sumTotalTermFreq()"/> and - /// <seealso cref="CollectionStatistics#maxDoc()"/> or <seealso cref="CollectionStatistics#docCount()"/>, + /// combination of <see cref="CollectionStatistics.SumTotalTermFreq"/> and + /// <see cref="CollectionStatistics.MaxDoc"/> or <see cref="CollectionStatistics.DocCount"/>, /// depending upon whether the average should reflect field sparsity. 
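The average-length computation described in the paragraph above reduces to a few lines of C#. A minimal sketch, using only the CollectionStatistics members named there (SumTotalTermFreq, MaxDoc, DocCount); the helper class, its name, and the fallback/guard logic are illustrative rather than part of the Lucene.NET API:

    using Lucene.Net.Search;

    internal static class SimilarityStatsHelper
    {
        // Average number of tokens per document for the field described by collectionStats.
        // DocCount counts only documents that actually contain the field (the "sparsity"
        // choice mentioned above); MaxDoc is the fallback when that statistic is unavailable.
        internal static float AvgFieldLength(CollectionStatistics collectionStats)
        {
            long docCount = collectionStats.DocCount > 0 ? collectionStats.DocCount : collectionStats.MaxDoc;
            long sumTotalTermFreq = collectionStats.SumTotalTermFreq;
            return (docCount <= 0 || sumTotalTermFreq <= 0)
                ? 1f
                : (float)((double)sumTotalTermFreq / docCount);
        }
    }

A ComputeWeight override would typically evaluate this once per query and keep the result in its SimWeight, so that per-document scoring causes no additional I/O, which is exactly the point the documentation makes.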
- /// <p> + /// <para/> /// Additional scoring factors can be stored in named - /// <code>NumericDocValuesField</code>s and accessed - /// at query-time with <seealso cref="AtomicReader#getNumericDocValues(String)"/>. - /// <p> + /// <see cref="Documents.NumericDocValuesField"/>s and accessed + /// at query-time with <see cref="Index.AtomicReader.GetNumericDocValues(string)"/>. + /// <para/> /// Finally, using index-time boosts (either via folding into the normalization byte or - /// via DocValues), is an inefficient way to boost the scores of different fields if the + /// via <see cref="Index.DocValues"/>), is an inefficient way to boost the scores of different fields if the /// boost will be the same for every document, instead the Similarity can simply take a constant - /// boost parameter <i>C</i>, and <seealso cref="PerFieldSimilarityWrapper"/> can return different + /// boost parameter <i>C</i>, and <see cref="PerFieldSimilarityWrapper"/> can return different /// instances with different boosts depending upon field name. - /// <p> + /// <para/> /// <a name="querytime"/> /// At query-time, Queries interact with the Similarity via these steps: - /// <ol> - /// <li>The <seealso cref="#computeWeight(float, CollectionStatistics, TermStatistics...)"/> method is called a single time, + /// <list type="number"> + /// <item><description>The <see cref="ComputeWeight(float, CollectionStatistics, TermStatistics[])"/> method is called a single time, /// allowing the implementation to compute any statistics (such as IDF, average document length, etc) - /// across <i>the entire collection</i>. The <seealso cref="TermStatistics"/> and <seealso cref="CollectionStatistics"/> passed in - /// already contain all of the raw statistics involved, so a Similarity can freely use any combination + /// across <i>the entire collection</i>. The <see cref="TermStatistics"/> and <see cref="CollectionStatistics"/> passed in + /// already contain all of the raw statistics involved, so a <see cref="Similarity"/> can freely use any combination /// of statistics without causing any additional I/O. Lucene makes no assumption about what is - /// stored in the returned <seealso cref="Similarity.SimWeight"/> object. - /// <li>The query normalization process occurs a single time: <seealso cref="Similarity.SimWeight#getValueForNormalization()"/> - /// is called for each query leaf node, <seealso cref="Similarity#queryNorm(float)"/> is called for the top-level - /// query, and finally <seealso cref="Similarity.SimWeight#normalize(float, float)"/> passes down the normalization value - /// and any top-level boosts (e.g. from enclosing <seealso cref="BooleanQuery"/>s). - /// <li>For each segment in the index, the Query creates a <seealso cref="#simScorer(SimWeight, AtomicReaderContext)"/> - /// The score() method is called for each matching document. - /// </ol> - /// <p> + /// stored in the returned <see cref="Similarity.SimWeight"/> object.</description></item> + /// <item><description>The query normalization process occurs a single time: <see cref="Similarity.SimWeight.GetValueForNormalization()"/> + /// is called for each query leaf node, <see cref="Similarity.QueryNorm(float)"/> is called for the top-level + /// query, and finally <see cref="Similarity.SimWeight.Normalize(float, float)"/> passes down the normalization value + /// and any top-level boosts (e.g. 
from enclosing <see cref="BooleanQuery"/>s).</description></item> + /// <item><description>For each segment in the index, the <see cref="Query"/> creates a <see cref="GetSimScorer(SimWeight, AtomicReaderContext)"/>. + /// The GetScore() method is called for each matching document.</description></item> + /// </list> + /// <para/> /// <a name="explaintime"/> - /// When <seealso cref="IndexSearcher#explain(Lucene.Net.Search.Query, int)"/> is called, queries consult the Similarity's DocScorer for an + /// When <see cref="IndexSearcher.Explain(Lucene.Net.Search.Query, int)"/> is called, queries consult the Similarity's SimScorer for an /// explanation of how it computed its score. The query passes in the document id and an explanation of how the frequency /// was computed. + /// <para/> + /// @lucene.experimental /// </summary> - /// <seealso cref= Lucene.Net.Index.IndexWriterConfig#setSimilarity(Similarity) </seealso> - /// <seealso cref= IndexSearcher#setSimilarity(Similarity) - /// @lucene.experimental </seealso> + /// <seealso cref="Lucene.Net.Index.IndexWriterConfig.Similarity"/> + /// <seealso cref="IndexSearcher.Similarity"/> #if FEATURE_SERIALIZABLE [Serializable] #endif @@ -110,10 +111,10 @@ namespace Lucene.Net.Search.Similarities /// <summary> /// Hook to integrate coordinate-level matching. - /// <p> - /// By default this is disabled (returns <code>1</code>), as with + /// <para/> + /// By default this is disabled (returns <c>1</c>), as with /// most modern models this will only skew performance, but some - /// implementations such as <seealso cref="TFIDFSimilarity"/> override this. + /// implementations such as <see cref="TFIDFSimilarity"/> override this. /// </summary> /// <param name="overlap"> the number of query terms matched in the document </param> /// <param name="maxOverlap"> the total number of terms in the query </param> @@ -125,14 +126,14 @@ namespace Lucene.Net.Search.Similarities /// <summary> /// Computes the normalization value for a query given the sum of the - /// normalized weights <seealso cref="SimWeight#getValueForNormalization()"/> of + /// normalized weights <see cref="SimWeight.GetValueForNormalization()"/> of /// each of the query terms. this value is passed back to the - /// weight (<seealso cref="SimWeight#normalize(float, float)"/> of each query + /// weight (<see cref="SimWeight.Normalize(float, float)"/>) of each query /// term, to provide a hook to attempt to make scores from different /// queries comparable. - /// <p> - /// By default this is disabled (returns <code>1</code>), but some - /// implementations such as <seealso cref="TFIDFSimilarity"/> override this. + /// <para/> + /// By default this is disabled (returns <c>1</c>), but some + /// implementations such as <see cref="TFIDFSimilarity"/> override this. /// </summary> /// <param name="valueForNormalization"> the sum of the term normalization values </param> /// <returns> a normalization factor for query weights </returns> @@ -143,12 +144,12 @@ namespace Lucene.Net.Search.Similarities /// <summary> /// Computes the normalization value for a field, given the accumulated - /// state of term processing for this field (see <seealso cref="FieldInvertState"/>). - /// - /// <p>Matches in longer fields are less precise, so implementations of this - /// method usually set smaller values when <code>state.getLength()</code> is large, - /// and larger values when <code>state.getLength()</code> is small. + /// state of term processing for this field (see <see cref="FieldInvertState"/>).
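To make the ComputeNorm contract above concrete: TFIDFSimilarity, further down in this diff, seals ComputeNorm and instead exposes a LengthNorm hook whose result is encoded into the single norm byte mentioned earlier. A minimal sketch of a custom length normalization that damps long fields more gently than the default 1/sqrt(length), assuming FieldInvertState surfaces Length, NumOverlap and Boost as properties and that DefaultSimilarity.LengthNorm can be overridden; the class name and formula are illustrative:

    using System;
    using Lucene.Net.Index;
    using Lucene.Net.Search.Similarities;

    public class GentleLengthNormSimilarity : DefaultSimilarity
    {
        public override float LengthNorm(FieldInvertState state)
        {
            // Ignore overlap tokens (position increment 0) when measuring field length.
            int numTerms = Math.Max(1, state.Length - state.NumOverlap);
            // Longer fields still receive smaller norms, but the falloff is logarithmic
            // rather than the default 1/sqrt(numTerms).
            return state.Boost * (float)(1.0 / Math.Log(Math.E + numTerms));
        }
    }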
/// + /// <para/>Matches in longer fields are less precise, so implementations of this + /// method usually set smaller values when <c>state.Length</c> is large, + /// and larger values when <code>state.Length</code> is small. + /// <para/> /// @lucene.experimental /// </summary> /// <param name="state"> current processing state for this field </param> @@ -161,24 +162,24 @@ namespace Lucene.Net.Search.Similarities /// <param name="queryBoost"> the query-time boost. </param> /// <param name="collectionStats"> collection-level statistics, such as the number of tokens in the collection. </param> /// <param name="termStats"> term-level statistics, such as the document frequency of a term across the collection. </param> - /// <returns> SimWeight object with the information this Similarity needs to score a query. </returns> + /// <returns> <see cref="SimWeight"/> object with the information this <see cref="Similarity"/> needs to score a query. </returns> public abstract SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats); /// <summary> - /// Creates a new <seealso cref="Similarity.SimScorer"/> to score matching documents from a segment of the inverted index. </summary> - /// <param name="weight"> collection information from <seealso cref="#computeWeight(float, CollectionStatistics, TermStatistics...)"/> </param> + /// Creates a new <see cref="Similarity.SimScorer"/> to score matching documents from a segment of the inverted index. </summary> + /// <param name="weight"> collection information from <see cref="ComputeWeight(float, CollectionStatistics, TermStatistics[])"/> </param> /// <param name="context"> segment of the inverted index to be scored. </param> - /// <returns> SloppySimScorer for scoring documents across <code>context</code> </returns> - /// <exception cref="IOException"> if there is a low-level I/O error </exception> + /// <returns> Sloppy <see cref="SimScorer"/> for scoring documents across <c>context</c> </returns> + /// <exception cref="System.IO.IOException"> if there is a low-level I/O error </exception> public abstract SimScorer GetSimScorer(SimWeight weight, AtomicReaderContext context); /// <summary> - /// API for scoring "sloppy" queries such as <seealso cref="TermQuery"/>, - /// <seealso cref="SpanQuery"/>, and <seealso cref="PhraseQuery"/>. - /// <p> + /// API for scoring "sloppy" queries such as <see cref="TermQuery"/>, + /// <see cref="Spans.SpanQuery"/>, and <see cref="PhraseQuery"/>. + /// <para/> /// Frequencies are floating-point values: an approximate /// within-document frequency adjusted for "sloppiness" by - /// <seealso cref="SimScorer#computeSlopFactor(int)"/>. + /// <see cref="SimScorer.ComputeSlopFactor(int)"/>. /// </summary> #if FEATURE_SERIALIZABLE [Serializable] @@ -223,8 +224,8 @@ namespace Lucene.Net.Search.Similarities /// <summary> /// Stores the weight for a query across the indexed collection. this abstract - /// implementation is empty; descendants of {@code Similarity} should - /// subclass {@code SimWeight} and define the statistics they require in the + /// implementation is empty; descendants of <see cref="Similarity"/> should + /// subclass <see cref="SimWeight"/> and define the statistics they require in the /// subclass. Examples include idf, average field length, etc. /// </summary> #if FEATURE_SERIALIZABLE @@ -242,8 +243,8 @@ namespace Lucene.Net.Search.Similarities /// <summary> /// The value for normalization of contained query clauses (e.g. sum of squared weights). 
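The GetValueForNormalization/Normalize pair documented here is easiest to read as a small concrete SimWeight. A minimal sketch that uses only the two abstract members shown in this diff; the class, constructor, and property names are illustrative:

    using Lucene.Net.Search.Similarities;

    internal class IllustrativeSimWeight : Similarity.SimWeight
    {
        // The raw weight this (single-clause) query contributes before normalization.
        internal float QueryWeight { get; private set; }

        internal IllustrativeSimWeight(float rawWeight)
        {
            QueryWeight = rawWeight;
        }

        public override float GetValueForNormalization()
        {
            // The "sum of squared weights" for a single clause.
            return QueryWeight * QueryWeight;
        }

        public override void Normalize(float queryNorm, float topLevelBoost)
        {
            // Fold in the query norm and any boost inherited from an enclosing query,
            // e.g. a BooleanQuery.
            QueryWeight *= queryNorm * topLevelBoost;
        }
    }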
- /// <p> - /// NOTE: a Similarity implementation might not use any query normalization at all, + /// <para/> + /// NOTE: a <see cref="Similarity"/> implementation might not use any query normalization at all, /// its not required. However, if it wants to participate in query normalization, /// it can return a value here. /// </summary> @@ -251,10 +252,10 @@ namespace Lucene.Net.Search.Similarities /// <summary> /// Assigns the query normalization factor and boost from parent queries to this. - /// <p> - /// NOTE: a Similarity implementation might not use this normalized value at all, + /// <para/> + /// NOTE: a <see cref="Similarity"/> implementation might not use this normalized value at all, /// its not required. However, its usually a good idea to at least incorporate - /// the topLevelBoost (e.g. from an outer BooleanQuery) into its score. + /// the <paramref name="topLevelBoost"/> (e.g. from an outer <see cref="BooleanQuery"/>) into its score. /// </summary> public abstract void Normalize(float queryNorm, float topLevelBoost); } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2a1541c1/src/Lucene.Net/Search/Similarities/SimilarityBase.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Search/Similarities/SimilarityBase.cs b/src/Lucene.Net/Search/Similarities/SimilarityBase.cs index 62288a5..744db3f 100644 --- a/src/Lucene.Net/Search/Similarities/SimilarityBase.cs +++ b/src/Lucene.Net/Search/Similarities/SimilarityBase.cs @@ -26,18 +26,19 @@ namespace Lucene.Net.Search.Similarities */ /// <summary> - /// A subclass of {@code Similarity} that provides a simplified API for its - /// descendants. Subclasses are only required to implement the <seealso cref="#score"/> - /// and <seealso cref="#toString()"/> methods. Implementing - /// <seealso cref="#explain(Explanation, BasicStats, int, float, float)"/> is optional, - /// inasmuch as SimilarityBase already provides a basic explanation of the score + /// A subclass of <see cref="Similarity"/> that provides a simplified API for its + /// descendants. Subclasses are only required to implement the <see cref="Score(BasicStats, float, float)"/> + /// and <see cref="ToString()"/> methods. Implementing + /// <see cref="Explain(Explanation, BasicStats, int, float, float)"/> is optional, + /// inasmuch as <see cref="SimilarityBase"/> already provides a basic explanation of the score /// and the term frequency. However, implementers of a subclass are encouraged to /// include as much detail about the scoring method as possible. - /// <p> + /// <para/> /// Note: multi-word queries such as phrase queries are scored in a different way /// than Lucene's default ranking algorithm: whereas it "fakes" an IDF value for /// the phrase as a whole (since it does not know it), this class instead scores /// phrases as a summation of the individual term scores. + /// <para/> /// @lucene.experimental /// </summary> #if FEATURE_SERIALIZABLE @@ -46,7 +47,7 @@ namespace Lucene.Net.Search.Similarities public abstract class SimilarityBase : Similarity { /// <summary> - /// For <seealso cref="#log2(double)"/>. Precomputed for efficiency reasons. </summary> + /// For <see cref="Log2(double)"/>. Precomputed for efficiency reasons. </summary> private static readonly double LOG_2 = Math.Log(2); /// <summary> @@ -65,13 +66,13 @@ namespace Lucene.Net.Search.Similarities /// <summary> /// Determines whether overlap tokens (Tokens with - /// 0 position increment) are ignored when computing - /// norm. 
By default this is true, meaning overlap - /// tokens do not count when computing norms. - /// - /// @lucene.experimental + /// 0 position increment) are ignored when computing + /// norm. By default this is <c>true</c>, meaning overlap + /// tokens do not count when computing norms. + /// <para/> + /// @lucene.experimental /// </summary> - /// <seealso cref= #computeNorm </seealso> + /// <seealso cref="ComputeNorm(FieldInvertState)"/> public virtual bool DiscountOverlaps { set @@ -103,8 +104,8 @@ namespace Lucene.Net.Search.Similarities } /// <summary> - /// Fills all member fields defined in {@code BasicStats} in {@code stats}. - /// Subclasses can override this method to fill additional stats. + /// Fills all member fields defined in <see cref="BasicStats"/> in <paramref name="stats"/>. + /// Subclasses can override this method to fill additional stats. /// </summary> protected internal virtual void FillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) { @@ -150,8 +151,8 @@ namespace Lucene.Net.Search.Similarities } /// <summary> - /// Scores the document {@code doc}. - /// <p>Subclasses must apply their scoring formula in this class.</p> </summary> + /// Scores the document <c>doc</c>. + /// <para>Subclasses must apply their scoring formula in this class.</para> </summary> /// <param name="stats"> the corpus level statistics. </param> /// <param name="freq"> the term frequency. </param> /// <param name="docLen"> the document length. </param> @@ -159,11 +160,11 @@ namespace Lucene.Net.Search.Similarities public abstract float Score(BasicStats stats, float freq, float docLen); /// <summary> - /// Subclasses should implement this method to explain the score. {@code expl} + /// Subclasses should implement this method to explain the score. <paramref name="expl"/> /// already contains the score, the name of the class and the doc id, as well /// as the term frequency and its explanation; subclasses can add additional /// clauses to explain details of their scoring formulae. - /// <p>The default implementation does nothing.</p> + /// <para>The default implementation does nothing.</para> /// </summary> /// <param name="expl"> the explanation to extend with details. </param> /// <param name="stats"> the corpus level statistics. </param> @@ -176,12 +177,12 @@ namespace Lucene.Net.Search.Similarities /// <summary> /// Explains the score. The implementation here provides a basic explanation - /// in the format <em>score(name-of-similarity, doc=doc-id, + /// in the format <em>Score(name-of-similarity, doc=doc-id, /// freq=term-frequency), computed from:</em>, and - /// attaches the score (computed via the <seealso cref="#score(BasicStats, float, float)"/> + /// attaches the score (computed via the <see cref="Score(BasicStats, float, float)"/> /// method) and the explanation for the term frequency. Subclasses content with /// this format may add additional details in - /// <seealso cref="#explain(Explanation, BasicStats, int, float, float)"/>. + /// <see cref="Explain(Explanation, BasicStats, int, float, float)"/>. /// </summary> /// <param name="stats"> the corpus level statistics. </param> /// <param name="doc"> the document id. </param> @@ -223,7 +224,7 @@ namespace Lucene.Net.Search.Similarities } /// <summary> - /// Subclasses must override this method to return the name of the Similarity + /// Subclasses must override this method to return the name of the <see cref="Similarity"/> /// and preferably the values of parameters (if any) as well. 
/// </summary> public override abstract string ToString(); @@ -244,7 +245,7 @@ namespace Lucene.Net.Search.Similarities } /// <summary> - /// Encodes the document length in the same way as <seealso cref="TFIDFSimilarity"/>. </summary> + /// Encodes the document length in the same way as <see cref="TFIDFSimilarity"/>. </summary> public override long ComputeNorm(FieldInvertState state) { float numTerms; @@ -261,14 +262,14 @@ namespace Lucene.Net.Search.Similarities /// <summary> /// Decodes a normalization factor (document length) stored in an index. </summary> - /// <seealso cref= #encodeNormValue(float,float) </seealso> + /// <see cref="EncodeNormValue(float,float)"/> protected internal virtual float DecodeNormValue(byte norm) { return NORM_TABLE[norm & 0xFF]; // & 0xFF maps negative bytes to positive above 127 } /// <summary> - /// Encodes the length to a byte via SmallFloat. </summary> + /// Encodes the length to a byte via <see cref="SmallSingle"/>. </summary> protected internal virtual byte EncodeNormValue(float boost, float length) { return SmallSingle.SingleToByte315((boost / (float)Math.Sqrt(length))); @@ -277,7 +278,7 @@ namespace Lucene.Net.Search.Similarities // ----------------------------- Static methods ------------------------------ /// <summary> - /// Returns the base two logarithm of {@code x}. </summary> + /// Returns the base two logarithm of <c>x</c>. </summary> public static double Log2(double x) { // Put this to a 'util' class if we need more of these. @@ -287,10 +288,10 @@ namespace Lucene.Net.Search.Similarities // --------------------------------- Classes --------------------------------- /// <summary> - /// Delegates the <seealso cref="#score(int, float)"/> and - /// <seealso cref="#explain(int, Explanation)"/> methods to - /// <seealso cref="SimilarityBase#score(BasicStats, float, float)"/> and - /// <seealso cref="SimilarityBase#explain(BasicStats, int, Explanation, float)"/>, + /// Delegates the <see cref="Score(int, float)"/> and + /// <see cref="Explain(int, Explanation)"/> methods to + /// <see cref="SimilarityBase.Score(BasicStats, float, float)"/> and + /// <see cref="SimilarityBase.Explain(BasicStats, int, Explanation, float)"/>, /// respectively. /// </summary> #if FEATURE_SERIALIZABLE http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2a1541c1/src/Lucene.Net/Search/Similarities/TFIDFSimilarity.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Search/Similarities/TFIDFSimilarity.cs b/src/Lucene.Net/Search/Similarities/TFIDFSimilarity.cs index 2b89112..ec8a0b8 100644 --- a/src/Lucene.Net/Search/Similarities/TFIDFSimilarity.cs +++ b/src/Lucene.Net/Search/Similarities/TFIDFSimilarity.cs @@ -26,25 +26,25 @@ namespace Lucene.Net.Search.Similarities using NumericDocValues = Lucene.Net.Index.NumericDocValues; /// <summary> - /// Implementation of <seealso cref="Similarity"/> with the Vector Space Model. - /// <p> + /// Implementation of <see cref="Similarity"/> with the Vector Space Model. + /// <para/> /// Expert: Scoring API. - /// <p>TFIDFSimilarity defines the components of Lucene scoring. + /// <para/>TFIDFSimilarity defines the components of Lucene scoring. /// Overriding computation of these components is a convenient /// way to alter Lucene scoring. /// - /// <p>Suggested reading: + /// <para/>Suggested reading: /// <a href="http://nlp.stanford.edu/IR-book/html/htmledition/queries-as-vectors-1.html"> /// Introduction To Information Retrieval, Chapter 6</a>. 
/// - /// <p>The following describes how Lucene scoring evolves from + /// <para/>The following describes how Lucene scoring evolves from /// underlying information retrieval models to (efficient) implementation. /// We first brief on <i>VSM Score</i>, /// then derive from it <i>Lucene's Conceptual Scoring Formula</i>, /// from which, finally, evolves <i>Lucene's Practical Scoring Function</i> /// (the latter is connected directly with Lucene classes and methods). /// - /// <p>Lucene combines + /// <para/>Lucene combines /// <a href="http://en.wikipedia.org/wiki/Standard_Boolean_model"> /// Boolean model (BM) of Information Retrieval</a> /// with @@ -52,13 +52,13 @@ namespace Lucene.Net.Search.Similarities /// Vector Space Model (VSM) of Information Retrieval</a> - /// documents "approved" by BM are scored by VSM. /// - /// <p>In VSM, documents and queries are represented as + /// <para/>In VSM, documents and queries are represented as /// weighted vectors in a multi-dimensional space, /// where each distinct index term is a dimension, /// and weights are /// <a href="http://en.wikipedia.org/wiki/Tfidf">Tf-idf</a> values. /// - /// <p>VSM does not require weights to be <i>Tf-idf</i> values, + /// <para/>VSM does not require weights to be <i>Tf-idf</i> values, /// but <i>Tf-idf</i> values are believed to produce search results of high quality, /// and so Lucene is using <i>Tf-idf</i>. /// <i>Tf</i> and <i>Idf</i> are described in more detail below, @@ -69,53 +69,48 @@ namespace Lucene.Net.Search.Similarities /// <i>idf(t)</i> similarly varies with the inverse of the /// number of index documents containing term <i>t</i>. /// - /// <p><i>VSM score</i> of document <i>d</i> for query <i>q</i> is the + /// <para/><i>VSM score</i> of document <i>d</i> for query <i>q</i> is the /// <a href="http://en.wikipedia.org/wiki/Cosine_similarity"> /// Cosine Similarity</a> /// of the weighted query vectors <i>V(q)</i> and <i>V(d)</i>: - /// - /// <br> <br> - /// <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto"> - /// <tr><td> - /// <table cellpadding="1" cellspacing="0" border="1" align="center"> - /// <tr><td> - /// <table cellpadding="2" cellspacing="2" border="0" align="center"> - /// <tr> - /// <td valign="middle" align="right" rowspan="1"> - /// cosine-similarity(q,d) = - /// </td> - /// <td valign="middle" align="center"> - /// <table> - /// <tr><td align="center" style="text-align: center"><small>V(q) · V(d)</small></td></tr> - /// <tr><td align="center" style="text-align: center">–––––––––</td></tr> - /// <tr><td align="center" style="text-align: center"><small>|V(q)| |V(d)|</small></td></tr> - /// </table> - /// </td> - /// </tr> - /// </table> - /// </td></tr> - /// </table> - /// </td></tr> - /// <tr><td> - /// <center><font size=-1><u>VSM Score</u></font></center> - /// </td></tr> - /// </table> - /// <br> <br> - /// - /// - /// Where <i>V(q)</i> · <i>V(d)</i> is the + /// <para/> + /// <list type="table"> + /// <item> + /// <term> + /// <list type="table"> + /// <item> + /// <term>cosine-similarity(q,d)   =  </term> + /// <term> + /// <table> + /// <item><term><small>V(q) · V(d)</small></term></item> + /// <item><term>–––––––––</term></item> + /// <item><term><small>|V(q)| |V(d)|</small></term></item> + /// </table> + /// </term> + /// </item> + /// </list> + /// </term> + /// </item> + /// <item> + /// <term>VSM Score</term> + /// </item> + /// </list> + /// <para/> + /// + /// + /// Where <i>V(q)</i> · <i>V(d)</i> is the /// <a 
href="http://en.wikipedia.org/wiki/Dot_product">dot product</a> /// of the weighted vectors, /// and <i>|V(q)|</i> and <i>|V(d)|</i> are their /// <a href="http://en.wikipedia.org/wiki/Euclidean_norm#Euclidean_norm">Euclidean norms</a>. /// - /// <p>Note: the above equation can be viewed as the dot product of + /// <para/>Note: the above equation can be viewed as the dot product of /// the normalized weighted vectors, in the sense that dividing /// <i>V(q)</i> by its euclidean norm is normalizing it to a unit vector. /// - /// <p>Lucene refines <i>VSM score</i> for both search quality and usability: - /// <ul> - /// <li>Normalizing <i>V(d)</i> to the unit vector is known to be problematic in that + /// <para/>Lucene refines <i>VSM score</i> for both search quality and usability: + /// <list type="bullet"> + /// <item><description>Normalizing <i>V(d)</i> to the unit vector is known to be problematic in that /// it removes all document length information. /// For some documents removing this info is probably ok, /// e.g. a document made by duplicating a certain paragraph <i>10</i> times, @@ -125,89 +120,88 @@ namespace Lucene.Net.Search.Similarities /// To avoid this problem, a different document length normalization /// factor is used, which normalizes to a vector equal to or larger /// than the unit vector: <i>doc-len-norm(d)</i>. - /// </li> + /// </description></item> /// - /// <li>At indexing, users can specify that certain documents are more + /// <item><description>At indexing, users can specify that certain documents are more /// important than others, by assigning a document boost. /// For this, the score of each document is also multiplied by its boost value /// <i>doc-boost(d)</i>. - /// </li> + /// </description></item> /// - /// <li>Lucene is field based, hence each query term applies to a single + /// <item><description>Lucene is field based, hence each query term applies to a single /// field, document length normalization is by the length of the certain field, /// and in addition to document boost there are also document fields boosts. - /// </li> + /// </description></item> /// - /// <li>The same field can be added to a document during indexing several times, + /// <item><description>The same field can be added to a document during indexing several times, /// and so the boost of that field is the multiplication of the boosts of /// the separate additions (or parts) of that field within the document. - /// </li> + /// </description></item> /// - /// <li>At search time users can specify boosts to each query, sub-query, and + /// <item><description>At search time users can specify boosts to each query, sub-query, and /// each query term, hence the contribution of a query term to the score of /// a document is multiplied by the boost of that query term <i>query-boost(q)</i>. - /// </li> + /// </description></item> /// - /// <li>A document may match a multi term query without containing all + /// <item><description>A document may match a multi term query without containing all /// the terms of that query (this is correct for some of the queries), /// and users can further reward documents matching more query terms /// through a coordination factor, which is usually larger when /// more terms are matched: <i>coord-factor(q,d)</i>. 
- /// </li> - /// </ul> + /// </description></item> + /// </list> /// - /// <p>Under the simplifying assumption of a single field in the index, + /// <para/>Under the simplifying assumption of a single field in the index, /// we get <i>Lucene's Conceptual scoring formula</i>: - /// - /// <br> <br> - /// <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto"> - /// <tr><td> - /// <table cellpadding="1" cellspacing="0" border="1" align="center"> - /// <tr><td> - /// <table cellpadding="2" cellspacing="2" border="0" align="center"> - /// <tr> - /// <td valign="middle" align="right" rowspan="1"> - /// score(q,d) = - /// <font color="#FF9933">coord-factor(q,d)</font> · - /// <font color="#CCCC00">query-boost(q)</font> · - /// </td> - /// <td valign="middle" align="center"> - /// <table> - /// <tr><td align="center" style="text-align: center"><small><font color="#993399">V(q) · V(d)</font></small></td></tr> - /// <tr><td align="center" style="text-align: center">–––––––––</td></tr> - /// <tr><td align="center" style="text-align: center"><small><font color="#FF33CC">|V(q)|</font></small></td></tr> - /// </table> - /// </td> - /// <td valign="middle" align="right" rowspan="1"> - /// · <font color="#3399FF">doc-len-norm(d)</font> - /// · <font color="#3399FF">doc-boost(d)</font> - /// </td> - /// </tr> - /// </table> - /// </td></tr> - /// </table> - /// </td></tr> - /// <tr><td> - /// <center><font size=-1><u>Lucene Conceptual Scoring Formula</u></font></center> - /// </td></tr> - /// </table> - /// <br> <br> - /// - /// <p>The conceptual formula is a simplification in the sense that (1) terms and documents + /// + /// <para/> + /// <list type="table"> + /// <item> + /// <term> + /// <list type="table"> + /// <item> + /// <term> + /// score(q,d)   =   + /// <font color="#FF9933">coord-factor(q,d)</font> ·   + /// <font color="#CCCC00">query-boost(q)</font> ·   + /// </term> + /// <term> + /// <list type="table"> + /// <item><term><small><font color="#993399">V(q) · V(d)</font></small></term></item> + /// <item><term>–––––––––</term></item> + /// <item><term><small><font color="#FF33CC">|V(q)|</font></small></term></item> + /// </list> + /// </term> + /// <term> + ///   ·   <font color="#3399FF">doc-len-norm(d)</font> + ///   ·   <font color="#3399FF">doc-boost(d)</font> + /// </term> + /// </item> + /// </list> + /// </term> + /// </item> + /// <item> + /// <term>Lucene Conceptual Scoring Formula</term> + /// </item> + /// </list> + /// <para/> + /// + /// + /// <para/>The conceptual formula is a simplification in the sense that (1) terms and documents /// are fielded and (2) boosts are usually per query term rather than per query. /// - /// <p>We now describe how Lucene implements this conceptual scoring formula, and + /// <para/>We now describe how Lucene implements this conceptual scoring formula, and /// derive from it <i>Lucene's Practical Scoring Function</i>. /// - /// <p>For efficient score computation some scoring components + /// <para/>For efficient score computation some scoring components /// are computed and aggregated in advance: /// - /// <ul> - /// <li><i>Query-boost</i> for the query (actually for each query term) + /// <list type="bullet"> + /// <item><description><i>Query-boost</i> for the query (actually for each query term) /// is known when search starts. 
- /// </li> + /// </description></item> /// - /// <li>Query Euclidean norm <i>|V(q)|</i> can be computed when search starts, + /// <item><description>Query Euclidean norm <i>|V(q)|</i> can be computed when search starts, /// as it is independent of the document being scored. /// From search optimization perspective, it is a valid question /// why bother to normalize the query at all, because all @@ -215,8 +209,8 @@ namespace Lucene.Net.Search.Similarities /// and hence documents ranks (their order by score) will not /// be affected by this normalization. /// There are two good reasons to keep this normalization: - /// <ul> - /// <li>Recall that + /// <list type="bullet"> + /// <item><description>Recall that /// <a href="http://en.wikipedia.org/wiki/Cosine_similarity"> /// Cosine Similarity</a> can be used find how similar /// two documents are. One can use Lucene for e.g. @@ -229,70 +223,66 @@ namespace Lucene.Net.Search.Similarities /// There are other applications that may require this. /// And this is exactly what normalizing the query vector <i>V(q)</i> /// provides: comparability (to a certain extent) of two or more queries. - /// </li> + /// </description></item> /// - /// <li>Applying query normalization on the scores helps to keep the + /// <item><description>Applying query normalization on the scores helps to keep the /// scores around the unit vector, hence preventing loss of score data /// because of floating point precision limitations. - /// </li> - /// </ul> - /// </li> + /// </description></item> + /// </list> + /// </description></item> /// - /// <li>Document length norm <i>doc-len-norm(d)</i> and document + /// <item><description>Document length norm <i>doc-len-norm(d)</i> and document /// boost <i>doc-boost(d)</i> are known at indexing time. /// They are computed in advance and their multiplication /// is saved as a single value in the index: <i>norm(d)</i>. /// (In the equations below, <i>norm(t in d)</i> means <i>norm(field(t) in doc d)</i> /// where <i>field(t)</i> is the field associated with term <i>t</i>.) - /// </li> - /// </ul> + /// </description></item> + /// </list> /// - /// <p><i>Lucene's Practical Scoring Function</i> is derived from the above. + /// <para/><i>Lucene's Practical Scoring Function</i> is derived from the above. 
/// The color codes demonstrate how it relates /// to those of the <i>conceptual</i> formula: /// - /// <P> - /// <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto"> - /// <tr><td> - /// <table cellpadding="" cellspacing="2" border="2" align="center"> - /// <tr><td> - /// <table cellpadding="2" cellspacing="2" border="0" align="center"> - /// <tr> - /// <td valign="middle" align="right" rowspan="1"> - /// score(q,d) = - /// <A HREF="#formula_coord"><font color="#FF9933">coord(q,d)</font></A> · - /// <A HREF="#formula_queryNorm"><font color="#FF33CC">queryNorm(q)</font></A> · - /// </td> - /// <td valign="bottom" align="center" rowspan="1" style="text-align: center"> - /// <big><big><big>∑</big></big></big> - /// </td> - /// <td valign="middle" align="right" rowspan="1"> - /// <big><big>(</big></big> - /// <A HREF="#formula_tf"><font color="#993399">tf(t in d)</font></A> · - /// <A HREF="#formula_idf"><font color="#993399">idf(t)</font></A><sup>2</sup> · - /// <A HREF="#formula_termBoost"><font color="#CCCC00">t.getBoost()</font></A> · - /// <A HREF="#formula_norm"><font color="#3399FF">norm(t,d)</font></A> - /// <big><big>)</big></big> - /// </td> - /// </tr> - /// <tr valigh="top"> - /// <td></td> - /// <td align="center" style="text-align: center"><small>t in q</small></td> - /// <td></td> - /// </tr> - /// </table> - /// </td></tr> - /// </table> - /// </td></tr> - /// <tr><td> - /// <center><font size=-1><u>Lucene Practical Scoring Function</u></font></center> - /// </td></tr> - /// </table> - /// - /// <p> where - /// <ol> - /// <li> - /// <A NAME="formula_tf"></A> + /// <para/> + /// <list type="table"> + /// <item> + /// <term> + /// <list type="table"> + /// <item> + /// <term> + /// score(q,d)   =   + /// <a href="#formula_coord"><font color="#FF9933">coord(q,d)</font></a>   ·   + /// <a href="#formula_queryNorm"><font color="#FF33CC">queryNorm(q)</font></a>   ·   + /// </term> + /// <term><big><big><big>∑</big></big></big></term> + /// <term> + /// <big><big>(</big></big> + /// <a href="#formula_tf"><font color="#993399">tf(t in d)</font></a>   ·   + /// <a href="#formula_idf"><font color="#993399">idf(t)</font></a><sup>2</sup>   ·   + /// <a href="#formula_termBoost"><font color="#CCCC00">t.Boost</font></a>   ·   + /// <a href="#formula_norm"><font color="#3399FF">norm(t,d)</font></a> + /// <big><big>)</big></big> + /// </term> + /// </item> + /// <item> + /// <term></term> + /// <term><small>t in q</small></term> + /// <term></term> + /// </item> + /// </list> + /// </term> + /// </item> + /// <item> + /// <term>Lucene Practical Scoring Function</term> + /// </item> + /// </list> + /// + /// <para/> where + /// <list type="number"> + /// <item><description> + /// <a name="formula_tf"></a> /// <b><i>tf(t in d)</i></b> /// correlates to the term's <i>frequency</i>, /// defined as the number of times term <i>t</i> appears in the currently scored document <i>d</i>. @@ -302,71 +292,67 @@ namespace Lucene.Net.Search.Similarities /// two term-queries with that same term and hence the computation would still be correct (although /// not very efficient). 
/// The default computation for <i>tf(t in d)</i> in - /// <seealso cref="Lucene.Net.Search.Similarities.DefaultSimilarity#tf(float) DefaultSimilarity"/> is: - /// - /// <br> <br> - /// <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto"> - /// <tr> - /// <td valign="middle" align="right" rowspan="1"> - /// <seealso cref="Lucene.Net.Search.Similarities.DefaultSimilarity#tf(float) tf(t in d)"/> = - /// </td> - /// <td valign="top" align="center" rowspan="1"> - /// frequency<sup><big>½</big></sup> - /// </td> - /// </tr> - /// </table> - /// <br> <br> - /// </li> - /// - /// <li> - /// <A NAME="formula_idf"></A> + /// DefaultSimilarity (<see cref="Lucene.Net.Search.Similarities.DefaultSimilarity.Tf(float)"/>) is: + /// + /// <para/> + /// <list type="table"> + /// <item> + /// <term> + /// tf(t in d)   =   + /// </term> + /// <term> + /// frequency<sup><big>½</big></sup> + /// </term> + /// </item> + /// </list> + /// <para/> + /// + /// </description></item> + /// + /// <item><description> + /// <a name="formula_idf"></a> /// <b><i>idf(t)</i></b> stands for Inverse Document Frequency. this value - /// correlates to the inverse of <i>docFreq</i> + /// correlates to the inverse of <i>DocFreq</i> /// (the number of documents in which the term <i>t</i> appears). /// this means rarer terms give higher contribution to the total score. /// <i>idf(t)</i> appears for <i>t</i> in both the query and the document, /// hence it is squared in the equation. /// The default computation for <i>idf(t)</i> in - /// <seealso cref="Lucene.Net.Search.Similarities.DefaultSimilarity#idf(long, long) DefaultSimilarity"/> is: - /// - /// <br> <br> - /// <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto"> - /// <tr> - /// <td valign="middle" align="right"> - /// <seealso cref="Lucene.Net.Search.Similarities.DefaultSimilarity#idf(long, long) idf(t)"/> = - /// </td> - /// <td valign="middle" align="center"> - /// 1 + log <big>(</big> - /// </td> - /// <td valign="middle" align="center"> - /// <table> - /// <tr><td align="center" style="text-align: center"><small>numDocs</small></td></tr> - /// <tr><td align="center" style="text-align: center">–––––––––</td></tr> - /// <tr><td align="center" style="text-align: center"><small>docFreq+1</small></td></tr> - /// </table> - /// </td> - /// <td valign="middle" align="center"> - /// <big>)</big> - /// </td> - /// </tr> - /// </table> - /// <br> <br> - /// </li> - /// - /// <li> - /// <A NAME="formula_coord"></A> + /// DefaultSimilarity (<see cref="Lucene.Net.Search.Similarities.DefaultSimilarity.Idf(long, long)"/>) is: + /// + /// <para/> + /// <list type="table"> + /// <item> + /// <term>idf(t)   =  </term> + /// <term>1 + log <big>(</big></term> + /// <term> + /// <list type="table"> + /// <item><term><small>NumDocs</small></term></item> + /// <item><term>–––––––––</term></item> + /// <item><term><small>DocFreq+1</small></term></item> + /// </list> + /// </term> + /// <term><big>)</big></term> + /// </item> + /// </list> + /// <para/> + /// + /// </description></item> + /// + /// <item><description> + /// <a name="formula_coord"></a> /// <b><i>coord(q,d)</i></b> /// is a score factor based on how many of the query terms are found in the specified document. /// Typically, a document that contains more of the query's terms will receive a higher score /// than another document with fewer query terms. 
/// this is a search time factor computed in - /// <seealso cref="#coord(int, int) coord(q,d)"/> + /// coord(q,d) (<see cref="Coord(int, int)"/>) /// by the Similarity in effect at search time. - /// <br> <br> - /// </li> + /// <para/> + /// </description></item> /// - /// <li><b> - /// <A NAME="formula_queryNorm"></A> + /// <item><description><b> + /// <a name="formula_queryNorm"></a> /// <i>queryNorm(q)</i> /// </b> /// is a normalizing factor used to make scores between queries comparable. @@ -375,128 +361,122 @@ namespace Lucene.Net.Search.Similarities /// this is a search time factor computed by the Similarity in effect at search time. /// /// The default computation in - /// <seealso cref="Lucene.Net.Search.Similarities.DefaultSimilarity#queryNorm(float) DefaultSimilarity"/> + /// DefaultSimilarity (<see cref="Lucene.Net.Search.Similarities.DefaultSimilarity.QueryNorm(float)"/>) /// produces a <a href="http://en.wikipedia.org/wiki/Euclidean_norm#Euclidean_norm">Euclidean norm</a>: - /// <br> <br> - /// <table cellpadding="1" cellspacing="0" border="0" align="center" style="width:auto"> - /// <tr> - /// <td valign="middle" align="right" rowspan="1"> - /// queryNorm(q) = - /// <seealso cref="Lucene.Net.Search.Similarities.DefaultSimilarity#queryNorm(float) queryNorm(sumOfSquaredWeights)"/> - /// = - /// </td> - /// <td valign="middle" align="center" rowspan="1"> - /// <table> - /// <tr><td align="center" style="text-align: center"><big>1</big></td></tr> - /// <tr><td align="center" style="text-align: center"><big> - /// –––––––––––––– - /// </big></td></tr> - /// <tr><td align="center" style="text-align: center">sumOfSquaredWeights<sup><big>½</big></sup></td></tr> - /// </table> - /// </td> - /// </tr> - /// </table> - /// <br> <br> + /// + /// <para/> + /// <list type="table"> + /// <item> + /// <term> + /// queryNorm(q)   =   + /// queryNorm(sumOfSquaredWeights) + ///   =   + /// </term> + /// <term> + /// <list type="table"> + /// <item><term><big>1</big></term></item> + /// <item><term><big>––––––––––––––</big></term></item> + /// <item><term>sumOfSquaredWeights<sup><big>½</big></sup></term></item> + /// </list> + /// </term> + /// </item> + /// </list> + /// <para/> /// /// The sum of squared weights (of the query terms) is - /// computed by the query <seealso cref="Lucene.Net.Search.Weight"/> object. - /// For example, a <seealso cref="Lucene.Net.Search.BooleanQuery"/> + /// computed by the query <see cref="Lucene.Net.Search.Weight"/> object. 
+ /// For example, a <see cref="Lucene.Net.Search.BooleanQuery"/> /// computes this value as: - /// - /// <br> <br> - /// <table cellpadding="1" cellspacing="0" border="0" align="center" style="width:auto"> - /// <tr> - /// <td valign="middle" align="right" rowspan="1"> - /// <seealso cref="Lucene.Net.Search.Weight#getValueForNormalization() sumOfSquaredWeights"/> = - /// <seealso cref="Lucene.Net.Search.Query#getBoost() q.getBoost()"/> <sup><big>2</big></sup> - /// · - /// </td> - /// <td valign="bottom" align="center" rowspan="1" style="text-align: center"> - /// <big><big><big>∑</big></big></big> - /// </td> - /// <td valign="middle" align="right" rowspan="1"> - /// <big><big>(</big></big> - /// <A HREF="#formula_idf">idf(t)</A> · - /// <A HREF="#formula_termBoost">t.getBoost()</A> - /// <big><big>) <sup>2</sup> </big></big> - /// </td> - /// </tr> - /// <tr valigh="top"> - /// <td></td> - /// <td align="center" style="text-align: center"><small>t in q</small></td> - /// <td></td> - /// </tr> - /// </table> - /// <br> <br> - /// - /// </li> - /// - /// <li> - /// <A NAME="formula_termBoost"></A> - /// <b><i>t.getBoost()</i></b> + /// + /// <para/> + /// <list type="table"> + /// <item> + /// <term> + /// sumOfSquaredWeights   =   + /// q.Boost <sup><big>2</big></sup> + ///  ·  + /// </term> + /// <term><big><big><big>∑</big></big></big></term> + /// <term> + /// <big><big>(</big></big> + /// <a href="#formula_idf">idf(t)</a>  ·  + /// <a href="#formula_termBoost">t.Boost</a> + /// <big><big>) <sup>2</sup> </big></big> + /// </term> + /// </item> + /// <item> + /// <term></term> + /// <term><small>t in q</small></term> + /// <term></term> + /// </item> + /// </list> + /// where sumOfSquaredWeights is <see cref="Weight.GetValueForNormalization()"/> and + /// q.Boost is <see cref="Query.Boost"/> + /// <para/> + /// </description></item> + /// + /// <item><description> + /// <a name="formula_termBoost"></a> + /// <b><i>t.Boost</i></b> /// is a search time boost of term <i>t</i> in the query <i>q</i> as /// specified in the query text - /// (see <A HREF="{@docRoot}/../queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Boosting_a_Term">query syntax</A>), + /// (see <a href="{@docRoot}/../queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Boosting_a_Term">query syntax</a>), /// or as set by application calls to - /// <seealso cref="Lucene.Net.Search.Query#setBoost(float) setBoost()"/>. + /// <see cref="Lucene.Net.Search.Query.Boost"/>. /// Notice that there is really no direct API for accessing a boost of one term in a multi term query, /// but rather multi terms are represented in a query as multi - /// <seealso cref="Lucene.Net.Search.TermQuery TermQuery"/> objects, + /// <see cref="Lucene.Net.Search.TermQuery"/> objects, /// and so the boost of a term in the query is accessible by calling the sub-query - /// <seealso cref="Lucene.Net.Search.Query#getBoost() getBoost()"/>. - /// <br> <br> - /// </li> + /// <see cref="Lucene.Net.Search.Query.Boost"/>. 
+ /// <para/> + /// </description></item> /// - /// <li> - /// <A NAME="formula_norm"></A> + /// <item><description> + /// <a name="formula_norm"></a> /// <b><i>norm(t,d)</i></b> encapsulates a few (indexing time) boost and length factors: /// - /// <ul> - /// <li><b>Field boost</b> - set by calling - /// <seealso cref="Field#setBoost(float) field.setBoost()"/> + /// <list type="bullet"> + /// <item><description><b>Field boost</b> - set + /// <see cref="Documents.Field.Boost"/> /// before adding the field to a document. - /// </li> - /// <li><b>lengthNorm</b> - computed + /// </description></item> + /// <item><description><b>lengthNorm</b> - computed /// when the document is added to the index in accordance with the number of tokens /// of this field in the document, so that shorter fields contribute more to the score. - /// LengthNorm is computed by the Similarity class in effect at indexing. - /// </li> - /// </ul> - /// The <seealso cref="#computeNorm"/> method is responsible for - /// combining all of these factors into a single float. + /// LengthNorm is computed by the <see cref="Similarity"/> class in effect at indexing. + /// </description></item> + /// </list> + /// The <see cref="ComputeNorm(FieldInvertState)"/> method is responsible for + /// combining all of these factors into a single <see cref="float"/>. /// - /// <p> + /// <para/> /// When a document is added to the index, all the above factors are multiplied. /// If the document has multiple fields with the same name, all their boosts are multiplied together: - /// - /// <br> <br> - /// <table cellpadding="1" cellspacing="0" border="0" align="center" style="width:auto"> - /// <tr> - /// <td valign="middle" align="right" rowspan="1"> - /// norm(t,d) = - /// lengthNorm - /// · - /// </td> - /// <td valign="bottom" align="center" rowspan="1" style="text-align: center"> - /// <big><big><big>∏</big></big></big> - /// </td> - /// <td valign="middle" align="right" rowspan="1"> - /// <seealso cref="Lucene.Net.Index.IIndexableField#boost() f.boost"/>() - /// </td> - /// </tr> - /// <tr valigh="top"> - /// <td></td> - /// <td align="center" style="text-align: center"><small>field <i><b>f</b></i> in <i>d</i> named as <i><b>t</b></i></small></td> - /// <td></td> - /// </tr> - /// </table> + /// + /// <para/> + /// <list type="table"> + /// <item> + /// <term> + /// norm(t,d)   =   + /// lengthNorm + ///  ·  + /// </term> + /// <term><big><big><big>∏</big></big></big></term> + /// <term><see cref="Index.IIndexableField.Boost"/></term> + /// </item> + /// <item> + /// <term></term> + /// <term><small>field <i><b>f</b></i> in <i>d</i> named as <i><b>t</b></i></small></term> + /// <term></term> + /// </item> + /// </list> /// Note that search time is too late to modify this <i>norm</i> part of scoring, - /// e.g. by using a different <seealso cref="Similarity"/> for search. - /// </li> - /// </ol> + /// e.g. by using a different <see cref="Similarity"/> for search. + /// </description></item> + /// </list> /// </summary> - /// <seealso cref= Lucene.Net.Index.IndexWriterConfig#setSimilarity(Similarity) </seealso> - /// <seealso cref= IndexSearcher#setSimilarity(Similarity) </seealso> + /// <seealso cref="Lucene.Net.Index.IndexWriterConfig.Similarity"/> + /// <seealso cref="IndexSearcher.Similarity"/> #if FEATURE_SERIALIZABLE [Serializable] #endif @@ -514,14 +494,14 @@ namespace Lucene.Net.Search.Similarities /// Computes a score factor based on the fraction of all query terms that a /// document contains. 
this value is multiplied into scores. /// - /// <p>The presence of a large portion of the query terms indicates a better + /// <para/>The presence of a large portion of the query terms indicates a better /// match with the query, so implementations of this method usually return /// larger values when the ratio between these parameters is large and smaller /// values when the ratio between them is small. /// </summary> - /// <param name="overlap"> the number of query terms matched in the document </param> - /// <param name="maxOverlap"> the total number of terms in the query </param> - /// <returns> a score factor based on term overlap with the query </returns> + /// <param name="overlap"> The number of query terms matched in the document </param> + /// <param name="maxOverlap"> The total number of terms in the query </param> + /// <returns> A score factor based on term overlap with the query </returns> public override abstract float Coord(int overlap, int maxOverlap); /// <summary> @@ -531,49 +511,49 @@ namespace Lucene.Net.Search.Similarities /// computed as 1/sqrt(sumOfSquaredWeights), other implementations might /// completely ignore sumOfSquaredWeights (ie return 1). /// - /// <p>this does not affect ranking, but the default implementation does make scores + /// <para/>This does not affect ranking, but the default implementation does make scores /// from different queries more comparable than they would be by eliminating the - /// magnitude of the Query vector as a factor in the score. + /// magnitude of the <see cref="Query"/> vector as a factor in the score. /// </summary> - /// <param name="sumOfSquaredWeights"> the sum of the squares of query term weights </param> - /// <returns> a normalization factor for query weights </returns> + /// <param name="sumOfSquaredWeights"> The sum of the squares of query term weights </param> + /// <returns> A normalization factor for query weights </returns> public override abstract float QueryNorm(float sumOfSquaredWeights); /// <summary> /// Computes a score factor based on a term or phrase's frequency in a - /// document. this value is multiplied by the <seealso cref="#idf(long, long)"/> + /// document. This value is multiplied by the <see cref="Idf(long, long)"/> /// factor for each term in the query and these products are then summed to /// form the initial score for a document. /// - /// <p>Terms and phrases repeated in a document indicate the topic of the + /// <para/>Terms and phrases repeated in a document indicate the topic of the /// document, so implementations of this method usually return larger values - /// when <code>freq</code> is large, and smaller values when <code>freq</code> + /// when <paramref name="freq"/> is large, and smaller values when <paramref name="freq"/> /// is small. /// </summary> - /// <param name="freq"> the frequency of a term within a document </param> - /// <returns> a score factor based on a term's within-document frequency </returns> + /// <param name="freq"> The frequency of a term within a document </param> + /// <returns> A score factor based on a term's within-document frequency </returns> public abstract float Tf(float freq); /// <summary> /// Computes a score factor for a simple term and returns an explanation /// for that score factor. 
/// - /// <p> + /// <para/> /// The default implementation uses: /// - /// <pre class="prettyprint"> - /// idf(docFreq, searcher.maxDoc()); - /// </pre> + /// <code> + /// Idf(docFreq, searcher.MaxDoc); + /// </code> /// - /// Note that <seealso cref="CollectionStatistics#maxDoc()"/> is used instead of - /// <seealso cref="Lucene.Net.Index.IndexReader#numDocs() IndexReader#numDocs()"/> because also - /// <seealso cref="TermStatistics#docFreq()"/> is used, and when the latter - /// is inaccurate, so is <seealso cref="CollectionStatistics#maxDoc()"/>, and in the same direction. - /// In addition, <seealso cref="CollectionStatistics#maxDoc()"/> is more efficient to compute + /// Note that <see cref="CollectionStatistics.MaxDoc"/> is used instead of + /// <see cref="Lucene.Net.Index.IndexReader.NumDocs"/> because also + /// <see cref="TermStatistics.DocFreq"/> is used, and when the latter + /// is inaccurate, so is <see cref="CollectionStatistics.MaxDoc"/>, and in the same direction. + /// In addition, <see cref="CollectionStatistics.MaxDoc"/> is more efficient to compute /// </summary> - /// <param name="collectionStats"> collection-level statistics </param> - /// <param name="termStats"> term-level statistics for the term </param> - /// <returns> an Explain object that includes both an idf score factor + /// <param name="collectionStats"> Collection-level statistics </param> + /// <param name="termStats"> Term-level statistics for the term </param> + /// <returns> An Explain object that includes both an idf score factor /// and an explanation for the term. </returns> public virtual Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics termStats) { @@ -586,13 +566,13 @@ namespace Lucene.Net.Search.Similarities /// <summary> /// Computes a score factor for a phrase. /// - /// <p> + /// <para/> /// The default implementation sums the idf factor for /// each term in the phrase. /// </summary> - /// <param name="collectionStats"> collection-level statistics </param> - /// <param name="termStats"> term-level statistics for the terms in the phrase </param> - /// <returns> an Explain object that includes both an idf + /// <param name="collectionStats"> Collection-level statistics </param> + /// <param name="termStats"> Term-level statistics for the terms in the phrase </param> + /// <returns> An Explain object that includes both an idf /// score factor for the phrase and an explanation /// for each term. </returns> public virtual Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats) @@ -614,27 +594,27 @@ namespace Lucene.Net.Search.Similarities /// <summary> /// Computes a score factor based on a term's document frequency (the number - /// of documents which contain the term). this value is multiplied by the - /// <seealso cref="#tf(float)"/> factor for each term in the query and these products are + /// of documents which contain the term). This value is multiplied by the + /// <see cref="Tf(float)"/> factor for each term in the query and these products are /// then summed to form the initial score for a document. /// - /// <p>Terms that occur in fewer documents are better indicators of topic, so + /// <para/>Terms that occur in fewer documents are better indicators of topic, so /// implementations of this method usually return larger values for rare terms, /// and smaller values for common terms. 
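Since Tf, Idf and Coord are the components this class documentation singles out for overriding, here is a minimal sketch of doing exactly that: a sublinear tf in place of the default sqrt(freq), an idf kept in the documented 1 + log(NumDocs / (DocFreq + 1)) shape, and the fraction-of-matched-terms coord factor. The subclass name is illustrative:

    using System;
    using Lucene.Net.Search.Similarities;

    public class SublinearTfSimilarity : DefaultSimilarity
    {
        public override float Tf(float freq)
        {
            // 1 + log(freq) grows more slowly than the default sqrt(freq),
            // so repeated occurrences of a term saturate sooner.
            return freq > 0 ? Math.Max(0f, 1f + (float)Math.Log(freq)) : 0f;
        }

        public override float Idf(long docFreq, long numDocs)
        {
            // Same shape as the default formula quoted in the class documentation above.
            return (float)(Math.Log((double)numDocs / (docFreq + 1)) + 1.0);
        }

        public override float Coord(int overlap, int maxOverlap)
        {
            // Reward documents that match a larger fraction of the query terms.
            return overlap / (float)maxOverlap;
        }
    }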
/// </summary> - /// <param name="docFreq"> the number of documents which contain the term </param> - /// <param name="numDocs"> the total number of documents in the collection </param> - /// <returns> a score factor based on the term's document frequency </returns> + /// <param name="docFreq"> The number of documents which contain the term </param> + /// <param name="numDocs"> The total number of documents in the collection </param> + /// <returns> A score factor based on the term's document frequency </returns> public abstract float Idf(long docFreq, long numDocs); /// <summary> /// Compute an index-time normalization value for this field instance. - /// <p> - /// this value will be stored in a single byte lossy representation by - /// <seealso cref="#encodeNormValue(float)"/>. + /// <para/> + /// This value will be stored in a single byte lossy representation by + /// <see cref="EncodeNormValue(float)"/>. /// </summary> - /// <param name="state"> statistics of the current field (such as length, boost, etc) </param> - /// <returns> an index-time normalization value </returns> + /// <param name="state"> Statistics of the current field (such as length, boost, etc) </param> + /// <returns> An index-time normalization value </returns> public abstract float LengthNorm(FieldInvertState state); public override sealed long ComputeNorm(FieldInvertState state) @@ -646,7 +626,7 @@ namespace Lucene.Net.Search.Similarities /// <summary> /// Decodes a normalization factor stored in an index. /// </summary> - /// <seealso cref= #encodeNormValue(float) </seealso> + /// <see cref="EncodeNormValue(float)"/> public abstract float DecodeNormValue(long norm); /// <summary> @@ -658,14 +638,14 @@ namespace Lucene.Net.Search.Similarities /// this value is summed for each sloppy phrase match in a document to form /// the frequency to be used in scoring instead of the exact term count. /// - /// <p>A phrase match with a small edit distance to a document passage more + /// <para/>A phrase match with a small edit distance to a document passage more /// closely matches the document, so implementations of this method usually /// return larger values when the edit distance is small and smaller values /// when it is large. /// </summary> - /// <seealso cref= PhraseQuery#setSlop(int) </seealso> - /// <param name="distance"> the edit distance of this sloppy phrase match </param> - /// <returns> the frequency increment for this match </returns> + /// <seealso cref="PhraseQuery.Slop"/> + /// <param name="distance"> The edit distance of this sloppy phrase match </param> + /// <returns> The frequency increment for this match </returns> public abstract float SloppyFreq(int distance); /// <summary>
