Lucene.Net.Codecs.Lucene41: Fixed XML documentation comment warnings
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/3221b638 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/3221b638 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/3221b638 Branch: refs/heads/master Commit: 3221b6383abcc0c10537a3c145406bbb918f6285 Parents: ee52fd3 Author: Shad Storhaug <[email protected]> Authored: Mon Jun 5 14:09:15 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Tue Jun 6 06:58:41 2017 +0700 ---------------------------------------------------------------------- CONTRIBUTING.md | 3 +- src/Lucene.Net/Codecs/Lucene41/ForUtil.cs | 54 +-- src/Lucene.Net/Codecs/Lucene41/Lucene41Codec.cs | 17 +- .../Lucene41/Lucene41PostingsBaseFormat.cs | 6 +- .../Codecs/Lucene41/Lucene41PostingsFormat.cs | 404 +++++++++---------- .../Codecs/Lucene41/Lucene41PostingsReader.cs | 7 +- .../Codecs/Lucene41/Lucene41PostingsWriter.cs | 11 +- .../Codecs/Lucene41/Lucene41SkipReader.cs | 22 +- .../Codecs/Lucene41/Lucene41SkipWriter.cs | 7 +- .../Lucene41/Lucene41StoredFieldsFormat.cs | 146 +++---- 10 files changed, 336 insertions(+), 341 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3221b638/CONTRIBUTING.md ---------------------------------------------------------------------- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6886da2..5f422f8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -54,8 +54,7 @@ helpers to help with that, see for examples see our [Java style methods to avoid 1. Codecs.Compressing (namespace) 2. Codecs.Lucene3x (namespace) 3. Codecs.Lucene40 (namespace) - 4. Codecs.Lucene41 (namespace) - 5. Util.Packed (namespace) + 4. Util.Packed (namespace) 2. Lucene.Net.Codecs (project) 1. Appending (namespace) 2. 
BlockTerms (namespace) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3221b638/src/Lucene.Net/Codecs/Lucene41/ForUtil.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene41/ForUtil.cs b/src/Lucene.Net/Codecs/Lucene41/ForUtil.cs index 3488cb3..d65a350 100644 --- a/src/Lucene.Net/Codecs/Lucene41/ForUtil.cs +++ b/src/Lucene.Net/Codecs/Lucene41/ForUtil.cs @@ -23,16 +23,6 @@ namespace Lucene.Net.Codecs.Lucene41 * limitations under the License. */ - /* - using DataInput = Lucene.Net.Store.DataInput; - using DataOutput = Lucene.Net.Store.DataOutput; - using IndexInput = Lucene.Net.Store.IndexInput; - using IndexOutput = Lucene.Net.Store.IndexOutput; - using Decoder = Lucene.Net.Util.Packed.PackedInts.Decoder; - using FormatAndBits = Lucene.Net.Util.Packed.PackedInts.FormatAndBits; - using PackedInts = Lucene.Net.Util.Packed.PackedInts; - */ - /// <summary> /// Encode all values in normal area with fixed bit width, /// which is determined by the max value in this block. @@ -46,15 +36,15 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Upper limit of the number of bytes that might be required to stored - /// <code>BLOCK_SIZE</code> encoded values. + /// <see cref="Lucene41PostingsFormat.BLOCK_SIZE"/> encoded values. /// </summary> public static readonly int MAX_ENCODED_SIZE = Lucene41PostingsFormat.BLOCK_SIZE * 4; /// <summary> /// Upper limit of the number of values that might be decoded in a single call to - /// <seealso cref="#readBlock(IndexInput, byte[], int[])"/>. Although values after - /// <code>BLOCK_SIZE</code> are garbage, it is necessary to allocate value buffers - /// whose size is >= MAX_DATA_SIZE to avoid <seealso cref="ArrayIndexOutOfBoundsException"/>s. + /// <see cref="ReadBlock(IndexInput, byte[], int[])"/>. 
Although values after + /// <see cref="Lucene41PostingsFormat.BLOCK_SIZE"/> are garbage, it is necessary to allocate value buffers + /// whose size is >= MAX_DATA_SIZE to avoid <see cref="IndexOutOfRangeException"/>s. /// </summary> public static readonly int MAX_DATA_SIZE; @@ -81,8 +71,8 @@ namespace Lucene.Net.Codecs.Lucene41 } /// <summary> - /// Compute the number of iterations required to decode <code>BLOCK_SIZE</code> - /// values with the provided <seealso cref="Decoder"/>. + /// Compute the number of iterations required to decode <see cref="Lucene41PostingsFormat.BLOCK_SIZE"/> + /// values with the provided <see cref="PackedInt32s.IDecoder"/>. /// </summary> private static int ComputeIterations(PackedInt32s.IDecoder decoder) { @@ -91,7 +81,7 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Compute the number of bytes required to encode a block of values that require - /// <code>bitsPerValue</code> bits per value with format <code>format</code>. + /// <paramref name="bitsPerValue"/> bits per value with format <paramref name="format"/>. /// </summary> private static int EncodedSize(PackedInt32s.Format format, int packedIntsVersion, int bitsPerValue) { @@ -106,7 +96,7 @@ namespace Lucene.Net.Codecs.Lucene41 private readonly int[] iterations; /// <summary> - /// Create a new <seealso cref="ForUtil"/> instance and save state into <code>out</code>. + /// Create a new <see cref="ForUtil"/> instance and save state into <paramref name="out"/>. /// </summary> internal ForUtil(float acceptableOverheadRatio, DataOutput @out) { @@ -131,7 +121,7 @@ namespace Lucene.Net.Codecs.Lucene41 } /// <summary> - /// Restore a <seealso cref="ForUtil"/> from a <seealso cref="DataInput"/>. + /// Restore a <see cref="ForUtil"/> from a <see cref="DataInput"/>. /// </summary> internal ForUtil(DataInput @in) { @@ -158,12 +148,12 @@ namespace Lucene.Net.Codecs.Lucene41 } /// <summary> - /// Write a block of data (<code>For</code> format). 
+ /// Write a block of data (<c>For</c> format). /// </summary> - /// <param name="data"> the data to write </param> - /// <param name="encoded"> a buffer to use to encode data </param> - /// <param name="out"> the destination output </param> - /// <exception cref="IOException"> If there is a low-level I/O error </exception> + /// <param name="data"> The data to write. </param> + /// <param name="encoded"> A buffer to use to encode data. </param> + /// <param name="out"> The destination output. </param> + /// <exception cref="System.IO.IOException"> If there is a low-level I/O error. </exception> internal void WriteBlock(int[] data, byte[] encoded, IndexOutput @out) { if (IsAllEqual(data)) @@ -188,12 +178,12 @@ namespace Lucene.Net.Codecs.Lucene41 } /// <summary> - /// Read the next block of data (<code>For</code> format). + /// Read the next block of data (<c>For</c> format). /// </summary> - /// <param name="in"> the input to use to read data </param> - /// <param name="encoded"> a buffer that can be used to store encoded data </param> - /// <param name="decoded"> where to write decoded data </param> - /// <exception cref="IOException"> If there is a low-level I/O error </exception> + /// <param name="in"> The input to use to read data. </param> + /// <param name="encoded"> A buffer that can be used to store encoded data. </param> + /// <param name="decoded"> Where to write decoded data. </param> + /// <exception cref="System.IO.IOException"> If there is a low-level I/O error. </exception> internal void ReadBlock(IndexInput @in, byte[] encoded, int[] decoded) { int numBits = @in.ReadByte(); @@ -219,8 +209,8 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Skip the next block of data. /// </summary> - /// <param name="in"> the input where to read data </param> - /// <exception cref="IOException"> If there is a low-level I/O error </exception> + /// <param name="in"> The input where to read data. 
</param> + /// <exception cref="System.IO.IOException"> If there is a low-level I/O error. </exception> internal void SkipBlock(IndexInput @in) { int numBits = @in.ReadByte(); @@ -249,7 +239,7 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Compute the number of bits required to serialize any of the longs in - /// <code>data</code>. + /// <paramref name="data"/>. /// </summary> private static int BitsRequired(int[] data) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3221b638/src/Lucene.Net/Codecs/Lucene41/Lucene41Codec.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene41/Lucene41Codec.cs b/src/Lucene.Net/Codecs/Lucene41/Lucene41Codec.cs index c59c251..f691c5c 100644 --- a/src/Lucene.Net/Codecs/Lucene41/Lucene41Codec.cs +++ b/src/Lucene.Net/Codecs/Lucene41/Lucene41Codec.cs @@ -34,13 +34,14 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Implements the Lucene 4.1 index format, with configurable per-field postings formats. - /// <p> + /// <para/> /// If you want to reuse functionality of this codec in another codec, extend - /// <seealso cref="FilterCodec"/>. + /// <see cref="FilterCodec"/>. + /// <para/> + /// See <see cref="Lucene.Net.Codecs.Lucene41"/> package documentation for file format details. + /// <para/> + /// @lucene.experimental /// </summary> - /// <seealso cref= Lucene.Net.Codecs.Lucene41 package documentation for file format details. </seealso> - /// @deprecated Only for reading old 4.0 segments - /// @lucene.experimental [Obsolete("Only for reading old 4.0 segments")] [CodecName("Lucene41")] // LUCENENET specific - using CodecName attribute to ensure the default name passed from subclasses is the same as this class name public class Lucene41Codec : Codec @@ -124,9 +125,9 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Returns the postings format that should be used for writing - /// new segments of <code>field</code>. 
- /// - /// The default implementation always returns "Lucene41" + /// new segments of <paramref name="field"/>. + /// <para/> + /// The default implementation always returns "Lucene41" /// </summary> public virtual PostingsFormat GetPostingsFormatForField(string field) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3221b638/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsBaseFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsBaseFormat.cs b/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsBaseFormat.cs index 6c27407..94c4d54 100644 --- a/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsBaseFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsBaseFormat.cs @@ -21,9 +21,9 @@ namespace Lucene.Net.Codecs.Lucene41 using SegmentWriteState = Lucene.Net.Index.SegmentWriteState; /// <summary> - /// Provides a <seealso cref="PostingsReaderBase"/> and {@link - /// PostingsWriterBase}. - /// + /// Provides a <see cref="Codecs.PostingsReaderBase"/> and + /// <see cref="Codecs.PostingsWriterBase"/>. + /// <para/> /// @lucene.experimental /// </summary> http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3221b638/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsFormat.cs b/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsFormat.cs index e591999..043b6e7 100644 --- a/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsFormat.cs @@ -27,132 +27,132 @@ namespace Lucene.Net.Codecs.Lucene41 /// Lucene 4.1 postings format, which encodes postings in packed integer blocks /// for fast decode. /// - /// <p><b>NOTE</b>: this format is still experimental and + /// <para><b>NOTE</b>: this format is still experimental and /// subject to change without backwards compatibility. 
/// - /// <p> + /// <para> /// Basic idea: - /// <ul> - /// <li> + /// <list type="bullet"> + /// <item><description> /// <b>Packed Blocks and VInt Blocks</b>: - /// <p>In packed blocks, integers are encoded with the same bit width packed format (<see cref="Util.Packed.PackedInt32s"/>): + /// <para>In packed blocks, integers are encoded with the same bit width packed format (<see cref="Util.Packed.PackedInt32s"/>): /// the block size (i.e. number of integers inside block) is fixed (currently 128). Additionally blocks - /// that are all the same value are encoded in an optimized way.</p> - /// <p>In VInt blocks, integers are encoded as VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>): - /// the block size is variable.</p> - /// </li> + /// that are all the same value are encoded in an optimized way.</para> + /// <para>In VInt blocks, integers are encoded as VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>): + /// the block size is variable.</para> + /// </description></item> /// - /// <li> + /// <item><description> /// <b>Block structure</b>: - /// <p>When the postings are long enough, Lucene41PostingsFormat will try to encode most integer data - /// as a packed block.</p> - /// <p>Take a term with 259 documents as an example, the first 256 document ids are encoded as two packed - /// blocks, while the remaining 3 are encoded as one VInt block. </p> - /// <p>Different kinds of data are always encoded separately into different packed blocks, but may - /// possibly be interleaved into the same VInt block. </p> - /// <p>this strategy is applied to pairs: + /// <para>When the postings are long enough, Lucene41PostingsFormat will try to encode most integer data + /// as a packed block.</para> + /// <para>Take a term with 259 documents as an example, the first 256 document ids are encoded as two packed + /// blocks, while the remaining 3 are encoded as one VInt block. 
</para> + /// <para>Different kinds of data are always encoded separately into different packed blocks, but may + /// possibly be interleaved into the same VInt block. </para> + /// <para>This strategy is applied to pairs: /// <document number, frequency>, /// <position, payload length>, /// <position, offset start, offset length>, and - /// <position, payload length, offsetstart, offset length>.</p> - /// </li> + /// <position, payload length, offsetstart, offset length>.</para> + /// </description></item> /// - /// <li> + /// <item><description> /// <b>Skipdata settings</b>: - /// <p>The structure of skip table is quite similar to previous version of Lucene. Skip interval is the + /// <para>The structure of skip table is quite similar to previous version of Lucene. Skip interval is the /// same as block size, and each skip entry points to the beginning of each block. However, for - /// the first block, skip data is omitted.</p> - /// </li> + /// the first block, skip data is omitted.</para> + /// </description></item> /// - /// <li> + /// <item><description> /// <b>Positions, Payloads, and Offsets</b>: - /// <p>A position is an integer indicating where the term occurs within one document. + /// <para>A position is an integer indicating where the term occurs within one document. /// A payload is a blob of metadata associated with current position. /// An offset is a pair of integers indicating the tokenized start/end offsets for given term - /// in current position: it is essentially a specialized payload. </p> - /// <p>When payloads and offsets are not omitted, numPositions==numPayloads==numOffsets (assuming a + /// in current position: it is essentially a specialized payload. </para> + /// <para>When payloads and offsets are not omitted, numPositions==numPayloads==numOffsets (assuming a /// null payload contributes one count). As mentioned in block structure, it is possible to encode - /// these three either combined or separately. 
- /// <p>In all cases, payloads and offsets are stored together. When encoded as a packed block, + /// these three either combined or separately.</para> + /// <para>In all cases, payloads and offsets are stored together. When encoded as a packed block, /// position data is separated out as .pos, while payloads and offsets are encoded in .pay (payload /// metadata will also be stored directly in .pay). When encoded as VInt blocks, all these three are - /// stored interleaved into the .pos (so is payload metadata).</p> - /// <p>With this strategy, the majority of payload and offset data will be outside .pos file. + /// stored interleaved into the .pos (so is payload metadata).</para> + /// <para>With this strategy, the majority of payload and offset data will be outside .pos file. /// So for queries that require only position data, running on a full index with payloads and offsets, - /// this reduces disk pre-fetches.</p> - /// </li> - /// </ul> - /// </p> + /// this reduces disk pre-fetches.</para> + /// </description></item> + /// </list> + /// </para> /// - /// <p> + /// <para> /// Files and detailed format: - /// <ul> - /// <li><tt>.tim</tt>: <a href="#Termdictionary">Term Dictionary</a></li> - /// <li><tt>.tip</tt>: <a href="#Termindex">Term Index</a></li> - /// <li><tt>.doc</tt>: <a href="#Frequencies">Frequencies and Skip Data</a></li> - /// <li><tt>.pos</tt>: <a href="#Positions">Positions</a></li> - /// <li><tt>.pay</tt>: <a href="#Payloads">Payloads and Offsets</a></li> - /// </ul> - /// </p> + /// <list type="bullet"> + /// <item><description><c>.tim</c>: <a href="#Termdictionary">Term Dictionary</a></description></item> + /// <item><description><c>.tip</c>: <a href="#Termindex">Term Index</a></description></item> + /// <item><description><c>.doc</c>: <a href="#Frequencies">Frequencies and Skip Data</a></description></item> + /// <item><description><c>.pos</c>: <a href="#Positions">Positions</a></description></item> + /// <item><description><c>.pay</c>: <a 
href="#Payloads">Payloads and Offsets</a></description></item> + /// </list> + /// </para> /// /// <a name="Termdictionary" id="Termdictionary"></a> /// <dl> /// <dd> /// <b>Term Dictionary</b> /// - /// <p>The .tim file contains the list of terms in each + /// <para>The .tim file contains the list of terms in each /// field along with per-term statistics (such as docfreq) /// and pointers to the frequencies, positions, payload and /// skip data in the .doc, .pos, and .pay files. - /// See <seealso cref="BlockTreeTermsWriter"/> for more details on the format. - /// </p> + /// See <see cref="BlockTreeTermsWriter"/> for more details on the format. + /// </para> /// - /// <p>NOTE: The term dictionary can plug into different postings implementations: + /// <para>NOTE: The term dictionary can plug into different postings implementations: /// the postings writer/reader are actually responsible for encoding - /// and decoding the PostingsHeader and TermMetadata sections described here:</p> + /// and decoding the PostingsHeader and TermMetadata sections described here:</para> /// - /// <ul> - /// <li>PostingsHeader --> Header, PackedBlockSize</li> - /// <li>TermMetadata --> (DocFPDelta|SingletonDocID), PosFPDelta?, PosVIntBlockFPDelta?, PayFPDelta?, - /// SkipFPDelta?</li> - /// <li>Header, --> <seealso cref="CodecUtil#writeHeader CodecHeader"/></li> - /// <li>PackedBlockSize, SingletonDocID --> <seealso cref="DataOutput#writeVInt VInt"/></li> - /// <li>DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --> <seealso cref="DataOutput#writeVLong VLong"/></li> - /// <li>Footer --> <seealso cref="CodecUtil#writeFooter CodecFooter"/></li> - /// </ul> - /// <p>Notes:</p> - /// <ul> - /// <li>Header is a <seealso cref="CodecUtil#writeHeader CodecHeader"/> storing the version information - /// for the postings.</li> - /// <li>PackedBlockSize is the fixed block size for packed blocks. 
In packed block, bit width is + /// <list type="bullet"> + /// <item><description>PostingsHeader --> Header, PackedBlockSize</description></item> + /// <item><description>TermMetadata --> (DocFPDelta|SingletonDocID), PosFPDelta?, PosVIntBlockFPDelta?, PayFPDelta?, + /// SkipFPDelta?</description></item> + /// <item><description>Header, --> CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) </description></item> + /// <item><description>PackedBlockSize, SingletonDocID --> VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> + /// <item><description>DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --> VLong (<see cref="Store.DataOutput.WriteVInt64(long)"/>) </description></item> + /// <item><description>Footer --> CodecFooter (<see cref="CodecUtil.WriteFooter(Store.IndexOutput)"/>) </description></item> + /// </list> + /// <para>Notes:</para> + /// <list type="bullet"> + /// <item><description>Header is a CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) storing the version information + /// for the postings.</description></item> + /// <item><description>PackedBlockSize is the fixed block size for packed blocks. In packed block, bit width is /// determined by the largest integer. Smaller block size result in smaller variance among width /// of integers hence smaller indexes. Larger block size result in more efficient bulk i/o hence - /// better acceleration. this value should always be a multiple of 64, currently fixed as 128 as - /// a tradeoff. It is also the skip interval used to accelerate <seealso cref="DocsEnum#advance(int)"/>. - /// <li>DocFPDelta determines the position of this term's TermFreqs within the .doc file. + /// better acceleration. This value should always be a multiple of 64, currently fixed as 128 as + /// a tradeoff. 
It is also the skip interval used to accelerate <see cref="Search.DocIdSetIterator.Advance(int)"/>.</description></item> + /// <item><description>DocFPDelta determines the position of this term's TermFreqs within the .doc file. /// In particular, it is the difference of file offset between this term's /// data and previous term's data (or zero, for the first term in the block).On disk it is - /// stored as the difference from previous value in sequence. </li> - /// <li>PosFPDelta determines the position of this term's TermPositions within the .pos file. + /// stored as the difference from previous value in sequence. </description></item> + /// <item><description>PosFPDelta determines the position of this term's TermPositions within the .pos file. /// While PayFPDelta determines the position of this term's <TermPayloads, TermOffsets?> within /// the .pay file. Similar to DocFPDelta, it is the difference between two file positions (or - /// neglected, for fields that omit payloads and offsets).</li> - /// <li>PosVIntBlockFPDelta determines the position of this term's last TermPosition in last pos packed + /// neglected, for fields that omit payloads and offsets).</description></item> + /// <item><description>PosVIntBlockFPDelta determines the position of this term's last TermPosition in last pos packed /// block within the .pos file. It is synonym for PayVIntBlockFPDelta or OffsetVIntBlockFPDelta. - /// this is actually used to indicate whether it is necessary to load following + /// This is actually used to indicate whether it is necessary to load following /// payloads and offsets from .pos instead of .pay. Every time a new block of positions are to be /// loaded, the PostingsReader will use this value to check whether current block is packed format /// or VInt. When packed format, payloads and offsets are fetched from .pay, otherwise from .pos. /// (this value is neglected when total number of positions i.e. 
totalTermFreq is less or equal - /// to PackedBlockSize). - /// <li>SkipFPDelta determines the position of this term's SkipData within the .doc + /// to PackedBlockSize).</description></item> + /// <item><description>SkipFPDelta determines the position of this term's SkipData within the .doc /// file. In particular, it is the length of the TermFreq data. /// SkipDelta is only stored if DocFreq is not smaller than SkipMinimum - /// (i.e. 128 in Lucene41PostingsFormat).</li> - /// <li>SingletonDocID is an optimization when a term only appears in one document. In this case, instead + /// (i.e. 128 in Lucene41PostingsFormat).</description></item> + /// <item><description>SingletonDocID is an optimization when a term only appears in one document. In this case, instead /// of writing a file pointer to the .doc file (DocFPDelta), and then a VIntBlock at that location, the - /// single document ID is written to the term dictionary.</li> - /// </ul> + /// single document ID is written to the term dictionary.</description></item> + /// </list> /// </dd> /// </dl> /// @@ -160,8 +160,8 @@ namespace Lucene.Net.Codecs.Lucene41 /// <dl> /// <dd> /// <b>Term Index</b> - /// <p>The .tip file contains an index into the term dictionary, so that it can be - /// accessed randomly. See <seealso cref="BlockTreeTermsWriter"/> for more details on the format.</p> + /// <para>The .tip file contains an index into the term dictionary, so that it can be + /// accessed randomly. See <see cref="BlockTreeTermsWriter"/> for more details on the format.</para> /// </dd> /// </dl> /// @@ -171,86 +171,86 @@ namespace Lucene.Net.Codecs.Lucene41 /// <dd> /// <b>Frequencies and Skip Data</b> /// - /// <p>The .doc file contains the lists of documents which contain each term, along + /// <para>The .doc file contains the lists of documents which contain each term, along /// with the frequency of the term in that document (except when frequencies are - /// omitted: <seealso cref="IndexOptions#DOCS_ONLY"/>). 
It also saves skip data to the beginning of - /// each packed or VInt block, when the length of document list is larger than packed block size.</p> + /// omitted: <see cref="Index.IndexOptions.DOCS_ONLY"/>). It also saves skip data to the beginning of + /// each packed or VInt block, when the length of document list is larger than packed block size.</para> /// - /// <ul> - /// <li>docFile(.doc) --> Header, <TermFreqs, SkipData?><sup>TermCount</sup>, Footer</li> - /// <li>Header --> <seealso cref="CodecUtil#writeHeader CodecHeader"/></li> - /// <li>TermFreqs --> <PackedBlock> <sup>PackedDocBlockNum</sup>, - /// VIntBlock? </li> - /// <li>PackedBlock --> PackedDocDeltaBlock, PackedFreqBlock? - /// <li>VIntBlock --> <DocDelta[, Freq?]><sup>DocFreq-PackedBlockSize*PackedDocBlockNum</sup> - /// <li>SkipData --> <<SkipLevelLength, SkipLevel> - /// <sup>NumSkipLevels-1</sup>, SkipLevel>, SkipDatum?</li> - /// <li>SkipLevel --> <SkipDatum> <sup>TrimmedDocFreq/(PackedBlockSize^(Level + 1))</sup></li> - /// <li>SkipDatum --> DocSkip, DocFPSkip, <PosFPSkip, PosBlockOffset, PayLength?, - /// PayFPSkip?>?, SkipChildLevelPointer?</li> - /// <li>PackedDocDeltaBlock, PackedFreqBlock --> <seealso cref="PackedInts PackedInts"/></li> - /// <li>DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayByteUpto, PayFPSkip + /// <list type="bullet"> + /// <item><description>docFile(.doc) --> Header, <TermFreqs, SkipData?><sup>TermCount</sup>, Footer</description></item> + /// <item><description>Header --> CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>)</description></item> + /// <item><description>TermFreqs --> <PackedBlock> <sup>PackedDocBlockNum</sup>, + /// VIntBlock? 
</description></item> + /// <item><description>PackedBlock --> PackedDocDeltaBlock, PackedFreqBlock?</description></item> + /// <item><description>VIntBlock --> <DocDelta[, Freq?]><sup>DocFreq-PackedBlockSize*PackedDocBlockNum</sup></description></item> + /// <item><description>SkipData --> <<SkipLevelLength, SkipLevel> + /// <sup>NumSkipLevels-1</sup>, SkipLevel>, SkipDatum?</description></item> + /// <item><description>SkipLevel --> <SkipDatum> <sup>TrimmedDocFreq/(PackedBlockSize^(Level + 1))</sup></description></item> + /// <item><description>SkipDatum --> DocSkip, DocFPSkip, <PosFPSkip, PosBlockOffset, PayLength?, + /// PayFPSkip?>?, SkipChildLevelPointer?</description></item> + /// <item><description>PackedDocDeltaBlock, PackedFreqBlock --> PackedInts (<see cref="Util.Packed.PackedInt32s"/>) </description></item> + /// <item><description>DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayByteUpto, PayFPSkip /// --> - /// <seealso cref="DataOutput#writeVInt VInt"/></li> - /// <li>SkipChildLevelPointer --> <seealso cref="DataOutput#writeVLong VLong"/></li> - /// <li>Footer --> <seealso cref="CodecUtil#writeFooter CodecFooter"/></li> - /// </ul> - /// <p>Notes:</p> - /// <ul> - /// <li>PackedDocDeltaBlock is theoretically generated from two steps: - /// <ol> - /// <li>Calculate the difference between each document number and previous one, - /// and get a d-gaps list (for the first document, use absolute value); </li> - /// <li>For those d-gaps from first one to PackedDocBlockNum*PackedBlockSize<sup>th</sup>, - /// separately encode as packed blocks.</li> - /// </ol> + /// VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> + /// <item><description>SkipChildLevelPointer --> VLong (<see cref="Store.DataOutput.WriteVInt64(long)"/>) </description></item> + /// <item><description>Footer --> CodecFooter (<see cref="CodecUtil.WriteFooter(Store.IndexOutput)"/>) </description></item> + /// </list> + /// <para>Notes:</para> + /// 
<list type="bullet"> + /// <item><description>PackedDocDeltaBlock is theoretically generated from two steps: + /// <list type="number"> + /// <item><description>Calculate the difference between each document number and previous one, + /// and get a d-gaps list (for the first document, use absolute value); </description></item> + /// <item><description>For those d-gaps from first one to PackedDocBlockNum*PackedBlockSize<sup>th</sup>, + /// separately encode as packed blocks.</description></item> + /// </list> /// If frequencies are not omitted, PackedFreqBlock will be generated without d-gap step. - /// </li> - /// <li>VIntBlock stores remaining d-gaps (along with frequencies when possible) with a format + /// </description></item> + /// <item><description>VIntBlock stores remaining d-gaps (along with frequencies when possible) with a format /// that encodes DocDelta and Freq: - /// <p>DocDelta: if frequencies are indexed, this determines both the document + /// <para>DocDelta: if frequencies are indexed, this determines both the document /// number and the frequency. In particular, DocDelta/2 is the difference between /// this document number and the previous document number (or zero when this is the /// first document in a TermFreqs). When DocDelta is odd, the frequency is one. /// When DocDelta is even, the frequency is read as another VInt. 
If frequencies /// are omitted, DocDelta contains the gap (not multiplied by 2) between document - /// numbers and no frequency information is stored.</p> - /// <p>For example, the TermFreqs for a term which occurs once in document seven + /// numbers and no frequency information is stored.</para> + /// <para>For example, the TermFreqs for a term which occurs once in document seven /// and three times in document eleven, with frequencies indexed, would be the - /// following sequence of VInts:</p> - /// <p>15, 8, 3</p> - /// <p>If frequencies were omitted (<seealso cref="IndexOptions#DOCS_ONLY"/>) it would be this - /// sequence of VInts instead:</p> - /// <p>7,4</p> - /// </li> - /// <li>PackedDocBlockNum is the number of packed blocks for current term's docids or frequencies. - /// In particular, PackedDocBlockNum = floor(DocFreq/PackedBlockSize) </li> - /// <li>TrimmedDocFreq = DocFreq % PackedBlockSize == 0 ? DocFreq - 1 : DocFreq. + /// following sequence of VInts:</para> + /// <para>15, 8, 3</para> + /// <para>If frequencies were omitted (<see cref="Index.IndexOptions.DOCS_ONLY"/>) it would be this + /// sequence of VInts instead:</para> + /// <para>7,4</para> + /// </description></item> + /// <item><description>PackedDocBlockNum is the number of packed blocks for current term's docids or frequencies. + /// In particular, PackedDocBlockNum = floor(DocFreq/PackedBlockSize) </description></item> + /// <item><description>TrimmedDocFreq = DocFreq % PackedBlockSize == 0 ? DocFreq - 1 : DocFreq. /// We use this trick since the definition of skip entry is a little different from base interface. - /// In <seealso cref="MultiLevelSkipListWriter"/>, skip data is assumed to be saved for + /// In <see cref="MultiLevelSkipListWriter"/>, skip data is assumed to be saved for /// skipInterval<sup>th</sup>, 2*skipInterval<sup>th</sup> ... posting in the list. 
However, /// in Lucene41PostingsFormat, the skip data is saved for skipInterval+1<sup>th</sup>, /// 2*skipInterval+1<sup>th</sup> ... posting (skipInterval==PackedBlockSize in this case). /// When DocFreq is multiple of PackedBlockSize, MultiLevelSkipListWriter will expect one - /// more skip data than Lucene41SkipWriter. </li> - /// <li>SkipDatum is the metadata of one skip entry. - /// For the first block (no matter packed or VInt), it is omitted.</li> - /// <li>DocSkip records the document number of every PackedBlockSize<sup>th</sup> document number in + /// more skip data than Lucene41SkipWriter. </description></item> + /// <item><description>SkipDatum is the metadata of one skip entry. + /// For the first block (no matter packed or VInt), it is omitted.</description></item> + /// <item><description>DocSkip records the document number of every PackedBlockSize<sup>th</sup> document number in /// the postings (i.e. last document number in each packed block). On disk it is stored as the - /// difference from previous value in the sequence. </li> - /// <li>DocFPSkip records the file offsets of each block (excluding )posting at + /// difference from previous value in the sequence. </description></item> + /// <item><description>DocFPSkip records the file offsets of each block (excluding) posting at /// PackedBlockSize+1<sup>th</sup>, 2*PackedBlockSize+1<sup>th</sup> ... , in DocFile. /// The file offsets are relative to the start of current term's TermFreqs. - /// On disk it is also stored as the difference from previous SkipDatum in the sequence.</li> - /// <li>Since positions and payloads are also block encoded, the skip should skip to related block first, + /// On disk it is also stored as the difference from previous SkipDatum in the sequence.</description></item> + /// <item><description>Since positions and payloads are also block encoded, the skip should skip to related block first, /// then fetch the values according to in-block offset. 
PosFPSkip and PayFPSkip record the file /// offsets of related block in .pos and .pay, respectively. While PosBlockOffset indicates /// which value to fetch inside the related block (PayBlockOffset is unnecessary since it is always /// equal to PosBlockOffset). Same as DocFPSkip, the file offsets are relative to the start of - /// current term's TermFreqs, and stored as a difference sequence.</li> - /// <li>PayByteUpto indicates the start offset of the current payload. It is equivalent to - /// the sum of the payload lengths in the current block up to PosBlockOffset</li> - /// </ul> + /// current term's TermFreqs, and stored as a difference sequence.</description></item> + /// <item><description>PayByteUpto indicates the start offset of the current payload. It is equivalent to + /// the sum of the payload lengths in the current block up to PosBlockOffset</description></item> + /// </list> /// </dd> /// </dl> /// @@ -258,52 +258,52 @@ namespace Lucene.Net.Codecs.Lucene41 /// <dl> /// <dd> /// <b>Positions</b> - /// <p>The .pos file contains the lists of positions that each term occurs at within documents. It also - /// sometimes stores part of payloads and offsets for speedup.</p> - /// <ul> - /// <li>PosFile(.pos) --> Header, <TermPositions> <sup>TermCount</sup>, Footer</li> - /// <li>Header --> <seealso cref="CodecUtil#writeHeader CodecHeader"/></li> - /// <li>TermPositions --> <PackedPosDeltaBlock> <sup>PackedPosBlockNum</sup>, - /// VIntBlock? 
</li> - /// <li>VIntBlock --> <PositionDelta[, PayloadLength?], PayloadData?, - /// OffsetDelta?, OffsetLength?><sup>PosVIntCount</sup> - /// <li>PackedPosDeltaBlock --> <seealso cref="PackedInts PackedInts"/></li> - /// <li>PositionDelta, OffsetDelta, OffsetLength --> - /// <seealso cref="DataOutput#writeVInt VInt"/></li> - /// <li>PayloadData --> <seealso cref="DataOutput#writeByte byte"/><sup>PayLength</sup></li> - /// <li>Footer --> <seealso cref="CodecUtil#writeFooter CodecFooter"/></li> - /// </ul> - /// <p>Notes:</p> - /// <ul> - /// <li>TermPositions are order by term (terms are implicit, from the term dictionary), and position - /// values for each term document pair are incremental, and ordered by document number.</li> - /// <li>PackedPosBlockNum is the number of packed blocks for current term's positions, payloads or offsets. - /// In particular, PackedPosBlockNum = floor(totalTermFreq/PackedBlockSize) </li> - /// <li>PosVIntCount is the number of positions encoded as VInt format. In particular, - /// PosVIntCount = totalTermFreq - PackedPosBlockNum*PackedBlockSize</li> - /// <li>The procedure how PackedPosDeltaBlock is generated is the same as PackedDocDeltaBlock - /// in chapter <a href="#Frequencies">Frequencies and Skip Data</a>.</li> - /// <li>PositionDelta is, if payloads are disabled for the term's field, the + /// <para>The .pos file contains the lists of positions that each term occurs at within documents. It also + /// sometimes stores part of payloads and offsets for speedup.</para> + /// <list type="bullet"> + /// <item><description>PosFile(.pos) --> Header, <TermPositions> <sup>TermCount</sup>, Footer</description></item> + /// <item><description>Header --> CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) </description></item> + /// <item><description>TermPositions --> <PackedPosDeltaBlock> <sup>PackedPosBlockNum</sup>, + /// VIntBlock? 
</description></item> + /// <item><description>VIntBlock --> <PositionDelta[, PayloadLength?], PayloadData?, + /// OffsetDelta?, OffsetLength?><sup>PosVIntCount</sup></description></item> + /// <item><description>PackedPosDeltaBlock --> PackedInts (<see cref="Util.Packed.PackedInt32s"/>)</description></item> + /// <item><description>PositionDelta, OffsetDelta, OffsetLength --> + /// VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> + /// <item><description>PayloadData --> byte (<see cref="Store.DataOutput.WriteByte(byte)"/>)<sup>PayLength</sup></description></item> + /// <item><description>Footer --> CodecFooter (<see cref="CodecUtil.WriteFooter(Store.IndexOutput)"/>) </description></item> + /// </list> + /// <para>Notes:</para> + /// <list type="bullet"> + /// <item><description>TermPositions are ordered by term (terms are implicit, from the term dictionary), and position + /// values for each term document pair are incremental, and ordered by document number.</description></item> + /// <item><description>PackedPosBlockNum is the number of packed blocks for current term's positions, payloads or offsets. + /// In particular, PackedPosBlockNum = floor(totalTermFreq/PackedBlockSize) </description></item> + /// <item><description>PosVIntCount is the number of positions encoded as VInt format. In particular, + /// PosVIntCount = totalTermFreq - PackedPosBlockNum*PackedBlockSize</description></item> + /// <item><description>The procedure how PackedPosDeltaBlock is generated is the same as PackedDocDeltaBlock + /// in chapter <a href="#Frequencies">Frequencies and Skip Data</a>.</description></item> + /// <item><description>PositionDelta is, if payloads are disabled for the term's field, the /// difference between the position of the current occurrence in the document and /// the previous occurrence (or zero, if this is the first occurrence in this /// document). 
If payloads are enabled for the term's field, then PositionDelta/2 /// is the difference between the current and the previous position. If payloads /// are enabled and PositionDelta is odd, then PayloadLength is stored, indicating - /// the length of the payload at the current term position.</li> - /// <li>For example, the TermPositions for a term which occurs as the fourth term in + /// the length of the payload at the current term position.</description></item> + /// <item><description>For example, the TermPositions for a term which occurs as the fourth term in /// one document, and as the fifth and ninth term in a subsequent document, would /// be the following sequence of VInts (payloads disabled): - /// <p>4, 5, 4</p></li> - /// <li>PayloadData is metadata associated with the current term position. If + /// <para>4, 5, 4</para></description></item> + /// <item><description>PayloadData is metadata associated with the current term position. If /// PayloadLength is stored at the current position, then it indicates the length /// of this payload. If PayloadLength is not stored, then this payload has the same - /// length as the payload at the previous position.</li> - /// <li>OffsetDelta/2 is the difference between this position's startOffset from the + /// length as the payload at the previous position.</description></item> + /// <item><description>OffsetDelta/2 is the difference between this position's startOffset from the /// previous occurrence (or zero, if this is the first occurrence in this document). /// If OffsetDelta is odd, then the length (endOffset-startOffset) differs from the /// previous occurrence and an OffsetLength follows. 
Offset data is only written for - /// <seealso cref="IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/>.</li> - /// </ul> + /// <see cref="Index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/>.</description></item> + /// </list> /// </dd> /// </dl> /// @@ -311,35 +311,35 @@ namespace Lucene.Net.Codecs.Lucene41 /// <dl> /// <dd> /// <b>Payloads and Offsets</b> - /// <p>The .pay file will store payloads and offsets associated with certain term-document positions. - /// Some payloads and offsets will be separated out into .pos file, for performance reasons.</p> - /// <ul> - /// <li>PayFile(.pay): --> Header, <TermPayloads, TermOffsets?> <sup>TermCount</sup>, Footer</li> - /// <li>Header --> <seealso cref="CodecUtil#writeHeader CodecHeader"/></li> - /// <li>TermPayloads --> <PackedPayLengthBlock, SumPayLength, PayData> <sup>PackedPayBlockNum</sup> - /// <li>TermOffsets --> <PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock> <sup>PackedPayBlockNum</sup> - /// <li>PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock --> <seealso cref="PackedInts PackedInts"/></li> - /// <li>SumPayLength --> <seealso cref="DataOutput#writeVInt VInt"/></li> - /// <li>PayData --> <seealso cref="DataOutput#writeByte byte"/><sup>SumPayLength</sup></li> - /// <li>Footer --> <seealso cref="CodecUtil#writeFooter CodecFooter"/></li> - /// </ul> - /// <p>Notes:</p> - /// <ul> - /// <li>The order of TermPayloads/TermOffsets will be the same as TermPositions, note that part of - /// payload/offsets are stored in .pos.</li> - /// <li>The procedure how PackedPayLengthBlock and PackedOffsetLengthBlock are generated is the + /// <para>The .pay file will store payloads and offsets associated with certain term-document positions. 
+ /// Some payloads and offsets will be separated out into .pos file, for performance reasons.</para> + /// <list type="bullet"> + /// <item><description>PayFile(.pay): --> Header, <TermPayloads, TermOffsets?> <sup>TermCount</sup>, Footer</description></item> + /// <item><description>Header --> CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) </description></item> + /// <item><description>TermPayloads --> <PackedPayLengthBlock, SumPayLength, PayData> <sup>PackedPayBlockNum</sup></description></item> + /// <item><description>TermOffsets --> <PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock> <sup>PackedPayBlockNum</sup></description></item> + /// <item><description>PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock --> PackedInts (<see cref="Util.Packed.PackedInt32s"/>) </description></item> + /// <item><description>SumPayLength --> VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> + /// <item><description>PayData --> byte (<see cref="Store.DataOutput.WriteByte(byte)"/>) <sup>SumPayLength</sup></description></item> + /// <item><description>Footer --> CodecFooter (<see cref="CodecUtil.WriteFooter(Store.IndexOutput)"/>) </description></item> + /// </list> + /// <para>Notes:</para> + /// <list type="bullet"> + /// <item><description>The order of TermPayloads/TermOffsets will be the same as TermPositions, note that part of + /// payload/offsets are stored in .pos.</description></item> + /// <item><description>The procedure how PackedPayLengthBlock and PackedOffsetLengthBlock are generated is the /// same as PackedFreqBlock in chapter <a href="#Frequencies">Frequencies and Skip Data</a>. - /// While PackedStartDeltaBlock follows a same procedure as PackedDocDeltaBlock.</li> - /// <li>PackedPayBlockNum is always equal to PackedPosBlockNum, for the same term. 
It is also synonym - /// for PackedOffsetBlockNum.</li> - /// <li>SumPayLength is the total length of payloads written within one block, should be the sum - /// of PayLengths in one packed block.</li> - /// <li>PayLength in PackedPayLengthBlock is the length of each payload associated with the current - /// position.</li> - /// </ul> + /// While PackedStartDeltaBlock follows a same procedure as PackedDocDeltaBlock.</description></item> + /// <item><description>PackedPayBlockNum is always equal to PackedPosBlockNum, for the same term. It is also synonym + /// for PackedOffsetBlockNum.</description></item> + /// <item><description>SumPayLength is the total length of payloads written within one block, should be the sum + /// of PayLengths in one packed block.</description></item> + /// <item><description>PayLength in PackedPayLengthBlock is the length of each payload associated with the current + /// position.</description></item> + /// </list> /// </dd> /// </dl> - /// </p> + /// </para> /// /// @lucene.experimental /// </summary> @@ -375,8 +375,8 @@ namespace Lucene.Net.Codecs.Lucene41 public static int BLOCK_SIZE = 128; /// <summary> - /// Creates {@code Lucene41PostingsFormat} with default - /// settings. + /// Creates <see cref="Lucene41PostingsFormat"/> with default + /// settings. /// </summary> public Lucene41PostingsFormat() : this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE) @@ -384,10 +384,10 @@ namespace Lucene.Net.Codecs.Lucene41 } /// <summary> - /// Creates {@code Lucene41PostingsFormat} with custom - /// values for {@code minBlockSize} and {@code - /// maxBlockSize} passed to block terms dictionary. 
</summary> - /// <seealso cref= BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) </seealso> + /// Creates <see cref="Lucene41PostingsFormat"/> with custom + /// values for <paramref name="minTermBlockSize"/> and + /// <paramref name="maxTermBlockSize"/> passed to block terms dictionary. </summary> + /// <seealso cref="BlockTreeTermsWriter.BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)"/> public Lucene41PostingsFormat(int minTermBlockSize, int maxTermBlockSize) : base() { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3221b638/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsReader.cs b/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsReader.cs index 406cf96..b31126d 100644 --- a/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsReader.cs +++ b/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsReader.cs @@ -27,9 +27,10 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Concrete class that reads docId(maybe frq,pos,offset,payloads) list /// with postings format. + /// <para/> + /// @lucene.experimental /// </summary> - /// <seealso cref= Lucene41SkipReader for details - /// @lucene.experimental </seealso> + /// <seealso cref="Lucene41SkipReader"/> public sealed class Lucene41PostingsReader : PostingsReaderBase { private readonly IndexInput docIn; @@ -95,7 +96,7 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Read values that have been written using variable-length encoding instead of bit-packing. /// <para/> - /// NOTE: This was readVIntBlock() in Lucene + /// NOTE: This was readVIntBlock() in Lucene. 
/// </summary> internal static void ReadVInt32Block(IndexInput docIn, int[] docBuffer, int[] freqBuffer, int num, bool indexHasFreq) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3221b638/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsWriter.cs b/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsWriter.cs index dcd68f1..5494d64 100644 --- a/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsWriter.cs +++ b/src/Lucene.Net/Codecs/Lucene41/Lucene41PostingsWriter.cs @@ -35,11 +35,12 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Concrete class that writes docId(maybe frq,pos,offset,payloads) list /// with postings format. - /// + /// <para/> /// Postings list for each term will be stored separately. + /// <para/> + /// @lucene.experimental /// </summary> - /// <seealso cref= Lucene41SkipWriter for details about skipping setting and postings layout. - /// @lucene.experimental </seealso> + /// <seealso cref="Lucene41SkipWriter"/> for details about skipping setting and postings layout. public sealed class Lucene41PostingsWriter : PostingsWriterBase { /// <summary> @@ -347,7 +348,7 @@ namespace Lucene.Net.Codecs.Lucene41 } /// <summary> - /// Add a new position & payload </summary> + /// Add a new position & payload </summary> public override void AddPosition(int position, BytesRef payload, int startOffset, int endOffset) { // if (DEBUG) { @@ -433,7 +434,7 @@ namespace Lucene.Net.Codecs.Lucene41 } /// <summary> - /// Called when we are done adding docs to this term </summary> + /// Called when we are done adding docs to this term. 
</summary> public override void FinishTerm(BlockTermState state) { Int32BlockTermState state2 = (Int32BlockTermState)state; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3221b638/src/Lucene.Net/Codecs/Lucene41/Lucene41SkipReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene41/Lucene41SkipReader.cs b/src/Lucene.Net/Codecs/Lucene41/Lucene41SkipReader.cs index 675777e..5cc0a91 100644 --- a/src/Lucene.Net/Codecs/Lucene41/Lucene41SkipReader.cs +++ b/src/Lucene.Net/Codecs/Lucene41/Lucene41SkipReader.cs @@ -25,29 +25,28 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Implements the skip list reader for block postings format /// that stores positions and payloads. - /// + /// <para/> /// Although this skipper uses MultiLevelSkipListReader as an interface, /// its definition of skip position will be a little different. - /// + /// <para/> /// For example, when skipInterval = blockSize = 3, df = 2*skipInterval = 6, - /// + /// <para/> /// 0 1 2 3 4 5 /// d d d d d d (posting list) /// ^ ^ (skip point in MultiLeveSkipWriter) /// ^ (skip point in Lucene41SkipWriter) - /// + /// <para/> /// In this case, MultiLevelSkipListReader will use the last document as a skip point, /// while Lucene41SkipReader should assume no skip point will comes. - /// + /// <para/> /// If we use the interface directly in Lucene41SkipReader, it may silly try to read /// another skip data after the only skip point is loaded. - /// + /// <para/> /// To illustrate this, we can call skipTo(d[5]), since skip point d[3] has smaller docId, /// and numSkipped+blockSize== df, the MultiLevelSkipListReader will assume the skip list /// isn't exhausted yet, and try to load a non-existed skip point - /// - /// Therefore, we'll trim df before passing it to the interface. see trim(int) - /// + /// <para/> + /// Therefore, we'll trim df before passing it to the interface. see <see cref="Trim(int)"/>. 
/// </summary> internal sealed class Lucene41SkipReader : MultiLevelSkipListReader { @@ -100,12 +99,11 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Trim original docFreq to tell skipReader read proper number of skip points. - /// + /// <para/> /// Since our definition in Lucene41Skip* is a little different from MultiLevelSkip* /// this trimmed docFreq will prevent skipReader from: /// 1. silly reading a non-existed skip point after the last block boundary /// 2. moving into the vInt block - /// /// </summary> internal int Trim(int df) { @@ -136,7 +134,7 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Returns the doc pointer of the doc to which the last call of - /// <seealso cref="MultiLevelSkipListReader#skipTo(int)"/> has skipped. + /// <seealso cref="MultiLevelSkipListReader.SkipTo(int)"/> has skipped. /// </summary> public long DocPointer { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3221b638/src/Lucene.Net/Codecs/Lucene41/Lucene41SkipWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene41/Lucene41SkipWriter.cs b/src/Lucene.Net/Codecs/Lucene41/Lucene41SkipWriter.cs index 3565a5d..4201c68 100644 --- a/src/Lucene.Net/Codecs/Lucene41/Lucene41SkipWriter.cs +++ b/src/Lucene.Net/Codecs/Lucene41/Lucene41SkipWriter.cs @@ -23,23 +23,22 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Write skip lists with multiple levels, and support skip within block ints. - /// + /// <para/> /// Assume that docFreq = 28, skipInterval = blockSize = 12 /// /// | block#0 | | block#1 | |vInts| /// d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list) /// ^ ^ (level 0 skip point) - /// + /// <para/> /// Note that skipWriter will ignore first document in block#0, since /// it is useless as a skip point. Also, we'll never skip into the vInts /// block, only record skip data at the start its start point(if it exist). 
- /// + /// <para/> /// For each skip point, we will record: /// 1. docID in former position, i.e. for position 12, record docID[11], etc. /// 2. its related file points(position, payload), /// 3. related numbers or uptos(position, payload). /// 4. start offset. - /// /// </summary> internal sealed class Lucene41SkipWriter : MultiLevelSkipListWriter { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3221b638/src/Lucene.Net/Codecs/Lucene41/Lucene41StoredFieldsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene41/Lucene41StoredFieldsFormat.cs b/src/Lucene.Net/Codecs/Lucene41/Lucene41StoredFieldsFormat.cs index 45e8d0c..8f9c5b5 100644 --- a/src/Lucene.Net/Codecs/Lucene41/Lucene41StoredFieldsFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene41/Lucene41StoredFieldsFormat.cs @@ -26,91 +26,97 @@ namespace Lucene.Net.Codecs.Lucene41 /// <summary> /// Lucene 4.1 stored fields format. /// - /// <p><b>Principle</b></p> - /// <p>this <seealso cref="StoredFieldsFormat"/> compresses blocks of 16KB of documents in + /// <para><b>Principle</b></para> + /// <para>This <seealso cref="StoredFieldsFormat"/> compresses blocks of 16KB of documents in /// order to improve the compression ratio compared to document-level /// compression. It uses the <a href="http://code.google.com/p/lz4/">LZ4</a> /// compression algorithm, which is fast to compress and very fast to decompress /// data. Although the compression method that is used focuses more on speed /// than on compression ratio, it should provide interesting compression ratios - /// for redundant inputs (such as log files, HTML or plain text).</p> - /// <p><b>File formats</b></p> - /// <p>Stored fields are represented by two files:</p> - /// <ol> - /// <li><a name="field_data" id="field_data"></a> - /// <p>A fields data file (extension <tt>.fdt</tt>). 
this file stores a compact + /// for redundant inputs (such as log files, HTML or plain text).</para> + /// <para><b>File formats</b></para> + /// <para>Stored fields are represented by two files:</para> + /// <list type="number"> + /// <item><description><a name="field_data" id="field_data"></a> + /// <para>A fields data file (extension <c>.fdt</c>). this file stores a compact /// representation of documents in compressed blocks of 16KB or more. When - /// writing a segment, documents are appended to an in-memory <tt>byte[]</tt> + /// writing a segment, documents are appended to an in-memory <c>byte[]</c> /// buffer. When its size reaches 16KB or more, some metadata about the documents /// is flushed to disk, immediately followed by a compressed representation of /// the buffer using the /// <a href="http://code.google.com/p/lz4/">LZ4</a> - /// <a href="http://fastcompression.blogspot.fr/2011/05/lz4-explained.html">compression format</a>.</p> - /// <p>Here is a more detailed description of the field data file format:</p> - /// <ul> - /// <li>FieldData (.fdt) --> <Header>, PackedIntsVersion, <Chunk><sup>ChunkCount</sup></li> - /// <li>Header --> <seealso cref="CodecUtil#writeHeader CodecHeader"/></li> - /// <li>PackedIntsVersion --> <seealso cref="PackedInts#VERSION_CURRENT"/> as a <seealso cref="DataOutput#writeVInt VInt"/></li> - /// <li>ChunkCount is not known in advance and is the number of chunks necessary to store all document of the segment</li> - /// <li>Chunk --> DocBase, ChunkDocs, DocFieldCounts, DocLengths, <CompressedDocs></li> - /// <li>DocBase --> the ID of the first document of the chunk as a <seealso cref="DataOutput#writeVInt VInt"/></li> - /// <li>ChunkDocs --> the number of documents in the chunk as a <seealso cref="DataOutput#writeVInt VInt"/></li> - /// <li>DocFieldCounts --> the number of stored fields of every document in the chunk, encoded as followed:<ul> - /// <li>if chunkDocs=1, the unique value is encoded as a <seealso 
cref="DataOutput#writeVInt VInt"/></li> - /// <li>else read a <seealso cref="DataOutput#writeVInt VInt"/> (let's call it <tt>bitsRequired</tt>)<ul> - /// <li>if <tt>bitsRequired</tt> is <tt>0</tt> then all values are equal, and the common value is the following <seealso cref="DataOutput#writeVInt VInt"/></li> - /// <li>else <tt>bitsRequired</tt> is the number of bits required to store any value, and values are stored in a <seealso cref="PackedInts packed"/> array where every value is stored on exactly <tt>bitsRequired</tt> bits</li> - /// </ul></li> - /// </ul></li> - /// <li>DocLengths --> the lengths of all documents in the chunk, encoded with the same method as DocFieldCounts</li> - /// <li>CompressedDocs --> a compressed representation of <Docs> using the LZ4 compression format</li> - /// <li>Docs --> <Doc><sup>ChunkDocs</sup></li> - /// <li>Doc --> <FieldNumAndType, Value><sup>DocFieldCount</sup></li> - /// <li>FieldNumAndType --> a <seealso cref="DataOutput#writeVLong VLong"/>, whose 3 last bits are Type and other bits are FieldNum</li> - /// <li>Type --><ul> - /// <li>0: Value is String</li> - /// <li>1: Value is BinaryValue</li> - /// <li>2: Value is Int</li> - /// <li>3: Value is Float</li> - /// <li>4: Value is Long</li> - /// <li>5: Value is Double</li> - /// <li>6, 7: unused</li> - /// </ul></li> - /// <li>FieldNum --> an ID of the field</li> - /// <li>Value --> <seealso cref="DataOutput#writeString(String) String"/> | BinaryValue | Int | Float | Long | Double depending on Type</li> - /// <li>BinaryValue --> ValueLength <Byte><sup>ValueLength</sup></li> - /// </ul> - /// <p>Notes</p> - /// <ul> - /// <li>If documents are larger than 16KB then chunks will likely contain only + /// <a href="http://fastcompression.blogspot.fr/2011/05/lz4-explained.html">compression format</a>.</para> + /// <para>Here is a more detailed description of the field data file format:</para> + /// <list type="bullet"> + /// <item><description>FieldData (.fdt) --> <Header>, 
PackedIntsVersion, <Chunk><sup>ChunkCount</sup></description></item> + /// <item><description>Header --> CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) </description></item> + /// <item><description>PackedIntsVersion --> <see cref="Util.Packed.PackedInt32s.VERSION_CURRENT"/> as a VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> + /// <item><description>ChunkCount is not known in advance and is the number of chunks necessary to store all document of the segment</description></item> + /// <item><description>Chunk --> DocBase, ChunkDocs, DocFieldCounts, DocLengths, <CompressedDocs></description></item> + /// <item><description>DocBase --> the ID of the first document of the chunk as a VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> + /// <item><description>ChunkDocs --> the number of documents in the chunk as a VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> + /// <item><description>DocFieldCounts --> the number of stored fields of every document in the chunk, encoded as followed: + /// <list type="bullet"> + /// <item><description>if chunkDocs=1, the unique value is encoded as a VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> + /// <item><description>else read a VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) (let's call it <c>bitsRequired</c>) + /// <list type="bullet"> + /// <item><description>if <c>bitsRequired</c> is <c>0</c> then all values are equal, and the common value is the following VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> + /// <item><description>else <c>bitsRequired</c> is the number of bits required to store any value, and values are stored in a packed (<see cref="Util.Packed.PackedInt32s"/>) array where every value is stored on exactly <c>bitsRequired</c> bits</description></item> + /// </list> + /// </description></item> + /// </list> + /// </description></item> 
+ /// <item><description>DocLengths --> the lengths of all documents in the chunk, encoded with the same method as DocFieldCounts</description></item> + /// <item><description>CompressedDocs --> a compressed representation of <Docs> using the LZ4 compression format</description></item> + /// <item><description>Docs --> <Doc><sup>ChunkDocs</sup></description></item> + /// <item><description>Doc --> <FieldNumAndType, Value><sup>DocFieldCount</sup></description></item> + /// <item><description>FieldNumAndType --> a VLong (<see cref="Store.DataOutput.WriteVInt64(long)"/>), whose 3 last bits are Type and other bits are FieldNum</description></item> + /// <item><description>Type --> + /// <list type="bullet"> + /// <item><description>0: Value is String</description></item> + /// <item><description>1: Value is BinaryValue</description></item> + /// <item><description>2: Value is Int</description></item> + /// <item><description>3: Value is Float</description></item> + /// <item><description>4: Value is Long</description></item> + /// <item><description>5: Value is Double</description></item> + /// <item><description>6, 7: unused</description></item> + /// </list> + /// </description></item> + /// <item><description>FieldNum --> an ID of the field</description></item> + /// <item><description>Value --> String (<see cref="Store.DataOutput.WriteString(string)"/>) | BinaryValue | Int | Float | Long | Double depending on Type</description></item> + /// <item><description>BinaryValue --> ValueLength <Byte><sup>ValueLength</sup></description></item> + /// </list> + /// <para>Notes</para> + /// <list type="bullet"> + /// <item><description>If documents are larger than 16KB then chunks will likely contain only /// one document. 
However, documents can never spread across several chunks (all - /// fields of a single document are in the same chunk).</li> - /// <li>When at least one document in a chunk is large enough so that the chunk + /// fields of a single document are in the same chunk).</description></item> + /// <item><description>When at least one document in a chunk is large enough so that the chunk /// is larger than 32KB, the chunk will actually be compressed in several LZ4 - /// blocks of 16KB. this allows <seealso cref="StoredFieldVisitor"/>s which are only + /// blocks of 16KB. this allows <see cref="StoredFieldVisitor"/>s which are only /// interested in the first fields of a document to not have to decompress 10MB - /// of data if the document is 10MB, but only 16KB.</li> - /// <li>Given that the original lengths are written in the metadata of the chunk, + /// of data if the document is 10MB, but only 16KB.</description></item> + /// <item><description>Given that the original lengths are written in the metadata of the chunk, /// the decompressor can leverage this information to stop decoding as soon as - /// enough data has been decompressed.</li> - /// <li>In case documents are incompressible, CompressedDocs will be less than - /// 0.5% larger than Docs.</li> - /// </ul> - /// </li> - /// <li><a name="field_index" id="field_index"></a> - /// <p>A fields index file (extension <tt>.fdx</tt>).</p> - /// <ul> - /// <li>FieldsIndex (.fdx) --> <Header>, <ChunkIndex></li> - /// <li>Header --> <seealso cref="CodecUtil#writeHeader CodecHeader"/></li> - /// <li>ChunkIndex: See <seealso cref="CompressingStoredFieldsIndexWriter"/></li> - /// </ul> - /// </li> - /// </ol> - /// <p><b>Known limitations</b></p> - /// <p>this <seealso cref="StoredFieldsFormat"/> does not support individual documents - /// larger than (<tt>2<sup>31</sup> - 2<sup>14</sup></tt>) bytes. 
In case this + /// enough data has been decompressed.</description></item> + /// <item><description>In case documents are incompressible, CompressedDocs will be less than + /// 0.5% larger than Docs.</description></item> + /// </list> + /// </description></item> + /// <item><description><a name="field_index" id="field_index"></a> + /// <para>A fields index file (extension <c>.fdx</c>).</para> + /// <list type="bullet"> + /// <item><description>FieldsIndex (.fdx) --> <Header>, <ChunkIndex></description></item> + /// <item><description>Header --> CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) </description></item> + /// <item><description>ChunkIndex: See <see cref="CompressingStoredFieldsIndexWriter"/></description></item> + /// </list> + /// </description></item> + /// </list> + /// <para><b>Known limitations</b></para> + /// <para>This <see cref="StoredFieldsFormat"/> does not support individual documents + /// larger than (<c>2<sup>31</sup> - 2<sup>14</sup></c>) bytes. In case this /// is a problem, you should use another format, such as - /// <seealso cref="Lucene40StoredFieldsFormat"/>.</p> + /// <see cref="Lucene40StoredFieldsFormat"/>.</para> /// @lucene.experimental /// </summary> public sealed class Lucene41StoredFieldsFormat : CompressingStoredFieldsFormat
