Lucene.Net.Codecs.Lucene42: Fixed XML documentation comment warnings
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/ee52fd34 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/ee52fd34 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/ee52fd34 Branch: refs/heads/master Commit: ee52fd34c52edf2bc67aecbed717fac22f931e8a Parents: b27d10c Author: Shad Storhaug <[email protected]> Authored: Mon Jun 5 13:40:50 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Tue Jun 6 06:58:40 2017 +0700 ---------------------------------------------------------------------- CONTRIBUTING.md | 3 +- src/Lucene.Net/Codecs/Lucene42/Lucene42Codec.cs | 23 +-- .../Codecs/Lucene42/Lucene42DocValuesFormat.cs | 188 +++++++++---------- .../Lucene42/Lucene42DocValuesProducer.cs | 2 +- .../Codecs/Lucene42/Lucene42FieldInfosFormat.cs | 111 ++++++----- .../Codecs/Lucene42/Lucene42FieldInfosReader.cs | 8 +- .../Codecs/Lucene42/Lucene42NormsConsumer.cs | 2 +- .../Codecs/Lucene42/Lucene42NormsFormat.cs | 35 ++-- .../Lucene42/Lucene42TermVectorsFormat.cs | 183 +++++++++--------- 9 files changed, 279 insertions(+), 276 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ee52fd34/CONTRIBUTING.md ---------------------------------------------------------------------- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ce132b9..6886da2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -55,8 +55,7 @@ helpers to help with that, see for examples see our [Java style methods to avoid 2. Codecs.Lucene3x (namespace) 3. Codecs.Lucene40 (namespace) 4. Codecs.Lucene41 (namespace) - 5. Codecs.Lucene42 (namespace) - 6. Util.Packed (namespace) + 5. Util.Packed (namespace) 2. Lucene.Net.Codecs (project) 1. Appending (namespace) 2. 
BlockTerms (namespace) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ee52fd34/src/Lucene.Net/Codecs/Lucene42/Lucene42Codec.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene42/Lucene42Codec.cs b/src/Lucene.Net/Codecs/Lucene42/Lucene42Codec.cs index 3972a5e..b9fe243 100644 --- a/src/Lucene.Net/Codecs/Lucene42/Lucene42Codec.cs +++ b/src/Lucene.Net/Codecs/Lucene42/Lucene42Codec.cs @@ -29,13 +29,14 @@ namespace Lucene.Net.Codecs.Lucene42 /// <summary> /// Implements the Lucene 4.2 index format, with configurable per-field postings /// and docvalues formats. - /// <p> + /// <para/> /// If you want to reuse functionality of this codec in another codec, extend - /// <seealso cref="FilterCodec"/>. + /// <see cref="FilterCodec"/>. + /// <para/> + /// See <see cref="Lucene.Net.Codecs.Lucene42"/> package documentation for file format details. + /// <para/> + /// @lucene.experimental /// </summary> - /// <seealso cref= Lucene.Net.Codecs.Lucene42 package documentation for file format details. - /// @lucene.experimental </seealso> - /// @deprecated Only for reading old 4.2 segments // NOTE: if we make largish changes in a minor release, easier to just make Lucene43Codec or whatever // if they are backwards compatible or smallish we can probably do the backwards in the postingsreader // (it writes a minor version, etc). @@ -124,9 +125,9 @@ namespace Lucene.Net.Codecs.Lucene42 /// <summary> /// Returns the postings format that should be used for writing - /// new segments of <code>field</code>. - /// - /// The default implementation always returns "Lucene41" + /// new segments of <paramref name="field"/>. 
+ /// <para/> + /// The default implementation always returns "Lucene41" /// </summary> public virtual PostingsFormat GetPostingsFormatForField(string field) { @@ -135,9 +136,9 @@ namespace Lucene.Net.Codecs.Lucene42 /// <summary> /// Returns the docvalues format that should be used for writing - /// new segments of <code>field</code>. - /// - /// The default implementation always returns "Lucene42" + /// new segments of <paramref name="field"/>. + /// <para/> + /// The default implementation always returns "Lucene42" /// </summary> public virtual DocValuesFormat GetDocValuesFormatForField(string field) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ee52fd34/src/Lucene.Net/Codecs/Lucene42/Lucene42DocValuesFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene42/Lucene42DocValuesFormat.cs b/src/Lucene.Net/Codecs/Lucene42/Lucene42DocValuesFormat.cs index 82e8c89..29419a5 100644 --- a/src/Lucene.Net/Codecs/Lucene42/Lucene42DocValuesFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene42/Lucene42DocValuesFormat.cs @@ -25,99 +25,99 @@ namespace Lucene.Net.Codecs.Lucene42 /// <summary> /// Lucene 4.2 DocValues format. - /// <p> + /// <para/> /// Encodes the four per-document value types (Numeric,Binary,Sorted,SortedSet) with seven basic strategies. - /// <p> - /// <ul> - /// <li>Delta-compressed Numerics: per-document integers written in blocks of 4096. For each block - /// the minimum value is encoded, and each entry is a delta from that minimum value. - /// <li>Table-compressed Numerics: when the number of unique values is very small, a lookup table - /// is written instead. Each per-document entry is instead the ordinal to this table. - /// <li>Uncompressed Numerics: when all values would fit into a single byte, and the - /// <code>acceptableOverheadRatio</code> would pack values into 8 bits per value anyway, they - /// are written as absolute values (with no indirection or packing) for performance. 
- /// <li>GCD-compressed Numerics: when all numbers share a common divisor, such as dates, the greatest - /// common denominator (GCD) is computed, and quotients are stored using Delta-compressed Numerics. - /// <li>Fixed-width Binary: one large concatenated byte[] is written, along with the fixed length. - /// Each document's value can be addressed by maxDoc*length. - /// <li>Variable-width Binary: one large concatenated byte[] is written, along with end addresses + /// <para/> + /// <list type="bullet"> + /// <item><description>Delta-compressed Numerics: per-document integers written in blocks of 4096. For each block + /// the minimum value is encoded, and each entry is a delta from that minimum value.</description></item> + /// <item><description>Table-compressed Numerics: when the number of unique values is very small, a lookup table + /// is written instead. Each per-document entry is instead the ordinal to this table.</description></item> + /// <item><description>Uncompressed Numerics: when all values would fit into a single byte, and the + /// <c>acceptableOverheadRatio</c> would pack values into 8 bits per value anyway, they + /// are written as absolute values (with no indirection or packing) for performance.</description></item> + /// <item><description>GCD-compressed Numerics: when all numbers share a common divisor, such as dates, the greatest + /// common divisor (GCD) is computed, and quotients are stored using Delta-compressed Numerics.</description></item> + /// <item><description>Fixed-width Binary: one large concatenated byte[] is written, along with the fixed length. + /// Each document's value can be addressed by <c>docID*length</c>.</description></item> + /// <item><description>Variable-width Binary: one large concatenated byte[] is written, along with end addresses /// for each document. The addresses are written in blocks of 4096, with the current absolute /// start for the block, and the average (expected) delta per entry.
For each document the - /// deviation from the delta (actual - expected) is written. - /// <li>Sorted: an FST mapping deduplicated terms to ordinals is written, along with the per-document - /// ordinals written using one of the numeric strategies above. - /// <li>SortedSet: an FST mapping deduplicated terms to ordinals is written, along with the per-document - /// ordinal list written using one of the binary strategies above. - /// </ul> - /// <p> + /// deviation from the delta (actual - expected) is written.</description></item> + /// <item><description>Sorted: an FST mapping deduplicated terms to ordinals is written, along with the per-document + /// ordinals written using one of the numeric strategies above.</description></item> + /// <item><description>SortedSet: an FST mapping deduplicated terms to ordinals is written, along with the per-document + /// ordinal list written using one of the binary strategies above.</description></item> + /// </list> + /// <para/> /// Files: - /// <ol> - /// <li><tt>.dvd</tt>: DocValues data</li> - /// <li><tt>.dvm</tt>: DocValues metadata</li> - /// </ol> - /// <ol> - /// <li><a name="dvm" id="dvm"></a> - /// <p>The DocValues metadata or .dvm file.</p> - /// <p>For DocValues field, this stores metadata, such as the offset into the - /// DocValues data (.dvd)</p> - /// <p>DocValues metadata (.dvm) --> Header,<FieldNumber,EntryType,Entry><sup>NumFields</sup>,Footer</p> - /// <ul> - /// <li>Entry --> NumericEntry | BinaryEntry | SortedEntry</li> - /// <li>NumericEntry --> DataOffset,CompressionType,PackedVersion</li> - /// <li>BinaryEntry --> DataOffset,DataLength,MinLength,MaxLength,PackedVersion?,BlockSize?</li> - /// <li>SortedEntry --> DataOffset,ValueCount</li> - /// <li>FieldNumber,PackedVersion,MinLength,MaxLength,BlockSize,ValueCount --> <seealso cref="DataOutput#writeVInt VInt"/></li> - /// <li>DataOffset,DataLength --> <seealso cref="DataOutput#writeLong Int64"/></li> - /// <li>EntryType,CompressionType --> <seealso 
cref="DataOutput#writeByte Byte"/></li> - /// <li>Header --> <seealso cref="CodecUtil#writeHeader CodecHeader"/></li> - /// <li>Footer --> <seealso cref="CodecUtil#writeFooter CodecFooter"/></li> - /// </ul> - /// <p>Sorted fields have two entries: a SortedEntry with the FST metadata, - /// and an ordinary NumericEntry for the document-to-ord metadata.</p> - /// <p>SortedSet fields have two entries: a SortedEntry with the FST metadata, - /// and an ordinary BinaryEntry for the document-to-ord-list metadata.</p> - /// <p>FieldNumber of -1 indicates the end of metadata.</p> - /// <p>EntryType is a 0 (NumericEntry), 1 (BinaryEntry, or 2 (SortedEntry)</p> - /// <p>DataOffset is the pointer to the start of the data in the DocValues data (.dvd)</p> - /// <p>CompressionType indicates how Numeric values will be compressed: - /// <ul> - /// <li>0 --> delta-compressed. For each block of 4096 integers, every integer is delta-encoded - /// from the minimum value within the block. - /// <li>1 --> table-compressed. When the number of unique numeric values is small and it would save space, - /// a lookup table of unique values is written, followed by the ordinal for each document. - /// <li>2 --> uncompressed. 
When the <code>acceptableOverheadRatio</code> parameter would upgrade the number + /// <list type="number"> + /// <item><description><c>.dvd</c>: DocValues data</description></item> + /// <item><description><c>.dvm</c>: DocValues metadata</description></item> + /// </list> + /// <list type="number"> + /// <item><description><a name="dvm" id="dvm"></a> + /// <para>The DocValues metadata or .dvm file.</para> + /// <para>For DocValues field, this stores metadata, such as the offset into the + /// DocValues data (.dvd)</para> + /// <para>DocValues metadata (.dvm) --> Header,<FieldNumber,EntryType,Entry><sup>NumFields</sup>,Footer</para> + /// <list type="bullet"> + /// <item><description>Entry --> NumericEntry | BinaryEntry | SortedEntry</description></item> + /// <item><description>NumericEntry --> DataOffset,CompressionType,PackedVersion</description></item> + /// <item><description>BinaryEntry --> DataOffset,DataLength,MinLength,MaxLength,PackedVersion?,BlockSize?</description></item> + /// <item><description>SortedEntry --> DataOffset,ValueCount</description></item> + /// <item><description>FieldNumber,PackedVersion,MinLength,MaxLength,BlockSize,ValueCount --> VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> + /// <item><description>DataOffset,DataLength --> Int64 (<see cref="Store.DataOutput.WriteInt64(long)"/>) </description></item> + /// <item><description>EntryType,CompressionType --> Byte (<see cref="Store.DataOutput.WriteByte(byte)"/>) </description></item> + /// <item><description>Header --> CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) </description></item> + /// <item><description>Footer --> CodecFooter (<see cref="CodecUtil.WriteFooter(Store.IndexOutput)"/>) </description></item> + /// </list> + /// <para>Sorted fields have two entries: a SortedEntry with the FST metadata, + /// and an ordinary NumericEntry for the document-to-ord metadata.</para> + /// <para>SortedSet fields have two entries: 
a SortedEntry with the FST metadata, + /// and an ordinary BinaryEntry for the document-to-ord-list metadata.</para> + /// <para>FieldNumber of -1 indicates the end of metadata.</para> + /// <para>EntryType is 0 (NumericEntry), 1 (BinaryEntry), or 2 (SortedEntry)</para> + /// <para>DataOffset is the pointer to the start of the data in the DocValues data (.dvd)</para> + /// <para/>CompressionType indicates how Numeric values will be compressed: + /// <list type="bullet"> + /// <item><description>0 --> delta-compressed. For each block of 4096 integers, every integer is delta-encoded + /// from the minimum value within the block.</description></item> + /// <item><description>1 --> table-compressed. When the number of unique numeric values is small and it would save space, + /// a lookup table of unique values is written, followed by the ordinal for each document.</description></item> + /// <item><description>2 --> uncompressed. When the <c>acceptableOverheadRatio</c> parameter would upgrade the number /// of bits required to 8, and all values fit in a byte, these are written as absolute binary values - /// for performance. - /// <li>3 -->, gcd-compressed. When all integers share a common divisor, only quotients are stored - /// using blocks of delta-encoded ints. - /// </ul> - /// <p>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values. - /// If they are equal, then all values are of a fixed size, and can be addressed as DataOffset + (docID * length). + /// for performance.</description></item> + /// <item><description>3 --> gcd-compressed. When all integers share a common divisor, only quotients are stored + /// using blocks of delta-encoded ints.</description></item> + /// </list> + /// <para/>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values. + /// If they are equal, then all values are of a fixed size, and can be addressed as <c>DataOffset + (docID * length)</c>.
/// Otherwise, the binary values are of variable size, and packed integer metadata (PackedVersion,BlockSize) - /// is written for the addresses. - /// <li><a name="dvd" id="dvd"></a> - /// <p>The DocValues data or .dvd file.</p> - /// <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p> - /// <p>DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData><sup>NumFields</sup>,Footer</p> - /// <ul> - /// <li>NumericData --> DeltaCompressedNumerics | TableCompressedNumerics | UncompressedNumerics | GCDCompressedNumerics</li> - /// <li>BinaryData --> <seealso cref="DataOutput#writeByte Byte"/><sup>DataLength</sup>,Addresses</li> - /// <li>SortedData --> <seealso cref="FST FST<Int64>"/></li> - /// <li>DeltaCompressedNumerics --> <seealso cref="BlockPackedWriter BlockPackedInts(blockSize=4096)"/></li> - /// <li>TableCompressedNumerics --> TableSize,<seealso cref="DataOutput#writeLong Int64"/><sup>TableSize</sup>,<seealso cref="PackedInt32s PackedInts"/></li> - /// <li>UncompressedNumerics --> <seealso cref="DataOutput#writeByte Byte"/><sup>maxdoc</sup></li> - /// <li>Addresses --> <seealso cref="MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=4096)"/></li> - /// <li>Footer --> <seealso cref="CodecUtil#writeFooter CodecFooter"/></li> - /// </ul> - /// <p>SortedSet entries store the list of ordinals in their BinaryData as a - /// sequences of increasing <seealso cref="DataOutput#writeVLong vLong"/>s, delta-encoded.</p> - /// </ol> - /// <p> + /// is written for the addresses.</description></item> + /// <item><description><a name="dvd" id="dvd"></a> + /// <para>The DocValues data or .dvd file.</para> + /// <para>For DocValues field, this stores the actual per-document data (the heavy-lifting)</para> + /// <para>DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData><sup>NumFields</sup>,Footer</para> + /// <list type="bullet"> + /// <item><description>NumericData --> DeltaCompressedNumerics | 
TableCompressedNumerics | UncompressedNumerics | GCDCompressedNumerics</description></item> + /// <item><description>BinaryData --> Byte (<see cref="Store.DataOutput.WriteByte(byte)"/>) <sup>DataLength</sup>,Addresses</description></item> + /// <item><description>SortedData --> FST<Int64> (<see cref="Util.Fst.FST{T}"/>) </description></item> + /// <item><description>DeltaCompressedNumerics --> BlockPackedInts(blockSize=4096) (<see cref="Util.Packed.BlockPackedWriter"/>) </description></item> + /// <item><description>TableCompressedNumerics --> TableSize, Int64 (<see cref="Store.DataOutput.WriteInt64(long)"/>) <sup>TableSize</sup>, PackedInts (<see cref="PackedInt32s"/>) </description></item> + /// <item><description>UncompressedNumerics --> Byte (<see cref="Store.DataOutput.WriteByte(byte)"/>) <sup>maxdoc</sup></description></item> + /// <item><description>Addresses --> MonotonicBlockPackedInts(blockSize=4096) (<see cref="Util.Packed.MonotonicBlockPackedWriter"/>) </description></item> + /// <item><description>Footer --> CodecFooter (<see cref="CodecUtil.WriteFooter(Store.IndexOutput)"/>)</description></item> + /// </list> + /// <para>SortedSet entries store the list of ordinals in their BinaryData as a + /// sequence of increasing vLongs (<see cref="Store.DataOutput.WriteVInt64(long)"/>), delta-encoded.</para></description></item> + /// </list> + /// <para/> /// Limitations: - /// <ul> - /// <li> Binary doc values can be at most <seealso cref="#MAX_BINARY_FIELD_LENGTH"/> in length.
- /// </ul> </summary> - /// @deprecated Only for reading old 4.2 segments + /// <list type="bullet"> + /// <item><description> Binary doc values can be at most <see cref="MAX_BINARY_FIELD_LENGTH"/> in length.</description></item> + /// </list> + /// </summary> [Obsolete("Only for reading old 4.2 segments")] [DocValuesFormatName("Lucene42")] // LUCENENET specific - using DocValuesFormatName attribute to ensure the default name passed from subclasses is the same as this class name public class Lucene42DocValuesFormat : DocValuesFormat @@ -129,8 +129,7 @@ namespace Lucene.Net.Codecs.Lucene42 protected readonly float m_acceptableOverheadRatio; /// <summary> - /// Calls {@link #Lucene42DocValuesFormat(float) - /// Lucene42DocValuesFormat(PackedInts.DEFAULT)} + /// Calls <c>Lucene42DocValuesFormat(PackedInt32s.DEFAULT)</c> (<see cref="Lucene42DocValuesFormat(float)"/>). /// </summary> public Lucene42DocValuesFormat() : this(PackedInt32s.DEFAULT) @@ -138,12 +137,13 @@ namespace Lucene.Net.Codecs.Lucene42 } /// <summary> - /// Creates a new Lucene42DocValuesFormat with the specified - /// <code>acceptableOverheadRatio</code> for NumericDocValues. </summary> - /// <param name="acceptableOverheadRatio"> compression parameter for numerics. - /// Currently this is only used when the number of unique values is small. - /// - /// @lucene.experimental </param> + /// Creates a new <see cref="Lucene42DocValuesFormat"/> with the specified + /// <paramref name="acceptableOverheadRatio"/> for <see cref="Index.NumericDocValues"/>. + /// <para/> + /// @lucene.experimental + /// </summary> + /// <param name="acceptableOverheadRatio"> Compression parameter for numerics.
+ /// Currently this is only used when the number of unique values is small.</param> public Lucene42DocValuesFormat(float acceptableOverheadRatio) : base() { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ee52fd34/src/Lucene.Net/Codecs/Lucene42/Lucene42DocValuesProducer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene42/Lucene42DocValuesProducer.cs b/src/Lucene.Net/Codecs/Lucene42/Lucene42DocValuesProducer.cs index 4503588..dc52a9e 100644 --- a/src/Lucene.Net/Codecs/Lucene42/Lucene42DocValuesProducer.cs +++ b/src/Lucene.Net/Codecs/Lucene42/Lucene42DocValuesProducer.cs @@ -54,7 +54,7 @@ namespace Lucene.Net.Codecs.Lucene42 using Util = Lucene.Net.Util.Fst.Util; /// <summary> - /// Reader for <seealso cref="Lucene42DocValuesFormat"/> + /// Reader for <see cref="Lucene42DocValuesFormat"/>. /// </summary> internal class Lucene42DocValuesProducer : DocValuesProducer { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ee52fd34/src/Lucene.Net/Codecs/Lucene42/Lucene42FieldInfosFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene42/Lucene42FieldInfosFormat.cs b/src/Lucene.Net/Codecs/Lucene42/Lucene42FieldInfosFormat.cs index 8cc7e9b..6dd6820 100644 --- a/src/Lucene.Net/Codecs/Lucene42/Lucene42FieldInfosFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene42/Lucene42FieldInfosFormat.cs @@ -19,67 +19,64 @@ namespace Lucene.Net.Codecs.Lucene42 * limitations under the License. */ - // javadoc - // javadoc - /// <summary> /// Lucene 4.2 Field Infos format. 
- /// <p> - /// <p>Field names are stored in the field info file, with suffix <tt>.fnm</tt>.</p> - /// <p>FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber, - /// FieldBits,DocValuesBits,Attributes> <sup>FieldsCount</sup></p> - /// <p>Data types: - /// <ul> - /// <li>Header --> <seealso cref="CodecUtil#checkHeader CodecHeader"/></li> - /// <li>FieldsCount --> <seealso cref="DataOutput#writeVInt VInt"/></li> - /// <li>FieldName --> <seealso cref="DataOutput#writeString String"/></li> - /// <li>FieldBits, DocValuesBits --> <seealso cref="DataOutput#writeByte Byte"/></li> - /// <li>FieldNumber --> <seealso cref="DataOutput#writeInt VInt"/></li> - /// <li>Attributes --> <seealso cref="DataOutput#writeStringStringMap Map<String,String>"/></li> - /// </ul> - /// </p> + /// <para/> + /// <para>Field names are stored in the field info file, with suffix <c>.fnm</c>.</para> + /// <para>FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber, + /// FieldBits,DocValuesBits,Attributes> <sup>FieldsCount</sup></para> + /// <para>Data types: + /// <list type="bullet"> + /// <item><description>Header --> CodecHeader <see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/></description></item> + /// <item><description>FieldsCount --> VInt <see cref="Store.DataOutput.WriteVInt32(int)"/></description></item> + /// <item><description>FieldName --> String <see cref="Store.DataOutput.WriteString(string)"/></description></item> + /// <item><description>FieldBits, DocValuesBits --> Byte <see cref="Store.DataOutput.WriteByte(byte)"/></description></item> + /// <item><description>FieldNumber --> VInt <see cref="Store.DataOutput.WriteInt32(int)"/></description></item> + /// <item><description>Attributes --> IDictionary<String,String> <see cref="Store.DataOutput.WriteStringStringMap(System.Collections.Generic.IDictionary{string, string})"/></description></item> + /// </list> + /// </para> /// Field Descriptions: - /// <ul> - /// <li>FieldsCount: the number of 
fields in this file.</li> - /// <li>FieldName: name of the field as a UTF-8 String.</li> - /// <li>FieldNumber: the field's number. Note that unlike previous versions of + /// <list type="bullet"> + /// <item><description>FieldsCount: the number of fields in this file.</description></item> + /// <item><description>FieldName: name of the field as a UTF-8 String.</description></item> + /// <item><description>FieldNumber: the field's number. Note that unlike previous versions of /// Lucene, the fields are not numbered implicitly by their order in the - /// file, instead explicitly.</li> - /// <li>FieldBits: a byte containing field options. - /// <ul> - /// <li>The low-order bit is one for indexed fields, and zero for non-indexed - /// fields.</li> - /// <li>The second lowest-order bit is one for fields that have term vectors - /// stored, and zero for fields without term vectors.</li> - /// <li>If the third lowest order-bit is set (0x4), offsets are stored into - /// the postings list in addition to positions.</li> - /// <li>Fourth bit is unused.</li> - /// <li>If the fifth lowest-order bit is set (0x10), norms are omitted for the - /// indexed field.</li> - /// <li>If the sixth lowest-order bit is set (0x20), payloads are stored for the - /// indexed field.</li> - /// <li>If the seventh lowest-order bit is set (0x40), term frequencies and - /// positions omitted for the indexed field.</li> - /// <li>If the eighth lowest-order bit is set (0x80), positions are omitted for the - /// indexed field.</li> - /// </ul> - /// </li> - /// <li>DocValuesBits: a byte containing per-document value types. The type + /// file, instead explicitly.</description></item> + /// <item><description>FieldBits: a byte containing field options. 
+ /// <list type="bullet"> + /// <item><description>The low-order bit is one for indexed fields, and zero for non-indexed + /// fields.</description></item> + /// <item><description>The second lowest-order bit is one for fields that have term vectors + /// stored, and zero for fields without term vectors.</description></item> + /// <item><description>If the third lowest-order bit is set (0x4), offsets are stored into + /// the postings list in addition to positions.</description></item> + /// <item><description>Fourth bit is unused.</description></item> + /// <item><description>If the fifth lowest-order bit is set (0x10), norms are omitted for the + /// indexed field.</description></item> + /// <item><description>If the sixth lowest-order bit is set (0x20), payloads are stored for the + /// indexed field.</description></item> + /// <item><description>If the seventh lowest-order bit is set (0x40), term frequencies and + /// positions are omitted for the indexed field.</description></item> + /// <item><description>If the eighth lowest-order bit is set (0x80), positions are omitted for the + /// indexed field.</description></item> + /// </list> + /// </description></item> + /// <item><description>DocValuesBits: a byte containing per-document value types. The type /// recorded as two four-bit integers, with the high-order bits representing - /// <code>norms</code> options, and the low-order bits representing - /// {@code DocValues} options. Each four-bit integer can be decoded as such: - /// <ul> - /// <li>0: no DocValues for this field.</li> - /// <li>1: NumericDocValues. (<seealso cref="DocValuesType#NUMERIC"/>)</li> - /// <li>2: BinaryDocValues. ({@code DocValuesType#BINARY})</li> - /// <li>3: SortedDocValues.
({@code DocValuesType#SORTED})</li> - /// </ul> - /// </li> - /// <li>Attributes: a key-value map of codec-private attributes.</li> - /// </ul> - /// - /// @lucene.experimental </summary> - /// @deprecated Only for reading old 4.2-4.5 segments + /// <c>norms</c> options, and the low-order bits representing + /// <see cref="Index.DocValues"/> options. Each four-bit integer can be decoded as such: + /// <list type="bullet"> + /// <item><description>0: no DocValues for this field.</description></item> + /// <item><description>1: NumericDocValues. (<see cref="Index.DocValuesType.NUMERIC"/>)</description></item> + /// <item><description>2: BinaryDocValues. (<see cref="Index.DocValuesType.BINARY"/>)</description></item> + /// <item><description>3: SortedDocValues. (<see cref="Index.DocValuesType.SORTED"/>)</description></item> + /// </list> + /// </description></item> + /// <item><description>Attributes: a key-value map of codec-private attributes.</description></item> + /// </list> + /// <para/> + /// @lucene.experimental + /// </summary> [Obsolete("Only for reading old 4.2-4.5 segments")] public class Lucene42FieldInfosFormat : FieldInfosFormat { @@ -108,7 +105,7 @@ namespace Lucene.Net.Codecs.Lucene42 } /// <summary> - /// Extension of field infos </summary> + /// Extension of field infos. </summary> internal const string EXTENSION = "fnm"; // Codec header http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ee52fd34/src/Lucene.Net/Codecs/Lucene42/Lucene42FieldInfosReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene42/Lucene42FieldInfosReader.cs b/src/Lucene.Net/Codecs/Lucene42/Lucene42FieldInfosReader.cs index b81c62d..7c5bff8 100644 --- a/src/Lucene.Net/Codecs/Lucene42/Lucene42FieldInfosReader.cs +++ b/src/Lucene.Net/Codecs/Lucene42/Lucene42FieldInfosReader.cs @@ -34,10 +34,10 @@ namespace Lucene.Net.Codecs.Lucene42 /// <summary> /// Lucene 4.2 FieldInfos reader. 
- /// - /// @lucene.experimental </summary> - /// @deprecated Only for reading old 4.2-4.5 segments - /// <seealso cref= Lucene42FieldInfosFormat </seealso> + /// <para/> + /// @lucene.experimental + /// </summary> + /// <seealso cref="Lucene42FieldInfosFormat"/> [Obsolete("Only for reading old 4.2-4.5 segments")] internal sealed class Lucene42FieldInfosReader : FieldInfosReader { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ee52fd34/src/Lucene.Net/Codecs/Lucene42/Lucene42NormsConsumer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene42/Lucene42NormsConsumer.cs b/src/Lucene.Net/Codecs/Lucene42/Lucene42NormsConsumer.cs index fa445de..3351309 100644 --- a/src/Lucene.Net/Codecs/Lucene42/Lucene42NormsConsumer.cs +++ b/src/Lucene.Net/Codecs/Lucene42/Lucene42NormsConsumer.cs @@ -34,7 +34,7 @@ namespace Lucene.Net.Codecs.Lucene42 using SegmentWriteState = Lucene.Net.Index.SegmentWriteState; /// <summary> - /// Writer for <seealso cref="Lucene42NormsFormat"/> + /// Writer for <see cref="Lucene42NormsFormat"/>. /// </summary> internal class Lucene42NormsConsumer : DocValuesConsumer { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ee52fd34/src/Lucene.Net/Codecs/Lucene42/Lucene42NormsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene42/Lucene42NormsFormat.cs b/src/Lucene.Net/Codecs/Lucene42/Lucene42NormsFormat.cs index 66e0c3c..7884efe 100644 --- a/src/Lucene.Net/Codecs/Lucene42/Lucene42NormsFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene42/Lucene42NormsFormat.cs @@ -23,25 +23,25 @@ namespace Lucene.Net.Codecs.Lucene42 /// <summary> /// Lucene 4.2 score normalization format. 
- /// <p> - /// NOTE: this uses the same format as <seealso cref="Lucene42DocValuesFormat"/> + /// <para/> + /// NOTE: this uses the same format as <see cref="Lucene42DocValuesFormat"/> /// Numeric DocValues, but with different file extensions, and passing - /// <seealso cref="PackedInt32s#FASTEST"/> for uncompressed encoding: trading off + /// <see cref="PackedInt32s.FASTEST"/> for uncompressed encoding: trading off /// space for performance. - /// <p> + /// <para/> /// Files: - /// <ul> - /// <li><tt>.nvd</tt>: DocValues data</li> - /// <li><tt>.nvm</tt>: DocValues metadata</li> - /// </ul> </summary> - /// <seealso cref= Lucene42DocValuesFormat </seealso> + /// <list type="bullet"> + /// <item><description><c>.nvd</c>: DocValues data</description></item> + /// <item><description><c>.nvm</c>: DocValues metadata</description></item> + /// </list> + /// </summary> + /// <seealso cref="Lucene42DocValuesFormat"/> public class Lucene42NormsFormat : NormsFormat { internal readonly float acceptableOverheadRatio; /// <summary> - /// Calls {@link #Lucene42NormsFormat(float) - /// Lucene42DocValuesFormat(PackedInts.FASTEST)} + /// Calls <c>Lucene42NormsFormat(PackedInt32s.FASTEST)</c> (<see cref="Lucene42NormsFormat(float)"/>). /// </summary> public Lucene42NormsFormat() : this(PackedInt32s.FASTEST) @@ -50,12 +50,13 @@ namespace Lucene.Net.Codecs.Lucene42 } /// <summary> - /// Creates a new Lucene42DocValuesFormat with the specified - /// <code>acceptableOverheadRatio</code> for NumericDocValues. </summary> - /// <param name="acceptableOverheadRatio"> compression parameter for numerics. - /// Currently this is only used when the number of unique values is small. - /// - /// @lucene.experimental </param> + /// Creates a new <see cref="Lucene42NormsFormat"/> with the specified + /// <paramref name="acceptableOverheadRatio"/> for <see cref="Index.NumericDocValues"/>.
+ /// <para/> + /// @lucene.experimental + /// </summary> + /// <param name="acceptableOverheadRatio"> Compression parameter for numerics. + /// Currently this is only used when the number of unique values is small.</param> public Lucene42NormsFormat(float acceptableOverheadRatio) { this.acceptableOverheadRatio = acceptableOverheadRatio; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ee52fd34/src/Lucene.Net/Codecs/Lucene42/Lucene42TermVectorsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene42/Lucene42TermVectorsFormat.cs b/src/Lucene.Net/Codecs/Lucene42/Lucene42TermVectorsFormat.cs index 27e491e..9c46c86 100644 --- a/src/Lucene.Net/Codecs/Lucene42/Lucene42TermVectorsFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene42/Lucene42TermVectorsFormat.cs @@ -21,98 +21,103 @@ namespace Lucene.Net.Codecs.Lucene42 using CompressionMode = Lucene.Net.Codecs.Compressing.CompressionMode; /// <summary> - /// Lucene 4.2 <seealso cref="TermVectorsFormat term vectors format"/>. - /// <p> - /// Very similarly to <seealso cref="Lucene41StoredFieldsFormat"/>, this format is based + /// Lucene 4.2 term vectors format (<see cref="TermVectorsFormat"/>). + /// <para/> + /// Very similarly to <see cref="Lucene41.Lucene41StoredFieldsFormat"/>, this format is based /// on compressed chunks of data, with document-level granularity so that a /// document can never span across distinct chunks. Moreover, data is made as - /// compact as possible:<ul> - /// <li>textual data is compressed using the very light, - /// <a href="http://code.google.com/p/lz4/">LZ4</a> compression algorithm, - /// <li>binary data is written using fixed-size blocks of - /// <seealso cref="PackedInts packed ints"/>. 
- /// </ul> - /// <p> - /// Term vectors are stored using two files<ul> - /// <li>a data file where terms, frequencies, positions, offsets and payloads - /// are stored, - /// <li>an index file, loaded into memory, used to locate specific documents in - /// the data file. - /// </ul> + /// compact as possible: + /// <list type="bullet"> + /// <item><description>textual data is compressed using the very light, + /// <a href="http://code.google.com/p/lz4/">LZ4</a> compression algorithm,</description></item> + /// <item><description>binary data is written using fixed-size blocks of + /// packed <see cref="int"/>s (<see cref="Util.Packed.PackedInt32s"/>).</description></item> + /// </list> + /// <para/> + /// Term vectors are stored using two files + /// <list type="bullet"> + /// <item><description>a data file where terms, frequencies, positions, offsets and payloads + /// are stored,</description></item> + /// <item><description>an index file, loaded into memory, used to locate specific documents in + /// the data file.</description></item> + /// </list> /// Looking up term vectors for any document requires at most 1 disk seek. - /// <p><b>File formats</b> - /// <ol> - /// <li><a name="vector_data" id="vector_data"></a> - /// <p>A vector data file (extension <tt>.tvd</tt>). this file stores terms, - /// frequencies, positions, offsets and payloads for every document. Upon writing - /// a new segment, it accumulates data into memory until the buffer used to store - /// terms and payloads grows beyond 4KB. 
Then it flushes all metadata, terms - /// and positions to disk using <a href="http://code.google.com/p/lz4/">LZ4</a> - /// compression for terms and payloads and - /// <seealso cref="BlockPackedWriter blocks of packed ints"/> for positions.</p> - /// <p>Here is a more detailed description of the field data file format:</p> - /// <ul> - /// <li>VectorData (.tvd) --> <Header>, PackedIntsVersion, ChunkSize, <Chunk><sup>ChunkCount</sup>, Footer</li> - /// <li>Header --> <seealso cref="CodecUtil#writeHeader CodecHeader"/></li> - /// <li>PackedIntsVersion --> <seealso cref="PackedInts#VERSION_CURRENT"/> as a <seealso cref="DataOutput#writeVInt VInt"/></li> - /// <li>ChunkSize is the number of bytes of terms to accumulate before flushing, as a <seealso cref="DataOutput#writeVInt VInt"/></li> - /// <li>ChunkCount is not known in advance and is the number of chunks necessary to store all document of the segment</li> - /// <li>Chunk --> DocBase, ChunkDocs, < NumFields >, < FieldNums >, < FieldNumOffs >, < Flags >, - /// < NumTerms >, < TermLengths >, < TermFreqs >, < Positions >, < StartOffsets >, < Lengths >, - /// < PayloadLengths >, < TermAndPayloads ></li> - /// <li>DocBase is the ID of the first doc of the chunk as a <seealso cref="DataOutput#writeVInt VInt"/></li> - /// <li>ChunkDocs is the number of documents in the chunk</li> - /// <li>NumFields --> DocNumFields<sup>ChunkDocs</sup></li> - /// <li>DocNumFields is the number of fields for each doc, written as a <seealso cref="DataOutput#writeVInt VInt"/> if ChunkDocs==1 and as a <seealso cref="PackedInts"/> array otherwise</li> - /// <li>FieldNums --> FieldNumDelta<sup>TotalDistincFields</sup>, a delta-encoded list of the sorted unique field numbers present in the chunk</li> - /// <li>FieldNumOffs --> FieldNumOff<sup>TotalFields</sup>, as a <seealso cref="PackedInts"/> array</li> - /// <li>FieldNumOff is the offset of the field number in FieldNums</li> - /// <li>TotalFields is the total number of fields (sum of the 
values of NumFields)</li> - /// <li>Flags --> Bit < FieldFlags ></li> - /// <li>Bit is a single bit which when true means that fields have the same options for every document in the chunk</li> - /// <li>FieldFlags --> if Bit==1: Flag<sup>TotalDistinctFields</sup> else Flag<sup>TotalFields</sup></li> - /// <li>Flag: a 3-bits int where:<ul> - /// <li>the first bit means that the field has positions</li> - /// <li>the second bit means that the field has offsets</li> - /// <li>the third bit means that the field has payloads</li> - /// </ul></li> - /// <li>NumTerms --> FieldNumTerms<sup>TotalFields</sup></li> - /// <li>FieldNumTerms: the number of terms for each field, using <seealso cref="BlockPackedWriter blocks of 64 packed ints"/></li> - /// <li>TermLengths --> PrefixLength<sup>TotalTerms</sup> SuffixLength<sup>TotalTerms</sup></li> - /// <li>TotalTerms: total number of terms (sum of NumTerms)</li> - /// <li>PrefixLength: 0 for the first term of a field, the common prefix with the previous term otherwise using <seealso cref="BlockPackedWriter blocks of 64 packed ints"/></li> - /// <li>SuffixLength: length of the term minus PrefixLength for every term using <seealso cref="BlockPackedWriter blocks of 64 packed ints"/></li> - /// <li>TermFreqs --> TermFreqMinus1<sup>TotalTerms</sup></li> - /// <li>TermFreqMinus1: (frequency - 1) for each term using <seealso cref="BlockPackedWriter blocks of 64 packed ints"/></li> - /// <li>Positions --> PositionDelta<sup>TotalPositions</sup></li> - /// <li>TotalPositions is the sum of frequencies of terms of all fields that have positions</li> - /// <li>PositionDelta: the absolute position for the first position of a term, and the difference with the previous positions for following positions using <seealso cref="BlockPackedWriter blocks of 64 packed ints"/></li> - /// <li>StartOffsets --> (AvgCharsPerTerm<sup>TotalDistinctFields</sup>) StartOffsetDelta<sup>TotalOffsets</sup></li> - /// <li>TotalOffsets is the sum of frequencies of 
terms of all fields that have offsets</li> - /// <li>AvgCharsPerTerm: average number of chars per term, encoded as a float on 4 bytes. They are not present if no field has both positions and offsets enabled.</li> - /// <li>StartOffsetDelta: (startOffset - previousStartOffset - AvgCharsPerTerm * PositionDelta). previousStartOffset is 0 for the first offset and AvgCharsPerTerm is 0 if the field has no positions using <seealso cref="BlockPackedWriter blocks of 64 packed ints"/></li> - /// <li>Lengths --> LengthMinusTermLength<sup>TotalOffsets</sup></li> - /// <li>LengthMinusTermLength: (endOffset - startOffset - termLength) using <seealso cref="BlockPackedWriter blocks of 64 packed ints"/></li> - /// <li>PayloadLengths --> PayloadLength<sup>TotalPayloads</sup></li> - /// <li>TotalPayloads is the sum of frequencies of terms of all fields that have payloads</li> - /// <li>PayloadLength is the payload length encoded using <seealso cref="BlockPackedWriter blocks of 64 packed ints"/></li> - /// <li>TermAndPayloads --> LZ4-compressed representation of < FieldTermsAndPayLoads ><sup>TotalFields</sup></li> - /// <li>FieldTermsAndPayLoads --> Terms (Payloads)</li> - /// <li>Terms: term bytes</li> - /// <li>Payloads: payload bytes (if the field has payloads)</li> - /// <li>Footer --> <seealso cref="CodecUtil#writeFooter CodecFooter"/></li> - /// </ul> - /// </li> - /// <li><a name="vector_index" id="vector_index"></a> - /// <p>An index file (extension <tt>.tvx</tt>).</p> - /// <ul> - /// <li>VectorIndex (.tvx) --> <Header>, <ChunkIndex>, Footer</li> - /// <li>Header --> <seealso cref="CodecUtil#writeHeader CodecHeader"/></li> - /// <li>ChunkIndex: See <seealso cref="CompressingStoredFieldsIndexWriter"/></li> - /// <li>Footer --> <seealso cref="CodecUtil#writeFooter CodecFooter"/></li> - /// </ul> - /// </li> - /// </ol> + /// <para/><b>File formats</b> + /// <list type="number"> + /// <item><description><a name="vector_data" id="vector_data"></a> + /// <para>A vector data file 
(extension <c>.tvd</c>). this file stores terms, + /// frequencies, positions, offsets and payloads for every document. Upon writing + /// a new segment, it accumulates data into memory until the buffer used to store + /// terms and payloads grows beyond 4KB. Then it flushes all metadata, terms + /// and positions to disk using <a href="http://code.google.com/p/lz4/">LZ4</a> + /// compression for terms and payloads and + /// blocks of packed <see cref="int"/>s (<see cref="Util.Packed.BlockPackedWriter"/>) for positions.</para> + /// <para>Here is a more detailed description of the field data file format:</para> + /// <list type="bullet"> + /// <item><description>VectorData (.tvd) --> <Header>, PackedIntsVersion, ChunkSize, <Chunk><sup>ChunkCount</sup>, Footer</description></item> + /// <item><description>Header --> CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) </description></item> + /// <item><description>PackedIntsVersion --> <see cref="Util.Packed.PackedInt32s.VERSION_CURRENT"/> as a VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> + /// <item><description>ChunkSize is the number of bytes of terms to accumulate before flushing, as a VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> + /// <item><description>ChunkCount is not known in advance and is the number of chunks necessary to store all document of the segment</description></item> + /// <item><description>Chunk --> DocBase, ChunkDocs, < NumFields >, < FieldNums >, < FieldNumOffs >, < Flags >, + /// < NumTerms >, < TermLengths >, < TermFreqs >, < Positions >, < StartOffsets >, < Lengths >, + /// < PayloadLengths >, < TermAndPayloads ></description></item> + /// <item><description>DocBase is the ID of the first doc of the chunk as a VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> + /// <item><description>ChunkDocs is the number of documents in the chunk</description></item> + /// 
<item><description>NumFields --> DocNumFields<sup>ChunkDocs</sup></description></item> + /// <item><description>DocNumFields is the number of fields for each doc, written as a VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) if ChunkDocs==1 and as a <see cref="Util.Packed.PackedInt32s"/> array otherwise</description></item> + /// <item><description>FieldNums --> FieldNumDelta<sup>TotalDistincFields</sup>, a delta-encoded list of the sorted unique field numbers present in the chunk</description></item> + /// <item><description>FieldNumOffs --> FieldNumOff<sup>TotalFields</sup>, as a <see cref="Util.Packed.PackedInt32s"/> array</description></item> + /// <item><description>FieldNumOff is the offset of the field number in FieldNums</description></item> + /// <item><description>TotalFields is the total number of fields (sum of the values of NumFields)</description></item> + /// <item><description>Flags --> Bit < FieldFlags ></description></item> + /// <item><description>Bit is a single bit which when true means that fields have the same options for every document in the chunk</description></item> + /// <item><description>FieldFlags --> if Bit==1: Flag<sup>TotalDistinctFields</sup> else Flag<sup>TotalFields</sup></description></item> + /// <item><description>Flag: a 3-bits int where: + /// <list type="bullet"> + /// <item><description>the first bit means that the field has positions</description></item> + /// <item><description>the second bit means that the field has offsets</description></item> + /// <item><description>the third bit means that the field has payloads</description></item> + /// </list> + /// </description></item> + /// <item><description>NumTerms --> FieldNumTerms<sup>TotalFields</sup></description></item> + /// <item><description>FieldNumTerms: the number of terms for each field, using blocks of 64 packed <see cref="int"/>s (<see cref="Util.Packed.BlockPackedWriter"/>) </description></item> + /// <item><description>TermLengths --> 
PrefixLength<sup>TotalTerms</sup> SuffixLength<sup>TotalTerms</sup></description></item> + /// <item><description>TotalTerms: total number of terms (sum of NumTerms)</description></item> + /// <item><description>PrefixLength: 0 for the first term of a field, the common prefix with the previous term otherwise using blocks of 64 packed <see cref="int"/>s (<see cref="Util.Packed.BlockPackedWriter"/>) </description></item> + /// <item><description>SuffixLength: length of the term minus PrefixLength for every term using blocks of 64 packed <see cref="int"/>s (<see cref="Util.Packed.BlockPackedWriter"/>) </description></item> + /// <item><description>TermFreqs --> TermFreqMinus1<sup>TotalTerms</sup></description></item> + /// <item><description>TermFreqMinus1: (frequency - 1) for each term using blocks of 64 packed <see cref="int"/>s (<see cref="Util.Packed.BlockPackedWriter"/>) </description></item> + /// <item><description>Positions --> PositionDelta<sup>TotalPositions</sup></description></item> + /// <item><description>TotalPositions is the sum of frequencies of terms of all fields that have positions</description></item> + /// <item><description>PositionDelta: the absolute position for the first position of a term, and the difference with the previous positions for following positions using blocks of 64 packed <see cref="int"/>s (<see cref="Util.Packed.BlockPackedWriter"/>) </description></item> + /// <item><description>StartOffsets --> (AvgCharsPerTerm<sup>TotalDistinctFields</sup>) StartOffsetDelta<sup>TotalOffsets</sup></description></item> + /// <item><description>TotalOffsets is the sum of frequencies of terms of all fields that have offsets</description></item> + /// <item><description>AvgCharsPerTerm: average number of chars per term, encoded as a float on 4 bytes. 
They are not present if no field has both positions and offsets enabled.</description></item> + /// <item><description>StartOffsetDelta: (startOffset - previousStartOffset - AvgCharsPerTerm * PositionDelta). previousStartOffset is 0 for the first offset and AvgCharsPerTerm is 0 if the field has no positions using blocks of 64 packed <see cref="int"/>s (<see cref="Util.Packed.BlockPackedWriter"/>) </description></item> + /// <item><description>Lengths --> LengthMinusTermLength<sup>TotalOffsets</sup></description></item> + /// <item><description>LengthMinusTermLength: (endOffset - startOffset - termLength) using blocks of 64 packed <see cref="int"/>s (<see cref="Util.Packed.BlockPackedWriter"/>) </description></item> + /// <item><description>PayloadLengths --> PayloadLength<sup>TotalPayloads</sup></description></item> + /// <item><description>TotalPayloads is the sum of frequencies of terms of all fields that have payloads</description></item> + /// <item><description>PayloadLength is the payload length encoded using blocks of 64 packed <see cref="int"/>s (<see cref="Util.Packed.BlockPackedWriter"/>) </description></item> + /// <item><description>TermAndPayloads --> LZ4-compressed representation of < FieldTermsAndPayLoads ><sup>TotalFields</sup></description></item> + /// <item><description>FieldTermsAndPayLoads --> Terms (Payloads)</description></item> + /// <item><description>Terms: term bytes</description></item> + /// <item><description>Payloads: payload bytes (if the field has payloads)</description></item> + /// <item><description>Footer --> CodecFooter (<see cref="CodecUtil.WriteFooter(Store.IndexOutput)"/>) </description></item> + /// </list> + /// </description></item> + /// <item><description><a name="vector_index" id="vector_index"></a> + /// <para>An index file (extension <c>.tvx</c>).</para> + /// <list type="bullet"> + /// <item><description>VectorIndex (.tvx) --> <Header>, <ChunkIndex>, Footer</description></item> + /// <item><description>Header --> 
CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) </description></item> + /// <item><description>ChunkIndex: See <see cref="Compressing.CompressingStoredFieldsIndexWriter"/></description></item> + /// <item><description>Footer --> CodecFooter (<see cref="CodecUtil.WriteFooter(Store.IndexOutput)"/>) </description></item> + /// </list> + /// </description></item> + /// </list> + /// <para/> /// @lucene.experimental /// </summary> public sealed class Lucene42TermVectorsFormat : CompressingTermVectorsFormat
