Lucene.Net.Codecs: member accessibility
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/c602c98f Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/c602c98f Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/c602c98f Branch: refs/heads/api-work Commit: c602c98f3119bc345bc80a08a4d2f2d49e788261 Parents: 2860403 Author: Shad Storhaug <[email protected]> Authored: Sun Jan 29 13:52:35 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Sun Jan 29 13:52:35 2017 +0700 ---------------------------------------------------------------------- .../Appending/AppendingCodec.cs | 4 +- .../Appending/AppendingPostingsFormat.cs | 5 +- .../Appending/AppendingTermsReader.cs | 6 +- .../BlockTerms/BlockTermsReader.cs | 144 ++++--- .../BlockTerms/BlockTermsWriter.cs | 97 ++--- .../BlockTerms/FixedGapTermsIndexReader.cs | 174 ++++----- .../BlockTerms/FixedGapTermsIndexWriter.cs | 166 ++++---- .../BlockTerms/TermsIndexReaderBase.cs | 37 +- .../BlockTerms/TermsIndexWriterBase.cs | 9 +- .../BlockTerms/VariableGapTermsIndexReader.cs | 166 ++++---- .../BlockTerms/VariableGapTermsIndexWriter.cs | 47 +-- .../Bloom/BloomFilterFactory.cs | 4 +- .../Bloom/BloomFilteringPostingsFormat.cs | 93 +++-- .../Bloom/DefaultBloomFilterFactory.cs | 2 - src/Lucene.Net.Codecs/Bloom/FuzzySet.cs | 50 ++- src/Lucene.Net.Codecs/Bloom/HashFunction.cs | 1 - src/Lucene.Net.Codecs/Bloom/MurmurHash2.cs | 6 +- .../DiskDV/DiskDocValuesFormat.cs | 13 +- .../DiskDV/DiskDocValuesProducer.cs | 11 +- src/Lucene.Net.Codecs/DiskDV/DiskNormsFormat.cs | 9 +- src/Lucene.Net.Codecs/HashMapHelperClass.cs | 4 +- .../Intblock/FixedIntBlockIndexInput.cs | 19 +- .../Intblock/FixedIntBlockIndexOutput.cs | 8 +- .../Intblock/VariableIntBlockIndexInput.cs | 20 +- .../Intblock/VariableIntBlockIndexOutput.cs | 18 +- .../Memory/DirectDocValuesConsumer.cs | 6 +- .../Memory/DirectDocValuesFormat.cs | 14 +- .../Memory/DirectDocValuesProducer.cs | 2 +- .../Memory/DirectPostingsFormat.cs | 158 ++++---- .../Memory/FSTOrdPostingsFormat.cs | 4 +- .../Memory/FSTOrdPulsing41PostingsFormat.cs | 6 +- .../Memory/FSTOrdTermsReader.cs | 66 ++-- .../Memory/FSTOrdTermsWriter.cs | 14 +- .../Memory/FSTPostingsFormat.cs | 4 +- .../Memory/FSTPulsing41PostingsFormat.cs | 7 +- src/Lucene.Net.Codecs/Memory/FSTTermOutputs.cs | 17 +- src/Lucene.Net.Codecs/Memory/FSTTermsReader.cs | 116 +++--- src/Lucene.Net.Codecs/Memory/FSTTermsWriter.cs | 2 +- .../Memory/MemoryDocValuesConsumer.cs | 19 +- .../Memory/MemoryDocValuesFormat.cs | 19 +- .../Memory/MemoryDocValuesProducer.cs | 15 +- .../Memory/MemoryPostingsFormat.cs | 168 ++++----- .../Pulsing/Pulsing41PostingsFormat.cs | 12 +- .../Pulsing/PulsingPostingsFormat.cs | 22 +- .../Pulsing/PulsingPostingsReader.cs | 317 ++++++++-------- .../Pulsing/PulsingPostingsWriter.cs | 18 +- src/Lucene.Net.Codecs/RectangularArrays.cs | 2 +- src/Lucene.Net.Codecs/Sep/IntIndexInput.cs | 8 +- src/Lucene.Net.Codecs/Sep/IntIndexOutput.cs | 9 +- src/Lucene.Net.Codecs/Sep/IntStreamFactory.cs | 2 - src/Lucene.Net.Codecs/Sep/SepPostingsReader.cs | 179 +++++---- src/Lucene.Net.Codecs/Sep/SepPostingsWriter.cs | 47 ++- src/Lucene.Net.Codecs/Sep/SepSkipListReader.cs | 2 - src/Lucene.Net.Codecs/Sep/SepSkipListWriter.cs | 376 +++++++++---------- .../SimpleText/SimpleTextCodec.cs | 4 +- .../SimpleText/SimpleTextDocValuesFormat.cs | 7 +- .../SimpleText/SimpleTextDocValuesReader.cs | 353 +++++++++-------- .../SimpleText/SimpleTextDocValuesWriter.cs | 64 ++-- .../SimpleText/SimpleTextFieldInfosReader.cs | 2 +- .../SimpleText/SimpleTextFieldInfosWriter.cs | 2 - .../SimpleText/SimpleTextFieldsReader.cs | 27 +- .../SimpleText/SimpleTextFieldsWriter.cs | 40 +- .../SimpleText/SimpleTextLiveDocsFormat.cs | 22 +- .../SimpleText/SimpleTextNormsFormat.cs | 6 +- .../SimpleText/SimpleTextPostingsFormat.cs | 13 +- .../SimpleText/SimpleTextSegmentInfoFormat.cs | 44 ++- .../SimpleText/SimpleTextSegmentInfoReader.cs | 3 +- .../SimpleText/SimpleTextSegmentInfoWriter.cs | 2 - .../SimpleText/SimpleTextStoredFieldsFormat.cs | 1 - .../SimpleText/SimpleTextStoredFieldsReader.cs | 2 +- .../SimpleText/SimpleTextStoredFieldsWriter.cs | 3 +- .../SimpleText/SimpleTextTermVectorsFormat.cs | 1 - .../SimpleText/SimpleTextTermVectorsReader.cs | 41 +- .../SimpleText/SimpleTextTermVectorsWriter.cs | 1 - .../SimpleText/SimpleTextUtil.cs | 4 +- src/Lucene.Net.Codecs/StringHelperClass.cs | 2 + .../Index/DocsAndPositionsEnum.cs | 2 +- src/Lucene.Net.Core/Index/Fields.cs | 2 +- 78 files changed, 1667 insertions(+), 1725 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/Appending/AppendingCodec.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/Appending/AppendingCodec.cs b/src/Lucene.Net.Codecs/Appending/AppendingCodec.cs index fbba94c..3b87595 100644 --- a/src/Lucene.Net.Codecs/Appending/AppendingCodec.cs +++ b/src/Lucene.Net.Codecs/Appending/AppendingCodec.cs @@ -34,7 +34,8 @@ namespace Lucene.Net.Codecs.Appending { private readonly PostingsFormat _postings = new AppendingPostingsFormat(); - public AppendingCodec() : base("Appending", new Lucene40Codec()) + public AppendingCodec() + : base("Appending", new Lucene40Codec()) { } @@ -43,6 +44,5 @@ namespace Lucene.Net.Codecs.Appending get { return _postings; } } } - } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/Appending/AppendingPostingsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/Appending/AppendingPostingsFormat.cs b/src/Lucene.Net.Codecs/Appending/AppendingPostingsFormat.cs index 3270383..d15d75c 100644 --- a/src/Lucene.Net.Codecs/Appending/AppendingPostingsFormat.cs +++ b/src/Lucene.Net.Codecs/Appending/AppendingPostingsFormat.cs @@ -26,9 +26,10 @@ namespace Lucene.Net.Codecs.Appending /// </summary> internal class AppendingPostingsFormat : PostingsFormat { - public static String CODEC_NAME = "Appending"; + public static string CODEC_NAME = "Appending"; - public AppendingPostingsFormat() : base(CODEC_NAME) + public AppendingPostingsFormat() + : base(CODEC_NAME) {} public override FieldsConsumer FieldsConsumer(SegmentWriteState state) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/Appending/AppendingTermsReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/Appending/AppendingTermsReader.cs b/src/Lucene.Net.Codecs/Appending/AppendingTermsReader.cs index edd6035..3929f83 100644 --- a/src/Lucene.Net.Codecs/Appending/AppendingTermsReader.cs +++ b/src/Lucene.Net.Codecs/Appending/AppendingTermsReader.cs @@ -29,12 +29,12 @@ namespace Lucene.Net.Codecs.Appending [Obsolete("Only for reading old Appending segments")] public class AppendingTermsReader : BlockTreeTermsReader { - private const String APPENDING_TERMS_CODEC_NAME = "APPENDING_TERMS_DICT"; - private const String APPENDING_TERMS_INDEX_CODEC_NAME = "APPENDING_TERMS_INDEX"; + private const string APPENDING_TERMS_CODEC_NAME = "APPENDING_TERMS_DICT"; + private const string APPENDING_TERMS_INDEX_CODEC_NAME = "APPENDING_TERMS_INDEX"; public AppendingTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, - IOContext ioContext, String segmentSuffix, int indexDivisor) + IOContext ioContext, string segmentSuffix, int indexDivisor) : base(dir, fieldInfos, info, postingsReader, ioContext, segmentSuffix, indexDivisor) { } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/BlockTerms/BlockTermsReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/BlockTerms/BlockTermsReader.cs b/src/Lucene.Net.Codecs/BlockTerms/BlockTermsReader.cs index 8ab0ef6..b287f95 100644 --- a/src/Lucene.Net.Codecs/BlockTerms/BlockTermsReader.cs +++ b/src/Lucene.Net.Codecs/BlockTerms/BlockTermsReader.cs @@ -41,7 +41,6 @@ namespace Lucene.Net.Codecs.BlockTerms /// </summary> public class BlockTermsReader : FieldsProducer { - // Open input to the main terms dict file (_X.tis) private readonly IndexInput _input; @@ -59,9 +58,46 @@ namespace Lucene.Net.Codecs.BlockTerms private readonly int _version; + /// <summary> + /// Used as a key for the terms cache + /// </summary> + private class FieldAndTerm : DoubleBarrelLRUCache.CloneableKey + { + public string Field { get; set; } + private BytesRef Term { get; set; } + + public FieldAndTerm() + { + } + + private FieldAndTerm(FieldAndTerm other) + { + Field = other.Field; + Term = BytesRef.DeepCopyOf(other.Term); + } + + public override bool Equals(object other) + { + var o = (FieldAndTerm)other; + return o.Field.Equals(Field) && Term.BytesEquals(o.Term); + } + + public override DoubleBarrelLRUCache.CloneableKey Clone() + { + return new FieldAndTerm(this); + } + + public override int GetHashCode() + { + return Field.GetHashCode() * 31 + Term.GetHashCode(); + } + } + + // private string segment; + public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext context, - String segmentSuffix) + string segmentSuffix) { _postingsReader = postingsReader; @@ -84,7 +120,7 @@ namespace Lucene.Net.Codecs.BlockTerms int numFields = _input.ReadVInt(); if (numFields < 0) { - throw new CorruptIndexException(String.Format("Invalid number of fields: {0}, Resource: {1}", + throw new CorruptIndexException(string.Format("Invalid number of fields: {0}, Resource: {1}", numFields, _input)); } @@ -108,7 +144,7 @@ namespace Lucene.Net.Codecs.BlockTerms { // #docs with field must be <= #docs throw new CorruptIndexException( - String.Format("Invalid DocCount: {0}, MaxDoc: {1}, Resource: {2}", docCount, info.DocCount, + string.Format("Invalid DocCount: {0}, MaxDoc: {1}, Resource: {2}", docCount, info.DocCount, _input)); } @@ -116,7 +152,7 @@ namespace Lucene.Net.Codecs.BlockTerms { // #postings must be >= #docs with field throw new CorruptIndexException( - String.Format("Invalid sumDocFreq: {0}, DocCount: {1}, Resource: {2}", sumDocFreq, docCount, + string.Format("Invalid sumDocFreq: {0}, DocCount: {1}, Resource: {2}", sumDocFreq, docCount, _input)); } @@ -124,7 +160,7 @@ namespace Lucene.Net.Codecs.BlockTerms { // #positions must be >= #postings throw new CorruptIndexException( - String.Format("Invalid sumTotalTermFreq: {0}, sumDocFreq: {1}, Resource: {2}", + string.Format("Invalid sumTotalTermFreq: {0}, sumDocFreq: {1}, Resource: {2}", sumTotalTermFreq, sumDocFreq, _input)); } @@ -137,7 +173,7 @@ namespace Lucene.Net.Codecs.BlockTerms } catch (ArgumentException) { - throw new CorruptIndexException(String.Format("Duplicate fields: {0}, Resource: {1}", + throw new CorruptIndexException(string.Format("Duplicate fields: {0}, Resource: {1}", fieldInfo.Name, _input)); } @@ -213,74 +249,18 @@ namespace Lucene.Net.Codecs.BlockTerms return _fields.Keys.GetEnumerator(); } - public override Terms Terms(String field) + public override Terms Terms(string field) { Debug.Assert(field != null); return _fields[field]; } - public override long RamBytesUsed() - { - var sizeInBytes = (_postingsReader != null) ? _postingsReader.RamBytesUsed() : 0; - sizeInBytes += (_indexReader != null) ? _indexReader.RamBytesUsed : 0; - return sizeInBytes; - } - - public override void CheckIntegrity() - { - // verify terms - if (_version >= BlockTermsWriter.VERSION_CHECKSUM) - { - CodecUtil.ChecksumEntireFile(_input); - } - // verify postings - _postingsReader.CheckIntegrity(); - } - public override int Count { get { - { - return _fields.Count; - } - } - } - - /// <summary> - /// Used as a key for the terms cache - /// </summary> - private class FieldAndTerm : DoubleBarrelLRUCache.CloneableKey - { - public String Field { get; set; } - private BytesRef Term { get; set; } - - private FieldAndTerm(FieldAndTerm other) - { - Field = other.Field; - Term = BytesRef.DeepCopyOf(other.Term); - } - - public override bool Equals(Object other) - { - var o = (FieldAndTerm)other; - return o.Field.Equals(Field) && Term.BytesEquals(o.Term); - } - - public override DoubleBarrelLRUCache.CloneableKey Clone() - { - return new FieldAndTerm(this); - } - - public override int GetHashCode() - { - return Field.GetHashCode() * 31 + Term.GetHashCode(); - } - - public FieldAndTerm() - { - + return _fields.Count; } } @@ -419,7 +399,7 @@ namespace Lucene.Net.Codecs.BlockTerms _fieldReader = fieldReader; _blockTermsReader = blockTermsReader; - _input = (IndexInput) _blockTermsReader._input.Clone(); + _input = (IndexInput)_blockTermsReader._input.Clone(); _input.Seek(_fieldReader._termsStartPointer); _indexEnum = _blockTermsReader._indexReader.GetFieldEnum(_fieldReader._fieldInfo); _doOrd = _blockTermsReader._indexReader.SupportsOrd; @@ -450,7 +430,7 @@ namespace Lucene.Net.Codecs.BlockTerms { if (_indexEnum == null) throw new InvalidOperationException("terms index was not loaded"); - + var doSeek = true; // See if we can avoid seeking, because target term @@ -461,7 +441,7 @@ namespace Lucene.Net.Codecs.BlockTerms if (cmp == 0) return SeekStatus.FOUND; // Already at the requested term - + if (cmp < 0) { // Target term is after current term @@ -501,7 +481,7 @@ namespace Lucene.Net.Codecs.BlockTerms if (_doOrd) _state.Ord = _indexEnum.Ord - 1; - + _term.CopyBytes(_indexEnum.Term); } else @@ -660,7 +640,7 @@ namespace Lucene.Net.Codecs.BlockTerms _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix); break; } - + _termSuffixesReader.SkipBytes(suffix); } @@ -794,7 +774,7 @@ namespace Lucene.Net.Codecs.BlockTerms { //System.out.println("BTR.seekExact termState target=" + target.utf8ToString() + " " + target + " this=" + this); Debug.Assert(otherState is BlockTermState); - Debug.Assert(!_doOrd || ((BlockTermState) otherState).Ord < _fieldReader._numTerms); + Debug.Assert(!_doOrd || ((BlockTermState)otherState).Ord < _fieldReader._numTerms); _state.CopyFrom(otherState); _seekPending = true; _indexIsCurrent = false; @@ -804,7 +784,7 @@ namespace Lucene.Net.Codecs.BlockTerms public override TermState GetTermState() { DecodeMetaData(); - return (TermState) _state.Clone(); + return (TermState)_state.Clone(); } public override void SeekExact(long ord) @@ -833,7 +813,7 @@ namespace Lucene.Net.Codecs.BlockTerms _term.CopyBytes(_indexEnum.Term); // Now, scan: - var left = (int) (ord - _state.Ord); + var left = (int)(ord - _state.Ord); while (left > 0) { var term = _next(); @@ -970,6 +950,22 @@ namespace Lucene.Net.Codecs.BlockTerms } } - + public override long RamBytesUsed() + { + var sizeInBytes = (_postingsReader != null) ? _postingsReader.RamBytesUsed() : 0; + sizeInBytes += (_indexReader != null) ? _indexReader.RamBytesUsed : 0; + return sizeInBytes; + } + + public override void CheckIntegrity() + { + // verify terms + if (_version >= BlockTermsWriter.VERSION_CHECKSUM) + { + CodecUtil.ChecksumEntireFile(_input); + } + // verify postings + _postingsReader.CheckIntegrity(); + } } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs b/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs index 5f782d6..81411a7 100644 --- a/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs +++ b/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs @@ -36,8 +36,7 @@ namespace Lucene.Net.Codecs.BlockTerms /// </remarks> public class BlockTermsWriter : FieldsConsumer { - - public const String CODEC_NAME = "BLOCK_TERMS_DICT"; + internal const string CODEC_NAME = "BLOCK_TERMS_DICT"; // Initial format public const int VERSION_START = 0; @@ -46,14 +45,40 @@ namespace Lucene.Net.Codecs.BlockTerms public const int VERSION_CHECKSUM = 3; public const int VERSION_CURRENT = VERSION_CHECKSUM; - /** Extension of terms file */ - public const String TERMS_EXTENSION = "tib"; + /// <summary>Extension of terms file</summary> + public const string TERMS_EXTENSION = "tib"; - private IndexOutput _output; - protected readonly PostingsWriterBase PostingsWriter; - protected readonly FieldInfos FieldInfos; - protected FieldInfo CurrentField; + protected IndexOutput _output; + private readonly PostingsWriterBase PostingsWriter; + private readonly FieldInfos FieldInfos; + private FieldInfo CurrentField; private readonly TermsIndexWriterBase _termsIndexWriter; + + protected class FieldMetaData + { + public FieldInfo FieldInfo { get; private set; } + public long NumTerms { get; private set; } + public long TermsStartPointer { get; private set; } + public long SumTotalTermFreq { get; private set; } + public long SumDocFreq { get; private set; } + public int DocCount { get; private set; } + public int LongsSize { get; private set; } + + public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, + long sumDocFreq, int docCount, int longsSize) + { + Debug.Assert(numTerms > 0); + + FieldInfo = fieldInfo; + TermsStartPointer = termsStartPointer; + NumTerms = numTerms; + SumTotalTermFreq = sumTotalTermFreq; + SumDocFreq = sumDocFreq; + DocCount = docCount; + LongsSize = longsSize; + } + } + private readonly List<FieldMetaData> _fields = new List<FieldMetaData>(); public BlockTermsWriter(TermsIndexWriterBase termsIndexWriter, @@ -140,59 +165,32 @@ namespace Lucene.Net.Codecs.BlockTerms _output.WriteLong(dirStart); } - protected class FieldMetaData - { - public FieldInfo FieldInfo { get; private set; } - public long NumTerms { get; private set; } - public long TermsStartPointer { get; private set; } - public long SumTotalTermFreq { get; private set; } - public long SumDocFreq { get; private set; } - public int DocCount { get; private set; } - public int LongsSize { get; private set; } - - public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, - long sumDocFreq, int docCount, int longsSize) - { - Debug.Assert(numTerms > 0); - - FieldInfo = fieldInfo; - TermsStartPointer = termsStartPointer; - NumTerms = numTerms; - SumTotalTermFreq = sumTotalTermFreq; - SumDocFreq = sumDocFreq; - DocCount = docCount; - LongsSize = longsSize; - } - } - private class TermEntry { public readonly BytesRef Term = new BytesRef(); public BlockTermState State; } - public class TermsWriter : TermsConsumer + internal class TermsWriter : TermsConsumer { - private readonly RAMOutputStream _bytesWriter = new RAMOutputStream(); - private readonly RAMOutputStream _bufferWriter = new RAMOutputStream(); - private readonly BytesRef _lastPrevTerm = new BytesRef(); - + // Outer instance + private readonly BlockTermsWriter _btw; + private readonly FieldInfo _fieldInfo; private readonly PostingsWriterBase _postingsWriter; private readonly long _termsStartPointer; - private readonly TermsIndexWriterBase.FieldWriter _fieldIndexWriter; - private readonly BlockTermsWriter _btw; - - private TermEntry[] _pendingTerms; - private int _pendingCount; - private long _numTerms; + private readonly TermsIndexWriterBase.FieldWriter _fieldIndexWriter; private long _sumTotalTermFreq; private long _sumDocFreq; private int _docCount; private readonly int _longsSize; - public TermsWriter( + private TermEntry[] _pendingTerms; + + private int _pendingCount; + + internal TermsWriter( TermsIndexWriterBase.FieldWriter fieldIndexWriter, FieldInfo fieldInfo, PostingsWriterBase postingsWriter, BlockTermsWriter btw) @@ -222,9 +220,10 @@ namespace Lucene.Net.Codecs.BlockTerms return _postingsWriter; } + private readonly BytesRef _lastPrevTerm = new BytesRef(); + public override void FinishTerm(BytesRef text, TermStats stats) { - Debug.Assert(stats.DocFreq > 0); var isIndexTerm = _fieldIndexWriter.CheckIndexTerm(text, stats); @@ -291,7 +290,7 @@ namespace Lucene.Net.Codecs.BlockTerms } } - private static int SharedPrefix(BytesRef term1, BytesRef term2) + private int SharedPrefix(BytesRef term1, BytesRef term2) { Debug.Assert(term1.Offset == 0); Debug.Assert(term2.Offset == 0); @@ -313,6 +312,9 @@ namespace Lucene.Net.Codecs.BlockTerms return pos1; } + private readonly RAMOutputStream _bytesWriter = new RAMOutputStream(); + private readonly RAMOutputStream _bufferWriter = new RAMOutputStream(); + private void FlushBlock() { // First pass: compute common prefix for all terms @@ -385,6 +387,5 @@ namespace Lucene.Net.Codecs.BlockTerms _pendingCount = 0; } } - } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs index 611c1ed..0cb840d 100644 --- a/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs +++ b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs @@ -191,75 +191,30 @@ namespace Lucene.Net.Codecs.BlockTerms return version; } - public override bool SupportsOrd - { - get { return true; } - } - - public override FieldIndexEnum GetFieldEnum(FieldInfo fieldInfo) - { - FieldIndexData fieldData = _fields[fieldInfo]; - return fieldData.CoreIndex == null ? null : new IndexEnum(fieldData.CoreIndex, this); - } - - public override void Dispose() - { - if (_input != null && !_indexLoaded) - _input.Dispose(); - } - - private void SeekDir(IndexInput input, long dirOffset) - { - if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) - { - input.Seek(input.Length - CodecUtil.FooterLength() - 8); - dirOffset = input.ReadLong(); - - } - else if (_version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY) - { - input.Seek(input.Length - 8); - dirOffset = input.ReadLong(); - } - - input.Seek(dirOffset); - } - - public override long RamBytesUsed - { - get - { - var sizeInBytes = ((_termBytes != null) ? _termBytes.RamBytesUsed() : 0) + - ((_termBytesReader != null) ? _termBytesReader.RamBytesUsed() : 0); - - return _fields.Values.Aggregate(sizeInBytes, - (current, entry) => (current + entry.CoreIndex.RamBytesUsed)); - } - } - private class IndexEnum : FieldIndexEnum { - private readonly FieldIndexData.CoreFieldIndex _fieldIndex; + // Outer intstance private readonly FixedGapTermsIndexReader _fgtir; + private readonly FieldIndexData.CoreFieldIndex _fieldIndex; + private readonly BytesRef term = new BytesRef(); + private long ord; + public IndexEnum(FieldIndexData.CoreFieldIndex fieldIndex, FixedGapTermsIndexReader fgtir) { - Term = new BytesRef(); _fieldIndex = fieldIndex; _fgtir = fgtir; } - public override long Ord { get; set; } + public override sealed BytesRef Term { get { return term; } } - public override sealed BytesRef Term { get; set; } - - public override long? Seek(BytesRef target) + public override long? Seek(BytesRef target) // LUCENENET TODO: return was not nullable in Lucene { var lo = 0; // binary search var hi = _fieldIndex.NumIndexTerms - 1; Debug.Assert(_fgtir._totalIndexInterval > 0, - String.Format("TotalIndexInterval: {0}", _fgtir._totalIndexInterval)); + string.Format("TotalIndexInterval: {0}", _fgtir._totalIndexInterval)); long offset; int length; @@ -268,7 +223,7 @@ namespace Lucene.Net.Codecs.BlockTerms var mid = (int)((uint)(lo + hi) >> 1); offset = _fieldIndex.TermOffsets.Get(mid); - length = (int) (_fieldIndex.TermOffsets.Get(1 + mid) - offset); + length = (int)(_fieldIndex.TermOffsets.Get(1 + mid) - offset); _fgtir._termBytesReader.FillSlice(Term, _fieldIndex.TermBytesStart + offset, length); int delta = _fgtir._termComp.Compare(target, Term); @@ -283,7 +238,7 @@ namespace Lucene.Net.Codecs.BlockTerms else { Debug.Assert(mid >= 0); - Ord = mid * _fgtir._totalIndexInterval; + ord = mid * _fgtir._totalIndexInterval; return _fieldIndex.TermsStart + _fieldIndex.TermsDictOffsets.Get(mid); } } @@ -295,22 +250,22 @@ namespace Lucene.Net.Codecs.BlockTerms } offset = _fieldIndex.TermOffsets.Get(hi); - length = (int) (_fieldIndex.TermOffsets.Get(1 + hi) - offset); + length = (int)(_fieldIndex.TermOffsets.Get(1 + hi) - offset); _fgtir._termBytesReader.FillSlice(Term, _fieldIndex.TermBytesStart + offset, length); - Ord = hi * _fgtir._totalIndexInterval; + ord = hi * _fgtir._totalIndexInterval; return _fieldIndex.TermsStart + _fieldIndex.TermsDictOffsets.Get(hi); } - public override long? Next + public override long? Next // LUCENENET TODO: Make into Next() method // LUCENENET TODO: return was not nullable in Lucene { get { - var idx = 1 + (int)(Ord / _fgtir._totalIndexInterval); + var idx = 1 + (int)(ord / _fgtir._totalIndexInterval); if (idx >= _fieldIndex.NumIndexTerms) return -1; - Ord += _fgtir._totalIndexInterval; + ord += _fgtir._totalIndexInterval; var offset = _fieldIndex.TermOffsets.Get(idx); var length = (int)(_fieldIndex.TermOffsets.Get(1 + idx) - offset); @@ -321,7 +276,9 @@ namespace Lucene.Net.Codecs.BlockTerms } } - public override long? Seek(long ord) + public override long Ord { get { return ord; } } + + public override long? Seek(long ord) // LUCENENET TODO: return was not nullable in Lucene { var idx = (int)(ord / _fgtir._totalIndexInterval); @@ -332,23 +289,31 @@ namespace Lucene.Net.Codecs.BlockTerms var length = (int)(_fieldIndex.TermOffsets.Get(1 + idx) - offset); _fgtir._termBytesReader.FillSlice(Term, _fieldIndex.TermBytesStart + offset, length); - Ord = idx * _fgtir._totalIndexInterval; + this.ord = idx * _fgtir._totalIndexInterval; return _fieldIndex.TermsStart + _fieldIndex.TermsDictOffsets.Get(idx); } } - protected class FieldIndexData + public override bool SupportsOrd { - public volatile CoreFieldIndex CoreIndex; + get { return true; } + } + + private class FieldIndexData + { + // Outer instance + private readonly FixedGapTermsIndexReader _fgtir; // LUCENENET TODO: In this assembly, change all variables marked "Outer instance" to be named outerInstance and move them to the beginning of the ctor parameter list + + internal volatile CoreFieldIndex CoreIndex; private readonly long _indexStart; private readonly long _termsStart; private readonly long _packedIndexStart; private readonly long _packedOffsetsStart; - private readonly int _numIndexTerms; - private readonly FixedGapTermsIndexReader _fgtir; + private readonly int _numIndexTerms; + public FieldIndexData(int numIndexTerms, long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, FixedGapTermsIndexReader fgtir) @@ -372,35 +337,25 @@ namespace Lucene.Net.Codecs.BlockTerms _numIndexTerms, _fgtir); } - public class CoreFieldIndex + internal class CoreFieldIndex { /// <summary> /// Where this fields term begin in the packed byte[] data /// </summary> - public long TermBytesStart { get; private set; } + internal long TermBytesStart { get; private set; } /// <summary> /// Offset into index TermBytes /// </summary> - public PackedInts.Reader TermOffsets { get; private set; } + internal PackedInts.Reader TermOffsets { get; private set; } /// <summary> /// Index pointers into main terms dict /// </summary> - public PackedInts.Reader TermsDictOffsets { get; private set; } + internal PackedInts.Reader TermsDictOffsets { get; private set; } - /// <summary>Returns approximate RAM bytes Used</summary> - public long RamBytesUsed - { - get - { - return ((TermOffsets != null) ? TermOffsets.RamBytesUsed() : 0) + - ((TermsDictOffsets != null) ? TermsDictOffsets.RamBytesUsed() : 0); - } - } - - public int NumIndexTerms { get; private set; } - public long TermsStart { get; private set; } + internal int NumIndexTerms { get; private set; } + internal long TermsStart { get; private set; } public CoreFieldIndex(long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, int numIndexTerms, FixedGapTermsIndexReader fgtir) @@ -419,7 +374,7 @@ namespace Lucene.Net.Codecs.BlockTerms NumIndexTerms = 1 + (numIndexTerms - 1)/fgtir._indexDivisor; Debug.Assert(NumIndexTerms > 0, - String.Format("NumIndexTerms: {0}, IndexDivisor: {1}", NumIndexTerms, fgtir._indexDivisor)); + string.Format("NumIndexTerms: {0}, IndexDivisor: {1}", NumIndexTerms, fgtir._indexDivisor)); if (fgtir._indexDivisor == 1) { @@ -492,7 +447,7 @@ namespace Lucene.Net.Codecs.BlockTerms clone.Seek(indexStart + termOffset); Debug.Assert(indexStart + termOffset < clone.Length, - String.Format("IndexStart: {0}, TermOffset: {1}, Len: {2}", indexStart, termOffset, + string.Format("IndexStart: {0}, TermOffset: {1}, Len: {2}", indexStart, termOffset, clone.Length)); Debug.Assert(indexStart + termOffset + numTermBytes < clone.Length); @@ -524,8 +479,59 @@ namespace Lucene.Net.Codecs.BlockTerms } } + /// <summary>Returns approximate RAM bytes Used</summary> + public long RamBytesUsed // LUCENENET TODO: Make RamBytesUsed() + { + get + { + return ((TermOffsets != null) ? TermOffsets.RamBytesUsed() : 0) + + ((TermsDictOffsets != null) ? TermsDictOffsets.RamBytesUsed() : 0); + } + } + } + } + + public override FieldIndexEnum GetFieldEnum(FieldInfo fieldInfo) + { + FieldIndexData fieldData = _fields[fieldInfo]; + return fieldData.CoreIndex == null ? null : new IndexEnum(fieldData.CoreIndex, this); + } + + public override void Dispose() + { + if (_input != null && !_indexLoaded) + { + _input.Dispose(); + } + } + + private void SeekDir(IndexInput input, long dirOffset) + { + if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) + { + input.Seek(input.Length - CodecUtil.FooterLength() - 8); + dirOffset = input.ReadLong(); + + } + else if (_version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY) + { + input.Seek(input.Length - 8); + dirOffset = input.ReadLong(); } + + input.Seek(dirOffset); } + public override long RamBytesUsed + { + get + { + var sizeInBytes = ((_termBytes != null) ? _termBytes.RamBytesUsed() : 0) + + ((_termBytesReader != null) ? _termBytesReader.RamBytesUsed() : 0); + + return _fields.Values.Aggregate(sizeInBytes, + (current, entry) => (current + entry.CoreIndex.RamBytesUsed)); + } + } } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs index faf5d1c..74b55d7 100644 --- a/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs +++ b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs @@ -37,26 +37,25 @@ namespace Lucene.Net.Codecs.BlockTerms /// </summary> public class FixedGapTermsIndexWriter : TermsIndexWriterBase { - protected IndexOutput Output; + protected IndexOutput Output; // out - /** Extension of terms index file */ - public const String TERMS_INDEX_EXTENSION = "tii"; - public const String CODEC_NAME = "SIMPLE_STANDARD_TERMS_INDEX"; - public const int VERSION_START = 0; - public const int VERSION_APPEND_ONLY = 1; + /// <summary>Extension of terms index file</summary> + internal const string TERMS_INDEX_EXTENSION = "tii"; + internal const string CODEC_NAME = "SIMPLE_STANDARD_TERMS_INDEX"; + internal const int VERSION_START = 0; + internal const int VERSION_APPEND_ONLY = 1; + internal const int VERSION_CHECKSUM = 1000; // 4.x "skipped" trunk's monotonic addressing: give any user a nice exception + internal const int VERSION_CURRENT = VERSION_CHECKSUM; - public const int VERSION_CHECKSUM = 1000; - - // 4.x "skipped" trunk's monotonic addressing: give any user a nice exception - public const int VERSION_CURRENT = VERSION_CHECKSUM; private readonly int _termIndexInterval; + private readonly List<SimpleFieldWriter> _fields = new List<SimpleFieldWriter>(); - private readonly FieldInfos _fieldInfos; //@SuppressWarnings("unused") + private readonly FieldInfos _fieldInfos; // unread public FixedGapTermsIndexWriter(SegmentWriteState state) { - String indexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, + string indexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_INDEX_EXTENSION); _termIndexInterval = state.TermIndexInterval; Output = state.Directory.CreateOutput(indexFileName, state.Context); @@ -94,7 +93,7 @@ namespace Lucene.Net.Codecs.BlockTerms /// point order, you must override this method, to simply /// return indexedTerm.length. /// </remarks> - protected int IndexedTermPrefixLength(BytesRef priorTerm, BytesRef indexedTerm) + protected virtual int IndexedTermPrefixLength(BytesRef priorTerm, BytesRef indexedTerm) { // As long as codec sorts terms in unicode codepoint // order, we can safely strip off the non-distinguishing @@ -112,73 +111,17 @@ namespace Lucene.Net.Codecs.BlockTerms return Math.Min(1 + priorTerm.Length, indexedTerm.Length); } - public override void Dispose() - { - if (Output != null) - { - bool success = false; - try - { - long dirStart = Output.FilePointer; - int fieldCount = _fields.Count; - - int nonNullFieldCount = 0; - for (int i = 0; i < fieldCount; i++) - { - SimpleFieldWriter field = _fields[i]; - if (field.NumIndexTerms > 0) - { - nonNullFieldCount++; - } - } - - Output.WriteVInt(nonNullFieldCount); - for (int i = 0; i < fieldCount; i++) - { - SimpleFieldWriter field = _fields[i]; - if (field.NumIndexTerms > 0) - { - Output.WriteVInt(field.FieldInfo.Number); - Output.WriteVInt(field.NumIndexTerms); - Output.WriteVLong(field.TermsStart); - Output.WriteVLong(field.IndexStart); - Output.WriteVLong(field.PackedIndexStart); - Output.WriteVLong(field.PackedOffsetsStart); - } - } - WriteTrailer(dirStart); - CodecUtil.WriteFooter(Output); - success = true; - } - finally - { - if (success) - { - IOUtils.Close(Output); - } - else - { - IOUtils.CloseWhileHandlingException(Output); - } - Output = null; - } - } - } - - private void WriteTrailer(long dirStart) - { - Output.WriteLong(dirStart); - } - - private class SimpleFieldWriter : FieldWriter { - public readonly FieldInfo FieldInfo; - public int NumIndexTerms; - public readonly long IndexStart; - public readonly long TermsStart; - public long PackedIndexStart; - public long PackedOffsetsStart; + // Outer instance + private readonly FixedGapTermsIndexWriter _fgtiw; + + internal readonly FieldInfo FieldInfo; + internal int NumIndexTerms; + internal readonly long IndexStart; + internal readonly long TermsStart; + internal long PackedIndexStart; + internal long PackedOffsetsStart; private long _numTerms; // TODO: we could conceivably make a PackedInts wrapper @@ -191,9 +134,7 @@ namespace Lucene.Net.Codecs.BlockTerms private readonly BytesRef _lastTerm = new BytesRef(); - private readonly FixedGapTermsIndexWriter _fgtiw; - - public SimpleFieldWriter(FieldInfo fieldInfo, long termsFilePointer, FixedGapTermsIndexWriter fgtiw) + internal SimpleFieldWriter(FieldInfo fieldInfo, long termsFilePointer, FixedGapTermsIndexWriter fgtiw) { FieldInfo = fieldInfo; IndexStart = fgtiw.Output.FilePointer; @@ -214,14 +155,14 @@ namespace Lucene.Net.Codecs.BlockTerms // can compute wasted suffix if (0 == _numTerms % _fgtiw._termIndexInterval) _lastTerm.CopyBytes(text); - + return false; } public override void Add(BytesRef text, TermStats stats, long termsFilePointer) { int indexedTermLength = _fgtiw.IndexedTermPrefixLength(_lastTerm, text); - + // write only the min prefix that shows the diff // against prior term _fgtiw.Output.WriteBytes(text.Bytes, text.Offset, indexedTermLength); @@ -250,7 +191,6 @@ namespace Lucene.Net.Codecs.BlockTerms public override void Finish(long termsFilePointer) { - // write primary terms dict offsets PackedIndexStart = _fgtiw.Output.FilePointer; @@ -287,5 +227,63 @@ namespace Lucene.Net.Codecs.BlockTerms _termsPointerDeltas = null; } } + + public override void Dispose() + { + if (Output != null) + { + bool success = false; + try + { + long dirStart = Output.FilePointer; + int fieldCount = _fields.Count; + + int nonNullFieldCount = 0; + for (int i = 0; i < fieldCount; i++) + { + SimpleFieldWriter field = _fields[i]; + if (field.NumIndexTerms > 0) + { + nonNullFieldCount++; + } + } + + Output.WriteVInt(nonNullFieldCount); + for (int i = 0; i < fieldCount; i++) + { + SimpleFieldWriter field = _fields[i]; + if (field.NumIndexTerms > 0) + { + Output.WriteVInt(field.FieldInfo.Number); + Output.WriteVInt(field.NumIndexTerms); + Output.WriteVLong(field.TermsStart); + Output.WriteVLong(field.IndexStart); + Output.WriteVLong(field.PackedIndexStart); + Output.WriteVLong(field.PackedOffsetsStart); + } + } + WriteTrailer(dirStart); + CodecUtil.WriteFooter(Output); + success = true; + } + finally + { + if (success) + { + IOUtils.Close(Output); + } + else + { + IOUtils.CloseWhileHandlingException(Output); + } + Output = null; + } + } + } + + private void WriteTrailer(long dirStart) + { + Output.WriteLong(dirStart); + } } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs b/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs index 7747e91..bf19dc1 100644 --- a/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs +++ b/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs @@ -39,17 +39,14 @@ namespace Lucene.Net.Codecs.BlockTerms /// </summary> public abstract class TermsIndexReaderBase : IDisposable { - public abstract bool SupportsOrd { get; } - - public abstract int Divisor { get; } - - /// <summary>Returns approximate RAM bytes used</summary> - public abstract long RamBytesUsed { get; } - public abstract FieldIndexEnum GetFieldEnum(FieldInfo fieldInfo); public abstract void Dispose(); + public abstract bool SupportsOrd { get; } + + public abstract int Divisor { get; } + /// <summary> /// Similar to TermsEnum, except, the only "metadata" it /// reports for a given indexed term is the long fileOffset @@ -57,30 +54,32 @@ namespace Lucene.Net.Codecs.BlockTerms /// </summary> public abstract class FieldIndexEnum { - /** Returns -1 at end */ - public abstract long? Next { get; } + /// <summary> + /// Seeks to "largest" indexed term that's less than or equal + /// to term; returns file pointer index (into the main + /// terms index file) for that term + /// </summary> + public abstract long? Seek(BytesRef term); // LUCENENET TODO: make non-null + + /// <summary>Returns -1 at end</summary> + public abstract long? Next { get; } // LUCENENET TODO: Make Next(), make non-null - public abstract BytesRef Term { get; set; } + public abstract BytesRef Term { get; } /// <summary></summary> /// <remarks>Only implemented if {@link TermsIndexReaderBase.supportsOrd()} /// returns true</remarks> /// <returns></returns> - public abstract long Ord { get; set; } + public abstract long? Seek(long ord); // LUCENENET TODO: make non-null /// <summary></summary> /// <remarks>Only implemented if {@link TermsIndexReaderBase.supportsOrd()} /// returns true</remarks> /// <returns></returns> - public abstract long? Seek(long ord); - - /// <summary> - /// Seeks to "largest" indexed term that's less than or equal - /// to term; returns file pointer index (into the main - /// terms index file) for that term - /// </summary> - public abstract long? Seek(BytesRef term); + public abstract long Ord { get; } } + /// <summary>Returns approximate RAM bytes used</summary> + public abstract long RamBytesUsed { get; } // LUCENENET TODO: Make RamBytesUsed() } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/BlockTerms/TermsIndexWriterBase.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/BlockTerms/TermsIndexWriterBase.cs b/src/Lucene.Net.Codecs/BlockTerms/TermsIndexWriterBase.cs index 76c5973..e3efbcd 100644 --- a/src/Lucene.Net.Codecs/BlockTerms/TermsIndexWriterBase.cs +++ b/src/Lucene.Net.Codecs/BlockTerms/TermsIndexWriterBase.cs @@ -30,11 +30,6 @@ namespace Lucene.Net.Codecs.BlockTerms /// </summary> public abstract class TermsIndexWriterBase : IDisposable { - - public abstract FieldWriter AddField(FieldInfo fieldInfo, long termsFilePointer); - - public abstract void Dispose(); - /// <summary>Terms index API for a single field</summary> public abstract class FieldWriter { @@ -42,5 +37,9 @@ namespace Lucene.Net.Codecs.BlockTerms public abstract void Add(BytesRef text, TermStats stats, long termsFilePointer); public abstract void Finish(long termsFilePointer); } + + public abstract FieldWriter AddField(FieldInfo fieldInfo, long termsFilePointer); + + public abstract void Dispose(); // LUCENENET TODO: Implement disposable pattern } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs b/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs index e326f15..48eaf86 100644 --- a/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs +++ b/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs @@ -34,30 +34,34 @@ namespace Lucene.Net.Codecs.BlockTerms /// </summary> public class VariableGapTermsIndexReader : TermsIndexReaderBase { + private readonly PositiveIntOutputs _fstOutputs = PositiveIntOutputs.Singleton; private readonly int _indexDivisor; - private readonly IndexInput _input; // Closed if indexLoaded is true: - private readonly int _version; + private readonly IndexInput _input; // Closed if indexLoaded is true: private volatile bool _indexLoaded; - private long _dirOffset; // start of the field info data - private readonly PositiveIntOutputs _fstOutputs = PositiveIntOutputs.Singleton; private readonly Dictionary<FieldInfo, FieldIndexData> _fields = new Dictionary<FieldInfo, FieldIndexData>(); + + private long _dirOffset; // start of the field info data + + private readonly int _version; + + private readonly string segment; - public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, - String segmentSuffix, IOContext context) + public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, string segment, int indexDivisor, + string segmentSuffix, IOContext context) { _input = dir.OpenInput( IndexFileNames.SegmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true)); + this.segment = segment; var success = false; Debug.Assert(indexDivisor == -1 || indexDivisor > 0); try { - _version = ReadHeader(_input); _indexDivisor = indexDivisor; @@ -85,7 +89,7 @@ namespace Lucene.Net.Codecs.BlockTerms } catch (ArgumentException) { - throw new CorruptIndexException(String.Format("Duplicate Field: {0}, Resource: {1}", + throw new CorruptIndexException(string.Format("Duplicate Field: {0}, Resource: {1}", fieldInfo.Name, _input)); } } @@ -105,6 +109,11 @@ namespace Lucene.Net.Codecs.BlockTerms } } + public override int Divisor + { + get { return _indexDivisor; } + } + private int ReadHeader(IndexInput input) { int version = CodecUtil.CheckHeader(input, VariableGapTermsIndexWriter.CODEC_NAME, @@ -116,57 +125,64 @@ namespace Lucene.Net.Codecs.BlockTerms return version; } - public override void Dispose() + private class IndexEnum : FieldIndexEnum { - if (_input != null && !_indexLoaded) { - _input.Dispose(); - } - } + private readonly BytesRefFSTEnum<long?> _fstEnum; + private BytesRefFSTEnum.InputOutput<long?> _current; - public override bool SupportsOrd - { - get { return false; } - } - - public override int Divisor - { - get { return _indexDivisor; } - } + public IndexEnum(FST<long?> fst) + { + _fstEnum = new BytesRefFSTEnum<long?>(fst); + } - public override FieldIndexEnum GetFieldEnum(FieldInfo fieldInfo) - { - FieldIndexData fieldData = _fields[fieldInfo]; - return fieldData.Fst == null ? null : new IndexEnum(fieldData.Fst); - } + public override BytesRef Term + { + get { return _current == null ? null : _current.Input; } + } - private void SeekDir(IndexInput input, long dirOffset) - { - if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) + public override long? Seek(BytesRef target) { - input.Seek(input.Length - CodecUtil.FooterLength() - 8); - dirOffset = input.ReadLong(); + _current = _fstEnum.SeekFloor(target); + return _current.Output; } - else if (_version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) + + public override long? Next { - input.Seek(input.Length - 8); - dirOffset = input.ReadLong(); + get + { + _current = _fstEnum.Next(); + if (_current == null) + return -1; + + return _current.Output; + } + } + + public override long Ord + { + get { throw new NotImplementedException(); } + } + + public override long? Seek(long ord) + { + throw new NotImplementedException(); } - input.Seek(dirOffset); } - public override long RamBytesUsed + public override bool SupportsOrd { - get { return _fields.Values.Sum(entry => entry.RamBytesUsed()); } + get { return false; } } - internal class FieldIndexData + private class FieldIndexData { + // Outer instance + private readonly VariableGapTermsIndexReader _vgtir; private readonly long _indexStart; // Set only if terms index is loaded: - public volatile FST<long?> Fst; - private readonly VariableGapTermsIndexReader _vgtir; - + internal volatile FST<long?> Fst; + public FieldIndexData(long indexStart, VariableGapTermsIndexReader vgtir) { _vgtir = vgtir; @@ -180,7 +196,7 @@ namespace Lucene.Net.Codecs.BlockTerms { if (Fst != null) return; - var clone = (IndexInput) _vgtir._input.Clone(); + var clone = (IndexInput)_vgtir._input.Clone(); clone.Seek(_indexStart); Fst = new FST<long?>(clone, _vgtir._fstOutputs); clone.Dispose(); @@ -201,7 +217,7 @@ namespace Lucene.Net.Codecs.BlockTerms var builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, outputs); var fstEnum = new BytesRefFSTEnum<long?>(Fst); var count = _vgtir._indexDivisor; - + BytesRefFSTEnum.InputOutput<long?> result; while ((result = fstEnum.Next()) != null) { @@ -217,57 +233,43 @@ namespace Lucene.Net.Codecs.BlockTerms } /// <summary>Returns approximate RAM bytes used</summary> - public long RamBytesUsed() + public virtual long RamBytesUsed() { return Fst == null ? 0 : Fst.SizeInBytes(); } } - protected class IndexEnum : FieldIndexEnum + public override FieldIndexEnum GetFieldEnum(FieldInfo fieldInfo) { - private readonly BytesRefFSTEnum<long?> _fstEnum; - private BytesRefFSTEnum.InputOutput<long?> _current; - - public IndexEnum(FST<long?> fst) - { - _fstEnum = new BytesRefFSTEnum<long?>(fst); - } - - public override BytesRef Term - { - get { return _current == null ? null : _current.Input; } - set { } - } - - public override long? Seek(BytesRef target) - { - _current = _fstEnum.SeekFloor(target); - return _current.Output; - } - - public override long? Next - { - get - { - _current = _fstEnum.Next(); - if (_current == null) - return -1; + FieldIndexData fieldData = _fields[fieldInfo]; + return fieldData.Fst == null ? null : new IndexEnum(fieldData.Fst); + } - return _current.Output; - } - } + public override void Dispose() + { + if (_input != null && !_indexLoaded) { + _input.Dispose(); + } + } - public override long Ord + private void SeekDir(IndexInput input, long dirOffset) + { + if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) { - get { throw new NotImplementedException(); } - set { } + input.Seek(input.Length - CodecUtil.FooterLength() - 8); + dirOffset = input.ReadLong(); } - - public override long? Seek(long ord) + else if (_version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) { - throw new NotImplementedException(); + input.Seek(input.Length - 8); + dirOffset = input.ReadLong(); } + input.Seek(dirOffset); } + public override long RamBytesUsed + { + get { return _fields.Values.Sum(entry => entry.RamBytesUsed()); } + } } } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexWriter.cs b/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexWriter.cs index cda8851..c172433 100644 --- a/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexWriter.cs +++ b/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexWriter.cs @@ -38,17 +38,18 @@ namespace Lucene.Net.Codecs.BlockTerms /// </summary> public class VariableGapTermsIndexWriter : TermsIndexWriterBase { - protected IndexOutput Output; + protected IndexOutput Output; // out /** Extension of terms index file */ - public const String TERMS_INDEX_EXTENSION = "tiv"; - public const String CODEC_NAME = "VARIABLE_GAP_TERMS_INDEX"; - public const int VERSION_START = 0; - public const int VERSION_APPEND_ONLY = 1; - public const int VERSION_CHECKSUM = 2; - public const int VERSION_CURRENT = VERSION_CHECKSUM; + internal const string TERMS_INDEX_EXTENSION = "tiv"; + internal const string CODEC_NAME = "VARIABLE_GAP_TERMS_INDEX"; + internal const int VERSION_START = 0; + internal const int VERSION_APPEND_ONLY = 1; + internal const int VERSION_CHECKSUM = 2; + internal const int VERSION_CURRENT = VERSION_CHECKSUM; private readonly List<FstFieldWriter> _fields = new List<FstFieldWriter>(); + private readonly IndexTermSelector _policy; /// <summary> @@ -74,14 +75,14 @@ namespace Lucene.Net.Codecs.BlockTerms /// <remarks> /// Same policy as {@link FixedGapTermsIndexWriter} /// </remarks> - public class EveryNTermSelector : IndexTermSelector + public sealed class EveryNTermSelector : IndexTermSelector { private int _count; private readonly int _interval; public EveryNTermSelector(int interval) { - _interval = interval; + this._interval = interval; _count = interval; // First term is first indexed term } @@ -108,7 +109,7 @@ namespace Lucene.Net.Codecs.BlockTerms /// every interval terms. This should reduce seek time /// to high docFreq terms. /// </summary> - public class EveryNOrDocFreqTermSelector : IndexTermSelector + public sealed class EveryNOrDocFreqTermSelector : IndexTermSelector { private int _count; private readonly int _docFreqThresh; @@ -192,7 +193,7 @@ namespace Lucene.Net.Codecs.BlockTerms } } - private static void WriteHeader(IndexOutput output) + private void WriteHeader(IndexOutput output) { CodecUtil.WriteHeader(output, CODEC_NAME, VERSION_CURRENT); } @@ -209,7 +210,7 @@ namespace Lucene.Net.Codecs.BlockTerms /// Note: If your codec does not sort in unicode code point order, /// you must override this method to simplly return IndexedTerm.Length /// </remarks> - protected int IndexedTermPrefixLength(BytesRef priorTerm, BytesRef indexedTerm) + protected virtual int IndexedTermPrefixLength(BytesRef priorTerm, BytesRef indexedTerm) { // As long as codec sorts terms in unicode codepoint // order, we can safely strip off the non-distinguishing @@ -231,17 +232,19 @@ namespace Lucene.Net.Codecs.BlockTerms private class FstFieldWriter : FieldWriter { + // Outer instance + private readonly VariableGapTermsIndexWriter _vgtiw; + private readonly Builder<long?> _fstBuilder; + private readonly PositiveIntOutputs fstOutputs; private readonly long _startTermsFilePointer; - private readonly BytesRef _lastTerm = new BytesRef(); - private readonly IntsRef _scratchIntsRef = new IntsRef(); - private readonly VariableGapTermsIndexWriter _vgtiw; - private bool _first = true; + internal FieldInfo FieldInfo { get; private set; } + internal FST<long?> Fst { get; private set; } + internal long IndexStart { get; private set; } - public long IndexStart { get; private set; } - public FieldInfo FieldInfo { get; private set; } - public FST<long?> Fst { get; private set; } + private readonly BytesRef _lastTerm = new BytesRef(); + private bool _first = true; public FstFieldWriter(FieldInfo fieldInfo, long termsFilePointer, VariableGapTermsIndexWriter vgtiw) { @@ -265,11 +268,13 @@ namespace Lucene.Net.Codecs.BlockTerms _first = false; return true; } - + _lastTerm.CopyBytes(text); return false; } + private readonly IntsRef _scratchIntsRef = new IntsRef(); + public override void Add(BytesRef text, TermStats stats, long termsFilePointer) { if (text.Length == 0) @@ -342,7 +347,5 @@ namespace Lucene.Net.Codecs.BlockTerms { Output.WriteLong(dirStart); } - } - } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/Bloom/BloomFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/Bloom/BloomFilterFactory.cs b/src/Lucene.Net.Codecs/Bloom/BloomFilterFactory.cs index d443f6e..4dab212 100644 --- a/src/Lucene.Net.Codecs/Bloom/BloomFilterFactory.cs +++ b/src/Lucene.Net.Codecs/Bloom/BloomFilterFactory.cs @@ -28,7 +28,6 @@ namespace Lucene.Net.Codecs.Bloom /// </summary> public abstract class BloomFilterFactory { - /// <summary> /// /// </summary> @@ -43,7 +42,7 @@ namespace Lucene.Net.Codecs.Bloom /// <param name="fieldInfo">The field with sparse set bits</param> /// <param name="initialSet">The bits accumulated</param> /// <returns> null or a hopefully more densely packed, smaller bitset</returns> - public FuzzySet Downsize(FieldInfo fieldInfo, FuzzySet initialSet) + public virtual FuzzySet Downsize(FieldInfo fieldInfo, FuzzySet initialSet) { // Aim for a bitset size that would have 10% of bits set (so 90% of searches // would fail-fast) @@ -58,6 +57,5 @@ namespace Lucene.Net.Codecs.Bloom /// <param name="fieldInfo">The field with which this filter is associated</param> /// <returns>true if the set has reached saturation and should be retired</returns> public abstract bool IsSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo); - } } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/Bloom/BloomFilteringPostingsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/Bloom/BloomFilteringPostingsFormat.cs b/src/Lucene.Net.Codecs/Bloom/BloomFilteringPostingsFormat.cs index 0e717c6..1133abe 100644 --- a/src/Lucene.Net.Codecs/Bloom/BloomFilteringPostingsFormat.cs +++ b/src/Lucene.Net.Codecs/Bloom/BloomFilteringPostingsFormat.cs @@ -61,13 +61,13 @@ namespace Lucene.Net.Codecs.Bloom /// </summary> public sealed class BloomFilteringPostingsFormat : PostingsFormat { - public static readonly String BLOOM_CODEC_NAME = "BloomFilter"; + public static readonly string BLOOM_CODEC_NAME = "BloomFilter"; public static readonly int VERSION_START = 1; public static readonly int VERSION_CHECKSUM = 2; public static readonly int VERSION_CURRENT = VERSION_CHECKSUM; - /** Extension of Bloom Filters file */ - private const String BLOOM_EXTENSION = "blm"; + /// <summary>Extension of Bloom Filters file</summary> + private const string BLOOM_EXTENSION = "blm"; private readonly BloomFilterFactory _bloomFilterFactory = new DefaultBloomFilterFactory(); private readonly PostingsFormat _delegatePostingsFormat; @@ -105,7 +105,8 @@ namespace Lucene.Net.Codecs.Bloom /// Used only by core Lucene at read-time via Service Provider instantiation - /// do not use at Write-time in application code. /// </summary> - public BloomFilteringPostingsFormat() : base(BLOOM_CODEC_NAME) + public BloomFilteringPostingsFormat() + : base(BLOOM_CODEC_NAME) { } @@ -127,11 +128,10 @@ namespace Lucene.Net.Codecs.Bloom internal class BloomFilteredFieldsProducer : FieldsProducer { private readonly FieldsProducer _delegateFieldsProducer; - private readonly HashMap<String, FuzzySet> _bloomsByFieldName = new HashMap<String, FuzzySet>(); + private readonly HashMap<string, FuzzySet> _bloomsByFieldName = new HashMap<string, FuzzySet>(); public BloomFilteredFieldsProducer(SegmentReadState state) { - var bloomFileName = IndexFileNames.SegmentFileName( state.SegmentInfo.Name, state.SegmentSuffix, BLOOM_EXTENSION); ChecksumIndexInput bloomIn = null; @@ -184,7 +184,12 @@ namespace Lucene.Net.Codecs.Bloom return _delegateFieldsProducer.GetEnumerator(); } - public override Terms Terms(String field) + public override void Dispose() + { + _delegateFieldsProducer.Dispose(); + } + + public override Terms Terms(string field) { var filter = _bloomsByFieldName[field]; if (filter == null) @@ -198,39 +203,16 @@ namespace Lucene.Net.Codecs.Bloom { get { - { - return _delegateFieldsProducer.Count; - } + return _delegateFieldsProducer.Count; } } - [Obsolete("iterate fields and add their size() instead.")] + [Obsolete("iterate fields and add their Count instead.")] public override long UniqueTermCount { get { return _delegateFieldsProducer.UniqueTermCount; } } - public override void Dispose() - { - _delegateFieldsProducer.Dispose(); - } - - public override long RamBytesUsed() - { - var sizeInBytes = ((_delegateFieldsProducer != null) ? _delegateFieldsProducer.RamBytesUsed() : 0); - foreach (var entry in _bloomsByFieldName.EntrySet()) - { - sizeInBytes += entry.Key.Length*RamUsageEstimator.NUM_BYTES_CHAR; - sizeInBytes += entry.Value.RamBytesUsed(); - } - return sizeInBytes; - } - - public override void CheckIntegrity() - { - _delegateFieldsProducer.CheckIntegrity(); - } - internal class BloomFilteredTerms : Terms { private readonly Terms _delegateTerms; @@ -316,13 +298,13 @@ namespace Lucene.Net.Codecs.Bloom private Terms _delegateTerms; internal TermsEnum DELEGATE_TERMS_ENUM; private TermsEnum _reuseDelegate; - internal readonly FuzzySet FILTER; + internal readonly FuzzySet FILTER; // LUCENENET TODO: rename filter public BloomFilteredTermsEnum(Terms delegateTerms, TermsEnum reuseDelegate, FuzzySet filter) { _delegateTerms = delegateTerms; _reuseDelegate = reuseDelegate; - FILTER = filter; + this.FILTER = filter; } internal void Reset(Terms delegateTerms, TermsEnum reuseDelegate) @@ -341,17 +323,17 @@ namespace Lucene.Net.Codecs.Bloom return DELEGATE_TERMS_ENUM ?? (DELEGATE_TERMS_ENUM = _delegateTerms.GetIterator(_reuseDelegate)); } - public override BytesRef Next() + public override sealed BytesRef Next() { return Delegate().Next(); } - public override IComparer<BytesRef> Comparer + public override sealed IComparer<BytesRef> Comparer { get { return _delegateTerms.Comparer; } } - public override bool SeekExact(BytesRef text) + public override sealed bool SeekExact(BytesRef text) { // The magical fail-fast speed up that is the entire point of all of // this code - save a disk seek if there is a match on an in-memory @@ -365,32 +347,32 @@ namespace Lucene.Net.Codecs.Bloom return Delegate().SeekExact(text); } - public override SeekStatus SeekCeil(BytesRef text) + public override sealed SeekStatus SeekCeil(BytesRef text) { return Delegate().SeekCeil(text); } - public override void SeekExact(long ord) + public override sealed void SeekExact(long ord) { Delegate().SeekExact(ord); } - public override BytesRef Term + public override sealed BytesRef Term { get { return Delegate().Term; } } - public override long Ord + public override sealed long Ord { get { return Delegate().Ord; } } - public override int DocFreq + public override sealed int DocFreq { get { return Delegate().DocFreq; } } - public override long TotalTermFreq + public override sealed long TotalTermFreq { get { return Delegate().TotalTermFreq; } } @@ -407,15 +389,32 @@ namespace Lucene.Net.Codecs.Bloom } } + public override long RamBytesUsed() + { + var sizeInBytes = ((_delegateFieldsProducer != null) ? _delegateFieldsProducer.RamBytesUsed() : 0); + foreach (var entry in _bloomsByFieldName.EntrySet()) + { + sizeInBytes += entry.Key.Length * RamUsageEstimator.NUM_BYTES_CHAR; + sizeInBytes += entry.Value.RamBytesUsed(); + } + return sizeInBytes; + } + + public override void CheckIntegrity() + { + _delegateFieldsProducer.CheckIntegrity(); + } } internal class BloomFilteredFieldsConsumer : FieldsConsumer { + // Outer instance + private readonly BloomFilteringPostingsFormat _bfpf; + private readonly FieldsConsumer _delegateFieldsConsumer; private readonly Dictionary<FieldInfo, FuzzySet> _bloomFilters = new Dictionary<FieldInfo, FuzzySet>(); private readonly SegmentWriteState _state; - private readonly BloomFilteringPostingsFormat _bfpf; - + public BloomFilteredFieldsConsumer(FieldsConsumer fieldsConsumer, SegmentWriteState state, BloomFilteringPostingsFormat bfpf) { @@ -480,13 +479,11 @@ namespace Lucene.Net.Codecs.Bloom private void SaveAppropriatelySizedBloomFilter(DataOutput bloomOutput, FuzzySet bloomFilter, FieldInfo fieldInfo) { - var rightSizedSet = _bfpf._bloomFilterFactory.Downsize(fieldInfo, bloomFilter) ?? bloomFilter; rightSizedSet.Serialize(bloomOutput); } - } internal class WrappedTermsConsumer : TermsConsumer @@ -524,8 +521,6 @@ namespace Lucene.Net.Codecs.Bloom { get { return _delegateTermsConsumer.Comparer; } } - } - } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/Bloom/DefaultBloomFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/Bloom/DefaultBloomFilterFactory.cs b/src/Lucene.Net.Codecs/Bloom/DefaultBloomFilterFactory.cs index 3e49add..f3e0722 100644 --- a/src/Lucene.Net.Codecs/Bloom/DefaultBloomFilterFactory.cs +++ b/src/Lucene.Net.Codecs/Bloom/DefaultBloomFilterFactory.cs @@ -27,7 +27,6 @@ namespace Lucene.Net.Codecs.Bloom /// </summary> public class DefaultBloomFilterFactory : BloomFilterFactory { - public override FuzzySet GetSetForField(SegmentWriteState state, FieldInfo info) { //Assume all of the docs have a unique term (e.g. a primary key) and we hope to maintain a set with 10% of bits set @@ -40,6 +39,5 @@ namespace Lucene.Net.Codecs.Bloom // throw any more memory at this problem. return bloomFilter.GetSaturation() > 0.9f; } - } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/Bloom/FuzzySet.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/Bloom/FuzzySet.cs b/src/Lucene.Net.Codecs/Bloom/FuzzySet.cs index d107cb0..16e1104 100644 --- a/src/Lucene.Net.Codecs/Bloom/FuzzySet.cs +++ b/src/Lucene.Net.Codecs/Bloom/FuzzySet.cs @@ -45,7 +45,6 @@ namespace Lucene.Net.Codecs.Bloom /// </summary> public class FuzzySet { - public static readonly int VERSION_SPI = 1; // HashFunction used to be loaded through a SPI public static readonly int VERSION_START = VERSION_SPI; public static readonly int VERSION_CURRENT = 2; @@ -70,7 +69,7 @@ namespace Lucene.Net.Codecs.Bloom /// </remarks> public enum ContainsResult { - Maybe, + Maybe, // LUCENENET TODO: Change to MAYBE, NO No }; @@ -87,27 +86,16 @@ namespace Lucene.Net.Codecs.Bloom // translation of the query term that mirrors the stored content's reprojections. private static int[] _usableBitSetSizes; - private static int[] UsableBitSetSizes - { - get - { - if (_usableBitSetSizes == null) - InitializeUsableBitSetSizes(); - - return _usableBitSetSizes; - } - set { _usableBitSetSizes = value; } - } - private static void InitializeUsableBitSetSizes() + static FuzzySet() { - UsableBitSetSizes = new int[30]; + _usableBitSetSizes = new int[30]; const int mask = 1; var size = mask; - for (var i = 0; i < UsableBitSetSizes.Length; i++) + for (var i = 0; i < _usableBitSetSizes.Length; i++) { size = (size << 1) | mask; - UsableBitSetSizes[i] = size; + _usableBitSetSizes[i] = size; } } @@ -118,8 +106,8 @@ namespace Lucene.Net.Codecs.Bloom /// </summary> public static int GetNearestSetSize(int maxNumberOfBits) { - var result = UsableBitSetSizes[0]; - foreach (var t in UsableBitSetSizes.Where(t => t <= maxNumberOfBits)) + var result = _usableBitSetSizes[0]; + foreach (var t in _usableBitSetSizes.Where(t => t <= maxNumberOfBits)) { result = t; } @@ -138,7 +126,9 @@ namespace Lucene.Net.Codecs.Bloom { // Iterate around the various scales of bitset from smallest to largest looking for the first that // satisfies value volumes at the chosen saturation level - foreach (var t in from t in UsableBitSetSizes let numSetBitsAtDesiredSaturation = (int) (t*desiredSaturation) let estimatedNumUniqueValues = GetEstimatedNumberUniqueValuesAllowingForCollisions( + foreach (var t in from t in _usableBitSetSizes + let numSetBitsAtDesiredSaturation = (int) (t*desiredSaturation) + let estimatedNumUniqueValues = GetEstimatedNumberUniqueValuesAllowingForCollisions( t, numSetBitsAtDesiredSaturation) where estimatedNumUniqueValues > maxNumberOfValuesExpected select t) { return t; @@ -170,7 +160,7 @@ namespace Lucene.Net.Codecs.Bloom /// Unlike a conventional set, the fuzzy set returns NO or MAYBE rather than true or false. /// </summary> /// <returns>NO or MAYBE</returns> - public ContainsResult Contains(BytesRef value) + public virtual ContainsResult Contains(BytesRef value) { var hash = _hashFunction.Hash(value); if (hash < 0) @@ -198,7 +188,7 @@ namespace Lucene.Net.Codecs.Bloom /// @param out Data output stream /// @ If there is a low-level I/O error /// </summary> - public void Serialize(DataOutput output) + public virtual void Serialize(DataOutput output) { output.WriteInt(VERSION_CURRENT); output.WriteInt(_bloomSize); @@ -244,7 +234,7 @@ namespace Lucene.Net.Codecs.Bloom /// chosen size of the internal bitset. /// </summary> /// <param name="value">The Key value to be hashed</param> - public void AddValue(BytesRef value) + public virtual void AddValue(BytesRef value) { var hash = _hashFunction.Hash(value); if (hash < 0) @@ -261,14 +251,16 @@ namespace Lucene.Net.Codecs.Bloom /// Lower values have better accuracy but require more space. /// </param> /// <return>A smaller FuzzySet or null if the current set is already over-saturated</return> - public FuzzySet Downsize(float targetMaxSaturation) + public virtual FuzzySet Downsize(float targetMaxSaturation) { var numBitsSet = _filter.Cardinality(); FixedBitSet rightSizedBitSet; var rightSizedBitSetSize = _bloomSize; //Hopefully find a smaller size bitset into which we can project accumulated values while maintaining desired saturation level - foreach (var candidateBitsetSize in from candidateBitsetSize in UsableBitSetSizes let candidateSaturation = numBitsSet - /(float) candidateBitsetSize where candidateSaturation <= targetMaxSaturation select candidateBitsetSize) + foreach (var candidateBitsetSize in from candidateBitsetSize in _usableBitSetSizes + let candidateSaturation = numBitsSet /(float) candidateBitsetSize + where candidateSaturation <= targetMaxSaturation + select candidateBitsetSize) { rightSizedBitSetSize = candidateBitsetSize; break; @@ -299,7 +291,7 @@ namespace Lucene.Net.Codecs.Bloom return new FuzzySet(rightSizedBitSet, rightSizedBitSetSize, _hashFunction); } - public int GetEstimatedUniqueValues() + public virtual int GetEstimatedUniqueValues() { return GetEstimatedNumberUniqueValuesAllowingForCollisions(_bloomSize, _filter.Cardinality()); } @@ -315,13 +307,13 @@ namespace Lucene.Net.Codecs.Bloom return (int) (setSizeAsDouble*logInverseSaturation); } - public float GetSaturation() + public virtual float GetSaturation() { var numBitsSet = _filter.Cardinality(); return numBitsSet/(float) _bloomSize; } - public long RamBytesUsed() + public virtual long RamBytesUsed() { return RamUsageEstimator.SizeOf(_filter.GetBits()); } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c602c98f/src/Lucene.Net.Codecs/Bloom/HashFunction.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/Bloom/HashFunction.cs b/src/Lucene.Net.Codecs/Bloom/HashFunction.cs index af1a608..facee5b 100644 --- a/src/Lucene.Net.Codecs/Bloom/HashFunction.cs +++ b/src/Lucene.Net.Codecs/Bloom/HashFunction.cs @@ -35,6 +35,5 @@ namespace Lucene.Net.Codecs.Bloom /// @return the hash of the bytes referenced by bytes.offset and length bytes.length /// </summary> public abstract int Hash(BytesRef bytes); - } } \ No newline at end of file
