Lucene.Net.Codecs.BlockTerms (FixedGapTermsIndexWriter + VariableGapTermsIndexWriter + VariableGapTermsIndexReader): Reviewed line-by-line and fixed several formatting issues


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/af9535fa
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/af9535fa
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/af9535fa

Branch: refs/heads/api-work
Commit: af9535fa26dfd0c514c30764b7c1d821a94300d2
Parents: 1cf1b11
Author: Shad Storhaug <[email protected]>
Authored: Sat Mar 18 00:28:28 2017 +0700
Committer: Shad Storhaug <[email protected]>
Committed: Sat Mar 18 03:21:48 2017 +0700

----------------------------------------------------------------------
 .../BlockTerms/FixedGapTermsIndexWriter.cs      |   2 +
 .../BlockTerms/VariableGapTermsIndexReader.cs   | 109 +++++++++++-------
 .../BlockTerms/VariableGapTermsIndexWriter.cs   | 115 +++++++++++--------
 3 files changed, 137 insertions(+), 89 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/af9535fa/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs
index c1731da..1751f83 100644
--- a/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs
+++ b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs
@@ -136,6 +136,7 @@ namespace Lucene.Net.Codecs.BlockTerms
             internal SimpleFieldWriter(FixedGapTermsIndexWriter outerInstance, FieldInfo fieldInfo, long termsFilePointer)
             {
                 this.outerInstance = outerInstance;
+
                 this.fieldInfo = fieldInfo;
                 indexStart = outerInstance.m_output.FilePointer;
                 termsStart = lastTermsPointer = termsFilePointer;
@@ -164,6 +165,7 @@ namespace Lucene.Net.Codecs.BlockTerms
             public override void Add(BytesRef text, TermStats stats, long termsFilePointer)
             {
                 int indexedTermLength = outerInstance.IndexedTermPrefixLength(lastTerm, text);
+                //System.out.println("FGW: add text=" + text.utf8ToString() + " " + text + " fp=" + termsFilePointer);
 
                 // write only the min prefix that shows the diff
                 // against prior term

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/af9535fa/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs b/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs
index 321bf1c..340b18e 100644
--- a/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs
+++ b/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs
@@ -56,8 +56,7 @@ namespace Lucene.Net.Codecs.BlockTerms
                     IndexFileNames.SegmentFileName(segment, segmentSuffix,
                         VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true));
             this.segment = segment;
-            var success = false;
-
+            bool success = false;
             Debug.Assert(indexDivisor == -1 || indexDivisor > 0);
 
             try
@@ -66,12 +65,14 @@ namespace Lucene.Net.Codecs.BlockTerms
                 _indexDivisor = indexDivisor;
 
                 if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM)
+                {
                     CodecUtil.ChecksumEntireFile(_input);
+                }
                 
                 SeekDir(_input, _dirOffset);
 
                 // Read directory
-                var numFields = _input.ReadVInt32();
+                int numFields = _input.ReadVInt32();
                 if (numFields < 0)
                 {
                     throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + _input + ")");
@@ -79,9 +80,9 @@ namespace Lucene.Net.Codecs.BlockTerms
 
                 for (var i = 0; i < numFields; i++)
                 {
-                    var field = _input.ReadVInt32();
-                    var indexStart = _input.ReadVInt64();
-                    var fieldInfo = fieldInfos.FieldInfo(field);
+                    int field = _input.ReadVInt32();
+                    long indexStart = _input.ReadVInt64();
+                    FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                     FieldIndexData previous = _fields.Put(fieldInfo, new FieldIndexData(this, fieldInfo, indexStart));
                     if (previous != null)
                     {
@@ -137,9 +138,11 @@ namespace Lucene.Net.Codecs.BlockTerms
 
             public override long Seek(BytesRef target)
             {
+                //System.out.println("VGR: seek field=" + fieldInfo.name + " target=" + target);
                 _current = _fstEnum.SeekFloor(target);
                 if (_current.Output.HasValue)
                 {
+                    //System.out.println("  got input=" + current.input + " output=" + current.output);
                     return _current.Output.Value;
                 }
                 throw new NullReferenceException("_current.Output is null"); // LUCENENET NOTE: NullReferenceException would be thrown in Java, so doing it here
@@ -147,11 +150,19 @@ namespace Lucene.Net.Codecs.BlockTerms
 
             public override long Next()
             {
+                //System.out.println("VGR: next field=" + fieldInfo.name);
                 _current = _fstEnum.Next();
                 if (_current == null)
+                {
+                    //System.out.println("  eof");
                     return -1;
+                }
 
-                return _current.Output.Value;
+                if (_current.Output.HasValue)
+                {
+                    return _current.Output.Value;
+                }
+                throw new NullReferenceException("_current.Output is null"); // LUCENENET NOTE: NullReferenceException would be thrown in Java, so doing it here
             }
 
             public override long Ord
@@ -181,49 +192,52 @@ namespace Lucene.Net.Codecs.BlockTerms
             public FieldIndexData(VariableGapTermsIndexReader outerInstance, FieldInfo fieldInfo, long indexStart)
             {
                 this.outerInstance = outerInstance;
+
                 _indexStart = indexStart;
 
                 if (this.outerInstance._indexDivisor > 0)
+                {
                     LoadTermsIndex();
+                }  
             }
 
             private void LoadTermsIndex()
             {
-                if (fst != null) return;
-
-                var clone = (IndexInput)outerInstance._input.Clone();
-                clone.Seek(_indexStart);
-                fst = new FST<long?>(clone, outerInstance._fstOutputs);
-                clone.Dispose();
-
-                /*
-                final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
-                Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
-                Util.toDot(fst, w, false, false);
-                System.out.println("FST INDEX: SAVED to " + dotFileName);
-                w.close();
-                */
-
-                if (outerInstance._indexDivisor > 1)
+                if (fst == null)
                 {
-                    // subsample
-                    var scratchIntsRef = new Int32sRef();
-                    var outputs = PositiveInt32Outputs.Singleton;
-                    var builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, outputs);
-                    var fstEnum = new BytesRefFSTEnum<long?>(fst);
-                    var count = outerInstance._indexDivisor;
-
-                    BytesRefFSTEnum.InputOutput<long?> result;
-                    while ((result = fstEnum.Next()) != null)
+                    IndexInput clone = (IndexInput)outerInstance._input.Clone();
+                    clone.Seek(_indexStart);
+                    fst = new FST<long?>(clone, outerInstance._fstOutputs);
+                    clone.Dispose(); // LUCENENET TODO: No using block here is bad...
+
+                    /*
+                    final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
+                    Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
+                    Util.toDot(fst, w, false, false);
+                    System.out.println("FST INDEX: SAVED to " + dotFileName);
+                    w.close();
+                    */
+
+                    if (outerInstance._indexDivisor > 1)
                     {
-                        if (count == outerInstance._indexDivisor)
+                        // subsample
+                        Int32sRef scratchIntsRef = new Int32sRef();
+                        PositiveInt32Outputs outputs = PositiveInt32Outputs.Singleton;
+                        Builder<long?> builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, outputs);
+                        BytesRefFSTEnum<long?> fstEnum = new BytesRefFSTEnum<long?>(fst);
+                        BytesRefFSTEnum.InputOutput<long?> result;
+                        int count = outerInstance._indexDivisor;
+                        while ((result = fstEnum.Next()) != null)
                         {
-                            builder.Add(Util.Fst.Util.ToInt32sRef(result.Input, scratchIntsRef), result.Output);
-                            count = 0;
+                            if (count == outerInstance._indexDivisor)
+                            {
+                                builder.Add(Util.Fst.Util.ToInt32sRef(result.Input, scratchIntsRef), result.Output);
+                                count = 0;
+                            }
+                            count++;
                         }
-                        count++;
+                        fst = builder.Finish();
                     }
-                    fst = builder.Finish();
                 }
             }
 
@@ -236,13 +250,21 @@ namespace Lucene.Net.Codecs.BlockTerms
 
         public override FieldIndexEnum GetFieldEnum(FieldInfo fieldInfo)
         {
-            FieldIndexData fieldData = _fields[fieldInfo];
-            return fieldData.fst == null ? null : new IndexEnum(fieldData.fst);
+            FieldIndexData fieldData;
+            if (!_fields.TryGetValue(fieldInfo, out fieldData) || fieldData == null)
+            {
+                return null;
+            }
+            else
+            {
+                return new IndexEnum(fieldData.fst);
+            }
         }
 
         public override void Dispose()
         {
-            if (_input != null && !_indexLoaded) { 
+            if (_input != null && !_indexLoaded)
+            { 
                 _input.Dispose(); 
             } 
         }
@@ -264,7 +286,12 @@ namespace Lucene.Net.Codecs.BlockTerms
 
         public override long RamBytesUsed()
         {
-            return _fields.Values.Sum(entry => entry.RamBytesUsed());
+            long sizeInBytes = 0;
+            foreach (FieldIndexData entry in _fields.Values)
+            {
+                sizeInBytes += entry.RamBytesUsed();
+            }
+            return sizeInBytes;
         }
     }
 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/af9535fa/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexWriter.cs b/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexWriter.cs
index fae7620..f545666 100644
--- a/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexWriter.cs
+++ b/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexWriter.cs
@@ -38,16 +38,18 @@ namespace Lucene.Net.Codecs.BlockTerms
     {
         protected IndexOutput m_output;
 
-        /** Extension of terms index file */
+        /// <summary>Extension of terms index file</summary>
         internal const string TERMS_INDEX_EXTENSION = "tiv";
+
         internal const string CODEC_NAME = "VARIABLE_GAP_TERMS_INDEX";
         internal const int VERSION_START = 0;
         internal const int VERSION_APPEND_ONLY = 1;
         internal const int VERSION_CHECKSUM = 2;
         internal const int VERSION_CURRENT = VERSION_CHECKSUM;
 
-        private readonly List<FstFieldWriter> _fields = new List<FstFieldWriter>();
+        private readonly IList<FstFieldWriter> _fields = new List<FstFieldWriter>();
 
+        private readonly FieldInfos fieldInfos; // unread
         private readonly IndexTermSelector _policy;
 
         /// <summary>
@@ -81,19 +83,22 @@ namespace Lucene.Net.Codecs.BlockTerms
             public EveryNTermSelector(int interval)
             {
                 this._interval = interval;
-                _count = interval; // First term is first indexed term
+                // First term is first indexed term
+                _count = interval; 
             }
 
             public override bool IsIndexTerm(BytesRef term, TermStats stats)
             {
-                if (_count >= _interval) 
+                if (_count >= _interval)
                 {
                     _count = 1;
                     return true;
                 }
-                
-                _count++;
-                return false;
+                else
+                {
+                    _count++;
+                    return false;
+                }
             }
 
             public override void NewField(FieldInfo fieldInfo)
@@ -115,9 +120,11 @@ namespace Lucene.Net.Codecs.BlockTerms
 
             public EveryNOrDocFreqTermSelector(int docFreqThresh, int interval)
             {
-                _interval = interval;
-                _docFreqThresh = docFreqThresh;
-                _count = interval; // First term is first indexed term
+                this._interval = interval;
+                this._docFreqThresh = docFreqThresh;
+
+                // First term is first indexed term
+                _count = interval; 
             }
 
             public override bool IsIndexTerm(BytesRef term, TermStats stats)
@@ -127,9 +134,11 @@ namespace Lucene.Net.Codecs.BlockTerms
                     _count = 1;
                     return true;
                 }
-                
-                _count++;
-                return false;
+                else
+                {
+                    _count++;
+                    return false;
+                }
             }
 
             public override void NewField(FieldInfo fieldInfo)
@@ -177,17 +186,19 @@ namespace Lucene.Net.Codecs.BlockTerms
                 TERMS_INDEX_EXTENSION);
             m_output = state.Directory.CreateOutput(indexFileName, state.Context);
             bool success = false;
-
             try
             {
-                _policy = policy;
+                fieldInfos = state.FieldInfos;
+                this._policy = policy;
                 WriteHeader(m_output);
                 success = true;
             }
             finally
             {
                 if (!success)
+                {
                     IOUtils.CloseWhileHandlingException(m_output);
+                }
             }
         }
 
@@ -198,8 +209,9 @@ namespace Lucene.Net.Codecs.BlockTerms
 
         public override FieldWriter AddField(FieldInfo field, long termsFilePointer)
         {
+            ////System.out.println("VGW: field=" + field.name);
             _policy.NewField(field);
-            var writer = new FstFieldWriter(this, field, termsFilePointer);
+            FstFieldWriter writer = new FstFieldWriter(this, field, termsFilePointer);
             _fields.Add(writer);
             return writer;
         }
@@ -233,7 +245,7 @@ namespace Lucene.Net.Codecs.BlockTerms
             private readonly VariableGapTermsIndexWriter outerInstance;
 
             private readonly Builder<long?> _fstBuilder;
-            //private readonly PositiveIntOutputs fstOutputs; // LUCENENET NOTE: Not used
+            private readonly PositiveInt32Outputs fstOutputs;
             private readonly long _startTermsFilePointer;
 
             internal FieldInfo FieldInfo { get; private set; }
@@ -246,10 +258,12 @@ namespace Lucene.Net.Codecs.BlockTerms
             public FstFieldWriter(VariableGapTermsIndexWriter outerInstance, FieldInfo fieldInfo, long termsFilePointer)
             {
                 this.outerInstance = outerInstance;
-                FieldInfo = fieldInfo;
-                PositiveInt32Outputs fstOutputs = PositiveInt32Outputs.Singleton;
+
+                this.FieldInfo = fieldInfo;
+                fstOutputs = PositiveInt32Outputs.Singleton;
                 _fstBuilder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, fstOutputs);
                 IndexStart = this.outerInstance.m_output.FilePointer;
+                ////System.out.println("VGW: field=" + fieldInfo.name);
 
                 // Always put empty string in
                 _fstBuilder.Add(new Int32sRef(), termsFilePointer);
@@ -265,9 +279,11 @@ namespace Lucene.Net.Codecs.BlockTerms
                     _first = false;
                     return true;
                 }
-
-                _lastTerm.CopyBytes(text);
-                return false;
+                else
+                {
+                    _lastTerm.CopyBytes(text);
+                    return false;
+                }
             }
 
             private readonly Int32sRef _scratchIntsRef = new Int32sRef();
@@ -297,46 +313,49 @@ namespace Lucene.Net.Codecs.BlockTerms
             {
                 Fst = _fstBuilder.Finish();
                 if (Fst != null)
+                {
                     Fst.Save(outerInstance.m_output);
+                }
             }
         }
 
         public override void Dispose()
         {
-            if (m_output == null) return;
-
-            try
+            if (m_output != null)
             {
-                long dirStart = m_output.FilePointer;
-                int fieldCount = _fields.Count;
-
-                int nonNullFieldCount = 0;
-                for (int i = 0; i < fieldCount; i++)
+                try
                 {
-                    FstFieldWriter field = _fields[i];
-                    if (field.Fst != null)
+                    long dirStart = m_output.FilePointer;
+                    int fieldCount = _fields.Count;
+
+                    int nonNullFieldCount = 0;
+                    for (int i = 0; i < fieldCount; i++)
                     {
-                        nonNullFieldCount++;
+                        FstFieldWriter field = _fields[i];
+                        if (field.Fst != null)
+                        {
+                            nonNullFieldCount++;
+                        }
                     }
-                }
 
-                m_output.WriteVInt32(nonNullFieldCount);
-                for (int i = 0; i < fieldCount; i++)
-                {
-                    FstFieldWriter field = _fields[i];
-                    if (field.Fst != null)
+                    m_output.WriteVInt32(nonNullFieldCount);
+                    for (int i = 0; i < fieldCount; i++)
                     {
-                        m_output.WriteVInt32(field.FieldInfo.Number);
-                        m_output.WriteVInt64(field.IndexStart);
+                        FstFieldWriter field = _fields[i];
+                        if (field.Fst != null)
+                        {
+                            m_output.WriteVInt32(field.FieldInfo.Number);
+                            m_output.WriteVInt64(field.IndexStart);
+                        }
                     }
+                    WriteTrailer(dirStart);
+                    CodecUtil.WriteFooter(m_output);
+                }
+                finally
+                {
+                    m_output.Dispose();
+                    m_output = null;
                 }
-                WriteTrailer(dirStart);
-                CodecUtil.WriteFooter(m_output);
-            }
-            finally
-            {
-                m_output.Dispose();
-                m_output = null;
             }
         }
 

Reply via email to