[lucenenet] 01/09: Lucene.Net.Codecs: Fixed testing condition for BaseTermVectorsFormatTestCase on TermVectorsReaders by throwing InvalidOperationException (fixes #267)

nightowl888 Tue, 30 Jun 2020 14:59:10 -0700

This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git


commit ed806713e75875fde44c55e973b26d5b58aac91d
Author: Shad Storhaug <[email protected]>
AuthorDate: Mon Jun 29 23:47:30 2020 +0700

    Lucene.Net.Codecs: Fixed testing condition for 
BaseTermVectorsFormatTestCase on TermVectorsReaders by throwing 
InvalidOperationException (fixes #267)
---
 .../SimpleText/SimpleTextTermVectorsReader.cs      | 15 +++++--
 .../Index/BaseTermVectorsFormatTestCase.cs         | 34 ++++++++++-----
 src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs      | 50 ++++++++++++++++++++--
 .../CompressingStoredFieldsIndexWriter.cs          |  2 +-
 .../Compressing/CompressingTermVectorsReader.cs    | 12 +++---
 .../Codecs/Lucene3x/Lucene3xTermVectorsReader.cs   | 17 +++++---
 .../Codecs/Lucene40/Lucene40TermVectorsReader.cs   | 15 ++++---
 7 files changed, 108 insertions(+), 37 deletions(-)

diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsReader.cs 
b/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsReader.cs
index 20be1d8..1b81575 100644
--- a/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsReader.cs
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsReader.cs
@@ -591,12 +591,19 @@ namespace Lucene.Net.Codecs.SimpleText
 
             public override int NextPosition()
             {
-                // LUCENENET NOTE: In Java, the assertion is being caught in 
the test (as an AssertionException).
-                // Technically, a "possible" (in fact "probable") scenario 
like this one, we should be throwing
-                // an exception, however doing that causes the checkIndex test 
to fail. The only logical thing we
-                // can do to make this compatible is to remove the assert.
                 //Debug.Assert((_positions != null && _nextPos < 
_positions.Length) ||
                 //             _startOffsets != null && _nextPos < 
_startOffsets.Length);
+
+                // LUCENENET: The above assertion was for control flow when 
testing. In Java, it would throw an AssertionError, which is
+                // caught by the 
BaseTermVectorsFormatTestCase.assertEquals(RandomTokenStream tk, FieldType ft, 
Terms terms) method in the
+                // part that is checking for an error after reading to the end 
of the enumerator.
+
+                // Since there is no way to turn on assertions in a release 
build in .NET, we are throwing an InvalidOperationException
+                // in this case, which matches the behavior of Lucene 8. See 
#267.
+
+                if (((_positions != null && _nextPos < _positions.Length) || 
_startOffsets != null && _nextPos < _startOffsets.Length) == false)
+                    throw new InvalidOperationException("Read past last 
position");
+
                 if (_positions != null)
                 {
                     return _positions[_nextPos++];
diff --git 
a/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs 
b/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs
index df4d339..8bfde1f 100644
--- a/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs
+++ b/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs
@@ -16,6 +16,7 @@ using System.Threading;
 using JCG = J2N.Collections.Generic;
 using static Lucene.Net.Index.TermsEnum;
 using Assert = Lucene.Net.TestFramework.Assert;
+using AssertionError = Lucene.Net.Diagnostics.AssertionException;
 using Attribute = Lucene.Net.Util.Attribute;
 
 #if TESTFRAMEWORK_MSTEST
@@ -634,17 +635,28 @@ namespace Lucene.Net.Index
                                 Assert.IsTrue(foundPayload);
                             }
                         }
-                        try
-                        {
-                            docsAndPositionsEnum.NextPosition();
-                            Assert.Fail();
-                        }
-#pragma warning disable 168
-                        catch (Exception e)
-#pragma warning restore 168
-                        {
-                            // ok
-                        }
+
+                        // LUCENENET specific - In Lucene, there were 
assertions set up inside TVReaders which throw AssertionError
+                        // (provided assertions are enabled), which in turn 
signaled this class to skip the check by catching AssertionError.
+                        // In .NET, assertions are not included in the release 
and cannot be enabled, so there is nothing to catch.
+                        // We have to explicitly exclude the types that rely 
on this behavior from the check. Otherwise, they would fall
+                        // through to Assert.Fail().
+                        //
+                        // We also have a fake AssertionException for testing 
mocks. We cannot throw InvalidOperationException in those
+                        // cases because that exception is expected in other 
contexts.
+                        Assert.ThrowsAnyOf<InvalidOperationException, 
AssertionError>(() => docsAndPositionsEnum.NextPosition());
+
+//                        try
+//                        {
+//                            docsAndPositionsEnum.NextPosition();
+//                            Assert.Fail();
+//                        }
+//#pragma warning disable 168
+//                        catch (Exception e)
+//#pragma warning restore 168
+//                        {
+//                            // ok
+//                        }
                     }
                     Assert.AreEqual(DocsEnum.NO_MORE_DOCS, 
docsAndPositionsEnum.NextDoc());
                 }
diff --git a/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs 
b/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs
index de06202..f0ad9a4 100644
--- a/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs
+++ b/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs
@@ -4,6 +4,7 @@ using Lucene.Net.Util.Fst;
 using System;
 using System.Collections.Generic;
 using System.Diagnostics;
+using System.Text;
 
 namespace Lucene.Net.Codecs
 {
@@ -433,11 +434,54 @@ namespace Lucene.Net.Codecs
                 return "BLOCK: " + Prefix.Utf8ToString();
             }
 
+            // LUCENENET specific - to keep the Debug.Assert statement from 
throwing exceptions
+            // because of invalid UTF8 code in Prefix, we have a wrapper 
method that falls back
+            // to using PendingBlock.Prefix.ToString() if 
PendingBlock.ToString()
+            private string ToString(IList<PendingBlock> blocks) // For assert
+            {
+                if (blocks == null)
+                    return "null";
+
+
+                if (blocks.Count == 0)
+                    return "[]";
+
+                using (var it = blocks.GetEnumerator())
+                {
+                    StringBuilder sb = new StringBuilder();
+                    sb.Append('[');
+                    it.MoveNext();
+                    while (true)
+                    {
+                        var e = it.Current;
+                        // There is a chance that the Prefix will contain 
invalid UTF8,
+                        // so we catch that and use the alternative way of 
displaying it
+                        try
+                        {
+                            sb.Append(e.ToString());
+                        }
+                        catch (IndexOutOfRangeException)
+                        {
+                            sb.Append("BLOCK: ");
+                            sb.Append(e.Prefix.ToString());
+                        }
+                        if (!it.MoveNext())
+                        {
+                            return sb.Append(']').ToString();
+                        }
+                        sb.Append(',').Append(' ');
+                    }
+                }
+            }
+
             public void CompileIndex(IList<PendingBlock> floorBlocks, 
RAMOutputStream scratchBytes)
             {
-                // LUCENENET TODO: floorBlocks cannot be converted using 
Arrays.ToString() here.
-                // It generates an IndexOutOfRangeException()
-                Debug.Assert((IsFloor && floorBlocks != null && 
floorBlocks.Count != 0) || (!IsFloor && floorBlocks == null), "isFloor=" + 
IsFloor + " floorBlocks=" + floorBlocks /*Arrays.ToString(floorBlocks)*/); 
+                // LUCENENET specific - we use a custom wrapper function to 
display floorBlocks, since
+                // it might contain garbage that cannot be converted into 
text. This is compiled out
+                // of the relese, though.
+                Debug.Assert(
+                    (IsFloor && floorBlocks != null && floorBlocks.Count != 0) 
|| (!IsFloor && floorBlocks == null),
+                    "isFloor=" + IsFloor + " floorBlocks=" + 
ToString(floorBlocks));
 
                 Debug.Assert(scratchBytes.GetFilePointer() == 0);
 
diff --git 
a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs 
b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs
index 588cc2b..d45adb3 100644
--- a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs
+++ b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs
@@ -211,7 +211,7 @@ namespace Lucene.Net.Codecs.Compressing
         {
             if (numDocs != totalDocs)
             {
-                throw new Exception("Expected " + numDocs + " docs, but got " 
+ totalDocs);
+                throw new InvalidOperationException("Expected " + numDocs + " 
docs, but got " + totalDocs);
             }
             if (blockChunks > 0)
             {
diff --git a/src/Lucene.Net/Codecs/Compressing/CompressingTermVectorsReader.cs 
b/src/Lucene.Net/Codecs/Compressing/CompressingTermVectorsReader.cs
index 7eedc6f..4235250 100644
--- a/src/Lucene.Net/Codecs/Compressing/CompressingTermVectorsReader.cs
+++ b/src/Lucene.Net/Codecs/Compressing/CompressingTermVectorsReader.cs
@@ -1071,11 +1071,11 @@ namespace Lucene.Net.Codecs.Compressing
             {
                 if (doc == NO_MORE_DOCS)
                 {
-                    throw new Exception("DocsEnum exhausted");
+                    throw new InvalidOperationException("DocsEnum exhausted");
                 }
                 else if (doc == -1)
                 {
-                    throw new Exception("DocsEnum not started");
+                    throw new InvalidOperationException("DocsEnum not 
started");
                 }
             }
 
@@ -1084,11 +1084,11 @@ namespace Lucene.Net.Codecs.Compressing
                 CheckDoc();
                 if (i < 0)
                 {
-                    throw new Exception("Position enum not started");
+                    throw new InvalidOperationException("Position enum not 
started");
                 }
                 else if (i >= termFreq)
                 {
-                    throw new Exception("Read past last position");
+                    throw new InvalidOperationException("Read past last 
position");
                 }
             }
 
@@ -1096,11 +1096,11 @@ namespace Lucene.Net.Codecs.Compressing
             {
                 if (doc != 0)
                 {
-                    throw new Exception();
+                    throw new InvalidOperationException();
                 }
                 else if (i >= termFreq - 1)
                 {
-                    throw new Exception("Read past last position");
+                    throw new InvalidOperationException("Read past last 
position");
                 }
 
                 ++i;
diff --git a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xTermVectorsReader.cs 
b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xTermVectorsReader.cs
index f3535d2..373b580 100644
--- a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xTermVectorsReader.cs
+++ b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xTermVectorsReader.cs
@@ -804,13 +804,16 @@ namespace Lucene.Net.Codecs.Lucene3x
 
             public override int NextPosition()
             {
-                // LUCENENET TODO: on .NET Core 2.0, this assert causes an 
uncatchable exception.
-                // See: https://github.com/Microsoft/vstest/issues/1022
-                // Once the issue has been identified and fixed we can remove 
this conditional
-                // compilation for it on .NET Core 2.0.
-#if !NETSTANDARD2_0 && !NETSTANDARD2_1
-                Debug.Assert((positions != null && nextPos < positions.Length) 
|| startOffsets != null && nextPos < startOffsets.Length);
-#endif
+                //Debug.Assert((positions != null && nextPos < 
positions.Length) || startOffsets != null && nextPos < startOffsets.Length);
+
+                // LUCENENET: The above assertion was for control flow when 
testing. In Java, it would throw an AssertionError, which is
+                // caught by the 
BaseTermVectorsFormatTestCase.assertEquals(RandomTokenStream tk, FieldType ft, 
Terms terms) method in the
+                // part that is checking for an error after reading to the end 
of the enumerator.
+
+                // Since there is no way to turn on assertions in a release 
build in .NET, we are throwing an InvalidOperationException
+                // in this case, which matches the behavior of Lucene 8. See 
#267.
+                if (((positions != null && nextPos < positions.Length) || 
startOffsets != null && nextPos < startOffsets.Length) == false)
+                    throw new InvalidOperationException("Read past last 
position");
 
                 if (positions != null)
                 {
diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40TermVectorsReader.cs 
b/src/Lucene.Net/Codecs/Lucene40/Lucene40TermVectorsReader.cs
index a4781e7..1cd3d66 100644
--- a/src/Lucene.Net/Codecs/Lucene40/Lucene40TermVectorsReader.cs
+++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40TermVectorsReader.cs
@@ -808,11 +808,16 @@ namespace Lucene.Net.Codecs.Lucene40
 
             public override int NextPosition()
             {
-                // LUCENENET TODO: BUG - Need to investigate why this assert 
sometimes fails
-                // which will cause the test runner to crash on .NET Core 2.0
-#if !NETSTANDARD2_0 && !NETSTANDARD2_1
-                Debug.Assert((positions != null && nextPos < positions.Length) 
|| startOffsets != null && nextPos < startOffsets.Length);
-#endif
+                //Debug.Assert((positions != null && nextPos < 
positions.Length) || startOffsets != null && nextPos < startOffsets.Length);
+
+                // LUCENENET: The above assertion was for control flow when 
testing. In Java, it would throw an AssertionError, which is
+                // caught by the 
BaseTermVectorsFormatTestCase.assertEquals(RandomTokenStream tk, FieldType ft, 
Terms terms) method in the
+                // part that is checking for an error after reading to the end 
of the enumerator.
+
+                // Since there is no way to turn on assertions in a release 
build in .NET, we are throwing an InvalidOperationException
+                // in this case, which matches the behavior of Lucene 8. See 
#267.
+                if (((positions != null && nextPos < positions.Length) || 
startOffsets != null && nextPos < startOffsets.Length) == false)
+                    throw new InvalidOperationException("Read past last 
position");
 
                 if (positions != null)
                 {

[lucenenet] 01/09: Lucene.Net.Codecs: Fixed testing condition for BaseTermVectorsFormatTestCase on TermVectorsReaders by throwing InvalidOperationException (fixes #267)

Reply via email to