This is an automated email from the ASF dual-hosted git repository. nightowl888 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit ed806713e75875fde44c55e973b26d5b58aac91d Author: Shad Storhaug <[email protected]> AuthorDate: Mon Jun 29 23:47:30 2020 +0700 Lucene.Net.Codecs: Fixed testing condition for BaseTermVectorsFormatTestCase on TermVectorsReaders by throwing InvalidOperationException (fixes #267) --- .../SimpleText/SimpleTextTermVectorsReader.cs | 15 +++++-- .../Index/BaseTermVectorsFormatTestCase.cs | 34 ++++++++++----- src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs | 50 ++++++++++++++++++++-- .../CompressingStoredFieldsIndexWriter.cs | 2 +- .../Compressing/CompressingTermVectorsReader.cs | 12 +++--- .../Codecs/Lucene3x/Lucene3xTermVectorsReader.cs | 17 +++++--- .../Codecs/Lucene40/Lucene40TermVectorsReader.cs | 15 ++++--- 7 files changed, 108 insertions(+), 37 deletions(-) diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsReader.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsReader.cs index 20be1d8..1b81575 100644 --- a/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsReader.cs +++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsReader.cs @@ -591,12 +591,19 @@ namespace Lucene.Net.Codecs.SimpleText public override int NextPosition() { - // LUCENENET NOTE: In Java, the assertion is being caught in the test (as an AssertionException). - // Technically, a "possible" (in fact "probable") scenario like this one, we should be throwing - // an exception, however doing that causes the checkIndex test to fail. The only logical thing we - // can do to make this compatible is to remove the assert. //Debug.Assert((_positions != null && _nextPos < _positions.Length) || // _startOffsets != null && _nextPos < _startOffsets.Length); + + // LUCENENET: The above assertion was for control flow when testing. In Java, it would throw an AssertionError, which is + // caught by the BaseTermVectorsFormatTestCase.assertEquals(RandomTokenStream tk, FieldType ft, Terms terms) method in the + // part that is checking for an error after reading to the end of the enumerator. + + // Since there is no way to turn on assertions in a release build in .NET, we are throwing an InvalidOperationException + // in this case, which matches the behavior of Lucene 8. See #267. + + if (((_positions != null && _nextPos < _positions.Length) || _startOffsets != null && _nextPos < _startOffsets.Length) == false) + throw new InvalidOperationException("Read past last position"); + if (_positions != null) { return _positions[_nextPos++]; diff --git a/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs b/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs index df4d339..8bfde1f 100644 --- a/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs +++ b/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs @@ -16,6 +16,7 @@ using System.Threading; using JCG = J2N.Collections.Generic; using static Lucene.Net.Index.TermsEnum; using Assert = Lucene.Net.TestFramework.Assert; +using AssertionError = Lucene.Net.Diagnostics.AssertionException; using Attribute = Lucene.Net.Util.Attribute; #if TESTFRAMEWORK_MSTEST @@ -634,17 +635,28 @@ namespace Lucene.Net.Index Assert.IsTrue(foundPayload); } } - try - { - docsAndPositionsEnum.NextPosition(); - Assert.Fail(); - } -#pragma warning disable 168 - catch (Exception e) -#pragma warning restore 168 - { - // ok - } + + // LUCENENET specific - In Lucene, there were assertions set up inside TVReaders which throw AssertionError + // (provided assertions are enabled), which in turn signaled this class to skip the check by catching AssertionError. + // In .NET, assertions are not included in the release and cannot be enabled, so there is nothing to catch. + // We have to explicitly exclude the types that rely on this behavior from the check. Otherwise, they would fall + // through to Assert.Fail(). + // + // We also have a fake AssertionException for testing mocks. We cannot throw InvalidOperationException in those + // cases because that exception is expected in other contexts. + Assert.ThrowsAnyOf<InvalidOperationException, AssertionError>(() => docsAndPositionsEnum.NextPosition()); + +// try +// { +// docsAndPositionsEnum.NextPosition(); +// Assert.Fail(); +// } +//#pragma warning disable 168 +// catch (Exception e) +//#pragma warning restore 168 +// { +// // ok +// } } Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc()); } diff --git a/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs b/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs index de06202..f0ad9a4 100644 --- a/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs +++ b/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs @@ -4,6 +4,7 @@ using Lucene.Net.Util.Fst; using System; using System.Collections.Generic; using System.Diagnostics; +using System.Text; namespace Lucene.Net.Codecs { @@ -433,11 +434,54 @@ namespace Lucene.Net.Codecs return "BLOCK: " + Prefix.Utf8ToString(); } + // LUCENENET specific - to keep the Debug.Assert statement from throwing exceptions + // because of invalid UTF8 code in Prefix, we have a wrapper method that falls back + // to using PendingBlock.Prefix.ToString() if PendingBlock.ToString() + private string ToString(IList<PendingBlock> blocks) // For assert + { + if (blocks == null) + return "null"; + + + if (blocks.Count == 0) + return "[]"; + + using (var it = blocks.GetEnumerator()) + { + StringBuilder sb = new StringBuilder(); + sb.Append('['); + it.MoveNext(); + while (true) + { + var e = it.Current; + // There is a chance that the Prefix will contain invalid UTF8, + // so we catch that and use the alternative way of displaying it + try + { + sb.Append(e.ToString()); + } + catch (IndexOutOfRangeException) + { + sb.Append("BLOCK: "); + sb.Append(e.Prefix.ToString()); + } + if (!it.MoveNext()) + { + return sb.Append(']').ToString(); + } + sb.Append(',').Append(' '); + } + } + } + public void CompileIndex(IList<PendingBlock> floorBlocks, RAMOutputStream scratchBytes) { - // LUCENENET TODO: floorBlocks cannot be converted using Arrays.ToString() here. - // It generates an IndexOutOfRangeException() - Debug.Assert((IsFloor && floorBlocks != null && floorBlocks.Count != 0) || (!IsFloor && floorBlocks == null), "isFloor=" + IsFloor + " floorBlocks=" + floorBlocks /*Arrays.ToString(floorBlocks)*/); + // LUCENENET specific - we use a custom wrapper function to display floorBlocks, since + // it might contain garbage that cannot be converted into text. This is compiled out + // of the relese, though. + Debug.Assert( + (IsFloor && floorBlocks != null && floorBlocks.Count != 0) || (!IsFloor && floorBlocks == null), + "isFloor=" + IsFloor + " floorBlocks=" + ToString(floorBlocks)); Debug.Assert(scratchBytes.GetFilePointer() == 0); diff --git a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs index 588cc2b..d45adb3 100644 --- a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs +++ b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs @@ -211,7 +211,7 @@ namespace Lucene.Net.Codecs.Compressing { if (numDocs != totalDocs) { - throw new Exception("Expected " + numDocs + " docs, but got " + totalDocs); + throw new InvalidOperationException("Expected " + numDocs + " docs, but got " + totalDocs); } if (blockChunks > 0) { diff --git a/src/Lucene.Net/Codecs/Compressing/CompressingTermVectorsReader.cs b/src/Lucene.Net/Codecs/Compressing/CompressingTermVectorsReader.cs index 7eedc6f..4235250 100644 --- a/src/Lucene.Net/Codecs/Compressing/CompressingTermVectorsReader.cs +++ b/src/Lucene.Net/Codecs/Compressing/CompressingTermVectorsReader.cs @@ -1071,11 +1071,11 @@ namespace Lucene.Net.Codecs.Compressing { if (doc == NO_MORE_DOCS) { - throw new Exception("DocsEnum exhausted"); + throw new InvalidOperationException("DocsEnum exhausted"); } else if (doc == -1) { - throw new Exception("DocsEnum not started"); + throw new InvalidOperationException("DocsEnum not started"); } } @@ -1084,11 +1084,11 @@ namespace Lucene.Net.Codecs.Compressing CheckDoc(); if (i < 0) { - throw new Exception("Position enum not started"); + throw new InvalidOperationException("Position enum not started"); } else if (i >= termFreq) { - throw new Exception("Read past last position"); + throw new InvalidOperationException("Read past last position"); } } @@ -1096,11 +1096,11 @@ namespace Lucene.Net.Codecs.Compressing { if (doc != 0) { - throw new Exception(); + throw new InvalidOperationException(); } else if (i >= termFreq - 1) { - throw new Exception("Read past last position"); + throw new InvalidOperationException("Read past last position"); } ++i; diff --git a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xTermVectorsReader.cs b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xTermVectorsReader.cs index f3535d2..373b580 100644 --- a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xTermVectorsReader.cs +++ b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xTermVectorsReader.cs @@ -804,13 +804,16 @@ namespace Lucene.Net.Codecs.Lucene3x public override int NextPosition() { - // LUCENENET TODO: on .NET Core 2.0, this assert causes an uncatchable exception. - // See: https://github.com/Microsoft/vstest/issues/1022 - // Once the issue has been identified and fixed we can remove this conditional - // compilation for it on .NET Core 2.0. -#if !NETSTANDARD2_0 && !NETSTANDARD2_1 - Debug.Assert((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length); -#endif + //Debug.Assert((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length); + + // LUCENENET: The above assertion was for control flow when testing. In Java, it would throw an AssertionError, which is + // caught by the BaseTermVectorsFormatTestCase.assertEquals(RandomTokenStream tk, FieldType ft, Terms terms) method in the + // part that is checking for an error after reading to the end of the enumerator. + + // Since there is no way to turn on assertions in a release build in .NET, we are throwing an InvalidOperationException + // in this case, which matches the behavior of Lucene 8. See #267. + if (((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length) == false) + throw new InvalidOperationException("Read past last position"); if (positions != null) { diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40TermVectorsReader.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40TermVectorsReader.cs index a4781e7..1cd3d66 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40TermVectorsReader.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40TermVectorsReader.cs @@ -808,11 +808,16 @@ namespace Lucene.Net.Codecs.Lucene40 public override int NextPosition() { - // LUCENENET TODO: BUG - Need to investigate why this assert sometimes fails - // which will cause the test runner to crash on .NET Core 2.0 -#if !NETSTANDARD2_0 && !NETSTANDARD2_1 - Debug.Assert((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length); -#endif + //Debug.Assert((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length); + + // LUCENENET: The above assertion was for control flow when testing. In Java, it would throw an AssertionError, which is + // caught by the BaseTermVectorsFormatTestCase.assertEquals(RandomTokenStream tk, FieldType ft, Terms terms) method in the + // part that is checking for an error after reading to the end of the enumerator. + + // Since there is no way to turn on assertions in a release build in .NET, we are throwing an InvalidOperationException + // in this case, which matches the behavior of Lucene 8. See #267. + if (((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length) == false) + throw new InvalidOperationException("Read past last position"); if (positions != null) {
