This is an automated email from the ASF dual-hosted git repository. nightowl888 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit a80eaeb53e86aa5b254e4721e99855eb88485dba Author: Shad Storhaug <[email protected]> AuthorDate: Sun Dec 5 09:45:19 2021 +0700 BUG: Lucene.Net.Codecs.BlockTreeTermsReader.IntersectEnum.Frame::Load(): Fixed issue with inverted logic on RunAutomaton.IsAccept() call that was causing random failures. Added a nightly regression test. (fixes #545) --- src/Lucene.Net.Tests/Index/TestDuelingCodecs.cs | 99 +++++++++++++++++++++++++ src/Lucene.Net/Codecs/BlockTreeTermsReader.cs | 2 +- 2 files changed, 100 insertions(+), 1 deletion(-) diff --git a/src/Lucene.Net.Tests/Index/TestDuelingCodecs.cs b/src/Lucene.Net.Tests/Index/TestDuelingCodecs.cs index 047b31e..2a7c42b 100644 --- a/src/Lucene.Net.Tests/Index/TestDuelingCodecs.cs +++ b/src/Lucene.Net.Tests/Index/TestDuelingCodecs.cs @@ -1,4 +1,5 @@ using J2N.Text; +using Lucene.Net.Attributes; using Lucene.Net.Documents; using Lucene.Net.Index.Extensions; using NUnit.Framework; @@ -176,5 +177,103 @@ namespace Lucene.Net.Index { AssertReaderEquals(info, leftReader, rightReader); } + + [Test] + [Nightly] + [LuceneNetSpecific] + public void TestEquals_GH_545() + { + Codec leftCodec = Codec.ForName("SimpleText"); + Codec rightCodec = Codec.ForName("Lucene46"); + + Directory leftDir = new Store.RAMDirectory(); + Directory rightDir = new Store.RAMDirectory(); + + int maxTermLength = 21678; + + Analysis.Analyzer leftAnalyzer = new Analysis.Standard.StandardAnalyzer(TEST_VERSION_CURRENT) { MaxTokenLength = maxTermLength }; + Analysis.Analyzer rightAnalyzer = new Analysis.Standard.StandardAnalyzer(TEST_VERSION_CURRENT) { MaxTokenLength = maxTermLength }; + + IndexWriterConfig leftConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, leftAnalyzer) { Codec = leftCodec }; + IndexWriterConfig rightConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, rightAnalyzer) { Codec = rightCodec }; + + using IndexWriter leftWriter = new IndexWriter(leftDir, leftConfig); + using IndexWriter rightWriter = new IndexWriter(rightDir, rightConfig); + + // Write indexes + FieldType ft = new FieldType(TextField.TYPE_STORED) + { + StoreTermVectors = true, + StoreTermVectorOffsets = true, + StoreTermVectorPositions = true, + }; + + string body = "Esityslistalla on seuraavana Markovin laatima talous- ja raha-asioiden valiokunnan mietint\u00f6 (A5-0421/2002) Euroopan j\u00e4lleenrakennus- ja kehityspankin (EBRD) toiminnasta (2002/2095(INI)). PARSEP . (FR) Arvoisa puhemies, haluaisin ensimm\u00e4iseksi kiitt\u00e4\u00e4 Euroopan parlamenttia, talous- ja raha-asioiden valiokuntaa sek\u00e4 varsinkin esittelij\u00e4 Markovia mielenkiinnosta EBRD:\u00e4\u00e4 kohtaan. Kuten totesitte, t\u00e4m\u00e4 on ensimm [...] + + Document leftDocument = new Document(); + leftDocument.Add(new Field("body", body, ft)); + leftWriter.AddDocument(leftDocument); + + Document rightDocument = new Document(); + rightDocument.Add(new Field("body", body, ft)); + rightWriter.AddDocument(rightDocument); + + IndexReader leftReader = leftWriter.GetReader(); + leftWriter.Dispose(); + IndexReader rightReader = rightWriter.GetReader(); + rightWriter.Dispose(); + + // check that our readers are valid + TestUtil.CheckReader(leftReader); + TestUtil.CheckReader(rightReader); + + string info = "left: " + leftCodec.ToString() + " / right: " + rightCodec.ToString(); + + // From AssertReaderEquals + + var leftFields = MultiFields.GetFields(leftReader); + var rightFields = MultiFields.GetFields(rightReader); + + // From AssertFieldsEquals + + using var leftEnum = leftFields.GetEnumerator(); + using var rightEnum = rightFields.GetEnumerator(); + + while (leftEnum.MoveNext()) + { + string field = leftEnum.Current; + rightEnum.MoveNext(); + Assert.AreEqual(field, rightEnum.Current, info); + + var leftTerms = leftFields.GetTerms(field); + var rightTerms = rightFields.GetTerms(field); + + // From AssertTermsEquals + + //string re = "??(*)+*.\U000e06d7*"; // Before escaping + string re = "??(\ue808*)+*.\udb41\uded7*"; // Faulty Regex + Util.Automaton.CompiledAutomaton automaton = new Util.Automaton.CompiledAutomaton((new Util.Automaton.RegExp(re, Util.Automaton.RegExpSyntax.NONE)).ToAutomaton()); + if (automaton.Type == Util.Automaton.CompiledAutomaton.AUTOMATON_TYPE.NORMAL) + { + // From AssertTermsEnumEquals + + BytesRef term; + TermsEnum leftTermsEnum = leftTerms.Intersect(automaton, null); + TermsEnum rightTermsEnum = rightTerms.Intersect(automaton, null); + + while (leftTermsEnum.MoveNext()) + { + term = leftTermsEnum.Term; + rightTermsEnum.MoveNext(); + Assert.AreEqual(term, rightTermsEnum.Term, info); + } + Assert.IsFalse(rightTermsEnum.MoveNext(), info); + } + } + Assert.IsFalse(rightEnum.MoveNext()); + + Util.IOUtils.Dispose(leftReader, rightReader); + Util.IOUtils.Dispose(leftDir, rightDir); + } } } \ No newline at end of file diff --git a/src/Lucene.Net/Codecs/BlockTreeTermsReader.cs b/src/Lucene.Net/Codecs/BlockTreeTermsReader.cs index 2aa33dc..32c71ae 100644 --- a/src/Lucene.Net/Codecs/BlockTreeTermsReader.cs +++ b/src/Lucene.Net/Codecs/BlockTreeTermsReader.cs @@ -824,7 +824,7 @@ namespace Lucene.Net.Codecs // If current state is accept, we must process // first block in case it has empty suffix: - if (outerInstance.runAutomaton.IsAccept(state)) + if (!outerInstance.runAutomaton.IsAccept(state)) { // Maybe skip floor blocks: while (numFollowFloorBlocks != 0 && nextFloorLabel <= transitions[0].Min)
