http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestIndexWriterUnicode.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestIndexWriterUnicode.cs b/src/Lucene.Net.Tests/Index/TestIndexWriterUnicode.cs new file mode 100644 index 0000000..e9fdbf4 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestIndexWriterUnicode.cs @@ -0,0 +1,390 @@ +using Lucene.Net.Attributes; +using Lucene.Net.Documents; +using Lucene.Net.Support; +using System; +using System.Collections.Generic; +using System.Text; + +namespace Lucene.Net.Index +{ + using Lucene.Net.Randomized.Generators; + using Lucene.Net.Util; + using NUnit.Framework; + using BytesRef = Lucene.Net.Util.BytesRef; + using CharsRef = Lucene.Net.Util.CharsRef; + using Directory = Lucene.Net.Store.Directory; + using Document = Documents.Document; + using Field = Field; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+     */
+
+    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+    using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+    /// <summary>
+    /// Tests UTF-16 handling in <see cref="IndexWriter"/> and <see cref="UnicodeUtil"/>:
+    /// round-tripping between UTF-16 and UTF-8, replacement of unpaired surrogates
+    /// by U+FFFD, and the sort order of terms that contain surrogate pairs.
+    /// </summary>
+    [TestFixture]
+    public class TestIndexWriterUnicode : LuceneTestCase
+    {
+        // Laid out as (input, expected) pairs: element 2*i is the text that gets indexed,
+        // element 2*i+1 is the text TestInvalidUTF16 expects to read back.
+        internal readonly string[] Utf8Data = new string[]
+        {
+            // unpaired low surrogate
+            "ab\udc17cd", "ab\ufffdcd",
+            "\udc17abcd", "\ufffdabcd",
+            "\udc17", "\ufffd",
+            "ab\udc17\udc17cd", "ab\ufffd\ufffdcd",
+            "\udc17\udc17abcd", "\ufffd\ufffdabcd",
+            "\udc17\udc17", "\ufffd\ufffd",
+            // unpaired high surrogate
+            "ab\ud917cd", "ab\ufffdcd",
+            "\ud917abcd", "\ufffdabcd",
+            "\ud917", "\ufffd",
+            "ab\ud917\ud917cd", "ab\ufffd\ufffdcd",
+            "\ud917\ud917abcd", "\ufffd\ufffdabcd",
+            "\ud917\ud917", "\ufffd\ufffd",
+            // backwards surrogates (low followed by high)
+            "ab\udc17\ud917cd", "ab\ufffd\ufffdcd",
+            "\udc17\ud917abcd", "\ufffd\ufffdabcd",
+            "\udc17\ud917", "\ufffd\ufffd",
+            "ab\udc17\ud917\udc17\ud917cd", "ab\ufffd\ud917\udc17\ufffdcd",
+            "\udc17\ud917\udc17\ud917abcd", "\ufffd\ud917\udc17\ufffdabcd",
+            "\udc17\ud917\udc17\ud917", "\ufffd\ud917\udc17\ufffd"
+        };
+
+        /// <summary>Returns <c>Random().Next(lim)</c>.</summary>
+        private int NextInt(int lim)
+        {
+            return Random().Next(lim);
+        }
+
+        /// <summary>Returns <paramref name="start"/> plus a random offset drawn from <c>NextInt(end - start)</c>.</summary>
+        private int NextInt(int start, int end)
+        {
+            return start + NextInt(end - start);
+        }
+
+        /// <summary>
+        /// Fills <paramref name="buffer"/> with random UTF-16 code units and writes into
+        /// <paramref name="expected"/> the same units, except that a deliberately unpaired
+        /// surrogate is recorded as U+FFFD.
+        /// </summary>
+        /// <param name="buffer">Receives the generated (possibly ill-formed) UTF-16 text.</param>
+        /// <param name="expected">Receives the text expected after a UTF-8 round trip.</param>
+        /// <param name="offset">First index to fill.</param>
+        /// <param name="count">Number of code units to fill, counted from <paramref name="offset"/>.</param>
+        /// <returns><c>true</c> if at least one unpaired surrogate was written to <paramref name="buffer"/>.</returns>
+        private bool FillUnicode(char[] buffer, char[] expected, int offset, int count)
+        {
+            int len = offset + count;
+            bool hasIllegal = false;
+
+            if (offset > 0 && buffer[offset] >= 0xdc00 && buffer[offset] < 0xe000)
+            // Don't start in the middle of a valid surrogate pair
+            {
+                offset--;
+            }
+
+            for (int i = offset; i < len; i++)
+            {
+                int t = NextInt(6);
+                if (0 == t && i < len - 1)
+                {
+                    // Make a surrogate pair
+                    // High surrogate
+                    expected[i] = buffer[i++] = (char)NextInt(0xd800, 0xdc00);
+                    // Low surrogate
+                    expected[i] = buffer[i] = (char)NextInt(0xdc00, 0xe000);
+                }
+                else if (t <= 1)
+                {
+                    expected[i] = buffer[i] = (char)NextInt(0x80);
+                }
+                else if (2 == t)
+                {
+                    expected[i] = buffer[i] = (char)NextInt(0x80, 0x800);
+                }
+                else if (3 == t)
+                {
+                    expected[i] = buffer[i] = (char)NextInt(0x800, 0xd800);
+                }
+                else if (4 == t)
+                {
+                    expected[i] = buffer[i] = (char)NextInt(0xe000, 0xffff);
+                }
+                else if (5 == t && i < len - 1)
+                {
+                    // Illegal unpaired surrogate
+                    if (NextInt(10) == 7)
+                    {
+                        if (Random().NextBoolean())
+                        {
+                            buffer[i] = (char)NextInt(0xd800, 0xdc00);
+                        }
+                        else
+                        {
+                            buffer[i] = (char)NextInt(0xdc00, 0xe000);
+                        }
+                        expected[i++] = (char)0xfffd;
+                        // Follow the lone surrogate with a non-surrogate so it cannot pair up by accident
+                        expected[i] = buffer[i] = (char)NextInt(0x800, 0xd800);
+                        hasIllegal = true;
+                    }
+                    else
+                    {
+                        expected[i] = buffer[i] = (char)NextInt(0x800, 0xd800);
+                    }
+                }
+                else
+                {
+                    expected[i] = buffer[i] = ' ';
+                }
+            }
+
+            return hasIllegal;
+        }
+
+        // both start & end are inclusive
+        private int GetInt(Random r, int start, int end)
+        {
+            return start + r.Next(1 + end - start);
+        }
+
+        /// <summary>Formats <paramref name="c"/> as "U+" followed by its lower-case hexadecimal value.</summary>
+        private string AsUnicodeChar(char c)
+        {
+            return "U+" + ((int)c).ToString("x");
+        }
+
+        /// <summary>
+        /// Describes a term of one or two UTF-16 code units as a comma-separated list of
+        /// "U+xxxx" values; asserts that the term is no longer than two code units.
+        /// </summary>
+        private string TermDesc(string s)
+        {
+            string s0;
+            Assert.IsTrue(s.Length <= 2);
+            if (s.Length == 1)
+            {
+                s0 = AsUnicodeChar(s[0]);
+            }
+            else
+            {
+                s0 = AsUnicodeChar(s[0]) + "," + AsUnicodeChar(s[1]);
+            }
+            return s0;
+        }
+
+        /// <summary>
+        /// Enumerates the terms of field "f" in <paramref name="r"/> and asserts that they come
+        /// back in strictly increasing <see cref="BytesRef"/> order and that each one is contained
+        /// in <paramref name="allTerms"/>; afterwards every seen term must be found by SeekCeil.
+        /// </summary>
+        /// <param name="r">Reader whose terms are checked.</param>
+        /// <param name="allTerms">Every term that was added to the index.</param>
+        /// <param name="isTop">When <c>true</c>, the seen terms must equal <paramref name="allTerms"/> exactly.</param>
+        private void CheckTermsOrder(IndexReader r, ISet<string> allTerms, bool isTop)
+        {
+            TermsEnum terms = MultiFields.GetFields(r).GetTerms("f").GetIterator(null);
+
+            BytesRef last = new BytesRef();
+
+            HashSet<string> seenTerms = new HashSet<string>();
+
+            while (true)
+            {
+                BytesRef term = terms.Next();
+                if (term == null)
+                {
+                    break;
+                }
+
+                Assert.IsTrue(last.CompareTo(term) < 0);
+                last.CopyBytes(term);
+
+                string s = term.Utf8ToString();
+                Assert.IsTrue(allTerms.Contains(s), "term " + TermDesc(s) + " was not added to index (count=" + allTerms.Count + ")");
+                seenTerms.Add(s);
+            }
+
+            if (isTop)
+            {
+                Assert.IsTrue(allTerms.SetEquals(seenTerms));
+            }
+
+            // Test seeking (foreach also disposes the enumerator, which the manual loop did not):
+            foreach (string seen in seenTerms)
+            {
+                BytesRef tr = new BytesRef(seen);
+                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(tr), "seek failed for term=" + TermDesc(tr.Utf8ToString()));
+            }
+        }
+
+        // LUCENE-510
+        [Test, LongRunningTest]
+        public virtual void TestRandomUnicodeStrings()
+        {
+            char[] buffer = new char[20];
+            char[] expected = new char[20];
+
+            BytesRef utf8 = new BytesRef(20);
+            CharsRef utf16 = new CharsRef(20);
+
+            int num = AtLeast(100000);
+            for (int iter = 0; iter < num; iter++)
+            {
+                bool hasIllegal = FillUnicode(buffer, expected, 0, 20);
+
+                UnicodeUtil.UTF16toUTF8(buffer, 0, 20, utf8);
+                if (!hasIllegal)
+                {
+                    // Well-formed input only: the encoding must agree byte for byte with the reference encoder
+#pragma warning disable 612, 618
+                    var b = (new string(buffer, 0, 20)).GetBytes(IOUtils.CHARSET_UTF_8);
+#pragma warning restore 612, 618
+                    Assert.AreEqual(b.Length, utf8.Length);
+                    for (int i = 0; i < b.Length; i++)
+                    {
+                        Assert.AreEqual(b[i], utf8.Bytes[i]);
+                    }
+                }
+
+                UnicodeUtil.UTF8toUTF16(utf8.Bytes, 0, utf8.Length, utf16);
+                // NUnit takes (expected, actual); the original call had the two swapped
+                Assert.AreEqual(20, utf16.Length);
+                for (int i = 0; i < 20; i++)
+                {
+                    Assert.AreEqual(expected[i], utf16.Chars[i]);
+                }
+            }
+        }
+
+        // LUCENE-510
+        [Test]
+        public virtual void TestAllUnicodeChars()
+        {
+            BytesRef utf8 = new BytesRef(10);
+            CharsRef utf16 = new CharsRef(10);
+            char[] chars = new char[2];
+            for (int ch = 0; ch < 0x0010FFFF; ch++)
+            {
+                if (ch == 0xd800)
+                // Skip invalid code points
+                {
+                    ch = 0xe000;
+                }
+
+                int len = 0;
+                if (ch <= 0xffff)
+                {
+                    chars[len++] = (char)ch;
+                }
+                else
+                {
+                    // Supplementary code point: split into a high/low surrogate pair
+                    chars[len++] = (char)(((ch - 0x0010000) >> 10) + UnicodeUtil.UNI_SUR_HIGH_START);
+                    chars[len++] = (char)(((ch - 0x0010000) & 0x3FF) + UnicodeUtil.UNI_SUR_LOW_START);
+                }
+
+                UnicodeUtil.UTF16toUTF8(chars, 0, len, utf8);
+
+                string s1 = new string(chars, 0, len);
+                string s2 = Encoding.UTF8.GetString(utf8.Bytes, utf8.Offset, utf8.Length);
+                Assert.AreEqual(s1, s2, "codepoint " + ch);
+
+                UnicodeUtil.UTF8toUTF16(utf8.Bytes, 0, utf8.Length, utf16);
+                Assert.AreEqual(s1, new string(utf16.Chars, 0, utf16.Length), "codepoint " + ch);
+
+                var b = s1.GetBytes(Encoding.UTF8);
+                Assert.AreEqual(utf8.Length, b.Length);
+                for (int j = 0; j < utf8.Length; j++)
+                {
+                    Assert.AreEqual(utf8.Bytes[j], b[j]);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Indexes a field value that contains U+FFFF inside a token and asserts that the
+        /// term "a\uffffb" has a document frequency of 1.
+        /// </summary>
+        [Test]
+        public virtual void TestEmbeddedFFFF()
+        {
+            Directory d = NewDirectory();
+            IndexWriter w = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
+            Document doc = new Document();
+            doc.Add(NewTextField("field", "a a\uffffb", Field.Store.NO));
+            w.AddDocument(doc);
+            doc = new Document();
+            doc.Add(NewTextField("field", "a", Field.Store.NO));
+            w.AddDocument(doc);
+            IndexReader r = w.Reader;
+            Assert.AreEqual(1, r.DocFreq(new Term("field", "a\uffffb")));
+            r.Dispose();
+            w.Dispose();
+            d.Dispose();
+        }
+
+        // LUCENE-510
+        [Test]
+        public virtual void TestInvalidUTF16()
+        {
+            Directory dir = NewDirectory();
+            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new TestIndexWriter.StringSplitAnalyzer()));
+            Document doc = new Document();
+
+            // One field per (input, expected) pair of Utf8Data
+            int count = Utf8Data.Length / 2;
+            for (int i = 0; i < count; i++)
+            {
+                doc.Add(NewTextField("f" + i, Utf8Data[2 * i], Field.Store.YES));
+            }
+            w.AddDocument(doc);
+            w.Dispose();
+
+            IndexReader ir = DirectoryReader.Open(dir);
+            Document doc2 = ir.Document(0);
+            for (int i = 0; i < count; i++)
+            {
+                Assert.AreEqual(1, ir.DocFreq(new Term("f" + i, Utf8Data[2 * i + 1])), "field " + i + " was not indexed correctly");
+                Assert.AreEqual(Utf8Data[2 * i + 1], doc2.GetField("f" + i).GetStringValue(), "field " + i + " is incorrect");
+            }
+            ir.Dispose();
+            dir.Dispose();
+        }
+
+        // Make sure terms, including ones with surrogate pairs,
+        // sort in codepoint sort order by default
+        [Test]
+        public virtual void TestTermUTF16SortOrder()
+        {
+            Random rnd = Random();
+            Directory dir = NewDirectory();
+            RandomIndexWriter writer = new RandomIndexWriter(rnd, dir, Similarity, TimeZone);
+            Document d = new Document();
+            // Single field whose value is replaced for every document
+            Field f = NewStringField("f", "", Field.Store.NO);
+            d.Add(f);
+            char[] chars = new char[2];
+            HashSet<string> allTerms = new HashSet<string>();
+
+            int num = AtLeast(200);
+            for (int i = 0; i < num; i++)
+            {
+                string s;
+                if (rnd.NextBoolean())
+                {
+                    // Single char
+                    if (rnd.NextBoolean())
+                    {
+                        // Above surrogates
+                        chars[0] = (char)GetInt(rnd, 1 + UnicodeUtil.UNI_SUR_LOW_END, 0xffff);
+                    }
+                    else
+                    {
+                        // Below surrogates
+                        chars[0] = (char)GetInt(rnd, 0, UnicodeUtil.UNI_SUR_HIGH_START - 1);
+                    }
+                    s = new string(chars, 0, 1);
+                }
+                else
+                {
+                    // Surrogate pair
+                    chars[0] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_HIGH_START, UnicodeUtil.UNI_SUR_HIGH_END);
+                    Assert.IsTrue(((int)chars[0]) >= UnicodeUtil.UNI_SUR_HIGH_START && ((int)chars[0]) <= UnicodeUtil.UNI_SUR_HIGH_END);
+                    chars[1] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_LOW_START, UnicodeUtil.UNI_SUR_LOW_END);
+                    s = new string(chars, 0, 2);
+                }
+                allTerms.Add(s);
+                f.SetStringValue(s);
+
+                writer.AddDocument(d);
+
+                // Commit periodically
+                if ((1 + i) % 42 == 0)
+                {
+                    writer.Commit();
+                }
+            }
+
+            IndexReader r = writer.Reader;
+
+            // Test each sub-segment
+            foreach (AtomicReaderContext ctx in r.Leaves)
+            {
+                CheckTermsOrder(ctx.Reader, allTerms, false);
+            }
+
+            // Test multi segment
+            CheckTermsOrder(r, allTerms, true);
+            r.Dispose();
+
+            writer.ForceMerge(1);
+
+            // Test single segment
+            r = writer.Reader;
+            CheckTermsOrder(r, allTerms, true);
+            r.Dispose();
+
+            writer.Dispose();
+            dir.Dispose();
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestIndexWriterWithThreads.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestIndexWriterWithThreads.cs b/src/Lucene.Net.Tests/Index/TestIndexWriterWithThreads.cs new file mode 100644 index 0000000..ee541be --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestIndexWriterWithThreads.cs @@ -0,0 +1,796 @@ +using Lucene.Net.Documents; +using Lucene.Net.Randomized.Generators; +using Lucene.Net.Support; +using Lucene.Net.Util; +using NUnit.Framework; +using System; +using System.IO; +using System.Threading; + +namespace Lucene.Net.Index +{ + //using Slow = Lucene.Net.Util.LuceneTestCase.Slow; + + using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException; + using BaseDirectoryWrapper = Lucene.Net.Store.BaseDirectoryWrapper; + using BytesRef = Lucene.Net.Util.BytesRef; + using Directory = Lucene.Net.Store.Directory; + using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; + using Document = Documents.Document; + using Field = Field; + using FieldType = FieldType; + using IBits = Lucene.Net.Util.IBits; + using LineFileDocs = Lucene.Net.Util.LineFileDocs; + using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+    using MockDirectoryWrapper = Lucene.Net.Store.MockDirectoryWrapper;
+    using NumericDocValuesField = NumericDocValuesField;
+    using TestUtil = Lucene.Net.Util.TestUtil;
+    using TextField = TextField;
+
+    /// <summary>
+    /// MultiThreaded IndexWriter tests
+    /// </summary>
+    [SuppressCodecs("Lucene3x")]
+    [TestFixture]
+    public class TestIndexWriterWithThreads : LuceneTestCase
+    {
+        /// <summary>
+        /// Used by test cases below: repeatedly calls <see cref="IndexWriter.UpdateDocument(Term, System.Collections.Generic.IEnumerable{IIndexableField})"/>
+        /// with the same document for roughly 200 ms, or until an exception ends the loop.
+        /// </summary>
+        private class IndexerThread : ThreadClass
+        {
+            // How long Run() keeps indexing before it stops on its own
+            private const long RunTimeMillis = 200;
+
+            private readonly Func<string, string, FieldType, Field> NewField;
+
+            internal bool DiskFull;
+            internal Exception Error;
+            internal AlreadyClosedException Ace;
+            internal IndexWriter Writer;
+            internal bool NoErrors;
+            internal volatile int AddCount;
+
+            /// <param name="writer">Writer that receives the documents.</param>
+            /// <param name="noErrors">
+            /// When <c>true</c>, an unexpected exception is printed and stored in <see cref="Error"/>;
+            /// when <c>false</c>, it only ends the loop.
+            /// </param>
+            /// <param name="newField">
+            /// LUCENENET specific
+            /// Passed in because <see cref="LuceneTestCase.NewField(string, string, FieldType)"/>
+            /// is no longer static.
+            /// </param>
+            public IndexerThread(IndexWriter writer, bool noErrors, Func<string, string, FieldType, Field> newField)
+            {
+                this.Writer = writer;
+                this.NoErrors = noErrors;
+                NewField = newField;
+            }
+
+            public override void Run()
+            {
+                Document doc = new Document();
+                FieldType customType = new FieldType(TextField.TYPE_STORED);
+                customType.StoreTermVectors = true;
+                customType.StoreTermVectorPositions = true;
+                customType.StoreTermVectorOffsets = true;
+
+                doc.Add(NewField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
+                doc.Add(new NumericDocValuesField("dv", 5));
+
+                int idUpto = 0;
+                int fullCount = 0;
+
+                // Environment.TickCount is a wrapping 32-bit counter: "TickCount + 200" can overflow to a
+                // negative deadline and end the loop after one pass. A Stopwatch measures elapsed time
+                // without that wrap-around.
+                System.Diagnostics.Stopwatch elapsed = System.Diagnostics.Stopwatch.StartNew();
+
+                do
+                {
+                    try
+                    {
+                        Writer.UpdateDocument(new Term("id", "" + (idUpto++)), doc);
+                        AddCount++;
+                    }
+                    catch (IOException ioe)
+                    {
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("TEST: expected exc:");
+                            Console.WriteLine(ioe.StackTrace);
+                        }
+                        //System.out.println(Thread.currentThread().getName() + ": hit exc");
+                        //ioConsole.WriteLine(e.StackTrace);
+                        // Ordinal comparison: these are fixed, machine-generated messages
+                        if (ioe.Message.StartsWith("fake disk full at", StringComparison.Ordinal) || ioe.Message.Equals("now failing on purpose"))
+                        {
+                            DiskFull = true;
+#if !NETSTANDARD
+                            try
+                            {
+#endif
+                                Thread.Sleep(1);
+#if !NETSTANDARD
+                            }
+                            catch (ThreadInterruptedException ie)
+                            {
+                                throw new ThreadInterruptedException("Thread Interrupted Exception", ie);
+                            }
+#endif
+                            // Give up after several injected failures in a row
+                            if (fullCount++ >= 5)
+                            {
+                                break;
+                            }
+                        }
+                        else
+                        {
+                            if (NoErrors)
+                            {
+                                Console.WriteLine(Thread.CurrentThread.Name + ": ERROR: unexpected IOException:");
+                                Console.WriteLine(ioe.StackTrace);
+                                Error = ioe;
+                            }
+                            break;
+                        }
+                    }
+                    catch (Exception t)
+                    {
+                        //Console.WriteLine(t.StackTrace);
+                        if (NoErrors)
+                        {
+                            Console.WriteLine(Thread.CurrentThread.Name + ": ERROR: unexpected Throwable:");
+                            Console.WriteLine(t.StackTrace);
+                            Error = t;
+                        }
+                        break;
+                    }
+                } while (elapsed.ElapsedMilliseconds < RunTimeMillis);
+            }
+        }
+
+        // LUCENE-1130: make sure immediate disk full on creating
+        // an IndexWriter (hit during DW.ThreadState.Init()), with
+        // multiple threads, is OK:
+        [Test]
+        public virtual void TestImmediateDiskFullWithThreads([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
+        {
+            int NUM_THREADS = 3;
+            int numIterations = TEST_NIGHTLY ? 10 : 3;
+            for (int iter = 0; iter < numIterations; iter++)
+            {
+                if (VERBOSE)
+                {
+                    Console.WriteLine("\nTEST: iter=" + iter);
+                }
+                MockDirectoryWrapper dir = NewMockDirectory();
+                var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+                    .SetMaxBufferedDocs(2)
+                    .SetMergeScheduler(scheduler)
+                    .SetMergePolicy(NewLogMergePolicy(4));
+                IndexWriter writer = new IndexWriter(dir, config);
+                scheduler.SetSuppressExceptions();
+                // Size limit grows with every iteration
+                dir.MaxSizeInBytes = 4 * 1024 + 20 * iter;
+
+                IndexerThread[] threads = new IndexerThread[NUM_THREADS];
+
+                for (int i = 0; i < NUM_THREADS; i++)
+                {
+                    threads[i] = new IndexerThread(writer, true, NewField);
+                }
+
+                for (int i = 0; i < NUM_THREADS; i++)
+                {
+                    threads[i].Start();
+                }
+
+                for (int i = 0; i < NUM_THREADS; i++)
+                {
+                    // Without fix for LUCENE-1130: one of the
+                    // threads will hang
+                    threads[i].Join();
+                    Assert.IsTrue(threads[i].Error == null, "hit unexpected Throwable");
+                }
+
+                // Make sure once disk space is avail again, we can
+                // cleanly close:
+                dir.MaxSizeInBytes = 0;
+                writer.Dispose(false);
+                dir.Dispose();
+            }
+        }
+
+        // LUCENE-1130: make sure we can close() even while
+        // threads are trying to add documents.  Strictly
+        // speaking, this isn't valid use of Lucene's APIs, but we
+        // still want to be robust to this case:
+        [Test]
+        public virtual void TestCloseWithThreads([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
+        {
+            int NUM_THREADS = 3;
+            int numIterations = TEST_NIGHTLY ? 7 : 3;
+            for (int iter = 0; iter < numIterations; iter++)
+            {
+                if (VERBOSE)
+                {
+                    Console.WriteLine("\nTEST: iter=" + iter);
+                }
+                Directory dir = NewDirectory();
+                var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+                    .SetMaxBufferedDocs(10)
+                    .SetMergeScheduler(scheduler)
+                    .SetMergePolicy(NewLogMergePolicy(4));
+                IndexWriter writer = new IndexWriter(dir, config);
+                scheduler.SetSuppressExceptions();
+
+                IndexerThread[] threads = new IndexerThread[NUM_THREADS];
+
+                for (int i = 0; i < NUM_THREADS; i++)
+                {
+                    threads[i] = new IndexerThread(writer, false, NewField);
+                }
+
+                for (int i = 0; i < NUM_THREADS; i++)
+                {
+                    threads[i].Start();
+                }
+
+                bool done = false;
+                while (!done)
+                {
+                    Thread.Sleep(100);
+                    for (int i = 0; i < NUM_THREADS; i++)
+                    // only stop when at least one thread has added a doc
+                    {
+                        if (threads[i].AddCount > 0)
+                        {
+                            done = true;
+                            break;
+                        }
+                        else if (!threads[i].IsAlive)
+                        {
+                            Assert.Fail("thread failed before indexing a single document");
+                        }
+                    }
+                }
+
+                if (VERBOSE)
+                {
+                    Console.WriteLine("\nTEST: now close");
+                }
+                writer.Dispose(false);
+
+                // Make sure threads that are adding docs are not hung:
+                for (int i = 0; i < NUM_THREADS; i++)
+                {
+                    // Without fix for LUCENE-1130: one of the
+                    // threads will hang
+                    threads[i].Join();
+                    if (threads[i].IsAlive)
+                    {
+                        Assert.Fail("thread seems to be hung");
+                    }
+                }
+
+                // Quick test to make sure index is not corrupt:
+                IndexReader reader = DirectoryReader.Open(dir);
+                DocsEnum tdocs = TestUtil.Docs(Random(), reader, "field", new BytesRef("aaa"), MultiFields.GetLiveDocs(reader), null, 0);
+                int count = 0;
+                while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
+                {
+                    count++;
+                }
+                Assert.IsTrue(count > 0);
+                reader.Dispose();
+
+                dir.Dispose();
+            }
+        }
+
+        // Runs test, with multiple threads, using the specific
+        // failure to trigger an IOException
+        public virtual void TestMultipleThreadsFailure(IConcurrentMergeScheduler scheduler, MockDirectoryWrapper.Failure failure)
+        {
+            int NUM_THREADS = 3;
+
+            for (int iter = 0; iter < 2; iter++)
+            {
+                if (VERBOSE)
+                {
+                    Console.WriteLine("TEST: iter=" + iter);
+                }
+                MockDirectoryWrapper dir = NewMockDirectory();
+                var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+                    .SetMaxBufferedDocs(2)
+                    .SetMergeScheduler(scheduler)
+                    .SetMergePolicy(NewLogMergePolicy(4));
+                IndexWriter writer = new IndexWriter(dir, config);
+                scheduler.SetSuppressExceptions();
+
+                IndexerThread[] threads = new IndexerThread[NUM_THREADS];
+
+                for (int i = 0; i < NUM_THREADS; i++)
+                {
+                    threads[i] = new IndexerThread(writer, true, NewField);
+                }
+
+                for (int i = 0; i < NUM_THREADS; i++)
+                {
+                    threads[i].Start();
+                }
+
+                // Let the threads run briefly before the failure is armed
+                Thread.Sleep(10);
+
+                dir.FailOn(failure);
+                failure.SetDoFail();
+
+                for (int i = 0; i < NUM_THREADS; i++)
+                {
+                    threads[i].Join();
+                    Assert.IsTrue(threads[i].Error == null, "hit unexpected Throwable");
+                }
+
+                bool success = false;
+                try
+                {
+                    writer.Dispose(false);
+                    success = true;
+                }
+                catch (IOException)
+                {
+                    // Closing hit the injected failure: disarm it and close again
+                    failure.ClearDoFail();
+                    writer.Dispose(false);
+                }
+                if (VERBOSE)
+                {
+                    Console.WriteLine("TEST: success=" + success);
+                }
+
+                if (success)
+                {
+                    IndexReader reader = DirectoryReader.Open(dir);
+                    // NOTE(review): the local is called delDocs but holds the result of GetLiveDocs,
+                    // so the condition below selects documents whose bit is clear; confirm
+                    // against upstream before changing it.
+                    IBits delDocs = MultiFields.GetLiveDocs(reader);
+                    for (int j = 0; j < reader.MaxDoc; j++)
+                    {
+                        if (delDocs == null || !delDocs.Get(j))
+                        {
+                            reader.Document(j);
+                            reader.GetTermVectors(j);
+                        }
+                    }
+                    reader.Dispose();
+                }
+
+                dir.Dispose();
+            }
+        }
+
+        // Runs test, with one thread, using the specific failure
+        // to trigger an IOException
+        public virtual void TestSingleThreadFailure(IConcurrentMergeScheduler scheduler, MockDirectoryWrapper.Failure failure)
+        {
+            MockDirectoryWrapper dir = NewMockDirectory();
+
+            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetMergeScheduler(scheduler));
+            Document doc = new Document();
+            FieldType customType = new FieldType(TextField.TYPE_STORED);
+            customType.StoreTermVectors = true;
+            customType.StoreTermVectorPositions = true;
+            customType.StoreTermVectorOffsets = true;
+            doc.Add(NewField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
+
+            for (int i = 0; i < 6; i++)
+            {
+                writer.AddDocument(doc);
+            }
+
+            dir.FailOn(failure);
+            failure.SetDoFail();
+            try
+            {
+                writer.AddDocument(doc);
+                writer.AddDocument(doc);
+                writer.Commit();
+                Assert.Fail("did not hit exception");
+            }
+            catch (IOException)
+            {
+                // expected: this is the injected failure
+            }
+            failure.ClearDoFail();
+            // The writer must still be usable after the failure
+            writer.AddDocument(doc);
+            writer.Dispose(false);
+            dir.Dispose();
+        }
+
+        // Throws IOException during FieldsWriter.flushDocument and during DocumentsWriter.abort
+        private class FailOnlyOnAbortOrFlush : MockDirectoryWrapper.Failure
+        {
+            internal bool OnlyOnce;
+
+            public FailOnlyOnAbortOrFlush(bool onlyOnce)
+            {
+                this.OnlyOnce = onlyOnce;
+            }
+
+            public override void Eval(MockDirectoryWrapper dir)
+            {
+                // Since we throw exc during abort, eg when IW is
+                // attempting to delete files, we will leave
+                // leftovers:
+                dir.AssertNoUnrefencedFilesOnClose = false;
+
+                if (DoFail)
+                {
+                    bool sawAbortOrFlushDoc = StackTraceHelper.DoesStackTraceContainMethod("Abort")
+                        || StackTraceHelper.DoesStackTraceContainMethod("FinishDocument");
+                    bool sawClose = StackTraceHelper.DoesStackTraceContainMethod("Close")
+                        || StackTraceHelper.DoesStackTraceContainMethod("Dispose");
+                    bool sawMerge = StackTraceHelper.DoesStackTraceContainMethod("Merge");
+
+                    if (sawAbortOrFlushDoc && !sawClose && !sawMerge)
+                    {
+                        if (OnlyOnce)
+                        {
+                            DoFail = false;
+                        }
+                        //System.out.println(Thread.currentThread().getName() + ": now fail");
+                        //new Throwable(Console.WriteLine().StackTrace);
+                        throw new IOException("now failing on purpose");
+                    }
+                }
+            }
+        }
+
+        // LUCENE-1130: make sure initial IOException, and then 2nd
+        // IOException during rollback(), is OK:
+        [Test]
+        public virtual void TestIOExceptionDuringAbort([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
+        {
+            TestSingleThreadFailure(scheduler, new FailOnlyOnAbortOrFlush(false));
+        }
+
+        // LUCENE-1130: make sure initial IOException, and then 2nd
+        // IOException during rollback(), is OK:
+        [Test]
+        public virtual void TestIOExceptionDuringAbortOnlyOnce([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
+        {
+            TestSingleThreadFailure(scheduler, new FailOnlyOnAbortOrFlush(true));
+        }
+
+        // LUCENE-1130: make sure initial IOException, and then 2nd
+        // IOException during rollback(), with multiple threads, is OK:
+        [Test]
+        public virtual void TestIOExceptionDuringAbortWithThreads([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
+        {
+            TestMultipleThreadsFailure(scheduler, new FailOnlyOnAbortOrFlush(false));
+        }
+
+        // LUCENE-1130: make sure initial IOException, and then 2nd
+        // IOException during rollback(), with multiple threads, is OK:
+        [Test]
+        public virtual void TestIOExceptionDuringAbortWithThreadsOnlyOnce([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
+        {
+            TestMultipleThreadsFailure(scheduler, new FailOnlyOnAbortOrFlush(true));
+        }
+
+        // Throws IOException during DocumentsWriter.writeSegment
+        private class FailOnlyInWriteSegment : MockDirectoryWrapper.Failure
+        {
+            internal bool OnlyOnce;
+
+            public FailOnlyInWriteSegment(bool onlyOnce)
+            {
+                this.OnlyOnce = onlyOnce;
+            }
+
+            public override void Eval(MockDirectoryWrapper dir)
+            {
+                if (DoFail)
+                {
+                    if (StackTraceHelper.DoesStackTraceContainMethod("Flush") /*&& "Lucene.Net.Index.DocFieldProcessor".Equals(frame.GetType().Name)*/)
+                    {
+                        if (OnlyOnce)
+                        {
+                            DoFail = false;
+                        }
+                        //System.out.println(Thread.currentThread().getName() + ": NOW FAIL: onlyOnce=" + onlyOnce);
+                        //new Throwable(Console.WriteLine().StackTrace);
+                        throw new IOException("now failing on purpose");
+                    }
+                }
+            }
+        }
+
+        // LUCENE-1130: test IOException in writeSegment
+        [Test]
+        public virtual void TestIOExceptionDuringWriteSegment([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
+        {
+            TestSingleThreadFailure(scheduler, new FailOnlyInWriteSegment(false));
+        }
+
+        // LUCENE-1130: test IOException in writeSegment
+        [Test]
+        public virtual void TestIOExceptionDuringWriteSegmentOnlyOnce([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
+        {
+            TestSingleThreadFailure(scheduler, new FailOnlyInWriteSegment(true));
+        }
+
+        // LUCENE-1130: test IOException in writeSegment, with threads
+        [Test]
+        public virtual void TestIOExceptionDuringWriteSegmentWithThreads([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
+        {
+            TestMultipleThreadsFailure(scheduler, new FailOnlyInWriteSegment(false));
+        }
+
+        // LUCENE-1130: test IOException in writeSegment, with threads
+        [Test]
+        public virtual void TestIOExceptionDuringWriteSegmentWithThreadsOnlyOnce([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
+        {
+            TestMultipleThreadsFailure(scheduler, new FailOnlyInWriteSegment(true));
+        }
+
+        //  LUCENE-3365: Test adding two documents with the same field from two different IndexWriters
+        //  that we attempt to open at the same time.  As long as the first IndexWriter completes
+        //  and closes before the second IndexWriter times out trying to get the Lock,
+        //  we should see both documents
+        [Test]
+        public virtual void TestOpenTwoIndexWritersOnDifferentThreads()
+        {
+            Directory dir = NewDirectory();
+            CountdownEvent oneIWConstructed = new CountdownEvent(1);
+            DelayedIndexAndCloseRunnable thread1 = new DelayedIndexAndCloseRunnable(dir, oneIWConstructed, this);
+            DelayedIndexAndCloseRunnable thread2 = new DelayedIndexAndCloseRunnable(dir, oneIWConstructed, this);
+
+            thread1.Start();
+            thread2.Start();
+            oneIWConstructed.Wait();
+
+            thread1.StartIndexing();
+            thread2.StartIndexing();
+
+            thread1.Join();
+            thread2.Join();
+
+            // ensure the directory is closed if we hit the timeout and throw assume
+            // TODO: can we improve this in LuceneTestCase? I don't know what the logic would be...
+            try
+            {
+                AssumeFalse("aborting test: timeout obtaining lock", thread1.Failure is LockObtainFailedException);
+                AssumeFalse("aborting test: timeout obtaining lock", thread2.Failure is LockObtainFailedException);
+
+                Assert.IsFalse(thread1.Failed, "Failed due to: " + thread1.Failure);
+                Assert.IsFalse(thread2.Failed, "Failed due to: " + thread2.Failure);
+                // now verify that we have two documents in the index;
+                // "using" disposes the reader even when the assertion throws
+                using (IndexReader reader = DirectoryReader.Open(dir))
+                {
+                    Assert.AreEqual(2, reader.NumDocs, "IndexReader should have one document per thread running");
+                }
+            }
+            finally
+            {
+                dir.Dispose();
+            }
+        }
+
+        /// <summary>
+        /// Thread that opens an <see cref="IndexWriter"/> on the shared directory, signals that a
+        /// writer has been constructed, waits for <see cref="StartIndexing"/> and then adds one document.
+        /// Any exception is recorded in <see cref="Failure"/> and sets <see cref="Failed"/>.
+        /// </summary>
+        internal class DelayedIndexAndCloseRunnable : ThreadClass
+        {
+            internal readonly Directory Dir;
+            internal bool Failed = false;
+            internal Exception Failure = null;
+            internal readonly CountdownEvent StartIndexing_Renamed = new CountdownEvent(1);
+            internal CountdownEvent IwConstructed;
+            private readonly LuceneTestCase OuterInstance;
+
+            /// <param name="outerInstance">
+            /// LUCENENET specific
+            /// Passed in because this class accesses non-static methods,
+            /// NewTextField and NewIndexWriterConfig
+            /// </param>
+            public DelayedIndexAndCloseRunnable(Directory dir, CountdownEvent iwConstructed, LuceneTestCase outerInstance)
+            {
+                this.Dir = dir;
+                this.IwConstructed = iwConstructed;
+                OuterInstance = outerInstance;
+            }
+
+            /// <summary>Releases the thread blocked in <see cref="Run"/> so that it adds its document.</summary>
+            public virtual void StartIndexing()
+            {
+                this.StartIndexing_Renamed.Signal();
+            }
+
+            public override void Run()
+            {
+                try
+                {
+                    Document doc = new Document();
+                    Field field = OuterInstance.NewTextField("field", "testData", Field.Store.YES);
+                    doc.Add(field);
+                    using (IndexWriter writer = new IndexWriter(Dir, OuterInstance.NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))))
+                    {
+                        // Only signal while the count is still above zero
+                        if (IwConstructed.CurrentCount > 0)
+                        {
+                            IwConstructed.Signal();
+                        }
+                        StartIndexing_Renamed.Wait();
+                        writer.AddDocument(doc);
+                    }
+                }
+                catch (Exception e)
+                {
+                    Failed = true;
+                    Failure = e;
+                    Console.WriteLine(e.ToString());
+                    return;
+                }
+            }
+        }
+
+        // LUCENE-4147
+        [Test]
+        public virtual void TestRollbackAndCommitWithThreads()
+        {
+            BaseDirectoryWrapper d = NewDirectory();
+            if (d is MockDirectoryWrapper)
+            {
+                ((MockDirectoryWrapper)d).PreventDoubleWrite = false;
+            }
+
+            int threadCount = TestUtil.NextInt(Random(), 2, 6);
+
+            MockAnalyzer analyzer = new MockAnalyzer(Random());
+            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
+            AtomicObject<IndexWriter> writerRef =
+                new AtomicObject<IndexWriter>(new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)));
+
+            LineFileDocs docs = new LineFileDocs(Random());
+            ThreadClass[] threads = new ThreadClass[threadCount];
+            int iters = AtLeast(100);
+            AtomicBoolean failed = new AtomicBoolean();
+            ReentrantLock rollbackLock = new ReentrantLock();
+            ReentrantLock commitLock = new ReentrantLock();
+            for (int threadID = 0; threadID < threadCount; threadID++)
+            {
+                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, d, writerRef, docs, iters, failed, rollbackLock, commitLock);
+                threads[threadID].Start();
+            }
+
+            for (int threadID = 0; threadID < threadCount; threadID++)
+            {
+                try
+                {
+                    threads[threadID].Join();
+                }
+                catch (Exception e)
+                {
+                    Console.WriteLine("EXCEPTION in ThreadAnonymousInnerClassHelper: " + Environment.NewLine + e);
+                }
+            }
+
+            Assert.IsTrue(!failed.Get());
+            writerRef.Value.Dispose();
+            d.Dispose();
+        }
+
+        /// <summary>
+        /// Worker for <see cref="TestRollbackAndCommitWithThreads"/>: on every iteration it randomly
+        /// either rolls back and replaces the shared writer, commits, or adds a document.
+        /// </summary>
+        private class ThreadAnonymousInnerClassHelper : ThreadClass
+        {
+            private readonly TestIndexWriterWithThreads OuterInstance;
+
+            private BaseDirectoryWrapper d;
+            private AtomicObject<IndexWriter> WriterRef;
+            private LineFileDocs Docs;
+            private int Iters;
+            private AtomicBoolean Failed;
+            private ReentrantLock RollbackLock;
+            private ReentrantLock CommitLock;
+
+            public ThreadAnonymousInnerClassHelper(TestIndexWriterWithThreads outerInstance, BaseDirectoryWrapper d, AtomicObject<IndexWriter> writerRef, LineFileDocs docs, int iters, AtomicBoolean failed, ReentrantLock rollbackLock, ReentrantLock commitLock)
+            {
+                this.OuterInstance = outerInstance;
+                this.d = d;
+                this.WriterRef = writerRef;
+                this.Docs = docs;
+                this.Iters = iters;
+                this.Failed = failed;
+                this.RollbackLock = rollbackLock;
+                this.CommitLock = commitLock;
+            }
+
+            public override void Run()
+            {
+                for (int iter = 0; iter < Iters && !Failed.Get(); iter++)
+                {
+                    //final int x = Random().nextInt(5);
+                    int x = Random().Next(3);
+                    try
+                    {
+                        switch (x)
+                        {
+                            case 0:
+                                RollbackLock.@Lock();
+                                if (VERBOSE)
+                                {
+                                    Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now rollback");
+                                }
+                                try
+                                {
+                                    WriterRef.Value.Rollback();
+                                    if (VERBOSE)
+                                    {
+                                        Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": rollback done; now open new writer");
+                                    }
+                                    WriterRef.Value =
+                                        new IndexWriter(d, OuterInstance.NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
+                                }
+                                finally
+                                {
+                                    RollbackLock.Unlock();
+                                }
+                                break;
+
+                            case 1:
+                                CommitLock.@Lock();
+                                if (VERBOSE)
+                                {
+                                    Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now commit");
+                                }
+                                try
+                                {
+                                    if (Random().NextBoolean())
+                                    {
+                                        WriterRef.Value.PrepareCommit();
+                                    }
+                                    WriterRef.Value.Commit();
+                                }
+                                catch (AlreadyClosedException)
+                                {
+                                    // ok
+                                }
+                                catch (NullReferenceException)
+                                {
+                                    // ok
+                                }
+                                finally
+                                {
+                                    CommitLock.Unlock();
+                                }
+                                break;
+
+                            case 2:
+                                if (VERBOSE)
+                                {
+                                    Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now add");
+                                }
+                                try
+                                {
+                                    WriterRef.Value.AddDocument(Docs.NextDoc());
+                                }
+                                catch (AlreadyClosedException)
+                                {
+                                    // ok
+                                }
+                                catch (System.NullReferenceException)
+                                {
+                                    // ok
+                                }
+                                catch (InvalidOperationException)
+                                {
+                                    // ok
+                                }
+                                break;
+                        }
+                    }
+                    catch (Exception t)
+                    {
+                        // Anything not tolerated above fails the test and stops the other threads' loops
+                        Failed.Set(true);
+                        throw new Exception(t.Message, t);
+                    }
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestIndexableField.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestIndexableField.cs b/src/Lucene.Net.Tests/Index/TestIndexableField.cs new file mode 100644 index 0000000..88402dd --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestIndexableField.cs @@ -0,0 +1,453 @@ + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + using NUnit.Framework; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Analyzer = Lucene.Net.Analysis.Analyzer; + using BooleanClause = Lucene.Net.Search.BooleanClause; + using BooleanQuery = Lucene.Net.Search.BooleanQuery; + using BytesRef = Lucene.Net.Util.BytesRef; + using Codec = Lucene.Net.Codecs.Codec; + using Directory = Lucene.Net.Store.Directory; + using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; + using Document = Documents.Document; + using Field = Field; + using IndexSearcher = Lucene.Net.Search.IndexSearcher; + using Lucene3xCodec = Lucene.Net.Codecs.Lucene3x.Lucene3xCodec; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using Occur = Lucene.Net.Search.Occur; + using TermQuery = Lucene.Net.Search.TermQuery; + using TestUtil = Lucene.Net.Util.TestUtil; + using TokenStream = Lucene.Net.Analysis.TokenStream; + using TopDocs = Lucene.Net.Search.TopDocs; + + [TestFixture] + public class TestIndexableField : LuceneTestCase + { + private class MyField : IIndexableField + { + private readonly TestIndexableField OuterInstance; + + internal readonly int Counter; + internal readonly IIndexableFieldType fieldType; + + public MyField() + { + fieldType = new IndexableFieldTypeAnonymousInnerClassHelper(this); + } + + private class IndexableFieldTypeAnonymousInnerClassHelper : IIndexableFieldType + { + private MyField OuterInstance; + + public IndexableFieldTypeAnonymousInnerClassHelper(MyField outerInstance) + { + OuterInstance = outerInstance; + } + + public bool IsIndexed + { + get { return (OuterInstance.Counter % 10) != 3; } + set { } + } + + public bool IsStored + { + get { return 
(OuterInstance.Counter & 1) == 0 || (OuterInstance.Counter % 10) == 3; } + set { } + } + + public bool IsTokenized + { + get { return true; } + set { } + } + + public bool StoreTermVectors + { + get { return IsIndexed && OuterInstance.Counter % 2 == 1 && OuterInstance.Counter % 10 != 9; } + set { } + } + + public bool StoreTermVectorOffsets + { + get { return StoreTermVectors && OuterInstance.Counter % 10 != 9; } + set { } + } + + public bool StoreTermVectorPositions + { + get { return StoreTermVectors && OuterInstance.Counter % 10 != 9; } + set { } + } + + public bool StoreTermVectorPayloads + { + get + { +#pragma warning disable 612, 618 + if (Codec.Default is Lucene3xCodec) +#pragma warning restore 612, 618 + { + return false; // 3.x doesnt support + } + else + { + return StoreTermVectors && OuterInstance.Counter % 10 != 9; + } + } + set { } + } + + public bool OmitNorms + { + get { return false; } + set { } + } + + public IndexOptions? IndexOptions + { + get { return Index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } + set { } + } + + public NumericType? NumericType + { + get { throw new NotImplementedException(); } + set { } + } + + public DocValuesType? 
DocValueType + { + get { return null; } + set { } + } + } + + public MyField(TestIndexableField outerInstance, int counter) + : this() + { + this.OuterInstance = outerInstance; + this.Counter = counter; + } + + public string Name + { + get { return "f" + Counter; } + } + + public float Boost + { + get { return 1.0f + (float)Random().NextDouble(); } + } + + public BytesRef GetBinaryValue() + { + if ((Counter % 10) == 3) + { + var bytes = new byte[10]; + for (int idx = 0; idx < bytes.Length; idx++) + { + bytes[idx] = (byte)(Counter + idx); + } + return new BytesRef(bytes, 0, bytes.Length); + } + else + { + return null; + } + } + + public string GetStringValue() + { + int fieldID = Counter % 10; + if (fieldID != 3 && fieldID != 7) + { + return "text " + Counter; + } + else + { + return null; + } + } + + public TextReader GetReaderValue() + { + if (Counter % 10 == 7) + { + return new StringReader("text " + Counter); + } + else + { + return null; + } + } + + public object GetNumericValue() + { + return null; + } + + public IIndexableFieldType FieldType + { + get { return fieldType; } + } + + public TokenStream GetTokenStream(Analyzer analyzer) + { + return GetReaderValue() != null ? 
analyzer.TokenStream(Name, GetReaderValue()) : analyzer.TokenStream(Name, new StringReader(GetStringValue())); + } + } + + // Silly test showing how to index documents w/o using Lucene's core + // Document nor Field class + [Test] + public virtual void TestArbitraryFields() + { + Directory dir = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + + int NUM_DOCS = AtLeast(27); + if (VERBOSE) + { + Console.WriteLine("TEST: " + NUM_DOCS + " docs"); + } + int[] fieldsPerDoc = new int[NUM_DOCS]; + int baseCount = 0; + + for (int docCount = 0; docCount < NUM_DOCS; docCount++) + { + int fieldCount = TestUtil.NextInt(Random(), 1, 17); + fieldsPerDoc[docCount] = fieldCount - 1; + + int finalDocCount = docCount; + if (VERBOSE) + { + Console.WriteLine("TEST: " + fieldCount + " fields in doc " + docCount); + } + + int finalBaseCount = baseCount; + baseCount += fieldCount - 1; + + w.AddDocument(new IterableAnonymousInnerClassHelper(this, fieldCount, finalDocCount, finalBaseCount)); + } + + IndexReader r = w.Reader; + w.Dispose(); + + IndexSearcher s = NewSearcher(r); + int counter = 0; + for (int id = 0; id < NUM_DOCS; id++) + { + if (VERBOSE) + { + Console.WriteLine("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter); + } + TopDocs hits = s.Search(new TermQuery(new Term("id", "" + id)), 1); + Assert.AreEqual(1, hits.TotalHits); + int docID = hits.ScoreDocs[0].Doc; + Document doc = s.Doc(docID); + int endCounter = counter + fieldsPerDoc[id]; + while (counter < endCounter) + { + string name = "f" + counter; + int fieldID = counter % 10; + + bool stored = (counter & 1) == 0 || fieldID == 3; + bool binary = fieldID == 3; + bool indexed = fieldID != 3; + + string stringValue; + if (fieldID != 3 && fieldID != 9) + { + stringValue = "text " + counter; + } + else + { + stringValue = null; + } + + // stored: + if (stored) + { + IIndexableField f = doc.GetField(name); + Assert.IsNotNull(f, "doc " + id + 
" doesn't have field f" + counter); + if (binary) + { + Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter); + BytesRef b = f.GetBinaryValue(); + Assert.IsNotNull(b); + Assert.AreEqual(10, b.Length); + for (int idx = 0; idx < 10; idx++) + { + Assert.AreEqual((byte)(idx + counter), b.Bytes[b.Offset + idx]); + } + } + else + { + Debug.Assert(stringValue != null); + Assert.AreEqual(stringValue, f.GetStringValue()); + } + } + + if (indexed) + { + bool tv = counter % 2 == 1 && fieldID != 9; + if (tv) + { + Terms tfv = r.GetTermVectors(docID).GetTerms(name); + Assert.IsNotNull(tfv); + TermsEnum termsEnum = tfv.GetIterator(null); + Assert.AreEqual(new BytesRef("" + counter), termsEnum.Next()); + Assert.AreEqual(1, termsEnum.TotalTermFreq); + DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null); + Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); + Assert.AreEqual(1, dpEnum.Freq); + Assert.AreEqual(1, dpEnum.NextPosition()); + + Assert.AreEqual(new BytesRef("text"), termsEnum.Next()); + Assert.AreEqual(1, termsEnum.TotalTermFreq); + dpEnum = termsEnum.DocsAndPositions(null, dpEnum); + Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); + Assert.AreEqual(1, dpEnum.Freq); + Assert.AreEqual(0, dpEnum.NextPosition()); + + Assert.IsNull(termsEnum.Next()); + + // TODO: offsets + } + else + { + Fields vectors = r.GetTermVectors(docID); + Assert.IsTrue(vectors == null || vectors.GetTerms(name) == null); + } + + BooleanQuery bq = new BooleanQuery(); + bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST); + bq.Add(new TermQuery(new Term(name, "text")), Occur.MUST); + TopDocs hits2 = s.Search(bq, 1); + Assert.AreEqual(1, hits2.TotalHits); + Assert.AreEqual(docID, hits2.ScoreDocs[0].Doc); + + bq = new BooleanQuery(); + bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST); + bq.Add(new TermQuery(new Term(name, "" + counter)), Occur.MUST); + TopDocs hits3 = s.Search(bq, 1); + Assert.AreEqual(1, hits3.TotalHits); 
+ Assert.AreEqual(docID, hits3.ScoreDocs[0].Doc); + } + + counter++; + } + } + + r.Dispose(); + dir.Dispose(); + } + + private class IterableAnonymousInnerClassHelper : IEnumerable<IIndexableField> + { + private readonly TestIndexableField OuterInstance; + + private int FieldCount; + private int FinalDocCount; + private int FinalBaseCount; + + public IterableAnonymousInnerClassHelper(TestIndexableField outerInstance, int fieldCount, int finalDocCount, int finalBaseCount) + { + this.OuterInstance = outerInstance; + this.FieldCount = fieldCount; + this.FinalDocCount = finalDocCount; + this.FinalBaseCount = finalBaseCount; + } + + public virtual IEnumerator<IIndexableField> GetEnumerator() + { + return new IteratorAnonymousInnerClassHelper(this, OuterInstance); + } + + System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + private class IteratorAnonymousInnerClassHelper : IEnumerator<IIndexableField> + { + private readonly IterableAnonymousInnerClassHelper OuterInstance; + private readonly TestIndexableField OuterTextIndexableField; + + public IteratorAnonymousInnerClassHelper(IterableAnonymousInnerClassHelper outerInstance, TestIndexableField outerTextIndexableField) + { + this.OuterInstance = outerInstance; + OuterTextIndexableField = outerTextIndexableField; + } + + internal int fieldUpto; + private IIndexableField current; + + public bool MoveNext() + { + if (fieldUpto >= OuterInstance.FieldCount) + { + return false; + } + + Debug.Assert(fieldUpto < OuterInstance.FieldCount); + if (fieldUpto == 0) + { + fieldUpto = 1; + current = OuterTextIndexableField.NewStringField("id", "" + OuterInstance.FinalDocCount, Field.Store.YES); + } + else + { + current = new MyField(OuterTextIndexableField, OuterInstance.FinalBaseCount + (fieldUpto++ - 1)); + } + + return true; + } + + public IIndexableField Current + { + get { return current; } + } + + object System.Collections.IEnumerator.Current + { + get { return 
Current; } + } + + public void Dispose() + { + } + + public void Reset() + { + throw new NotImplementedException(); + } + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestIntBlockPool.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestIntBlockPool.cs b/src/Lucene.Net.Tests/Index/TestIntBlockPool.cs new file mode 100644 index 0000000..98fc2d2 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestIntBlockPool.cs @@ -0,0 +1,185 @@ +using System.Collections.Generic; + +namespace Lucene.Net.Index +{ + using Lucene.Net.Randomized.Generators; + using NUnit.Framework; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + using Counter = Lucene.Net.Util.Counter; + using Int32BlockPool = Lucene.Net.Util.Int32BlockPool; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using RamUsageEstimator = Lucene.Net.Util.RamUsageEstimator; + + /// <summary> + /// tests basic <seealso cref="Int32BlockPool"/> functionality + /// </summary> + [TestFixture] + public class TestIntBlockPool : LuceneTestCase + { + [Test] + public virtual void TestSingleWriterReader() + { + Counter bytesUsed = Util.Counter.NewCounter(); + Int32BlockPool pool = new Int32BlockPool(new ByteTrackingAllocator(bytesUsed)); + + for (int j = 0; j < 2; j++) + { + Int32BlockPool.SliceWriter writer = new Int32BlockPool.SliceWriter(pool); + int start = writer.StartNewSlice(); + int num = AtLeast(100); + for (int i = 0; i < num; i++) + { + writer.WriteInt32(i); + } + + int upto = writer.CurrentOffset; + Int32BlockPool.SliceReader reader = new Int32BlockPool.SliceReader(pool); + reader.Reset(start, upto); + for (int i = 0; i < num; i++) + { + Assert.AreEqual(i, reader.ReadInt32()); + } + Assert.IsTrue(reader.EndOfSlice()); + if (Random().NextBoolean()) + { + pool.Reset(true, false); + Assert.AreEqual(0, bytesUsed.Get()); + } + else + { + pool.Reset(true, true); + Assert.AreEqual(Int32BlockPool.INT32_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT32, bytesUsed.Get()); + } + } + } + + [Test] + public virtual void TestMultipleWriterReader() + { + Counter bytesUsed = Util.Counter.NewCounter(); + Int32BlockPool pool = new Int32BlockPool(new ByteTrackingAllocator(bytesUsed)); + for (int j = 0; j < 2; j++) + { + IList<StartEndAndValues> holders = new List<StartEndAndValues>(); + int num = AtLeast(4); + for (int i = 0; i < num; i++) + { + holders.Add(new StartEndAndValues(Random().Next(1000))); + } + Int32BlockPool.SliceWriter writer = new Int32BlockPool.SliceWriter(pool); + Int32BlockPool.SliceReader reader = new Int32BlockPool.SliceReader(pool); + + int numValuesToWrite = AtLeast(10000); + for (int i = 0; i < 
numValuesToWrite; i++) + { + StartEndAndValues values = holders[Random().Next(holders.Count)]; + if (values.ValueCount == 0) + { + values.Start = writer.StartNewSlice(); + } + else + { + writer.Reset(values.End); + } + writer.WriteInt32(values.NextValue()); + values.End = writer.CurrentOffset; + if (Random().Next(5) == 0) + { + // pick one and reader the ints + AssertReader(reader, holders[Random().Next(holders.Count)]); + } + } + + while (holders.Count > 0) + { + int randIndex = Random().Next(holders.Count); + StartEndAndValues values = holders[randIndex]; + holders.RemoveAt(randIndex); + AssertReader(reader, values); + } + if (Random().NextBoolean()) + { + pool.Reset(true, false); + Assert.AreEqual(0, bytesUsed.Get()); + } + else + { + pool.Reset(true, true); + Assert.AreEqual(Int32BlockPool.INT32_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT32, bytesUsed.Get()); + } + } + } + + private class ByteTrackingAllocator : Int32BlockPool.Allocator + { + internal readonly Counter BytesUsed; + + public ByteTrackingAllocator(Counter bytesUsed) + : this(Int32BlockPool.INT32_BLOCK_SIZE, bytesUsed) + { + } + + public ByteTrackingAllocator(int blockSize, Counter bytesUsed) + : base(blockSize) + { + this.BytesUsed = bytesUsed; + } + + public override int[] GetInt32Block() + { + BytesUsed.AddAndGet(m_blockSize * RamUsageEstimator.NUM_BYTES_INT32); + return new int[m_blockSize]; + } + + public override void RecycleInt32Blocks(int[][] blocks, int start, int end) + { + BytesUsed.AddAndGet(-((end - start) * m_blockSize * RamUsageEstimator.NUM_BYTES_INT32)); + } + } + + private void AssertReader(Int32BlockPool.SliceReader reader, StartEndAndValues values) + { + reader.Reset(values.Start, values.End); + for (int i = 0; i < values.ValueCount; i++) + { + Assert.AreEqual(values.ValueOffset + i, reader.ReadInt32()); + } + Assert.IsTrue(reader.EndOfSlice()); + } + + private class StartEndAndValues + { + internal int ValueOffset; + internal int ValueCount; + internal int Start; + internal 
int End; + + public StartEndAndValues(int valueOffset) + { + this.ValueOffset = valueOffset; + } + + public virtual int NextValue() + { + return ValueOffset + ValueCount++; + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestIsCurrent.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestIsCurrent.cs b/src/Lucene.Net.Tests/Index/TestIsCurrent.cs new file mode 100644 index 0000000..d975080 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestIsCurrent.cs @@ -0,0 +1,109 @@ +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + using Lucene.Net.Store; + using Lucene.Net.Util; + using NUnit.Framework; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + using Document = Documents.Document; + using Field = Field; + + [TestFixture] + public class TestIsCurrent : LuceneTestCase + { + private RandomIndexWriter Writer; + + private Directory Directory; + + [SetUp] + public override void SetUp() + { + base.SetUp(); + + // initialize directory + Directory = NewDirectory(); + Writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone); + + // write document + Document doc = new Document(); + doc.Add(NewTextField("UUID", "1", Field.Store.YES)); + Writer.AddDocument(doc); + Writer.Commit(); + } + + [TearDown] + public override void TearDown() + { + base.TearDown(); + Writer.Dispose(); + Directory.Dispose(); + } + + /// <summary> + /// Deleting the only document by term and committing must leave an already-open reader stale. + /// </summary> + [Test] + public virtual void TestDeleteByTermIsCurrent() + { + // get reader + DirectoryReader reader = Writer.Reader; + + // assert index has a document and reader is up to date + Assert.AreEqual(1, Writer.NumDocs, "One document should be in the index"); + Assert.IsTrue(reader.IsCurrent, "One document added, reader should be current"); + + // remove document + Term idTerm = new Term("UUID", "1"); + Writer.DeleteDocuments(idTerm); + Writer.Commit(); + + // assert document has been deleted (index changed), reader is stale + Assert.AreEqual(0, Writer.NumDocs, "Document should be removed"); + Assert.IsFalse(reader.IsCurrent, "Reader should be stale"); + + reader.Dispose(); + } + + /// <summary> + /// Writer.DeleteAll() followed by a commit must likewise leave an already-open reader stale. + /// </summary> + [Test] + public virtual void TestDeleteAllIsCurrent() + { + // get reader + DirectoryReader reader = Writer.Reader; + + // assert index has a document and reader is up to date + Assert.AreEqual(1, Writer.NumDocs, "One document should be in the index"); + Assert.IsTrue(reader.IsCurrent, "Document added, reader should be current"); + + // remove all documents + Writer.DeleteAll(); + Writer.Commit(); + + // assert document has been deleted 
(index changed), reader is stale + Assert.AreEqual(0, Writer.NumDocs, "Document should be removed"); + Assert.IsFalse(reader.IsCurrent, "Reader should be stale"); + + reader.Dispose(); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestLazyProxSkipping.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestLazyProxSkipping.cs b/src/Lucene.Net.Tests/Index/TestLazyProxSkipping.cs new file mode 100644 index 0000000..8f60ea8 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestLazyProxSkipping.cs @@ -0,0 +1,258 @@ +namespace Lucene.Net.Index +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + using Lucene.Net.Analysis; + using NUnit.Framework; + using System.IO; + using BytesRef = Lucene.Net.Util.BytesRef; + using Directory = Lucene.Net.Store.Directory; + using Document = Documents.Document; + using IndexInput = Lucene.Net.Store.IndexInput; + using IndexSearcher = Lucene.Net.Search.IndexSearcher; + using IOContext = Lucene.Net.Store.IOContext; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using MockDirectoryWrapper = Lucene.Net.Store.MockDirectoryWrapper; + using PhraseQuery = Lucene.Net.Search.PhraseQuery; + using RAMDirectory = Lucene.Net.Store.RAMDirectory; + using ScoreDoc = Lucene.Net.Search.ScoreDoc; + using TestUtil = Lucene.Net.Util.TestUtil; + + /// <summary> + /// Tests lazy skipping on the proximity file. + /// + /// </summary> + [TestFixture] + public class TestLazyProxSkipping : LuceneTestCase + { + private IndexSearcher Searcher; + private int SeeksCounter = 0; + + private string Field = "tokens"; + private string Term1 = "xx"; + private string Term2 = "yy"; + private string Term3 = "zz"; + + private class SeekCountingDirectory : MockDirectoryWrapper + { + private readonly TestLazyProxSkipping OuterInstance; + + public SeekCountingDirectory(TestLazyProxSkipping outerInstance, Directory @delegate) + : base(Random(), @delegate) + { + this.OuterInstance = outerInstance; + } + + public override IndexInput OpenInput(string name, IOContext context) + { + IndexInput ii = base.OpenInput(name, context); + if (name.EndsWith(".prx", System.StringComparison.Ordinal) || name.EndsWith(".pos", System.StringComparison.Ordinal)) + { + // extension match is ordinal (culture-independent); wrap the prox/pos stream so that calls to Seek() can be counted + ii = new SeeksCountingStream(OuterInstance, ii); + } + return ii; + } + } + + private void CreateIndex(int numHits) + { + int numDocs = 500; + + Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this); + Directory directory = new SeekCountingDirectory(this, new RAMDirectory()); + // note: test explicitly disables payloads + IndexWriter writer = new 
IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(false))); + + for (int i = 0; i < numDocs; i++) + { + Document doc = new Document(); + string content; + if (i % (numDocs / numHits) == 0) + { + // add a document that matches the query "term1 term2" + content = this.Term1 + " " + this.Term2; + } + else if (i % 15 == 0) + { + // add a document that only contains term1 + content = this.Term1 + " " + this.Term1; + } + else + { + // add a document that contains term2 but not term 1 + content = this.Term3 + " " + this.Term2; + } + + doc.Add(NewTextField(this.Field, content, Documents.Field.Store.YES)); + writer.AddDocument(doc); + } + + // make sure the index has only a single segment + writer.ForceMerge(1); + writer.Dispose(); + + SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(directory)); + + this.Searcher = NewSearcher(reader); + } + + private class AnalyzerAnonymousInnerClassHelper : Analyzer + { + private readonly TestLazyProxSkipping OuterInstance; + + public AnalyzerAnonymousInnerClassHelper(TestLazyProxSkipping outerInstance) + { + this.OuterInstance = outerInstance; + } + + protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) + { + return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true)); + } + } + + private ScoreDoc[] Search() + { + // create PhraseQuery "term1 term2" and search + PhraseQuery pq = new PhraseQuery(); + pq.Add(new Term(this.Field, this.Term1)); + pq.Add(new Term(this.Field, this.Term2)); + return this.Searcher.Search(pq, null, 1000).ScoreDocs; + } + + private void PerformTest(int numHits) + { + CreateIndex(numHits); + this.SeeksCounter = 0; + ScoreDoc[] hits = Search(); + // verify that the right number of docs was found + Assert.AreEqual(numHits, hits.Length); + + // check if the number of calls of seek() does not exceed the number of hits + 
Assert.IsTrue(this.SeeksCounter > 0); + Assert.IsTrue(this.SeeksCounter <= numHits + 1, "seeksCounter=" + this.SeeksCounter + " numHits=" + numHits); + Searcher.IndexReader.Dispose(); + } + + [Test] + public virtual void TestLazySkipping() + { + string fieldFormat = TestUtil.GetPostingsFormat(this.Field); + AssumeFalse("this test cannot run with Memory postings format", fieldFormat.Equals("Memory")); + AssumeFalse("this test cannot run with Direct postings format", fieldFormat.Equals("Direct")); + AssumeFalse("this test cannot run with SimpleText postings format", fieldFormat.Equals("SimpleText")); + + // test whether only the minimum amount of seeks() + // are performed + PerformTest(5); + PerformTest(10); + } + + [Test] + public virtual void TestSeek() + { + Directory directory = NewDirectory(); + IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); + for (int i = 0; i < 10; i++) + { + Document doc = new Document(); + doc.Add(NewTextField(this.Field, "a b", Documents.Field.Store.YES)); + writer.AddDocument(doc); + } + + writer.Dispose(); + IndexReader reader = DirectoryReader.Open(directory); + + DocsAndPositionsEnum tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), this.Field, new BytesRef("b")); + + for (int i = 0; i < 10; i++) + { + tp.NextDoc(); + Assert.AreEqual(i, tp.DocID); + Assert.AreEqual(1, tp.NextPosition()); + } + + tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), this.Field, new BytesRef("a")); + + for (int i = 0; i < 10; i++) + { + tp.NextDoc(); + Assert.AreEqual(i, tp.DocID); + Assert.AreEqual(0, tp.NextPosition()); + } + reader.Dispose(); + directory.Dispose(); + } + + // Simply extends IndexInput in a way that we are able to count the number + // of invocations of seek() + internal class SeeksCountingStream : IndexInput + { + private readonly TestLazyProxSkipping OuterInstance; + + internal IndexInput Input; + + internal 
SeeksCountingStream(TestLazyProxSkipping outerInstance, IndexInput input) + : base("SeekCountingStream(" + input + ")") + { + this.OuterInstance = outerInstance; + this.Input = input; + } + + public override byte ReadByte() + { + return this.Input.ReadByte(); + } + + public override void ReadBytes(byte[] b, int offset, int len) + { + this.Input.ReadBytes(b, offset, len); + } + + public override void Dispose() + { + this.Input.Dispose(); + } + + public override long FilePointer + { + get + { + return this.Input.FilePointer; + } + } + + public override void Seek(long pos) + { + OuterInstance.SeeksCounter++; + this.Input.Seek(pos); + } + + public override long Length + { + get { return this.Input.Length; } + } + + public override object Clone() + { + return new SeeksCountingStream(OuterInstance, (IndexInput)this.Input.Clone()); + } + } + } +} \ No newline at end of file
