http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Document/TestDocument.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Document/TestDocument.cs b/src/Lucene.Net.Tests/Document/TestDocument.cs new file mode 100644 index 0000000..2a73911 --- /dev/null +++ b/src/Lucene.Net.Tests/Document/TestDocument.cs @@ -0,0 +1,454 @@ +using Lucene.Net.Support; +using NUnit.Framework; +using System.IO; +using System.Text; + +namespace Lucene.Net.Documents +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + using BytesRef = Lucene.Net.Util.BytesRef; + using Directory = Lucene.Net.Store.Directory; + using DirectoryReader = Lucene.Net.Index.DirectoryReader; + using DocsAndPositionsEnum = Lucene.Net.Index.DocsAndPositionsEnum; + using Fields = Lucene.Net.Index.Fields; + using IIndexableField = Lucene.Net.Index.IIndexableField; + using IndexReader = Lucene.Net.Index.IndexReader; + using IndexSearcher = Lucene.Net.Search.IndexSearcher; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using MockTokenizer = Lucene.Net.Analysis.MockTokenizer; + using PhraseQuery = Lucene.Net.Search.PhraseQuery; + using Query = Lucene.Net.Search.Query; + using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter; + using ScoreDoc = Lucene.Net.Search.ScoreDoc; + using Term = Lucene.Net.Index.Term; + using TermQuery = Lucene.Net.Search.TermQuery; + using Terms = Lucene.Net.Index.Terms; + using TermsEnum = Lucene.Net.Index.TermsEnum; + + /// <summary> + /// Tests <seealso cref="Document"/> class. + /// </summary> + [TestFixture] + public class TestDocument : LuceneTestCase + { + internal string BinaryVal = "this text will be stored as a byte array in the index"; + internal string BinaryVal2 = "this text will be also stored as a byte array in the index"; + + [Test] + public virtual void TestBinaryField() + { + Documents.Document doc = new Documents.Document(); + + FieldType ft = new FieldType(); + ft.IsStored = true; + IIndexableField stringFld = new Field("string", BinaryVal, ft); + IIndexableField binaryFld = new StoredField("binary", BinaryVal.GetBytes(Encoding.UTF8)); + IIndexableField binaryFld2 = new StoredField("binary", BinaryVal2.GetBytes(Encoding.UTF8)); + + doc.Add(stringFld); + doc.Add(binaryFld); + + Assert.AreEqual(2, doc.Fields.Count); + + Assert.IsTrue(binaryFld.GetBinaryValue() != null); + Assert.IsTrue(binaryFld.FieldType.IsStored); + Assert.IsFalse(binaryFld.FieldType.IsIndexed); + + string binaryTest = doc.GetBinaryValue("binary").Utf8ToString(); + 
Assert.IsTrue(binaryTest.Equals(BinaryVal)); + + string stringTest = doc.Get("string"); + Assert.IsTrue(binaryTest.Equals(stringTest)); + + doc.Add(binaryFld2); + + Assert.AreEqual(3, doc.Fields.Count); + + BytesRef[] binaryTests = doc.GetBinaryValues("binary"); + + Assert.AreEqual(2, binaryTests.Length); + + binaryTest = binaryTests[0].Utf8ToString(); + string binaryTest2 = binaryTests[1].Utf8ToString(); + + Assert.IsFalse(binaryTest.Equals(binaryTest2)); + + Assert.IsTrue(binaryTest.Equals(BinaryVal)); + Assert.IsTrue(binaryTest2.Equals(BinaryVal2)); + + doc.RemoveField("string"); + Assert.AreEqual(2, doc.Fields.Count); + + doc.RemoveFields("binary"); + Assert.AreEqual(0, doc.Fields.Count); + } + + /// <summary> + /// Tests <seealso cref="Document#removeField(String)"/> method for a brand new Document + /// that has not been indexed yet. + /// </summary> + /// <exception cref="Exception"> on error </exception> + + [Test] + public virtual void TestRemoveForNewDocument() + { + Documents.Document doc = MakeDocumentWithFields(); + Assert.AreEqual(10, doc.Fields.Count); + doc.RemoveFields("keyword"); + Assert.AreEqual(8, doc.Fields.Count); + doc.RemoveFields("doesnotexists"); // removing non-existing fields is + // siltenlty ignored + doc.RemoveFields("keyword"); // removing a field more than once + Assert.AreEqual(8, doc.Fields.Count); + doc.RemoveFields("text"); + Assert.AreEqual(6, doc.Fields.Count); + doc.RemoveFields("text"); + Assert.AreEqual(6, doc.Fields.Count); + doc.RemoveFields("text"); + Assert.AreEqual(6, doc.Fields.Count); + doc.RemoveFields("doesnotexists"); // removing non-existing fields is + // siltenlty ignored + Assert.AreEqual(6, doc.Fields.Count); + doc.RemoveFields("unindexed"); + Assert.AreEqual(4, doc.Fields.Count); + doc.RemoveFields("unstored"); + Assert.AreEqual(2, doc.Fields.Count); + doc.RemoveFields("doesnotexists"); // removing non-existing fields is + // siltenlty ignored + Assert.AreEqual(2, doc.Fields.Count); + + 
doc.RemoveFields("indexed_not_tokenized"); + Assert.AreEqual(0, doc.Fields.Count); + } + + [Test] + public virtual void TestConstructorExceptions() + { + FieldType ft = new FieldType(); + ft.IsStored = true; + new Field("name", "value", ft); // okay + new StringField("name", "value", Field.Store.NO); // okay + try + { + new Field("name", "value", new FieldType()); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException e) +#pragma warning restore 168 + { + // expected exception + } + new Field("name", "value", ft); // okay + try + { + FieldType ft2 = new FieldType(); + ft2.IsStored = true; + ft2.StoreTermVectors = true; + new Field("name", "value", ft2); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException e) +#pragma warning restore 168 + { + // expected exception + } + } + + /// <summary> + /// Tests <seealso cref="Document#getValues(String)"/> method for a brand new Document + /// that has not been indexed yet. + /// </summary> + /// <exception cref="Exception"> on error </exception> + [Test] + public virtual void TestGetValuesForNewDocument() + { + DoAssert(MakeDocumentWithFields(), false); + } + + /// <summary> + /// Tests <seealso cref="Document#getValues(String)"/> method for a Document retrieved + /// from an index. 
+ /// </summary> + /// <exception cref="Exception"> on error </exception> + [Test] + public virtual void TestGetValuesForIndexedDocument() + { + Directory dir = NewDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + writer.AddDocument(MakeDocumentWithFields()); + IndexReader reader = writer.Reader; + + IndexSearcher searcher = NewSearcher(reader); + + // search for something that does exists + Query query = new TermQuery(new Term("keyword", "test1")); + + // ensure that queries return expected results without DateFilter first + ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; + Assert.AreEqual(1, hits.Length); + + DoAssert(searcher.Doc(hits[0].Doc), true); + writer.Dispose(); + reader.Dispose(); + dir.Dispose(); + } + + [Test] + public virtual void TestGetValues() + { + Documents.Document doc = MakeDocumentWithFields(); + Assert.AreEqual(new string[] { "test1", "test2" }, doc.GetValues("keyword")); + Assert.AreEqual(new string[] { "test1", "test2" }, doc.GetValues("text")); + Assert.AreEqual(new string[] { "test1", "test2" }, doc.GetValues("unindexed")); + Assert.AreEqual(new string[0], doc.GetValues("nope")); + } + + [Test] + public virtual void TestPositionIncrementMultiFields() + { + Directory dir = NewDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + writer.AddDocument(MakeDocumentWithFields()); + IndexReader reader = writer.Reader; + + IndexSearcher searcher = NewSearcher(reader); + PhraseQuery query = new PhraseQuery(); + query.Add(new Term("indexed_not_tokenized", "test1")); + query.Add(new Term("indexed_not_tokenized", "test2")); + + ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; + Assert.AreEqual(1, hits.Length); + + DoAssert(searcher.Doc(hits[0].Doc), true); + writer.Dispose(); + reader.Dispose(); + dir.Dispose(); + } + + private Documents.Document MakeDocumentWithFields() + { + Documents.Document doc = new 
Documents.Document(); + FieldType stored = new FieldType(); + stored.IsStored = true; + FieldType indexedNotTokenized = new FieldType(); + indexedNotTokenized.IsIndexed = true; + indexedNotTokenized.IsTokenized = false; + doc.Add(new StringField("keyword", "test1", Field.Store.YES)); + doc.Add(new StringField("keyword", "test2", Field.Store.YES)); + doc.Add(new TextField("text", "test1", Field.Store.YES)); + doc.Add(new TextField("text", "test2", Field.Store.YES)); + doc.Add(new Field("unindexed", "test1", stored)); + doc.Add(new Field("unindexed", "test2", stored)); + doc.Add(new TextField("unstored", "test1", Field.Store.NO)); + doc.Add(new TextField("unstored", "test2", Field.Store.NO)); + doc.Add(new Field("indexed_not_tokenized", "test1", indexedNotTokenized)); + doc.Add(new Field("indexed_not_tokenized", "test2", indexedNotTokenized)); + return doc; + } + + private void DoAssert(Documents.Document doc, bool fromIndex) + { + IIndexableField[] keywordFieldValues = doc.GetFields("keyword"); + IIndexableField[] textFieldValues = doc.GetFields("text"); + IIndexableField[] unindexedFieldValues = doc.GetFields("unindexed"); + IIndexableField[] unstoredFieldValues = doc.GetFields("unstored"); + + Assert.IsTrue(keywordFieldValues.Length == 2); + Assert.IsTrue(textFieldValues.Length == 2); + Assert.IsTrue(unindexedFieldValues.Length == 2); + // this test cannot work for documents retrieved from the index + // since unstored fields will obviously not be returned + if (!fromIndex) + { + Assert.IsTrue(unstoredFieldValues.Length == 2); + } + + Assert.IsTrue(keywordFieldValues[0].GetStringValue().Equals("test1")); + Assert.IsTrue(keywordFieldValues[1].GetStringValue().Equals("test2")); + Assert.IsTrue(textFieldValues[0].GetStringValue().Equals("test1")); + Assert.IsTrue(textFieldValues[1].GetStringValue().Equals("test2")); + Assert.IsTrue(unindexedFieldValues[0].GetStringValue().Equals("test1")); + Assert.IsTrue(unindexedFieldValues[1].GetStringValue().Equals("test2")); + 
// this test cannot work for documents retrieved from the index + // since unstored fields will obviously not be returned + if (!fromIndex) + { + Assert.IsTrue(unstoredFieldValues[0].GetStringValue().Equals("test1")); + Assert.IsTrue(unstoredFieldValues[1].GetStringValue().Equals("test2")); + } + } + + [Test] + public virtual void TestFieldSetValue() + { + Field field = new StringField("id", "id1", Field.Store.YES); + Documents.Document doc = new Documents.Document(); + doc.Add(field); + doc.Add(new StringField("keyword", "test", Field.Store.YES)); + + Directory dir = NewDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + writer.AddDocument(doc); + field.SetStringValue("id2"); + writer.AddDocument(doc); + field.SetStringValue("id3"); + writer.AddDocument(doc); + + IndexReader reader = writer.Reader; + IndexSearcher searcher = NewSearcher(reader); + + Query query = new TermQuery(new Term("keyword", "test")); + + // ensure that queries return expected results without DateFilter first + ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; + Assert.AreEqual(3, hits.Length); + int result = 0; + for (int i = 0; i < 3; i++) + { + Documents.Document doc2 = searcher.Doc(hits[i].Doc); + Field f = (Field)doc2.GetField("id"); + if (f.GetStringValue().Equals("id1")) + { + result |= 1; + } + else if (f.GetStringValue().Equals("id2")) + { + result |= 2; + } + else if (f.GetStringValue().Equals("id3")) + { + result |= 4; + } + else + { + Assert.Fail("unexpected id field"); + } + } + writer.Dispose(); + reader.Dispose(); + dir.Dispose(); + Assert.AreEqual(7, result, "did not see all IDs"); + } + + // LUCENE-3616 + [Test] + public virtual void TestInvalidFields() + { + Assert.Throws<System.ArgumentException>(() => { new Field("foo", new MockTokenizer(new StreamReader(File.Open("", FileMode.Open))), StringField.TYPE_STORED); }); + } + + // LUCENE-3682 + [Test] + public virtual void TestTransitionAPI() + { + Directory dir 
= NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + + Documents.Document doc = new Documents.Document(); +#pragma warning disable 612, 618 + doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO)); + doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED)); + doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED)); + doc.Add(new Field("tokenized_reader", new StringReader("abc xyz"))); + doc.Add(new Field("tokenized_tokenstream", w.w.Analyzer.TokenStream("tokenized_tokenstream", new StringReader("abc xyz")))); + doc.Add(new Field("binary", new byte[10])); + doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); + doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); + doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); + doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); +#pragma warning restore 612, 618 + w.AddDocument(doc); + IndexReader r = w.Reader; + w.Dispose(); + + doc = r.Document(0); + // 4 stored fields + Assert.AreEqual(4, doc.Fields.Count); + Assert.AreEqual("abc", doc.Get("stored")); + Assert.AreEqual("abc xyz", doc.Get("stored_indexed")); + Assert.AreEqual("abc xyz", doc.Get("stored_tokenized")); + BytesRef br = doc.GetBinaryValue("binary"); + Assert.IsNotNull(br); + Assert.AreEqual(10, br.Length); + + IndexSearcher s = new IndexSearcher(r); + Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits); + Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 
1).TotalHits); + Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits); + Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits); + Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits); + Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits); + Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits); + Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits); + Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits); + Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits); + + foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" }) + { + Fields tvFields = r.GetTermVectors(0); + Terms tvs = tvFields.GetTerms(field); + Assert.IsNotNull(tvs); + Assert.AreEqual(2, tvs.Count); + TermsEnum tvsEnum = tvs.GetIterator(null); + Assert.AreEqual(new BytesRef("abc"), tvsEnum.Next()); + DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null); + if (field.Equals("tv")) + { + Assert.IsNull(dpEnum); + } + else + { + Assert.IsNotNull(dpEnum); + } + Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Next()); + Assert.IsNull(tvsEnum.Next()); + } + + r.Dispose(); + dir.Dispose(); + } + + [Test] + public virtual void TestNumericFieldAsString() + { + Documents.Document doc = new Documents.Document(); + doc.Add(new Int32Field("int", 5, Field.Store.YES)); + Assert.AreEqual("5", doc.Get("int")); + Assert.IsNull(doc.Get("somethingElse")); + doc.Add(new Int32Field("int", 4, Field.Store.YES)); + Assert.AreEqual(new string[] { "5", "4" }, doc.GetValues("int")); + + Directory dir = NewDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + iw.AddDocument(doc); + DirectoryReader ir = iw.Reader; + Documents.Document sdoc = 
ir.Document(0); + Assert.AreEqual("5", sdoc.Get("int")); + Assert.IsNull(sdoc.Get("somethingElse")); + Assert.AreEqual(new string[] { "5", "4" }, sdoc.GetValues("int")); + ir.Dispose(); + iw.Dispose(); + dir.Dispose(); + } + } +} \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Document/TestField.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Document/TestField.cs b/src/Lucene.Net.Tests/Document/TestField.cs new file mode 100644 index 0000000..f436364 --- /dev/null +++ b/src/Lucene.Net.Tests/Document/TestField.cs @@ -0,0 +1,617 @@ +using Lucene.Net.Support; +using NUnit.Framework; +using System; +using System.IO; +using System.Text; + +namespace Lucene.Net.Documents +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + using BytesRef = Lucene.Net.Util.BytesRef; + using CannedTokenStream = Lucene.Net.Analysis.CannedTokenStream; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using Token = Lucene.Net.Analysis.Token; + + // sanity check some basics of fields + [TestFixture] + public class TestField : LuceneTestCase + { + [Test] + public virtual void TestDoubleField() + { + Field[] fields = new Field[] { new DoubleField("foo", 5d, Field.Store.NO), new DoubleField("foo", 5d, Field.Store.YES) }; + + foreach (Field field in fields) + { + TrySetBoost(field); + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + field.SetDoubleValue(6d); // ok + TrySetIntValue(field); + TrySetFloatValue(field); + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + Assert.AreEqual(6d, (double)field.GetNumericValue(), 0.0d); + } + } + + [Test] + public virtual void TestDoubleDocValuesField() + { + DoubleDocValuesField field = new DoubleDocValuesField("foo", 5d); + + TrySetBoost(field); + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + field.SetDoubleValue(6d); // ok + TrySetIntValue(field); + TrySetFloatValue(field); + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + Assert.AreEqual(6d, BitConverter.Int64BitsToDouble((long)field.GetNumericValue()), 0.0d); + } + + [Test] + public virtual void TestFloatDocValuesField() + { + SingleDocValuesField field = new SingleDocValuesField("foo", 5f); + + TrySetBoost(field); + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + TrySetDoubleValue(field); + TrySetIntValue(field); + field.SetSingleValue(6f); // ok + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + 
Assert.AreEqual(6f, Number.Int32BitsToSingle(Convert.ToInt32(field.GetNumericValue())), 0.0f); + } + + [Test] + public virtual void TestFloatField() + { + Field[] fields = new Field[] { new SingleField("foo", 5f, Field.Store.NO), new SingleField("foo", 5f, Field.Store.YES) }; + + foreach (Field field in fields) + { + TrySetBoost(field); + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + TrySetDoubleValue(field); + TrySetIntValue(field); + field.SetSingleValue(6f); // ok + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + Assert.AreEqual(6f, (float)field.GetNumericValue(), 0.0f); + } + } + + [Test] + public virtual void TestIntField() + { + Field[] fields = new Field[] { new Int32Field("foo", 5, Field.Store.NO), new Int32Field("foo", 5, Field.Store.YES) }; + + foreach (Field field in fields) + { + TrySetBoost(field); + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + TrySetDoubleValue(field); + field.SetInt32Value(6); // ok + TrySetFloatValue(field); + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + Assert.AreEqual(6, (int)field.GetNumericValue()); + } + } + + [Test] + public virtual void TestNumericDocValuesField() + { + NumericDocValuesField field = new NumericDocValuesField("foo", 5L); + + TrySetBoost(field); + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + TrySetDoubleValue(field); + TrySetIntValue(field); + TrySetFloatValue(field); + field.SetInt64Value(6); // ok + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + Assert.AreEqual(6L, (long)field.GetNumericValue()); + } + + [Test] + public virtual void TestLongField() + { + Field[] fields = new Field[] { new Int64Field("foo", 5L, Field.Store.NO), new 
Int64Field("foo", 5L, Field.Store.YES) }; + + foreach (Field field in fields) + { + TrySetBoost(field); + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + TrySetDoubleValue(field); + TrySetIntValue(field); + TrySetFloatValue(field); + field.SetInt64Value(6); // ok + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + Assert.AreEqual(6L, (long)field.GetNumericValue()); + } + } + + [Test] + public virtual void TestSortedBytesDocValuesField() + { + SortedDocValuesField field = new SortedDocValuesField("foo", new BytesRef("bar")); + + TrySetBoost(field); + TrySetByteValue(field); + field.SetBytesValue("fubar".ToBytesRefArray(Encoding.UTF8)); + field.SetBytesValue(new BytesRef("baz")); + TrySetDoubleValue(field); + TrySetIntValue(field); + TrySetFloatValue(field); + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + Assert.AreEqual(new BytesRef("baz"), field.GetBinaryValue()); + } + + [Test] + public virtual void TestBinaryDocValuesField() + { + BinaryDocValuesField field = new BinaryDocValuesField("foo", new BytesRef("bar")); + + TrySetBoost(field); + TrySetByteValue(field); + field.SetBytesValue("fubar".ToBytesRefArray(Encoding.UTF8)); + field.SetBytesValue(new BytesRef("baz")); + TrySetDoubleValue(field); + TrySetIntValue(field); + TrySetFloatValue(field); + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + Assert.AreEqual(new BytesRef("baz"), field.GetBinaryValue()); + } + + [Test] + public virtual void TestStringField() + { + Field[] fields = new Field[] { new StringField("foo", "bar", Field.Store.NO), new StringField("foo", "bar", Field.Store.YES) }; + + foreach (Field field in fields) + { + TrySetBoost(field); + TrySetByteValue(field); + TrySetBytesValue(field); + 
TrySetBytesRefValue(field); + TrySetDoubleValue(field); + TrySetIntValue(field); + TrySetFloatValue(field); + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + field.SetStringValue("baz"); + TrySetTokenStreamValue(field); + + Assert.AreEqual("baz", field.GetStringValue()); + } + } + + [Test] + public virtual void TestTextFieldString() + { + Field[] fields = new Field[] { new TextField("foo", "bar", Field.Store.NO), new TextField("foo", "bar", Field.Store.YES) }; + + foreach (Field field in fields) + { + field.Boost = 5f; + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + TrySetDoubleValue(field); + TrySetIntValue(field); + TrySetFloatValue(field); + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + field.SetStringValue("baz"); + field.SetTokenStream(new CannedTokenStream(new Token("foo", 0, 3))); + + Assert.AreEqual("baz", field.GetStringValue()); + Assert.AreEqual(5f, field.Boost, 0f); + } + } + + [Test] + public virtual void TestTextFieldReader() + { + Field field = new TextField("foo", new StringReader("bar")); + + field.Boost = 5f; + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + TrySetDoubleValue(field); + TrySetIntValue(field); + TrySetFloatValue(field); + TrySetLongValue(field); + field.SetReaderValue(new StringReader("foobar")); + TrySetShortValue(field); + TrySetStringValue(field); + field.SetTokenStream(new CannedTokenStream(new Token("foo", 0, 3))); + + Assert.IsNotNull(field.GetReaderValue()); + Assert.AreEqual(5f, field.Boost, 0f); + } + + /* TODO: this is pretty expert and crazy + * see if we can fix it up later + public void testTextFieldTokenStream() throws Exception { + } + */ + + [Test] + public virtual void TestStoredFieldBytes() + { + Field[] fields = new Field[] { new StoredField("foo", "bar".GetBytes(Encoding.UTF8)), new StoredField("foo", "bar".GetBytes(Encoding.UTF8), 0, 3), new StoredField("foo", new 
BytesRef("bar")) }; + + foreach (Field field in fields) + { + TrySetBoost(field); + TrySetByteValue(field); + field.SetBytesValue("baz".ToBytesRefArray(Encoding.UTF8)); + field.SetBytesValue(new BytesRef("baz")); + TrySetDoubleValue(field); + TrySetIntValue(field); + TrySetFloatValue(field); + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + Assert.AreEqual(new BytesRef("baz"), field.GetBinaryValue()); + } + } + + [Test] + public virtual void TestStoredFieldString() + { + Field field = new StoredField("foo", "bar"); + TrySetBoost(field); + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + TrySetDoubleValue(field); + TrySetIntValue(field); + TrySetFloatValue(field); + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + field.SetStringValue("baz"); + TrySetTokenStreamValue(field); + + Assert.AreEqual("baz", field.GetStringValue()); + } + + [Test] + public virtual void TestStoredFieldInt() + { + Field field = new StoredField("foo", 1); + TrySetBoost(field); + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + TrySetDoubleValue(field); + field.SetInt32Value(5); + TrySetFloatValue(field); + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + Assert.AreEqual(5, (int)field.GetNumericValue()); + } + + [Test] + public virtual void TestStoredFieldDouble() + { + Field field = new StoredField("foo", 1D); + TrySetBoost(field); + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + field.SetDoubleValue(5D); + TrySetIntValue(field); + TrySetFloatValue(field); + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + Assert.AreEqual(5D, (double)field.GetNumericValue(), 0.0D); + } + + 
[Test] + public virtual void TestStoredFieldFloat() + { + Field field = new StoredField("foo", 1F); + TrySetBoost(field); + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + TrySetDoubleValue(field); + TrySetIntValue(field); + field.SetSingleValue(5f); + TrySetLongValue(field); + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + Assert.AreEqual(5f, (float)field.GetNumericValue(), 0.0f); + } + + [Test] + public virtual void TestStoredFieldLong() + { + Field field = new StoredField("foo", 1L); + TrySetBoost(field); + TrySetByteValue(field); + TrySetBytesValue(field); + TrySetBytesRefValue(field); + TrySetDoubleValue(field); + TrySetIntValue(field); + TrySetFloatValue(field); + field.SetInt64Value(5); + TrySetReaderValue(field); + TrySetShortValue(field); + TrySetStringValue(field); + TrySetTokenStreamValue(field); + + Assert.AreEqual(5L, (long)field.GetNumericValue()); + } + + private void TrySetByteValue(Field f) + { + try + { + f.SetByteValue((byte)10); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + // expected + } + } + + private void TrySetBytesValue(Field f) + { + try + { + f.SetBytesValue(new byte[] { 5, 5 }); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + // expected + } + } + + private void TrySetBytesRefValue(Field f) + { + try + { + f.SetBytesValue(new BytesRef("bogus")); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + // expected + } + } + + private void TrySetDoubleValue(Field f) + { + try + { + f.SetDoubleValue(double.MaxValue); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + // expected + } + } + + private void TrySetIntValue(Field f) 
+ { + try + { + f.SetInt32Value(int.MaxValue); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + // expected + } + } + + private void TrySetLongValue(Field f) + { + try + { + f.SetInt64Value(long.MaxValue); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + // expected + } + } + + private void TrySetFloatValue(Field f) + { + try + { + f.SetSingleValue(float.MaxValue); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + // expected + } + } + + private void TrySetReaderValue(Field f) + { + try + { + f.SetReaderValue(new StringReader("BOO!")); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + // expected + } + } + + private void TrySetShortValue(Field f) + { + try + { + f.SetInt16Value(short.MaxValue); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + // expected + } + } + + private void TrySetStringValue(Field f) + { + try + { + f.SetStringValue("BOO!"); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + // expected + } + } + + private void TrySetTokenStreamValue(Field f) + { + try + { + f.SetTokenStream(new CannedTokenStream(new Token("foo", 0, 3))); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + // expected + } + } + + private void TrySetBoost(Field f) + { + try + { + f.Boost = 5.0f; + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + // expected + } + } + } +} \ No newline at end of file 
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/BinaryTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/BinaryTokenStream.cs b/src/Lucene.Net.Tests/Index/BinaryTokenStream.cs
new file mode 100644
index 0000000..cc1ead2
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/BinaryTokenStream.cs
@@ -0,0 +1,101 @@
using Lucene.Net.Analysis.TokenAttributes;

namespace Lucene.Net.Index
{
    using Attribute = Lucene.Net.Util.Attribute;
    using BytesRef = Lucene.Net.Util.BytesRef;
    using IAttribute = Lucene.Net.Util.IAttribute;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using TokenStream = Lucene.Net.Analysis.TokenStream;

    // javadocs

    /// <summary>
    /// A binary tokenstream that lets you index a single
    /// binary token (BytesRef value).
    /// </summary>
    /// <seealso cref="Lucene.Net.Analysis.CannedBinaryTokenStream"/>
    public sealed class BinaryTokenStream : TokenStream
    {
        private readonly IByteTermAttribute BytesAtt;
        private readonly BytesRef Bytes;

        // True until the single token has been emitted; Reset() re-arms it.
        private bool Available = true;

        /// <summary>
        /// Creates a stream that emits <paramref name="bytes"/> as its only token.
        /// </summary>
        public BinaryTokenStream(BytesRef bytes)
        {
            this.Bytes = bytes;
            BytesAtt = AddAttribute<IByteTermAttribute>();
        }

        /// <summary>
        /// Emits the single binary token on the first call after construction or
        /// <see cref="Reset"/>; returns <c>false</c> on every later call.
        /// </summary>
        public override bool IncrementToken()
        {
            if (Available)
            {
                ClearAttributes();
                Available = false;
                BytesAtt.BytesRef = Bytes;
                return true;
            }
            return false;
        }

        public override void Reset()
        {
            Available = true;
        }

        /// <summary>
        /// Term attribute whose bytes are assigned directly by the owning stream.
        /// </summary>
        public interface IByteTermAttribute : ITermToBytesRefAttribute
        {
            new BytesRef BytesRef { get; set; }
        }

        public class ByteTermAttribute : Attribute, IByteTermAttribute
        {
            internal BytesRef Bytes;

            public void FillBytesRef()
            {
                // no-op: the bytes was already filled by our owner's incrementToken
            }

            public BytesRef BytesRef
            {
                get
                {
                    return Bytes;
                }
                set
                {
                    this.Bytes = value;
                }
            }

            public override void Clear()
            {
                // Intentionally empty: the owning stream overwrites Bytes on every token.
            }

            public override void CopyTo(IAttribute target)
            {
                ByteTermAttribute other = (ByteTermAttribute)target;
                other.Bytes = Bytes;
            }
        }
    }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/SynchronizedList.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/SynchronizedList.cs b/src/Lucene.Net.Tests/Index/SynchronizedList.cs
new file mode 100644
index 0000000..000620d
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/SynchronizedList.cs
@@ -0,0 +1,168 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace Lucene.Net.Index
{

    /// <summary>
    /// An <see cref="IList{T}"/> wrapper around <see cref="List{T}"/> in which every
    /// member takes a <see cref="ReaderWriterLockSlim"/>: the read lock for members
    /// that only inspect the list, the write lock for members that mutate it.
    /// Enumeration runs over a snapshot taken under the read lock, so callers may
    /// iterate while other threads modify the list.
    /// </summary>
    internal class SynchronizedList<T> : IList<T>
    {
        private readonly List<T> _list = new List<T>();

        private readonly ReaderWriterLockSlim _lock = new ReaderWriterLockSlim();

        public T this[int index]
        {
            get { return ReadLocked(() => _list[index]); }
            set { WriteLocked(() => { _list[index] = value; }); }
        }

        public int Count
        {
            get { return ReadLocked(() => _list.Count); }
        }

        public bool IsReadOnly
        {
            get { return false; }
        }

        public void Add(T item)
        {
            WriteLocked(() => { _list.Add(item); });
        }

        public void Clear()
        {
            WriteLocked(() => { _list.Clear(); });
        }

        public bool Contains(T item)
        {
            return ReadLocked(() => _list.Contains(item));
        }

        public void CopyTo(T[] array, int arrayIndex)
        {
            // Copying only reads from the list, so the shared (read) lock is sufficient.
            _lock.EnterReadLock();
            try
            {
                _list.CopyTo(array, arrayIndex);
            }
            finally
            {
                _lock.ExitReadLock();
            }
        }

        /// <summary>
        /// Returns an enumerator over a copy of the list taken under the read lock.
        /// Changes made to the list after this call are not seen by the enumerator.
        /// </summary>
        public IEnumerator<T> GetEnumerator()
        {
            // A live List<T> enumerator would be used after the lock is released,
            // so hand out a snapshot instead.
            T[] snapshot = ReadLocked(() => _list.ToArray());
            return ((IEnumerable<T>)snapshot).GetEnumerator();
        }

        public int IndexOf(T item)
        {
            return ReadLocked(() => _list.IndexOf(item));
        }

        public void Insert(int index, T item)
        {
            WriteLocked(() => { _list.Insert(index, item); });
        }

        public bool Remove(T item)
        {
            _lock.EnterWriteLock();
            try
            {
                return _list.Remove(item);
            }
            finally
            {
                _lock.ExitWriteLock();
            }
        }

        public void RemoveAt(int index)
        {
            WriteLocked(() => { _list.RemoveAt(index); });
        }

        IEnumerator IEnumerable.GetEnumerator()
        {
            // Delegate to the snapshotting generic enumerator so both paths are locked.
            return GetEnumerator();
        }

        // Runs a read-only operation while holding the read lock and returns its result.
        private TResult ReadLocked<TResult>(Func<TResult> read)
        {
            _lock.EnterReadLock();
            try
            {
                return read();
            }
            finally
            {
                _lock.ExitReadLock();
            }
        }

        // Runs a mutating operation while holding the write lock.
        private void WriteLocked(Action write)
        {
            _lock.EnterWriteLock();
            try
            {
                write();
            }
            finally
            {
                _lock.ExitWriteLock();
            }
        }
    }
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/Test2BBinaryDocValues.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/Test2BBinaryDocValues.cs b/src/Lucene.Net.Tests/Index/Test2BBinaryDocValues.cs
new file mode 100644
index 0000000..e7bc021
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/Test2BBinaryDocValues.cs
@@ -0,0 +1,171 @@
using Lucene.Net.Documents;
using NUnit.Framework;
using System;

namespace Lucene.Net.Index
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using BaseDirectoryWrapper = Lucene.Net.Store.BaseDirectoryWrapper;
    using BinaryDocValuesField = BinaryDocValuesField;
    using ByteArrayDataInput = Lucene.Net.Store.ByteArrayDataInput;
    using ByteArrayDataOutput = Lucene.Net.Store.ByteArrayDataOutput;
    using BytesRef = Lucene.Net.Util.BytesRef;
    using Document = Documents.Document;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
    using MockDirectoryWrapper = Lucene.Net.Store.MockDirectoryWrapper;

    [SuppressCodecs("Lucene3x")]
    [Ignore("takes ~ 45 minutes")]
    [TestFixture]
    public class Test2BBinaryDocValues : LuceneTestCase
    {
        // Turns throttling off when the directory is a MockDirectoryWrapper.
        private static void DisableThrottling(BaseDirectoryWrapper directory)
        {
            MockDirectoryWrapper mock = directory as MockDirectoryWrapper;
            if (mock != null)
            {
                mock.Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
        }

        // Opens a writer in CREATE mode with auto-flush by doc count disabled,
        // a 256 MB RAM buffer, the supplied merge scheduler and a log merge policy.
        private IndexWriter OpenWriter(BaseDirectoryWrapper directory, IConcurrentMergeScheduler scheduler)
        {
            var writerConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .SetRAMBufferSizeMB(256.0)
                .SetMergeScheduler(scheduler)
                .SetMergePolicy(NewLogMergePolicy(false, 10))
                .SetOpenMode(OpenMode.CREATE);
            return new IndexWriter(directory, writerConfig);
        }

        // Stores value into the first four bytes of buffer, most significant byte first.
        private static void WriteBigEndian(byte[] buffer, int value)
        {
            buffer[0] = (byte)(value >> 24);
            buffer[1] = (byte)(value >> 16);
            buffer[2] = (byte)(value >> 8);
            buffer[3] = (byte)value;
        }

        // indexes Integer.MAX_VALUE docs with a fixed binary field
        [Test]
        public virtual void TestFixedBinary([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper directory = NewFSDirectory(CreateTempDir("2BFixedBinary"));
            DisableThrottling(directory);
            IndexWriter writer = OpenWriter(directory, scheduler);

            // One document instance is reused; only the shared byte buffer changes per doc.
            Document document = new Document();
            var buffer = new byte[4];
            BytesRef payload = new BytesRef(buffer);
            BinaryDocValuesField dvField = new BinaryDocValuesField("dv", payload);
            document.Add(dvField);

            for (int docNumber = 0; docNumber < int.MaxValue; docNumber++)
            {
                WriteBigEndian(buffer, docNumber);
                writer.AddDocument(document);
                if (docNumber % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + docNumber);
                    Console.Out.Flush();
                }
            }

            writer.ForceMerge(1);
            writer.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader indexReader = DirectoryReader.Open(directory);
            int expectedValue = 0;
            foreach (AtomicReaderContext leaf in indexReader.Leaves)
            {
                AtomicReader leafReader = leaf.AtomicReader;
                BytesRef scratch = new BytesRef();
                BinaryDocValues docValues = leafReader.GetBinaryDocValues("dv");
                for (int docId = 0; docId < leafReader.MaxDoc; docId++)
                {
                    // Rebuild the expected bytes in the shared buffer that payload wraps.
                    WriteBigEndian(buffer, expectedValue);
                    docValues.Get(docId, scratch);
                    Assert.AreEqual(payload, scratch);
                    expectedValue++;
                }
            }

            indexReader.Dispose();
            directory.Dispose();
        }

        // indexes Integer.MAX_VALUE docs with a variable binary field
        [Test]
        public virtual void TestVariableBinary([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper directory = NewFSDirectory(CreateTempDir("2BVariableBinary"));
            DisableThrottling(directory);
            IndexWriter writer = OpenWriter(directory, scheduler);

            Document document = new Document();
            var buffer = new byte[4];
            ByteArrayDataOutput encoder = new ByteArrayDataOutput(buffer);
            BytesRef payload = new BytesRef(buffer);
            BinaryDocValuesField dvField = new BinaryDocValuesField("dv", payload);
            document.Add(dvField);

            for (int docNumber = 0; docNumber < int.MaxValue; docNumber++)
            {
                encoder.Reset(buffer);
                encoder.WriteVInt32(docNumber % 65535); // 1, 2, or 3 bytes
                payload.Length = encoder.Position;
                writer.AddDocument(document);
                if (docNumber % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + docNumber);
                    Console.Out.Flush();
                }
            }

            writer.ForceMerge(1);
            writer.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader indexReader = DirectoryReader.Open(directory);
            int expectedValue = 0;
            ByteArrayDataInput decoder = new ByteArrayDataInput();
            foreach (AtomicReaderContext leaf in indexReader.Leaves)
            {
                AtomicReader leafReader = leaf.AtomicReader;
                BytesRef scratch = new BytesRef(buffer);
                BinaryDocValues docValues = leafReader.GetBinaryDocValues("dv");
                for (int docId = 0; docId < leafReader.MaxDoc; docId++)
                {
                    docValues.Get(docId, scratch);
                    decoder.Reset((byte[])(Array)scratch.Bytes, scratch.Offset, scratch.Length);
                    int decoded = decoder.ReadVInt32();
                    Assert.AreEqual(expectedValue % 65535, decoded);
                    // The vInt must account for the whole stored value.
                    Assert.IsTrue(decoder.Eof);
                    expectedValue++;
                }
            }

            indexReader.Dispose();
            directory.Dispose();
        }
    }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/Test2BDocs.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/Test2BDocs.cs b/src/Lucene.Net.Tests/Index/Test2BDocs.cs
new file mode 100644
index 0000000..35963e5
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/Test2BDocs.cs
@@ -0,0 +1,105 @@
using Lucene.Net.Support;
using NUnit.Framework;

namespace Lucene.Net.Index
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
*/

    using Directory = Lucene.Net.Store.Directory;
    using Document = Documents.Document;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;

    /// <summary>
    /// Checks the int.MaxValue document limit of <c>MultiReader</c> by stacking one
    /// 262144-document index many times instead of building a 2-billion-doc index.
    /// </summary>
    [TestFixture]
    public class Test2BDocs : LuceneTestCase
    {
        // Shared index of 262144 (2^18) empty documents, built once for the fixture.
        internal static Directory Dir;

        [OneTimeSetUp]
        public static void BeforeClass()
        {
            Dir = NewFSDirectory(CreateTempDir("2Bdocs"));
            IndexWriter iw = new IndexWriter(Dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
            Document doc = new Document();
            for (int i = 0; i < 262144; i++)
            {
                iw.AddDocument(doc);
            }
            iw.ForceMerge(1);
            iw.Dispose();
        }

        [OneTimeTearDown]
        public void AfterClass()
        {
            try
            {
                // Dir is null when BeforeClass failed before assigning it.
                if (Dir != null)
                {
                    Dir.Dispose();
                }
            }
            finally
            {
                // Always run the deferred per-test cleanup, even if Dispose throws.
                Dir = null;
                base.TearDown();
            }
        }

        public override void TearDown()
        {
            // LUCENENET: We don't want our temp directory deleted until after
            // all of the tests in the class run. So we need to override this and
            // call base.TearDown() manually during TestFixtureTearDown
        }

        /// <summary>
        /// 8192 copies of the 262144-doc reader total 2^31 documents, one more than
        /// int.MaxValue; constructing the MultiReader is expected to be rejected.
        /// </summary>
        [Test]
        public virtual void TestOverflow()
        {
            DirectoryReader ir = DirectoryReader.Open(Dir);
            try
            {
                IndexReader[] subReaders = new IndexReader[8192];
                Arrays.Fill(subReaders, ir);
                try
                {
                    new MultiReader(subReaders);
                    Assert.Fail();
                }
                catch (System.ArgumentException)
                {
                    // expected
                }
            }
            finally
            {
                // Release the reader even when the assertion above fails.
                ir.Dispose();
            }
        }

        /// <summary>
        /// 8191 copies of the 262144-doc reader plus one 262143-doc reader total
        /// exactly int.MaxValue documents, which must be accepted.
        /// </summary>
        [Test]
        public virtual void TestExactlyAtLimit()
        {
            Directory dir2 = NewFSDirectory(CreateTempDir("2BDocs2"));
            try
            {
                IndexWriter iw = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
                try
                {
                    Document doc = new Document();
                    for (int i = 0; i < 262143; i++)
                    {
                        iw.AddDocument(doc);
                    }
                }
                finally
                {
                    iw.Dispose();
                }

                DirectoryReader ir = DirectoryReader.Open(Dir);
                try
                {
                    DirectoryReader ir2 = DirectoryReader.Open(dir2);
                    try
                    {
                        IndexReader[] subReaders = new IndexReader[8192];
                        Arrays.Fill(subReaders, ir);
                        subReaders[subReaders.Length - 1] = ir2;
                        MultiReader mr = new MultiReader(subReaders);
                        Assert.AreEqual(int.MaxValue, mr.MaxDoc);
                        Assert.AreEqual(int.MaxValue, mr.NumDocs);
                    }
                    finally
                    {
                        ir2.Dispose();
                    }
                }
                finally
                {
                    ir.Dispose();
                }
            }
            finally
            {
                // Readers and writer are closed by the inner finally blocks before this runs.
                dir2.Dispose();
            }
        }
    }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/Test2BNumericDocValues.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/Test2BNumericDocValues.cs b/src/Lucene.Net.Tests/Index/Test2BNumericDocValues.cs
new file mode 100644
index 0000000..7c37423
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/Test2BNumericDocValues.cs
@@ -0,0 +1,89 @@
using Lucene.Net.Attributes;
using Lucene.Net.Documents;
using NUnit.Framework;
using System;

namespace Lucene.Net.Index
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
*/

    using BaseDirectoryWrapper = Lucene.Net.Store.BaseDirectoryWrapper;
    using Document = Documents.Document;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
    using MockDirectoryWrapper = Lucene.Net.Store.MockDirectoryWrapper;
    using NumericDocValuesField = NumericDocValuesField;

    //@TimeoutSuite(millis = 80 * TimeUnits.HOUR) @Ignore("takes ~ 30 minutes") @SuppressCodecs("Lucene3x") public class Test2BNumericDocValues extends Lucene.Net.Util.LuceneTestCase
    [SuppressCodecs("Lucene3x")]
    [Ignore("takes ~ 30 minutes")]
    [TestFixture]
    public class Test2BNumericDocValues : LuceneTestCase
    {
        // indexes Integer.MAX_VALUE docs with an increasing dv field
        [Test, LongRunningTest]
        public virtual void TestNumerics([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper directory = NewFSDirectory(CreateTempDir("2BNumerics"));
            MockDirectoryWrapper mock = directory as MockDirectoryWrapper;
            if (mock != null)
            {
                mock.Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            var writerConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .SetRAMBufferSizeMB(256.0)
                .SetMergeScheduler(scheduler)
                .SetMergePolicy(NewLogMergePolicy(false, 10))
                .SetOpenMode(OpenMode.CREATE);
            IndexWriter writer = new IndexWriter(directory, writerConfig);

            // A single document is reused; only the field value changes between adds.
            Document document = new Document();
            NumericDocValuesField dvField = new NumericDocValuesField("dv", 0);
            document.Add(dvField);

            for (int docNumber = 0; docNumber < int.MaxValue; docNumber++)
            {
                dvField.SetInt64Value(docNumber);
                writer.AddDocument(document);
                if (docNumber % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + docNumber);
                    Console.Out.Flush();
                }
            }

            writer.ForceMerge(1);
            writer.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            // Values were written as 0, 1, 2, ... so they must read back in the same order.
            DirectoryReader indexReader = DirectoryReader.Open(directory);
            long expectedValue = 0;
            foreach (AtomicReaderContext leaf in indexReader.Leaves)
            {
                AtomicReader leafReader = leaf.AtomicReader;
                NumericDocValues docValues = leafReader.GetNumericDocValues("dv");
                for (int docId = 0; docId < leafReader.MaxDoc; docId++)
                {
                    Assert.AreEqual(expectedValue, docValues.Get(docId));
                    expectedValue++;
                }
            }

            indexReader.Dispose();
            directory.Dispose();
        }
    }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/Test2BPositions.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/Test2BPositions.cs b/src/Lucene.Net.Tests/Index/Test2BPositions.cs
new file mode 100644
index 0000000..d004779
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/Test2BPositions.cs
@@ -0,0 +1,123 @@
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Documents;
using NUnit.Framework;
using System;

namespace Lucene.Net.Index
{
    using BaseDirectoryWrapper = Lucene.Net.Store.BaseDirectoryWrapper;
    using CharTermAttribute = Lucene.Net.Analysis.TokenAttributes.CharTermAttribute;
    using Document = Documents.Document;
    using Field = Field;
    using FieldType = FieldType;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
*/

    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
    using MockDirectoryWrapper = Lucene.Net.Store.MockDirectoryWrapper;
    using PositionIncrementAttribute = Lucene.Net.Analysis.TokenAttributes.PositionIncrementAttribute;
    using TextField = TextField;
    using TokenStream = Lucene.Net.Analysis.TokenStream;

    /*using Ignore = org.junit.Ignore;

    using TimeoutSuite = com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;*/

    /// <summary>
    /// Test indexes ~82M docs with 52 positions each, so you get > Integer.MAX_VALUE positions
    /// @lucene.experimental
    /// </summary>
    [SuppressCodecs("SimpleText", "Memory", "Direct")]
    [TestFixture]
    public class Test2BPositions : LuceneTestCase
    // uses lots of space and takes a few minutes
    {
        [Ignore("Very slow. Enable manually by removing Ignore.")]
        [Test]
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper directory = NewFSDirectory(CreateTempDir("2BPositions"));
            MockDirectoryWrapper mock = directory as MockDirectoryWrapper;
            if (mock != null)
            {
                mock.Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            var writerConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .SetRAMBufferSizeMB(256.0)
                .SetMergeScheduler(scheduler)
                .SetMergePolicy(NewLogMergePolicy(false, 10))
                .SetOpenMode(OpenMode.CREATE);
            IndexWriter writer = new IndexWriter(directory, writerConfig);

            LogByteSizeMergePolicy byteSizePolicy = writer.Config.MergePolicy as LogByteSizeMergePolicy;
            if (byteSizePolicy != null)
            {
                // 1 petabyte:
                byteSizePolicy.MaxMergeMB = 1024 * 1024 * 1024;
            }

            // The same document (and token stream) instance is added for every doc.
            Document document = new Document();
            FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
            fieldType.OmitNorms = true;
            Field field = new Field("field", new MyTokenStream(), fieldType);
            document.Add(field);

            int numDocs = (int.MaxValue / 26) + 1;
            for (int docNumber = 0; docNumber < numDocs; docNumber++)
            {
                writer.AddDocument(document);
                if (VERBOSE && docNumber % 100000 == 0)
                {
                    Console.WriteLine(docNumber + " of " + numDocs + "...");
                }
            }
            writer.ForceMerge(1);
            writer.Dispose();
            directory.Dispose();
        }

        /// <summary>
        /// Emits the term "a" 52 times per reset, with position increments 1, 2, ..., 52.
        /// </summary>
        public sealed class MyTokenStream : TokenStream
        {
            internal readonly ICharTermAttribute TermAtt;
            internal readonly IPositionIncrementAttribute PosIncAtt;
            internal int Index;

            public MyTokenStream()
            {
                TermAtt = AddAttribute<ICharTermAttribute>();
                PosIncAtt = AddAttribute<IPositionIncrementAttribute>();
            }

            public override bool IncrementToken()
            {
                if (Index >= 52)
                {
                    return false;
                }

                ClearAttributes();
                TermAtt.Length = 1;
                TermAtt.Buffer[0] = 'a';
                PosIncAtt.PositionIncrement = 1 + Index;
                Index++;
                return true;
            }

            public override void Reset()
            {
                Index = 0;
            }
        }
    }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/Test2BPostings.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/Test2BPostings.cs b/src/Lucene.Net.Tests/Index/Test2BPostings.cs
new file mode 100644
index 0000000..2b79afd
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/Test2BPostings.cs
@@ -0,0 +1,125 @@
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Attributes;
using Lucene.Net.Documents;
using NUnit.Framework;
using System;

namespace Lucene.Net.Index
{
    using BaseDirectoryWrapper = Lucene.Net.Store.BaseDirectoryWrapper;
    using CharTermAttribute = Lucene.Net.Analysis.TokenAttributes.CharTermAttribute;
    using Document = Documents.Document;
    using Field = Field;
    using FieldType = FieldType;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
    using MockDirectoryWrapper = Lucene.Net.Store.MockDirectoryWrapper;
    using TextField = TextField;
    using TokenStream = Lucene.Net.Analysis.TokenStream;

    /// <summary>
    /// Test indexes ~82M docs with 26 terms each, so you get > Integer.MAX_VALUE terms/docs pairs
    /// @lucene.experimental
    /// </summary>
    [SuppressCodecs("SimpleText", "Memory", "Direct", "Compressing")]
    [TestFixture]
    public class Test2BPostings : LuceneTestCase
    {
        [Ignore("Very slow. Enable manually by removing Ignore.")]
#if !NETSTANDARD
        // LUCENENET: There is no Timeout on NUnit for .NET Core.
        [Timeout(int.MaxValue)]
#endif
        [Test, LongRunningTest, HasTimeout]
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper directory = NewFSDirectory(CreateTempDir("2BPostings"));
            MockDirectoryWrapper mock = directory as MockDirectoryWrapper;
            if (mock != null)
            {
                mock.Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            var writerConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .SetRAMBufferSizeMB(256.0)
                .SetMergeScheduler(scheduler)
                .SetMergePolicy(NewLogMergePolicy(false, 10))
                .SetOpenMode(OpenMode.CREATE);

            IndexWriter writer = new IndexWriter(directory, writerConfig);

            LogByteSizeMergePolicy byteSizePolicy = writer.Config.MergePolicy as LogByteSizeMergePolicy;
            if (byteSizePolicy != null)
            {
                // 1 petabyte:
                byteSizePolicy.MaxMergeMB = 1024 * 1024 * 1024;
            }

            // Docs-only postings without norms; one document instance is reused for every add.
            Document document = new Document();
            FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
            fieldType.OmitNorms = true;
            fieldType.IndexOptions = IndexOptions.DOCS_ONLY;
            Field field = new Field("field", new MyTokenStream(), fieldType);
            document.Add(field);

            int numDocs = (int.MaxValue / 26) + 1;
            for (int docNumber = 0; docNumber < numDocs; docNumber++)
            {
                writer.AddDocument(document);
                if (VERBOSE && docNumber % 100000 == 0)
                {
                    Console.WriteLine(docNumber + " of " + numDocs + "...");
                }
            }
            writer.ForceMerge(1);
            writer.Dispose();
            directory.Dispose();
        }

        /// <summary>
        /// Emits one single-character term per call, taking the character from
        /// <c>Index</c> and advancing it, until <c>Index</c> passes 'z'.
        /// <see cref="Reset"/> restarts at 'a'.
        /// </summary>
        public sealed class MyTokenStream : TokenStream
        {
            internal readonly ICharTermAttribute TermAtt;
            internal int Index;

            public MyTokenStream()
            {
                TermAtt = AddAttribute<ICharTermAttribute>();
            }

            public override bool IncrementToken()
            {
                if (Index > 'z')
                {
                    return false;
                }

                ClearAttributes();
                TermAtt.Length = 1;
                TermAtt.Buffer[0] = (char)Index;
                Index++;
                return true;
            }

            public override void Reset()
            {
                Index = 'a';
            }
        }
    }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/Test2BPostingsBytes.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/Test2BPostingsBytes.cs b/src/Lucene.Net.Tests/Index/Test2BPostingsBytes.cs
new file mode 100644
index 0000000..42f9329
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/Test2BPostingsBytes.cs
@@ -0,0 +1,167 @@
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Documents;
using NUnit.Framework;

namespace Lucene.Net.Index
{
    using Lucene.Net.Support;
    using BaseDirectoryWrapper = Lucene.Net.Store.BaseDirectoryWrapper;
    using CharTermAttribute = Lucene.Net.Analysis.TokenAttributes.CharTermAttribute;
    using Document = Documents.Document;
    using Field = Field;
    using FieldType = FieldType;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
    using MockDirectoryWrapper = Lucene.Net.Store.MockDirectoryWrapper;
    using TextField = TextField;
    using TokenStream = Lucene.Net.Analysis.TokenStream;

    /*using Ignore = org.junit.Ignore;

    using TimeoutSuite = com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;*/

    /// <summary>
    /// Test indexes 2B docs with 65k freqs each,
    /// so you get > Integer.MAX_VALUE postings data for the term
    /// @lucene.experimental
    /// </summary>
    [SuppressCodecs("SimpleText", "Memory", "Direct", "Lucene3x")]
    [TestFixture]
    public class Test2BPostingsBytes : LuceneTestCase
    // disable Lucene3x: older lucene formats always had this issue.
    // @Absurd @Ignore takes ~20GB-30GB of space and 10 minutes.
    // with some codecs needs more heap space as well.
    {
        // Turns throttling off when the directory is a MockDirectoryWrapper.
        private static void DisableThrottling(BaseDirectoryWrapper directory)
        {
            MockDirectoryWrapper mock = directory as MockDirectoryWrapper;
            if (mock != null)
            {
                mock.Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
        }

        [Ignore("Very slow. Enable manually by removing Ignore.")]
        [Test]
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            // Stage 1: a 1000-document index whose single term has ~65k occurrences per doc.
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1"));
            DisableThrottling(dir);

            var writerConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .SetRAMBufferSizeMB(256.0)
                .SetMergeScheduler(scheduler)
                .SetMergePolicy(NewLogMergePolicy(false, 10))
                .SetOpenMode(OpenMode.CREATE);
            IndexWriter writer = new IndexWriter(dir, writerConfig);

            LogByteSizeMergePolicy byteSizePolicy = writer.Config.MergePolicy as LogByteSizeMergePolicy;
            if (byteSizePolicy != null)
            {
                // 1 petabyte:
                byteSizePolicy.MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document document = new Document();
            FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
            fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS;
            fieldType.OmitNorms = true;
            MyTokenStream tokenStream = new MyTokenStream();
            Field field = new Field("field", tokenStream, fieldType);
            document.Add(field);

            const int numDocs = 1000;
            for (int docNumber = 0; docNumber < numDocs; docNumber++)
            {
                // trick blockPF's little optimization
                tokenStream.n = (docNumber % 2 == 1) ? 65536 : 65537;
                writer.AddDocument(document);
            }
            writer.ForceMerge(1);
            writer.Dispose();

            // Stage 2: add the stage-1 index 1000 times over into a second index.
            DirectoryReader oneThousand = DirectoryReader.Open(dir);
            IndexReader[] subReaders = new IndexReader[1000];
            Arrays.Fill(subReaders, oneThousand);
            MultiReader multiReader = new MultiReader(subReaders);
            BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2"));
            DisableThrottling(dir2);
            IndexWriter writer2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
            writer2.AddIndexes(multiReader);
            writer2.ForceMerge(1);
            writer2.Dispose();
            oneThousand.Dispose();

            // Stage 3: add the stage-2 index 2000 times over into a third index.
            DirectoryReader oneMillion = DirectoryReader.Open(dir2);
            subReaders = new IndexReader[2000];
            Arrays.Fill(subReaders, oneMillion);
            multiReader = new MultiReader(subReaders);
            BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3"));
            DisableThrottling(dir3);
            IndexWriter writer3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
            writer3.AddIndexes(multiReader);
            writer3.ForceMerge(1);
            writer3.Dispose();
            oneMillion.Dispose();

            dir.Dispose();
            dir2.Dispose();
            dir3.Dispose();
        }

        /// <summary>
        /// Emits the term "a" <c>n</c> times per reset; <c>n</c> is set by the test
        /// before each document is added.
        /// </summary>
        public sealed class MyTokenStream : TokenStream
        {
            internal readonly ICharTermAttribute TermAtt;
            internal int Index;
            internal int n;

            public MyTokenStream()
            {
                TermAtt = AddAttribute<ICharTermAttribute>();
            }

            public override bool IncrementToken()
            {
                if (Index >= n)
                {
                    return false;
                }

                ClearAttributes();
                TermAtt.Buffer[0] = 'a';
                TermAtt.Length = 1;
                Index++;
                return true;
            }

            public override void Reset()
            {
                Index = 0;
            }
        }
    }
}
\ No newline at end of file
