Lucene.Net.TestFramework: Completed implementation of RandomCodec
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/b43c69b8 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/b43c69b8 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/b43c69b8 Branch: refs/heads/api-work Commit: b43c69b8915926d404f9aaaac2500dc4e0fbcf2f Parents: 84f26aa Author: Shad Storhaug <[email protected]> Authored: Tue Feb 28 05:24:09 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Tue Feb 28 05:56:22 2017 +0700 ---------------------------------------------------------------------- .../Index/RandomCodec.cs | 188 ++++++++++--------- .../Util/TestRuleSetupAndRestoreClassEnv.cs | 2 +- 2 files changed, 99 insertions(+), 91 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b43c69b8/src/Lucene.Net.TestFramework/Index/RandomCodec.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Index/RandomCodec.cs b/src/Lucene.Net.TestFramework/Index/RandomCodec.cs index 68d0098..0afec03 100644 --- a/src/Lucene.Net.TestFramework/Index/RandomCodec.cs +++ b/src/Lucene.Net.TestFramework/Index/RandomCodec.cs @@ -1,63 +1,62 @@ using Lucene.Net.Support; -using Lucene.Net.Codecs; +using Lucene.Net.Util; using System; using System.Collections.Generic; using System.Diagnostics; namespace Lucene.Net.Index { - + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + using AssertingDocValuesFormat = Lucene.Net.Codecs.Asserting.AssertingDocValuesFormat; using AssertingPostingsFormat = Lucene.Net.Codecs.Asserting.AssertingPostingsFormat; - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - using DocValuesFormat = Lucene.Net.Codecs.DocValuesFormat; - //using TestBloomFilteredLucene41Postings = Lucene.Net.Codecs.bloom.TestBloomFilteredLucene41Postings; - //using DiskDocValuesFormat = Lucene.Net.Codecs.diskdv.DiskDocValuesFormat; + using TestBloomFilteredLucene41Postings = Lucene.Net.Codecs.Bloom.TestBloomFilteredLucene41Postings; + using DiskDocValuesFormat = Lucene.Net.Codecs.DiskDV.DiskDocValuesFormat; using Lucene41PostingsFormat = Lucene.Net.Codecs.Lucene41.Lucene41PostingsFormat; - //using Lucene41WithOrds = Lucene.Net.Codecs.Lucene41ords.Lucene41WithOrds; + using Lucene41WithOrds = Lucene.Net.Codecs.Lucene41Ords.Lucene41WithOrds; using Lucene45DocValuesFormat = Lucene.Net.Codecs.Lucene45.Lucene45DocValuesFormat; using Lucene46Codec = Lucene.Net.Codecs.Lucene46.Lucene46Codec; using PostingsFormat = Lucene.Net.Codecs.PostingsFormat; - //using DirectPostingsFormat = Lucene.Net.Codecs.memory.DirectPostingsFormat; - //using MemoryDocValuesFormat = Lucene.Net.Codecs.memory.MemoryDocValuesFormat; - //using MemoryPostingsFormat = Lucene.Net.Codecs.memory.MemoryPostingsFormat; - //using MockFixedIntBlockPostingsFormat = Lucene.Net.Codecs.mockintblock.MockFixedIntBlockPostingsFormat; - //using MockVariableIntBlockPostingsFormat = Lucene.Net.Codecs.mockintblock.MockVariableIntBlockPostingsFormat; - //using MockRandomPostingsFormat = Lucene.Net.Codecs.mockrandom.MockRandomPostingsFormat; - //using MockSepPostingsFormat = Lucene.Net.Codecs.mocksep.MockSepPostingsFormat; - //using NestedPulsingPostingsFormat = Lucene.Net.Codecs.nestedpulsing.NestedPulsingPostingsFormat; - //using Pulsing41PostingsFormat = Lucene.Net.Codecs.pulsing.Pulsing41PostingsFormat; - //using SimpleTextDocValuesFormat = Lucene.Net.Codecs.simpletext.SimpleTextDocValuesFormat; - //using SimpleTextPostingsFormat = Lucene.Net.Codecs.simpletext.SimpleTextPostingsFormat; - //using FSTOrdPostingsFormat = Lucene.Net.Codecs.memory.FSTOrdPostingsFormat; - //using FSTOrdPulsing41PostingsFormat = Lucene.Net.Codecs.memory.FSTOrdPulsing41PostingsFormat; - //using FSTPostingsFormat = Lucene.Net.Codecs.memory.FSTPostingsFormat; - //using FSTPulsing41PostingsFormat = Lucene.Net.Codecs.memory.FSTPulsing41PostingsFormat; + using DirectPostingsFormat = Lucene.Net.Codecs.Memory.DirectPostingsFormat; + using MemoryDocValuesFormat = Lucene.Net.Codecs.Memory.MemoryDocValuesFormat; + using MemoryPostingsFormat = Lucene.Net.Codecs.Memory.MemoryPostingsFormat; + using MockFixedIntBlockPostingsFormat = Lucene.Net.Codecs.MockIntBlock.MockFixedIntBlockPostingsFormat; + using MockVariableIntBlockPostingsFormat = Lucene.Net.Codecs.MockIntBlock.MockVariableIntBlockPostingsFormat; + using MockRandomPostingsFormat = Lucene.Net.Codecs.MockRandom.MockRandomPostingsFormat; + using MockSepPostingsFormat = Lucene.Net.Codecs.MockSep.MockSepPostingsFormat; + using NestedPulsingPostingsFormat = Lucene.Net.Codecs.NestedPulsing.NestedPulsingPostingsFormat; + using Pulsing41PostingsFormat = Lucene.Net.Codecs.Pulsing.Pulsing41PostingsFormat; + using SimpleTextDocValuesFormat = Lucene.Net.Codecs.SimpleText.SimpleTextDocValuesFormat; + using SimpleTextPostingsFormat = Lucene.Net.Codecs.SimpleText.SimpleTextPostingsFormat; + using FSTOrdPostingsFormat = Lucene.Net.Codecs.Memory.FSTOrdPostingsFormat; + using FSTOrdPulsing41PostingsFormat = Lucene.Net.Codecs.Memory.FSTOrdPulsing41PostingsFormat; + using FSTPostingsFormat = Lucene.Net.Codecs.Memory.FSTPostingsFormat; + using FSTPulsing41PostingsFormat = Lucene.Net.Codecs.Memory.FSTPulsing41PostingsFormat; using TestUtil = Lucene.Net.Util.TestUtil; /// <summary> /// Codec that assigns per-field random postings formats. - /// <p> + /// <para/> /// The same field/format assignment will happen regardless of order, /// a hash is computed up front that determines the mapping. /// this means fields can be put into things like HashSets and added to @@ -69,110 +68,119 @@ namespace Lucene.Net.Index { /// <summary> /// Shuffled list of postings formats to use for new mappings </summary> - private IList<PostingsFormat> Formats = new List<PostingsFormat>(); + private IList<PostingsFormat> formats = new List<PostingsFormat>(); /// <summary> /// Shuffled list of docvalues formats to use for new mappings </summary> - private IList<DocValuesFormat> DvFormats = new List<DocValuesFormat>(); + private IList<DocValuesFormat> dvFormats = new List<DocValuesFormat>(); /// <summary> /// unique set of format names this codec knows about </summary> - public HashSet<string> FormatNames = new HashSet<string>(); + public ISet<string> formatNames = new HashSet<string>(); /// <summary> /// unique set of docvalues format names this codec knows about </summary> - public HashSet<string> DvFormatNames = new HashSet<string>(); + public ISet<string> dvFormatNames = new HashSet<string>(); /// <summary> /// memorized field->postingsformat mappings </summary> // note: we have to sync this map even though its just for debugging/toString, // otherwise DWPT's .toString() calls that iterate over the map can // cause concurrentmodificationexception if indexwriter's infostream is on - private readonly IDictionary<string, PostingsFormat> PreviousMappings = new ConcurrentHashMapWrapper<string, PostingsFormat>(new Dictionary<string, PostingsFormat>()); + private readonly IDictionary<string, PostingsFormat> previousMappings = new ConcurrentHashMapWrapper<string, PostingsFormat>(new Dictionary<string, PostingsFormat>()); - private IDictionary<string, DocValuesFormat> PreviousDVMappings = new ConcurrentHashMapWrapper<string, DocValuesFormat>(new Dictionary<string, DocValuesFormat>()); - private readonly int PerFieldSeed; + private IDictionary<string, DocValuesFormat> previousDVMappings = new ConcurrentHashMapWrapper<string, DocValuesFormat>(new Dictionary<string, DocValuesFormat>()); + private readonly int perFieldSeed; public override PostingsFormat GetPostingsFormatForField(string name) { - PostingsFormat codec = PreviousMappings[name]; + PostingsFormat codec = previousMappings[name]; if (codec == null) { - codec = Formats[Math.Abs(PerFieldSeed ^ name.GetHashCode()) % Formats.Count]; - /*if (codec is SimpleTextPostingsFormat && PerFieldSeed % 5 != 0) + codec = formats[Math.Abs(perFieldSeed ^ name.GetHashCode()) % formats.Count]; + if (codec is SimpleTextPostingsFormat && perFieldSeed % 5 != 0) { // make simpletext rarer, choose again - codec = Formats[Math.Abs(PerFieldSeed ^ name.ToUpper(CultureInfo.InvariantCulture).GetHashCode()) % Formats.Count]; - }*/ - PreviousMappings[name] = codec; + codec = formats[Math.Abs(perFieldSeed ^ name.ToUpperInvariant().GetHashCode()) % formats.Count]; + } + previousMappings[name] = codec; // Safety: - Debug.Assert(PreviousMappings.Count < 10000, "test went insane"); + Debug.Assert(previousMappings.Count < 10000, "test went insane"); } return codec; } public override DocValuesFormat GetDocValuesFormatForField(string name) { - DocValuesFormat codec = PreviousDVMappings[name]; + DocValuesFormat codec = previousDVMappings[name]; if (codec == null) { - codec = DvFormats[Math.Abs(PerFieldSeed ^ name.GetHashCode()) % DvFormats.Count]; - /*if (codec is SimpleTextDocValuesFormat && PerFieldSeed % 5 != 0) + codec = dvFormats[Math.Abs(perFieldSeed ^ name.GetHashCode()) % dvFormats.Count]; + if (codec is SimpleTextDocValuesFormat && perFieldSeed % 5 != 0) { // make simpletext rarer, choose again - codec = DvFormats[Math.Abs(PerFieldSeed ^ name.ToUpper(CultureInfo.InvariantCulture).GetHashCode()) % DvFormats.Count]; - }*/ - PreviousDVMappings[name] = codec; + codec = dvFormats[Math.Abs(perFieldSeed ^ name.ToUpperInvariant().GetHashCode()) % dvFormats.Count]; + } + previousDVMappings[name] = codec; // Safety: - Debug.Assert(PreviousDVMappings.Count < 10000, "test went insane"); + Debug.Assert(previousDVMappings.Count < 10000, "test went insane"); } return codec; } public RandomCodec(Random random, ISet<string> avoidCodecs) { - this.PerFieldSeed = random.Next(); + this.perFieldSeed = random.Next(); // TODO: make it possible to specify min/max iterms per // block via CL: int minItemsPerBlock = TestUtil.NextInt(random, 2, 100); int maxItemsPerBlock = 2 * (Math.Max(2, minItemsPerBlock - 1)) + random.Next(100); int lowFreqCutoff = TestUtil.NextInt(random, 2, 100); - // LUCENENET TODO: Finish RandomCodec implementation Add(avoidCodecs, new Lucene41PostingsFormat(minItemsPerBlock, maxItemsPerBlock), - /* new FSTPostingsFormat(), new FSTOrdPostingsFormat(), new FSTPulsing41PostingsFormat(1 + random.Next(20)), new FSTOrdPulsing41PostingsFormat(1 + random.Next(20)), - new DirectPostingsFormat(LuceneTestCase.Rarely(random) ? 1 : (LuceneTestCase.Rarely(random) ? int.MaxValue : maxItemsPerBlock), LuceneTestCase.Rarely(random) ? 1 : (LuceneTestCase.Rarely(random) ? int.MaxValue : lowFreqCutoff)), - new Pulsing41PostingsFormat(1 + random.Next(20), minItemsPerBlock, maxItemsPerBlock), new Pulsing41PostingsFormat(1 + random.Next(20), minItemsPerBlock, maxItemsPerBlock), - new TestBloomFilteredLucene41Postings(), new MockSepPostingsFormat(), new MockFixedIntBlockPostingsFormat(TestUtil.NextInt(random, 1, 2000)), - new MockVariableIntBlockPostingsFormat(TestUtil.NextInt(random, 1, 127)), new MockRandomPostingsFormat(random), - new NestedPulsingPostingsFormat(), new Lucene41WithOrds(), new SimpleTextPostingsFormat(), - */ - new AssertingPostingsFormat() - /*new MemoryPostingsFormat(true, random.nextFloat()), new MemoryPostingsFormat(false, random.nextFloat())*/ - ); - - // add pulsing again with (usually) different parameters - //TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene41Postings to be constructed - //with a choice of concrete PostingsFormats. Maybe useful to have a generic means of marking and dealing - //with such "wrapper" classes? - - AddDocValues(avoidCodecs, new Lucene45DocValuesFormat(), /*new DiskDocValuesFormat(), new MemoryDocValuesFormat(), new SimpleTextDocValuesFormat(),*/ new AssertingDocValuesFormat()); - - Collections.Shuffle(Formats); - Collections.Shuffle(DvFormats); + new DirectPostingsFormat(LuceneTestCase.Rarely(random) ? 1 : (LuceneTestCase.Rarely(random) ? int.MaxValue : maxItemsPerBlock), + LuceneTestCase.Rarely(random) ? 1 : (LuceneTestCase.Rarely(random) ? int.MaxValue : lowFreqCutoff)), + new Pulsing41PostingsFormat(1 + random.Next(20), minItemsPerBlock, maxItemsPerBlock), + // add pulsing again with (usually) different parameters + new Pulsing41PostingsFormat(1 + random.Next(20), minItemsPerBlock, maxItemsPerBlock), + //TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene41Postings to be constructed + //with a choice of concrete PostingsFormats. Maybe useful to have a generic means of marking and dealing + //with such "wrapper" classes? + new TestBloomFilteredLucene41Postings(), + new MockSepPostingsFormat(), + new MockFixedIntBlockPostingsFormat(TestUtil.NextInt(random, 1, 2000)), + new MockVariableIntBlockPostingsFormat(TestUtil.NextInt(random, 1, 127)), + new MockRandomPostingsFormat(random), + new NestedPulsingPostingsFormat(), + new Lucene41WithOrds(), + new SimpleTextPostingsFormat(), + new AssertingPostingsFormat(), + new MemoryPostingsFormat(true, random.nextFloat()), + new MemoryPostingsFormat(false, random.nextFloat()) + ); + + AddDocValues(avoidCodecs, + new Lucene45DocValuesFormat(), + new DiskDocValuesFormat(), + new MemoryDocValuesFormat(), + new SimpleTextDocValuesFormat(), + new AssertingDocValuesFormat()); + + Collections.Shuffle(formats); + Collections.Shuffle(dvFormats); // Avoid too many open files: - if (Formats.Count > 4) + if (formats.Count > 4) { - Formats = Formats.SubList(0, 4); + formats = formats.SubList(0, 4); } - if (DvFormats.Count > 4) + if (dvFormats.Count > 4) { - DvFormats = DvFormats.SubList(0, 4); + dvFormats = dvFormats.SubList(0, 4); } } @@ -187,8 +195,8 @@ namespace Lucene.Net.Index { if (!avoidCodecs.Contains(p.Name)) { - Formats.Add(p); - FormatNames.Add(p.Name); + formats.Add(p); + formatNames.Add(p.Name); } } } @@ -199,15 +207,15 @@ namespace Lucene.Net.Index { if (!avoidCodecs.Contains(d.Name)) { - DvFormats.Add(d); - DvFormatNames.Add(d.Name); + dvFormats.Add(d); + dvFormatNames.Add(d.Name); } } } public override string ToString() { - return base.ToString() + ": " + PreviousMappings.ToString() + ", docValues:" + PreviousDVMappings.ToString(); + return base.ToString() + ": " + previousMappings.ToString() + ", docValues:" + previousDVMappings.ToString(); } } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b43c69b8/src/Lucene.Net.TestFramework/Util/TestRuleSetupAndRestoreClassEnv.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Util/TestRuleSetupAndRestoreClassEnv.cs b/src/Lucene.Net.TestFramework/Util/TestRuleSetupAndRestoreClassEnv.cs index bef99b6..ba4be2c 100644 --- a/src/Lucene.Net.TestFramework/Util/TestRuleSetupAndRestoreClassEnv.cs +++ b/src/Lucene.Net.TestFramework/Util/TestRuleSetupAndRestoreClassEnv.cs @@ -530,7 +530,7 @@ namespace Lucene.Net.Util if (codec is RandomCodec && avoidCodecs.Count > 0) { - foreach (string name in ((RandomCodec)codec).FormatNames) + foreach (string name in ((RandomCodec)codec).formatNames) { LuceneTestCase.AssumeFalse("Class not allowed to use postings format: " + name + ".", ShouldAvoidCodec(name)); }
