http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateIndexTask.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateIndexTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateIndexTask.cs new file mode 100644 index 0000000..046ed25 --- /dev/null +++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateIndexTask.cs @@ -0,0 +1,225 @@ +using Lucene.Net.Benchmarks.ByTask.Utils; +using Lucene.Net.Codecs; +using Lucene.Net.Index; +using Lucene.Net.Support; +using Lucene.Net.Util; +using System; +using System.IO; +using System.Text; + +namespace Lucene.Net.Benchmarks.ByTask.Tasks +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Creates an index. + /// </summary> + /// <remarks> + /// Other side effects: index writer object in perfRunData is set. 
+ /// <para/> + /// Relevant properties: + /// <list type="bullet"> + /// <item><term>merge.factor</term><description>(default 10)</description></item> + /// <item><term>max.buffered</term><description>(default no flush)</description></item> + /// <item><term>compound</term><description>(default true)</description></item> + /// <item><term>ram.flush.mb</term><description>[default 0]</description></item> + /// <item><term>merge.policy</term><description>(default Lucene.Net.Index.LogByteSizeMergePolicy, Lucene.Net)</description></item> + /// <item><term>merge.scheduler</term><description>(default Lucene.Net.Index.ConcurrentMergeScheduler, Lucene.Net)</description></item> + /// <item><term>concurrent.merge.scheduler.max.thread.count</term><description>(defaults per ConcurrentMergeScheduler)</description></item> + /// <item><term>concurrent.merge.scheduler.max.merge.count</term><description>(defaults per ConcurrentMergeScheduler)</description></item> + /// <item><term>default.codec</term><description></description></item> + /// </list> + /// <para/> + /// This task also supports a "writer.info.stream" property with the following + /// values: + /// <list type="bullet"> + /// <item><term>SystemOut</term><description>Sets <see cref="IndexWriterConfig.SetInfoStream(InfoStream)"/> to <see cref="SystemConsole.Out"/>.</description></item> + /// <item><term>SystemErr</term><description>Sets <see cref="IndexWriterConfig.SetInfoStream(InfoStream)"/> to <see cref="SystemConsole.Error"/></description></item> + /// <item><term><file_name></term><description> + /// Attempts to create a file given that name and sets <see cref="IndexWriterConfig.SetInfoStream(InfoStream)"/> + /// to that file. If this denotes an invalid file name, or some error occurs, an exception will be thrown. 
/// <summary>
/// Creates an index.
/// </summary>
/// <remarks>
/// Other side effects: index writer object in perfRunData is set.
/// <para/>
/// Relevant properties:
/// <list type="bullet">
///     <item><term>merge.factor</term><description>(default 10)</description></item>
///     <item><term>max.buffered</term><description>(default no flush)</description></item>
///     <item><term>compound</term><description>(default true)</description></item>
///     <item><term>ram.flush.mb</term><description>[default 0]</description></item>
///     <item><term>merge.policy</term><description>(default Lucene.Net.Index.LogByteSizeMergePolicy, Lucene.Net)</description></item>
///     <item><term>merge.scheduler</term><description>(default Lucene.Net.Index.ConcurrentMergeScheduler, Lucene.Net)</description></item>
///     <item><term>concurrent.merge.scheduler.max.thread.count</term><description>(defaults per ConcurrentMergeScheduler)</description></item>
///     <item><term>concurrent.merge.scheduler.max.merge.count</term><description>(defaults per ConcurrentMergeScheduler)</description></item>
///     <item><term>default.codec</term><description></description></item>
/// </list>
/// <para/>
/// This task also supports a "writer.info.stream" property with the following
/// values:
/// <list type="bullet">
///     <item><term>SystemOut</term><description>Sets <see cref="IndexWriterConfig.SetInfoStream(InfoStream)"/> to <see cref="SystemConsole.Out"/>.</description></item>
///     <item><term>SystemErr</term><description>Sets <see cref="IndexWriterConfig.SetInfoStream(InfoStream)"/> to <see cref="SystemConsole.Error"/>.</description></item>
///     <item><term>&lt;file_name&gt;</term><description>
///     Attempts to create a file given that name and sets <see cref="IndexWriterConfig.SetInfoStream(InfoStream)"/>
///     to that file. If this denotes an invalid file name, or some error occurs, an exception will be thrown.
///     </description></item>
/// </list>
/// </remarks>
public class CreateIndexTask : PerfTask
{
    public CreateIndexTask(PerfRunData runData)
        : base(runData)
    {
    }

    /// <summary>
    /// Resolves the "deletion.policy" property to an <see cref="IndexDeletionPolicy"/> instance.
    /// </summary>
    /// <param name="config">Benchmark configuration the property is read from.</param>
    /// <returns>
    /// <see cref="NoDeletionPolicy.INSTANCE"/> when that type is configured, otherwise a new
    /// instance created through the type's parameterless constructor.
    /// </returns>
    /// <exception cref="Exception">The type name cannot be resolved or the type cannot be instantiated.</exception>
    public static IndexDeletionPolicy GetIndexDeletionPolicy(Config config)
    {
        string deletionPolicyName = config.Get("deletion.policy", "Lucene.Net.Index.KeepOnlyLastCommitDeletionPolicy, Lucene.Net");
        Type deletionPolicyType = Type.GetType(deletionPolicyName);
        if (deletionPolicyType == null)
        {
            throw new Exception("Unrecognized deletion policy type '" + deletionPolicyName + "'");
        }

        if (deletionPolicyType.Equals(typeof(NoDeletionPolicy)))
        {
            // NoDeletionPolicy is obtained through its shared INSTANCE rather than constructed.
            return NoDeletionPolicy.INSTANCE;
        }

        try
        {
            return (IndexDeletionPolicy)Activator.CreateInstance(deletionPolicyType);
        }
        catch (Exception e)
        {
            throw new Exception("unable to instantiate class '" + deletionPolicyName + "' as IndexDeletionPolicy", e);
        }
    }

    /// <summary>
    /// Opens a writer in <see cref="OpenMode.CREATE"/> mode and stores it in the run data.
    /// </summary>
    /// <returns>Always 1.</returns>
    public override int DoLogic()
    {
        PerfRunData runData = RunData;
        Config config = runData.Config;
        runData.IndexWriter = ConfigureWriter(config, runData, OpenMode.CREATE, null);
        return 1;
    }

    /// <summary>
    /// Builds an <see cref="IndexWriterConfig"/> from the benchmark properties listed on this class.
    /// </summary>
    /// <param name="config">Benchmark configuration to read properties from.</param>
    /// <param name="runData">Run data supplying the analyzer.</param>
    /// <param name="mode">Open mode assigned to the configuration.</param>
    /// <param name="commit">Optional commit point; ignored when <c>null</c>.</param>
    /// <returns>The populated writer configuration.</returns>
    /// <exception cref="Exception">A configured type name cannot be resolved or instantiated.</exception>
    public static IndexWriterConfig CreateWriterConfig(Config config, PerfRunData runData, OpenMode mode, IndexCommit commit)
    {
        // :Post-Release-Update-Version.LUCENE_XY:
        LuceneVersion version = (LuceneVersion)Enum.Parse(typeof(LuceneVersion), config.Get("writer.version", LuceneVersion.LUCENE_48.ToString()));
        IndexWriterConfig iwConf = new IndexWriterConfig(version, runData.Analyzer);
        iwConf.OpenMode = mode;
        iwConf.IndexDeletionPolicy = GetIndexDeletionPolicy(config);
        if (commit != null)
        {
            iwConf.IndexCommit = commit;
        }

        ConfigureMergeScheduler(config, iwConf);
        ConfigureCodec(config, iwConf);
        ConfigureMergePolicy(config, iwConf);

        double ramBuffer = config.Get("ram.flush.mb", OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
        int maxBuffered = config.Get("max.buffered", OpenIndexTask.DEFAULT_MAX_BUFFERED);
        // The order of the two assignments depends on whether doc-count flushing is disabled;
        // it is kept exactly as in the original code.
        if (maxBuffered == IndexWriterConfig.DISABLE_AUTO_FLUSH)
        {
            iwConf.RAMBufferSizeMB = ramBuffer;
            iwConf.MaxBufferedDocs = maxBuffered;
        }
        else
        {
            iwConf.MaxBufferedDocs = maxBuffered;
            iwConf.RAMBufferSizeMB = ramBuffer;
        }

        return iwConf;
    }

    /// <summary>
    /// Applies the "merge.scheduler" property (and, for <see cref="ConcurrentMergeScheduler"/>,
    /// the "concurrent.merge.scheduler.*" limits) to <paramref name="iwConf"/>.
    /// </summary>
    private static void ConfigureMergeScheduler(Config config, IndexWriterConfig iwConf)
    {
        string mergeScheduler = config.Get("merge.scheduler",
                                           "Lucene.Net.Index.ConcurrentMergeScheduler, Lucene.Net");
        Type mergeSchedulerType = Type.GetType(mergeScheduler);
        if (mergeSchedulerType == null)
        {
            throw new Exception("Unrecognized merge scheduler type '" + mergeScheduler + "'");
        }

        if (mergeSchedulerType.Equals(typeof(NoMergeScheduler)))
        {
            iwConf.MergeScheduler = NoMergeScheduler.INSTANCE;
            return;
        }

        try
        {
            iwConf.MergeScheduler = (IMergeScheduler)Activator.CreateInstance(mergeSchedulerType);
        }
        catch (Exception e)
        {
            throw new Exception("unable to instantiate class '" + mergeScheduler + "' as merge scheduler", e);
        }

        // Compare the resolved type, not the configured string: the configured name is
        // assembly-qualified (see the default above), so a comparison against the bare
        // type name never matched and the limits below were silently skipped.
        if (mergeSchedulerType.Equals(typeof(ConcurrentMergeScheduler)))
        {
            ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler)iwConf.MergeScheduler;
            int maxThreadCount = config.Get("concurrent.merge.scheduler.max.thread.count", ConcurrentMergeScheduler.DEFAULT_MAX_THREAD_COUNT);
            int maxMergeCount = config.Get("concurrent.merge.scheduler.max.merge.count", ConcurrentMergeScheduler.DEFAULT_MAX_MERGE_COUNT);
            cms.SetMaxMergesAndThreads(maxMergeCount, maxThreadCount);
        }
    }

    /// <summary>
    /// Applies the optional "default.codec" property to <paramref name="iwConf"/>.
    /// </summary>
    private static void ConfigureCodec(Config config, IndexWriterConfig iwConf)
    {
        string defaultCodec = config.Get("default.codec", null);
        if (defaultCodec == null)
        {
            return;
        }

        try
        {
            // An unresolvable name yields a null Type; CreateInstance then throws and the
            // failure is reported through the wrapper exception below.
            Type clazz = Type.GetType(defaultCodec);
            iwConf.Codec = (Codec)Activator.CreateInstance(clazz);
        }
        catch (Exception e)
        {
            throw new Exception("Couldn't instantiate Codec: " + defaultCodec, e);
        }
    }

    /// <summary>
    /// Applies the "merge.policy", "compound" and "merge.factor" properties to <paramref name="iwConf"/>.
    /// </summary>
    private static void ConfigureMergePolicy(Config config, IndexWriterConfig iwConf)
    {
        string mergePolicy = config.Get("merge.policy",
                                        "Lucene.Net.Index.LogByteSizeMergePolicy, Lucene.Net");
        bool isCompound = config.Get("compound", true);
        Type mergePolicyType = Type.GetType(mergePolicy);
        if (mergePolicyType == null)
        {
            throw new Exception("Unrecognized merge policy type '" + mergePolicy + "'");
        }

        if (mergePolicyType.Equals(typeof(NoMergePolicy)))
        {
            iwConf.MergePolicy = isCompound ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES;
            return;
        }

        try
        {
            iwConf.MergePolicy = (MergePolicy)Activator.CreateInstance(mergePolicyType);
        }
        catch (Exception e)
        {
            throw new Exception("unable to instantiate class '" + mergePolicy + "' as merge policy", e);
        }

        iwConf.MergePolicy.NoCFSRatio = isCompound ? 1.0 : 0.0;
        if (iwConf.MergePolicy is LogMergePolicy)
        {
            LogMergePolicy logMergePolicy = (LogMergePolicy)iwConf.MergePolicy;
            logMergePolicy.MergeFactor = config.Get("merge.factor", OpenIndexTask.DEFAULT_MERGE_PFACTOR);
        }
    }

    /// <summary>
    /// Creates an <see cref="IndexWriter"/> over the run data's directory, honoring the
    /// "writer.info.stream" property in addition to everything handled by
    /// <see cref="CreateWriterConfig(Config, PerfRunData, OpenMode, IndexCommit)"/>.
    /// </summary>
    /// <param name="config">Benchmark configuration to read properties from.</param>
    /// <param name="runData">Run data supplying the analyzer and directory.</param>
    /// <param name="mode">Open mode for the writer.</param>
    /// <param name="commit">Optional commit point; may be <c>null</c>.</param>
    /// <returns>The newly opened writer.</returns>
    public static IndexWriter ConfigureWriter(Config config, PerfRunData runData, OpenMode mode, IndexCommit commit)
    {
        IndexWriterConfig iwc = CreateWriterConfig(config, runData, mode, commit);
        string infoStreamVal = config.Get("writer.info.stream", null);
        if (infoStreamVal != null)
        {
            if (infoStreamVal.Equals("SystemOut", StringComparison.Ordinal))
            {
                iwc.SetInfoStream(SystemConsole.Out);
            }
            else if (infoStreamVal.Equals("SystemErr", StringComparison.Ordinal))
            {
                iwc.SetInfoStream(SystemConsole.Error);
            }
            else
            {
                // Any other value is treated as a file name; the file is created or truncated.
                FileInfo f = new FileInfo(infoStreamVal);
                iwc.SetInfoStream(new StreamWriter(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), Encoding.GetEncoding(0)));
            }
        }

        return new IndexWriter(runData.Directory, iwc);
    }
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateTaxonomyIndexTask.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateTaxonomyIndexTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateTaxonomyIndexTask.cs new file mode 100644 index 0000000..15ec2ee --- /dev/null +++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateTaxonomyIndexTask.cs @@ -0,0 +1,42 @@ +using Lucene.Net.Facet.Taxonomy.Directory; +using Lucene.Net.Index; + +namespace Lucene.Net.Benchmarks.ByTask.Tasks +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Create a taxonomy index. + /// <para/> + /// Other side effects: taxonomy writer object in perfRunData is set. 
/// <summary>
/// Task that creates a taxonomy index.
/// <para/>
/// Other side effects: the taxonomy writer object in the run data is set.
/// </summary>
public class CreateTaxonomyIndexTask : PerfTask
{
    public CreateTaxonomyIndexTask(PerfRunData runData)
        : base(runData)
    {
    }

    /// <summary>
    /// Opens a <see cref="DirectoryTaxonomyWriter"/> in <see cref="OpenMode.CREATE"/> mode over
    /// the run data's taxonomy directory and stores it in the run data.
    /// </summary>
    /// <returns>Always 1.</returns>
    public override int DoLogic()
    {
        PerfRunData data = RunData;
        DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(data.TaxonomyDir, OpenMode.CREATE);
        data.TaxonomyWriter = taxonomyWriter;
        return 1;
    }
}
/// <summary>
/// Runs forceMerge on the index.
/// <para/>
/// Other side effects: none.
/// </summary>
public class ForceMergeTask : PerfTask
{
    // -1 marks "no segment count supplied yet"; SetParams overwrites it.
    private int maxNumSegments = -1;

    public ForceMergeTask(PerfRunData runData)
        : base(runData)
    {
    }

    /// <summary>
    /// Force-merges the run data's index writer down to the configured number of segments.
    /// </summary>
    /// <returns>Always 1.</returns>
    /// <exception cref="InvalidOperationException">No segment count was supplied via <see cref="SetParams(string)"/>.</exception>
    public override int DoLogic()
    {
        if (maxNumSegments != -1)
        {
            IndexWriter writer = RunData.IndexWriter;
            writer.ForceMerge(maxNumSegments);
            return 1;
        }

        throw new InvalidOperationException("required argument (maxNumSegments) was not specified");
    }

    /// <summary>
    /// Parses the task parameter as a number (invariant culture) and truncates it to the
    /// target segment count.
    /// </summary>
    /// <param name="params">Textual segment count.</param>
    public override void SetParams(string @params)
    {
        base.SetParams(@params);
        double parsed = double.Parse(@params, CultureInfo.InvariantCulture);
        maxNumSegments = (int)parsed;
    }

    /// <summary>This task accepts a parameter.</summary>
    public override bool SupportsParams
    {
        get { return true; }
    }
}
/// <summary>
/// Spawns a BG thread that periodically (defaults to 3.0
/// seconds, but accepts param in seconds) wakes up and asks
/// IndexWriter for a near real-time reader.  Then runs a
/// single query (body: 1) sorted by docdate, and prints
/// time to reopen and time to run the search.
/// <para/>
/// @lucene.experimental It's also not generally usable, eg
/// you cannot change which query is executed.
/// </summary>
public class NearRealtimeReaderTask : PerfTask
{
    // Pause between reopen attempts, in milliseconds.
    internal long pauseMSec = 3000L;

    // Number of reopens recorded during the last DoLogic run, and their durations in milliseconds.
    internal int reopenCount;
    internal int[] reopenTimes = new int[1];

    public NearRealtimeReaderTask(PerfRunData runData)
        : base(runData)
    {
    }

    /// <summary>
    /// Opens a near real-time reader on the run data's writer and keeps reopening it,
    /// at most once per pause interval, until <c>Stop</c> is set.
    /// </summary>
    /// <returns>The number of reopens that produced a new reader.</returns>
    /// <exception cref="Exception">No writer is open, or a reader is already open.</exception>
    public override int DoLogic()
    {
        PerfRunData data = RunData;

        // Preconditions: a writer must be open and no reader may be registered yet.
        IndexWriter writer = data.IndexWriter;
        if (writer == null)
        {
            throw new Exception("please open the writer before invoking NearRealtimeReader");
        }

        if (data.GetIndexReader() != null)
        {
            throw new Exception("please close the existing reader before invoking NearRealtimeReader");
        }

        // Get initial reader
        long lastAttemptMs = Support.Time.CurrentTimeMilliseconds();
        DirectoryReader current = DirectoryReader.Open(writer, true);
        data.SetIndexReader(current);
        // Transfer our reference to runData
        current.DecRef();

        // TODO: gather basic metrics for reporting -- eg mean,
        // stddev, min/max reopen latencies

        // Parent sequence sets stopNow
        reopenCount = 0;
        while (!Stop)
        {
            // Sleep for whatever is left of the pause interval since the previous attempt.
            long remainingMs = pauseMSec - (Support.Time.CurrentTimeMilliseconds() - lastAttemptMs);
            if (remainingMs > 0)
            {
                Thread.Sleep((int)remainingMs);
            }

            lastAttemptMs = Support.Time.CurrentTimeMilliseconds();
            DirectoryReader reopened = DirectoryReader.OpenIfChanged(current);
            if (reopened == null)
            {
                // No new reader was returned; try again after the next pause.
                continue;
            }

            int elapsedMs = (int)(Support.Time.CurrentTimeMilliseconds() - lastAttemptMs);
            if (reopenTimes.Length == reopenCount)
            {
                reopenTimes = ArrayUtil.Grow(reopenTimes, 1 + reopenCount);
            }
            reopenTimes[reopenCount] = elapsedMs;
            reopenCount++;

            // TODO: somehow we need to enable warming, here
            data.SetIndexReader(reopened);
            // Transfer our reference to runData
            reopened.DecRef();
            current = reopened;
        }
        Stop = false;

        return reopenCount;
    }

    /// <summary>
    /// Sets the pause between reopen attempts.
    /// </summary>
    /// <param name="params">Pause in seconds, parsed with the invariant culture.</param>
    public override void SetParams(string @params)
    {
        base.SetParams(@params);
        float seconds = float.Parse(@params, CultureInfo.InvariantCulture);
        pauseMSec = (long)(1000.0 * seconds);
    }

    /// <summary>
    /// When disposing, prints the recorded reopen times to the console.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (!disposing)
        {
            return;
        }

        SystemConsole.WriteLine("NRT reopen times:");
        for (int idx = 0; idx < reopenCount; idx++)
        {
            SystemConsole.Write(" " + reopenTimes[idx]);
        }
        SystemConsole.WriteLine();
    }

    /// <summary>This task accepts a parameter.</summary>
    public override bool SupportsParams
    {
        get { return true; }
    }
}
/// <summary>
/// Create a new <see cref="Analyzer"/> and set it in the RunData for use by all future tasks.
/// </summary>
public class NewAnalyzerTask : PerfTask
{
    private IList<string> analyzerNames;
    private int current;

    public NewAnalyzerTask(PerfRunData runData)
        : base(runData)
    {
        analyzerNames = new List<string>();
    }

    /// <summary>
    /// Instantiates the analyzer type named by <paramref name="className"/>, preferring a
    /// constructor that takes a <see cref="LuceneVersion"/> and falling back to the
    /// parameterless constructor.
    /// </summary>
    /// <param name="className">Type name of the analyzer, ideally assembly-qualified.</param>
    /// <returns>The new analyzer instance.</returns>
    /// <exception cref="TypeLoadException">The type name cannot be resolved.</exception>
    public static Analyzer CreateAnalyzer(string className)
    {
        Type clazz = ResolveAnalyzerType(className);
        try
        {
            // first try to use a ctor with version parameter (needed for many new Analyzers that have no default one anymore
            return (Analyzer)Activator.CreateInstance(clazz,
#pragma warning disable 612, 618
                LuceneVersion.LUCENE_CURRENT);
#pragma warning restore 612, 618
        }
        catch (MissingMethodException /*nsme*/)
        {
            // otherwise use default ctor
            return (Analyzer)Activator.CreateInstance(clazz);
        }
    }

    /// <summary>
    /// Resolves an analyzer type name, throwing <see cref="TypeLoadException"/> when it cannot
    /// be found so that callers' fallback logic (see <see cref="DoLogic"/>) can react.
    /// </summary>
    private static Type ResolveAnalyzerType(string className)
    {
        // Type.GetType returns null (it does not throw) for an unknown type name.
        Type clazz = Type.GetType(className);
        if (clazz == null && className.IndexOf(',') < 0)
        {
            // The name carries no assembly part. Retry against the assembly used by this
            // class's own default analyzer name.
            // NOTE(review): assumes the shortened Lucene.Net.Analysis.* names live in
            // Lucene.Net.Analysis.Common - confirm.
            clazz = Type.GetType(className + ", Lucene.Net.Analysis.Common");
        }

        if (clazz == null)
        {
            throw new TypeLoadException("Could not load analyzer type '" + className + "'");
        }

        return clazz;
    }

    /// <summary>
    /// Selects the next configured analyzer name (cycling through the list), creates the
    /// analyzer and stores it in the run data.
    /// </summary>
    /// <returns>Always 1.</returns>
    /// <exception cref="Exception">The analyzer could not be created; the cause is the inner exception.</exception>
    public override int DoLogic()
    {
        string analyzerName = null;
        try
        {
            if (current >= analyzerNames.Count)
            {
                current = 0;
            }
            analyzerName = analyzerNames[current++];
            Analyzer analyzer = null;
            if (null == analyzerName || 0 == analyzerName.Length)
            {
                analyzerName = "Lucene.Net.Analysis.Standard.StandardAnalyzer, Lucene.Net.Analysis.Common";
            }
            // First, lookup analyzerName as a named analyzer factory
            AnalyzerFactory factory;
            if (RunData.AnalyzerFactories.TryGetValue(analyzerName, out factory) && null != factory)
            {
                analyzer = factory.Create();
            }
            else
            {
                if (analyzerName.Contains("."))
                {
                    if (analyzerName.StartsWith("Standard.", StringComparison.Ordinal))
                    {
                        analyzerName = "Lucene.Net.Analysis." + analyzerName;
                    }
                    analyzer = CreateAnalyzer(analyzerName);
                }
                else
                { // No package
                    try
                    {
                        // Attempt to instantiate a core analyzer
                        string coreClassName = "Lucene.Net.Analysis.Core." + analyzerName;
                        analyzer = CreateAnalyzer(coreClassName);
                        analyzerName = coreClassName;
                    }
                    catch (TypeLoadException /*e*/)
                    {
                        // If not a core analyzer, try the base analysis package
                        analyzerName = "Lucene.Net.Analysis." + analyzerName;
                        analyzer = CreateAnalyzer(analyzerName);
                    }
                }
            }
            RunData.Analyzer = analyzer;
        }
        catch (Exception e)
        {
            throw new Exception("Error creating Analyzer: " + analyzerName, e);
        }
        return 1;
    }

    /// <summary>
    /// Set the params (analyzerName only), Comma-separate list of Analyzer class names.  If the Analyzer lives in
    /// Lucene.Net.Analysis, the name can be shortened by dropping the Lucene.Net.Analysis part of the Fully Qualified Class Name.
    /// <para/>
    /// Analyzer names may also refer to previously defined AnalyzerFactory's.
    /// <para/>
    /// Example Declaration:
    /// <code>
    /// {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, Standard.StandardAnalyzer) >
    /// </code>
    /// <para/>
    /// Example AnalyzerFactory usage:
    /// <code>
    /// -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
    /// -NewAnalyzer('whitespace tokenized')
    /// </code>
    /// </summary>
    /// <param name="params">analyzerClassName, or empty for the StandardAnalyzer</param>
    public override void SetParams(string @params)
    {
        base.SetParams(@params);
        StreamTokenizer stok = new StreamTokenizer(new StringReader(@params));
        stok.QuoteChar('"');
        stok.QuoteChar('\'');
        stok.IsEOLSignificant = false;
        stok.OrdinaryChar(',');
        try
        {
            while (stok.NextToken() != StreamTokenizer.TT_EOF)
            {
                switch (stok.TokenType)
                {
                    case ',':
                        {
                            // Do nothing
                            break;
                        }
                    case '\'':
                    case '\"':
                    case StreamTokenizer.TT_WORD:
                        {
                            analyzerNames.Add(stok.StringValue);
                            break;
                        }
                    default:
                        {
                            throw new Exception("Unexpected token: " + stok.ToString());
                        }
                }
            }
        }
        catch (Exception e)
        {
            if (e.Message.StartsWith("Line #", StringComparison.Ordinal))
            {
                // Already carries line information; rethrow without resetting the stack trace.
                throw;
            }
            else
            {
                throw new Exception("Line #" + (stok.LineNumber + AlgLineNum) + ": ", e);
            }
        }
    }

    /// <seealso cref="PerfTask.SupportsParams"/>
    public override bool SupportsParams
    {
        get { return true; }
    }
}
Icu.Collation; +using Lucene.Net.Analysis; +using Lucene.Net.Support; +using Lucene.Net.Util; +using System; +using System.Globalization; + +namespace Lucene.Net.Benchmarks.ByTask.Tasks +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// LUCENENET specific extension methods for the <see cref="NewCollationAnalyzerTask.Implementation"/> enumeration. 
/// <summary>
/// LUCENENET specific extension methods for the <see cref="NewCollationAnalyzerTask.Implementation"/> enumeration.
/// </summary>
public static class ImplementationExtensions
{
    /// <summary>
    /// Returns the analyzer type used for the given collation implementation.
    /// Every value currently maps to the ICU collation key analyzer.
    /// </summary>
    public static Type GetAnalyzerType(this NewCollationAnalyzerTask.Implementation impl)
    {
        switch (impl)
        {
            // LUCENENET: the JDK implementation is not supported, so ICU is also the default.
            case NewCollationAnalyzerTask.Implementation.ICU:
            default:
                return typeof(Lucene.Net.Collation.ICUCollationKeyAnalyzer);
        }
    }

    /// <summary>
    /// Returns the collator type used for the given collation implementation.
    /// Every value currently maps to the ICU collator.
    /// </summary>
    public static Type GetCollatorType(this NewCollationAnalyzerTask.Implementation impl)
    {
        switch (impl)
        {
            // LUCENENET: the JDK implementation is not supported, so ICU is also the default.
            case NewCollationAnalyzerTask.Implementation.ICU:
            default:
                return typeof(Icu.Collation.Collator);
        }
    }
}

/// <summary>
/// Task that replaces the run data's analyzer with a collation key analyzer built for the
/// locale currently stored in the run data.
/// </summary>
public class NewCollationAnalyzerTask : PerfTask
{
    /// <summary>
    /// Different Collation implementations: currently
    /// limited to what is provided in ICU.
    /// <para/>
    /// See <a href="http://site.icu-project.org/charts/collation-icu4j-sun">Comparison of implementations</a>
    /// </summary>
    public enum Implementation
    {
        //JDK, // LUCENENET: Not supported
        ICU
    }

    private Implementation impl = Implementation.ICU; //Implementation.JDK;

    public NewCollationAnalyzerTask(PerfRunData runData)
        : base(runData)
    {
    }

    /// <summary>
    /// Creates a collator for <paramref name="locale"/> and wraps it in the analyzer type
    /// associated with <paramref name="impl"/>.
    /// </summary>
    internal static Analyzer CreateAnalyzer(CultureInfo locale, Implementation impl)
    {
        // LUCENENET specific - senseless to use reflection here because we only have one
        // collator.
        object collator = Collator.Create(locale, Collator.Fallback.FallbackAllowed);
        Type analyzerType = impl.GetAnalyzerType();

        return (Analyzer)Activator.CreateInstance(analyzerType,
#pragma warning disable 612, 618
            LuceneVersion.LUCENE_CURRENT,
#pragma warning restore 612, 618
            collator);
    }

    /// <summary>
    /// Builds the collation analyzer for the run data's locale, installs it in the run data
    /// and reports the change on the console.
    /// </summary>
    /// <returns>Always 1.</returns>
    /// <exception cref="Exception">No locale is set, or the analyzer could not be created.</exception>
    public override int DoLogic()
    {
        try
        {
            CultureInfo locale = RunData.Locale;
            if (locale == null)
            {
                throw new Exception(
                    "Locale must be set with the NewLocale task!");
            }

            Analyzer collationAnalyzer = CreateAnalyzer(locale, impl);
            RunData.Analyzer = collationAnalyzer;
            SystemConsole.WriteLine("Changed Analyzer to: "
                + collationAnalyzer.GetType().Name + "(" + locale + ")");
        }
        catch (Exception e)
        {
            throw new Exception("Error creating Analyzer: impl=" + impl, e);
        }
        return 1;
    }

    /// <summary>
    /// Parses a comma-separated list of <c>key:value</c> pairs. Only <c>impl:icu</c>
    /// (value matched case-insensitively) is accepted.
    /// </summary>
    /// <param name="params">Task parameter string.</param>
    /// <exception cref="Exception">An unknown key or value is supplied.</exception>
    public override void SetParams(string @params)
    {
        base.SetParams(@params);

        StringTokenizer pairs = new StringTokenizer(@params, ",");
        while (pairs.HasMoreTokens())
        {
            string param = pairs.NextToken();
            StringTokenizer parts = new StringTokenizer(param, ":");
            string key = parts.NextToken();
            string value = parts.NextToken();

            // for now we only support the "impl" parameter.
            // TODO: add strength, decomposition, etc
            bool isImplKey = key.Equals("impl", StringComparison.Ordinal);
            if (!isImplKey || !value.Equals("icu", StringComparison.OrdinalIgnoreCase))
            {
                throw new Exception("Unknown parameter " + param);
            }

            impl = Implementation.ICU;
        }
    }

    /// <summary>This task accepts parameters.</summary>
    public override bool SupportsParams
    {
        get { return true; }
    }
}
/// <summary>
/// Set a <see cref="CultureInfo"/> for use in benchmarking.
/// </summary>
/// <remarks>
/// Locales can be specified in the following ways:
/// <list type="bullet">
///     <item><description><c>de</c>: Language "de"</description></item>
///     <item><description><c>en,US</c>: Language "en", country "US"</description></item>
///     <item><description><c>no-NO</c>: Language "no", country "NO"</description></item>
///     <item><description><c>ROOT</c>: The <see cref="CultureInfo.InvariantCulture"/></description></item>
/// </list>
/// </remarks>
public class NewLocaleTask : PerfTask
{
    private string culture;
    //private string language;
    //private string country;
    //private string variant;

    /// <summary>
    /// Create a new <see cref="CultureInfo"/> and set it in the RunData for
    /// use by all future tasks.
    /// </summary>
    /// <param name="runData"></param>
    public NewLocaleTask(PerfRunData runData)
        : base(runData)
    {
    }

    /// <summary>
    /// Maps a culture name to a <see cref="CultureInfo"/>.
    /// </summary>
    /// <param name="culture">Culture name, "ROOT" (case-insensitive) for the invariant culture, or null/empty.</param>
    /// <returns>
    /// <c>null</c> for a null or empty name, <see cref="CultureInfo.InvariantCulture"/> for
    /// "ROOT", otherwise a new <see cref="CultureInfo"/> for the name.
    /// </returns>
    internal static CultureInfo CreateLocale(string culture /*String language, String country, String variant*/)
    {
        if (culture == null || culture.Length == 0)
        {
            return null;
        }

        if (culture.Equals("ROOT", StringComparison.OrdinalIgnoreCase))
        {
            return CultureInfo.InvariantCulture; // Default culture
        }

        return new CultureInfo(culture);
    }

    /// <summary>
    /// Creates the configured locale, stores it in the run data and reports the change on the console.
    /// </summary>
    /// <returns>Always 1.</returns>
    public override int DoLogic()
    {
        CultureInfo locale = CreateLocale(culture /*language, country, variant*/);
        RunData.Locale = locale;

        string description;
        if (locale == null)
        {
            description = "null";
        }
        else if (locale.Name.Length == 0)
        {
            // The invariant culture has an empty Name. Its EnglishName is not empty,
            // so the previous EnglishName-based check never reported "root locale".
            description = "root locale";
        }
        else
        {
            description = locale.ToString();
        }

        SystemConsole.WriteLine("Changed Locale to: " + description);
        return 1;
    }

    /// <summary>
    /// Parses up to two comma-separated tokens (language, then country) into a culture name
    /// of the form <c>language-country</c>. Any further tokens are ignored.
    /// </summary>
    /// <param name="params">Task parameter string.</param>
    public override void SetParams(string @params)
    {
        base.SetParams(@params);
        culture = "";
        StringTokenizer st = new StringTokenizer(@params, ",");
        if (st.HasMoreTokens())
        {
            culture = st.NextToken();
        }
        if (st.HasMoreTokens())
        {
            culture += "-" + st.NextToken();
        }
        // A third token (the "variant" in the Java original) has no equivalent here and is not read.
    }

    /// <summary>This task accepts parameters.</summary>
    public override bool SupportsParams
    {
        get { return true; }
    }
}
/// <summary>
/// Increment the counter for properties maintained by Round Number.
/// <para/>
/// Other side effects: if there are props by round number, log value change.
/// </summary>
public class NewRoundTask : PerfTask
{
    /// <summary>
    /// Create a new <see cref="NewRoundTask"/> bound to the given run data.
    /// </summary>
    /// <param name="runData">The run data whose config is advanced to a new round.</param>
    public NewRoundTask(PerfRunData runData)
        : base(runData)
    {
    }

    /// <summary>
    /// Calls <c>NewRound()</c> on the run data's config.
    /// </summary>
    /// <returns>Always 0.</returns>
    public override int DoLogic()
    {
        var roundConfig = RunData.Config;
        roundConfig.NewRound();
        return 0;
    }

    /// <summary>
    /// Always <c>true</c>: this task never records statistics.
    /// </summary>
    /// <seealso cref="PerfTask.ShouldNotRecordStats"/>
    protected override bool ShouldNotRecordStats
    {
        get
        {
            return true;
        }
    }
}
/// <summary>
/// Open an index writer.
/// </summary>
/// <remarks>
/// Other side effects: index writer object in perfRunData is set.
/// <para/>
/// Relevant properties:
/// <list type="bullet">
///     <item><term>merge.factor</term><description></description></item>
///     <item><term>max.buffered</term><description></description></item>
///     <item><term>max.field.length</term><description></description></item>
///     <item><term>ram.flush.mb</term><description>[default 0]</description></item>
/// </list>
/// <para/>
/// Accepts a param specifying the commit point as
/// previously saved with <see cref="CommitIndexTask"/>. If you specify
/// this, it rolls the index back to that commit on opening
/// the <see cref="IndexWriter"/>.
/// </remarks>
public class OpenIndexTask : PerfTask
{
    public static readonly int DEFAULT_MAX_BUFFERED = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
    public static readonly int DEFAULT_MERGE_PFACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR;
    public static readonly double DEFAULT_RAM_FLUSH_MB = (int)IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;

    // User data identifying the commit point to open; null means no specific commit.
    private string commitUserData;

    /// <summary>
    /// Create a new <see cref="OpenIndexTask"/> bound to the given run data.
    /// </summary>
    /// <param name="runData">The run data that receives the opened writer.</param>
    public OpenIndexTask(PerfRunData runData)
        : base(runData)
    {
    }

    /// <summary>
    /// Configures an <see cref="IndexWriter"/> in <see cref="OpenMode.APPEND"/> mode,
    /// optionally on a specific commit, and stores it in the run data.
    /// </summary>
    /// <returns>Always 1.</returns>
    public override int DoLogic()
    {
        PerfRunData data = RunData;
        Config cfg = data.Config;

        // Resolve the commit point only when one was requested via SetParams.
        IndexCommit commit = (commitUserData == null)
            ? null
            : OpenReaderTask.FindIndexCommit(data.Directory, commitUserData);

        data.IndexWriter = CreateIndexTask.ConfigureWriter(cfg, data, OpenMode.APPEND, commit);
        return 1;
    }

    /// <summary>
    /// Records the commit user data to open; a <c>null</c> value leaves the current setting untouched.
    /// </summary>
    /// <param name="params">User data of the commit point to open.</param>
    public override void SetParams(string @params)
    {
        base.SetParams(@params);
        if (@params == null)
        {
            return;
        }

        // specifies which commit point to open
        commitUserData = @params;
    }

    /// <summary>
    /// This task accepts parameters.
    /// </summary>
    public override bool SupportsParams
    {
        get
        {
            return true;
        }
    }
}
a/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenReaderTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenReaderTask.cs new file mode 100644 index 0000000..81adf4c --- /dev/null +++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenReaderTask.cs @@ -0,0 +1,100 @@ +using Lucene.Net.Index; +using Lucene.Net.Support; +using System; +using System.Collections.Generic; +using System.IO; + +namespace Lucene.Net.Benchmarks.ByTask.Tasks +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Open an index reader. + /// <para/> + /// Other side effects: index reader object in perfRunData is set. + /// <para/> + /// Optional params commitUserData eg. 
/// <summary>
/// Open an index reader.
/// <para/>
/// Other side effects: index reader object in perfRunData is set.
/// <para/>
/// Optional param: commitUserData, e.g. <c>OpenReader(commit1)</c>. Only the first
/// comma-separated token of the parameter string is used.
/// </summary>
public class OpenReaderTask : PerfTask
{
    /// <summary>
    /// Key under which the commit user data is looked up in <see cref="IndexCommit.UserData"/>.
    /// </summary>
    public static readonly string USER_DATA = "userData";

    // User data identifying the commit to open; null opens the directory directly.
    private string commitUserData = null;

    /// <summary>
    /// Create a new <see cref="OpenReaderTask"/> bound to the given run data.
    /// </summary>
    /// <param name="runData">The run data that receives the opened reader.</param>
    public OpenReaderTask(PerfRunData runData)
        : base(runData)
    {
    }

    /// <summary>
    /// Opens a <see cref="DirectoryReader"/> (on a specific commit if one was given)
    /// and hands it to the run data.
    /// </summary>
    /// <returns>Always 1.</returns>
    public override int DoLogic()
    {
        Store.Directory dir = RunData.Directory;
        DirectoryReader r;
        if (commitUserData != null)
        {
            r = DirectoryReader.Open(OpenReaderTask.FindIndexCommit(dir, commitUserData));
        }
        else
        {
            r = DirectoryReader.Open(dir);
        }

        try
        {
            RunData.SetIndexReader(r);
        }
        finally
        {
            // We transfer reference to the run data. Releasing in 'finally' ensures
            // our own reference is dropped even if the hand-over throws.
            r.DecRef();
        }
        return 1;
    }

    /// <summary>
    /// Takes the first comma-separated token of <paramref name="params"/> as the commit user data.
    /// </summary>
    /// <param name="params">Parameter string; may be <c>null</c>.</param>
    public override void SetParams(string @params)
    {
        base.SetParams(@params);
        if (@params != null)
        {
            string[] split = @params.Split(new char[] { ',' }).TrimEnd();
            if (split.Length > 0)
            {
                commitUserData = split[0];
            }
        }
    }

    /// <summary>
    /// This task accepts parameters.
    /// </summary>
    public override bool SupportsParams
    {
        get { return true; }
    }

    /// <summary>
    /// Finds the commit in <paramref name="dir"/> whose user data maps
    /// <see cref="USER_DATA"/> to <paramref name="userData"/> (ordinal comparison).
    /// </summary>
    /// <param name="dir">Directory whose commits are listed.</param>
    /// <param name="userData">User data value to look for.</param>
    /// <returns>The first matching <see cref="IndexCommit"/>.</returns>
    /// <exception cref="IOException">If no commit carries the requested user data.</exception>
    public static IndexCommit FindIndexCommit(Store.Directory dir, string userData)
    {
        IList<IndexCommit> commits = DirectoryReader.ListCommits(dir);
        foreach (IndexCommit ic in commits)
        {
            IDictionary<string, string> map = ic.UserData;
            string ud = null;
            if (map != null)
            {
                map.TryGetValue(USER_DATA, out ud);
            }
            if (ud != null && ud.Equals(userData, StringComparison.Ordinal))
            {
                return ic;
            }
        }

        throw new IOException("index does not contain commit with userData: " + userData);
    }
}
Lucene.Net.Facet.Taxonomy.Directory; + +namespace Lucene.Net.Benchmarks.ByTask.Tasks +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Open a taxonomy index. + /// <para/> + /// Other side effects: taxonomy writer object in perfRunData is set. 
/// <summary>
/// Open a taxonomy index.
/// <para/>
/// Other side effects: taxonomy writer object in perfRunData is set.
/// </summary>
public class OpenTaxonomyIndexTask : PerfTask
{
    /// <summary>
    /// Create a new <see cref="OpenTaxonomyIndexTask"/> bound to the given run data.
    /// </summary>
    /// <param name="runData">The run data that receives the taxonomy writer.</param>
    public OpenTaxonomyIndexTask(PerfRunData runData)
        : base(runData)
    {
    }

    /// <summary>
    /// Creates a <see cref="DirectoryTaxonomyWriter"/> over the run data's taxonomy
    /// directory and stores it in the run data.
    /// </summary>
    /// <returns>Always 1.</returns>
    public override int DoLogic()
    {
        PerfRunData data = RunData;
        DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(data.TaxonomyDir);
        data.TaxonomyWriter = taxoWriter;
        return 1;
    }
}
/// <summary>
/// Open a taxonomy index reader.
/// <para/>
/// Other side effects: taxonomy reader object in perfRunData is set.
/// </summary>
public class OpenTaxonomyReaderTask : PerfTask
{
    /// <summary>
    /// Create a new <see cref="OpenTaxonomyReaderTask"/> bound to the given run data.
    /// </summary>
    /// <param name="runData">The run data that receives the taxonomy reader.</param>
    public OpenTaxonomyReaderTask(PerfRunData runData)
        : base(runData)
    {
    }

    /// <summary>
    /// Opens a <see cref="DirectoryTaxonomyReader"/> over the run data's taxonomy
    /// directory and hands it to the run data.
    /// </summary>
    /// <returns>Always 1.</returns>
    public override int DoLogic()
    {
        PerfRunData runData = RunData;
        DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(runData.TaxonomyDir);
        try
        {
            runData.SetTaxonomyReader(taxoReader);
        }
        finally
        {
            // We transfer reference to the run data. Releasing in 'finally' ensures
            // our own reference is dropped even if the hand-over throws.
            taxoReader.DecRef();
        }
        return 1;
    }
}
+ */ + + /// <summary> + /// An abstract task to be tested for performance. + /// </summary> + /// <remarks> + /// Every performance task extends this class, and provides its own + /// <see cref="DoLogic()"/> method, which performs the actual task. + /// <para/> + /// Tasks performing some work that should be measured for the task, can override + /// <see cref="Setup()"/> and/or <see cref="TearDown()"/> and place that work there. + /// <para/> + /// Relevant properties: + /// <list type="bullet"> + /// <item><term>task.max.depth.log</term><description></description></item> + /// </list> + /// <para/> + /// Also supports the following logging attributes: + /// <list type="bullet"> + /// <item><term>log.step</term><description> + /// specifies how often to log messages about the current running + /// task. Default is 1000 <see cref="DoLogic()"/> invocations. Set to -1 to disable + /// logging. + /// </description></item> + /// <item><term>log.step.[class Task Name]</term><description> + /// specifies the same as 'log.step', only for a + /// particular task name. For example, log.step.AddDoc will be applied only for + /// <see cref="AddDocTask"/>. It's a way to control + /// per task logging settings. If you want to omit logging for any other task, + /// include log.step=-1. The syntax is "log.step." together with the Task's + /// 'short' name (i.e., without the 'Task' part). 
/// <summary>
/// An abstract task to be tested for performance.
/// </summary>
/// <remarks>
/// Every performance task extends this class, and provides its own
/// <see cref="DoLogic()"/> method, which performs the actual task.
/// <para/>
/// Tasks performing some work that should not be measured for the task can override
/// <see cref="Setup()"/> and/or <see cref="TearDown()"/> and place that work there.
/// <para/>
/// Relevant properties:
/// <list type="bullet">
///     <item><term>task.max.depth.log</term><description>
///     maximum task depth at which a "starting task" message is printed (default 0).
///     </description></item>
/// </list>
/// <para/>
/// Also supports the following logging attributes:
/// <list type="bullet">
///     <item><term>log.step</term><description>
///     specifies how often to log messages about the current running
///     task. Default is 1000 <see cref="DoLogic()"/> invocations. Set to -1 to disable
///     logging.
///     </description></item>
///     <item><term>log.step.[class Task Name]</term><description>
///     specifies the same as 'log.step', only for a
///     particular task name. For example, log.step.AddDoc will be applied only for
///     <see cref="AddDocTask"/>. It's a way to control
///     per task logging settings. If you want to omit logging for any other task,
///     include log.step=-1. The syntax is "log.step." together with the Task's
///     'short' name (i.e., without the 'Task' part).
///     </description></item>
/// </list>
/// </remarks>
public abstract class PerfTask
{
    internal static readonly int DEFAULT_LOG_STEP = 1000;

    private PerfRunData runData;

    // properties that all tasks have
    private string name;
    private int depth = 0;
    protected int m_logStep;
    private int logStepCount = 0;
    private int maxDepthLogStart = 0;
    private bool disableCounting = false;
    protected string m_params = null;

    private bool runInBackground;
    private int deltaPri;

    // The first line of this task's definition in the alg file
    private int algLineNum = 0;

    protected static readonly string NEW_LINE = Environment.NewLine;

    /// <summary>
    /// Should not be used externally. Derives the short task name from the
    /// runtime type name by stripping a trailing "Task".
    /// </summary>
    private PerfTask()
    {
        name = GetType().Name;
        if (name.EndsWith("Task", StringComparison.Ordinal))
        {
            name = name.Substring(0, name.Length - 4);
        }
    }

    /// <summary>
    /// Marks this task as running in the background with the given priority delta.
    /// </summary>
    /// <param name="deltaPri">Priority delta reported by <see cref="BackgroundDeltaPriority"/>.</param>
    public virtual void SetRunInBackground(int deltaPri)
    {
        runInBackground = true;
        this.deltaPri = deltaPri;
    }

    /// <summary>
    /// <c>true</c> once <see cref="SetRunInBackground(int)"/> has been called.
    /// </summary>
    public virtual bool RunInBackground
    {
        get { return runInBackground; }
    }

    /// <summary>
    /// The priority delta passed to <see cref="SetRunInBackground(int)"/>.
    /// </summary>
    public virtual int BackgroundDeltaPriority
    {
        get { return deltaPri; }
    }

    // LUCENENET specific - made private and
    // added Stop property because volatile
    // fields cannot be protected.
    private volatile bool stopNow;

    /// <summary>
    /// Gets or sets the stop flag that <see cref="StopNow()"/> raises.
    /// </summary>
    protected bool Stop
    {
        get { return stopNow; }
        set { stopNow = value; }
    }

    /// <summary>
    /// Raises the stop flag.
    /// </summary>
    public virtual void StopNow()
    {
        stopNow = true;
    }

    /// <summary>
    /// Creates the task and reads its logging settings from the run data's config.
    /// </summary>
    /// <param name="runData">The run data this task operates on.</param>
    public PerfTask(PerfRunData runData)
        : this()
    {
        this.runData = runData;
        Config config = runData.Config;
        this.maxDepthLogStart = config.Get("task.max.depth.log", 0);

        // A task-specific "log.step.<name>" setting, when present, wins over "log.step".
        string logStepAtt = "log.step";
        string taskLogStepAtt = "log.step." + name;
        if (config.Get(taskLogStepAtt, null) != null)
        {
            logStepAtt = taskLogStepAtt;
        }

        // It's important to read this from Config, to support vals-by-round.
        m_logStep = config.Get(logStepAtt, DEFAULT_LOG_STEP);
        // To avoid the check 'if (logStep > 0)' in TearDown(). This effectively
        // turns logging off.
        if (m_logStep <= 0)
        {
            m_logStep = int.MaxValue;
        }
    }

    /// <summary>
    /// Returns a shallow (memberwise) copy of this task.
    /// </summary>
    public virtual object Clone()
    {
        // tasks having non primitive data structures should override this.
        // otherwise parallel running of a task sequence might not run correctly.
        return (PerfTask)base.MemberwiseClone();
    }

    /// <summary>
    /// Disposes this task by calling <see cref="Dispose(bool)"/> with <c>true</c>.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Hook for subclasses holding disposable resources; does nothing by default.
    /// </summary>
    /// <param name="disposing"><c>true</c> when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing)
    {
    }

    /// <summary>
    /// Run the task, record statistics.
    /// </summary>
    /// <param name="reportStats">
    /// When <c>false</c> (or when <see cref="ShouldNotRecordStats"/> is <c>true</c>)
    /// the task runs without marking start/end points.
    /// </param>
    /// <returns>Number of work items done by this task (0 when counting is disabled).</returns>
    public int RunAndMaybeStats(bool reportStats)
    {
        int count;
        if (!reportStats || ShouldNotRecordStats)
        {
            Setup();
            count = DoLogic();
            count = disableCounting ? 0 : count;
            TearDown();
            return count;
        }

        // reportStats is known to be true from here on.
        if (depth <= maxDepthLogStart && !ShouldNeverLogAtStart)
        {
            SystemConsole.WriteLine("------------> starting task: " + GetName());
        }
        Setup();
        Points pnts = runData.Points;
        TaskStats ts = pnts.MarkTaskStart(this, runData.Config.RoundNumber);
        count = DoLogic();
        count = disableCounting ? 0 : count;
        pnts.MarkTaskEnd(ts, count);
        TearDown();
        return count;
    }

    /// <summary>
    /// Perform the task once (ignoring repetitions specification).
    /// Return number of work items done by this task.
    /// For indexing that can be number of docs added.
    /// For warming that can be number of scanned items, etc.
    /// </summary>
    /// <returns>Number of work items done by this task.</returns>
    public abstract int DoLogic();

    /// <summary>
    /// Returns the name, followed by the params in parentheses when params were set.
    /// </summary>
    public virtual string GetName()
    {
        if (m_params == null)
        {
            return name;
        }
        return new StringBuilder(name).Append('(').Append(m_params).Append(')').ToString();
    }

    /// <summary>
    /// Sets the name.
    /// </summary>
    /// <param name="name">The name to set.</param>
    protected virtual void SetName(string name)
    {
        this.name = name;
    }

    /// <summary>
    /// Gets the run data.
    /// </summary>
    public virtual PerfRunData RunData
    {
        get { return runData; }
    }

    /// <summary>
    /// Gets or Sets the depth.
    /// </summary>
    public virtual int Depth
    {
        get { return depth; }
        set { depth = value; }
    }

    // compute a blank string padding for printing this task indented by its depth
    internal string GetPadding()
    {
        char[] c = new char[4 * Depth];
        for (int i = 0; i < c.Length; i++) c[i] = ' ';
        return new string(c);
    }

    /// <summary>
    /// Returns the padded task name, prefixed with '-' when counting is disabled and
    /// suffixed with " &amp;" (plus a non-zero priority delta) for background tasks.
    /// </summary>
    public override string ToString()
    {
        string padd = GetPadding();
        StringBuilder sb = new StringBuilder(padd);
        if (disableCounting)
        {
            sb.Append('-');
        }
        sb.Append(GetName());
        if (RunInBackground)
        {
            sb.Append(" &");
            int x = BackgroundDeltaPriority;
            if (x != 0)
            {
                sb.Append(x);
            }
        }
        return sb.ToString();
    }

    /// <summary>
    /// Returns the maxDepthLogStart.
    /// </summary>
    internal int MaxDepthLogStart
    {
        get { return maxDepthLogStart; }
    }

    /// <summary>
    /// Builds the progress message logged by <see cref="TearDown()"/>.
    /// </summary>
    /// <param name="recsCount">Number of <see cref="TearDown()"/> invocations so far.</param>
    protected virtual string GetLogMessage(int recsCount)
    {
        return "processed " + recsCount + " records";
    }

    /// <summary>
    /// Tasks that should never log at start can override this.
    /// Returns <c>true</c> if this task should never log when it starts.
    /// </summary>
    protected virtual bool ShouldNeverLogAtStart
    {
        get { return false; }
    }

    /// <summary>
    /// Tasks that should not record statistics can override this.
    /// Returns <c>true</c> if this task should never record its statistics.
    /// </summary>
    protected virtual bool ShouldNotRecordStats
    {
        get { return false; }
    }

    /// <summary>
    /// Task setup work that should not be measured for that specific task. By
    /// default it does nothing, but tasks can implement this, moving work from
    /// <see cref="DoLogic()"/> to this method. Only the work done in <see cref="DoLogic()"/>
    /// is measured for this task. Notice that higher level (sequence) tasks
    /// containing this task would then measure larger time than the sum of their
    /// contained tasks.
    /// </summary>
    public virtual void Setup()
    {
    }

    /// <summary>
    /// Task teardown work that should not be measured for that specific task. By
    /// default it only logs progress every <c>log.step</c> invocations, but tasks can
    /// implement this, moving work from <see cref="DoLogic()"/> to this method. Only
    /// the work done in <see cref="DoLogic()"/> is measured for this task. Notice that
    /// higher level (sequence) tasks containing this task would then measure larger
    /// time than the sum of their contained tasks.
    /// </summary>
    public virtual void TearDown()
    {
        if (++logStepCount % m_logStep == 0)
        {
            // Convert ticks to milliseconds in floating point. Dividing the two long
            // values first would truncate the timestamp to whole seconds.
            // NOTE(review): assumes StartTimeMillis comes from the same Stopwatch clock - confirm.
            double nowMillis = Stopwatch.GetTimestamp() * 1000.0 / Stopwatch.Frequency;
            double time = (nowMillis - runData.StartTimeMillis) / 1000.0;
            SystemConsole.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0:0000000.00}", time) + " sec --> "
                + Thread.CurrentThread.Name + " " + GetLogMessage(logStepCount));
        }
    }

    /// <summary>
    /// Sub classes that support parameters must override this method to return
    /// <c>true</c> if this task supports command line params.
    /// </summary>
    public virtual bool SupportsParams
    {
        get { return false; }
    }

    /// <summary>
    /// Set the params of this task.
    /// </summary>
    /// <param name="params">The raw parameter string.</param>
    /// <exception cref="NotSupportedException">For tasks not supporting command line parameters.</exception>
    public virtual void SetParams(string @params)
    {
        if (!SupportsParams)
        {
            throw new NotSupportedException(GetName() + " does not support command line parameters.");
        }
        this.m_params = @params;
    }

    /// <summary>
    /// Gets the Params.
    /// </summary>
    public virtual string Params
    {
        get { return m_params; }
    }

    /// <summary>
    /// Gets or sets whether counting is disabled for this task.
    /// </summary>
    public virtual bool DisableCounting
    {
        get { return disableCounting; }
        set { disableCounting = value; }
    }

    /// <summary>
    /// Gets or sets the first line of this task's definition in the alg file.
    /// </summary>
    public virtual int AlgLineNum
    {
        get { return algLineNum; }
        set { algLineNum = value; }
    }
}
/// <summary>
/// Opens a reader and prints basic statistics.
/// </summary>
public class PrintReaderTask : PerfTask
{
    // Commit user data to open; null opens the directory directly.
    private string userData = null;

    /// <summary>
    /// Create a new <see cref="PrintReaderTask"/> bound to the given run data.
    /// </summary>
    /// <param name="runData">The run data providing the directory to read.</param>
    public PrintReaderTask(PerfRunData runData)
        : base(runData)
    {
    }

    /// <summary>
    /// Uses the whole parameter string as the commit user data.
    /// </summary>
    /// <param name="params">Commit user data, or <c>null</c> to open the directory directly.</param>
    public override void SetParams(string @params)
    {
        base.SetParams(@params);
        userData = @params;
    }

    /// <summary>
    /// This task accepts parameters.
    /// </summary>
    public override bool SupportsParams
    {
        get { return true; }
    }

    /// <summary>
    /// Opens a reader, prints its document and deletion counts, and disposes it.
    /// </summary>
    /// <returns>Always 1.</returns>
    public override int DoLogic()
    {
        Directory dir = RunData.Directory;

        // 'using' guarantees the reader is disposed even if printing the statistics throws.
        using (IndexReader r = (userData == null)
            ? DirectoryReader.Open(dir)
            : DirectoryReader.Open(OpenReaderTask.FindIndexCommit(dir, userData)))
        {
            SystemConsole.WriteLine("--> numDocs:" + r.NumDocs + " dels:" + r.NumDeletedDocs);
        }
        return 1;
    }
}
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Read index (abstract) task. + /// Sub classes implement <see cref="WithSearch"/>, <see cref="WithWarm"/>, <see cref="WithTraverse"/> and <see cref="WithRetrieve"/> + /// </summary> + /// <remarks> + /// Note: All ReadTasks reuse the reader if it is already open. + /// Otherwise a reader is opened at start and closed at the end. + /// <para/> + /// The <c>search.num.hits</c> config parameter sets + /// the top number of hits to collect during searching. If + /// <c>print.hits.field</c> is set, then each hit is + /// printed along with the value of that field. + /// <para/> + /// Other side effects: none. 
/// <summary>
/// Read index (abstract) task.
/// Sub classes implement <see cref="WithSearch"/>, <see cref="WithWarm"/>, <see cref="WithTraverse"/> and <see cref="WithRetrieve"/>
/// </summary>
/// <remarks>
/// Note: All ReadTasks reuse the reader if it is already open.
/// Otherwise a reader is opened at start and closed at the end.
/// <para/>
/// The <c>search.num.hits</c> config parameter sets
/// the top number of hits to collect during searching. If
/// <c>print.hits.field</c> is set, then each hit is
/// printed along with the value of that field.
/// <para/>
/// Other side effects: none.
/// </remarks>
public abstract class ReadTask : PerfTask
{
    private readonly IQueryMaker queryMaker;

    /// <summary>
    /// Creates the task; a query maker is obtained only when <see cref="WithSearch"/> is <c>true</c>.
    /// </summary>
    /// <param name="runData">The run data this task reads from.</param>
    public ReadTask(PerfRunData runData)
        : base(runData)
    {
        if (WithSearch)
        {
            queryMaker = GetQueryMaker();
        }
        else
        {
            queryMaker = null;
        }
    }

    /// <summary>
    /// Optionally warms, searches, prints, traverses and retrieves, as selected by the
    /// <c>With*</c> properties, and releases the reader afterwards.
    /// </summary>
    /// <returns>Number of work items done (docs warmed, searches, hits traversed, docs retrieved, highlights).</returns>
    public override int DoLogic()
    {
        int res = 0;

        // open reader or use existing one
        IndexSearcher searcher = RunData.GetIndexSearcher();

        IndexReader reader;

        bool closeSearcher;
        if (searcher == null)
        {
            // open our own reader
            Directory dir = RunData.Directory;
            reader = DirectoryReader.Open(dir);
            searcher = new IndexSearcher(reader);
            closeSearcher = true;
        }
        else
        {
            // use existing one; this passes +1 ref to us
            reader = searcher.IndexReader;
            closeSearcher = false;
        }

        try
        {
            // optionally warm and add num docs traversed to count
            if (WithWarm)
            {
                res += WarmReader(reader);
            }

            if (WithSearch)
            {
                res++;
                res += SearchAndTraverse(searcher, reader);
            }
        }
        finally
        {
            // Release the reader even when warming or searching throws.
            if (closeSearcher)
            {
                reader.Dispose();
            }
            else
            {
                // Release our +1 ref from above
                reader.DecRef();
            }
        }
        return res;
    }

    // Loads every live document once; returns the number of non-null documents loaded.
    private static int WarmReader(IndexReader reader)
    {
        int loaded = 0;
        IBits liveDocs = MultiFields.GetLiveDocs(reader);
        for (int m = 0; m < reader.MaxDoc; m++)
        {
            if (null == liveDocs || liveDocs.Get(m))
            {
                Document doc = reader.Document(m);
                loaded += (doc == null ? 0 : 1);
            }
        }
        return loaded;
    }

    // Runs one query, optionally prints and traverses its hits; returns the work items
    // counted beyond the search itself.
    private int SearchAndTraverse(IndexSearcher searcher, IndexReader reader)
    {
        int res = 0;
        Query q = queryMaker.MakeQuery();
        Sort sort = Sort;
        TopDocs hits = null;
        int numHits = NumHits;
        if (numHits <= 0)
        {
            return res;
        }

        if (WithCollector == false)
        {
            if (sort != null)
            {
                // TODO: instead of always passing false we
                // should detect based on the query; if we make
                // the IndexSearcher search methods that take
                // Weight public again, we can go back to
                // pulling the Weight ourselves:
                TopFieldCollector collector = TopFieldCollector.Create(sort, numHits,
                                                                       true, WithScore,
                                                                       WithMaxScore,
                                                                       false);
                searcher.Search(q, null, collector);
                hits = collector.GetTopDocs();
            }
            else
            {
                hits = searcher.Search(q, numHits);
            }
        }
        else
        {
            // A custom collector does not expose top docs here, so 'hits' stays null.
            ICollector collector = CreateCollector();
            searcher.Search(q, null, collector);
        }

        string printHitsField = RunData.Config.Get("print.hits.field", null);
        if (hits != null && printHitsField != null && printHitsField.Length > 0)
        {
            PrintHits(reader, hits, printHitsField);
        }

        // Without top docs (custom collector) there is nothing to traverse; guarding
        // here avoids a NullReferenceException on hits.ScoreDocs.
        if (WithTraverse && hits != null)
        {
            res += TraverseHits(reader, q, hits.ScoreDocs);
        }
        return res;
    }

    // Prints totals and one line per hit with the value of the requested field.
    private static void PrintHits(IndexReader reader, TopDocs hits, string printHitsField)
    {
        SystemConsole.WriteLine("totalHits = " + hits.TotalHits);
        SystemConsole.WriteLine("maxDoc()  = " + reader.MaxDoc);
        SystemConsole.WriteLine("numDocs() = " + reader.NumDocs);
        for (int i = 0; i < hits.ScoreDocs.Length; i++)
        {
            int docID = hits.ScoreDocs[i].Doc;
            Document doc = reader.Document(docID);
            SystemConsole.WriteLine("  " + i + ": doc=" + docID + " score=" + hits.ScoreDocs[i].Score + " " + printHitsField + " =" + doc.Get(printHitsField));
        }
    }

    // Walks up to TraversalSize hits, optionally retrieving and highlighting each
    // document; returns the number of work items counted.
    private int TraverseHits(IndexReader reader, Query q, ScoreDoc[] scoreDocs)
    {
        int res = 0;
        int traversalSize = Math.Min(scoreDocs.Length, TraversalSize);
        if (traversalSize <= 0)
        {
            return res;
        }

        bool retrieve = WithRetrieve;
        int numHighlight = Math.Min(NumToHighlight, scoreDocs.Length);
        Analyzer analyzer = RunData.Analyzer;
        BenchmarkHighlighter highlighter = null;
        if (numHighlight > 0)
        {
            highlighter = GetBenchmarkHighlighter(q);
        }
        for (int m = 0; m < traversalSize; m++)
        {
            int id = scoreDocs[m].Doc;
            res++;
            if (retrieve)
            {
                Document document = RetrieveDoc(reader, id);
                res += document != null ? 1 : 0;
                if (numHighlight > 0 && m < numHighlight)
                {
                    ICollection<string> fieldsToHighlight = GetFieldsToHighlight(document);
                    foreach (string field in fieldsToHighlight)
                    {
                        string text = document.Get(field);
                        res += highlighter.DoHighlight(reader, id, field, document, analyzer, text);
                    }
                }
            }
        }
        return res;
    }

    /// <summary>
    /// Creates the collector used when <see cref="WithCollector"/> is <c>true</c>.
    /// The default is a <see cref="TopScoreDocCollector"/> sized by <see cref="NumHits"/>.
    /// </summary>
    protected virtual ICollector CreateCollector()
    {
        return TopScoreDocCollector.Create(NumHits, true);
    }

    /// <summary>
    /// Loads the document with the given id from the reader.
    /// </summary>
    /// <param name="ir">Reader to load from.</param>
    /// <param name="id">Document id.</param>
    protected virtual Document RetrieveDoc(IndexReader ir, int id)
    {
        return ir.Document(id);
    }

    /// <summary>
    /// Return query maker used for this task.
    /// </summary>
    public abstract IQueryMaker GetQueryMaker();

    /// <summary>
    /// Return <c>true</c> if search should be performed.
    /// </summary>
    public abstract bool WithSearch { get; }

    /// <summary>
    /// Return <c>true</c> if the search should use the collector from <see cref="CreateCollector()"/>.
    /// </summary>
    public virtual bool WithCollector
    {
        get { return false; }
    }

    /// <summary>
    /// Return <c>true</c> if warming should be performed.
    /// </summary>
    public abstract bool WithWarm { get; }

    /// <summary>
    /// Return <c>true</c> if, with search, results should be traversed.
    /// </summary>
    public abstract bool WithTraverse { get; }

    /// <summary>
    /// Whether scores should be computed (only useful with
    /// field sort)
    /// </summary>
    public virtual bool WithScore
    {
        get { return true; }
    }

    /// <summary>
    /// Whether maxScores should be computed (only useful with
    /// field sort)
    /// </summary>
    public virtual bool WithMaxScore
    {
        get { return true; }
    }

    /// <summary>
    /// Specify the number of hits to traverse. Tasks should override this if they want to restrict the number
    /// of hits that are traversed when <see cref="WithTraverse"/> is <c>true</c>. Must be greater than 0.
    /// <para/>
    /// Read task calculates the traversal as: <c>Math.Min(hits.Length, TraversalSize)</c>
    /// </summary>
    /// <remarks>
    /// Unless overridden, the return value is <see cref="int.MaxValue"/>.
    /// </remarks>
    public virtual int TraversalSize
    {
        get { return int.MaxValue; }
    }

    internal static readonly int DEFAULT_SEARCH_NUM_HITS = 10;
    private int numHits;

    /// <summary>
    /// Reads <c>search.num.hits</c> from the config before each run.
    /// </summary>
    public override void Setup()
    {
        base.Setup();
        numHits = RunData.Config.Get("search.num.hits", DEFAULT_SEARCH_NUM_HITS);
    }

    /// <summary>
    /// Specify the number of hits to retrieve. Tasks should override this if they want to restrict the number
    /// of hits that are collected during searching. Must be greater than 0.
    /// <para/>
    /// Returns 10 by default, or <c>search.num.hits</c> config if set.
    /// </summary>
    public virtual int NumHits
    {
        get { return numHits; }
    }

    /// <summary>
    /// Return <c>true</c> if, with search and results traversing, docs should be retrieved.
    /// </summary>
    public abstract bool WithRetrieve { get; }

    /// <summary>
    /// The number of documents to highlight. 0 means no docs will be highlighted.
    /// </summary>
    public virtual int NumToHighlight
    {
        get { return 0; }
    }

    /// <summary>
    /// Return an appropriate highlighter to be used with
    /// highlighting tasks. The base implementation returns <c>null</c>.
    /// </summary>
    /// <param name="q">The query whose hits are highlighted.</param>
    /// <returns>The highlighter, or <c>null</c> in the base implementation.</returns>
    protected virtual BenchmarkHighlighter GetBenchmarkHighlighter(Query q)
    {
        return null;
    }

    /// <summary>
    /// The sort to search with, or <c>null</c> (the default) for no field sort.
    /// </summary>
    public virtual Sort Sort
    {
        get { return null; }
    }

    /// <summary>
    /// Define the fields to highlight. Base implementation returns all fields.
    /// </summary>
    /// <param name="document">The <see cref="Document"/>.</param>
    /// <returns>An <see cref="T:ICollection{string}"/> of <see cref="Field"/> names.</returns>
    protected virtual ICollection<string> GetFieldsToHighlight(Document document)
    {
        IList<IIndexableField> fields = document.Fields;
        ISet<string> result = new HashSet<string>();
        foreach (IIndexableField f in fields)
        {
            result.Add(f.Name);
        }
        return result;
    }
}
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Simple task to test performance of tokenizers. It just + /// creates a token stream for each field of the document and + /// read all tokens out of that stream. + /// </summary> + public class ReadTokensTask : PerfTask + { + public ReadTokensTask(PerfRunData runData) + : base(runData) + { + } + + private int totalTokenCount = 0; + + // volatile data passed between setup(), doLogic(), tearDown(). + private Document doc = null; + + public override void Setup() + { + base.Setup(); + DocMaker docMaker = RunData.DocMaker; + doc = docMaker.MakeDocument(); + } + + protected override string GetLogMessage(int recsCount) + { + return "read " + recsCount + " docs; " + totalTokenCount + " tokens"; + } + + public override void TearDown() + { + doc = null; + base.TearDown(); + } + + public override int DoLogic() + { + IList<IIndexableField> fields = doc.Fields; + Analyzer analyzer = RunData.Analyzer; + int tokenCount = 0; + foreach (IIndexableField field in fields) + { + if (!field.FieldType.IsTokenized || + field is Int32Field || + field is Int64Field || + field is SingleField || + field is DoubleField) + { + continue; + } + + using (TokenStream stream = field.GetTokenStream(analyzer)) + { + // reset the TokenStream to the first token + stream.Reset(); + + ITermToBytesRefAttribute termAtt = stream.GetAttribute<ITermToBytesRefAttribute>(); + while (stream.IncrementToken()) + { + termAtt.FillBytesRef(); + tokenCount++; + } + stream.End(); + } + } + totalTokenCount += tokenCount; + return tokenCount; + } + + /// <summary> + /// Simple StringReader that can be reset to a new string; + /// we use this when tokenizing the string value from a + /// Field. 
+ /// </summary> + internal ReusableStringReader stringReader = new ReusableStringReader(); + + internal sealed class ReusableStringReader : TextReader + { + int upto; + int left; + string s; + internal void Init(string s) + { + this.s = s; + left = s.Length; + this.upto = 0; + } + + public override int Read() + { + char[] result = new char[1]; + if (Read(result, 0, 1, false) != -1) + { + return result[0]; + } + return -1; + } + public override int Read(char[] c, int off, int len) + { + return Read(c, off, len, true); + } + + private int Read(char[] c, int off, int len, bool returnZeroWhenComplete) + { + if (left > len) + { + s.CopyTo(upto, c, off, upto + len); + upto += len; + left -= len; + return len; + } + else if (0 == left) + { + if (returnZeroWhenComplete) + { + return 0; // .NET semantics + } + return -1; + } + else + { + s.CopyTo(upto, c, off, upto + left); + int r = left; + left = 0; + upto = s.Length; + return r; + } + } + + protected override void Dispose(bool disposing) { } + } + } +}