// Origin: src/Lucene.Net.Benchmark/ByTask/Utils/Config.cs, added as a new file in Apache Lucene.NET commit b515271d
// (http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Utils/Config.cs).
using Lucene.Net.Support;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;

namespace Lucene.Net.Benchmarks.ByTask.Utils
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Perf run configuration properties.
    /// </summary>
    /// <remarks>
    /// A property whose value contains ":" is interpreted as a "by round" property:
    /// the text before the first ":" is the column name used in reports and the
    /// remaining ":"-separated tokens are the values, e.g. "mrg:10:100:5" yields the
    /// column "mrg" and the values 10, 100 and 5. The values are extracted once, on
    /// first use, and the current round number selects which one is returned.
    /// <para/>
    /// The config property "work.dir" tells where the root of the
    /// docs data dirs and indexes dirs is. It is set to either of:
    /// <list type="bullet">
    ///     <item><description>value supplied for it in the alg file;</description></item>
    ///     <item><description>otherwise, value of the "benchmark.work.dir" property as resolved by <c>SystemProperties.GetProperty</c>;</description></item>
    ///     <item><description>otherwise, "work".</description></item>
    /// </list>
    /// </remarks>
    public class Config
    {
        // For tests, if verbose is not turned on, don't print the props.
        private static readonly bool DEFAULT_PRINT_PROPS = SystemProperties.GetPropertyAsBoolean("tests.verbose", true);
        private static readonly string NEW_LINE = Environment.NewLine;

        private int roundNumber = 0;
        private readonly IDictionary<string, string> props;

        // Parsed per-round value arrays (int[], double[], string[] or bool[]), keyed by property name.
        private readonly IDictionary<string, object> valByRound = new Dictionary<string, object>();

        // Report column name of every per-round property, keyed by property name.
        private readonly IDictionary<string, string> colForValByRound = new Dictionary<string, string>();

        private readonly string algorithmText;

        /// <summary>
        /// Read both algorithm and config properties.
        /// <para/>
        /// Every line up to and including the last line that contains '=' (not in
        /// first position) is loaded as configuration; the remaining lines become
        /// <see cref="AlgorithmText"/>. The reader is always disposed.
        /// </summary>
        /// <param name="algReader">From where to read algorithm and config properties.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="algReader"/> is <c>null</c>.</exception>
        /// <exception cref="IOException">If there is a low-level I/O error.</exception>
        public Config(TextReader algReader)
        {
            if (algReader == null)
            {
                throw new ArgumentNullException("algReader");
            }

            // read alg file to array of lines
            IList<string> lines = new List<string>();
            int lastConfigLine = 0;
            try
            {
                string line;
                while ((line = algReader.ReadLine()) != null)
                {
                    lines.Add(line);
                    if (line.IndexOf('=') > 0)
                    {
                        lastConfigLine = lines.Count;
                    }
                }
            }
            finally
            {
                // Dispose even when reading fails part-way through.
                algReader.Dispose();
            }

            // copy props lines to an in-memory stream and load the props from it
            this.props = new Dictionary<string, string>();
            using (MemoryStream ms = new MemoryStream())
            using (TextWriter writer = new StreamWriter(ms))
            {
                for (int i = 0; i < lastConfigLine; i++)
                {
                    writer.WriteLine(lines[i]);
                }
                writer.Flush();
                ms.Position = 0;
                props.Load(ms);
            }

            // make sure work dir is set properly
            string workDir;
            if (!props.TryGetValue("work.dir", out workDir) || workDir == null)
            {
                props["work.dir"] = SystemProperties.GetProperty("benchmark.work.dir", "work");
            }

            PrintPropsIfRequested();

            // copy algorithm lines
            StringBuilder sb = new StringBuilder();
            for (int i = lastConfigLine; i < lines.Count; i++)
            {
                sb.Append(lines[i]);
                sb.Append(NEW_LINE);
            }
            algorithmText = sb.ToString();
        }

        /// <summary>
        /// Create config without algorithm - useful for a programmatic perf test.
        /// The dictionary is used as-is (not copied), so later <see cref="Set(string, string)"/>
        /// calls write into it.
        /// </summary>
        /// <param name="props">Configuration properties.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="props"/> is <c>null</c>.</exception>
        public Config(IDictionary<string, string> props)
        {
            if (props == null)
            {
                throw new ArgumentNullException("props");
            }
            this.props = props;
            PrintPropsIfRequested();
        }

        // Prints the properties when "print.props" is "true" (case-insensitive) or, if that
        // property is absent, when DEFAULT_PRINT_PROPS is set. A null value counts as "not true".
        private void PrintPropsIfRequested()
        {
            string flag;
            if (props.TryGetValue("print.props", out flag))
            {
                if (string.Equals(flag, "true", StringComparison.OrdinalIgnoreCase))
                {
                    PrintProps();
                }
            }
            else if (DEFAULT_PRINT_PROPS)
            {
                PrintProps();
            }
        }

        // Writes all properties, sorted by name, to the console.
        private void PrintProps()
        {
            SystemConsole.WriteLine("------------> config properties:");
            List<string> propKeys = new List<string>(props.Keys);
            propKeys.Sort();
            foreach (string propName in propKeys)
            {
                SystemConsole.WriteLine(propName + " = " + props[propName]);
            }
            SystemConsole.WriteLine("-------------------------------");
        }

        /// <summary>
        /// Return a string property.
        /// <para/>
        /// A value containing ":" is treated as a by-round property (see the class
        /// remarks) unless it contains ":\" or ":/", in which case it is assumed to
        /// be a path and returned unchanged.
        /// </summary>
        /// <param name="name">Name of property.</param>
        /// <param name="dflt">Default value, used when the property is not set.</param>
        /// <returns>The property value for the current round, or <c>null</c> if the effective value is <c>null</c>.</returns>
        public virtual string Get(string name, string dflt)
        {
            // use value by round if already parsed
            object cached;
            if (valByRound.TryGetValue(name, out cached) && cached != null)
            {
                string[] perRound = (string[])cached;
                return perRound[roundNumber % perRound.Length];
            }
            // done if not by round
            string sval;
            if (!props.TryGetValue(name, out sval))
            {
                sval = dflt;
            }
            if (sval == null)
            {
                return null;
            }
            int k = sval.IndexOf(':');
            if (k < 0)
            {
                return sval;
            }
            if (sval.IndexOf(":\\", StringComparison.Ordinal) >= 0 || sval.IndexOf(":/", StringComparison.Ordinal) >= 0)
            {
                // this previously messed up absolute path names on Windows. Assuming
                // there is no real value that starts with \ or /
                return sval;
            }
            // first time this prop is extracted by round; parse before registering so
            // that a parse failure cannot leave a column without values
            string[] vals = PropToStringArray(sval.Substring(k + 1));
            colForValByRound[name] = sval.Substring(0, k);
            valByRound[name] = vals;
            return vals[roundNumber % vals.Length];
        }

        /// <summary>
        /// Set a property.
        /// <para/>
        /// Note: once a multiple values property is set, it can no longer be modified.
        /// </summary>
        /// <param name="name">Name of property.</param>
        /// <param name="value">Either single or multiple property value (multiple values are separated by ":")</param>
        /// <exception cref="Exception">If the property has already been extracted as a by-round property.</exception>
        public virtual void Set(string name, string value)
        {
            object cached;
            if (valByRound.TryGetValue(name, out cached) && cached != null)
            {
                throw new Exception("Cannot modify a multi value property!");
            }
            props[name] = value;
        }

        /// <summary>
        /// Return an <see cref="int"/> property.
        /// <para/>
        /// If the property contains ":", e.g. "mrg:10:100:5", the text before the first
        /// ":" is the report column name and the rest is interpreted as an array of
        /// ints. It is extracted once, on first call to Get() it, and a by-round-value
        /// is returned.
        /// </summary>
        /// <param name="name">Name of property.</param>
        /// <param name="dflt">Default value, returned when the property is not set (or is <c>null</c>).</param>
        /// <returns>An <see cref="int"/> property.</returns>
        public virtual int Get(string name, int dflt)
        {
            // use value by round if already parsed
            object cached;
            if (valByRound.TryGetValue(name, out cached) && cached != null)
            {
                int[] perRound = (int[])cached;
                return perRound[roundNumber % perRound.Length];
            }
            // done if not by round
            string sval;
            if (!props.TryGetValue(name, out sval) || sval == null)
            {
                // Return the default as-is instead of formatting and re-parsing it.
                return dflt;
            }
            int k = sval.IndexOf(':');
            if (k < 0)
            {
                return int.Parse(sval, CultureInfo.InvariantCulture);
            }
            // first time this prop is extracted by round
            int[] vals = PropToInt32Array(sval.Substring(k + 1));
            colForValByRound[name] = sval.Substring(0, k);
            valByRound[name] = vals;
            return vals[roundNumber % vals.Length];
        }

        /// <summary>
        /// Return a double property.
        /// <para/>
        /// If the property contains ":", e.g. "ram:10.7:100.4:5", the text before the
        /// first ":" is the report column name and the rest is interpreted as an array
        /// of doubles. It is extracted once, on first call to Get() it, and a
        /// by-round-value is returned.
        /// </summary>
        /// <param name="name">Name of property.</param>
        /// <param name="dflt">Default value, returned when the property is not set (or is <c>null</c>).</param>
        /// <returns>A double property.</returns>
        public virtual double Get(string name, double dflt)
        {
            // use value by round if already parsed
            object cached;
            if (valByRound.TryGetValue(name, out cached) && cached != null)
            {
                double[] perRound = (double[])cached;
                return perRound[roundNumber % perRound.Length];
            }
            // done if not by round
            string sval;
            if (!props.TryGetValue(name, out sval) || sval == null)
            {
                // Formatting the default with the current culture and parsing it back with
                // the invariant culture would corrupt it (e.g. "1,5" parses as 15), so
                // return it untouched.
                return dflt;
            }
            int k = sval.IndexOf(':');
            if (k < 0)
            {
                return double.Parse(sval, CultureInfo.InvariantCulture);
            }
            // first time this prop is extracted by round
            double[] vals = PropToDoubleArray(sval.Substring(k + 1));
            colForValByRound[name] = sval.Substring(0, k);
            valByRound[name] = vals;
            return vals[roundNumber % vals.Length];
        }

        /// <summary>
        /// Return a boolean property.
        /// <para/>
        /// If the property contains ":", e.g. "flag:true:true:false", the text before the
        /// first ":" is the report column name and the rest is interpreted as an array
        /// of booleans. It is extracted once, on first call to Get() it, and a
        /// by-round-value is returned.
        /// </summary>
        /// <param name="name">Name of property.</param>
        /// <param name="dflt">Default value, returned when the property is not set (or is <c>null</c>).</param>
        /// <returns>A <see cref="bool"/> property.</returns>
        public virtual bool Get(string name, bool dflt)
        {
            // use value by round if already parsed
            object cached;
            if (valByRound.TryGetValue(name, out cached) && cached != null)
            {
                bool[] perRound = (bool[])cached;
                return perRound[roundNumber % perRound.Length];
            }
            // done if not by round
            string sval;
            if (!props.TryGetValue(name, out sval) || sval == null)
            {
                return dflt;
            }
            int k = sval.IndexOf(':');
            if (k < 0)
            {
                return bool.Parse(sval);
            }
            // first time this prop is extracted by round
            bool[] vals = PropToBooleanArray(sval.Substring(k + 1));
            colForValByRound[name] = sval.Substring(0, k);
            valByRound[name] = vals;
            return vals[roundNumber % vals.Length];
        }

        /// <summary>
        /// Increment the round number, for config values that are extracted by round number,
        /// and log the transition (including every by-round value that is in use) to the console.
        /// </summary>
        /// <returns>The new round number.</returns>
        public virtual int NewRound()
        {
            roundNumber++;

            StringBuilder sb = new StringBuilder("--> Round ").Append(roundNumber - 1).Append("-->").Append(roundNumber);

            // log changes in values
            if (valByRound.Count > 0)
            {
                sb.Append(": ");
                foreach (KeyValuePair<string, object> entry in valByRound)
                {
                    // Every cached entry is an int[], double[], string[] or bool[]; going through
                    // Array lets one code path log all of them.
                    Array values = (Array)entry.Value;
                    int previous = (roundNumber - 1) % values.Length;
                    int current = roundNumber % values.Length;
                    // NOTE(review): the archived copy of this file had runs of whitespace collapsed,
                    // so the width of space-only literals should be confirmed against upstream.
                    sb.Append(" ").Append(entry.Key).Append(":").Append(values.GetValue(previous)).Append("-->").Append(values.GetValue(current));
                }
            }

            SystemConsole.WriteLine();
            SystemConsole.WriteLine(sb.ToString());
            SystemConsole.WriteLine();

            return roundNumber;
        }

        // Splits s on ":" (via StringTokenizer) and converts each token with parse.
        // A string without ":" is converted as a single value.
        private static T[] ParseTokens<T>(string s, Func<string, T> parse)
        {
            if (s.IndexOf(':') < 0)
            {
                return new T[] { parse(s) };
            }

            List<T> parsed = new List<T>();
            StringTokenizer st = new StringTokenizer(s, ":");
            while (st.HasMoreTokens())
            {
                parsed.Add(parse(st.NextToken()));
            }
            return parsed.ToArray();
        }

        // extract properties to array, e.g. for "a:b:c" return string[]{"a","b","c"}.
        private string[] PropToStringArray(string s)
        {
            return ParseTokens(s, token => token);
        }

        // extract properties to array, e.g. for "10:100:5" return int[]{10,100,5}.
        private int[] PropToInt32Array(string s)
        {
            return ParseTokens(s, token => int.Parse(token, CultureInfo.InvariantCulture));
        }

        // extract properties to array, e.g. for "10.7:100.4:-2.3" return double[]{10.7,100.4,-2.3}.
        private double[] PropToDoubleArray(string s)
        {
            return ParseTokens(s, token => double.Parse(token, CultureInfo.InvariantCulture));
        }

        // extract properties to array, e.g. for "true:true:false" return bool[]{true,true,false}.
        private bool[] PropToBooleanArray(string s)
        {
            return ParseTokens(s, token => bool.Parse(token));
        }

        /// <summary>
        /// Gets names of params set by round, for reports title.
        /// </summary>
        /// <returns>Each column name preceded by a separator, or an empty string when no by-round property has been extracted.</returns>
        public virtual string GetColsNamesForValsByRound()
        {
            if (colForValByRound.Count == 0)
            {
                return "";
            }
            StringBuilder sb = new StringBuilder();
            foreach (string colName in colForValByRound.Values)
            {
                sb.Append(" ").Append(colName);
            }
            return sb.ToString();
        }

        /// <summary>
        /// Gets values of params set by round, for reports lines.
        /// </summary>
        /// <param name="roundNum">Round whose values are reported; a negative number produces a "-" placeholder per column.</param>
        /// <returns>The formatted values, or an empty string when no by-round property has been extracted.</returns>
        public virtual string GetColsValuesForValsByRound(int roundNum)
        {
            if (colForValByRound.Count == 0)
            {
                return "";
            }
            StringBuilder sb = new StringBuilder();
            foreach (KeyValuePair<string, string> column in colForValByRound)
            {
                // The column title (with its separator) determines the width of the cell.
                string template = " " + column.Value;
                if (roundNum < 0)
                {
                    // just append blanks
                    sb.Append(Formatter.FormatPaddLeft("-", template));
                    continue;
                }

                // append actual values, for that round
                object a;
                valByRound.TryGetValue(column.Key, out a);
                int[] ai = a as int[];
                double[] ad = a as double[];
                string[] asv = a as string[];
                if (ai != null)
                {
                    sb.Append(Formatter.Format(ai[roundNum % ai.Length], template));
                }
                else if (ad != null)
                {
                    sb.Append(Formatter.Format(2, ad[roundNum % ad.Length], template));
                }
                else if (asv != null)
                {
                    // string values are appended unformatted
                    sb.Append(asv[roundNum % asv.Length]);
                }
                else
                {
                    bool[] ab = (bool[])a;
                    sb.Append(Formatter.FormatPaddLeft("" + ab[roundNum % ab.Length], template));
                }
            }
            return sb.ToString();
        }

        /// <summary>
        /// Gets the round number.
        /// </summary>
        public virtual int RoundNumber
        {
            get { return roundNumber; }
        }

        /// <summary>
        /// Gets the algorithm text: the lines that followed the configuration lines in the
        /// reader passed to <see cref="Config(TextReader)"/>, or <c>null</c> when the
        /// instance was created from a dictionary.
        /// </summary>
        public virtual string AlgorithmText
        {
            get { return algorithmText; }
        }
    }
}
// Origin: src/Lucene.Net.Benchmark/ByTask/Utils/FileUtils.cs, added as a new file in Apache Lucene.NET commit b515271d
// (http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Utils/FileUtils.cs).
using System.IO;

namespace Lucene.Net.Benchmarks.ByTask.Utils
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// File utilities.
    /// </summary>
    public class FileUtils
    {
        /// <summary>
        /// Delete a directory and everything below it, even if non-empty.
        /// <para/>
        /// This is a best-effort operation: failures while deleting are not
        /// propagated, they are reflected in the return value instead.
        /// </summary>
        /// <param name="dir">Directory to delete.</param>
        /// <returns>
        /// <c>true</c> if the deletion succeeded or the directory does not exist once the
        /// attempt has failed; <c>false</c> if <paramref name="dir"/> is <c>null</c> or the
        /// directory still exists after a failed attempt.
        /// </returns>
        public static bool FullyDelete(DirectoryInfo dir)
        {
            if (dir == null)
            {
                // Nothing to delete; previously this surfaced as a NullReferenceException
                // thrown from inside the catch handler.
                return false;
            }

            string path = dir.FullName;
            try
            {
                Directory.Delete(path, true);
                return true;
            }
            catch
            {
                // Deliberately broad: whatever went wrong, the outcome that matters to the
                // caller is whether the directory is gone.
                return !Directory.Exists(path);
            }
        }
    }
}

// Origin: src/Lucene.Net.Benchmark/ByTask/Utils/Format.cs, added as a new file in Apache Lucene.NET commit b515271d
// (http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Utils/Format.cs).
using System;
using System.Globalization;

namespace Lucene.Net.Benchmarks.ByTask.Utils
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Formatting utilities (for reports).
    /// <para/>
    /// Every method produces a cell exactly as wide as the supplied column name:
    /// shorter text is padded with spaces, longer text is truncated.
    /// </summary>
    public class Formatter // LUCENENET specific - renamed from Format because of method name collision
    {
        // Standard numeric format strings, indexed by the number of fraction digits.
        private static readonly string[] numFormat = {
            "N0",
            "N1",
            "N2"
        };

        // Right-aligns text in a cell of the given width: pads on the left, and when the
        // text is too long keeps its last 'width' characters.
        private static string FitLeftPadded(string text, int width)
        {
            string padded = (text ?? string.Empty).PadLeft(width);
            return padded.Substring(padded.Length - width);
        }

        // Left-aligns text in a cell of the given width: pads on the right, and when the
        // text is too long keeps its first 'width' characters.
        private static string FitRightPadded(string text, int width)
        {
            return (text ?? string.Empty).PadRight(width).Substring(0, width);
        }

        // Formats a number with 0, 1 or 2 fraction digits. The value's own ToString is used
        // because string.Format needs a "{0}" item and would otherwise return the format
        // text ("N2") itself.
        private static string FormatNumber(int numFracDigits, double value)
        {
            return value.ToString(numFormat[numFracDigits], CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Pad a number from left.
        /// </summary>
        /// <param name="numFracDigits">Number of digits in fraction part - must be 0 or 1 or 2.</param>
        /// <param name="f">Number to be formatted.</param>
        /// <param name="col">Column name (used for deciding on length).</param>
        /// <returns>Formatted string.</returns>
        public static string Format(int numFracDigits, float f, string col)
        {
            string number = f.ToString(numFormat[numFracDigits], CultureInfo.InvariantCulture);
            return FitLeftPadded(number, col.Length);
        }

        /// <summary>
        /// Pad a number from left.
        /// </summary>
        /// <param name="numFracDigits">Number of digits in fraction part - must be 0 or 1 or 2.</param>
        /// <param name="f">Number to be formatted.</param>
        /// <param name="col">Column name (used for deciding on length).</param>
        /// <returns>Formatted string.</returns>
        public static string Format(int numFracDigits, double f, string col)
        {
            return FitLeftPadded(FormatNumber(numFracDigits, f), col.Length);
        }

        /// <summary>
        /// Pad a number from right.
        /// </summary>
        /// <param name="numFracDigits">Number of digits in fraction part - must be 0 or 1 or 2.</param>
        /// <param name="f">Number to be formatted.</param>
        /// <param name="col">Column name (used for deciding on length).</param>
        /// <returns>Formatted string.</returns>
        public static string FormatPaddRight(int numFracDigits, float f, string col)
        {
            string number = f.ToString(numFormat[numFracDigits], CultureInfo.InvariantCulture);
            return FitRightPadded(number, col.Length);
        }

        /// <summary>
        /// Pad a number from right.
        /// </summary>
        /// <param name="numFracDigits">Number of digits in fraction part - must be 0 or 1 or 2.</param>
        /// <param name="f">Number to be formatted.</param>
        /// <param name="col">Column name (used for deciding on length).</param>
        /// <returns>Formatted string.</returns>
        public static string FormatPaddRight(int numFracDigits, double f, string col)
        {
            return FitRightPadded(FormatNumber(numFracDigits, f), col.Length);
        }

        /// <summary>
        /// Pad a number from left.
        /// </summary>
        /// <param name="n">Number to be formatted.</param>
        /// <param name="col">Column name (used for deciding on length).</param>
        /// <returns>Formatted string.</returns>
        public static string Format(int n, string col)
        {
            return FitLeftPadded(n.ToString(CultureInfo.InvariantCulture), col.Length);
        }

        /// <summary>
        /// Pad a string from right.
        /// </summary>
        /// <param name="s">String to be formatted.</param>
        /// <param name="col">Column name (used for deciding on length).</param>
        /// <returns>Formatted string.</returns>
        public static string Format(string s, string col)
        {
            return FitRightPadded(s, col.Length);
        }

        /// <summary>
        /// Pad a string from left.
        /// </summary>
        /// <param name="s">String to be formatted.</param>
        /// <param name="col">Column name (used for deciding on length).</param>
        /// <returns>Formatted string.</returns>
        public static string FormatPaddLeft(string s, string col)
        {
            return FitLeftPadded(s, col.Length);
        }
    }
}

// Origin: src/Lucene.Net.Benchmark/ByTask/Utils/StreamUtils.cs, added as a new file in Apache Lucene.NET commit b515271d
// (http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Utils/StreamUtils.cs).
using ICSharpCode.SharpZipLib.BZip2;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;

namespace Lucene.Net.Benchmarks.ByTask.Utils
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Stream utilities.
    /// </summary>
    public class StreamUtils
    {
        /// <summary>Buffer size used across the benchmark package</summary>
        public static readonly int BUFFER_SIZE = 1 << 16; // 64K

        // LUCENENET specific - de-nested Type and renamed FileType

        // Keys are lower case; the extension is lower-cased before the lookup as well.
        private static readonly IDictionary<string, FileType> extensionToType = new Dictionary<string, FileType>
        {
            { ".bz2", FileType.BZIP2 },
            { ".bzip", FileType.BZIP2 },
            { ".gz", FileType.GZIP },
            { ".gzip", FileType.GZIP }
        };

        /// <summary>
        /// Returns an <see cref="Stream"/> over the requested file. This method
        /// attempts to identify the appropriate <see cref="Stream"/> instance to return
        /// based on the file name (e.g., if it ends with .bz2 or .bzip, return a
        /// 'bzip' <see cref="Stream"/>).
        /// </summary>
        /// <param name="file">File to open for reading.</param>
        /// <returns>A readable stream; the caller is responsible for disposing it.</returns>
        public static Stream GetInputStream(FileInfo file)
        {
            // First, create a FileStream, as this will be required by all types.
            // It is given BUFFER_SIZE as its buffer for better performance.
            Stream raw = new FileStream(file.FullName, FileMode.Open, FileAccess.Read, FileShare.Read, BUFFER_SIZE);
            try
            {
                return GetFileType(file).GetInputStream(raw);
            }
            catch
            {
                // Do not leak the file handle when the wrapping stream cannot be created.
                raw.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Return the type of the file based on its (case-insensitive) extension,
        /// or <see cref="FileType.PLAIN"/> if the extension is missing or unknown.
        /// </summary>
        private static FileType GetFileType(FileInfo file)
        {
            string fileName = file.Name;
            int idx = fileName.LastIndexOf('.');
            FileType type;
            if (idx != -1 && extensionToType.TryGetValue(fileName.Substring(idx).ToLowerInvariant(), out type))
            {
                return type;
            }
            return FileType.PLAIN;
        }

        /// <summary>
        /// Returns an <see cref="Stream"/> over the requested file, identifying
        /// the appropriate <see cref="Stream"/> instance similar to <see cref="GetInputStream(FileInfo)"/>.
        /// An existing file is overwritten.
        /// </summary>
        /// <param name="file">File to create for writing.</param>
        /// <returns>A writable stream; the caller is responsible for disposing it.</returns>
        public static Stream GetOutputStream(FileInfo file)
        {
            // First, create a FileStream, as this will be required by all types.
            // It is given BUFFER_SIZE as its buffer for better performance.
            Stream raw = new FileStream(file.FullName, FileMode.Create, FileAccess.Write, FileShare.Read, BUFFER_SIZE);
            try
            {
                return GetFileType(file).GetOutputStream(raw);
            }
            catch
            {
                // Do not leak the file handle when the wrapping stream cannot be created.
                raw.Dispose();
                throw;
            }
        }
    }

    /// <summary>File format type.</summary>
    public enum FileType
    {
        /// <summary>
        /// BZIP2 is automatically used for <b>.bz2</b> and <b>.bzip</b> extensions.
        /// </summary>
        BZIP2,

        /// <summary>
        /// GZIP is automatically used for <b>.gz</b> and <b>.gzip</b> extensions.
        /// </summary>
        GZIP,

        /// <summary>
        /// Plain text is used for anything which is not GZIP or BZIP.
        /// </summary>
        PLAIN
    }

    /// <summary>
    /// Maps a <see cref="FileType"/> to the stream wrapper that reads or writes that format.
    /// </summary>
    internal static class FileTypeExtensions
    {
        /// <summary>
        /// Wraps <paramref name="input"/> in a decompressing stream for compressed
        /// types; returns it unchanged for <see cref="FileType.PLAIN"/>.
        /// </summary>
        public static Stream GetInputStream(this FileType fileType, Stream input)
        {
            switch (fileType)
            {
                case FileType.BZIP2:
                    return new BZip2InputStream(input);
                case FileType.GZIP:
                    return new GZipStream(input, CompressionMode.Decompress);
                default:
                    return input;
            }
        }

        /// <summary>
        /// Wraps <paramref name="output"/> in a compressing stream for compressed
        /// types; returns it unchanged for <see cref="FileType.PLAIN"/>.
        /// </summary>
        public static Stream GetOutputStream(this FileType fileType, Stream output)
        {
            switch (fileType)
            {
                case FileType.BZIP2:
                    return new BZip2OutputStream(output);
                case FileType.GZIP:
                    return new GZipStream(output, CompressionMode.Compress);
                default:
                    return output;
            }
        }
    }
}

// Origin: src/Lucene.Net.Benchmark/Constants.cs, added as a new file in Apache Lucene.NET commit b515271d
// (http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Constants.cs).
namespace Lucene.Net.Benchmarks
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Various benchmarking constants (mostly defaults)
    /// </summary>
    public class Constants
    {
        /// <summary>Default run count.</summary>
        public static readonly int DEFAULT_RUN_COUNT = 5;

        /// <summary>Default scale-up value.</summary>
        public static readonly int DEFAULT_SCALE_UP = 5;

        /// <summary>Default log step.</summary>
        public static readonly int DEFAULT_LOG_STEP = 1000;

        /// <summary>Both boolean values, <c>false</c> first.</summary>
        public static bool[] BOOLEANS = new bool[] { false, true };

        /// <summary>Default maximum number of documents: no practical limit.</summary>
        public static readonly int DEFAULT_MAXIMUM_DOCUMENTS = int.MaxValue;
    }
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj new file mode 100644 index 0000000..0241099 --- /dev/null +++ b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj @@ -0,0 +1,214 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" /> + <PropertyGroup> + <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> + <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> + <ProjectGuid>{EDC77CB4-597F-4818-8C83-3C006D12C384}</ProjectGuid> + <OutputType>Library</OutputType> + <AppDesignerFolder>Properties</AppDesignerFolder> + <RootNamespace>Lucene.Net.Benchmarks</RootNamespace> + <AssemblyName>Lucene.Net.Benchmark</AssemblyName> + <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion> + <FileAlignment>512</FileAlignment> + </PropertyGroup> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> + <DebugSymbols>true</DebugSymbols> + <DebugType>full</DebugType> + <Optimize>false</Optimize> + <OutputPath>bin\Debug\</OutputPath> +
<DefineConstants>DEBUG;TRACE</DefineConstants> + <ErrorReport>prompt</ErrorReport> + <WarningLevel>4</WarningLevel> + <DocumentationFile> + </DocumentationFile> + </PropertyGroup> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> + <DebugType>pdbonly</DebugType> + <Optimize>true</Optimize> + <OutputPath>bin\Release\</OutputPath> + <DefineConstants>TRACE</DefineConstants> + <ErrorReport>prompt</ErrorReport> + <WarningLevel>4</WarningLevel> + </PropertyGroup> + <PropertyGroup> + <DefineConstants>$(DefineConstants);FEATURE_SERIALIZABLE</DefineConstants> + </PropertyGroup> + <ItemGroup> + <Reference Include="System" /> + <Reference Include="System.Core" /> + <Reference Include="Microsoft.CSharp" /> + </ItemGroup> + <ItemGroup> + <Compile Include="ByTask\Benchmark.cs" /> + <Compile Include="ByTask\Feeds\AbstractQueryMaker.cs" /> + <Compile Include="ByTask\Feeds\ContentItemsSource.cs" /> + <Compile Include="ByTask\Feeds\ContentSource.cs" /> + <Compile Include="ByTask\Feeds\DemoHTMLParser.cs" /> + <Compile Include="ByTask\Feeds\DirContentSource.cs" /> + <Compile Include="ByTask\Feeds\DocData.cs" /> + <Compile Include="ByTask\Feeds\DocMaker.cs" /> + <Compile Include="ByTask\Feeds\EnwikiContentSource.cs" /> + <Compile Include="ByTask\Feeds\EnwikiQueryMaker.cs" /> + <Compile Include="ByTask\Feeds\FacetSource.cs" /> + <Compile Include="ByTask\Feeds\FileBasedQueryMaker.cs" /> + <Compile Include="ByTask\Feeds\GeonamesLineParser.cs" /> + <Compile Include="ByTask\Feeds\HTMLParser.cs" /> + <Compile Include="ByTask\Feeds\LineDocSource.cs" /> + <Compile Include="ByTask\Feeds\LongToEnglishContentSource.cs" /> + <Compile Include="ByTask\Feeds\LongToEnglishQueryMaker.cs" /> + <Compile Include="ByTask\Feeds\NoMoreDataException.cs" /> + <Compile Include="ByTask\Feeds\QueryMaker.cs" /> + <Compile Include="ByTask\Feeds\RandomFacetSource.cs" /> + <Compile Include="ByTask\Feeds\ReutersContentSource.cs" /> + <Compile Include="ByTask\Feeds\ReutersQueryMaker.cs" 
/> + <Compile Include="ByTask\Feeds\SimpleQueryMaker.cs" /> + <Compile Include="ByTask\Feeds\SimpleSloppyPhraseQueryMaker.cs" /> + <Compile Include="ByTask\Feeds\SingleDocSource.cs" /> + <Compile Include="ByTask\Feeds\SortableSingleDocSource.cs" /> + <Compile Include="ByTask\Feeds\SpatialDocMaker.cs" /> + <Compile Include="ByTask\Feeds\SpatialFileQueryMaker.cs" /> + <Compile Include="ByTask\Feeds\TrecContentSource.cs" /> + <Compile Include="ByTask\Feeds\TrecDocParser.cs" /> + <Compile Include="ByTask\Feeds\TrecFBISParser.cs" /> + <Compile Include="ByTask\Feeds\TrecFR94Parser.cs" /> + <Compile Include="ByTask\Feeds\TrecFTParser.cs" /> + <Compile Include="ByTask\Feeds\TrecGov2Parser.cs" /> + <Compile Include="ByTask\Feeds\TrecLATimesParser.cs" /> + <Compile Include="ByTask\Feeds\TrecParserByPath.cs" /> + <Compile Include="ByTask\PerfRunData.cs" /> + <Compile Include="ByTask\Programmatic\Sample.cs" /> + <Compile Include="ByTask\Stats\Points.cs" /> + <Compile Include="ByTask\Stats\Report.cs" /> + <Compile Include="ByTask\Stats\TaskStats.cs" /> + <Compile Include="ByTask\Tasks\AddDocTask.cs" /> + <Compile Include="ByTask\Tasks\AddFacetedDocTask.cs" /> + <Compile Include="ByTask\Tasks\AddIndexesTask.cs" /> + <Compile Include="ByTask\Tasks\AnalyzerFactoryTask.cs" /> + <Compile Include="ByTask\Tasks\BenchmarkHighlighter.cs" /> + <Compile Include="ByTask\Tasks\ClearStatsTask.cs" /> + <Compile Include="ByTask\Tasks\CloseIndexTask.cs" /> + <Compile Include="ByTask\Tasks\CloseReaderTask.cs" /> + <Compile Include="ByTask\Tasks\CloseTaxonomyIndexTask.cs" /> + <Compile Include="ByTask\Tasks\CloseTaxonomyReaderTask.cs" /> + <Compile Include="ByTask\Tasks\CommitIndexTask.cs" /> + <Compile Include="ByTask\Tasks\CommitTaxonomyIndexTask.cs" /> + <Compile Include="ByTask\Tasks\ConsumeContentSourceTask.cs" /> + <Compile Include="ByTask\Tasks\CreateIndexTask.cs" /> + <Compile Include="ByTask\Tasks\CreateTaxonomyIndexTask.cs" /> + <Compile Include="ByTask\Tasks\ForceMergeTask.cs" /> + 
<Compile Include="ByTask\Tasks\NearRealtimeReaderTask.cs" /> + <Compile Include="ByTask\Tasks\NewAnalyzerTask.cs" /> + <Compile Include="ByTask\Tasks\NewCollationAnalyzerTask.cs" /> + <Compile Include="ByTask\Tasks\NewLocaleTask.cs" /> + <Compile Include="ByTask\Tasks\NewRoundTask.cs" /> + <Compile Include="ByTask\Tasks\OpenIndexTask.cs" /> + <Compile Include="ByTask\Tasks\OpenReaderTask.cs" /> + <Compile Include="ByTask\Tasks\OpenTaxonomyIndexTask.cs" /> + <Compile Include="ByTask\Tasks\OpenTaxonomyReaderTask.cs" /> + <Compile Include="ByTask\Tasks\PerfTask.cs" /> + <Compile Include="ByTask\Tasks\PrintReaderTask.cs" /> + <Compile Include="ByTask\Tasks\ReadTask.cs" /> + <Compile Include="ByTask\Tasks\ReadTokensTask.cs" /> + <Compile Include="ByTask\Tasks\ReopenReaderTask.cs" /> + <Compile Include="ByTask\Tasks\RepAllTask.cs" /> + <Compile Include="ByTask\Tasks\ReportTask.cs" /> + <Compile Include="ByTask\Tasks\RepSelectByPrefTask.cs" /> + <Compile Include="ByTask\Tasks\RepSumByNameRoundTask.cs" /> + <Compile Include="ByTask\Tasks\RepSumByNameTask.cs" /> + <Compile Include="ByTask\Tasks\RepSumByPrefRoundTask.cs" /> + <Compile Include="ByTask\Tasks\RepSumByPrefTask.cs" /> + <Compile Include="ByTask\Tasks\ResetInputsTask.cs" /> + <Compile Include="ByTask\Tasks\ResetSystemEraseTask.cs" /> + <Compile Include="ByTask\Tasks\ResetSystemSoftTask.cs" /> + <Compile Include="ByTask\Tasks\RollbackIndexTask.cs" /> + <Compile Include="ByTask\Tasks\SearchTask.cs" /> + <Compile Include="ByTask\Tasks\SearchTravRetHighlightTask.cs" /> + <Compile Include="ByTask\Tasks\SearchTravRetLoadFieldSelectorTask.cs" /> + <Compile Include="ByTask\Tasks\SearchTravRetTask.cs" /> + <Compile Include="ByTask\Tasks\SearchTravRetVectorHighlightTask.cs" /> + <Compile Include="ByTask\Tasks\SearchTravTask.cs" /> + <Compile Include="ByTask\Tasks\SearchWithCollectorTask.cs" /> + <Compile Include="ByTask\Tasks\SearchWithSortTask.cs" /> + <Compile Include="ByTask\Tasks\SetPropTask.cs" /> + <Compile 
Include="ByTask\Tasks\TaskSequence.cs" /> + <Compile Include="ByTask\Tasks\UpdateDocTask.cs" /> + <Compile Include="ByTask\Tasks\WaitForMergesTask.cs" /> + <Compile Include="ByTask\Tasks\WaitTask.cs" /> + <Compile Include="ByTask\Tasks\WarmTask.cs" /> + <Compile Include="ByTask\Tasks\WriteEnwikiLineDocTask.cs" /> + <Compile Include="ByTask\Tasks\WriteLineDocTask.cs" /> + <Compile Include="ByTask\Utils\Algorithm.cs" /> + <Compile Include="ByTask\Utils\AnalyzerFactory.cs" /> + <Compile Include="ByTask\Utils\Config.cs" /> + <Compile Include="ByTask\Utils\FileUtils.cs" /> + <Compile Include="ByTask\Utils\Format.cs" /> + <Compile Include="ByTask\Utils\StreamUtils.cs" /> + <Compile Include="Constants.cs" /> + <Compile Include="Properties\AssemblyInfo.cs" /> + <Compile Include="Quality\Judge.cs" /> + <Compile Include="Quality\QualityBenchmark.cs" /> + <Compile Include="Quality\QualityQuery.cs" /> + <Compile Include="Quality\QualityQueryParser.cs" /> + <Compile Include="Quality\QualityStats.cs" /> + <Compile Include="Quality\Trec\QueryDriver.cs" /> + <Compile Include="Quality\Trec\Trec1MQReader.cs" /> + <Compile Include="Quality\Trec\TrecJudge.cs" /> + <Compile Include="Quality\Trec\TrecTopicsReader.cs" /> + <Compile Include="Quality\Utils\DocNameExtractor.cs" /> + <Compile Include="Quality\Utils\QualityQueriesFinder.cs" /> + <Compile Include="Quality\Utils\SimpleQQParser.cs" /> + <Compile Include="Quality\Utils\SubmissionReport.cs" /> + <Compile Include="Utils\ExtractReuters.cs" /> + <Compile Include="Utils\ExtractWikipedia.cs" /> + <Compile Include="..\CommonAssemblyInfo.cs"> + <Link>Properties\CommonAssemblyInfo.cs</Link> + </Compile> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj"> + <Project>{4ADD0BBC-B900-4715-9526-D871DE8EEA64}</Project> + <Name>Lucene.Net.Analysis.Common</Name> + </ProjectReference> + <ProjectReference Include="..\Lucene.Net.Facet\Lucene.Net.Facet.csproj"> + 
<Project>{48F7884A-9454-4E88-8413-9D35992CB440}</Project> + <Name>Lucene.Net.Facet</Name> + </ProjectReference> + <ProjectReference Include="..\Lucene.Net.Highlighter\Lucene.Net.Highlighter.csproj"> + <Project>{E9E769EA-8504-44BC-8DC9-CCF958765F8F}</Project> + <Name>Lucene.Net.Highlighter</Name> + </ProjectReference> + <ProjectReference Include="..\Lucene.Net.ICU\Lucene.Net.ICU.csproj"> + <Project>{349cb7c9-7534-4e1d-9b0a-5521441af0ae}</Project> + <Name>Lucene.Net.ICU</Name> + </ProjectReference> + <ProjectReference Include="..\Lucene.Net.Queries\Lucene.Net.Queries.csproj"> + <Project>{69D7956C-C2CC-4708-B399-A188FEC384C4}</Project> + <Name>Lucene.Net.Queries</Name> + </ProjectReference> + <ProjectReference Include="..\Lucene.Net.QueryParser\Lucene.Net.QueryParser.csproj"> + <Project>{949BA34B-6AE6-4CE3-B578-61E13E4D76BF}</Project> + <Name>Lucene.Net.QueryParser</Name> + </ProjectReference> + <ProjectReference Include="..\Lucene.Net.Spatial\Lucene.Net.Spatial.csproj"> + <Project>{35C347F4-24B2-4BE5-8117-A0E3001551CE}</Project> + <Name>Lucene.Net.Spatial</Name> + </ProjectReference> + <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj"> + <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project> + <Name>Lucene.Net</Name> + </ProjectReference> + </ItemGroup> + <ItemGroup> + <None Include="Lucene.Net.Benchmark.project.json" /> + </ItemGroup> + <ItemGroup /> + <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> + <!-- To modify your build process, add your task inside one of the targets below and uncomment it. + Other similar extension points exist, see Microsoft.Common.targets. 
+ <Target Name="BeforeBuild"> + </Target> + <Target Name="AfterBuild"> + </Target> + --> +</Project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.project.json ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.project.json b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.project.json new file mode 100644 index 0000000..0a83392 --- /dev/null +++ b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.project.json @@ -0,0 +1,15 @@ +{ + "runtimes": { + "win": {} + }, + "dependencies": { + "icu.net": "54.1.1-alpha", + "Sax.Net": "2.0.2", + "SharpZipLib": "0.86.0", + "Spatial4n.Core": "0.4.1-beta00003", + "TagSoup.Net": "1.2.1.1" + }, + "frameworks": { + "net451": {} + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Properties/AssemblyInfo.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Properties/AssemblyInfo.cs b/src/Lucene.Net.Benchmark/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..8060798 --- /dev/null +++ b/src/Lucene.Net.Benchmark/Properties/AssemblyInfo.cs @@ -0,0 +1,30 @@ +using System; +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. 
+[assembly: AssemblyTitle("Lucene.Net.Benchmark")] +[assembly: AssemblyDescription( + "System for benchmarking " + + "for the Lucene.Net full-text search engine library from The Apache Software Foundation.")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyDefaultAlias("Lucene.Net.Benchmark")] +[assembly: AssemblyCulture("")] + +[assembly: CLSCompliant(true)] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("edc77cb4-597f-4818-8c83-3c006d12c384")] + +// for testing +[assembly: InternalsVisibleTo("Lucene.Net.Tests.Benchmark")] + +// NOTE: Version information is in CommonAssemblyInfo.cs http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/Judge.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Quality/Judge.cs b/src/Lucene.Net.Benchmark/Quality/Judge.cs new file mode 100644 index 0000000..7cd2089 --- /dev/null +++ b/src/Lucene.Net.Benchmark/Quality/Judge.cs @@ -0,0 +1,55 @@ +using System.IO; + +namespace Lucene.Net.Benchmarks.Quality +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
/// <summary>
/// Decides whether a document is relevant to a quality query.
/// </summary>
public interface IJudge
{
    /// <summary>
    /// Tells whether the document named <paramref name="docName"/> is relevant
    /// for the given quality query.
    /// </summary>
    /// <param name="docName">Name of the document being tested for relevancy.</param>
    /// <param name="query">The quality query the document is tested against.</param>
    /// <returns><c>true</c> when the document is relevant, <c>false</c> otherwise.</returns>
    bool IsRelevant(string docName, QualityQuery query);

    /// <summary>
    /// Checks that the supplied queries and this <see cref="IJudge"/> match each other.
    /// A perfectly valid judge has some data for each and every input quality query
    /// and no data for any other quality query.
    /// <b>Note</b>: a quality benchmark run does not fail on imperfect validity;
    /// only a warning message is logged.
    /// </summary>
    /// <param name="qq">The quality queries to validate against.</param>
    /// <param name="logger">Receives validation issues when not <c>null</c>.</param>
    /// <returns><c>true</c> when perfectly valid, <c>false</c> otherwise.</returns>
    bool ValidateData(QualityQuery[] qq, TextWriter logger);

    /// <summary>
    /// Gets the maximal recall for the input quality query, i.e. the number of
    /// relevant documents this <see cref="IJudge"/> "knows" for that query.
    /// </summary>
    /// <param name="query">The query whose maximal recall is requested.</param>
    /// <returns>The number of relevant documents known for <paramref name="query"/>.</returns>
    int MaxRecall(QualityQuery query);
}
/// <summary>
/// Main entry point for running a quality benchmark.
/// <para/>
/// There are two main configurations for running a quality benchmark:
/// <list type="bullet">
///     <item><description>Against existing judgements.</description></item>
///     <item><description>For submission (e.g. for a contest).</description></item>
/// </list>
/// The first configuration requires a non null <see cref="IJudge"/>.
/// The second configuration requires a non null <see cref="Utils.SubmissionReport"/>.
/// </summary>
public class QualityBenchmark
{
    /// <summary>Quality Queries that this quality benchmark would execute.</summary>
    protected QualityQuery[] m_qualityQueries;

    /// <summary>Parser for turning QualityQueries into Lucene Queries.</summary>
    protected IQualityQueryParser m_qqParser;

    /// <summary>Index to be searched.</summary>
    protected IndexSearcher m_searcher;

    /// <summary>Index field to extract doc name for each search result; used for judging the results.</summary>
    protected string m_docNameField;

    /// <summary>Maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging.</summary>
    private int maxQueries = int.MaxValue;

    /// <summary>Maximal number of results to collect for each query. Default: 1000.</summary>
    private int maxResults = 1000;

    /// <summary>
    /// Create a <see cref="QualityBenchmark"/>.
    /// </summary>
    /// <param name="qqs">Quality queries to run.</param>
    /// <param name="qqParser">Parser for turning QualityQueries into Lucene Queries.</param>
    /// <param name="searcher">Index to be searched.</param>
    /// <param name="docNameField">
    /// Name of field containing the document name.
    /// This allows to extract the doc name for search results,
    /// and is important for judging the results.
    /// </param>
    public QualityBenchmark(QualityQuery[] qqs, IQualityQueryParser qqParser,
        IndexSearcher searcher, string docNameField)
    {
        this.m_qualityQueries = qqs;
        this.m_qqParser = qqParser;
        this.m_searcher = searcher;
        this.m_docNameField = docNameField;
    }

    /// <summary>
    /// Run the quality benchmark over the first
    /// <c>Math.Min(MaxQueries, number of quality queries)</c> queries.
    /// </summary>
    /// <param name="judge">
    /// The judge that can tell if a certain result doc is relevant for a certain quality query.
    /// If null, no judgements would be made. Usually null for a submission run.
    /// </param>
    /// <param name="submitRep">Submission report is created if non null.</param>
    /// <param name="qualityLog">If not null, quality run data would be printed for each query.</param>
    /// <returns>
    /// <see cref="QualityStats"/> of each quality query that was executed.
    /// Entries are <c>null</c> when <paramref name="judge"/> is <c>null</c>,
    /// because stats are only computed while judging.
    /// </returns>
    /// <exception cref="Exception">If quality benchmark failed to run.</exception>
    public virtual QualityStats[] Execute(IJudge judge, SubmissionReport submitRep,
        TextWriter qualityLog)
    {
        int nQueries = Math.Min(maxQueries, m_qualityQueries.Length);
        QualityStats[] stats = new QualityStats[nQueries];
        for (int i = 0; i < nQueries; i++)
        {
            QualityQuery qq = m_qualityQueries[i];
            // generate query
            Query q = m_qqParser.Parse(qq);
            // search with this query, timing only the search itself
            long searchStart = Support.Time.CurrentTimeMilliseconds();
            TopDocs td = m_searcher.Search(q, null, maxResults);
            long searchTime = Support.Time.CurrentTimeMilliseconds() - searchStart;
            // most likely we either submit or judge, but check both
            if (judge != null)
            {
                stats[i] = AnalyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
            }
            if (submitRep != null)
            {
                submitRep.Report(qq, td, m_docNameField, m_searcher);
            }
        }
        if (submitRep != null)
        {
            submitRep.Flush();
        }
        return stats;
    }

    /// <summary>Analyze/judge results for a single quality query; optionally log them.</summary>
    /// <param name="qq">The quality query that produced the results.</param>
    /// <param name="q">The Lucene query parsed from <paramref name="qq"/>; only used for logging.</param>
    /// <param name="td">The search results to judge.</param>
    /// <param name="judge">Judge deciding the relevance of each result; must not be <c>null</c>.</param>
    /// <param name="logger">If not <c>null</c>, the query and its stats are written to it.</param>
    /// <param name="searchTime">Time the search took, in milliseconds.</param>
    /// <returns>The stats accumulated over all results of the query.</returns>
    private QualityStats AnalyzeQueryResults(QualityQuery qq, Query q, TopDocs td, IJudge judge, TextWriter logger, long searchTime)
    {
        QualityStats stts = new QualityStats(judge.MaxRecall(qq), searchTime);
        ScoreDoc[] sd = td.ScoreDocs;
        // For the first doc name we deliberately also measure the construction
        // of the doc name extractor, just in case.
        long extractStart = Support.Time.CurrentTimeMilliseconds();
        DocNameExtractor xt = new DocNameExtractor(m_docNameField);
        for (int i = 0; i < sd.Length; i++)
        {
            string docName = xt.DocName(m_searcher, sd[i].Doc);
            long docNameExtractTime = Support.Time.CurrentTimeMilliseconds() - extractStart;
            bool isRelevant = judge.IsRelevant(docName, qq);
            stts.AddResult(i + 1, isRelevant, docNameExtractTime);
            // Restart the clock only after judging and bookkeeping, so that their
            // cost is not charged to the next document's name-extraction time.
            extractStart = Support.Time.CurrentTimeMilliseconds();
        }
        if (logger != null)
        {
            logger.WriteLine(qq.QueryID + " - " + q);
            stts.Log(qq.QueryID + " Stats:", 1, logger, " ");
        }
        return stts;
    }

    /// <summary>
    /// The maximum number of quality queries to run. Useful at debugging.
    /// </summary>
    public virtual int MaxQueries
    {
        get { return maxQueries; }
        set { maxQueries = value; }
    }

    /// <summary>
    /// The maximum number of results to collect for each quality query.
    /// </summary>
    public virtual int MaxResults
    {
        get { return maxResults; }
        set { maxResults = value; }
    }
}
/// <summary>
/// A QualityQuery has an ID and some name-value pairs.
/// <para/>
/// The ID allows to map the quality query with its judgements.
/// <para/>
/// The name-value pairs are used by a
/// <see cref="IQualityQueryParser"/>
/// to create a Lucene <see cref="Search.Query"/>.
/// <para/>
/// It is very likely that name-value-pairs would be mapped into fields in a Lucene query,
/// but it is up to the QualityQueryParser how to map - e.g. all values in a single field,
/// or each pair as its own field, etc., - and this of course must match the way the
/// searched index was constructed.
/// </summary>
public class QualityQuery : IComparable<QualityQuery>
{
    private readonly string queryID;
    private readonly IDictionary<string, string> nameValPairs;

    /// <summary>
    /// Create a <see cref="QualityQuery"/> with given ID and name-value pairs.
    /// </summary>
    /// <param name="queryID">ID of this quality query.</param>
    /// <param name="nameValPairs">The contents of this quality query.</param>
    public QualityQuery(string queryID, IDictionary<string, string> nameValPairs)
    {
        this.queryID = queryID;
        this.nameValPairs = nameValPairs;
    }

    /// <summary>
    /// Return all the names of name-value-pairs in this <see cref="QualityQuery"/>.
    /// </summary>
    /// <returns>A new array holding the names.</returns>
    public virtual string[] GetNames()
    {
        return nameValPairs.Keys.ToArray();
    }

    /// <summary>
    /// Return the value of a certain name-value pair.
    /// </summary>
    /// <param name="name">The name whose value should be returned.</param>
    /// <returns>The value stored under <paramref name="name"/>, or <c>null</c> when there is none.</returns>
    public virtual string GetValue(string name)
    {
        string result;
        nameValPairs.TryGetValue(name, out result);
        return result;
    }

    /// <summary>
    /// Gets the ID of this query.
    /// The ID allows to map the quality query with its judgements.
    /// </summary>
    public virtual string QueryID
    {
        get { return queryID; }
    }

    /// <summary>
    /// For a nicer sort of input queries before running them.
    /// IDs are compared numerically when both parse as 32-bit integers;
    /// otherwise (not a number, or out of <see cref="int"/> range) they are
    /// compared as strings using ordinal comparison.
    /// </summary>
    /// <param name="other">The query to compare with; <c>null</c> sorts before any instance.</param>
    /// <returns>
    /// A negative value if this query sorts before <paramref name="other"/>,
    /// zero if they are equal, a positive value otherwise.
    /// </returns>
    public virtual int CompareTo(QualityQuery other)
    {
        if (other == null)
        {
            // IComparable convention: every instance is greater than null.
            return 1;
        }

        int n;
        int nOther;
        // TryParse (rather than Parse + catch) also covers ids that are numeric but
        // do not fit in an int, which would otherwise escape as OverflowException.
        if (int.TryParse(queryID, NumberStyles.Integer, CultureInfo.InvariantCulture, out n)
            && int.TryParse(other.queryID, NumberStyles.Integer, CultureInfo.InvariantCulture, out nOther))
        {
            // CompareTo instead of "n - nOther": the subtraction overflows (and flips
            // sign) when the ids are far apart, e.g. int.MinValue vs. 1.
            return n.CompareTo(nOther);
        }

        // fall back to string comparison
        return string.CompareOrdinal(queryID, other.queryID);
    }
}
/// <summary>
/// Turns a <see cref="QualityQuery"/> into a Lucene query.
/// </summary>
public interface IQualityQueryParser
{
    /// <summary>
    /// Parses the given <see cref="QualityQuery"/> into a Lucene query.
    /// </summary>
    /// <param name="qq">The quality query to parse.</param>
    /// <returns>The Lucene query created from <paramref name="qq"/>.</returns>
    /// <exception cref="FormatException">If parsing failed.</exception>
    Query Parse(QualityQuery qq);
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Results of quality benchmark run for a single query or for a set of queries. + /// </summary> + public class QualityStats + { + /// <summary>Number of points for which precision is computed.</summary> + public static readonly int MAX_POINTS = 20; + + private double maxGoodPoints; + private double recall; + private double[] pAt; + private double pReleventSum = 0; + private double numPoints = 0; + private double numGoodPoints = 0; + private double mrr = 0; + private long searchTime; + private long docNamesExtractTime; + + /// <summary> + /// A certain rank in which a relevant doc was found. + /// </summary> + public class RecallPoint + { + private int rank; + private double recall; + internal RecallPoint(int rank, double recall) + { + this.rank = rank; + this.recall = recall; + } + + /// <summary>Returns the rank: where on the list of returned docs this relevant doc appeared.</summary> + public virtual int Rank + { + get { return rank; } + } + + /// <summary>Returns the recall: how many relevant docs were returned up to this point, inclusive.</summary> + public virtual double Recall + { + get { return recall; } + } + } + + private IList<RecallPoint> recallPoints; + + /// <summary> + /// Construct a QualityStats object with anticipated maximal number of relevant hits. 
+ /// </summary> + /// <param name="maxGoodPoints">maximal possible relevant hits.</param> + /// <param name="searchTime"></param> + public QualityStats(double maxGoodPoints, long searchTime) + { + this.maxGoodPoints = maxGoodPoints; + this.searchTime = searchTime; + this.recallPoints = new List<RecallPoint>(); + pAt = new double[MAX_POINTS + 1]; // pAt[0] unused. + } + + /// <summary> + /// Add a (possibly relevant) doc. + /// </summary> + /// <param name="n">rank of the added doc (its ordinal position within the query results).</param> + /// <param name="isRelevant"><c>true</c> if the added doc is relevant, <c>false</c> otherwise.</param> + /// <param name="docNameExtractTime"></param> + public virtual void AddResult(int n, bool isRelevant, long docNameExtractTime) + { + if (Math.Abs(numPoints + 1 - n) > 1E-6) + { + throw new ArgumentException("point " + n + " illegal after " + numPoints + " points!"); + } + if (isRelevant) + { + numGoodPoints += 1; + recallPoints.Add(new RecallPoint(n, numGoodPoints)); + if (recallPoints.Count == 1 && n <= 5) + { // first point, but only within 5 top scores. + mrr = 1.0 / n; + } + } + numPoints = n; + double p = numGoodPoints / numPoints; + if (isRelevant) + { + pReleventSum += p; + } + if (n < pAt.Length) + { + pAt[n] = p; + } + recall = maxGoodPoints <= 0 ? p : numGoodPoints / maxGoodPoints; + docNamesExtractTime += docNameExtractTime; + } + + /// <summary> + /// Return the precision at rank n: + /// |{relevant hits within first <c>n</c> hits}| / <c>n</c>. 
+ /// </summary> + /// <param name="n">requested precision point, must be at least 1 and at most <see cref="MAX_POINTS"/>.</param> + /// <returns></returns> + public virtual double GetPrecisionAt(int n) + { + if (n < 1 || n > MAX_POINTS) + { + throw new ArgumentException("n=" + n + " - but it must be in [1," + MAX_POINTS + "] range!"); + } + if (n > numPoints) + { + return (numPoints * pAt[(int)numPoints]) / n; + } + return pAt[n]; + } + + /// <summary> + /// Return the average precision at recall points. + /// </summary> + public virtual double GetAvp() + { + return maxGoodPoints == 0 ? 0 : pReleventSum / maxGoodPoints; + } + + /// <summary> + /// Return the recall: |{relevant hits found}| / |{relevant hits existing}|. + /// </summary> + public virtual double Recall + { + get { return recall; } + } + + /// <summary> + /// Log information on this <see cref="QualityStats"/> object. + /// </summary> + /// <param name="title"></param> + /// <param name="paddLines"></param> + /// <param name="logger">Logger.</param> + /// <param name="prefix">prefix before each log line.</param> + public virtual void Log(string title, int paddLines, TextWriter logger, string prefix) + { + for (int i = 0; i < paddLines; i++) + { + logger.WriteLine(); + } + if (title != null && title.Trim().Length > 0) + { + logger.WriteLine(title); + } + prefix = prefix == null ? 
"" : prefix; + string nf = "{0:F3}"; + int M = 19; + logger.WriteLine(prefix + Format("Search Seconds: ", M) + + FracFormat(string.Format(CultureInfo.InvariantCulture, nf, (double)searchTime / 1000))); + logger.WriteLine(prefix + Format("DocName Seconds: ", M) + + FracFormat(string.Format(CultureInfo.InvariantCulture, nf, (double)docNamesExtractTime / 1000))); + logger.WriteLine(prefix + Format("Num Points: ", M) + + FracFormat(string.Format(CultureInfo.InvariantCulture, nf, numPoints))); + logger.WriteLine(prefix + Format("Num Good Points: ", M) + + FracFormat(string.Format(CultureInfo.InvariantCulture, nf, numGoodPoints))); + logger.WriteLine(prefix + Format("Max Good Points: ", M) + + FracFormat(string.Format(CultureInfo.InvariantCulture, nf, maxGoodPoints))); + logger.WriteLine(prefix + Format("Average Precision: ", M) + + FracFormat(string.Format(CultureInfo.InvariantCulture, nf, GetAvp()))); + logger.WriteLine(prefix + Format("MRR: ", M) + + FracFormat(string.Format(CultureInfo.InvariantCulture, nf, MRR))); + logger.WriteLine(prefix + Format("Recall: ", M) + + FracFormat(string.Format(CultureInfo.InvariantCulture, nf, Recall))); + for (int i = 1; i < (int)numPoints && i < pAt.Length; i++) + { + logger.WriteLine(prefix + Format("Precision At " + i + ": ", M) + + FracFormat(string.Format(CultureInfo.InvariantCulture, nf, GetPrecisionAt(i)))); + } + for (int i = 0; i < paddLines; i++) + { + logger.WriteLine(); + } + } + + private static string padd = " "; + private string Format(string s, int minLen) + { + s = (s == null ? "" : s); + int n = Math.Max(minLen, s.Length); + return (s + padd).Substring(0, n-0); + } + private string FracFormat(string frac) + { + int k = frac.IndexOf('.'); + string s1 = padd + frac.Substring(0, k-0); + int n = Math.Max(k, 6); + s1 = s1.Substring(s1.Length - n); + return s1 + frac.Substring(k); + } + + /// <summary> + /// Create a <see cref="QualityStats"/> object that is the average of the input <see cref="QualityStats"/> objects. 
+ /// </summary> + /// <param name="stats">array of input stats to be averaged.</param> + /// <returns>an average over the input stats.</returns> + public static QualityStats Average(QualityStats[] stats) + { + QualityStats avg = new QualityStats(0, 0); + if (stats.Length == 0) + { + // weired, no stats to average! + return avg; + } + int m = 0; // queries with positive judgements + // aggregate + for (int i = 0; i < stats.Length; i++) + { + avg.searchTime += stats[i].searchTime; + avg.docNamesExtractTime += stats[i].docNamesExtractTime; + if (stats[i].maxGoodPoints > 0) + { + m++; + avg.numGoodPoints += stats[i].numGoodPoints; + avg.numPoints += stats[i].numPoints; + avg.pReleventSum += stats[i].GetAvp(); + avg.recall += stats[i].recall; + avg.mrr += stats[i].MRR; + avg.maxGoodPoints += stats[i].maxGoodPoints; + for (int j = 1; j < avg.pAt.Length; j++) + { + avg.pAt[j] += stats[i].GetPrecisionAt(j); + } + } + } + Debug.Assert(m> 0, "Fishy: no \"good\" queries!"); + // take average: times go by all queries, other measures go by "good" queries only. + avg.searchTime /= stats.Length; + avg.docNamesExtractTime /= stats.Length; + avg.numGoodPoints /= m; + avg.numPoints /= m; + avg.recall /= m; + avg.mrr /= m; + avg.maxGoodPoints /= m; + for (int j = 1; j < avg.pAt.Length; j++) + { + avg.pAt[j] /= m; + } + avg.pReleventSum /= m; // this is actually avgp now + avg.pReleventSum *= avg.maxGoodPoints; // so that getAvgP() would be correct + + return avg; + } + + /// <summary> + /// Returns the time it took to extract doc names for judging the measured query, in milliseconds. + /// </summary> + public virtual long DocNamesExtractTime + { + get { return docNamesExtractTime; } + } + + /// <summary> + /// Returns the maximal number of good points. + /// This is the number of relevant docs known by the judge for the measured query. 
/// </summary>
        public virtual double MaxGoodPoints
        {
            get
            {
                return maxGoodPoints;
            }
        }

        /// <summary>
        /// Gets the count of good points, i.e. relevant points only.
        /// </summary>
        public virtual double NumGoodPoints
        {
            get
            {
                return numGoodPoints;
            }
        }

        /// <summary>
        /// Gets the count of all points, relevant and irrelevant alike.
        /// </summary>
        public virtual double NumPoints
        {
            get
            {
                return numPoints;
            }
        }

        /// <summary>
        /// Gets the recall points as an array.
        /// </summary>
        /// <returns>The array produced by calling <c>ToArray()</c> on the recall points.</returns>
        public virtual RecallPoint[] GetRecallPoints()
        {
            RecallPoint[] points = recallPoints.ToArray();
            return points;
        }

        /// <summary>
        /// Gets the Mean reciprocal rank over the queries, or the RR for a single query.
        /// </summary>
        /// <remarks>
        /// Reciprocal rank is defined as <c>1/r</c>, with <c>r</c> being the rank of the
        /// first correct result; it is <c>0</c> when no correct result appears within
        /// the top 5 results.
        /// <para/>
        /// This follows the definition in
        /// <a href="http://www.cnlp.org/publications/02cnlptrec10.pdf">
        /// Question Answering - CNLP at the TREC-10 Question Answering Track</a>.
        /// </remarks>
        public virtual double MRR
        {
            get
            {
                return mrr;
            }
        }


        /// <summary>
        /// Returns the search time in milliseconds for the measured query.
+ /// </summary> + public virtual long SearchTime + { + get { return searchTime; } + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs b/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs new file mode 100644 index 0000000..0540e62 --- /dev/null +++ b/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs @@ -0,0 +1,93 @@ +using Lucene.Net.Benchmarks.Quality.Utils; +using Lucene.Net.Index; +using Lucene.Net.Search; +using Lucene.Net.Support; +using Lucene.Net.Util; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; + +namespace Lucene.Net.Benchmarks.Quality.Trec +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Command-line tool for doing a TREC evaluation run. 
/// </summary>
    public class QueryDriver
    {
        /// <summary>
        /// Runs the topics against the index, writes a trec_eval submission file and
        /// logs per-query statistics plus a summary average to standard output.
        /// </summary>
        /// <param name="args">
        /// <c>topicsFile qrelsFile submissionFile indexDir [querySpec]</c>; with any other
        /// argument count the usage text is printed to standard error and the process exits with code 1.
        /// </param>
        public static void Main(string[] args)
        {
            if (args.Length < 4 || args.Length > 5)
            {
                SystemConsole.Error.WriteLine("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]");
                SystemConsole.Error.WriteLine("topicsFile: input file containing queries");
                SystemConsole.Error.WriteLine("qrelsFile: input file containing relevance judgements");
                SystemConsole.Error.WriteLine("submissionFile: output submission file for trec_eval");
                SystemConsole.Error.WriteLine("indexDir: index directory");
                SystemConsole.Error.WriteLine("querySpec: string composed of fields to use in query consisting of T=title,D=description,N=narrative:");
                SystemConsole.Error.WriteLine("\texample: TD (query on Title + Description). The default is T (title only)");
                Environment.Exit(1);
            }

            FileInfo topicsFile = new FileInfo(args[0]);
            FileInfo qrelsFile = new FileInfo(args[1]);

            // The submission writer is owned here, so dispose it here: that flushes buffered
            // lines and releases the file handle even if the run fails part-way.
            // UTF8Encoding(false) suppresses the byte-order mark that Encoding.UTF8 would make
            // StreamWriter emit at the start of the file read by trec_eval.
            using (StreamWriter submissionWriter = new StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write), new UTF8Encoding(false)))
            using (Store.FSDirectory dir = Store.FSDirectory.Open(new DirectoryInfo(args[3])))
            using (IndexReader reader = DirectoryReader.Open(dir))
            {
                SubmissionReport submitLog = new SubmissionReport(submissionWriter, "lucene");
                string fieldSpec = args.Length == 5 ? args[4] : "T"; // default to Title-only if not specified.
                IndexSearcher searcher = new IndexSearcher(reader);

                int maxResults = 1000;
                string docNameField = "docname";

                TextWriter logger = SystemConsole.Out;

                // use trec utilities to read trec topics into quality queries
                TrecTopicsReader qReader = new TrecTopicsReader();
                QualityQuery[] qqs = qReader.ReadQueries(IOUtils.GetDecodingReader(topicsFile, Encoding.UTF8));

                // prepare judge, with trec utilities that read from a QRels file
                IJudge judge = new TrecJudge(IOUtils.GetDecodingReader(qrelsFile, Encoding.UTF8));

                // validate topics & judgments match each other
                judge.ValidateData(qqs, logger);

                // map the querySpec letters onto the topic fields to query
                ISet<string> fieldSet = new HashSet<string>();
                if (fieldSpec.IndexOf('T') >= 0) fieldSet.Add("title");
                if (fieldSpec.IndexOf('D') >= 0) fieldSet.Add("description");
                if (fieldSpec.IndexOf('N') >= 0) fieldSet.Add("narrative");

                // set the parsing of quality queries into Lucene queries.
                IQualityQueryParser qqParser = new SimpleQQParser(fieldSet.ToArray(), "body");

                // run the benchmark
                QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
                qrun.MaxResults = maxResults;
                QualityStats[] stats = qrun.Execute(judge, submitLog, logger);

                // print an average sum of the results
                QualityStats avg = QualityStats.Average(stats);
                avg.Log("SUMMARY", 2, logger, " ");
            }
        }
    }
}