using Lucene.Net.Benchmarks.ByTask.Utils;
using System;
using System.Collections.Generic;
using System.Globalization;

namespace Lucene.Net.Benchmarks.ByTask.Feeds
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Adds fields appropriate for sorting: country, random_string and sort_field
    /// (int). Supports the following parameters:
    /// <list type="bullet">
    /// <item><term><b>sort.rng</b></term><description>defines the range for the sort-by-int field (default <b>20000</b>).</description></item>
    /// <item><term><b>rand.seed</b></term><description>defines the seed to initialize <see cref="Random"/> with (default <b>13</b>).</description></item>
    /// </list>
    /// </summary>
    public class SortableSingleDocSource : SingleDocSource
    {
        // Country names used as the pool for the random "country" field.
        // The data matches the upstream Java benchmark list verbatim (including the
        // odd "alestinian flag West Bank and Gaza" entry, kept for port parity).
        // LUCENENET: marked readonly (never reassigned) and repaired the
        // mojibake in "São Tomé and Príncipe".
        private static readonly string[] COUNTRIES = new string[] {
            "European Union", "United States", "Japan", "Germany", "China (PRC)",
            "United Kingdom", "France", "Italy", "Spain", "Canada", "Brazil", "Russia",
            "India", "South Korea", "Australia", "Mexico", "Netherlands", "Turkey",
            "Sweden", "Belgium", "Indonesia", "Switzerland", "Poland", "Norway",
            "Republic of China", "Saudi Arabia", "Austria", "Greece", "Denmark", "Iran",
            "South Africa", "Argentina", "Ireland", "Thailand", "Finland", "Venezuela",
            "Portugal", "Hong Kong", "United Arab Emirates", "Malaysia",
            "Czech Republic", "Colombia", "Nigeria", "Romania", "Chile", "Israel",
            "Singapore", "Philippines", "Pakistan", "Ukraine", "Hungary", "Algeria",
            "New Zealand", "Egypt", "Kuwait", "Peru", "Kazakhstan", "Slovakia",
            "Morocco", "Bangladesh", "Vietnam", "Qatar", "Angola", "Libya", "Iraq",
            "Croatia", "Luxembourg", "Sudan", "Slovenia", "Cuba", "Belarus", "Ecuador",
            "Serbia", "Oman", "Bulgaria", "Lithuania", "Syria", "Dominican Republic",
            "Tunisia", "Guatemala", "Azerbaijan", "Sri Lanka", "Kenya", "Latvia",
            "Turkmenistan", "Costa Rica", "Lebanon", "Uruguay", "Uzbekistan", "Yemen",
            "Cyprus", "Estonia", "Trinidad and Tobago", "Cameroon", "El Salvador",
            "Iceland", "Panama", "Bahrain", "Ivory Coast", "Ethiopia", "Tanzania",
            "Jordan", "Ghana", "Bosnia and Herzegovina", "Macau", "Burma", "Bolivia",
            "Brunei", "Botswana", "Honduras", "Gabon", "Uganda", "Jamaica", "Zambia",
            "Senegal", "Paraguay", "Albania", "Equatorial Guinea", "Georgia",
            "Democratic Republic of the Congo", "Nepal", "Afghanistan", "Cambodia",
            "Armenia", "Republic of the Congo", "Mozambique", "Republic of Macedonia",
            "Malta", "Namibia", "Madagascar", "Chad", "Burkina Faso", "Mauritius",
            "Mali", "The Bahamas", "Papua New Guinea", "Nicaragua", "Haiti", "Benin",
            "alestinian flag West Bank and Gaza", "Jersey", "Fiji", "Guinea", "Moldova",
            "Niger", "Laos", "Mongolia", "French Polynesia", "Kyrgyzstan", "Barbados",
            "Tajikistan", "Malawi", "Liechtenstein", "New Caledonia", "Kosovo",
            "Rwanda", "Montenegro", "Swaziland", "Guam", "Mauritania", "Guernsey",
            "Isle of Man", "Togo", "Somalia", "Suriname", "Aruba", "North Korea",
            "Zimbabwe", "Central African Republic", "Faroe Islands", "Greenland",
            "Sierra Leone", "Lesotho", "Cape Verde", "Eritrea", "Bhutan", "Belize",
            "Antigua and Barbuda", "Gibraltar", "Maldives", "San Marino", "Guyana",
            "Burundi", "Saint Lucia", "Djibouti", "British Virgin Islands", "Liberia",
            "Seychelles", "The Gambia", "Northern Mariana Islands", "Grenada",
            "Saint Vincent and the Grenadines", "Saint Kitts and Nevis", "East Timor",
            "Vanuatu", "Comoros", "Samoa", "Solomon Islands", "Guinea-Bissau",
            "American Samoa", "Dominica", "Micronesia", "Tonga", "Cook Islands",
            "Palau", "Marshall Islands", "São Tomé and Príncipe", "Anguilla",
            "Kiribati", "Tuvalu", "Niue" };

        private int sortRange; // exclusive upper bound for the random sort_field value; set in SetConfig
        private Random r;      // seeded in SetConfig for reproducible runs

        /// <summary>
        /// Delegates to the base source for the document body, then attaches the
        /// three sortable properties (sort_field, random_string, country) to the
        /// returned <see cref="DocData"/>.
        /// </summary>
        public override DocData GetNextDocData(DocData docData)
        {
            docData = base.GetNextDocData(docData);
            var props = new Dictionary<string, string>();

            // random int in [0, sortRange)
            props["sort_field"] = r.Next(sortRange).ToString(CultureInfo.InvariantCulture);

            // random 7-bit-char string, length in [2, 19]
            int len = NextInt32(2, 20);
            char[] buffer = new char[len];
            for (int i = 0; i < len; i++)
            {
                buffer[i] = (char)r.Next(0x80);
            }
            props["random_string"] = new string(buffer);

            // random country
            props["country"] = COUNTRIES[r.Next(COUNTRIES.Length)];
            docData.Props = props;
            return docData;
        }

        /// <summary>Returns a random int in <c>[start, end)</c>.</summary>
        private int NextInt32(int start, int end)
        {
            return start + r.Next(end - start);
        }

        /// <summary>
        /// Reads <c>sort.rng</c> (default 20000) and <c>rand.seed</c> (default 13)
        /// from the configuration.
        /// </summary>
        public override void SetConfig(Config config)
        {
            base.SetConfig(config);
            sortRange = config.Get("sort.rng", 20000);
            r = new Random(config.Get("rand.seed", 13));
        }
    }
}
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Documents;
using Lucene.Net.Spatial;
using Lucene.Net.Spatial.Prefix;
using Lucene.Net.Spatial.Prefix.Tree;
using Lucene.Net.Support;
using Spatial4n.Core.Context;
using Spatial4n.Core.Shapes;
using System;
using System.Collections;
using System.Collections.Generic;

namespace Lucene.Net.Benchmarks.ByTask.Feeds
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Indexes spatial data according to a configured <see cref="SpatialStrategy"/> with optional
    /// shape transformation via a configured <see cref="IShapeConverter"/>. The converter can turn points into
    /// circles and bounding boxes, in order to vary the type of indexing performance tests.
    /// Unless it's subclassed to do otherwise, this class configures a <see cref="SpatialContext"/>,
    /// <see cref="SpatialPrefixTree"/>, and <see cref="RecursivePrefixTreeStrategy"/>. The strategy is made
    /// available to a query maker via the static method <see cref="GetSpatialStrategy(int)"/>.
    /// See spatial.alg for a listing of spatial parameters, in particular those starting with "spatial."
    /// and "doc.spatial".
    /// </summary>
    public class SpatialDocMaker : DocMaker
    {
        public static readonly string SPATIAL_FIELD = "spatial";

        // Caches the SpatialStrategy by round number so the query maker of the same
        // round can look it up via GetSpatialStrategy().
        private static IDictionary<int, SpatialStrategy> spatialStrategyCache = new Dictionary<int, SpatialStrategy>();

        private SpatialStrategy strategy;
        private IShapeConverter shapeConverter;

        /// <summary>
        /// Looks up the <see cref="SpatialStrategy"/> from the given round --
        /// <see cref="Config.RoundNumber"/>. It's an error
        /// if it wasn't created already for this round -- when <see cref="SpatialDocMaker"/> is initialized.
        /// </summary>
        /// <exception cref="InvalidOperationException">If no strategy was registered for <paramref name="roundNumber"/>.</exception>
        public static SpatialStrategy GetSpatialStrategy(int roundNumber)
        {
            SpatialStrategy result;
            if (!spatialStrategyCache.TryGetValue(roundNumber, out result) || result == null)
            {
                throw new InvalidOperationException("Strategy should have been init'ed by SpatialDocMaker by now");
            }
            return result;
        }

        /// <summary>
        /// Builds a <see cref="SpatialStrategy"/> from configuration options.
        /// </summary>
        protected virtual SpatialStrategy MakeSpatialStrategy(Config config)
        {
            // A dictionary view of Config that prefixes keys with "spatial."
            var configMap = new DictionaryAnonymousHelper(config);

            SpatialContext ctx = SpatialContextFactory.MakeSpatialContext(configMap /*, null*/); // LUCENENET TODO: What is this extra param?

            // Some day the strategy might be initialized with a factory but such a
            // factory is non-existent.
            return MakeSpatialStrategy(config, configMap, ctx);
        }

        /// <summary>
        /// A read-only <see cref="IDictionary{TKey, TValue}"/> view over <see cref="Config"/>
        /// that prefixes every requested key with "spatial.".
        /// LUCENENET: the previous implementation subclassed <see cref="Dictionary{TKey, TValue}"/>
        /// and hid the indexer with <c>new</c>; hidden members are never dispatched through the
        /// <see cref="IDictionary{TKey, TValue}"/> interface that the spatial factories receive,
        /// so every lookup hit the (empty) base dictionary. Implementing the interface directly
        /// routes the lookups to <see cref="Config"/>.
        /// </summary>
        private class DictionaryAnonymousHelper : IDictionary<string, string>
        {
            private readonly Config config;

            public DictionaryAnonymousHelper(Config config)
            {
                this.config = config;
            }

            public string this[string key]
            {
                get { return config.Get("spatial." + key, null); }
                set { throw new NotSupportedException(); }
            }

            public bool TryGetValue(string key, out string value)
            {
                value = config.Get("spatial." + key, null);
                return value != null;
            }

            public bool ContainsKey(string key)
            {
                return config.Get("spatial." + key, null) != null;
            }

            // The spatial factories only read individual keys; enumeration and
            // mutation are intentionally unsupported for this view.
            public ICollection<string> Keys { get { throw new NotSupportedException(); } }
            public ICollection<string> Values { get { throw new NotSupportedException(); } }
            public int Count { get { throw new NotSupportedException(); } }
            public bool IsReadOnly { get { return true; } }
            public void Add(string key, string value) { throw new NotSupportedException(); }
            public void Add(KeyValuePair<string, string> item) { throw new NotSupportedException(); }
            public bool Remove(string key) { throw new NotSupportedException(); }
            public bool Remove(KeyValuePair<string, string> item) { throw new NotSupportedException(); }
            public void Clear() { throw new NotSupportedException(); }
            public bool Contains(KeyValuePair<string, string> item) { throw new NotSupportedException(); }
            public void CopyTo(KeyValuePair<string, string>[] array, int arrayIndex) { throw new NotSupportedException(); }
            public IEnumerator<KeyValuePair<string, string>> GetEnumerator() { throw new NotSupportedException(); }
            IEnumerator IEnumerable.GetEnumerator() { throw new NotSupportedException(); }
        }

        /// <summary>
        /// Builds a <see cref="RecursivePrefixTreeStrategy"/> over a prefix-tree grid,
        /// honoring "query.spatial.prefixGridScanLevel" (negative values are relative
        /// to the grid's max level) and "spatial.distErrPct".
        /// </summary>
        protected virtual SpatialStrategy MakeSpatialStrategy(Config config, IDictionary<string, string> configMap,
            SpatialContext ctx)
        {
            // A factory for the prefix tree grid
            SpatialPrefixTree grid = SpatialPrefixTreeFactory.MakeSPT(configMap, /*null,*/ ctx); // LUCENENET TODO: What is this extra param?

            RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategyAnonymousHelper(grid, SPATIAL_FIELD, config);

            int prefixGridScanLevel = config.Get("query.spatial.prefixGridScanLevel", -4);
            if (prefixGridScanLevel < 0)
                prefixGridScanLevel = grid.MaxLevels + prefixGridScanLevel;
            strategy.PrefixGridScanLevel = prefixGridScanLevel;

            double distErrPct = config.Get("spatial.distErrPct", .025); // doc & query; a default
            strategy.DistErrPct = distErrPct;
            return strategy;
        }

        /// <summary>
        /// <see cref="RecursivePrefixTreeStrategy"/> whose points-only flag comes from
        /// the "spatial.docPointsOnly" config key.
        /// </summary>
        private class RecursivePrefixTreeStrategyAnonymousHelper : RecursivePrefixTreeStrategy
        {
            public RecursivePrefixTreeStrategyAnonymousHelper(SpatialPrefixTree grid, string fieldName, Config config)
                : base(grid, fieldName)
            {
                this.m_pointsOnly = config.Get("spatial.docPointsOnly", false);
            }
        }

        /// <summary>
        /// Initializes the strategy and shape converter for the current round and
        /// registers the strategy in the per-round cache.
        /// </summary>
        public override void SetConfig(Config config, ContentSource source)
        {
            base.SetConfig(config, source);
            SpatialStrategy existing;
            if (!spatialStrategyCache.TryGetValue(config.RoundNumber, out existing) || existing == null)
            {
                // new round; we need to re-initialize
                strategy = MakeSpatialStrategy(config);
                spatialStrategyCache[config.RoundNumber] = strategy;
                // TODO remove previous round config?
                shapeConverter = MakeShapeConverter(strategy, config, "doc.spatial.");
                SystemConsole.WriteLine("Spatial Strategy: " + strategy);
            }
        }

        /// <summary>
        /// Optionally converts points to circles, and optionally bbox'es the result.
        /// By default (all config keys absent) the returned converter does no conversion.
        /// </summary>
        public static IShapeConverter MakeShapeConverter(SpatialStrategy spatialStrategy,
            Config config, string configKeyPrefix)
        {
            double radiusDegrees = config.Get(configKeyPrefix + "radiusDegrees", 0.0);
            double plusMinus = config.Get(configKeyPrefix + "radiusDegreesRandPlusMinus", 0.0);
            bool bbox = config.Get(configKeyPrefix + "bbox", false);

            return new ShapeConverterAnonymousHelper(spatialStrategy, radiusDegrees, plusMinus, bbox);
        }

        /// <summary>
        /// Turns a point into a circle of (optionally randomized) radius, and/or
        /// replaces any shape with its bounding box.
        /// </summary>
        private class ShapeConverterAnonymousHelper : IShapeConverter
        {
            private readonly SpatialStrategy spatialStrategy;
            private readonly double radiusDegrees;
            private readonly double plusMinus;
            private readonly bool bbox;

            public ShapeConverterAnonymousHelper(SpatialStrategy spatialStrategy, double radiusDegrees, double plusMinus, bool bbox)
            {
                this.spatialStrategy = spatialStrategy;
                this.radiusDegrees = radiusDegrees;
                this.plusMinus = plusMinus;
                this.bbox = bbox;
            }

            public IShape Convert(IShape shape)
            {
                if (shape is IPoint && (radiusDegrees != 0.0 || plusMinus != 0.0))
                {
                    IPoint point = (IPoint)shape;
                    double radius = radiusDegrees;
                    if (plusMinus > 0.0)
                    {
                        Random random = new Random(point.GetHashCode()); // use hashCode so it's reproducibly random
                        radius += random.NextDouble() * 2 * plusMinus - plusMinus;
                        radius = Math.Abs(radius); // can happen if configured plusMinus > radiusDegrees
                    }
                    shape = spatialStrategy.SpatialContext.MakeCircle(point, radius);
                }
                if (bbox)
                {
                    shape = shape.BoundingBox;
                }
                return shape;
            }
        }

        // LUCENENET specific: de-nested IShapeConverter

        /// <summary>
        /// Builds the base document, then parses its body as a WKT shape, converts it
        /// via the configured <see cref="IShapeConverter"/>, and adds the resulting
        /// strategy fields to the document.
        /// </summary>
        public override Document MakeDocument()
        {
            DocState docState = GetDocState();

            Document doc = base.MakeDocument();

            // Set SPATIAL_FIELD from body
            DocData docData = docState.docData;
            // makeDocument() resets docState.getBody() so we can't look there; look in Document
            string shapeStr = doc.GetField(DocMaker.BODY_FIELD).GetStringValue();
            IShape shape = MakeShapeFromString(strategy, docData.Name, shapeStr);
            if (shape != null)
            {
                shape = shapeConverter.Convert(shape);
                // index
                foreach (Field f in strategy.CreateIndexableFields(shape))
                {
                    doc.Add(f);
                }
            }

            return doc;
        }

        /// <summary>
        /// Parses <paramref name="shapeStr"/> as WKT via the strategy's spatial context.
        /// Returns <c>null</c> (and logs to stderr) when the string is empty or unparseable,
        /// so a bad input line skips the document rather than aborting the run.
        /// </summary>
        public static IShape MakeShapeFromString(SpatialStrategy strategy, string name, string shapeStr)
        {
            if (shapeStr != null && shapeStr.Length > 0)
            {
                try
                {
                    return strategy.SpatialContext.ReadShapeFromWkt(shapeStr);
                }
                catch (Exception e)
                { // InvalidShapeException TODO
                    SystemConsole.Error.WriteLine("Shape " + name + " wasn't parseable: " + e + " (skipping it)");
                    return null;
                }
            }
            return null;
        }

        /// <summary>Not supported for spatial documents.</summary>
        public override Document MakeDocument(int size)
        {
            // TODO consider abusing the 'size' notion to number of shapes per document
            throw new NotSupportedException();
        }
    }

    /// <summary>
    /// Converts one shape to another. Created by
    /// <see cref="SpatialDocMaker.MakeShapeConverter(SpatialStrategy, Config, string)"/>.
    /// </summary>
    public interface IShapeConverter
    {
        IShape Convert(IShape shape);
    }
}
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Queries;
using Lucene.Net.Queries.Function;
using Lucene.Net.Search;
using Lucene.Net.Spatial;
using Lucene.Net.Spatial.Queries;
using Spatial4n.Core.Shapes;
using System.Collections.Generic;

namespace Lucene.Net.Benchmarks.ByTask.Feeds
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Reads spatial data from the body field docs from an internally created <see cref="LineDocSource"/>.
    /// It's parsed by <see cref="Spatial4n.Core.Context.SpatialContext.ReadShapeFromWkt(string)"/> and then
    /// further manipulated via a configurable <see cref="IShapeConverter"/>. When using point
    /// data, it's likely you'll want to configure the shape converter so that the query shapes actually
    /// cover a region. The queries are all created &amp; cached in advance. This query maker works in
    /// conjunction with <see cref="SpatialDocMaker"/>. See spatial.alg for a listing of options, in
    /// particular the options starting with "query.".
    /// </summary>
    public class SpatialFileQueryMaker : AbstractQueryMaker
    {
        protected SpatialStrategy m_strategy;
        protected double m_distErrPct; // NaN if not set
        protected SpatialOperation m_operation;
        protected bool m_score;

        protected IShapeConverter m_shapeConverter;

        /// <summary>
        /// Reads the "query.spatial.*" options and fetches the strategy that
        /// <see cref="SpatialDocMaker"/> registered for this round.
        /// </summary>
        public override void SetConfig(Config config)
        {
            m_strategy = SpatialDocMaker.GetSpatialStrategy(config.RoundNumber);
            m_shapeConverter = SpatialDocMaker.MakeShapeConverter(m_strategy, config, "query.spatial.");

            m_distErrPct = config.Get("query.spatial.distErrPct", double.NaN);
            m_operation = SpatialOperation.Get(config.Get("query.spatial.predicate", "Intersects"));
            m_score = config.Get("query.spatial.score", false);

            base.SetConfig(config); // call last, will call PrepareQueries()
        }

        /// <summary>
        /// Reads up to "query.file.maxQueries" shapes from "query.file" (via a
        /// <see cref="LineDocSource"/>), converts each through the shape converter,
        /// and builds one query per parseable shape. Unparseable lines are skipped
        /// without counting against the maximum.
        /// </summary>
        protected override Query[] PrepareQueries()
        {
            int maxQueries = m_config.Get("query.file.maxQueries", 1000);
            Config srcConfig = new Config(new Dictionary<string, string>());
            srcConfig.Set("docs.file", m_config.Get("query.file", null));
            srcConfig.Set("line.parser", m_config.Get("query.file.line.parser", null));
            srcConfig.Set("content.source.forever", "false");

            List<Query> queries = new List<Query>();
            LineDocSource src = new LineDocSource();
            try
            {
                src.SetConfig(srcConfig);
                src.ResetInputs();
                DocData docData = new DocData();
                for (int i = 0; i < maxQueries; i++)
                {
                    docData = src.GetNextDocData(docData);
                    IShape shape = SpatialDocMaker.MakeShapeFromString(m_strategy, docData.Name, docData.Body);
                    if (shape != null)
                    {
                        shape = m_shapeConverter.Convert(shape);
                        queries.Add(MakeQueryFromShape(shape));
                    }
                    else
                    {
                        i--; // skip the bad line; don't count it against maxQueries
                    }
                }
            }
            // LUCENENET: a parameterless catch replaces the unused exception variable
            // plus the CS0168 warning-suppression pragmas from the original.
            catch (NoMoreDataException)
            {
                // all-done: the source was exhausted before maxQueries was reached
            }
            finally
            {
                src.Dispose();
            }
            return queries.ToArray();
        }

        /// <summary>
        /// Builds a query for <paramref name="shape"/> using the configured predicate.
        /// When scoring is enabled, wraps a distance value source in a
        /// <see cref="CustomScoreQuery"/>; otherwise uses a constant-score filter.
        /// </summary>
        protected virtual Query MakeQueryFromShape(IShape shape)
        {
            SpatialArgs args = new SpatialArgs(m_operation, shape);
            if (!double.IsNaN(m_distErrPct))
                args.DistErrPct = m_distErrPct;

            if (m_score)
            {
                ValueSource valueSource = m_strategy.MakeDistanceValueSource(shape.Center);
                return new CustomScoreQuery(m_strategy.MakeQuery(args), new FunctionQuery(valueSource));
            }
            else
            {
                // strategy.MakeQuery() could potentially score (isn't well defined) so instead we call
                // MakeFilter() and wrap

                Filter filter = m_strategy.MakeFilter(args);
                if (filter is QueryWrapperFilter)
                {
                    return ((QueryWrapperFilter)filter).Query;
                }
                else
                {
                    return new ConstantScoreQuery(filter);
                }
            }
        }
    }
}
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Support;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;
using System.Threading;

namespace Lucene.Net.Benchmarks.ByTask.Feeds
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Implements a <see cref="ContentSource"/> over the TREC collection.
    /// </summary>
    /// <remarks>
    /// Supports the following configuration parameters (on top of
    /// <see cref="ContentSource"/>):
    /// <list type="bullet">
    /// <item><term>work.dir</term><description>specifies the working directory. Required if "docs.dir"
    /// denotes a relative path (<b>default=work</b>).</description></item>
    /// <item><term>docs.dir</term><description>specifies the directory where the TREC files reside.
    /// Can be set to a relative path if "work.dir" is also specified
    /// (<b>default=trec</b>).
    /// </description></item>
    /// <item><term>trec.doc.parser</term><description>specifies the <see cref="TrecDocParser"/> class to use for
    /// parsing the TREC documents content (<b>default=TrecGov2Parser</b>).
    /// </description></item>
    /// <item><term>html.parser</term><description>specifies the <see cref="IHTMLParser"/> class to use for
    /// parsing the HTML parts of the TREC documents content (<b>default=DemoHTMLParser</b>).
    /// </description></item>
    /// <item><term>content.source.encoding</term><description>if not specified, ISO-8859-1 is used.</description></item>
    /// <item><term>content.source.excludeIteration</term><description>if <c>true</c>, do not append iteration number to docname</description></item>
    /// </list>
    /// </remarks>
    public class TrecContentSource : ContentSource
    {
        // LUCENENET specific - DateFormatInfo not used

        public static readonly string DOCNO = "<DOCNO>";
        public static readonly string TERMINATING_DOCNO = "</DOCNO>";
        public static readonly string DOC = "<DOC>";
        public static readonly string TERMINATING_DOC = "</DOC>";

        /// <summary>separator between lines in the buffer</summary>
        public static readonly string NEW_LINE = Environment.NewLine;

        private static readonly string[] DATE_FORMATS = {
            // LUCENENET specific: in JAVA, they don't care if it is an abbreviated or a full month name when parsing
            // so we provide definitions for both ways.
            "ddd, dd MMM yyyy hh:mm:ss K",   // Tue, 09 Dec 2003 22:39:08 GMT
            "ddd, dd MMMM yyyy hh:mm:ss K",  // Tue, 09 December 2003 22:39:08 GMT
            "ddd MMM dd hh:mm:ss yyyy K",    // Tue Dec 09 16:45:08 2003 EST
            "ddd MMMM dd hh:mm:ss yyyy K",   // Tue December 09 16:45:08 2003 EST
            "ddd, dd-MMM-':'y hh:mm:ss K",   // Tue, 09 Dec 2003 22:39:08 GMT
            "ddd, dd-MMMM-':'y hh:mm:ss K",  // Tue, 09 December 2003 22:39:08 GMT
            "ddd, dd-MMM-yyy hh:mm:ss K",    // Tue, 09 Dec 2003 22:39:08 GMT
            "ddd, dd-MMMM-yyy hh:mm:ss K",   // Tue, 09 December 2003 22:39:08 GMT
            "ddd MMM dd hh:mm:ss yyyy",      // Tue Dec 09 16:45:08 2003
            "ddd MMMM dd hh:mm:ss yyyy",     // Tue December 09 16:45:08 2003
            "dd MMM yyyy",                   // 1 Mar 1994
            "dd MMMM yyyy",                  // 1 March 1994
            "MMM dd, yyyy",                  // Feb 3, 1994
            "MMMM dd, yyyy",                 // February 3, 1994
            "yyMMdd",                        // 910513
            "hhmm K.K.K. MMM dd, yyyy",      // 0901 u.t.c. Apr 28, 1994
            "hhmm K.K.K. MMMM dd, yyyy",     // 0901 u.t.c. April 28, 1994
        };

        // Per-thread scratch buffer for assembling a single TREC document's text.
        private ThreadLocal<StringBuilder> trecDocBuffer = new ThreadLocal<StringBuilder>();
        private DirectoryInfo dataDir = null;
        private List<FileInfo> inputFiles = new List<FileInfo>();
        private int nextFile = 0;
        // Use to synchronize threads on reading from the TREC documents.
        private object @lock = new object();

        // Required for test
        internal TextReader reader;
        internal int iteration = 0;
        internal IHTMLParser htmlParser;

        private bool excludeDocnameIteration;
        private TrecDocParser trecDocParser = new TrecGov2Parser(); // default
        internal TrecDocParser.ParsePathType currPathType; // not private for tests

        /// <summary>Returns (lazily creating) this thread's scratch buffer.</summary>
        private StringBuilder GetDocBuffer()
        {
            StringBuilder sb = trecDocBuffer.Value;
            if (sb == null)
            {
                sb = new StringBuilder();
                trecDocBuffer.Value = sb;
            }
            return sb;
        }

        internal IHTMLParser HtmlParser
        {
            get { return htmlParser; }
        }

        /// <summary>
        /// Read until a line starting with the specified <paramref name="lineStart"/>,
        /// rolling over to the next input file whenever the current one is exhausted.
        /// </summary>
        /// <param name="buf">Buffer for collecting the data if so specified.</param>
        /// <param name="lineStart">Line start to look for, must not be <c>null</c>.</param>
        /// <param name="collectMatchLine">Whether to collect the matching line into <paramref name="buf"/>.</param>
        /// <param name="collectAll">Whether to collect all lines into <paramref name="buf"/>.</param>
        /// <exception cref="IOException">If there is a low-level I/O error.</exception>
        /// <exception cref="NoMoreDataException">If the source is exhausted.</exception>
        private void Read(StringBuilder buf, string lineStart,
            bool collectMatchLine, bool collectAll)
        {
            string sep = "";
            while (true)
            {
                string line = reader.ReadLine();

                if (line == null)
                {
                    OpenNextFile();
                    continue;
                }

                // LUCENENET: removed the vestigial no-op statement "var _ = line.Length;"

                if (lineStart != null && line.StartsWith(lineStart, StringComparison.Ordinal))
                {
                    if (collectMatchLine)
                    {
                        buf.Append(sep).Append(line);
                        sep = NEW_LINE;
                    }
                    return;
                }

                if (collectAll)
                {
                    buf.Append(sep).Append(line);
                    sep = NEW_LINE;
                }
            }
        }

        /// <summary>
        /// Closes the current reader and opens the next input file (gzip, bzip2, or
        /// plain text, detected by extension). Wraps around to the first file when
        /// "forever" is set; otherwise throws <see cref="NoMoreDataException"/>.
        /// Unreadable files are skipped in verbose mode.
        /// </summary>
        internal virtual void OpenNextFile()
        {
            Dispose();
            //currPathType = null;
            while (true)
            {
                if (nextFile >= inputFiles.Count)
                {
                    // exhausted files, start a new round, unless forever set to false.
                    if (!m_forever)
                    {
                        throw new NoMoreDataException();
                    }
                    nextFile = 0;
                    iteration++;
                }
                FileInfo f = inputFiles[nextFile++];
                if (m_verbose)
                {
                    SystemConsole.WriteLine("opening: " + f + " length: " + f.Length);
                }
                try
                {
                    Stream inputStream = StreamUtils.GetInputStream(f); // support either gzip, bzip2, or regular text file, by extension
                    reader = new StreamReader(inputStream, m_encoding);
                    currPathType = TrecDocParser.PathType(f);
                    return;
                }
                catch (Exception e)
                {
                    if (m_verbose)
                    {
                        SystemConsole.WriteLine("Skipping 'bad' file " + f.FullName + " due to " + e.Message);
                        continue;
                    }
                    throw new NoMoreDataException();
                }
            }
        }

        /// <summary>
        /// Tries the known TREC date formats, then a lenient invariant-culture parse.
        /// Returns <c>null</c> (logging in verbose mode) when the string cannot be
        /// parsed, so a bad date never fails a run.
        /// </summary>
        public virtual DateTime? ParseDate(string dateStr)
        {
            dateStr = dateStr.Trim();
            DateTime d;
            if (DateTime.TryParseExact(dateStr, DATE_FORMATS, CultureInfo.InvariantCulture, DateTimeStyles.None, out d))
            {
                return d;
            }
            else if (DateTime.TryParse(dateStr, CultureInfo.InvariantCulture, DateTimeStyles.None, out d))
            {
                return d;
            }

            // do not fail test just because a date could not be parsed
            if (m_verbose)
            {
                SystemConsole.WriteLine("failed to parse date (assigning 'now') for: " + dateStr);
            }
            return null;
        }

        /// <summary>
        /// Closes only the current reader; also called between input files by
        /// <see cref="OpenNextFile"/>, so it must leave the source reusable.
        /// </summary>
        protected override void Dispose(bool disposing)
        {
            if (reader == null)
            {
                return;
            }

            try
            {
                reader.Dispose();
            }
            catch (IOException e)
            {
                if (m_verbose)
                {
                    SystemConsole.WriteLine("failed to dispose reader !");
                    SystemConsole.WriteLine(e.ToString());
                }
            }
            reader = null;
        }

        /// <summary>
        /// Reads the next raw TREC document under the shared lock, then parses it
        /// (outside the lock) with the configured <see cref="TrecDocParser"/>.
        /// </summary>
        public override DocData GetNextDocData(DocData docData)
        {
            string name = null;
            StringBuilder docBuf = GetDocBuffer();
            TrecDocParser.ParsePathType parsedPathType;

            // protect reading from the TREC files by multiple threads. The rest of the
            // method, i.e., parsing the content and returning the DocData can run unprotected.
            lock (@lock)
            {
                if (reader == null)
                {
                    OpenNextFile();
                }

                // 1. skip until doc start - required for all TREC formats
                docBuf.Length = 0;
                Read(docBuf, DOC, false, false);

                // save parsedFile for passing trecDataParser after the sync block, in
                // case another thread will open another file in between.
                parsedPathType = currPathType;

                // 2. name - required for all TREC formats
                docBuf.Length = 0;
                Read(docBuf, DOCNO, true, false);
                name = docBuf.ToString(DOCNO.Length, docBuf.IndexOf(TERMINATING_DOCNO,
                    DOCNO.Length) - DOCNO.Length).Trim();

                if (!excludeDocnameIteration)
                {
                    name = name + "_" + iteration;
                }

                // 3. read all until end of doc
                docBuf.Length = 0;
                Read(docBuf, TERMINATING_DOC, false, true);
            }

            // count char length of text to be parsed (may be larger than the resulted plain doc body text).
            AddBytes(docBuf.Length);

            // This code segment relies on HtmlParser being thread safe. When we get
            // here, everything else is already private to that thread, so we're safe.
            docData = trecDocParser.Parse(docData, name, this, docBuf, parsedPathType);
            AddItem();

            return docData;
        }

        /// <summary>Rewinds the source to the first file and resets the iteration counter.</summary>
        public override void ResetInputs()
        {
            lock (@lock)
            {
                base.ResetInputs();
                Dispose();
                nextFile = 0;
                iteration = 0;
            }
        }

        /// <summary>
        /// Reads the directories, parser classes, encoding, and iteration-exclusion
        /// flag from the configuration. See the class remarks for the supported keys.
        /// </summary>
        public override void SetConfig(Config config)
        {
            base.SetConfig(config);
            // dirs
            DirectoryInfo workDir = new DirectoryInfo(config.Get("work.dir", "work"));
            string d = config.Get("docs.dir", "trec");
            dataDir = new DirectoryInfo(d);
            // LUCENENET: restore the documented behavior (matching the Java original)
            // of resolving a relative docs.dir against work.dir; previously workDir
            // was computed but never used.
            if (!Path.IsPathRooted(d))
            {
                dataDir = new DirectoryInfo(Path.Combine(workDir.FullName, d));
            }
            // files
            CollectFiles(dataDir, inputFiles);
            if (inputFiles.Count == 0)
            {
                throw new ArgumentException("No files in dataDir: " + dataDir);
            }
            // trec doc parser
            try
            {
                string trecDocParserClassName = config.Get("trec.doc.parser", "Lucene.Net.Benchmarks.ByTask.Feeds.TrecGov2Parser, Lucene.Net.Benchmark");
                trecDocParser = (TrecDocParser)Activator.CreateInstance(Type.GetType(trecDocParserClassName));
            }
            catch (Exception e)
            {
                // Should not get here. Throw runtime exception.
                throw new Exception(e.ToString(), e);
            }
            // html parser
            try
            {
                string htmlParserClassName = config.Get("html.parser",
                    "Lucene.Net.Benchmarks.ByTask.Feeds.DemoHTMLParser, Lucene.Net.Benchmark");
                htmlParser = (IHTMLParser)Activator.CreateInstance(Type.GetType(htmlParserClassName));
            }
            catch (Exception e)
            {
                // Should not get here. Throw runtime exception.
                throw new Exception(e.ToString(), e);
            }
            // encoding
            if (m_encoding == null)
            {
                m_encoding = Encoding.GetEncoding("iso-8859-1"); //StandardCharsets.ISO_8859_1.name();
            }
            // iteration exclusion in doc name
            excludeDocnameIteration = config.Get("content.source.excludeIteration", false);
        }
    }
}
+ /// </summary> + public abstract class TrecDocParser + { + /// <summary>Types of trec parse paths,</summary> + public enum ParsePathType { GOV2, FBIS, FT, FR94, LATIMES, UNKNOWN } + + /// <summary>trec parser type used for unknown extensions</summary> + public static readonly ParsePathType DEFAULT_PATH_TYPE = ParsePathType.GOV2; + + internal static readonly IDictionary<ParsePathType, TrecDocParser> pathType2parser = new Dictionary<ParsePathType, TrecDocParser>(); + static TrecDocParser() + { + pathType2parser[ParsePathType.GOV2] = new TrecGov2Parser(); + pathType2parser[ParsePathType.FBIS] = new TrecFBISParser(); + pathType2parser[ParsePathType.FR94] = new TrecFR94Parser(); + pathType2parser[ParsePathType.FT] = new TrecFTParser(); + pathType2parser[ParsePathType.LATIMES] = new TrecLATimesParser(); + + foreach (ParsePathType ppt in Enum.GetValues(typeof(ParsePathType))) + { + pathName2Type[ppt.ToString().ToUpperInvariant()] = ppt; + } + } + + internal static readonly IDictionary<string, ParsePathType?> pathName2Type = new Dictionary<string, ParsePathType?>(); + + + /// <summary>max length of walk up from file to its ancestors when looking for a known path type.</summary> + private static readonly int MAX_PATH_LENGTH = 10; + + /// <summary> + /// Compute the path type of a file by inspecting name of file and its parents. + /// </summary> + public static ParsePathType PathType(FileInfo f) + { + int pathLength = 0; + ParsePathType? ppt; + if (pathName2Type.TryGetValue(f.Name.ToUpperInvariant(), out ppt) && ppt != null) + { + return ppt.Value; + } + // Walk up the directory names to find a match. 
+ DirectoryInfo parentDir = f.Directory; + while (parentDir != null && ++pathLength < MAX_PATH_LENGTH) + { + if (pathName2Type.TryGetValue(parentDir.Name.ToUpperInvariant(), out ppt) && ppt != null) + { + return ppt.Value; + } + parentDir = parentDir.Parent; + } + return DEFAULT_PATH_TYPE; + } + + /// <summary> + /// Parse the text prepared in docBuf into a result DocData, + /// no synchronization is required. + /// </summary> + /// <param name="docData">Reusable result.</param> + /// <param name="name">Name that should be set to the result.</param> + /// <param name="trecSrc">Calling trec content source.</param> + /// <param name="docBuf">Text to parse.</param> + /// <param name="pathType">Type of parsed file, or <see cref="ParsePathType.UNKNOWN"/> if unknown - may be used by + /// parsers to alter their behavior according to the file path type. </param> + /// <returns></returns> + public abstract DocData Parse(DocData docData, string name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType); + + /// <summary> + /// strip tags from <code>buf</code>: each tag is replaced by a single blank. + /// </summary> + /// <returns>Text obtained when stripping all tags from <paramref name="buf"/> (input <see cref="StringBuilder"/> is unmodified).</returns> + public static string StripTags(StringBuilder buf, int start) + { + return StripTags(buf.ToString(start, buf.Length - start), 0); + } + + /// <summary> + /// Strip tags from input. + /// </summary> + /// <seealso cref="StripTags(StringBuilder, int)"/> + public static string StripTags(string buf, int start) + { + if (start > 0) + { + buf = buf.Substring(0); + } + return Regex.Replace(buf, "<[^>]*>", " "); + } + + /// <summary> + /// Extract from <paramref name="buf"/> the text of interest within specified tags. 
+ /// </summary> + /// <param name="buf">Entire input text.</param> + /// <param name="startTag">Tag marking start of text of interest.</param> + /// <param name="endTag">Tag marking end of text of interest.</param> + /// <param name="maxPos">if ≥ 0 sets a limit on start of text of interest.</param> + /// <param name="noisePrefixes">Text of interest or null if not found.</param> + /// <returns></returns> + public static string Extract(StringBuilder buf, string startTag, string endTag, int maxPos, string[] noisePrefixes) + { + int k1 = buf.IndexOf(startTag); + if (k1 >= 0 && (maxPos < 0 || k1 < maxPos)) + { + k1 += startTag.Length; + int k2 = buf.IndexOf(endTag, k1); + if (k2 >= 0 && (maxPos < 0 || k2 < maxPos)) + { // found end tag with allowed range + if (noisePrefixes != null) + { + foreach (string noise in noisePrefixes) + { + int k1a = buf.IndexOf(noise, k1); + if (k1a >= 0 && k1a < k2) + { + k1 = k1a + noise.Length; + } + } + } + return buf.ToString(k1, k2 - k1).Trim(); + } + } + return null; + } + + //public static void main(String[] args) { + // System.out.println(stripTags("is it true that<space>2<<second space>><almost last space>1<one more space>?",0)); + //} + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecFBISParser.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecFBISParser.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecFBISParser.cs new file mode 100644 index 0000000..cf321cc --- /dev/null +++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecFBISParser.cs @@ -0,0 +1,68 @@ +using Lucene.Net.Support; +using System; +using System.Text; + +namespace Lucene.Net.Benchmarks.ByTask.Feeds +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Parser for the FBIS docs in trec disks 4+5 collection format + /// </summary> + public class TrecFBISParser : TrecDocParser + { + private static readonly string HEADER = "<HEADER>"; + private static readonly string HEADER_END = "</HEADER>"; + private static readonly int HEADER_END_LENGTH = HEADER_END.Length; + + private static readonly string DATE1 = "<DATE1>"; + private static readonly string DATE1_END = "</DATE1>"; + + private static readonly string TI = "<TI>"; + private static readonly string TI_END = "</TI>"; + + public override DocData Parse(DocData docData, string name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) + { + int mark = 0; // that much is skipped + // optionally skip some of the text, set date, title + DateTime? date = null; + string title = null; + int h1 = docBuf.IndexOf(HEADER); + if (h1 >= 0) + { + int h2 = docBuf.IndexOf(HEADER_END, h1); + mark = h2 + HEADER_END_LENGTH; + // date... + string dateStr = Extract(docBuf, DATE1, DATE1_END, h2, null); + if (dateStr != null) + { + date = trecSrc.ParseDate(dateStr); + } + // title... 
+ title = Extract(docBuf, TI, TI_END, h2, null); + } + docData.Clear(); + docData.Name = name; + docData.SetDate(date); + docData.Title = title; + docData.Body = StripTags(docBuf, mark).ToString(); + return docData; + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecFR94Parser.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecFR94Parser.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecFR94Parser.cs new file mode 100644 index 0000000..72f99bb --- /dev/null +++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecFR94Parser.cs @@ -0,0 +1,69 @@ +using Lucene.Net.Support; +using System; +using System.Text; + +namespace Lucene.Net.Benchmarks.ByTask.Feeds +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + /// <summary> + /// Parser for the FR94 docs in trec disks 4+5 collection format + /// </summary> + public class TrecFR94Parser : TrecDocParser + { + private static readonly string TEXT = "<TEXT>"; + private static readonly int TEXT_LENGTH = TEXT.Length; + private static readonly string TEXT_END = "</TEXT>"; + + private static readonly string DATE = "<DATE>"; + private static readonly string[] DATE_NOISE_PREFIXES = { + "DATE:", + "date:", //TODO improve date extraction for this format + "t.c.", + }; + private static readonly string DATE_END = "</DATE>"; + + //TODO can we also extract title for this format? + + public override DocData Parse(DocData docData, string name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) + { + int mark = 0; // that much is skipped + // optionally skip some of the text, set date (no title?) + DateTime? date = null; + int h1 = docBuf.IndexOf(TEXT); + if (h1 >= 0) + { + int h2 = docBuf.IndexOf(TEXT_END, h1); + mark = h1 + TEXT_LENGTH; + // date... 
+ string dateStr = Extract(docBuf, DATE, DATE_END, h2, DATE_NOISE_PREFIXES); + if (dateStr != null) + { + dateStr = StripTags(dateStr, 0).ToString(); + date = trecSrc.ParseDate(dateStr.Trim()); + } + } + docData.Clear(); + docData.Name = name; + docData.SetDate(date); + docData.Body = StripTags(docBuf, mark).ToString(); + return docData; + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecFTParser.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecFTParser.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecFTParser.cs new file mode 100644 index 0000000..189f6cb --- /dev/null +++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecFTParser.cs @@ -0,0 +1,58 @@ +using System; +using System.Text; + +namespace Lucene.Net.Benchmarks.ByTask.Feeds +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + /// <summary> + /// Parser for the FT docs in trec disks 4+5 collection format + /// </summary> + public class TrecFTParser : TrecDocParser + { + private static readonly string DATE = "<DATE>"; + private static readonly string DATE_END = "</DATE>"; + + private static readonly string HEADLINE = "<HEADLINE>"; + private static readonly string HEADLINE_END = "</HEADLINE>"; + + public override DocData Parse(DocData docData, string name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) + { + int mark = 0; // that much is skipped + + // date... + DateTime? date = null; + string dateStr = Extract(docBuf, DATE, DATE_END, -1, null); + if (dateStr != null) + { + date = trecSrc.ParseDate(dateStr); + } + + // title... + string title = Extract(docBuf, HEADLINE, HEADLINE_END, -1, null); + + docData.Clear(); + docData.Name = name; + docData.SetDate(date); + docData.Title = title; + docData.Body = StripTags(docBuf, mark).ToString(); + return docData; + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecGov2Parser.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecGov2Parser.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecGov2Parser.cs new file mode 100644 index 0000000..12912e9 --- /dev/null +++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecGov2Parser.cs @@ -0,0 +1,57 @@ +using Lucene.Net.Support; +using System; +using System.IO; +using System.Text; + +namespace Lucene.Net.Benchmarks.ByTask.Feeds +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Parser for the GOV2 collection format + /// </summary> + public class TrecGov2Parser : TrecDocParser + { + private static readonly string DATE = "Date: "; + private static readonly string DATE_END = TrecContentSource.NEW_LINE; + + private static readonly string DOCHDR = "<DOCHDR>"; + private static readonly string TERMINATING_DOCHDR = "</DOCHDR>"; + + public override DocData Parse(DocData docData, string name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) + { + // skip some of the non-html text, optionally set date + DateTime? 
date = null; + int start = 0; + int h1 = docBuf.IndexOf(DOCHDR); + if (h1 >= 0) + { + int h2 = docBuf.IndexOf(TERMINATING_DOCHDR, h1); + string dateStr = Extract(docBuf, DATE, DATE_END, h2, null); + if (dateStr != null) + { + date = trecSrc.ParseDate(dateStr); + } + start = h2 + TERMINATING_DOCHDR.Length; + } + string html = docBuf.ToString(start, docBuf.Length - start); + return trecSrc.HtmlParser.Parse(docData, name, date, new StringReader(html), trecSrc); + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecLATimesParser.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecLATimesParser.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecLATimesParser.cs new file mode 100644 index 0000000..e54f635 --- /dev/null +++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecLATimesParser.cs @@ -0,0 +1,75 @@ +using System; +using System.Text; + +namespace Lucene.Net.Benchmarks.ByTask.Feeds +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + /// <summary> + /// Parser for the FT docs in trec disks 4+5 collection format + /// </summary> + public class TrecLATimesParser : TrecDocParser + { + private static readonly string DATE = "<DATE>"; + private static readonly string DATE_END = "</DATE>"; + private static readonly string DATE_NOISE = "day,"; // anything aftre the ',' + + private static readonly string SUBJECT = "<SUBJECT>"; + private static readonly string SUBJECT_END = "</SUBJECT>"; + private static readonly string HEADLINE = "<HEADLINE>"; + private static readonly string HEADLINE_END = "</HEADLINE>"; + + public override DocData Parse(DocData docData, string name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) + { + int mark = 0; // that much is skipped + + // date... + DateTime? date = null; + string dateStr = Extract(docBuf, DATE, DATE_END, -1, null); + if (dateStr != null) + { + int d2a = dateStr.IndexOf(DATE_NOISE); + if (d2a > 0) + { + dateStr = dateStr.Substring(0, (d2a + 3) - 0); // we need the "day" part + } + dateStr = StripTags(dateStr, 0).ToString(); + date = trecSrc.ParseDate(dateStr.Trim()); + } + + // title... 
first try with SUBJECT, them with HEADLINE + string title = Extract(docBuf, SUBJECT, SUBJECT_END, -1, null); + if (title == null) + { + title = Extract(docBuf, HEADLINE, HEADLINE_END, -1, null); + } + if (title != null) + { + title = StripTags(title, 0).ToString().Trim(); + } + + docData.Clear(); + docData.Name = name; + docData.SetDate(date); + docData.Title = title; + docData.Body = StripTags(docBuf, mark).ToString(); + return docData; + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecParserByPath.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecParserByPath.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecParserByPath.cs new file mode 100644 index 0000000..45a72b4 --- /dev/null +++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/TrecParserByPath.cs @@ -0,0 +1,34 @@ +using System.Text; + +namespace Lucene.Net.Benchmarks.ByTask.Feeds +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Parser for trec docs which selects the parser to apply according + /// to the source files path, defaulting to <see cref="TrecGov2Parser"/>. 
+ /// </summary> + public class TrecParserByPath : TrecDocParser + { + public override DocData Parse(DocData docData, string name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) + { + return pathType2parser[pathType].Parse(docData, name, trecSrc, docBuf, pathType); + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/PerfRunData.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/ByTask/PerfRunData.cs b/src/Lucene.Net.Benchmark/ByTask/PerfRunData.cs new file mode 100644 index 0000000..e5b334c --- /dev/null +++ b/src/Lucene.Net.Benchmark/ByTask/PerfRunData.cs @@ -0,0 +1,490 @@ +using Lucene.Net.Analysis; +using Lucene.Net.Benchmarks.ByTask.Feeds; +using Lucene.Net.Benchmarks.ByTask.Stats; +using Lucene.Net.Benchmarks.ByTask.Tasks; +using Lucene.Net.Benchmarks.ByTask.Utils; +using Lucene.Net.Facet.Taxonomy; +using Lucene.Net.Index; +using Lucene.Net.Search; +using Lucene.Net.Store; +using Lucene.Net.Support; +using Lucene.Net.Util; +using System; +using System.Collections.Generic; +using System.Globalization; +using System.IO; + +namespace Lucene.Net.Benchmarks.ByTask +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Data maintained by a performance test run.
    /// </summary>
    /// <remarks>
    /// Data includes:
    /// <list type="bullet">
    /// <item><description>Configuration.</description></item>
    /// <item><description>Directory, Writer, Reader.</description></item>
    /// <item><description>Taxonomy Directory, Writer, Reader.</description></item>
    /// <item><description>DocMaker, FacetSource and a few instances of QueryMaker.</description></item>
    /// <item><description>Named AnalysisFactories.</description></item>
    /// <item><description>Analyzer.</description></item>
    /// <item><description>Statistics data which updated during the run.</description></item>
    /// </list>
    /// <para/>
    /// Config properties:
    /// <list type="bullet">
    /// <item><term>work.dir</term><description>&lt;path to root of docs and index dirs| Default: work&gt;</description></item>
    /// <item><term>analyzer</term><description>&lt;class name for analyzer| Default: StandardAnalyzer&gt;</description></item>
    /// <item><term>doc.maker</term><description>&lt;class name for doc-maker| Default: DocMaker&gt;</description></item>
    /// <item><term>facet.source</term><description>&lt;class name for facet-source| Default: RandomFacetSource&gt;</description></item>
    /// <item><term>query.maker</term><description>&lt;class name for query-maker| Default: SimpleQueryMaker&gt;</description></item>
    /// <item><term>log.queries</term><description>&lt;whether queries should be printed| Default: false&gt;</description></item>
    /// <item><term>directory</term><description>&lt;type of directory to use for the index| Default: RAMDirectory&gt;</description></item>
    /// <item><term>taxonomy.directory</term><description>&lt;type of directory for taxonomy index| Default: RAMDirectory&gt;</description></item>
    /// </list>
    /// </remarks>
    public class PerfRunData : IDisposable
    {
        private Points points;

        // objects used during performance test run
        // directory, analyzer, docMaker - created at startup.
        // reader, writer, searcher - maintained by basic tasks.
        private Store.Directory directory;
        private IDictionary<string, AnalyzerFactory> analyzerFactories = new Dictionary<string, AnalyzerFactory>();
        private Analyzer analyzer;
        private DocMaker docMaker;
        private ContentSource contentSource;
        private FacetSource facetSource;
        private CultureInfo locale;

        private Store.Directory taxonomyDir;
        private ITaxonomyWriter taxonomyWriter;
        private TaxonomyReader taxonomyReader;

        // we use separate (identical) instances for each "read" task type, so each can iterate the queries separately.
        private IDictionary<Type, IQueryMaker> readTaskQueryMaker;
        private Type qmkrClass;

        private DirectoryReader indexReader;
        private IndexSearcher indexSearcher;
        private IndexWriter indexWriter;
        private Config config;
        private long startTimeMillis;

        // Arbitrary named objects shared between tasks; disposables here are closed in Dispose().
        private readonly IDictionary<string, object> perfObjects = new Dictionary<string, object>();

        // constructor
        public PerfRunData(Config config)
        {
            this.config = config;
            // analyzer (default is standard analyzer)
            analyzer = NewAnalyzerTask.CreateAnalyzer(config.Get("analyzer",
                "Lucene.Net.Analysis.Standard.StandardAnalyzer, Lucene.Net.Analysis.Common"));

            // content source (instantiated by reflection from the configured type name)
            string sourceClass = config.Get("content.source", typeof(SingleDocSource).AssemblyQualifiedName);
            contentSource = (ContentSource)Activator.CreateInstance(Type.GetType(sourceClass)); //Class.forName(sourceClass).asSubclass(typeof(ContentSource)).newInstance();
            contentSource.SetConfig(config);

            // doc maker
            docMaker = (DocMaker)Activator.CreateInstance(Type.GetType(config.Get("doc.maker", typeof(DocMaker).AssemblyQualifiedName))); // "org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
            docMaker.SetConfig(config, contentSource);
            // facet source
            facetSource = (FacetSource)Activator.CreateInstance(Type.GetType(config.Get("facet.source",
                typeof(RandomFacetSource).AssemblyQualifiedName))); // "org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
            facetSource.SetConfig(config);
            // query makers - created lazily per read-task type in GetQueryMaker()
            readTaskQueryMaker = new Dictionary<Type, IQueryMaker>();
            qmkrClass = Type.GetType(config.Get("query.maker", typeof(SimpleQueryMaker).AssemblyQualifiedName));

            // index stuff
            Reinit(false);

            // statistic points
            points = new Points(config);

            if (bool.Parse(config.Get("log.queries", "false")))
            {
                SystemConsole.WriteLine("------------> queries:");
                SystemConsole.WriteLine(GetQueryMaker(new SearchTask(this)).PrintQueries());
            }
        }

        // Standard dispose pattern entry point.
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                IOUtils.Dispose(indexWriter, indexReader, directory,
                    taxonomyWriter, taxonomyReader, taxonomyDir,
                    docMaker, facetSource, contentSource);

                // close all perf objects that are closeable.
                List<IDisposable> perfObjectsToClose = new List<IDisposable>();
                foreach (object obj in perfObjects.Values)
                {
                    if (obj is IDisposable)
                    {
                        perfObjectsToClose.Add((IDisposable)obj);
                    }
                }
                IOUtils.Dispose(perfObjectsToClose);
            }
        }

        // clean old stuff, reopen
        public virtual void Reinit(bool eraseIndex)
        {
            // cleanup index
            IOUtils.Dispose(indexWriter, indexReader, directory);
            indexWriter = null;
            indexReader = null;

            IOUtils.Dispose(taxonomyWriter, taxonomyReader, taxonomyDir);
            taxonomyWriter = null;
            taxonomyReader = null;

            // directory (default is ram-dir).
            directory = CreateDirectory(eraseIndex, "index", "directory");
            taxonomyDir = CreateDirectory(eraseIndex, "taxo", "taxonomy.directory");

            // inputs
            ResetInputs();

            // release unused stuff
            // NOTE(review): explicit collection mirrors the Java benchmark's System.gc();
            // generally discouraged in .NET production code but intentional for benchmarking.
            GC.Collect();

            // Re-init clock
            SetStartTimeMillis();
        }

        // Creates either an FSDirectory under work.dir or a RAMDirectory, per dirParam.
        private Store.Directory CreateDirectory(bool eraseIndex, string dirName,
            string dirParam)
        {
            if ("FSDirectory".Equals(config.Get(dirParam, "RAMDirectory"), StringComparison.Ordinal))
            {
                DirectoryInfo workDir = new DirectoryInfo(config.Get("work.dir", "work"));
                DirectoryInfo indexDir = new DirectoryInfo(System.IO.Path.Combine(workDir.FullName, dirName));
                if (eraseIndex && indexDir.Exists)
                {
                    FileUtils.FullyDelete(indexDir);
                }
                indexDir.Create();
                return FSDirectory.Open(indexDir);
            }

            return new RAMDirectory();
        }

        /// <summary>
        /// Returns an object that was previously set by <see cref="SetPerfObject(string, object)"/>.
        /// </summary>
        public virtual object GetPerfObject(string key)
        {
            // NOTE(review): lock (this) is used throughout this class; a private lock
            // object would be safer against external lock contention - confirm before changing.
            lock (this)
            {
                object result;
                perfObjects.TryGetValue(key, out result);
                return result;
            }
        }

        /// <summary>
        /// Sets an object that is required by <see cref="PerfTask"/>s, keyed by the given
        /// <paramref name="key"/>. If the object implements <see cref="IDisposable"/>, it will be disposed
        /// by <see cref="Dispose()"/>.
        /// </summary>
        public virtual void SetPerfObject(string key, object obj)
        {
            lock (this)
            {
                perfObjects[key] = obj;
            }
        }

        /// <summary>
        /// Resets the run's start clock to now and returns the new value.
        /// </summary>
        public virtual long SetStartTimeMillis()
        {
            startTimeMillis = Support.Time.CurrentTimeMilliseconds();
            return startTimeMillis;
        }

        /// <summary>
        /// Gets start time in milliseconds.
        /// </summary>
        public virtual long StartTimeMillis
        {
            get { return startTimeMillis; }
        }

        /// <summary>
        /// Gets the points.
        /// </summary>
        public virtual Points Points
        {
            get { return points; }
        }

        /// <summary>
        /// Gets or sets the directory.
        /// </summary>
        public virtual Store.Directory Directory
        {
            get { return directory; }
            set { directory = value; }
        }

        /// <summary>
        /// Gets the taxonomy directory.
        /// </summary>
        public virtual Store.Directory TaxonomyDir
        {
            get { return taxonomyDir; }
        }

        /// <summary>
        /// Set the taxonomy reader. Takes ownership of that taxonomy reader, that is,
        /// internally performs taxoReader.IncRef() (If caller no longer needs that
        /// reader it should DecRef()/Dispose() it after calling this method, otherwise,
        /// the reader will remain open).
        /// </summary>
        /// <param name="taxoReader">The taxonomy reader to set.</param>
        public virtual void SetTaxonomyReader(TaxonomyReader taxoReader)
        {
            lock (this)
            {
                if (taxoReader == this.taxonomyReader)
                {
                    return;
                }
                // Release the previously-held reader before taking a reference on the new one.
                if (taxonomyReader != null)
                {
                    taxonomyReader.DecRef();
                }

                if (taxoReader != null)
                {
                    taxoReader.IncRef();
                }
                this.taxonomyReader = taxoReader;
            }
        }

        /// <summary>
        /// Returns the taxonomyReader. NOTE: this returns a
        /// reference. You must call TaxonomyReader.DecRef() when
        /// you're done.
        /// </summary>
        public virtual TaxonomyReader GetTaxonomyReader()
        {
            lock (this)
            {
                if (taxonomyReader != null)
                {
                    taxonomyReader.IncRef();
                }
                return taxonomyReader;
            }
        }

        /// <summary>
        /// Gets or sets the taxonomy writer.
        /// </summary>
        public virtual ITaxonomyWriter TaxonomyWriter
        {
            get { return taxonomyWriter; }
            set { taxonomyWriter = value; }
        }

        /// <summary>
        /// Returns the indexReader. NOTE: this returns a
        /// reference. You must call IndexReader.DecRef() when
        /// you're done.
        /// </summary>
        public virtual DirectoryReader GetIndexReader()
        {
            lock (this)
            {
                if (indexReader != null)
                {
                    indexReader.IncRef();
                }
                return indexReader;
            }
        }

        /// <summary>
        /// Returns the indexSearcher. NOTE: this returns
        /// a reference to the underlying IndexReader. You must
        /// call IndexReader.DecRef() when you're done.
        /// </summary>
        /// <returns>The current searcher, or null when no index reader is open.</returns>
        public virtual IndexSearcher GetIndexSearcher()
        {
            lock (this)
            {
                if (indexReader != null)
                {
                    indexReader.IncRef();
                }
                return indexSearcher;
            }
        }

        /// <summary>
        /// Set the index reader. Takes ownership of that index reader, that is,
        /// internally performs indexReader.incRef() (If caller no longer needs that
        /// reader it should decRef()/close() it after calling this method, otherwise,
        /// the reader will remain open).
        /// </summary>
        /// <param name="indexReader">The indexReader to set.</param>
        public virtual void SetIndexReader(DirectoryReader indexReader)
        {
            lock (this)
            {
                if (indexReader == this.indexReader)
                {
                    return;
                }

                if (this.indexReader != null)
                {
                    // Release current IR
                    this.indexReader.DecRef();
                }

                this.indexReader = indexReader;
                if (indexReader != null)
                {
                    // Hold reference to new IR
                    indexReader.IncRef();
                    indexSearcher = new IndexSearcher(indexReader);
                }
                else
                {
                    indexSearcher = null;
                }
            }
        }

        /// <summary>
        /// Gets or sets the indexWriter.
        /// </summary>
        public virtual IndexWriter IndexWriter
        {
            get { return indexWriter; }
            set { indexWriter = value; }
        }

        /// <summary>
        /// Gets or sets the analyzer.
        /// </summary>
        public virtual Analyzer Analyzer
        {
            get { return analyzer; }
            set { analyzer = value; }
        }

        /// <summary>Gets the <see cref="Feeds.ContentSource"/>.</summary>
        public virtual ContentSource ContentSource
        {
            get { return contentSource; }
        }

        /// <summary>Returns the <see cref="Feeds.DocMaker"/>.</summary>
        public virtual DocMaker DocMaker
        {
            get { return docMaker; }
        }

        /// <summary>Gets the <see cref="Feeds.FacetSource"/>.</summary>
        public virtual FacetSource FacetSource
        {
            get { return facetSource; }
        }

        /// <summary>
        /// Gets or sets the culture.
        /// </summary>
        public virtual CultureInfo Locale // LUCENENET TODO: API Is this really needed since we have on the thread already?
        {
            get { return locale; }
            set { locale = value; }
        }

        /// <summary>
        /// Gets the config.
        /// </summary>
        public virtual Config Config
        {
            get { return config; }
        }

        /// <summary>
        /// Resets the content source, doc maker, facet source and all query makers
        /// so the run can iterate their inputs from the beginning again.
        /// </summary>
        public virtual void ResetInputs()
        {
            contentSource.ResetInputs();
            docMaker.ResetInputs();
            facetSource.ResetInputs();
            foreach (IQueryMaker queryMaker in readTaskQueryMaker.Values)
            {
                queryMaker.ResetInputs();
            }
        }

        /// <summary>
        /// Returns the queryMaker by read task type (class).
        /// </summary>
        public virtual IQueryMaker GetQueryMaker(ReadTask readTask)
        {
            lock (this)
            {
                // mapping the query maker by task class allows extending/adding new search/read tasks
                // without needing to modify this class.
                Type readTaskClass = readTask.GetType();
                IQueryMaker qm;
                if (!readTaskQueryMaker.TryGetValue(readTaskClass, out qm) || qm == null)
                {
                    try
                    {
                        //qm = qmkrClass.newInstance();
                        qm = (IQueryMaker)Activator.CreateInstance(qmkrClass);
                        qm.SetConfig(config);
                    }
                    catch (Exception e)
                    {
                        throw new Exception(e.ToString(), e);
                    }
                    readTaskQueryMaker[readTaskClass] = qm;
                }
                return qm;
            }
        }

        /// <summary>Gets the named <see cref="AnalyzerFactory"/> instances for this run.</summary>
        public virtual IDictionary<string, AnalyzerFactory> AnalyzerFactories
        {
            get { return analyzerFactories; }
        }
    }
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Programmatic/Sample.cs ----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Programmatic/Sample.cs b/src/Lucene.Net.Benchmark/ByTask/Programmatic/Sample.cs
new file mode 100644
index 0000000..6b248f6
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Programmatic/Sample.cs
@@ -0,0 +1,90 @@
using Lucene.Net.Benchmarks.ByTask.Tasks;
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Support;
using System.Collections.Generic;

namespace
Lucene.Net.Benchmarks.ByTask.Programmatic
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Sample performance test written programmatically - no algorithm file is needed here.
    /// </summary>
    public class Sample
    {
        /// <summary>
        /// Builds and runs a benchmark algorithm in code: create an index, add
        /// 500 documents, close the index, then report the results by task name.
        /// </summary>
        /// <param name="args">Command line arguments (unused).</param>
        public static void Main(string[] args)
        {
            var p = InitProps();
            Config conf = new Config(p);
            PerfRunData runData = new PerfRunData(conf);

            // 1. top sequence
            TaskSequence top = new TaskSequence(runData, null, null, false); // top level, not parallel

            // 2. task to create the index
            CreateIndexTask create = new CreateIndexTask(runData);
            top.AddTask(create);

            // 3. task seq to add 500 docs (order matters - top to bottom - add seq to top, only then add to seq)
            TaskSequence seq1 = new TaskSequence(runData, "AddDocs", top, false);
            seq1.SetRepetitions(500);
            seq1.SetNoChildReport();
            top.AddTask(seq1);

            // 4. task to add the doc
            AddDocTask addDoc = new AddDocTask(runData);
            //addDoc.setParams("1200"); // doc size limit if supported
            seq1.AddTask(addDoc); // order matters (see comment above)

            // 5. task to close the index
            CloseIndexTask close = new CloseIndexTask(runData);
            top.AddTask(close);

            // task to report
            RepSumByNameTask rep = new RepSumByNameTask(runData);
            top.AddTask(rep);

            // print algorithm
            SystemConsole.WriteLine(top.ToString());

            // execute
            top.DoLogic();
        }

        // Sample programmatic settings. Could also read from file.
        /// <summary>
        /// Creates the benchmark configuration properties used by <see cref="Main"/>.
        /// </summary>
        /// <returns>A mutable dictionary of benchmark property names to values.</returns>
        private static IDictionary<string, string> InitProps()
        {
            var p = new Dictionary<string, string>();
            p["task.max.depth.log"] = "3";
            p["max.buffered"] = "buf:10:10:100:100:10:10:100:100";
            // BUGFIX: ReutersContentSource is a ContentSource, not a DocMaker, so it
            // must be registered under "content.source" ("doc.maker" expects a DocMaker
            // implementation). This matches the upstream Java Sample.java.
            p["content.source"] = "Lucene.Net.Benchmarks.ByTask.Feeds.ReutersContentSource, Lucene.Net.Benchmark";
            p["log.step"] = "2000";
            p["doc.delete.step"] = "8";
            p["analyzer"] = "Lucene.Net.Analysis.Standard.StandardAnalyzer, Lucene.Net.Analysis.Common";
            p["doc.term.vector"] = "false";
            p["directory"] = "FSDirectory";
            p["query.maker"] = "Lucene.Net.Benchmarks.ByTask.Feeds.ReutersQueryMaker, Lucene.Net.Benchmark";
            p["doc.stored"] = "true";
            p["docs.dir"] = "reuters-out";
            p["compound"] = "cmpnd:true:true:true:true:false:false:false:false";
            p["doc.tokenized"] = "true";
            p["merge.factor"] = "mrg:10:100:10:100:10:100:10:100";
            return p;
        }
    }
}
