Ported Lucene.Net.Benchmark + tests

Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/b515271d
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/b515271d
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/b515271d

Branch: refs/heads/master
Commit: b515271d8821dde3cd980beae780d204fd6b0e5c
Parents: 1e52293
Author: Shad Storhaug <s...@shadstorhaug.com>
Authored: Mon Jul 31 14:26:48 2017 +0700
Committer: Shad Storhaug <s...@shadstorhaug.com>
Committed: Wed Aug 2 09:54:52 2017 +0700

----------------------------------------------------------------------
 Lucene.Net.sln                                  |   52 +
 src/Lucene.Net.Benchmark/ByTask/Benchmark.cs    |  170 +++
 .../ByTask/Feeds/AbstractQueryMaker.cs          |   85 ++
 .../ByTask/Feeds/ContentItemsSource.cs          |  227 ++++
 .../ByTask/Feeds/ContentSource.cs               |   38 +
 .../ByTask/Feeds/DemoHTMLParser.cs              |  259 ++++
 .../ByTask/Feeds/DirContentSource.cs            |  259 ++++
 .../ByTask/Feeds/DocData.cs                     |   73 ++
 .../ByTask/Feeds/DocMaker.cs                    |  511 ++++++++
 .../ByTask/Feeds/EnwikiContentSource.cs         |  394 ++++++
 .../ByTask/Feeds/EnwikiQueryMaker.cs            |  146 +++
 .../ByTask/Feeds/FacetSource.cs                 |   47 +
 .../ByTask/Feeds/FileBasedQueryMaker.cs         |  121 ++
 .../ByTask/Feeds/GeonamesLineParser.cs          |   53 +
 .../ByTask/Feeds/HTMLParser.cs                  |   42 +
 .../ByTask/Feeds/LineDocSource.cs               |  328 +++++
 .../ByTask/Feeds/LongToEnglishContentSource.cs  |   72 ++
 .../ByTask/Feeds/LongToEnglishQueryMaker.cs     |   89 ++
 .../ByTask/Feeds/NoMoreDataException.cs         |   50 +
 .../ByTask/Feeds/QueryMaker.cs                  |   48 +
 .../ByTask/Feeds/RandomFacetSource.cs           |  109 ++
 .../ByTask/Feeds/ReutersContentSource.cs        |  140 +++
 .../ByTask/Feeds/ReutersQueryMaker.cs           |  126 ++
 .../ByTask/Feeds/SimpleQueryMaker.cs            |   70 ++
 .../Feeds/SimpleSloppyPhraseQueryMaker.cs       |   88 ++
 .../ByTask/Feeds/SingleDocSource.cs             |   77 ++
 .../ByTask/Feeds/SortableSingleDocSource.cs     |  114 ++
 .../ByTask/Feeds/SpatialDocMaker.cs             |  249 ++++
 .../ByTask/Feeds/SpatialFileQueryMaker.cs       |  131 ++
 .../ByTask/Feeds/TrecContentSource.cs           |  350 ++++++
 .../ByTask/Feeds/TrecDocParser.cs               |  159 +++
 .../ByTask/Feeds/TrecFBISParser.cs              |   68 +
 .../ByTask/Feeds/TrecFR94Parser.cs              |   69 +
 .../ByTask/Feeds/TrecFTParser.cs                |   58 +
 .../ByTask/Feeds/TrecGov2Parser.cs              |   57 +
 .../ByTask/Feeds/TrecLATimesParser.cs           |   75 ++
 .../ByTask/Feeds/TrecParserByPath.cs            |   34 +
 src/Lucene.Net.Benchmark/ByTask/PerfRunData.cs  |  490 ++++++++
 .../ByTask/Programmatic/Sample.cs               |   90 ++
 src/Lucene.Net.Benchmark/ByTask/Stats/Points.cs |  108 ++
 src/Lucene.Net.Benchmark/ByTask/Stats/Report.cs |   70 ++
 .../ByTask/Stats/TaskStats.cs                   |  237 ++++
 .../ByTask/Tasks/AddDocTask.cs                  |   93 ++
 .../ByTask/Tasks/AddFacetedDocTask.cs           |   95 ++
 .../ByTask/Tasks/AddIndexesTask.cs              |  104 ++
 .../ByTask/Tasks/AnalyzerFactoryTask.cs         |  580 +++++++++
 .../ByTask/Tasks/BenchmarkHighlighter.cs        |   32 +
 .../ByTask/Tasks/ClearStatsTask.cs              |   44 +
 .../ByTask/Tasks/CloseIndexTask.cs              |   67 +
 .../ByTask/Tasks/CloseReaderTask.cs             |   49 +
 .../ByTask/Tasks/CloseTaxonomyIndexTask.cs      |   42 +
 .../ByTask/Tasks/CloseTaxonomyReaderTask.cs     |   47 +
 .../ByTask/Tasks/CommitIndexTask.cs             |   62 +
 .../ByTask/Tasks/CommitTaxonomyIndexTask.cs     |   48 +
 .../ByTask/Tasks/ConsumeContentSourceTask.cs    |   48 +
 .../ByTask/Tasks/CreateIndexTask.cs             |  225 ++++
 .../ByTask/Tasks/CreateTaxonomyIndexTask.cs     |   42 +
 .../ByTask/Tasks/ForceMergeTask.cs              |   61 +
 .../ByTask/Tasks/NearRealtimeReaderTask.cs      |  132 ++
 .../ByTask/Tasks/NewAnalyzerTask.cs             |  189 +++
 .../ByTask/Tasks/NewCollationAnalyzerTask.cs    |  149 +++
 .../ByTask/Tasks/NewLocaleTask.cs               |   97 ++
 .../ByTask/Tasks/NewRoundTask.cs                |   44 +
 .../ByTask/Tasks/OpenIndexTask.cs               |   88 ++
 .../ByTask/Tasks/OpenReaderTask.cs              |  100 ++
 .../ByTask/Tasks/OpenTaxonomyIndexTask.cs       |   41 +
 .../ByTask/Tasks/OpenTaxonomyReaderTask.cs      |   44 +
 .../ByTask/Tasks/PerfTask.cs                    |  380 ++++++
 .../ByTask/Tasks/PrintReaderTask.cs             |   60 +
 .../ByTask/Tasks/ReadTask.cs                    |  339 +++++
 .../ByTask/Tasks/ReadTokensTask.cs              |  160 +++
 .../ByTask/Tasks/ReopenReaderTask.cs            |   45 +
 .../ByTask/Tasks/RepAllTask.cs                  |   83 ++
 .../ByTask/Tasks/RepSelectByPrefTask.cs         |   81 ++
 .../ByTask/Tasks/RepSumByNameRoundTask.cs       |   83 ++
 .../ByTask/Tasks/RepSumByNameTask.cs            |   81 ++
 .../ByTask/Tasks/RepSumByPrefRoundTask.cs       |   79 ++
 .../ByTask/Tasks/RepSumByPrefTask.cs            |   91 ++
 .../ByTask/Tasks/ReportTask.cs                  |  189 +++
 .../ByTask/Tasks/ResetInputsTask.cs             |   43 +
 .../ByTask/Tasks/ResetSystemEraseTask.cs        |   42 +
 .../ByTask/Tasks/ResetSystemSoftTask.cs         |   41 +
 .../ByTask/Tasks/RollbackIndexTask.cs           |   52 +
 .../ByTask/Tasks/SearchTask.cs                  |   60 +
 .../ByTask/Tasks/SearchTravRetHighlightTask.cs  |  188 +++
 .../Tasks/SearchTravRetLoadFieldSelectorTask.cs |   85 ++
 .../ByTask/Tasks/SearchTravRetTask.cs           |   44 +
 .../Tasks/SearchTravRetVectorHighlightTask.cs   |  191 +++
 .../ByTask/Tasks/SearchTravTask.cs              |   87 ++
 .../ByTask/Tasks/SearchWithCollectorTask.cs     |   99 ++
 .../ByTask/Tasks/SearchWithSortTask.cs          |  157 +++
 .../ByTask/Tasks/SetPropTask.cs                 |   71 ++
 .../ByTask/Tasks/TaskSequence.cs                |  662 ++++++++++
 .../ByTask/Tasks/UpdateDocTask.cs               |   99 ++
 .../ByTask/Tasks/WaitForMergesTask.cs           |   36 +
 .../ByTask/Tasks/WaitTask.cs                    |   89 ++
 .../ByTask/Tasks/WarmTask.cs                    |   64 +
 .../ByTask/Tasks/WriteEnwikiLineDocTask.cs      |   72 ++
 .../ByTask/Tasks/WriteLineDocTask.cs            |  238 ++++
 .../ByTask/Utils/Algorithm.cs                   |  459 +++++++
 .../ByTask/Utils/AnalyzerFactory.cs             |  156 +++
 src/Lucene.Net.Benchmark/ByTask/Utils/Config.cs |  559 +++++++++
 .../ByTask/Utils/FileUtils.cs                   |   46 +
 src/Lucene.Net.Benchmark/ByTask/Utils/Format.cs |  109 ++
 .../ByTask/Utils/StreamUtils.cs                 |  132 ++
 src/Lucene.Net.Benchmark/Constants.cs           |   33 +
 .../Lucene.Net.Benchmark.csproj                 |  214 ++++
 .../Lucene.Net.Benchmark.project.json           |   15 +
 .../Properties/AssemblyInfo.cs                  |   30 +
 src/Lucene.Net.Benchmark/Quality/Judge.cs       |   55 +
 .../Quality/QualityBenchmark.cs                 |  159 +++
 .../Quality/QualityQuery.cs                     |  107 ++
 .../Quality/QualityQueryParser.cs               |   35 +
 .../Quality/QualityStats.cs                     |  339 +++++
 .../Quality/Trec/QueryDriver.cs                 |   93 ++
 .../Quality/Trec/Trec1MQReader.cs               |   92 ++
 .../Quality/Trec/TrecJudge.cs                   |  186 +++
 .../Quality/Trec/TrecTopicsReader.cs            |  154 +++
 .../Quality/Utils/DocNameExtractor.cs           |   89 ++
 .../Quality/Utils/QualityQueriesFinder.cs       |  152 +++
 .../Quality/Utils/SimpleQQParser.cs             |   76 ++
 .../Quality/Utils/SubmissionReport.cs           |   98 ++
 .../Utils/ExtractReuters.cs                     |  167 +++
 .../Utils/ExtractWikipedia.cs                   |  178 +++
 src/Lucene.Net.Benchmark/project.json           |   53 +
 src/Lucene.Net.TestFramework/Util/TestUtil.cs   |   22 +-
 .../BenchmarkTestCase.cs                        |  129 ++
 .../ByTask/Feeds/DocMakerTest.cs                |  193 +++
 .../ByTask/Feeds/EnwikiContentSourceTest.cs     |  194 +++
 .../ByTask/Feeds/LineDocSourceTest.cs           |  271 ++++
 .../ByTask/Feeds/TestHtmlParser.cs              |  164 +++
 .../ByTask/Feeds/TrecContentSourceTest.cs       |  431 +++++++
 .../ByTask/Feeds/trecdocs.zip                   |  Bin 0 -> 2514 bytes
 .../ByTask/Tasks/AddIndexesTaskTest.cs          |  153 +++
 .../ByTask/Tasks/Alt/AltPackageTaskTest.cs      |   68 +
 .../ByTask/Tasks/Alt/AltTestTask.cs             |   35 +
 .../ByTask/Tasks/CommitIndexTaskTest.cs         |   63 +
 .../ByTask/Tasks/CountingHighlighterTestTask.cs |   85 ++
 .../ByTask/Tasks/CountingSearchTestTask.cs      |   65 +
 .../ByTask/Tasks/CreateIndexTaskTest.cs         |  129 ++
 .../ByTask/Tasks/PerfTaskTest.cs                |   81 ++
 .../ByTask/Tasks/SearchWithSortTaskTest.cs      |   35 +
 .../ByTask/Tasks/WriteEnwikiLineDocTaskTest.cs  |  121 ++
 .../ByTask/Tasks/WriteLineDocTaskTest.cs        |  436 +++++++
 .../ByTask/TestPerfTasksLogic.cs                | 1177 ++++++++++++++++++
 .../ByTask/TestPerfTasksParse.cs                |  178 +++
 .../ByTask/Utils/StreamUtilsTest.cs             |  149 +++
 .../ByTask/Utils/TestConfig.cs                  |   37 +
 src/Lucene.Net.Tests.Benchmark/ByTask/conf.zip  |  Bin 0 -> 40878 bytes
 .../ByTask/reuters.first20.lines.txt            |   20 +
 .../test-mapping-ISOLatin1Accent-partial.txt    |   30 +
 .../Conf/ConfLoader.cs                          |   28 +
 .../Lucene.Net.Tests.Benchmark.csproj           |  129 ++
 .../Lucene.Net.Tests.Benchmark.project.json     |   13 +
 .../Properties/AssemblyInfo.cs                  |   36 +
 .../Quality/TestQualityRun.cs                   |  210 ++++
 .../Quality/reuters.578.lines.txt.bz2           |  Bin 0 -> 208314 bytes
 .../Quality/trecQRels.txt                       |  723 +++++++++++
 .../Quality/trecTopics.txt                      |  287 +++++
 .../Support/TestApiConsistency.cs               |  150 +++
 .../Support/TestExceptionSerialization.cs       |   54 +
 src/Lucene.Net.Tests.Benchmark/project.json     |   56 +
 162 files changed, 22383 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index 5450020..08a00a0 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -110,6 +110,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = 
"Lucene.Net.Analysis.Kuromoj
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = 
"Lucene.Net.Tests.Analysis.Kuromoji", 
"src\Lucene.Net.Tests.Analysis.Kuromoji\Lucene.Net.Tests.Analysis.Kuromoji.csproj",
 "{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Benchmark", 
"src\Lucene.Net.Benchmark\Lucene.Net.Benchmark.csproj", 
"{EDC77CB4-597F-4818-8C83-3C006D12C384}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = 
"Lucene.Net.Tests.Benchmark", 
"src\Lucene.Net.Tests.Benchmark\Lucene.Net.Tests.Benchmark.csproj", 
"{9257F543-44E2-4DB6-8B27-A8A354C13E5B}"
+EndProject
 Global
        GlobalSection(SolutionConfigurationPlatforms) = preSolution
                Debug|Any CPU = Debug|Any CPU
@@ -1111,6 +1115,54 @@ Global
                {34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release35|Mixed 
Platforms.Build.0 = Release|Any CPU
                {34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release35|x86.ActiveCfg 
= Release|Any CPU
                {34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release35|x86.Build.0 = 
Release|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Debug|Any CPU.ActiveCfg 
= Debug|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Debug|Any CPU.Build.0 = 
Debug|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Debug|Mixed 
Platforms.ActiveCfg = Debug|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Debug|Mixed 
Platforms.Build.0 = Debug|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Debug|x86.ActiveCfg = 
Debug|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Debug|x86.Build.0 = 
Debug|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Debug35|Any 
CPU.ActiveCfg = Debug|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Debug35|Any CPU.Build.0 
= Debug|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Debug35|Mixed 
Platforms.ActiveCfg = Debug|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Debug35|Mixed 
Platforms.Build.0 = Debug|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Debug35|x86.ActiveCfg = 
Debug|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Debug35|x86.Build.0 = 
Debug|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Release|Any 
CPU.ActiveCfg = Release|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Release|Any CPU.Build.0 
= Release|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Release|Mixed 
Platforms.ActiveCfg = Release|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Release|Mixed 
Platforms.Build.0 = Release|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Release|x86.ActiveCfg = 
Release|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Release|x86.Build.0 = 
Release|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Release35|Any 
CPU.ActiveCfg = Release|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Release35|Any 
CPU.Build.0 = Release|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Release35|Mixed 
Platforms.ActiveCfg = Release|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Release35|Mixed 
Platforms.Build.0 = Release|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Release35|x86.ActiveCfg 
= Release|Any CPU
+               {EDC77CB4-597F-4818-8C83-3C006D12C384}.Release35|x86.Build.0 = 
Release|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Debug|Any CPU.ActiveCfg 
= Debug|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Debug|Any CPU.Build.0 = 
Debug|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Debug|Mixed 
Platforms.ActiveCfg = Debug|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Debug|Mixed 
Platforms.Build.0 = Debug|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Debug|x86.ActiveCfg = 
Debug|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Debug|x86.Build.0 = 
Debug|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Debug35|Any 
CPU.ActiveCfg = Debug|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Debug35|Any CPU.Build.0 
= Debug|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Debug35|Mixed 
Platforms.ActiveCfg = Debug|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Debug35|Mixed 
Platforms.Build.0 = Debug|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Debug35|x86.ActiveCfg = 
Debug|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Debug35|x86.Build.0 = 
Debug|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Release|Any 
CPU.ActiveCfg = Release|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Release|Any CPU.Build.0 
= Release|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Release|Mixed 
Platforms.ActiveCfg = Release|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Release|Mixed 
Platforms.Build.0 = Release|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Release|x86.ActiveCfg = 
Release|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Release|x86.Build.0 = 
Release|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Release35|Any 
CPU.ActiveCfg = Release|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Release35|Any 
CPU.Build.0 = Release|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Release35|Mixed 
Platforms.ActiveCfg = Release|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Release35|Mixed 
Platforms.Build.0 = Release|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Release35|x86.ActiveCfg 
= Release|Any CPU
+               {9257F543-44E2-4DB6-8B27-A8A354C13E5B}.Release35|x86.Build.0 = 
Release|Any CPU
        EndGlobalSection
        GlobalSection(SolutionProperties) = preSolution
                HideSolutionNode = FALSE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Benchmark.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Benchmark.cs 
b/src/Lucene.Net.Benchmark/ByTask/Benchmark.cs
new file mode 100644
index 0000000..9f3ad70
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Benchmark.cs
@@ -0,0 +1,170 @@
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Support;
using Lucene.Net.Util;
using System;
using System.IO;
using System.Text;

namespace Lucene.Net.Benchmarks.ByTask
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Run the benchmark algorithm.
    /// </summary>
    /// <remarks>
    /// <list type="number">
    ///     <item><description>Read algorithm.</description></item>
    ///     <item><description>Run the algorithm.</description></item>
    /// </list>
    /// <para/>
    /// Things to be added/fixed in "Benchmarking by tasks":
    /// <list type="number">
    ///     <item><description>TODO - report into Excel and/or graphed view.</description></item>
    ///     <item><description>TODO - perf comparison between Lucene releases over the years.</description></item>
    ///     <item><description>TODO - perf report adequate to include in Lucene nightly build site? (so we can easily track performance changes.)</description></item>
    ///     <item><description>TODO - add overall time control for repeated execution (vs. current by-count only).</description></item>
    ///     <item><description>TODO - query maker that is based on index statistics.</description></item>
    /// </list>
    /// </remarks>
    public class Benchmark
    {
        // Private gate for Execute(); locking on a dedicated object instead of
        // 'this' prevents external callers from interfering with (or deadlocking
        // on) the run-once synchronization.
        private readonly object syncLock = new object();

        private PerfRunData runData;   // prepared from the parsed Config
        private Algorithm algorithm;   // parsed from the algorithm file
        private bool executed;         // true once Execute() has run (run-once guard)

        /// <summary>
        /// Reads the benchmark configuration from <paramref name="algReader"/> and
        /// parses the algorithm, failing fast if either step cannot complete.
        /// </summary>
        /// <param name="algReader">Reader over the algorithm (.alg) file.</param>
        /// <exception cref="Exception">
        /// If the run data cannot be initialized or the algorithm cannot be parsed;
        /// the original failure is attached as the inner exception.
        /// </exception>
        public Benchmark(TextReader algReader)
        {
            // prepare run data
            try
            {
                runData = new PerfRunData(new Config(algReader));
            }
            catch (Exception e)
            {
                throw new Exception("Error: cannot init PerfRunData!", e);
            }

            // parse algorithm
            try
            {
                algorithm = new Algorithm(runData);
            }
            catch (Exception e)
            {
                throw new Exception("Error: cannot understand algorithm!", e);
            }
        }

        /// <summary>
        /// Execute this benchmark. May only be called once per instance; a second
        /// call throws <see cref="InvalidOperationException"/>.
        /// </summary>
        public virtual void Execute()
        {
            lock (syncLock)
            {
                if (executed)
                {
                    throw new InvalidOperationException("Benchmark was already executed");
                }
                executed = true;
                runData.SetStartTimeMillis();
                algorithm.Execute();
            }
        }

        /// <summary>
        /// Run the benchmark algorithm.
        /// </summary>
        /// <param name="args">Benchmark config and algorithm files.</param>
        public static void Main(string[] args)
        {
            Exec(args);
        }

        /// <summary>
        /// Utility: execute benchmark from command line.
        /// </summary>
        /// <param name="args">Single argument is expected: algorithm-file.</param>
        public static void Exec(string[] args)
        {
            // verify command line args
            if (args.Length < 1)
            {
                // BUGFIX: the usage message was carried over from the Java original
                // and said "java Benchmark"; this is the .NET port.
                SystemConsole.WriteLine("Usage: Benchmark <algorithm file>");
                Environment.Exit(1);
            }

            // verify input files
            FileInfo algFile = new FileInfo(args[0]);
            if (!algFile.Exists)
            {
                SystemConsole.WriteLine("cannot find/read algorithm file: " + algFile.FullName);
                Environment.Exit(1);
            }

            SystemConsole.WriteLine("Running algorithm from: " + algFile.FullName);

            // Environment.Exit above guarantees benchmark is non-null past the catch.
            Benchmark benchmark = null;
            try
            {
                benchmark = new Benchmark(IOUtils.GetDecodingReader(algFile, Encoding.UTF8));
            }
            catch (Exception e)
            {
                SystemConsole.WriteLine(e.ToString());
                Environment.Exit(1);
            }

            SystemConsole.WriteLine("------------> algorithm:");
            SystemConsole.WriteLine(benchmark.Algorithm.ToString());

            // execute
            try
            {
                benchmark.Execute();
            }
            catch (Exception e)
            {
                SystemConsole.WriteLine("Error: cannot execute the algorithm! " + e.Message);
                SystemConsole.WriteLine(e.StackTrace);
            }

            SystemConsole.WriteLine("####################");
            SystemConsole.WriteLine("###  D O N E !!! ###");
            SystemConsole.WriteLine("####################");
        }

        /// <summary>
        /// Returns the algorithm.
        /// </summary>
        public virtual Algorithm Algorithm
        {
            get { return algorithm; }
        }

        /// <summary>
        /// Returns the runData.
        /// </summary>
        public virtual PerfRunData RunData
        {
            get { return runData; }
        }
    }
}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Feeds/AbstractQueryMaker.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/AbstractQueryMaker.cs 
b/src/Lucene.Net.Benchmark/ByTask/Feeds/AbstractQueryMaker.cs
new file mode 100644
index 0000000..fb6a2bf
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/AbstractQueryMaker.cs
@@ -0,0 +1,85 @@
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Search;
using System;
using System.Text;

namespace Lucene.Net.Benchmarks.ByTask.Feeds
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Abstract base query maker.
    /// Each query maker should just implement the <see cref="PrepareQueries()"/> method.
    /// </summary>
    public abstract class AbstractQueryMaker : IQueryMaker
    {
        // Private gate for NextQnum(); locking on 'this' would allow external
        // code to interfere with the round-robin counter.
        private readonly object syncLock = new object();

        protected int m_qnum = 0;     // index of the next query to hand out
        protected Query[] m_queries;  // populated by SetConfig() via PrepareQueries()
        protected Config m_config;

        /// <summary>
        /// Resets the query sequence so <see cref="MakeQuery()"/> starts from the
        /// first prepared query again.
        /// </summary>
        public virtual void ResetInputs()
        {
            m_qnum = 0;
        }

        /// <summary>
        /// Produces the fixed set of queries this maker will cycle through.
        /// Called once from <see cref="SetConfig(Config)"/>.
        /// </summary>
        protected abstract Query[] PrepareQueries();

        /// <summary>
        /// Stores the configuration and eagerly prepares the queries.
        /// </summary>
        public virtual void SetConfig(Config config)
        {
            this.m_config = config;
            m_queries = PrepareQueries();
        }

        /// <summary>
        /// Returns a printable listing of all prepared queries, one per line
        /// (index, query type name, and query text).
        /// </summary>
        public virtual string PrintQueries()
        {
            string newline = Environment.NewLine;
            StringBuilder sb = new StringBuilder();
            if (m_queries != null)
            {
                for (int i = 0; i < m_queries.Length; i++)
                {
                    sb.Append(i + ". " + m_queries[i].GetType().Name + " - " + m_queries[i].ToString());
                    sb.Append(newline);
                }
            }
            return sb.ToString();
        }

        /// <summary>
        /// Returns the next query in round-robin order.
        /// </summary>
        public virtual Query MakeQuery()
        {
            return m_queries[NextQnum()];
        }

        /// <summary>
        /// Returns the next query number, wrapping around at the end of the
        /// prepared query array. Thread-safe.
        /// </summary>
        protected virtual int NextQnum()
        {
            lock (syncLock)
            {
                int res = m_qnum;
                m_qnum = (m_qnum + 1) % m_queries.Length;
                return res;
            }
        }

        /// <seealso cref="IQueryMaker.MakeQuery(int)"/>
        /// <exception cref="Exception">Always; size-bounded queries are not supported by this base class.</exception>
        public virtual Query MakeQuery(int size)
        {
            throw new Exception(this + ".MakeQuery(int size) is not supported!");
        }
    }
}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Feeds/ContentItemsSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/ContentItemsSource.cs 
b/src/Lucene.Net.Benchmark/ByTask/Feeds/ContentItemsSource.cs
new file mode 100644
index 0000000..c0f06ef
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/ContentItemsSource.cs
@@ -0,0 +1,227 @@
+using Lucene.Net.Benchmarks.ByTask.Utils;
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Reflection;
+using System.Text;
+
+namespace Lucene.Net.Benchmarks.ByTask.Feeds
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Base class for source of data for benchmarking.
+    /// </summary>
+    /// <remarks>
+    /// Keeps track of various statistics, such as how many data items were 
generated, 
+    /// size in bytes etc.
+    /// <para/>
+    /// Supports the following configuration parameters:
+    /// <list type="bullet">
+    ///     <item><term>content.source.forever</term><description>specifies 
whether to generate items forever (<b>default=true</b>).</description></item>
+    ///     <item><term>content.source.verbose</term><description>specifies 
whether messages should be output by the content source 
(<b>default=false</b>).</description></item>
+    ///     <item><term>content.source.encoding</term><description>
+    ///         specifies which encoding to use when 
+    ///         reading the files of that content source. Certain 
implementations may define
+    ///         a default value if this parameter is not specified. 
(<b>default=null</b>).
+    ///     </description></item>
+    ///     <item><term>content.source.log.step</term><description>
+    ///         specifies for how many items a
+    ///         message should be logged. If set to 0 it means no logging 
should occur.
+    ///         <b>NOTE:</b> if verbose is set to false, logging should not 
occur even if
+    ///         logStep is not 0 (<b>default=0</b>).
+    ///     </description></item>
+    /// </list>
+    /// </remarks>
+    public abstract class ContentItemsSource : IDisposable
+    {
+        private long bytesCount;
+        private long totalBytesCount;
+        private int itemCount;
+        private int totalItemCount;
+        private Config config;
+
+        private int lastPrintedNumUniqueTexts = 0;
+        private long lastPrintedNumUniqueBytes = 0;
+        private int printNum = 0;
+
+        protected bool m_forever;
+        protected int m_logStep;
+        protected bool m_verbose;
+        protected Encoding m_encoding;
+
        /// <summary>
        /// Updates the count of bytes generated by this source: adds
        /// <paramref name="numBytes"/> to both the since-last-reset counter and
        /// the running total. Thread-safe.
        /// </summary>
        protected void AddBytes(long numBytes)
        {
            lock (this) // NOTE(review): locking on 'this' is discouraged; consider a private sync object
            {
                bytesCount += numBytes;      // cleared by ResetInputs()
                totalBytesCount += numBytes; // cumulative; never reset
            }
        }
+
        /// <summary>
        /// Updates the count of items generated by this source: increments both
        /// the since-last-reset counter and the running total. Thread-safe.
        /// </summary>
        protected void AddItem()
        {
            lock (this) // NOTE(review): locking on 'this' is discouraged; consider a private sync object
            {
                ++itemCount;      // cleared by ResetInputs()
                ++totalItemCount; // cumulative; never reset
            }
        }
+
        /// <summary>
        /// A convenience method for collecting all the files of a content source from
        /// a given directory (recursively). The collected <see cref="FileInfo"/> instances are
        /// appended to the given <paramref name="files"/> list, which is then sorted
        /// by full path name (ordinal comparison) for a deterministic order.
        /// </summary>
        protected void CollectFiles(DirectoryInfo dir, IList<FileInfo> files)
        {
            CollectFilesImpl(dir, files);
            // Sort(IComparer<T>) on IList<T> — presumably the Lucene.Net.Support extension; confirm
            files.Sort(new FileNameComparer());
        }
+
        /// <summary>
        /// Recursive worker for <c>CollectFiles</c>: descends into every
        /// subdirectory first, then appends the files of <paramref name="dir"/> itself.
        /// Collection order is irrelevant here because the caller sorts the final list.
        /// </summary>
        private void CollectFilesImpl(DirectoryInfo dir, IList<FileInfo> files)
        {
            foreach (var sub in dir.EnumerateDirectories())
            {
                CollectFilesImpl(sub, files);
            }

            // AddRange on IList<T> — presumably the Lucene.Net.Support extension; confirm
            files.AddRange(dir.GetFiles());
        }
+
+        private class FileNameComparer : IComparer<FileInfo>
+        {
+            public int Compare(FileInfo x, FileInfo y)
+            {
+                return x.FullName.CompareToOrdinal(y.FullName);
+            }
+        }
+
+        /// <summary>
+        /// Returns <c>true</c> whether it's time to log a message (depending 
on verbose and
+        /// the number of items generated).
+        /// </summary>
+        /// <returns></returns>
+        protected bool ShouldLog()
+        {
+            return m_verbose && m_logStep > 0 && itemCount % m_logStep == 0;
+        }
+
+        /// <summary>Called when reading from this content source is no longer required.</summary>
+        public void Dispose()
+        {
+            Dispose(true);
+            // Standard dispose pattern: a derived type that adds a finalizer is not finalized twice.
+            GC.SuppressFinalize(this);
+        }
+
+        /// <summary>Called when reading from this content source is no longer required.</summary>
+        /// <param name="disposing"><c>true</c> when invoked from <see cref="Dispose()"/>; release managed resources then.</param>
+        protected abstract void Dispose(bool disposing);
+
+
+        /// <summary>Returns the number of bytes generated since last reset.</summary>
+        public long BytesCount { get { return bytesCount; } }
+
+        /// <summary>Returns the number of generated items since last reset.</summary>
+        public int ItemsCount { get { return itemCount; } }
+
+        /// <summary>Returns the <see cref="Utils.Config"/> assigned via <see cref="SetConfig(Config)"/>.</summary>
+        public Config Config { get { return config; } }
+
+        /// <summary>Returns the total number of bytes that were generated by this source (never reset).</summary>
+        public long TotalBytesCount { get { return totalBytesCount; } }
+
+        /// <summary>Returns the total number of generated items (never reset).</summary>
+        public int TotalItemsCount { get { return totalItemCount; } }
+
+        /// <summary>
+        /// Resets the input for this content source, so that the test would behave as
+        /// if it was just started, input-wise.
+        /// <para/>
+        /// <b>NOTE:</b> the default implementation resets the number of bytes and
+        /// items generated since the last reset, so it's important to call
+        /// <c>base.ResetInputs()</c> in case you override this method.
+        /// </summary>
+        public virtual void ResetInputs()
+        {
+            // The running totals (totalBytesCount/totalItemCount) intentionally keep accumulating.
+            bytesCount = 0;
+            itemCount = 0;
+        }
+
+        /// <summary>
+        /// Sets the <see cref="Utils.Config"/> for this content source. If you override this
+        /// method, you must call <c>base.SetConfig(config)</c>.
+        /// </summary>
+        /// <param name="config">Configuration supplying the "content.source.*" properties.</param>
+        public virtual void SetConfig(Config config)
+        {
+            this.config = config;
+            m_forever = config.Get("content.source.forever", true);
+            m_logStep = config.Get("content.source.log.step", 0);
+            m_verbose = config.Get("content.source.verbose", false);
+            string encodingStr = config.Get("content.source.encoding", null);
+            if (!string.IsNullOrWhiteSpace(encodingStr))
+            {
+                m_encoding = Encoding.GetEncoding(encodingStr);
+            }
+            else
+            {
+                // NOTE(review): Encoding.GetEncoding(0) means "system default ANSI code page"
+                // on .NET Framework — confirm it behaves the same on .NET Core targets.
+                m_encoding = Encoding.GetEncoding(0); // Default system encoding
+            }
+        }
+
+        /// <summary>
+        /// Prints a statistics report to the console: total items/bytes generated and the
+        /// items/bytes generated since the last reset. Does nothing unless verbose mode is
+        /// on, and only prints when at least one counter advanced since the previous report.
+        /// </summary>
+        /// <param name="itemsName">Display name of the generated items (e.g. "docs").</param>
+        public virtual void PrintStatistics(string itemsName)
+        {
+            if (!m_verbose)
+            {
+                return;
+            }
+            bool print = false;
+            // column padding passed to Formatter.Format for aligned output
+            string col = "                  ";
+            StringBuilder sb = new StringBuilder();
+            string newline = Environment.NewLine;
+            sb.Append("------------> 
").Append(GetType().GetTypeInfo().Name).Append(" statistics 
(").Append(printNum).Append("): ").Append(newline);
+            // report totals only when they advanced since the last call
+            int nut = TotalItemsCount;
+            if (nut > lastPrintedNumUniqueTexts)
+            {
+                print = true;
+                sb.Append("total count of " + itemsName + ": 
").Append(Formatter.Format(0, nut, col)).Append(newline);
+                lastPrintedNumUniqueTexts = nut;
+            }
+            long nub = TotalBytesCount;
+            if (nub > lastPrintedNumUniqueBytes)
+            {
+                print = true;
+                sb.Append("total bytes of " + itemsName + ": 
").Append(Formatter.Format(0, nub, col)).Append(newline);
+                lastPrintedNumUniqueBytes = nub;
+            }
+            if (ItemsCount > 0)
+            {
+                print = true;
+                sb.Append("num " + itemsName + " added since last inputs 
reset:   ").Append(Formatter.Format(0, ItemsCount, col)).Append(newline);
+                sb.Append("total bytes added for " + itemsName + " since last 
inputs reset: ").Append(Formatter.Format(0, BytesCount, col)).Append(newline);
+            }
+            if (print)
+            {
+                SystemConsole.WriteLine(sb.Append(newline).ToString());
+                printNum++;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Feeds/ContentSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/ContentSource.cs 
b/src/Lucene.Net.Benchmark/ByTask/Feeds/ContentSource.cs
new file mode 100644
index 0000000..a3c39cb
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/ContentSource.cs
@@ -0,0 +1,38 @@
+namespace Lucene.Net.Benchmarks.ByTask.Feeds
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Represents content from a specified source, such as TREC, Reuters etc. A
+    /// <see cref="ContentSource"/> is responsible for creating <see cref="DocData"/> objects for
+    /// its documents to be consumed by <see cref="DocMaker"/>. It also keeps track
+    /// of various statistics, such as how many documents were generated, size in
+    /// bytes etc.
+    /// <para/>
+    /// For supported configuration parameters see <see cref="ContentItemsSource"/>.
+    /// </summary>
+    public abstract class ContentSource : ContentItemsSource
+    {
+        /// <summary>
+        /// Returns the next <see cref="DocData"/> from the content source.
+        /// Implementations must account for multi-threading, as multiple threads
+        /// can call this method simultaneously.
+        /// </summary>
+        /// <param name="docData">Instance to reuse; implementations fill and return it.</param>
+        /// <returns>The populated <paramref name="docData"/>.</returns>
+        /// <exception cref="NoMoreDataException">when the source has no more documents to supply.</exception>
+        public abstract DocData GetNextDocData(DocData docData);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Feeds/DemoHTMLParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/DemoHTMLParser.cs 
b/src/Lucene.Net.Benchmark/ByTask/Feeds/DemoHTMLParser.cs
new file mode 100644
index 0000000..0903754
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/DemoHTMLParser.cs
@@ -0,0 +1,259 @@
+// LUCENENET TODO: Use HTML Agility pack instead of SAX ?
+
+using Lucene.Net.Support;
+using Sax.Net;
+using Sax.Net.Helpers;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Benchmarks.ByTask.Feeds
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Simple HTML Parser extracting title, meta tags, and body text
+    /// that is based on <a 
href="http://nekohtml.sourceforge.net/";>NekoHTML</a>.
+    /// </summary>
+    public class DemoHTMLParser : IHTMLParser
+    {
+        /// <summary>The actual parser to read HTML documents.</summary>
+        public sealed class Parser
+        {
+            private readonly IDictionary<string, string> metaTags = new 
Dictionary<string, string>();
+            private readonly string title, body;
+
+            // LUCENENET specific - expose field through property
+            public IDictionary<string, string> MetaTags
+            {
+                get { return metaTags; }
+            }
+
+            // LUCENENET specific - expose field through property
+            public string Title
+            {
+                get { return title; }
+            }
+
+            // LUCENENET specific - expose field through property
+            public string Body
+            {
+                get { return body; }
+            }
+
+            public Parser(TextReader reader)
+                : this(new InputSource(reader))
+            {
+            }
+
+            public Parser(InputSource source)
+            {
+                TagSoup.Net.Parser parser = new TagSoup.Net.Parser();
+
+                parser.SetFeature(TagSoup.Net.Parser.NAMESPACES_FEATURE, true);
+
+                StringBuilder title = new StringBuilder(), body = new 
StringBuilder();
+                DefaultHandler handler = new 
DefaultHandlerAnonymousHelper(this, title, body);
+
+                parser.ContentHandler = handler;
+                parser.ErrorHandler = handler;
+                parser.Parse(source);
+
+                // the javacc-based parser trimmed title (which should be done 
for HTML in all cases):
+                this.title = title.ToString().Trim();
+
+                // assign body text
+                this.body = body.ToString();
+            }
+
+            private class DefaultHandlerAnonymousHelper : DefaultHandler
+            {
+                private int inBODY = 0, inHEAD = 0, inTITLE = 0, suppressed = 
0;
+
+                private readonly Parser outerInstance;
+                private readonly StringBuilder title;
+                private readonly StringBuilder body;
+
+                public DefaultHandlerAnonymousHelper(Parser outerInstance, 
StringBuilder title, StringBuilder body)
+                {
+                    this.outerInstance = outerInstance;
+                    this.title = title;
+                    this.body = body;
+                }
+
+                public override void StartElement(string uri, string 
localName, string qName, IAttributes atts)
+                {
+                    if (inHEAD > 0)
+                    {
+                        if ("title".Equals(localName, 
StringComparison.OrdinalIgnoreCase))
+                        {
+                            inTITLE++;
+                        }
+                        else
+                        {
+                            if ("meta".Equals(localName, 
StringComparison.OrdinalIgnoreCase))
+                            {
+                                string name = atts.GetValue("name");
+                                if (name == null)
+                                {
+                                    name = atts.GetValue("http-equiv");
+                                }
+                                string val = atts.GetValue("content");
+                                if (name != null && val != null)
+                                {
+                                    
outerInstance.metaTags[name.ToLowerInvariant()] = val;
+                                }
+                            }
+                        }
+                    }
+                    else if (inBODY > 0)
+                    {
+                        if (SUPPRESS_ELEMENTS.Contains(localName))
+                        {
+                            suppressed++;
+                        }
+                        else if ("img".Equals(localName, 
StringComparison.OrdinalIgnoreCase))
+                        {
+                            // the original javacc-based parser preserved <IMG 
alt="..."/>
+                            // attribute as body text in [] parenthesis:
+                            string alt = atts.GetValue("alt");
+                            if (alt != null)
+                            {
+                                body.Append('[').Append(alt).Append(']');
+                            }
+                        }
+                    }
+                    else if ("body".Equals(localName, 
StringComparison.OrdinalIgnoreCase))
+                    {
+                        inBODY++;
+                    }
+                    else if ("head".Equals(localName, 
StringComparison.OrdinalIgnoreCase))
+                    {
+                        inHEAD++;
+                    }
+                    else if ("frameset".Equals(localName, 
StringComparison.OrdinalIgnoreCase))
+                    {
+                        throw new SAXException("This parser does not support 
HTML framesets.");
+                    }
+                }
+
+                public override void EndElement(string uri, string localName, 
string qName)
+                {
+                    if (inBODY > 0)
+                    {
+                        if ("body".Equals(localName, 
StringComparison.OrdinalIgnoreCase))
+                        {
+                            inBODY--;
+                        }
+                        else if (ENDLINE_ELEMENTS.Contains(localName))
+                        {
+                            body.Append('\n');
+                        }
+                        else if (SUPPRESS_ELEMENTS.Contains(localName))
+                        {
+                            suppressed--;
+                        }
+                    }
+                    else if (inHEAD > 0)
+                    {
+                        if ("head".Equals(localName, 
StringComparison.OrdinalIgnoreCase))
+                        {
+                            inHEAD--;
+                        }
+                        else if (inTITLE > 0 && "title".Equals(localName, 
StringComparison.OrdinalIgnoreCase))
+                        {
+                            inTITLE--;
+                        }
+                    }
+                }
+
+                public override void Characters(char[] ch, int start, int 
length)
+                {
+                    if (inBODY > 0 && suppressed == 0)
+                    {
+                        body.Append(ch, start, length);
+                    }
+                    else if (inTITLE > 0)
+                    {
+                        title.Append(ch, start, length);
+                    }
+                }
+
+                public override InputSource ResolveEntity(string publicId, 
string systemId)
+                {
+                    // disable network access caused by DTDs
+                    return new InputSource(new StringReader(""));
+                }
+            }
+
+            private static ISet<string> CreateElementNameSet(params string[] 
names)
+            {
+                return Collections.UnmodifiableSet(new HashSet<string>(names));
+            }
+
+            /// <summary>HTML elements that cause a line break (they are 
block-elements).</summary>
+            internal static readonly ISet<string> ENDLINE_ELEMENTS = 
CreateElementNameSet(
+                "p", "h1", "h2", "h3", "h4", "h5", "h6", "div", "ul", "ol", 
"dl",
+                "pre", "hr", "blockquote", "address", "fieldset", "table", 
"form",
+                "noscript", "li", "dt", "dd", "noframes", "br", "tr", 
"select", "option"
+            );
+
+            /// <summary>HTML elements with contents that are 
ignored.</summary>
+            internal static readonly ISet<string> SUPPRESS_ELEMENTS = 
CreateElementNameSet(
+                "style", "script"
+            );
+        }
+        public virtual DocData Parse(DocData docData, string name, DateTime? 
date, TextReader reader, TrecContentSource trecSrc)
+        {
+            try
+            {
+                return Parse(docData, name, date, new InputSource(reader), 
trecSrc);
+            }
+            catch (SAXException saxe)
+            {
+                throw new IOException("SAX exception occurred while parsing 
HTML document.", saxe);
+            }
+        }
+
+        public virtual DocData Parse(DocData docData, string name, DateTime? 
date, InputSource source, TrecContentSource trecSrc)
+        {
+            Parser p = new Parser(source);
+
+            // properties 
+            IDictionary<string, string> props = p.MetaTags;
+            string dateStr;
+            if (props.TryGetValue("date", out dateStr) && dateStr != null)
+            {
+                DateTime? newDate = trecSrc.ParseDate(dateStr);
+                if (newDate != null)
+                {
+                    date = newDate;
+                }
+            }
+
+            docData.Clear();
+            docData.Name = name;
+            docData.Body = p.Body;
+            docData.Title = p.Title;
+            docData.Props = props;
+            docData.SetDate(date);
+            return docData;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Feeds/DirContentSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/DirContentSource.cs 
b/src/Lucene.Net.Benchmark/ByTask/Feeds/DirContentSource.cs
new file mode 100644
index 0000000..c14d578
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/DirContentSource.cs
@@ -0,0 +1,259 @@
+using Lucene.Net.Benchmarks.ByTask.Utils;
+using Lucene.Net.Support;
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Text;
+
+// LUCENENET TODO: This had to be refactored significantly. We need tests to 
confirm it works.
+
+namespace Lucene.Net.Benchmarks.ByTask.Feeds
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A <see cref="ContentSource"/> using the Dir collection for its input. 
Supports
+    /// the following configuration parameters (on top of <see 
cref="ContentSource"/>):
+    /// <list type="bullet">
+    ///     <item><term>work.dir</term><description>specifies the working 
directory. Required if "docs.dir" denotes a relative path 
(<b>default=work</b>).</description></item>
+    ///     <item><term>docs.dir</term><description>specifies the directory 
the Dir collection. Can be set to a relative path if "work.dir" is also 
specified (<b>default=dir-out</b>).</description></item>
+    /// </list>
+    /// </summary>
+    public class DirContentSource : ContentSource
+    {
+        /// <summary>
+        /// Iterator over the files in the directory.
+        /// </summary>
+        public class Iterator : IEnumerator<FileInfo>
+        {
+
+            private class Comparer : IComparer<FileInfo>
+            {
+                public int Compare(FileInfo a, FileInfo b)
+                {
+                    string a2 = a.ToString();
+                    string b2 = b.ToString();
+                    int diff = a2.Length - b2.Length;
+
+                    if (diff > 0)
+                    {
+                        while (diff-- > 0)
+                        {
+                            b2 = "0" + b2;
+                        }
+                    }
+                    else if (diff < 0)
+                    {
+                        diff = -diff;
+                        while (diff-- > 0)
+                        {
+                            a2 = "0" + a2;
+                        }
+                    }
+
+                    /* note it's reversed because we're going to push,
+                       which reverses again */
+                    return b2.CompareToOrdinal(a2);
+                }
+            }
+
+            internal int count = 0;
+
+            internal Stack<FileInfo> stack = new Stack<FileInfo>();
+
+            /* this seems silly ... there must be a better way ...
+               not that this is good, but can it matter? */
+
+            private Comparer c = new Comparer();
+
+            private FileInfo current;
+
+            public Iterator(DirectoryInfo f)
+            {
+                Push(f);
+            }
+
+            internal void Push(DirectoryInfo f)
+            {
+                foreach (var dir in f.GetDirectories())
+                {
+                    Push(dir);
+                }
+
+                Push(f.GetFiles("*.txt"));
+            }
+
+            internal void Push(FileInfo[] files)
+            {
+                Array.Sort(files, c);
+                for (int i = 0; i < files.Length; i++)
+                {
+                    // System.err.println("push " + files[i]);
+                    stack.Push(files[i]);
+                }
+            }
+
+            public virtual int Count
+            {
+                get { return count; }
+            }
+
+            public virtual bool MoveNext()
+            {
+                if (stack.Count == 0)
+                {
+                    current = null;
+                    return false;
+                }
+                count++;
+                current = stack.Pop();
+                // System.err.println("pop " + object);
+                return true;
+            }
+
+            public virtual FileInfo Current
+            {
+                get { return current; }
+            }
+
+            object IEnumerator.Current
+            {
+                get { return current; }
+            }
+
+            public void Dispose()
+            {
+                Dispose(true);
+                GC.SuppressFinalize(this);
+            }
+
+            protected virtual void Dispose(bool disposing)
+            {
+            }
+
+            public virtual void Reset()
+            {
+            }
+        }
+
+        private DirectoryInfo dataDir = null;
+        private int iteration = 0;
+        private Iterator inputFiles = null;
+
+        private DateTime? ParseDate(string dateStr)
+        {
+            DateTime temp;
+            if (DateTime.TryParseExact(dateStr, "dd-MMM-yyyy hh:mm:ss.fff", 
CultureInfo.InvariantCulture, DateTimeStyles.None, out temp))
+            {
+                return temp;
+            }
+            else if (DateTime.TryParse(dateStr, CultureInfo.InvariantCulture, 
DateTimeStyles.None, out temp))
+            {
+                return temp;
+            }
+
+            return null;
+        }
+
+        protected override void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                inputFiles = null;
+            }
+        }
+
+        public override DocData GetNextDocData(DocData docData)
+        {
+            FileInfo f = null;
+            string name = null;
+            lock (this)
+            {
+                if (!inputFiles.MoveNext())
+                {
+                    // exhausted files, start a new round, unless forever set 
to false.
+                    if (!m_forever)
+                    {
+                        throw new NoMoreDataException();
+                    }
+                    inputFiles = new Iterator(dataDir);
+                    iteration++;
+                }
+                f = inputFiles.Current;
+                // System.err.println(f);
+                name = f.FullName + "_" + iteration;
+            }
+
+            string line = null;
+            string dateStr;
+            string title;
+            StringBuilder bodyBuf = new StringBuilder(1024);
+
+            using (TextReader reader = new StreamReader(new 
FileStream(f.FullName, FileMode.Open, FileAccess.Read), Encoding.UTF8))
+            {
+                //First line is the date, 3rd is the title, rest is body
+                dateStr = reader.ReadLine();
+                reader.ReadLine();//skip an empty line
+                title = reader.ReadLine();
+                reader.ReadLine();//skip an empty line
+                while ((line = reader.ReadLine()) != null)
+                {
+                    bodyBuf.Append(line).Append(' ');
+                }
+            }
+            AddBytes(f.Length);
+
+            DateTime? date = ParseDate(dateStr);
+
+            docData.Clear();
+            docData.Name = name;
+            docData.Body = bodyBuf.ToString();
+            docData.Title = title;
+            docData.SetDate(date);
+            return docData;
+        }
+
+        public override void ResetInputs()
+        {
+            lock (this)
+            {
+                base.ResetInputs();
+                inputFiles = new Iterator(dataDir);
+                iteration = 0;
+            }
+        }
+
+        public override void SetConfig(Config config)
+        {
+            base.SetConfig(config);
+
+            DirectoryInfo workDir = new DirectoryInfo(config.Get("work.dir", 
"work"));
+            string d = config.Get("docs.dir", "dir-out");
+            dataDir = new DirectoryInfo(d);
+
+            inputFiles = new Iterator(dataDir);
+
+            if (inputFiles == null)
+            {
+                throw new Exception("No txt files in dataDir: " + 
dataDir.FullName);
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Feeds/DocData.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/DocData.cs 
b/src/Lucene.Net.Benchmark/ByTask/Feeds/DocData.cs
new file mode 100644
index 0000000..9e68a4e
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/DocData.cs
@@ -0,0 +1,73 @@
+using Lucene.Net.Documents;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Benchmarks.ByTask.Feeds
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Output of parsing (e.g. HTML parsing) of an input document.
+    /// </summary>
+    public class DocData
+    {
+        public string Name { get; set; }
+        public string Body { get; set; }
+        public string Title { get; set; }
+        private string date;
+        public int ID { get; set; }
+        public IDictionary<string, string> Props { get; set; }
+
+        public void Clear()
+        {
+            Name = null;
+            Body = null;
+            Title = null;
+            date = null;
+            Props = null;
+            ID = -1;
+        }
+
+        /// <summary>
+        /// Gets the date. If the ctor with <see cref="DateTime"/> was called, 
then the string
+        /// returned is the output of <see 
cref="DateTools.DateToString(DateTime, DateTools.Resolution)"/>.
+        /// Otherwise it's the string passed to the other ctor.
+        /// </summary>
+        public virtual string Date
+        {
+            get { return date; }
+        }
+
+        public virtual void SetDate(DateTime? date)
+        {
+            if (date.HasValue)
+            {
+                SetDate(DateTools.DateToString(date.Value, 
DateTools.Resolution.SECOND));
+            }
+            else
+            {
+                this.date = null;
+            }
+        }
+
+        public virtual void SetDate(string date)
+        {
+            this.date = date;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Feeds/DocMaker.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/DocMaker.cs 
b/src/Lucene.Net.Benchmark/ByTask/Feeds/DocMaker.cs
new file mode 100644
index 0000000..8ff3e7b
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/DocMaker.cs
@@ -0,0 +1,511 @@
+using Lucene.Net.Benchmarks.ByTask.Utils;
+using Lucene.Net.Documents;
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Text;
+using System.Threading;
+
+namespace Lucene.Net.Benchmarks.ByTask.Feeds
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Creates <see cref="Document"/> objects. Uses a <see 
cref="ContentSource"/> to generate
+    /// <see cref="DocData"/> objects.
+    /// </summary>
+    /// <remarks>
+    /// Supports the following parameters:
+    /// <list type="bullet">
+    ///     <item><term>content.source</term><description>specifies the <see 
cref="ContentSource"/> class to use (default 
<b>SingleDocSource</b>).</description></item>
+    ///     <item><term>doc.stored</term><description>specifies whether fields 
should be stored (default <b>false</b>).</description></item>
+    ///     <item><term>doc.body.stored</term><description>specifies whether 
the body field should be stored (default = 
<b>doc.stored</b>).</description></item>
+    ///     <item><term>doc.tokenized</term><description>specifies whether 
fields should be tokenized (default <b>true</b>).</description></item>
+    ///     <item><term>doc.body.tokenized</term><description>specifies 
whether the body field should be tokenized (default = 
<b>doc.tokenized</b>).</description></item>
+    ///     <item><term>doc.tokenized.norms</term><description>specifies 
whether norms should be stored in the index or not. (default 
<b>false</b>).</description></item>
+    ///     <item><term>doc.body.tokenized.norms</term><description>
+    ///         specifies whether norms should be stored in the index for the 
body field. 
+    ///         This can be set to true, while <c>doc.tokenized.norms</c> is 
set to false, to allow norms storing just
+    ///         for the body field. (default <b>true</b>).
+    ///         </description></item>
+    ///     <item><term>doc.term.vector</term><description>specifies whether 
term vectors should be stored for fields (default 
<b>false</b>).</description></item>
+    ///     <item><term>doc.term.vector.positions</term><description>specifies 
whether term vectors should be stored with positions (default 
<b>false</b>).</description></item>
+    ///     <item><term>doc.term.vector.offsets</term><description>specifies 
whether term vectors should be stored with offsets (default 
<b>false</b>).</description></item>
+    ///     <item><term>doc.store.body.bytes</term><description>specifies 
whether to store the raw bytes of the document's content in the document 
(default <b>false</b>).</description></item>
+    ///     <item><term>doc.reuse.fields</term><description>specifies whether 
<see cref="Field"/> and <see cref="Document"/> objects  should be reused 
(default <b>true</b>).</description></item>
+    ///     <item><term>doc.index.props</term><description>specifies whether 
the properties returned by</description></item>
+    ///     <item><term>doc.random.id.limit</term><description>
+    ///         if specified, docs will be assigned random
+    ///         IDs from 0 to this limit.  This is useful with UpdateDoc
+    ///         for testing performance of <see 
cref="Index.IndexWriter.UpdateDocument(Index.Term, 
IEnumerable{Index.IIndexableField})"/>.
+    ///         <see cref="DocData.Props"/> will be indexed. (default 
<b>false</b>).
+    ///     </description></item>
+    /// </list>
+    /// </remarks>
+    public class DocMaker : IDisposable
+    {
+        private class LeftOver
+        {
+            public DocData DocData { get; set; }
+            public int Count { get; set; }
+        }
+
+        private Random r;
+        private int updateDocIDLimit;
+
+        /// <summary>
+        /// Document state, supports reuse of field instances
+        /// across documents (see <c>reuseFields</c> parameter).
+        /// </summary>
+        protected class DocState
+        {
+            private readonly IDictionary<string, Field> fields;
+            private readonly IDictionary<string, Field> numericFields;
+            private readonly bool reuseFields;
+            internal readonly Document doc;
+            internal DocData docData = new DocData();
+
+            public DocState(bool reuseFields, FieldType ft, FieldType bodyFt)
+            {
+
+                this.reuseFields = reuseFields;
+
+                if (reuseFields)
+                {
+                    fields = new Dictionary<string, Field>();
+                    numericFields = new Dictionary<string, Field>();
+
+                    // Initialize the map with the default fields.
+                    fields[BODY_FIELD] = new Field(BODY_FIELD, "", bodyFt);
+                    fields[TITLE_FIELD] = new Field(TITLE_FIELD, "", ft);
+                    fields[DATE_FIELD] = new Field(DATE_FIELD, "", ft);
+                    fields[ID_FIELD] = new StringField(ID_FIELD, "", 
Field.Store.YES);
+                    fields[NAME_FIELD] = new Field(NAME_FIELD, "", ft);
+
+                    numericFields[DATE_MSEC_FIELD] = new 
Int64Field(DATE_MSEC_FIELD, 0L, Field.Store.NO);
+                    numericFields[TIME_SEC_FIELD] = new 
Int32Field(TIME_SEC_FIELD, 0, Field.Store.NO);
+
+                    doc = new Document();
+                }
+                else
+                {
+                    numericFields = null;
+                    fields = null;
+                    doc = null;
+                }
+            }
+
+            /// <summary>
+            /// Returns a field corresponding to the field name. If
+            /// <c>reuseFields</c> was set to <c>true</c>, then it attempts to 
reuse a
+            /// <see cref="Field"/> instance. If such a field does not exist, 
it creates a new one.
+            /// </summary>
+            internal Field GetField(string name, FieldType ft)
+            {
+                if (!reuseFields)
+                {
+                    return new Field(name, "", ft);
+                }
+
+                Field f;
+                if (!fields.TryGetValue(name, out f) || f == null)
+                {
+                    f = new Field(name, "", ft);
+                    fields[name] = f;
+                }
+                return f;
+            }
+
+            internal Field GetNumericField(string name, NumericType type)
+            {
+                Field f;
+                if (reuseFields)
+                {
+                    numericFields.TryGetValue(name, out f);
+                }
+                else
+                {
+                    f = null;
+                }
+
+                if (f == null)
+                {
+                    switch (type)
+                    {
+                        case NumericType.INT32:
+                            f = new Int32Field(name, 0, Field.Store.NO);
+                            break;
+                        case NumericType.INT64:
+                            f = new Int64Field(name, 0L, Field.Store.NO);
+                            break;
+                        case NumericType.SINGLE:
+                            f = new SingleField(name, 0.0F, Field.Store.NO);
+                            break;
+                        case NumericType.DOUBLE:
+                            f = new DoubleField(name, 0.0, Field.Store.NO);
+                            break;
+                        default:
+                            throw new InvalidOperationException("Cannot get 
here");
+                    }
+                    if (reuseFields)
+                    {
+                        numericFields[name] = f;
+                    }
+                }
+                return f;
+            }
+        }
+
+        private bool storeBytes = false;
+
+        // LUCENENET specific: DateUtil not used
+
+        // leftovers are thread local, because it is unsafe to share residues 
between threads
+        private ThreadLocal<LeftOver> leftovr = new ThreadLocal<LeftOver>();
+        private ThreadLocal<DocState> docState = new ThreadLocal<DocState>();
+
+        public static readonly string BODY_FIELD = "body";
+        public static readonly string TITLE_FIELD = "doctitle";
+        public static readonly string DATE_FIELD = "docdate";
+        public static readonly string DATE_MSEC_FIELD = "docdatenum";
+        public static readonly string TIME_SEC_FIELD = "doctimesecnum";
+        public static readonly string ID_FIELD = "docid";
+        public static readonly string BYTES_FIELD = "bytes";
+        public static readonly string NAME_FIELD = "docname";
+
+        protected Config m_config;
+
+        protected FieldType m_valType;
+        protected FieldType m_bodyValType;
+
+        protected ContentSource m_source;
+        protected bool m_reuseFields;
+        protected bool m_indexProperties;
+
+        private readonly AtomicInt32 numDocsCreated = new AtomicInt32();
+
+        public DocMaker()
+        {
+        }
+
+        // create a doc
+        // use only part of the body, modify it to keep the rest (or use all 
if size==0).
+        // reset the docdata properties so they are not added more than once.
+        private Document CreateDocument(DocData docData, int size, int cnt)
+        {
+
+            DocState ds = GetDocState();
+            Document doc = m_reuseFields ? ds.doc : new Document();
+            doc.Fields.Clear();
+
+            // Set ID_FIELD
+            FieldType ft = new FieldType(m_valType);
+            ft.IsIndexed = true;
+
+            Field idField = ds.GetField(ID_FIELD, ft);
+            int id;
+            if (r != null)
+            {
+                id = r.Next(updateDocIDLimit);
+            }
+            else
+            {
+                id = docData.ID;
+                if (id == -1)
+                {
+                    id = numDocsCreated.GetAndIncrement();
+                }
+            }
+            idField.SetStringValue(Convert.ToString(id, 
CultureInfo.InvariantCulture));
+            doc.Add(idField);
+
+            // Set NAME_FIELD
+            string name = docData.Name;
+            if (name == null) name = "";
+            name = cnt < 0 ? name : name + "_" + cnt;
+            Field nameField = ds.GetField(NAME_FIELD, m_valType);
+            nameField.SetStringValue(name);
+            doc.Add(nameField);
+
+            // Set DATE_FIELD
+            DateTime? date = null;
+            string dateString = docData.Date;
+            if (dateString != null)
+            {
+                // LUCENENET: TryParseExact needs a non-nullable DateTime to 
work.
+                DateTime temp;
+                if (DateTime.TryParseExact(dateString, new string[] {
+                    // Original format from Java
+                    "dd-MMM-yyyy HH:mm:ss",
+                    // Actual format from the test files...
+                    "yyyyMMddHHmmss"
+                    }, CultureInfo.InvariantCulture, DateTimeStyles.None, out 
temp))
+                {
+                    date = temp;
+                }
+                // LUCENENET: Hail Mary in case the formats above are not 
adequate
+                else if (DateTime.TryParse(dateString, 
CultureInfo.InvariantCulture, DateTimeStyles.None, out temp))
+                {
+                    date = temp;
+                }
+            }
+            else
+            {
+                dateString = "";
+            }
+            Field dateStringField = ds.GetField(DATE_FIELD, m_valType);
+            dateStringField.SetStringValue(dateString);
+            doc.Add(dateStringField);
+
+            if (date == null)
+            {
+                // just set to right now
+                date = DateTime.Now; 
+            }
+
+            Field dateField = ds.GetNumericField(DATE_MSEC_FIELD, 
NumericType.INT64);
+            dateField.SetInt64Value(date.Value.Ticks);
+            doc.Add(dateField);
+
+            //util.cal.setTime(date);
+            //int sec = util.cal.get(Calendar.HOUR_OF_DAY) * 3600 + 
util.cal.get(Calendar.MINUTE) * 60 + util.cal.get(Calendar.SECOND);
+            int sec = 
Convert.ToInt32(date.Value.ToUniversalTime().TimeOfDay.TotalSeconds);
+
+            Field timeSecField = ds.GetNumericField(TIME_SEC_FIELD, 
NumericType.INT32);
+            timeSecField.SetInt32Value(sec);
+            doc.Add(timeSecField);
+
+            // Set TITLE_FIELD
+            string title = docData.Title;
+            Field titleField = ds.GetField(TITLE_FIELD, m_valType);
+            titleField.SetStringValue(title == null ? "" : title);
+            doc.Add(titleField);
+
+            string body = docData.Body;
+            if (body != null && body.Length > 0)
+            {
+                string bdy;
+                if (size <= 0 || size >= body.Length)
+                {
+                    bdy = body; // use all
+                    docData.Body = ""; // nothing left
+                }
+                else
+                {
+                    // attempt not to break words - if whitespace found within 
next 20 chars...
+                    for (int n = size - 1; n < size + 20 && n < body.Length; 
n++)
+                    {
+                        if (char.IsWhiteSpace(body[n]))
+                        {
+                            size = n;
+                            break;
+                        }
+                    }
+                    bdy = body.Substring(0, size - 0); // use part
+                    docData.Body = body.Substring(size); // some left
+                }
+                Field bodyField = ds.GetField(BODY_FIELD, m_bodyValType);
+                bodyField.SetStringValue(bdy);
+                doc.Add(bodyField);
+
+                if (storeBytes)
+                {
+                    Field bytesField = ds.GetField(BYTES_FIELD, 
StringField.TYPE_STORED);
+                    bytesField.SetBytesValue(Encoding.UTF8.GetBytes(bdy));
+                    doc.Add(bytesField);
+                }
+            }
+
+            if (m_indexProperties)
+            {
+                var props = docData.Props;
+                if (props != null)
+                {
+                    foreach (var entry in props)
+                    {
+                        Field f = ds.GetField((string)entry.Key, m_valType);
+                        f.SetStringValue((string)entry.Value);
+                        doc.Add(f);
+                    }
+                    docData.Props = null;
+                }
+            }
+
+            //System.out.println("============== Created doc 
"+numDocsCreated+" :\n"+doc+"\n==========");
+            return doc;
+        }
+
+        private void ResetLeftovers()
+        {
+            leftovr.Value = null;
+        }
+
+        protected virtual DocState GetDocState()
+        {
+            DocState ds = docState.Value;
+            if (ds == null)
+            {
+                ds = new DocState(m_reuseFields, m_valType, m_bodyValType);
+                docState.Value = ds;
+            }
+            return ds;
+        }
+
+        /// <summary>
+        /// Closes the <see cref="DocMaker"/>.
+        /// </summary>
+        public void Dispose()
+        {
+            Dispose(true);
+            GC.SuppressFinalize(this);
+        }
+
+        /// <summary>
+        /// Closes the <see cref="DocMaker"/>. The base implementation closes 
the
+        /// <see cref="ContentSource"/>, and it can be overridden to do more 
work (but make
+        /// sure to call <c>base.Dispose(bool)</c>).
+        /// </summary>
+        protected virtual void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                m_source.Dispose();
+            }
+        }
+
+        /// <summary>
+        /// Creates a <see cref="Document"/> object ready for indexing. This 
method uses the
+        /// <see cref="ContentSource"/> to get the next document from the 
source, and creates
+        /// a <see cref="Document"/> object from the returned fields. If
+        /// <c>reuseFields</c> was set to <c>true</c>, it will reuse <see 
cref="Document"/>
+        /// and <see cref="Field"/> instances.
+        /// </summary>
+        /// <returns></returns>
+        public virtual Document MakeDocument()
+        {
+            ResetLeftovers();
+            DocData docData = m_source.GetNextDocData(GetDocState().docData);
+            Document doc = CreateDocument(docData, 0, -1);
+            return doc;
+        }
+
+        /// <summary>
+        /// Same as <see cref="MakeDocument()"/>, only this method creates a 
document of the
+        /// given size input by <paramref name="size"/>.
+        /// </summary>
+        public virtual Document MakeDocument(int size)
+        {
+            LeftOver lvr = leftovr.Value;
+            if (lvr == null || lvr.DocData == null || lvr.DocData.Body == null
+                || lvr.DocData.Body.Length == 0)
+            {
+                ResetLeftovers();
+            }
+            DocData docData = GetDocState().docData;
+            DocData dd = (lvr == null ? m_source.GetNextDocData(docData) : 
lvr.DocData);
+            int cnt = (lvr == null ? 0 : lvr.Count);
+            while (dd.Body == null || dd.Body.Length < size)
+            {
+                DocData dd2 = dd;
+                dd = m_source.GetNextDocData(new DocData());
+                cnt = 0;
+                dd.Body = (dd2.Body + dd.Body);
+            }
+            Document doc = CreateDocument(dd, size, cnt);
+            if (dd.Body == null || dd.Body.Length == 0)
+            {
+                ResetLeftovers();
+            }
+            else
+            {
+                if (lvr == null)
+                {
+                    lvr = new LeftOver();
+                    leftovr.Value = lvr;
+                }
+                lvr.DocData = dd;
+                lvr.Count = ++cnt;
+            }
+            return doc;
+        }
+
+        /// <summary>Reset inputs so that the test run would behave, input 
wise, as if it just started.</summary>
+        public virtual void ResetInputs()
+        {
+            m_source.PrintStatistics("docs");
+            // re-initiate since properties by round may have changed.
+            SetConfig(m_config, m_source);
+            m_source.ResetInputs();
+            numDocsCreated.Set(0);
+            ResetLeftovers();
+        }
+
+        /// <summary>Set the configuration parameters of this doc 
maker.</summary>
+        public virtual void SetConfig(Config config, ContentSource source)
+        {
+            this.m_config = config;
+            this.m_source = source;
+
+            bool stored = config.Get("doc.stored", false);
+            bool bodyStored = config.Get("doc.body.stored", stored);
+            bool tokenized = config.Get("doc.tokenized", true);
+            bool bodyTokenized = config.Get("doc.body.tokenized", tokenized);
+            bool norms = config.Get("doc.tokenized.norms", false);
+            bool bodyNorms = config.Get("doc.body.tokenized.norms", true);
+            bool termVec = config.Get("doc.term.vector", false);
+            bool termVecPositions = config.Get("doc.term.vector.positions", 
false);
+            bool termVecOffsets = config.Get("doc.term.vector.offsets", false);
+
+            m_valType = new FieldType(TextField.TYPE_NOT_STORED);
+            m_valType.IsStored = stored;
+            m_valType.IsTokenized = tokenized;
+            m_valType.OmitNorms = !norms;
+            m_valType.StoreTermVectors = termVec;
+            m_valType.StoreTermVectorPositions = termVecPositions;
+            m_valType.StoreTermVectorOffsets = termVecOffsets;
+            m_valType.Freeze();
+
+            m_bodyValType = new FieldType(TextField.TYPE_NOT_STORED);
+            m_bodyValType.IsStored = bodyStored;
+            m_bodyValType.IsTokenized = bodyTokenized;
+            m_bodyValType.OmitNorms = !bodyNorms;
+            m_bodyValType.StoreTermVectors = termVec;
+            m_bodyValType.StoreTermVectorPositions = termVecPositions;
+            m_bodyValType.StoreTermVectorOffsets = termVecOffsets;
+            m_bodyValType.Freeze();
+
+            storeBytes = config.Get("doc.store.body.bytes", false);
+
+            m_reuseFields = config.Get("doc.reuse.fields", true);
+
+            // In a multi-rounds run, it is important to reset DocState since 
settings
+            // of fields may change between rounds, and this is the only way 
to reset
+            // the cache of all threads.
+            docState = new ThreadLocal<DocState>();
+
+            m_indexProperties = config.Get("doc.index.props", false);
+
+            updateDocIDLimit = config.Get("doc.random.id.limit", -1);
+            if (updateDocIDLimit != -1)
+            {
+                r = new Random(179);
+            }
+        }
+    }
+}

Reply via email to