http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/TestHtmlParser.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/TestHtmlParser.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/TestHtmlParser.cs new file mode 100644 index 0000000..ce9f2f8 --- /dev/null +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/TestHtmlParser.cs @@ -0,0 +1,164 @@
using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.IO;
using static Lucene.Net.Benchmarks.ByTask.Feeds.DemoHTMLParser;

namespace Lucene.Net.Benchmarks.ByTask.Feeds
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Feeds small HTML snippets to <see cref="Parser"/> (the nested parser type
    /// imported from <c>DemoHTMLParser</c>) and checks the extracted title, body
    /// and meta tags.
    /// </summary>
    /// <remarks>
    /// All non-ASCII characters are written as <c>\u</c> escapes so the expected
    /// values do not depend on the encoding this file is stored or transmitted in.
    /// </remarks>
    public class TestHtmlParser : LuceneTestCase
    {
        /// <summary>Raw (non-entity) CJK characters must pass through to the body unchanged.</summary>
        [Test]
        public void TestUnicode()
        {
            // U+6C49 U+8BED written as escapes instead of raw characters.
            string text = "<html><body>\u6C49\u8BED</body></html>";
            Parser parser = new Parser(new StringReader(text));
            assertEquals("\u6C49\u8BED", parser.Body);
        }

        /// <summary>Numeric and named character references must be decoded in the body.</summary>
        [Test]
        public void TestEntities()
        {
            // The input must contain real entity references; with already-decoded
            // characters this test would only repeat TestUnicode.
            string text = "<html><body>&#x6C49;&#x8BED;&yen;</body></html>";
            Parser parser = new Parser(new StringReader(text));
            assertEquals("\u6C49\u8BED\u00A5", parser.Body);
        }

        /// <summary>Both well-formed and malformed comments are dropped from the body.</summary>
        [Test]
        public void TestComments()
        {
            string text = "<html><body>foo<!-- bar --><! baz --></body></html>";
            Parser parser = new Parser(new StringReader(text));
            assertEquals("foo", parser.Body);
        }

        /// <summary>Script content is not part of the body.</summary>
        [Test]
        public void TestScript()
        {
            string text = "<html><body><script type=\"text/javascript\">" +
                          "document.write(\"test\")</script>foo</body></html>";
            Parser parser = new Parser(new StringReader(text));
            assertEquals("foo", parser.Body);
        }

        /// <summary>Style sheet content is not part of the body.</summary>
        [Test]
        public void TestStyle()
        {
            string text = "<html><head><style type=\"text/css\">" +
                          "body{background-color:blue;}</style>" +
                          "</head><body>foo</body></html>";
            Parser parser = new Parser(new StringReader(text));
            assertEquals("foo", parser.Body);
        }

        /// <summary>A leading DOCTYPE declaration does not leak into the body.</summary>
        [Test]
        public void TestDoctype()
        {
            string text = "<!DOCTYPE HTML PUBLIC " +
                          "\"-//W3C//DTD HTML 4.01 Transitional//EN\"" +
                          "\"http://www.w3.org/TR/html4/loose.dtd\">" +
                          "<html><body>foo</body></html>";
            Parser parser = new Parser(new StringReader(text));
            assertEquals("foo", parser.Body);
        }

        /// <summary>
        /// <c>name</c> and <c>http-equiv</c> meta tags are collected; the
        /// <c>http-equiv</c> key is looked up in lower case.
        /// </summary>
        [Test]
        public void TestMeta()
        {
            string text = "<html><head>" +
                          "<meta name=\"a\" content=\"1\" />" +
                          "<meta name=\"b\" content=\"2\" />" +
                          "<meta name=\"keywords\" content=\"this is a test\" />" +
                          "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=UTF-8\" />" +
                          "</head><body>foobar</body></html>";
            Parser parser = new Parser(new StringReader(text));
            IDictionary<string, string> tags = parser.MetaTags;
            assertEquals(4, tags.Count);
            assertEquals("1", tags["a"]);
            assertEquals("2", tags["b"]);
            assertEquals("this is a test", tags["keywords"]);
            assertEquals("text/html;charset=UTF-8", tags["content-type"]);
        }

        /// <summary>An upper-case TITLE element is recognised.</summary>
        [Test]
        public void TestTitle()
        {
            // The second <head> (instead of </head>) is part of the test input; keep it.
            string text = "<html><head><TITLE>foo</TITLE><head><body>bar</body></html>";
            Parser parser = new Parser(new StringReader(text));
            assertEquals("foo", parser.Title);
        }

        // LUCENE-2246
        /// <summary>
        /// Runs the parser under the Turkish culture, with upper-case tag and
        /// attribute names and dotless-i text.
        /// </summary>
        [Test]
        public void TestTurkish()
        {
            using (new CultureContext("tr-TR"))
            {
                // \u0131 = LATIN SMALL LETTER DOTLESS I.
                // NOTE(review): the ALT character is reconstructed as \u015F
                // (s with cedilla) from a mis-encoded literal - confirm against upstream.
                string text = "<html><HEAD><TITLE>\u0131\u0131\u0131</TITLE></head><body>" +
                              "<IMG SRC=\"../images/head.jpg\" WIDTH=570 HEIGHT=47 BORDER=0 ALT=\"\u015F\">" +
                              "<a title=\"(\u0131\u0131\u0131)\"></body></html>";
                Parser parser = new Parser(new StringReader(text));
                assertEquals("\u0131\u0131\u0131", parser.Title);
                assertEquals("[\u015F]", parser.Body);
            }
        }

        /// <summary>A TREC-style document with CRLF line ends and no closing html tag.</summary>
        [Test]
        public void TestSampleTRECDoc()
        {
            string text = "<html>\r\n" +
                          "\r\n" +
                          "<head>\r\n" +
                          "<title>\r\n" +
                          "TEST-000 title\r\n" +
                          "</title>\r\n" +
                          "</head>\r\n" +
                          "\r\n" +
                          "<body>\r\n" +
                          "TEST-000 text\r\n" +
                          "\r\n" +
                          "</body>\r\n" +
                          "\r\n";
            Parser parser = new Parser(new StringReader(text));
            assertEquals("TEST-000 title", parser.Title);
            assertEquals("TEST-000 text", parser.Body.Trim());
        }

        /// <summary>Plain text without markup becomes the body; the title is empty.</summary>
        [Test]
        public void TestNoHTML()
        {
            string text = "hallo";
            Parser parser = new Parser(new StringReader(text));
            assertEquals("", parser.Title);
            assertEquals("hallo", parser.Body);
        }

        /// <summary>A title element outside any html/head wrapper is still extracted.</summary>
        // The method name (sic, "invalid") is kept unchanged so the discovered test name stays stable.
        [Test]
        public void Testivalid()
        {
            string text = "<title>foo</title>bar";
            Parser parser = new Parser(new StringReader(text));
            assertEquals("foo", parser.Title);
            assertEquals("bar", parser.Body);
        }
    }
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/TrecContentSourceTest.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/TrecContentSourceTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/TrecContentSourceTest.cs new file mode 100644 index 0000000..d83bb5a --- /dev/null +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/TrecContentSourceTest.cs @@ -0,0 +1,431 @@
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Documents;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using static Lucene.Net.Benchmarks.ByTask.Feeds.TrecDocParser;

namespace Lucene.Net.Benchmarks.ByTask.Feeds
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Tests <see cref="TrecContentSource"/> against in-memory TREC documents and
    /// against a zipped directory containing one document per path type.
    /// </summary>
    public class TrecContentSourceTest : LuceneTestCase
    {
        /// <summary>A <see cref="TrecContentSource"/> which works on a string and not files.</summary>
        private class StringableTrecSource : TrecContentSource
        {
            private readonly string docs;

            public StringableTrecSource(string docs, bool forever)
            {
                this.docs = docs;
                this.m_forever = forever;
            }

            /// <summary>
            /// Re-opens a reader over the same string. When a reader already exists
            /// this counts as a new iteration, or throws
            /// <see cref="NoMoreDataException"/> when not running forever.
            /// </summary>
            internal override void OpenNextFile()
            {
                if (reader != null)
                {
                    if (!m_forever)
                    {
                        throw new NoMoreDataException();
                    }
                    ++iteration;
                }

                reader = new StringReader(docs);
            }

            /// <summary>Ignores <paramref name="config"/> and only installs the HTML parser.</summary>
            public override void SetConfig(Config config)
            {
                htmlParser = new DemoHTMLParser();
            }
        }

        /// <summary>
        /// Asserts name, title and date of <paramref name="dd"/> and that its body
        /// contains <paramref name="expBody"/>.
        /// </summary>
        private void assertDocData(DocData dd, string expName, string expTitle,
                                   string expBody, DateTime? expDate)
        {
            assertNotNull(dd);
            assertEquals(expName, dd.Name);
            assertEquals(expTitle, dd.Title);
            // Ordinal search: the containment check must not depend on the current culture.
            assertTrue(dd.Body.IndexOf(expBody, StringComparison.Ordinal) != -1);
            DateTime? date = dd.Date != null ? DateTools.StringToDate(dd.Date) : (DateTime?)null;
            assertEquals(expDate, date);
        }

        /// <summary>Asserts that asking for one more document throws <see cref="NoMoreDataException"/>.</summary>
        private void assertNoMoreDataException(StringableTrecSource stdm)
        {
            bool thrown = false;
            try
            {
                stdm.GetNextDocData(null);
            }
            catch (NoMoreDataException)
            {
                thrown = true;
            }
            assertTrue("Expecting NoMoreDataException", thrown);
        }

        [Test]
        public void TestOneDocument()
        {
            string docs = "<DOC>\r\n" +
                          "<DOCNO>TEST-000</DOCNO>\r\n" +
                          "<DOCHDR>\r\n" +
                          "http://lucene.apache.org.trecdocmaker.test\r\n" +
                          "HTTP/1.1 200 OK\r\n" +
                          "Date: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                          "Server: Apache/1.3.27 (Unix)\r\n" +
                          "Last-Modified: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                          "Content-Length: 614\r\n" +
                          "Connection: close\r\n" +
                          "Content-Type: text/html\r\n" +
                          "</DOCHDR>\r\n" +
                          "<html>\r\n" +
                          "\r\n" +
                          "<head>\r\n" +
                          "<title>\r\n" +
                          "TEST-000 title\r\n" +
                          "</title>\r\n" +
                          "</head>\r\n" +
                          "\r\n" +
                          "<body>\r\n" +
                          "TEST-000 text\r\n" +
                          "\r\n" +
                          "</body>\r\n" +
                          "\r\n" +
                          "</DOC>";
            StringableTrecSource source = new StringableTrecSource(docs, false);
            source.SetConfig(null);

            DocData dd = source.GetNextDocData(new DocData());
            assertDocData(dd, "TEST-000_0", "TEST-000 title", "TEST-000 text", source
                .ParseDate("Sun, 11 Jan 2009 08:00:00 GMT"));


            assertNoMoreDataException(source);
        }

        [Test]
        public void TestTwoDocuments()
        {
            string docs = "<DOC>\r\n" +
                          "<DOCNO>TEST-000</DOCNO>\r\n" +
                          "<DOCHDR>\r\n" +
                          "http://lucene.apache.org.trecdocmaker.test\r\n" +
                          "HTTP/1.1 200 OK\r\n" +
                          "Date: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                          "Server: Apache/1.3.27 (Unix)\r\n" +
                          "Last-Modified: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                          "Content-Length: 614\r\n" +
                          "Connection: close\r\n" +
                          "Content-Type: text/html\r\n" +
                          "</DOCHDR>\r\n" +
                          "<html>\r\n" +
                          "\r\n" +
                          "<head>\r\n" +
                          "<title>\r\n" +
                          "TEST-000 title\r\n" +
                          "</title>\r\n" +
                          "</head>\r\n" +
                          "\r\n" +
                          "<body>\r\n" +
                          "TEST-000 text\r\n" +
                          "\r\n" +
                          "</body>\r\n" +
                          "\r\n" +
                          "</DOC>\r\n" +
                          "<DOC>\r\n" +
                          "<DOCNO>TEST-001</DOCNO>\r\n" +
                          "<DOCHDR>\r\n" +
                          "http://lucene.apache.org.trecdocmaker.test\r\n" +
                          "HTTP/1.1 200 OK\r\n" +
                          "Date: Sun, 11 Jan 2009 08:01:00 GMT\r\n" +
                          "Server: Apache/1.3.27 (Unix)\r\n" +
                          "Last-Modified: Sun, 11 Jan 2008 08:01:00 GMT\r\n" +
                          "Content-Length: 614\r\n" +
                          "Connection: close\r\n" +
                          "Content-Type: text/html\r\n" +
                          "</DOCHDR>\r\n" +
                          "<html>\r\n" +
                          "\r\n" +
                          "<head>\r\n" +
                          "<title>\r\n" +
                          "TEST-001 title\r\n" +
                          "</title>\r\n" +
                          "<meta name=\"date\" content=\"Tue, 09 Dec 2003 22:39:08 GMT\">" +
                          "</head>\r\n" +
                          "\r\n" +
                          "<body>\r\n" +
                          "TEST-001 text\r\n" +
                          "\r\n" +
                          "</body>\r\n" +
                          "\r\n" +
                          "</DOC>";
            StringableTrecSource source = new StringableTrecSource(docs, false);
            source.SetConfig(null);

            DocData dd = source.GetNextDocData(new DocData());
            assertDocData(dd, "TEST-000_0", "TEST-000 title", "TEST-000 text", source
                .ParseDate("Sun, 11 Jan 2009 08:00:00 GMT"));

            // The second document is expected to carry the date of its <meta name="date"> tag.
            dd = source.GetNextDocData(dd);
            assertDocData(dd, "TEST-001_0", "TEST-001 title", "TEST-001 text", source
                .ParseDate("Tue, 09 Dec 2003 22:39:08 GMT"));


            assertNoMoreDataException(source);
        }

        // If a Date: attribute is missing, make sure the document is not skipped, but
        // rather that a null Date is assigned.
        [Test]
        public void TestMissingDate()
        {
            string docs = "<DOC>\r\n" +
                          "<DOCNO>TEST-000</DOCNO>\r\n" +
                          "<DOCHDR>\r\n" +
                          "http://lucene.apache.org.trecdocmaker.test\r\n" +
                          "HTTP/1.1 200 OK\r\n" +
                          "Server: Apache/1.3.27 (Unix)\r\n" +
                          "Last-Modified: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                          "Content-Length: 614\r\n" +
                          "Connection: close\r\n" +
                          "Content-Type: text/html\r\n" +
                          "</DOCHDR>\r\n" +
                          "<html>\r\n" +
                          "\r\n" +
                          "<head>\r\n" +
                          "<title>\r\n" +
                          "TEST-000 title\r\n" +
                          "</title>\r\n" +
                          "</head>\r\n" +
                          "\r\n" +
                          "<body>\r\n" +
                          "TEST-000 text\r\n" +
                          "\r\n" +
                          "</body>\r\n" +
                          "\r\n" +
                          "</DOC>\r\n" +
                          "<DOC>\r\n" +
                          "<DOCNO>TEST-001</DOCNO>\r\n" +
                          "<DOCHDR>\r\n" +
                          "http://lucene.apache.org.trecdocmaker.test\r\n" +
                          "HTTP/1.1 200 OK\r\n" +
                          "Date: Sun, 11 Jan 2009 08:01:00 GMT\r\n" +
                          "Server: Apache/1.3.27 (Unix)\r\n" +
                          "Last-Modified: Sun, 11 Jan 2009 08:01:00 GMT\r\n" +
                          "Content-Length: 614\r\n" +
                          "Connection: close\r\n" +
                          "Content-Type: text/html\r\n" +
                          "</DOCHDR>\r\n" +
                          "<html>\r\n" +
                          "\r\n" +
                          "<head>\r\n" +
                          "<title>\r\n" +
                          "TEST-001 title\r\n" +
                          "</title>\r\n" +
                          "</head>\r\n" +
                          "\r\n" +
                          "<body>\r\n" +
                          "TEST-001 text\r\n" +
                          "\r\n" +
                          "</body>\r\n" +
                          "\r\n" +
                          "</DOC>";
            StringableTrecSource source = new StringableTrecSource(docs, false);
            source.SetConfig(null);

            DocData dd = source.GetNextDocData(new DocData());
            assertDocData(dd, "TEST-000_0", "TEST-000 title", "TEST-000 text", null);

            dd = source.GetNextDocData(dd);
            assertDocData(dd, "TEST-001_0", "TEST-001 title", "TEST-001 text", source
                .ParseDate("Sun, 11 Jan 2009 08:01:00 GMT"));


            assertNoMoreDataException(source);
        }

        // When a 'bad date' is input (unparsable date), make sure the DocData date is
        // assigned null.
        [Test]
        public void TestBadDate()
        {
            string docs = "<DOC>\r\n" +
                          "<DOCNO>TEST-000</DOCNO>\r\n" +
                          "<DOCHDR>\r\n" +
                          "http://lucene.apache.org.trecdocmaker.test\r\n" +
                          "HTTP/1.1 200 OK\r\n" +
                          "Date: Bad Date\r\n" +
                          "Server: Apache/1.3.27 (Unix)\r\n" +
                          "Last-Modified: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                          "Content-Length: 614\r\n" +
                          "Connection: close\r\n" +
                          "Content-Type: text/html\r\n" +
                          "</DOCHDR>\r\n" +
                          "<html>\r\n" +
                          "\r\n" +
                          "<head>\r\n" +
                          "<title>\r\n" +
                          "TEST-000 title\r\n" +
                          "</title>\r\n" +
                          "</head>\r\n" +
                          "\r\n" +
                          "<body>\r\n" +
                          "TEST-000 text\r\n" +
                          "\r\n" +
                          "</body>\r\n" +
                          "\r\n" +
                          "</DOC>";
            StringableTrecSource source = new StringableTrecSource(docs, false);
            source.SetConfig(null);

            DocData dd = source.GetNextDocData(new DocData());
            assertDocData(dd, "TEST-000_0", "TEST-000 title", "TEST-000 text", null);


            assertNoMoreDataException(source);
        }

        [Test]
        public void TestForever()
        {
            string docs = "<DOC>\r\n" +
                          "<DOCNO>TEST-000</DOCNO>\r\n" +
                          //"<docno>TEST-000</docno>\r\n" +
                          "<DOCHDR>\r\n" +
                          "http://lucene.apache.org.trecdocmaker.test\r\n" +
                          "HTTP/1.1 200 OK\r\n" +
                          "Date: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                          "Server: Apache/1.3.27 (Unix)\r\n" +
                          "Last-Modified: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                          "Content-Length: 614\r\n" +
                          "Connection: close\r\n" +
                          "Content-Type: text/html\r\n" +
                          "</DOCHDR>\r\n" +
                          "<html>\r\n" +
                          "\r\n" +
                          "<head>\r\n" +
                          "<title>\r\n" +
                          "TEST-000 title\r\n" +
                          "</title>\r\n" +
                          "</head>\r\n" +
                          "\r\n" +
                          "<body>\r\n" +
                          "TEST-000 text\r\n" +
                          "\r\n" +
                          "</body>\r\n" +
                          "\r\n" +
                          "</DOC>";
            StringableTrecSource source = new StringableTrecSource(docs, true);
            source.SetConfig(null);

            DocData dd = source.GetNextDocData(new DocData());
            assertDocData(dd, "TEST-000_0", "TEST-000 title", "TEST-000 text", source
                .ParseDate("Sun, 11 Jan 2009 08:00:00 GMT"));

            // same document, but the second iteration changes the name.
            dd = source.GetNextDocData(dd);
            assertDocData(dd, "TEST-000_1", "TEST-000 title", "TEST-000 text", source
                .ParseDate("Sun, 11 Jan 2009 08:00:00 GMT"));
            source.Dispose();

            // Don't test that NoMoreDataException is thrown, since the forever flag is
            // turned on.
        }

        /// <summary>
        /// Open a trec content source over a directory with files of all trec path types and all
        /// supported formats - bzip, gzip, txt.
        /// </summary>
        [Test]
        public void TestTrecFeedDirAllTypes()
        {
            DirectoryInfo dataDir = CreateTempDir("trecFeedAllTypes");
            using (var stream = GetDataFile("trecdocs.zip"))
            {
                TestUtil.Unzip(stream, dataDir);
            }
            TrecContentSource tcs = new TrecContentSource();
            Dictionary<string, string> props = new Dictionary<string, string>();
            props["print.props"] = "false";
            props["content.source.verbose"] = "false";
            props["content.source.excludeIteration"] = "true";
            props["doc.maker.forever"] = "false";
            props["docs.dir"] = dataDir.FullName.Replace('\\', '/');
            props["trec.doc.parser"] = typeof(TrecParserByPath).AssemblyQualifiedName;
            props["content.source.forever"] = "false";
            tcs.SetConfig(new Config(props));
            tcs.ResetInputs();
            DocData dd = new DocData();
            int n = 0;
            bool gotExpectedException = false;
            // LUCENENET specific - skip our UNKNOWN element.
            var pathTypes = ((ParsePathType[])Enum.GetValues(typeof(ParsePathType))).Where(x => x != ParsePathType.UNKNOWN).ToArray();
            HashSet<ParsePathType> unseenTypes = new HashSet<ParsePathType>(pathTypes);
            try
            {
                while (n < 100)
                { // arbitrary limit to prevent looping forever in case of test failure
                    dd = tcs.GetNextDocData(dd);
                    ++n;
                    assertNotNull("doc data " + n + " should not be null!", dd);
                    unseenTypes.Remove(tcs.currPathType);
                    switch (tcs.currPathType)
                    {
                        case ParsePathType.GOV2:
                            assertDocData(dd, "TEST-000", "TEST-000 title", "TEST-000 text", tcs.ParseDate("Sun, 11 Jan 2009 08:00:00 GMT"));
                            break;
                        case ParsePathType.FBIS:
                            assertDocData(dd, "TEST-001", "TEST-001 Title", "TEST-001 text", tcs.ParseDate("1 January 1991"));
                            break;
                        case ParsePathType.FR94:
                            // no title extraction in this source for now
                            assertDocData(dd, "TEST-002", null, "DEPARTMENT OF SOMETHING", tcs.ParseDate("February 3, 1994"));
                            break;
                        case ParsePathType.FT:
                            assertDocData(dd, "TEST-003", "Test-003 title", "Some pub text", tcs.ParseDate("980424"));
                            break;
                        case ParsePathType.LATIMES:
                            assertDocData(dd, "TEST-004", "Test-004 Title", "Some paragraph", tcs.ParseDate("January 17, 1997, Sunday"));
                            break;
                        default:
                            assertTrue("Should never get here!", false);
                            break;
                    }
                }
            }
            catch (NoMoreDataException)
            {
                // Running out of data is how this loop is expected to end.
                gotExpectedException = true;
            }
            assertTrue("Should have gotten NoMoreDataException!", gotExpectedException);
            assertEquals("Wrong number of documents created by source!", 5, n);
            assertTrue("Did not see all types!", unseenTypes.Count == 0);
        }
    }
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/trecdocs.zip ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/trecdocs.zip b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/trecdocs.zip new file mode
100644 index 0000000..f12dbca Binary files /dev/null and b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/trecdocs.zip differ http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/AddIndexesTaskTest.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/AddIndexesTaskTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/AddIndexesTaskTest.cs new file mode 100644 index 0000000..5fea5a5 --- /dev/null +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/AddIndexesTaskTest.cs @@ -0,0 +1,153 @@
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using NUnit.Framework;
using System.Collections.Generic;
using System.IO;

namespace Lucene.Net.Benchmarks.ByTask.Tasks
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Tests the functionality of <see cref="AddIndexesTask"/>.
    /// </summary>
    public class AddIndexesTaskTest : BenchmarkTestCase
    {
        private static DirectoryInfo testDir, inputDir;

        /// <summary>
        /// Builds, once per class, an on-disk input index holding ten empty documents.
        /// </summary>
        public override void BeforeClass()
        {
            base.BeforeClass();
            testDir = CreateTempDir("addIndexesTask");

            // create a dummy index under inputDir
            inputDir = new DirectoryInfo(Path.Combine(testDir.FullName, "input"));
            using (Store.Directory inputIndexDir = NewFSDirectory(inputDir))
            {
                var indexWriter = new IndexWriter(inputIndexDir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
                int remaining = 10;
                while (remaining-- > 0)
                {
                    indexWriter.AddDocument(new Document());
                }
                indexWriter.Dispose();
            }
        }

        /// <summary>
        /// Creates run data for a RAMDirectory target index with the add-indexes
        /// input directory pointing at <see cref="inputDir"/>.
        /// </summary>
        private PerfRunData createPerfRunData()
        {
            IDictionary<string, string> settings = new Dictionary<string, string>
            {
                ["writer.version"] = TEST_VERSION_CURRENT.ToString(),
                ["print.props"] = "false", // don't print anything
                ["directory"] = "RAMDirectory",
                [AddIndexesTask.ADDINDEXES_INPUT_DIR] = inputDir.FullName,
            };
            return new PerfRunData(new Config(settings));
        }

        /// <summary>
        /// Asserts that the task directory is a <see cref="RAMDirectory"/> holding ten documents.
        /// </summary>
        private void assertIndex(PerfRunData runData)
        {
            Store.Directory targetDir = runData.Directory;
            assertSame(typeof(RAMDirectory), targetDir.GetType());
            using (IndexReader indexReader = DirectoryReader.Open(targetDir))
            {
                assertEquals(10, indexReader.NumDocs);
            }
        }

        /// <summary>
        /// Shared scenario: create the target index, run the add-indexes task
        /// (optionally with <paramref name="taskParams"/>), close the index and
        /// verify its content.
        /// </summary>
        /// <param name="taskParams">Value for <c>SetParams</c>; <c>null</c> skips the call.</param>
        private void RunAddIndexes(string taskParams)
        {
            PerfRunData perfRunData = createPerfRunData();
            // create the target index first
            new CreateIndexTask(perfRunData).DoLogic();

            var addTask = new AddIndexesTask(perfRunData);
            addTask.Setup();

            // add the input index
            if (taskParams != null)
            {
                addTask.SetParams(taskParams);
            }
            addTask.DoLogic();

            // close the index
            new CloseIndexTask(perfRunData).DoLogic();

            assertIndex(perfRunData);

            perfRunData.Dispose();
        }

        [Test]
        public void TestAddIndexesDefault()
        {
            RunAddIndexes(null);
        }

        [Test]
        public void TestAddIndexesDir()
        {
            RunAddIndexes("true");
        }

        [Test]
        public void TestAddIndexesReader()
        {
            RunAddIndexes("false");
        }
    }
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/Alt/AltPackageTaskTest.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/Alt/AltPackageTaskTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/Alt/AltPackageTaskTest.cs new file mode 100644 index 0000000..c8f4c79 --- /dev/null +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/Alt/AltPackageTaskTest.cs @@ -0,0 +1,68 @@ +using NUnit.Framework; +using System; +using System.Reflection; + +namespace Lucene.Net.Benchmarks.ByTask.Tasks.Alt +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */

    /// <summary>
    /// Tests that tasks in alternate packages are found.
    /// </summary>
    public class AltPackageTaskTest : BenchmarkTestCase
    {
        /// <summary>Benchmark should fail loading the algorithm when alt is not specified.</summary>
        [Test]
        public void TestWithoutAlt()
        {
            bool failed = false;
            try
            {
                execBenchmark(altAlg(false));
            }
            catch (Exception)
            {
                // expected exception
                failed = true;
            }

            // The assertion sits outside the try block: an assertion failure is itself
            // an exception, so asserting inside the try would be swallowed by the
            // catch above and the test could never fail.
            assertTrue("Should have failed to run the algorithm", failed);
        }

        /// <summary>Benchmark should be able to load the algorithm when alt is specified.</summary>
        [Test]
        public void TestWithAlt()
        {
            Benchmark bm = execBenchmark(altAlg(true));
            assertNotNull(bm);
            assertNotNull(bm.RunData.Points);
        }

        /// <summary>
        /// Builds the algorithm lines for a single <c>AltTest</c> task.
        /// </summary>
        /// <param name="allowAlt">
        /// When <c>true</c>, an <c>alt.tasks.packages</c> line naming this test
        /// assembly is placed before the task line.
        /// </param>
        private string[] altAlg(bool allowAlt)
        {
            string altTask = "{ AltTest }";
            if (allowAlt)
            {
                return new string[] {
                    "alt.tasks.packages = " + this.GetType().GetTypeInfo().Assembly.GetName().Name,
                    altTask
                };
            }
            return new string[] { altTask };
        }
    }
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/Alt/AltTestTask.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/Alt/AltTestTask.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/Alt/AltTestTask.cs new file mode 100644 index 0000000..7e13bb9 --- /dev/null +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/Alt/AltTestTask.cs @@ -0,0 +1,35 @@ +namespace Lucene.Net.Benchmarks.ByTask.Tasks.Alt +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */

    /// <summary>
    /// <see cref="PerfTask"/> which does nothing, but is in a different package
    /// </summary>
    public class AltTestTask : PerfTask
    {
        public AltTestTask(PerfRunData runData)
            : base(runData)
        {
        }

        /// <summary>No-op; always returns 0.</summary>
        public override int DoLogic() => 0;
    }
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CommitIndexTaskTest.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CommitIndexTaskTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CommitIndexTaskTest.cs new file mode 100644 index 0000000..450916d --- /dev/null +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CommitIndexTaskTest.cs @@ -0,0 +1,63 @@ +using Lucene.Net.Benchmarks.ByTask.Utils; +using Lucene.Net.Index; +using NUnit.Framework; +using System.Collections.Generic; + +namespace Lucene.Net.Benchmarks.ByTask.Tasks +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */

    /// <summary>
    /// Tests the functionality of <see cref="CommitIndexTask"/>.
    /// </summary>
    public class CommitIndexTaskTest : BenchmarkTestCase
    {
        /// <summary>Creates run data for a quiet, RAMDirectory-backed index.</summary>
        private PerfRunData createPerfRunData()
        {
            Dictionary<string, string> settings = new Dictionary<string, string>
            {
                ["writer.version"] = TEST_VERSION_CURRENT.ToString(),
                ["print.props"] = "false", // don't print anything
                ["directory"] = "RAMDirectory",
            };
            return new PerfRunData(new Config(settings));
        }

        /// <summary>Create, commit and close an index without commit parameters.</summary>
        [Test]
        public void TestNoParams()
        {
            PerfRunData perfRunData = createPerfRunData();
            new CreateIndexTask(perfRunData).DoLogic();
            new CommitIndexTask(perfRunData).DoLogic();
            new CloseIndexTask(perfRunData).DoLogic();
        }

        /// <summary>
        /// The string passed to <c>SetParams</c> must be readable from the
        /// segment infos user data under <c>OpenReaderTask.USER_DATA</c>.
        /// </summary>
        [Test]
        public void TestCommitData()
        {
            PerfRunData perfRunData = createPerfRunData();
            new CreateIndexTask(perfRunData).DoLogic();

            var commitTask = new CommitIndexTask(perfRunData);
            commitTask.SetParams("params");
            commitTask.DoLogic();

            var segmentInfos = new SegmentInfos();
            segmentInfos.Read(perfRunData.Directory);
            assertEquals("params", segmentInfos.UserData[OpenReaderTask.USER_DATA]);

            new CloseIndexTask(perfRunData).DoLogic();
        }
    }
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CountingHighlighterTestTask.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CountingHighlighterTestTask.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CountingHighlighterTestTask.cs new file mode 100644 index 0000000..678339f ---
/dev/null +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CountingHighlighterTestTask.cs @@ -0,0 +1,85 @@
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Search.Highlight;

namespace Lucene.Net.Benchmarks.ByTask.Tasks
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Test highlight task which counts the documents it retrieves and the
    /// text fragments it highlights.
    /// </summary>
    public class CountingHighlighterTestTask : SearchTravRetHighlightTask
    {
        public static int numHighlightedResults = 0;
        public static int numDocsRetrieved = 0;

        public CountingHighlighterTestTask(PerfRunData runData)
            : base(runData)
        {
        }

        /// <summary>
        /// Loads document <paramref name="id"/> from <paramref name="ir"/> and
        /// counts it when it is not <c>null</c>.
        /// </summary>
        protected override Document RetrieveDoc(IndexReader ir, int id)
        {
            Document retrieved = ir.Document(id);
            if (retrieved == null)
            {
                return retrieved;
            }

            numDocsRetrieved++;
            return retrieved;
        }

        /// <summary>
        /// Named replacement for the anonymous <c>BenchmarkHighlighter</c> subclass
        /// of the Java original.
        /// </summary>
        private class BenchmarkHighlighterAnonymousHelper : BenchmarkHighlighter
        {
            private readonly CountingHighlighterTestTask outerInstance;
            private readonly Highlighter highlighter;

            public BenchmarkHighlighterAnonymousHelper(CountingHighlighterTestTask outerInstance, Highlighter highlighter)
            {
                this.outerInstance = outerInstance;
                this.highlighter = highlighter;
            }

            /// <summary>
            /// Highlights <paramref name="text"/>, adds the number of fragments to
            /// <see cref="numHighlightedResults"/> and returns that number
            /// (0 when no fragment array is returned).
            /// </summary>
            public override int DoHighlight(IndexReader reader, int doc, string field, Document document, Analyzer analyzer, string text)
            {
                TokenStream tokens = TokenSources.GetAnyTokenStream(reader, doc, field, document, analyzer);
                TextFragment[] fragments = highlighter.GetBestTextFragments(
                    tokens, text, outerInstance.m_mergeContiguous, outerInstance.m_maxFrags);

                int fragmentCount = fragments == null ? 0 : fragments.Length;
                numHighlightedResults += fragmentCount;
                return fragmentCount;
            }
        }

        /// <summary>
        /// Creates the highlighter for <paramref name="q"/>, stores it in
        /// <c>m_highlighter</c> and wraps it in the counting helper.
        /// </summary>
        protected override BenchmarkHighlighter GetBenchmarkHighlighter(Query q)
        {
            m_highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
            return new BenchmarkHighlighterAnonymousHelper(this, m_highlighter);
        }
    }
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CountingSearchTestTask.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CountingSearchTestTask.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CountingSearchTestTask.cs new file mode 100644 index 0000000..10c7628 --- /dev/null +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CountingSearchTestTask.cs @@ -0,0 +1,65 @@ +using Lucene.Net.Support; + +namespace Lucene.Net.Benchmarks.ByTask.Tasks +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Test Search task which counts number of searches.
+    /// </summary>
+    public class CountingSearchTestTask : SearchTask
+    {
+        /// <summary>Number of completed <see cref="DoLogic"/> calls.</summary>
+        public static int numSearches = 0;
+
+        /// <summary>Timestamp (ms) recorded by the first counted search.</summary>
+        public static long startMillis;
+
+        /// <summary>Timestamp (ms) recorded by the most recent counted search.</summary>
+        public static long lastMillis;
+
+        /// <summary>Value <see cref="lastMillis"/> held before the most recent counted search.</summary>
+        public static long prevLastMillis;
+
+        // Guards the counter and timestamps above. readonly so the monitor object can never be replaced.
+        private static readonly object syncLock = new object();
+
+        public CountingSearchTestTask(PerfRunData runData)
+            : base(runData)
+        {
+        }
+
+        /// <summary>Runs the base search logic, then records the search.</summary>
+        /// <returns>The value returned by the base implementation.</returns>
+        public override int DoLogic()
+        {
+            int res = base.DoLogic();
+            IncrNumSearches();
+            return res;
+        }
+
+        /// <summary>
+        /// Increments <see cref="numSearches"/> and refreshes the timestamps under <c>syncLock</c>.
+        /// </summary>
+        private static void IncrNumSearches()
+        {
+            lock (syncLock)
+            {
+                prevLastMillis = lastMillis;
+                lastMillis = Time.CurrentTimeMilliseconds();
+                if (0 == numSearches)
+                {
+                    // First search: all three timestamps start from the same instant.
+                    startMillis = prevLastMillis = lastMillis;
+                }
+                numSearches++;
+            }
+        }
+
+        /// <summary>
+        /// Returns the time between the first and the most recent counted search, in milliseconds.
+        /// </summary>
+        public long GetElapsedMillis()
+        {
+            // Read both timestamps under the same lock the writer uses, so the pair is consistent
+            // (and the 64-bit reads cannot tear on 32-bit runtimes).
+            lock (syncLock)
+            {
+                return lastMillis - startMillis;
+            }
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CreateIndexTaskTest.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CreateIndexTaskTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CreateIndexTaskTest.cs new file mode 100644 index 0000000..29cbaf7 --- /dev/null +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/CreateIndexTaskTest.cs @@ -0,0 +1,129 @@ +using Lucene.Net.Benchmarks.ByTask.Utils; +using Lucene.Net.Index; +using Lucene.Net.Support; +using Lucene.Net.Support.IO; +using Lucene.Net.Util; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace Lucene.Net.Benchmarks.ByTask.Tasks +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */
+
+    /// <summary>
+    /// Tests the functionality of <see cref="CreateIndexTask"/>.
+    /// </summary>
+    public class CreateIndexTaskTest : BenchmarkTestCase
+    {
+        /// <summary>
+        /// Builds run data that uses a RAMDirectory and suppresses property printing. A non-null
+        /// <paramref name="infoStreamValue"/> is passed through as the "writer.info.stream" property.
+        /// </summary>
+        private PerfRunData createPerfRunData(String infoStreamValue)
+        {
+            // :Post-Release-Update-Version.LUCENE_XY:
+#pragma warning disable 612, 618
+            string writerVersion = LuceneVersion.LUCENE_47.ToString();
+#pragma warning restore 612, 618
+
+            var settings = new Dictionary<string, string>
+            {
+                ["writer.version"] = writerVersion,
+                ["print.props"] = "false", // don't print anything
+                ["directory"] = "RAMDirectory",
+            };
+            if (infoStreamValue != null)
+            {
+                settings["writer.info.stream"] = infoStreamValue;
+            }
+            return new PerfRunData(new Config(settings));
+        }
+
+        /// <summary>Runs a create-index task and then a close-index task on <paramref name="runData"/>.</summary>
+        private static void createThenCloseIndex(PerfRunData runData)
+        {
+            new CreateIndexTask(runData).DoLogic();
+            new CloseIndexTask(runData).DoLogic();
+        }
+
+        /// <summary>
+        /// Redirects the console's standard output, then its standard error, into a byte buffer
+        /// and checks that each run wrote something to the redirected stream.
+        /// </summary>
+        [Test]
+        public void TestInfoStream_SystemOutErr()
+        {
+            // Standard output.
+            TextWriter originalOut = SystemConsole.Out;
+            ByteArrayOutputStream capturedOut = new ByteArrayOutputStream();
+            SystemConsole.Out = new StreamWriter(capturedOut, Encoding.GetEncoding(0));
+            try
+            {
+                createThenCloseIndex(createPerfRunData("SystemOut"));
+                assertTrue(capturedOut.Length > 0);
+            }
+            finally
+            {
+                SystemConsole.Out = originalOut;
+            }
+
+            // Standard error.
+            TextWriter originalErr = SystemConsole.Error;
+            ByteArrayOutputStream capturedErr = new ByteArrayOutputStream();
+            SystemConsole.Error = new StreamWriter(capturedErr, Encoding.GetEncoding(0));
+            try
+            {
+                createThenCloseIndex(createPerfRunData("SystemErr"));
+                assertTrue(capturedErr.Length > 0);
+            }
+            finally
+            {
+                SystemConsole.Error = originalErr;
+            }
+        }
+
+        /// <summary>Uses a file path as the info stream value and checks the file ends up non-empty.</summary>
+        [Test]
+        public void TestInfoStream_File()
+        {
+            string infoStreamPath = new FileInfo(Path.Combine(getWorkDir().FullName, "infoStreamTest")).FullName;
+            createThenCloseIndex(createPerfRunData(infoStreamPath));
+            assertTrue(new FileInfo(infoStreamPath).Length > 0);
+        }
+
+        /// <summary>Index creation must run cleanly with <see cref="NoMergePolicy"/> configured.</summary>
+        [Test]
+        public void TestNoMergePolicy()
+        {
+            PerfRunData runData = createPerfRunData(null);
+            runData.Config.Set("merge.policy", typeof(NoMergePolicy).AssemblyQualifiedName);
+            createThenCloseIndex(runData);
+        }
+
+        /// <summary>Index creation must run cleanly with <see cref="NoMergeScheduler"/> configured.</summary>
+        [Test]
+        public void TestNoMergeScheduler()
+        {
+            PerfRunData runData = createPerfRunData(null);
+            runData.Config.Set("merge.scheduler", typeof(NoMergeScheduler).AssemblyQualifiedName);
+            createThenCloseIndex(runData);
+        }
+
+        /// <summary>Index creation must run cleanly with <see cref="NoDeletionPolicy"/> configured.</summary>
+        [Test]
+        public void TestNoDeletionPolicy()
+        {
+            PerfRunData runData = createPerfRunData(null);
+            runData.Config.Set("deletion.policy", typeof(NoDeletionPolicy).AssemblyQualifiedName);
+            createThenCloseIndex(runData);
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/PerfTaskTest.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/PerfTaskTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/PerfTaskTest.cs new file mode 100644 index 0000000..572e109 --- /dev/null +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/PerfTaskTest.cs @@ -0,0 +1,81 @@ +using Lucene.Net.Benchmarks.ByTask.Utils; +using NUnit.Framework; +using System.Collections.Generic; +using System.Globalization; + +namespace Lucene.Net.Benchmarks.ByTask.Tasks +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */
+
+    /// <summary>
+    /// Tests the functionality of the abstract <see cref="PerfTask"/>.
+    /// </summary>
+    public class PerfTaskTest : BenchmarkTestCase
+    {
+        /// <summary>Minimal concrete task that exposes the inherited <c>m_logStep</c> value.</summary>
+        private sealed class MyPerfTask : PerfTask
+        {
+            public MyPerfTask(PerfRunData runData)
+                : base(runData)
+            {
+            }
+
+            public override int DoLogic() => 0;
+
+            public int getLogStep() => m_logStep;
+        }
+
+        /// <summary>
+        /// Builds run data on a RAMDirectory, optionally setting the global "log.step" and the
+        /// task-specific "log.step.MyPerf" properties.
+        /// </summary>
+        private PerfRunData createPerfRunData(bool setLogStep, int logStepVal,
+            bool setTaskLogStep, int taskLogStepVal)
+        {
+            var settings = new Dictionary<string, string>();
+            if (setLogStep)
+            {
+                settings["log.step"] = logStepVal.ToString(CultureInfo.InvariantCulture);
+            }
+            if (setTaskLogStep)
+            {
+                settings["log.step.MyPerf"] = taskLogStepVal.ToString(CultureInfo.InvariantCulture);
+            }
+            settings["directory"] = "RAMDirectory"; // no accidental FS dir.
+            return new PerfRunData(new Config(settings));
+        }
+
+        /// <summary>
+        /// Creates a <see cref="MyPerfTask"/> from the given settings and checks its log step
+        /// equals <paramref name="expLogStepValue"/>.
+        /// </summary>
+        private void doLogStepTest(bool setLogStep, int logStepVal,
+            bool setTaskLogStep, int taskLogStepVal, int expLogStepValue)
+        {
+            var task = new MyPerfTask(createPerfRunData(setLogStep, logStepVal, setTaskLogStep, taskLogStepVal));
+            assertEquals(expLogStepValue, task.getLogStep());
+        }
+
+        [Test]
+        public void TestLogStep()
+        {
+            // Nothing configured: the default applies.
+            doLogStepTest(false, -1, false, -1, PerfTask.DEFAULT_LOG_STEP);
+
+            // Global setting only: -1 is expected to surface as int.MaxValue, 100 as-is.
+            doLogStepTest(true, -1, false, -1, int.MaxValue);
+            doLogStepTest(true, 100, false, -1, 100);
+
+            // Task-specific setting only: same expectations.
+            doLogStepTest(false, -1, true, -1, int.MaxValue);
+            doLogStepTest(false, -1, true, 100, 100);
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/SearchWithSortTaskTest.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/SearchWithSortTaskTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/SearchWithSortTaskTest.cs new file mode 100644 index 0000000..959681a --- /dev/null +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/SearchWithSortTaskTest.cs @@ -0,0 +1,35 @@ +using Lucene.Net.Benchmarks.ByTask.Utils; +using Lucene.Net.Search; +using NUnit.Framework; +using System.Collections.Generic; + +namespace Lucene.Net.Benchmarks.ByTask.Tasks +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */
+
+    /// <summary>
+    /// Checks how <see cref="SearchWithSortTask"/> interprets its parameter string.
+    /// </summary>
+    public class SearchWithSortTaskTest : BenchmarkTestCase
+    {
+        /// <summary>The "doc" parameter must yield a first sort field of type DOC.</summary>
+        [Test]
+        public void TestSetParams_docField()
+        {
+            var emptyConfig = new Config(new Dictionary<string, string>());
+            var sortTask = new SearchWithSortTask(new PerfRunData(emptyConfig));
+
+            sortTask.SetParams("doc");
+
+            var firstSortField = sortTask.Sort.GetSort()[0];
+            assertEquals(SortFieldType.DOC, firstSortField.Type);
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTaskTest.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTaskTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTaskTest.cs new file mode 100644 index 0000000..8f710bc --- /dev/null +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTaskTest.cs @@ -0,0 +1,121 @@ +using Lucene.Net.Benchmarks.ByTask.Feeds; +using Lucene.Net.Benchmarks.ByTask.Utils; +using Lucene.Net.Documents; +using Lucene.Net.Support; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace Lucene.Net.Benchmarks.ByTask.Tasks +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */
+
+    /// <summary>
+    /// Tests the functionality of <see cref="WriteEnwikiLineDocTask"/>.
+    /// </summary>
+    public class WriteEnwikiLineDocTaskTest : BenchmarkTestCase
+    {
+        /// <summary>
+        /// Interleaves category docs with regular docs: every second document produced
+        /// carries a "Category:" title.
+        /// </summary>
+        // Kept public: the type is handed to the benchmark by assembly-qualified name
+        // (presumably so it can be instantiated reflectively).
+        public sealed class WriteLineCategoryDocMaker : DocMaker
+        {
+            private readonly AtomicInt32 flip = new AtomicInt32(0);
+
+            public override Document MakeDocument()
+            {
+                // Even call counts produce the category variant.
+                string title = (flip.IncrementAndGet() % 2 == 0) ? "Category:title text" : "title text";
+
+                Document doc = new Document();
+                doc.Add(new StringField(BODY_FIELD, "body text", Field.Store.NO));
+                doc.Add(new StringField(TITLE_FIELD, title, Field.Store.NO));
+                doc.Add(new StringField(DATE_FIELD, "date text", Field.Store.NO));
+                return doc;
+            }
+        }
+
+        /// <summary>
+        /// Builds run data that writes line docs to <paramref name="file"/> using the doc maker
+        /// named by <paramref name="docMakerName"/>.
+        /// </summary>
+        private PerfRunData createPerfRunData(FileInfo file, String docMakerName)
+        {
+            var settings = new Dictionary<string, string>
+            {
+                ["doc.maker"] = docMakerName,
+                ["line.file.out"] = file.FullName,
+                ["directory"] = "RAMDirectory", // no accidental FS dir.
+            };
+            return new PerfRunData(new Config(settings));
+        }
+
+        /// <summary>
+        /// Verifies two lines in <paramref name="file"/> and two "Category:"-prefixed lines in
+        /// its companion categories file.
+        /// </summary>
+        private void doReadTest(FileInfo file, String expTitle,
+                                String expDate, String expBody)
+        {
+            doReadTest(2, file, expTitle, expDate, expBody);
+            doReadTest(2, WriteEnwikiLineDocTask.CategoriesLineFile(file), "Category:" + expTitle, expDate, expBody);
+        }
+
+        /// <summary>
+        /// Verifies that <paramref name="file"/> holds a header line followed by exactly
+        /// <paramref name="n"/> lines with the expected fields. A null
+        /// <paramref name="expBody"/> means only title and date are expected.
+        /// </summary>
+        private void doReadTest(int n, FileInfo file, String expTitle, String expDate, String expBody)
+        {
+            using (TextReader reader = new StreamReader(new FileStream(file.FullName, FileMode.Open, FileAccess.Read), Encoding.UTF8))
+            {
+                String line = reader.ReadLine();
+                WriteLineDocTaskTest.assertHeaderLine(line);
+
+                for (int remaining = n; remaining > 0; remaining--)
+                {
+                    line = reader.ReadLine();
+                    assertNotNull(line);
+
+                    String[] parts = line.Split(WriteLineDocTask.SEP).TrimEnd();
+                    int numExpParts = (expBody != null) ? 3 : 2;
+                    assertEquals(numExpParts, parts.Length);
+                    assertEquals(expTitle, parts[0]);
+                    assertEquals(expDate, parts[1]);
+                    if (expBody != null)
+                    {
+                        assertEquals(expBody, parts[2]);
+                    }
+                }
+
+                // Nothing may follow the expected lines.
+                assertNull(reader.ReadLine());
+            }
+        }
+
+        [Test]
+        public void TestCategoryLines()
+        {
+            // The doc maker alternates regular and category docs, so four documents are
+            // expected to leave two lines in the main file and two in the categories file.
+            FileInfo file = new FileInfo(Path.Combine(getWorkDir().FullName, "two-lines-each.txt"));
+            PerfRunData runData = createPerfRunData(file, typeof(WriteLineCategoryDocMaker).AssemblyQualifiedName);
+            WriteLineDocTask wldt = new WriteEnwikiLineDocTask(runData);
+            int docsToWrite = 4; // four times so that each file should have 2 lines.
+            while (docsToWrite-- > 0)
+            {
+                wldt.DoLogic();
+            }
+            wldt.Dispose();
+
+            doReadTest(file, "title text", "date text", "body text");
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/WriteLineDocTaskTest.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/WriteLineDocTaskTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/WriteLineDocTaskTest.cs new file mode 100644 index 0000000..8edad56 --- /dev/null +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/WriteLineDocTaskTest.cs @@ -0,0 +1,436 @@ +using ICSharpCode.SharpZipLib.BZip2; +using Lucene.Net.Benchmarks.ByTask.Feeds; +using Lucene.Net.Benchmarks.ByTask.Utils; +using Lucene.Net.Documents; +using Lucene.Net.Support; +using Lucene.Net.Support.Threading; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.IO; +using System.IO.Compression; +using System.Text; +using System.Threading; + +namespace Lucene.Net.Benchmarks.ByTask.Tasks +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Tests the functionality of {@link WriteLineDocTask}. 
+    /// </summary>
+    public class WriteLineDocTaskTest : BenchmarkTestCase
+    {
+        // The doc makers below are public because they are handed to the benchmark by
+        // assembly-qualified type name through the "doc.maker" property (presumably so they
+        // can be instantiated reflectively).
+
+        /// <summary>Produces a document with body, title and date fields.</summary>
+        public sealed class WriteLineDocMaker : DocMaker
+        {
+            public override Document MakeDocument()
+            {
+                Document doc = new Document();
+                doc.Add(new StringField(BODY_FIELD, "body", Field.Store.NO));
+                doc.Add(new StringField(TITLE_FIELD, "title", Field.Store.NO));
+                doc.Add(new StringField(DATE_FIELD, "date", Field.Store.NO));
+                return doc;
+            }
+        }
+
+        /// <summary>Produces a document whose field values contain line breaks and a tab.</summary>
+        public sealed class NewLinesDocMaker : DocMaker
+        {
+            public override Document MakeDocument()
+            {
+                Document doc = new Document();
+                doc.Add(new StringField(BODY_FIELD, "body\r\ntext\ttwo", Field.Store.NO));
+                doc.Add(new StringField(TITLE_FIELD, "title\r\ntext", Field.Store.NO));
+                doc.Add(new StringField(DATE_FIELD, "date\r\ntext", Field.Store.NO));
+                return doc;
+            }
+        }
+
+        /// <summary>Produces a document with title and date but no body.</summary>
+        public sealed class NoBodyDocMaker : DocMaker
+        {
+            public override Document MakeDocument()
+            {
+                Document doc = new Document();
+                doc.Add(new StringField(TITLE_FIELD, "title", Field.Store.NO));
+                doc.Add(new StringField(DATE_FIELD, "date", Field.Store.NO));
+                return doc;
+            }
+        }
+
+        /// <summary>Produces a document with body and date but no title.</summary>
+        public sealed class NoTitleDocMaker : DocMaker
+        {
+            public override Document MakeDocument()
+            {
+                Document doc = new Document();
+                doc.Add(new StringField(BODY_FIELD, "body", Field.Store.NO));
+                doc.Add(new StringField(DATE_FIELD, "date", Field.Store.NO));
+                return doc;
+            }
+        }
+
+        /// <summary>Produces a document that only has a date field.</summary>
+        public sealed class JustDateDocMaker : DocMaker
+        {
+            public override Document MakeDocument()
+            {
+                Document doc = new Document();
+                doc.Add(new StringField(DATE_FIELD, "date", Field.Store.NO));
+                return doc;
+            }
+        }
+
+        /// <summary>
+        /// Same as <see cref="JustDateDocMaker"/>, except that <c>createPerfRunData</c> configures
+        /// the date field as a sufficient field for this type, so it is treated as legal.
+        /// </summary>
+        public sealed class LegalJustDateDocMaker : DocMaker
+        {
+            public override Document MakeDocument()
+            {
+                Document doc = new Document();
+                doc.Add(new StringField(DATE_FIELD, "date", Field.Store.NO));
+                return doc;
+            }
+        }
+
+        /// <summary>Produces a document without any fields.</summary>
+        public sealed class EmptyDocMaker : DocMaker
+        {
+            public override Document MakeDocument()
+            {
+                return new Document();
+            }
+        }
+
+        /// <summary>Produces a document whose field values end in the current thread's name.</summary>
+        public sealed class ThreadingDocMaker : DocMaker
+        {
+            public override Document MakeDocument()
+            {
+                Document doc = new Document();
+                String name = Thread.CurrentThread.Name;
+                doc.Add(new StringField(BODY_FIELD, "body_" + name, Field.Store.NO));
+                doc.Add(new StringField(TITLE_FIELD, "title_" + name, Field.Store.NO));
+                doc.Add(new StringField(DATE_FIELD, "date_" + name, Field.Store.NO));
+                return doc;
+            }
+        }
+
+        /// <summary>
+        /// Builds run data that writes line docs to <paramref name="file"/> with the doc maker
+        /// named by <paramref name="docMakerName"/>.
+        /// </summary>
+        /// <param name="file">Output file for the "line.file.out" property.</param>
+        /// <param name="allowEmptyDocs">When true, "sufficient.fields" is set to ",".</param>
+        /// <param name="docMakerName">Assembly-qualified type name of the doc maker.</param>
+        private PerfRunData createPerfRunData(FileInfo file,
+                                              bool allowEmptyDocs,
+                                              String docMakerName)
+        {
+            Dictionary<string, string> props = new Dictionary<string, string>();
+            props["doc.maker"] = docMakerName;
+            props["line.file.out"] = file.FullName;
+            props["directory"] = "RAMDirectory"; // no accidental FS dir.
+            if (allowEmptyDocs)
+            {
+                props["sufficient.fields"] = ",";
+            }
+            if (typeof(LegalJustDateDocMaker).Equals(Type.GetType(docMakerName)))
+            {
+                // For this doc maker the date alone is both the written and the sufficient field.
+                props["line.fields"] = DocMaker.DATE_FIELD;
+                props["sufficient.fields"] = DocMaker.DATE_FIELD;
+            }
+            Config config = new Config(props);
+            return new PerfRunData(config);
+        }
+
+        /// <summary>
+        /// Runs <see cref="WriteLineDocTask"/> once against <paramref name="fileName"/> in the work
+        /// directory and returns the file. The task is disposed even when <c>DoLogic</c> throws.
+        /// </summary>
+        private FileInfo writeOneDoc(string fileName, bool allowEmptyDocs, Type docMakerType)
+        {
+            FileInfo file = new FileInfo(Path.Combine(getWorkDir().FullName, fileName));
+            PerfRunData runData = createPerfRunData(file, allowEmptyDocs, docMakerType.AssemblyQualifiedName);
+            WriteLineDocTask wldt = new WriteLineDocTask(runData);
+            try
+            {
+                wldt.DoLogic();
+            }
+            finally
+            {
+                wldt.Dispose();
+            }
+            return file;
+        }
+
+        /// <summary>
+        /// Opens <paramref name="file"/> as UTF-8 text, asserts that its first line is a header
+        /// line and returns the line after it (<c>null</c> when there is none).
+        /// </summary>
+        private static string readLineAfterHeader(FileInfo file)
+        {
+            using (TextReader br = new StreamReader(new FileStream(file.FullName, FileMode.Open, FileAccess.Read), Encoding.UTF8))
+            {
+                assertHeaderLine(br.ReadLine());
+                return br.ReadLine();
+            }
+        }
+
+        /// <summary>
+        /// Verifies that <paramref name="file"/>, read through the decompressor matching
+        /// <paramref name="fileType"/>, holds a header line followed by exactly one line with the
+        /// expected fields. A null <paramref name="expBody"/> means only title and date are expected.
+        /// </summary>
+        private void doReadTest(FileInfo file, FileType fileType, String expTitle,
+                                String expDate, String expBody)
+        {
+            Stream input = new FileStream(file.FullName, FileMode.Open, FileAccess.Read);
+            switch (fileType)
+            {
+                case FileType.BZIP2:
+                    input = new BZip2InputStream(input);
+                    break;
+                case FileType.GZIP:
+                    input = new GZipStream(input, CompressionMode.Decompress);
+                    break;
+                case FileType.PLAIN:
+                    break; // nothing to do
+                default:
+                    assertFalse("Unknown file type!", true); //fail, should not happen
+                    break;
+            }
+            using (TextReader br = new StreamReader(input, Encoding.UTF8))
+            {
+                String line = br.ReadLine();
+                assertHeaderLine(line);
+                line = br.ReadLine();
+                assertNotNull(line);
+                String[] parts = line.Split(WriteLineDocTask.SEP).TrimEnd();
+                int numExpParts = expBody == null ? 2 : 3;
+                assertEquals(numExpParts, parts.Length);
+                assertEquals(expTitle, parts[0]);
+                assertEquals(expDate, parts[1]);
+                if (expBody != null)
+                {
+                    assertEquals(expBody, parts[2]);
+                }
+                assertNull(br.ReadLine());
+            }
+        }
+
+        /// <summary>
+        /// Asserts that <paramref name="line"/> starts with
+        /// <c>WriteLineDocTask.FIELDS_HEADER_INDICATOR</c>.
+        /// </summary>
+        internal static void assertHeaderLine(String line)
+        {
+            assertTrue("First line should be a header line", line.StartsWith(WriteLineDocTask.FIELDS_HEADER_INDICATOR, StringComparison.Ordinal));
+        }
+
+        /* Tests WriteLineDocTask with a bzip2 format. */
+        [Test]
+        public void TestBZip2()
+        {
+            // Create a document in bz2 format.
+            FileInfo file = writeOneDoc("one-line.bz2", false, typeof(WriteLineDocMaker));
+
+            doReadTest(file, FileType.BZIP2, "title", "date", "body");
+        }
+
+        /* Tests WriteLineDocTask with a gzip format. */
+        [Test]
+        public void TestGZip()
+        {
+            // Create a document in gz format.
+            FileInfo file = writeOneDoc("one-line.gz", false, typeof(WriteLineDocMaker));
+
+            doReadTest(file, FileType.GZIP, "title", "date", "body");
+        }
+
+        [Test]
+        public void TestRegularFile()
+        {
+            // Create a document in regular format.
+            FileInfo file = writeOneDoc("one-line", false, typeof(WriteLineDocMaker));
+
+            doReadTest(file, FileType.PLAIN, "title", "date", "body");
+        }
+
+        [Test]
+        public void TestCharsReplace()
+        {
+            // WriteLineDocTask replaced only \t characters w/ a space, since that's its
+            // separator char. However, it didn't replace newline characters, which
+            // resulted in errors in LineDocSource.
+            FileInfo file = writeOneDoc("one-line", false, typeof(NewLinesDocMaker));
+
+            doReadTest(file, FileType.PLAIN, "title text", "date text", "body text two");
+        }
+
+        [Test]
+        public void TestEmptyBody()
+        {
+            // WriteLineDocTask threw away documents w/ no BODY element, even if they
+            // had a TITLE element (LUCENE-1755). It should throw away documents if they
+            // don't have BODY nor TITLE
+            FileInfo file = writeOneDoc("one-line", false, typeof(NoBodyDocMaker));
+
+            doReadTest(file, FileType.PLAIN, "title", "date", null);
+        }
+
+        [Test]
+        public void TestEmptyTitle()
+        {
+            FileInfo file = writeOneDoc("one-line", false, typeof(NoTitleDocMaker));
+
+            doReadTest(file, FileType.PLAIN, "", "date", "body");
+        }
+
+        /** Fail by default when there's only date */
+        [Test]
+        public void TestJustDate()
+        {
+            FileInfo file = writeOneDoc("one-line", false, typeof(JustDateDocMaker));
+
+            // Only the header may be present: the date-only document must not be written.
+            assertNull(readLineAfterHeader(file));
+        }
+
+        [Test]
+        public void TestLegalJustDate()
+        {
+            FileInfo file = writeOneDoc("one-line", false, typeof(LegalJustDateDocMaker));
+
+            assertNotNull(readLineAfterHeader(file));
+        }
+
+        [Test]
+        public void TestEmptyDoc()
+        {
+            FileInfo file = writeOneDoc("one-line", true, typeof(EmptyDocMaker));
+
+            assertNotNull(readLineAfterHeader(file));
+        }
+
+        /// <summary>Named thread that runs one <c>DoLogic</c> call on a shared task.</summary>
+        private class ThreadAnonymousHelper : ThreadClass
+        {
+            private readonly WriteLineDocTask wldt;
+
+            public ThreadAnonymousHelper(string name, WriteLineDocTask wldt)
+                : base(name)
+            {
+                this.wldt = wldt;
+            }
+
+            public override void Run()
+            {
+                try
+                {
+                    wldt.DoLogic();
+                }
+                catch (Exception e)
+                {
+                    throw new Exception(e.ToString(), e);
+                }
+            }
+        }
+
+        [Test]
+        public void TestMultiThreaded()
+        {
+            FileInfo file = new FileInfo(Path.Combine(getWorkDir().FullName, "one-line"));
+            PerfRunData runData = createPerfRunData(file, false, typeof(ThreadingDocMaker).AssemblyQualifiedName);
+            WriteLineDocTask wldt = new WriteLineDocTask(runData);
+            ThreadClass[] threads = new ThreadClass[10];
+            try
+            {
+                for (int i = 0; i < threads.Length; i++)
+                {
+                    threads[i] = new ThreadAnonymousHelper("t" + i, wldt);
+                }
+
+                foreach (ThreadClass t in threads)
+                {
+                    t.Start();
+                }
+                foreach (ThreadClass t in threads)
+                {
+                    t.Join();
+                }
+            }
+            finally
+            {
+                // Dispose the shared task even when starting or joining a thread fails.
+                wldt.Dispose();
+            }
+
+            ISet<String> ids = new HashSet<string>();
+            using (TextReader br = new StreamReader(new FileStream(file.FullName, FileMode.Open, FileAccess.Read), Encoding.UTF8))
+            {
+                String line = br.ReadLine();
+                assertHeaderLine(line); // header line is written once, no matter how many threads there are
+                for (int i = 0; i < threads.Length; i++)
+                {
+                    line = br.ReadLine();
+                    // Fail with an assertion, not a NullReferenceException, when a line is missing.
+                    assertNotNull(line);
+                    String[] parts = line.Split(WriteLineDocTask.SEP).TrimEnd();
+                    assertEquals(3, parts.Length);
+                    // check that all thread names written are the same in the same line
+                    String tname = parts[0].Substring(parts[0].IndexOf('_'));
+                    ids.add(tname);
+                    assertEquals(tname, parts[1].Substring(parts[1].IndexOf('_')));
+                    assertEquals(tname, parts[2].Substring(parts[2].IndexOf('_')));
+                }
+                // only threads.length lines should exist
+                assertNull(br.ReadLine());
+                assertEquals(threads.Length, ids.size());
+            }
+        }
+    }
+}