Lucene.Net.Benchmark: Added Sax and TagSoup to the Support folder.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/198e5868 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/198e5868 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/198e5868 Branch: refs/heads/master Commit: 198e586874c849e98df78477abf56d1902151222 Parents: 6cf6a6e Author: Shad Storhaug <s...@shadstorhaug.com> Authored: Fri Aug 4 00:21:07 2017 +0700 Committer: Shad Storhaug <s...@shadstorhaug.com> Committed: Fri Aug 4 19:33:28 2017 +0700 ---------------------------------------------------------------------- .../ByTask/Feeds/DemoHTMLParser.cs | 8 +- .../ByTask/Feeds/EnwikiContentSource.cs | 9 +- .../Lucene.Net.Benchmark.csproj | 60 + .../Lucene.Net.Benchmark.project.json | 4 +- .../Support/EnglishNumberFormatExtensions.cs | 48 +- .../Support/Sax/Attributes.cs | 219 ++ .../Support/Sax/ContentHandler.cs | 364 +++ .../Support/Sax/DTDHandler.cs | 100 + .../Support/Sax/EntityResolver.cs | 109 + .../Support/Sax/ErrorHandler.cs | 122 + .../Support/Sax/Ext/Attributes2.cs | 108 + .../Support/Sax/Ext/Attributes2Impl.cs | 277 ++ .../Support/Sax/Ext/DeclHandler.cs | 131 + .../Support/Sax/Ext/DefaultHandler2.cs | 112 + .../Support/Sax/Ext/EntityResolver2.cs | 178 ++ .../Support/Sax/Ext/LexicalHandler.cs | 180 ++ .../Support/Sax/Ext/Locator2.cs | 64 + .../Support/Sax/Ext/Locator2Impl.cs | 76 + .../Support/Sax/Helpers/AttributesImpl.cs | 615 ++++ .../Support/Sax/Helpers/DefaultHandler.cs | 389 +++ .../Support/Sax/Helpers/LocatorImpl.cs | 131 + .../Support/Sax/Helpers/NamespaceSupport.cs | 841 +++++ .../Support/Sax/Helpers/XMLFilterImpl.cs | 587 ++++ .../Support/Sax/InputSource.cs | 242 ++ src/Lucene.Net.Benchmark/Support/Sax/Locator.cs | 125 + .../Support/Sax/SAXException.cs | 165 + .../Support/Sax/SAXNotRecognizedException.cs | 66 + .../Support/Sax/SAXNotSupportedException.cs | 67 + .../Support/Sax/SAXParseException.cs | 269 ++ .../Support/Sax/XMLFilter.cs | 41 + 
.../Support/Sax/XMLReader.cs | 305 ++ .../Support/StringExtensions.cs | 14 + .../Support/TagSoup/AutoDetector.cs | 41 + .../Support/TagSoup/Element.cs | 215 ++ .../Support/TagSoup/ElementType.cs | 269 ++ .../Support/TagSoup/HTMLScanner.cs | 745 +++++ .../Support/TagSoup/HTMLSchema.Generated.cs | 2910 ++++++++++++++++++ .../Support/TagSoup/HTMLSchema.tt | 72 + .../Support/TagSoup/PYXScanner.cs | 138 + .../Support/TagSoup/PYXWriter.cs | 286 ++ .../Support/TagSoup/Parser.cs | 1484 +++++++++ .../Support/TagSoup/ScanHandler.cs | 105 + .../Support/TagSoup/Scanner.cs | 53 + .../Support/TagSoup/Schema.cs | 159 + .../Support/TagSoup/XMLReader.cs | 1567 ++++++++++ .../Support/TagSoup/definitions/html.stml | 249 ++ .../Support/TagSoup/definitions/html.tssl | 2762 +++++++++++++++++ .../Support/TagSoup/stml/stml.rnc | 49 + .../Support/TagSoup/stml/stml.xslt | 150 + .../Support/TagSoup/tssl/tssl-models.xslt | 47 + .../Support/TagSoup/tssl/tssl-validate.xslt | 40 + .../Support/TagSoup/tssl/tssl.rnc | 75 + .../Support/TagSoup/tssl/tssl.xslt | 220 ++ 53 files changed, 17627 insertions(+), 35 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/ByTask/Feeds/DemoHTMLParser.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/DemoHTMLParser.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/DemoHTMLParser.cs index 0903754..2ee6184 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Feeds/DemoHTMLParser.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/DemoHTMLParser.cs @@ -1,8 +1,8 @@ // LUCENENET TODO: Use HTML Agility pack instead of SAX ? 
using Lucene.Net.Support; -using Sax.Net; -using Sax.Net.Helpers; +using Sax; +using Sax.Helpers; using System; using System.Collections.Generic; using System.IO; @@ -64,9 +64,9 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds public Parser(InputSource source) { - TagSoup.Net.Parser parser = new TagSoup.Net.Parser(); + TagSoup.Parser parser = new TagSoup.Parser(); - parser.SetFeature(TagSoup.Net.Parser.NAMESPACES_FEATURE, true); + parser.SetFeature(TagSoup.Parser.NAMESPACES_FEATURE, true); StringBuilder title = new StringBuilder(), body = new StringBuilder(); DefaultHandler handler = new DefaultHandlerAnonymousHelper(this, title, body); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/ByTask/Feeds/EnwikiContentSource.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/EnwikiContentSource.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/EnwikiContentSource.cs index 2cb24d0..6870b3b 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Feeds/EnwikiContentSource.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/EnwikiContentSource.cs @@ -3,8 +3,8 @@ using Lucene.Net.Benchmarks.ByTask.Utils; using Lucene.Net.Support.Threading; using Lucene.Net.Util; -using Sax.Net; -using Sax.Net.Helpers; +using Sax; +using Sax.Helpers; using System; using System.Collections.Generic; using System.Globalization; @@ -195,9 +195,10 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds try { - Sax.Net.IXmlReader reader = new TagSoup.Net.XmlReaderFactory().CreateXmlReader(); //XMLReaderFactory.createXMLReader(); + Sax.IXMLReader reader = new TagSoup.Parser(); //XMLReaderFactory.createXMLReader(); reader.ContentHandler = this; reader.ErrorHandler = this; + while (!stopped) { Stream localFileIS = outerInstance.@is; @@ -293,7 +294,6 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds } } } - } private static readonly IDictionary<string, int?> ELEMENTS = new Dictionary<string, int?>(); @@ -350,6 +350,7 @@ 
namespace Lucene.Net.Benchmarks.ByTask.Feeds parser.Stop(); if (@is != null) { + Thread.Sleep(1); // LUCENENET: Allow parser to stop before Dispose() is called @is.Dispose(); @is = null; } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj index f00cd18..2645226 100644 --- a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj +++ b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj @@ -160,6 +160,49 @@ <Compile Include="Quality\Utils\SimpleQQParser.cs" /> <Compile Include="Quality\Utils\SubmissionReport.cs" /> <Compile Include="Support\EnglishNumberFormatExtensions.cs" /> + <Compile Include="Support\Sax\Attributes.cs" /> + <Compile Include="Support\Sax\ContentHandler.cs" /> + <Compile Include="Support\Sax\DTDHandler.cs" /> + <Compile Include="Support\Sax\EntityResolver.cs" /> + <Compile Include="Support\Sax\ErrorHandler.cs" /> + <Compile Include="Support\Sax\Ext\Attributes2.cs" /> + <Compile Include="Support\Sax\Ext\Attributes2Impl.cs" /> + <Compile Include="Support\Sax\Ext\DeclHandler.cs" /> + <Compile Include="Support\Sax\Ext\DefaultHandler2.cs" /> + <Compile Include="Support\Sax\Ext\EntityResolver2.cs" /> + <Compile Include="Support\Sax\Ext\Locator2.cs" /> + <Compile Include="Support\Sax\Ext\LexicalHandler.cs" /> + <Compile Include="Support\Sax\Ext\Locator2Impl.cs" /> + <Compile Include="Support\Sax\Helpers\AttributesImpl.cs" /> + <Compile Include="Support\Sax\Helpers\DefaultHandler.cs" /> + <Compile Include="Support\Sax\Helpers\LocatorImpl.cs" /> + <Compile Include="Support\Sax\Helpers\NamespaceSupport.cs" /> + <Compile Include="Support\Sax\Helpers\XMLFilterImpl.cs" /> + <Compile Include="Support\Sax\InputSource.cs" /> + <Compile Include="Support\Sax\XMLReader.cs" /> + <Compile 
Include="Support\Sax\Locator.cs" /> + <Compile Include="Support\Sax\SAXException.cs" /> + <Compile Include="Support\Sax\SAXNotRecognizedException.cs" /> + <Compile Include="Support\Sax\SAXNotSupportedException.cs" /> + <Compile Include="Support\Sax\SAXParseException.cs" /> + <Compile Include="Support\Sax\XMLFilter.cs" /> + <Compile Include="Support\StringExtensions.cs" /> + <Compile Include="Support\TagSoup\AutoDetector.cs" /> + <Compile Include="Support\TagSoup\Element.cs" /> + <Compile Include="Support\TagSoup\ElementType.cs" /> + <Compile Include="Support\TagSoup\HTMLScanner.cs" /> + <Compile Include="Support\TagSoup\HTMLSchema.Generated.cs"> + <AutoGen>True</AutoGen> + <DesignTime>True</DesignTime> + <DependentUpon>HTMLSchema.tt</DependentUpon> + </Compile> + <Compile Include="Support\TagSoup\Parser.cs" /> + <Compile Include="Support\TagSoup\PYXScanner.cs" /> + <Compile Include="Support\TagSoup\PYXWriter.cs" /> + <Compile Include="Support\TagSoup\ScanHandler.cs" /> + <Compile Include="Support\TagSoup\Scanner.cs" /> + <Compile Include="Support\TagSoup\Schema.cs" /> + <Compile Include="Support\TagSoup\XMLReader.cs" /> <Compile Include="Utils\ExtractReuters.cs" /> <Compile Include="Utils\ExtractWikipedia.cs" /> <Compile Include="..\CommonAssemblyInfo.cs"> @@ -202,8 +245,25 @@ </ItemGroup> <ItemGroup> <None Include="Lucene.Net.Benchmark.project.json" /> + <None Include="Support\TagSoup\definitions\html.tssl" /> + <None Include="Support\TagSoup\stml\stml.rnc" /> + <None Include="Support\TagSoup\tssl\tssl.rnc" /> + </ItemGroup> + <ItemGroup> + <Content Include="Support\TagSoup\definitions\html.stml" /> + <Content Include="Support\TagSoup\HTMLSchema.tt"> + <Generator>TextTemplatingFileGenerator</Generator> + <LastGenOutput>HTMLSchema.Generated.cs</LastGenOutput> + </Content> + <Content Include="Support\TagSoup\stml\stml.xslt" /> + <Content Include="Support\TagSoup\tssl\tssl-models.xslt" /> + <Content Include="Support\TagSoup\tssl\tssl-validate.xslt" /> + <Content 
Include="Support\TagSoup\tssl\tssl.xslt" /> </ItemGroup> <ItemGroup /> + <ItemGroup> + <Service Include="{508349B6-6B84-4DF5-91F0-309BEEBAD82D}" /> + </ItemGroup> <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> <!-- To modify your build process, add your task inside one of the targets below and uncomment it. Other similar extension points exist, see Microsoft.Common.targets. http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.project.json ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.project.json b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.project.json index 0a83392..f764f6a 100644 --- a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.project.json +++ b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.project.json @@ -4,10 +4,8 @@ }, "dependencies": { "icu.net": "54.1.1-alpha", - "Sax.Net": "2.0.2", "SharpZipLib": "0.86.0", - "Spatial4n.Core": "0.4.1-beta00003", - "TagSoup.Net": "1.2.1.1" + "Spatial4n.Core": "0.4.1-beta00003" }, "frameworks": { "net451": {} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/EnglishNumberFormatExtensions.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Support/EnglishNumberFormatExtensions.cs b/src/Lucene.Net.Benchmark/Support/EnglishNumberFormatExtensions.cs index 71362f0..1d99a8b 100644 --- a/src/Lucene.Net.Benchmark/Support/EnglishNumberFormatExtensions.cs +++ b/src/Lucene.Net.Benchmark/Support/EnglishNumberFormatExtensions.cs @@ -10,12 +10,12 @@ namespace Lucene.Net.Support /// </summary> public static class EnglishNumberFormatExtensions { - private const long Quadrillion = Trillion * 1000; - private const long Trillion = Billion * 1000; - private const long Billion = Million * 1000; - private const long Million = Thousand * 1000; - private const long Thousand = 
Hundred * 10; - private const long Hundred = 100; + private const long QUADRILLION = TRILLION * 1000; + private const long TRILLION = BILLION * 1000; + private const long BILLION = MILLION * 1000; + private const long MILLION = THOUSAND * 1000; + private const long THOUSAND = HUNDRED * 10; + private const long HUNDRED = 100; /// <summary> /// Returns the spelled-out English words for the provided <paramref name="value"/>. @@ -44,60 +44,60 @@ namespace Lucene.Net.Support long unit = 0; - if (value >= Quadrillion) + if (value >= QUADRILLION) { - unit = (value / Quadrillion); - value -= unit * Quadrillion; + unit = (value / QUADRILLION); + value -= unit * QUADRILLION; ToWords(unit, builder); builder.Append(" quadrillion"); if (value > 0) builder.Append(" "); } - if (value >= Trillion) + if (value >= TRILLION) { - unit = (value / Trillion); - value -= unit * Trillion; + unit = (value / TRILLION); + value -= unit * TRILLION; ToWords(unit, builder); builder.Append(" trillion"); if (value > 0) builder.Append(" "); } - if (value >= Billion) + if (value >= BILLION) { - unit = (value / Billion); - value -= unit * Billion; + unit = (value / BILLION); + value -= unit * BILLION; ToWords(unit, builder); builder.Append(" billion"); if (value > 0) builder.Append(" "); } - if (value >= Million) + if (value >= MILLION) { - unit = (value / Million); - value -= unit * Million; + unit = (value / MILLION); + value -= unit * MILLION; ToWords(unit, builder); builder.Append(" million"); if (value > 0) builder.Append(" "); } - if (value >= Thousand) + if (value >= THOUSAND) { - unit = (value / Thousand); - value -= unit * Thousand; + unit = (value / THOUSAND); + value -= unit * THOUSAND; ToWords(unit, builder); builder.Append(" thousand"); if (value > 0) builder.Append(" "); } - if (value >= Hundred) + if (value >= HUNDRED) { - unit = (value / Hundred); - value -= unit * Hundred; + unit = (value / HUNDRED); + value -= unit * HUNDRED; ToWords(unit, builder); builder.Append(" hundred"); 
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/Sax/Attributes.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Support/Sax/Attributes.cs b/src/Lucene.Net.Benchmark/Support/Sax/Attributes.cs new file mode 100644 index 0000000..5a51ae3 --- /dev/null +++ b/src/Lucene.Net.Benchmark/Support/Sax/Attributes.cs @@ -0,0 +1,219 @@ +// Attributes.java - attribute list with Namespace support +// http://www.saxproject.org +// Written by David Megginson +// NO WARRANTY! This class is in the public domain. +// $Id: Attributes.java,v 1.13 2004/03/18 12:28:05 dmegginson Exp $ + +namespace Sax +{ + /// <summary> + /// Interface for a list of XML attributes. + /// </summary> + /// <remarks> + /// <em>This module, both source code and documentation, is in the + /// Public Domain, and comes with <strong>NO WARRANTY</strong>.</em> + /// See <a href='http://www.saxproject.org'>http://www.saxproject.org</a> + /// for further information. + /// <para/> + /// This interface allows access to a list of attributes in + /// three different ways: + /// <list type="number"> + /// <item><description>by attribute index;</description></item> + /// <item><description>by Namespace-qualified name; or</description></item> + /// <item><description>by qualified (prefixed) name.</description></item> + /// </list> + /// <para/> + /// The list will not contain attributes that were declared + /// #IMPLIED but not specified in the start tag. It will also not + /// contain attributes used as Namespace declarations (xmlns*) unless + /// the <a href="http://xml.org/sax/features/namespace-prefixes">http://xml.org/sax/features/namespace-prefixes</a> + /// feature is set to <var>true</var> (it is <var>false</var> by + /// default). + /// Because SAX2 conforms to the original "Namespaces in XML" + /// recommendation, it normally does not + /// give namespace declaration attributes a namespace URI.
+ /// <para/> + /// Some SAX2 parsers may support using an optional feature flag + /// (<a href="http://xml.org/sax/features/xmlns-uris">http://xml.org/sax/features/xmlns-uris</a>) to request + /// that those attributes be given URIs, conforming to a later + /// backwards-incompatible revision of that recommendation. (The + /// attribute's "local name" will be the prefix, or "xmlns" when + /// defining a default element namespace.) For portability, handler + /// code should always resolve that conflict, rather than requiring + /// parsers that can change the setting of that feature flag. + /// If the namespace-prefixes feature (see above) is + /// <var>false</var>, access by qualified name may not be available; if + /// the <code>http://xml.org/sax/features/namespaces</code> feature is + /// <var>false</var>, access by Namespace-qualified names may not be + /// available. + /// <para/>This interface replaces the now-deprecated SAX1 + /// <c>org.xml.sax.AttributeList</c> interface, which does not + /// contain Namespace support. In addition to Namespace support, it + /// adds the <var>GetIndex</var> methods (below). + /// <para/>The order of attributes in the list is unspecified, and will + /// vary from implementation to implementation. + /// </remarks> + /// <since>SAX 2.0</since> + /// <author>David Megginson</author> + /// <version>2.0.1 (sax2r2)</version> + /// <seealso cref="Helpers.Attributes"/> + /// <seealso cref="Ext.IDeclHandler"/> + public interface IAttributes + { + //////////////////////////////////////////////////////////////////// + // Indexed access. + //////////////////////////////////////////////////////////////////// + + /// <summary> + /// Return the number of attributes in the list. + /// <para/> + /// Once you know the number of attributes, you can iterate + /// through the list.
+ /// </summary> + /// <returns>The number of attributes in the list.</returns> + /// <seealso cref="GetURI(int)"/> + /// <seealso cref="GetLocalName(int)"/> + /// <seealso cref="GetQName(int)"/> + /// <seealso cref="GetType(int)"/> + /// <seealso cref="GetValue(int)"/> + int Length { get; } + + /// <summary> + /// Look up an attribute's Namespace URI by index. + /// </summary> + /// <param name="index">The attribute index (zero-based).</param> + /// <returns>The Namespace URI, or the empty string if none + /// is available, or null if the index is out of + /// range.</returns> + /// <seealso cref="Length"/> + string GetURI(int index); + + /// <summary> + /// Look up an attribute's local name by index. + /// </summary> + /// <param name="index">The attribute index (zero-based).</param> + /// <returns>The local name, or the empty string if Namespace + /// processing is not being performed, or null + /// if the index is out of range.</returns> + /// <seealso cref="Length"/> + string GetLocalName(int index); + + /// <summary> + /// Look up an attribute's XML qualified (prefixed) name by index. + /// </summary> + /// <param name="index">The attribute index (zero-based).</param> + /// <returns>The XML qualified name, or the empty string + /// if none is available, or null if the index + /// is out of range.</returns> + /// <seealso cref="Length"/> + string GetQName(int index); + + /// <summary> + /// Look up an attribute's type by index. + /// </summary> + /// <remarks> + /// The attribute type is one of the strings "CDATA", "ID", + /// "IDREF", "IDREFS", "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES", + /// or "NOTATION" (always in upper case). + /// <para/> + /// If the parser has not read a declaration for the attribute, + /// or if the parser does not report attribute types, then it must + /// return the value "CDATA" as stated in the XML 1.0 Recommendation + /// (clause 3.3.3, "Attribute-Value Normalization").
+ /// <para/> + /// For an enumerated attribute that is not a notation, the + /// parser will report the type as "NMTOKEN". + /// </remarks> + /// <param name="index">The attribute index (zero-based).</param> + /// <returns>The attribute's type as a string, or null if the + /// index is out of range.</returns> + /// <seealso cref="Length"/> + string GetType(int index); + + /// <summary> + /// Look up an attribute's value by index. + /// </summary> + /// <remarks> + /// If the attribute value is a list of tokens (IDREFS, + /// ENTITIES, or NMTOKENS), the tokens will be concatenated + /// into a single string with each token separated by a + /// single space. + /// </remarks> + /// <param name="index">The attribute index (zero-based).</param> + /// <returns>The attribute's value as a string, or null if the + /// index is out of range.</returns> + /// <seealso cref="Length"/> + string GetValue(int index); + + //////////////////////////////////////////////////////////////////// + // Name-based query. + //////////////////////////////////////////////////////////////////// + + /// <summary> + /// Look up the index of an attribute by Namespace name. + /// </summary> + /// <param name="uri">The Namespace URI, or the empty string if + /// the name has no Namespace URI.</param> + /// <param name="localName">The attribute's local name.</param> + /// <returns>The index of the attribute, or -1 if it does not + /// appear in the list.</returns> + int GetIndex(string uri, string localName); + + /// <summary> + /// Look up the index of an attribute by XML qualified (prefixed) name. + /// </summary> + /// <param name="qName">The qualified (prefixed) name.</param> + /// <returns>The index of the attribute, or -1 if it does not + /// appear in the list.</returns> + int GetIndex(string qName); + + /// <summary> + /// Look up an attribute's type by Namespace name. + /// <para/> + /// See <see cref="GetType(int)"/> for a description + /// of the possible types. 
+ /// </summary> + /// <param name="uri">The Namespace URI, or the empty String if the + /// name has no Namespace URI.</param> + /// <param name="localName">The local name of the attribute.</param> + /// <returns>The attribute type as a string, or null if the + /// attribute is not in the list or if Namespace + /// processing is not being performed.</returns> + string GetType(string uri, string localName); + + /// <summary> + /// Look up an attribute's type by XML qualified (prefixed) name. + /// <para/> + /// See <see cref="GetType(int)"/> for a description + /// of the possible types. + /// </summary> + /// <param name="qName">The XML qualified name.</param> + /// <returns>The attribute type as a string, or null if the + /// attribute is not in the list or if qualified names + /// are not available.</returns> + string GetType(string qName); + + /// <summary> + /// Look up an attribute's value by Namespace name. + /// <para/> + /// See <see cref="GetValue(int)"/> for a description + /// of the possible values. + /// </summary> + /// <param name="uri">The Namespace URI, or the empty String if the + /// name has no Namespace URI.</param> + /// <param name="localName">The local name of the attribute.</param> + /// <returns>The attribute value as a string, or null if the + /// attribute is not in the list.</returns> + string GetValue(string uri, string localName); + + /// <summary> + /// Look up an attribute's value by XML qualified (prefixed) name. + /// <para/> + /// See <see cref="GetValue(int)"/> for a description + /// of the possible values. 
+ /// </summary> + /// <param name="qName">The XML qualified name.</param> + /// <returns>The attribute value as a string, or null if the attribute is not in the list or if qualified names are not available.</returns> + string GetValue(string qName); + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/Sax/ContentHandler.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Support/Sax/ContentHandler.cs b/src/Lucene.Net.Benchmark/Support/Sax/ContentHandler.cs new file mode 100644 index 0000000..3f03aae --- /dev/null +++ b/src/Lucene.Net.Benchmark/Support/Sax/ContentHandler.cs @@ -0,0 +1,364 @@ +// ContentHandler.java - handle main document content. +// http://www.saxproject.org +// Written by David Megginson +// NO WARRANTY! This class is in the public domain. +// $Id: ContentHandler.java,v 1.13 2004/04/26 17:50:49 dmegginson Exp $ + +namespace Sax +{ + /// <summary> + /// Receive notification of the logical content of a document. + /// </summary> + /// <remarks> + /// <em>This module, both source code and documentation, is in the + /// Public Domain, and comes with <strong>NO WARRANTY</strong>.</em> + /// See <a href='http://www.saxproject.org'>http://www.saxproject.org</a> + /// for further information. + /// <para/> + /// This is the main interface that most SAX applications + /// implement: if the application needs to be informed of basic parsing + /// events, it implements this interface and registers an instance with + /// the SAX parser using the <see cref="IXMLReader.ContentHandler"/> + /// property. The parser uses the instance to report + /// basic document-related events like the start and end of elements + /// and character data.
+ /// <para/>The order of events in this interface is very important, and + /// mirrors the order of information in the document itself. For + /// example, all of an element's content (character data, processing + /// instructions, and/or subelements) will appear, in order, between + /// the startElement event and the corresponding endElement event. + /// <para/>This interface is similar to the now-deprecated SAX 1.0 + /// DocumentHandler interface, but it adds support for Namespaces + /// and for reporting skipped entities (in non-validating XML + /// processors). + /// </remarks> + /// <since>SAX 2.0</since> + /// <author>David Megginson</author> + /// <version>2.0.1+ (sax2r3pre1)</version> + /// <seealso cref="IXMLReader"/> + /// <seealso cref="IDTDHandler"/> + /// <seealso cref="IErrorHandler"/> + public interface IContentHandler + { + /// <summary> + /// Receive an object for locating the origin of SAX document events. + /// </summary> + /// <remarks> + /// SAX parsers are strongly encouraged (though not absolutely + /// required) to supply a locator: if it does so, it must supply + /// the locator to the application by invoking this method before + /// invoking any of the other methods in the ContentHandler + /// interface. + /// <para/> + /// The locator allows the application to determine the end + /// position of any document-related event, even if the parser is + /// not reporting an error. Typically, the application will + /// use this information for reporting its own errors (such as + /// character content that does not match an application's + /// business rules). The information returned by the locator + /// is probably not sufficient for use with a search engine. + /// <para/> + /// Note that the locator will return correct information only + /// during the invocation SAX event callbacks after + /// <see cref="StartDocument()"/> returns and before + /// <see cref="EndDocument()"/> is called.
The + /// application should not attempt to use it at any other time. + /// </remarks> + /// <param name="locator">an object that can return the location of + /// any SAX document event</param> + /// <seealso cref="ILocator"/> + void SetDocumentLocator(ILocator locator); + + /// <summary> + /// Receive notification of the beginning of a document. + /// </summary> + /// <remarks> + /// The SAX parser will invoke this method only once, before any + /// other event callbacks (except for <see cref="SetDocumentLocator(ILocator)"/>). + /// </remarks> + /// <exception cref="SAXException">any SAX exception, possibly + /// wrapping another exception</exception> + /// <seealso cref="EndDocument()"/> + void StartDocument(); + + /// <summary> + /// Receive notification of the end of a document. + /// </summary> + /// <remarks> + /// <strong>There is an apparent contradiction between the + /// documentation for this method and the documentation for + /// <see cref="IErrorHandler.FatalError(SAXParseException)"/>. Until this ambiguity is + /// resolved in a future major release, clients should make no + /// assumptions about whether <see cref="EndDocument()"/> will or will not be + /// invoked when the parser has reported a <see cref="IErrorHandler.FatalError(SAXParseException)"/> or thrown + /// an exception.</strong> + /// <para/>The SAX parser will invoke this method only once, and it will + /// be the last method invoked during the parse. The parser shall + /// not invoke this method until it has either abandoned parsing + /// (because of an unrecoverable error) or reached the end of + /// input. + /// </remarks> + /// <exception cref="SAXException">any SAX exception, possibly + /// wrapping another exception</exception> + /// <seealso cref="StartDocument()"/> + void EndDocument(); + + /// <summary> + /// Begin the scope of a prefix-URI Namespace mapping.
+ /// </summary> + /// <remarks> + /// The information from this event is not necessary for + /// normal Namespace processing: the SAX XML reader will + /// automatically replace prefixes for element and attribute + /// names when the <code>http://xml.org/sax/features/namespaces</code> + /// feature is <var>true</var> (the default). + /// <para/> + /// There are cases, however, when applications need to + /// use prefixes in character data or in attribute values, + /// where they cannot safely be expanded automatically; the + /// start/endPrefixMapping event supplies the information + /// to the application to expand prefixes in those contexts + /// itself, if necessary. + /// <para/>Note that start/endPrefixMapping events are not + /// guaranteed to be properly nested relative to each other: + /// all startPrefixMapping events will occur immediately before the + /// corresponding <see cref="StartElement(string, string, string, IAttributes)"/> event, + /// and all <see cref="EndPrefixMapping(string)"/> + /// events will occur immediately after the corresponding + /// <see cref="EndElement(string, string, string)"/> event, + /// but their order is not otherwise + /// guaranteed. + /// <para/>There should never be start/endPrefixMapping events for the + /// "xml" prefix, since it is predeclared and immutable. + /// </remarks> + /// <param name="prefix">the Namespace prefix being declared. + /// An empty string is used for the default element namespace, + /// which has no prefix.</param> + /// <param name="uri">the Namespace URI the prefix is mapped to</param> + /// <exception cref="SAXException">the client may throw + /// an exception during processing</exception> + /// <seealso cref="EndPrefixMapping(string)"/> + /// <seealso cref="StartElement(string, string, string, IAttributes)"/> + void StartPrefixMapping(string prefix, string uri); + + /// <summary> + /// End the scope of a prefix-URI mapping.
+ /// </summary> + /// <remarks> + /// See <see cref="StartPrefixMapping(string, string)"/> for + /// details. These events will always occur immediately after the + /// corresponding <see cref="EndElement(string, string, string)"/> event, but the order of + /// <see cref="EndPrefixMapping(string)"/> events is not otherwise + /// guaranteed. + /// </remarks> + /// <param name="prefix">the prefix that was being mapped. + /// This is the empty string when a default mapping scope ends.</param> + /// <exception cref="SAXException">the client may throw + /// an exception during processing</exception> + /// <seealso cref="StartPrefixMapping(string, string)"/> + /// <seealso cref="EndElement(string, string, string)"/> + void EndPrefixMapping(string prefix); + + /// <summary> + /// Receive notification of the beginning of an element. + /// <para/> + /// The Parser will invoke this method at the beginning of every + /// element in the XML document; there will be a corresponding + /// <see cref="EndElement(string, string, string)"/> event for every <see cref="StartElement(string, string, string, IAttributes)"/> event + /// (even when the element is empty). All of the element's content will be + /// reported, in order, before the corresponding <see cref="EndElement(string, string, string)"/> + /// event.
+ /// <para/> + /// This event allows up to three name components for each element: + /// <list type="number"> + /// <item><description>the Namespace URI;</description></item> + /// <item><description>the local name; and</description></item> + /// <item><description>the qualified (prefixed) name.</description></item> + /// </list> + /// <para/> + /// Any or all of these may be provided, depending on the + /// values of the <var>http://xml.org/sax/features/namespaces</var> + /// and the <var>http://xml.org/sax/features/namespace-prefixes</var> + /// properties: + /// <list type="bullet"> + /// <item><description>the Namespace URI and local name are required when + /// the namespaces property is <var>true</var> (the default), and are + /// optional when the namespaces property is <var>false</var> (if one is + /// specified, both must be);</description></item> + /// <item><description>the qualified name is required when the namespace-prefixes property + /// is <var>true</var>, and is optional when the namespace-prefixes property + /// is <var>false</var> (the default).</description></item> + /// </list> + /// <para/>Note that the attribute list provided will contain only + /// attributes with explicit values (specified or defaulted): + /// #IMPLIED attributes will be omitted. The attribute list + /// will contain attributes used for Namespace declarations + /// (xmlns* attributes) only if the + /// <a href="http://xml.org/sax/features/namespace-prefixes">http://xml.org/sax/features/namespace-prefixes</a> + /// property is true (it is false by default, and support for a + /// true value is optional). + /// <para/>Like <see cref="Characters(char[], int, int)"/>, attribute values may have + /// characters that need more than one <c>char</c> value.
+ /// </summary> + /// <param name="uri">uri the Namespace URI, or the empty string if the + /// element has no Namespace URI or if Namespace + /// processing is not being performed</param> + /// <param name="localName">the local name (without prefix), or the + /// empty string if Namespace processing is not being + /// performed</param> + /// <param name="qName">the qualified name (with prefix), or the + /// empty string if qualified names are not available</param> + /// <param name="atts">the attributes attached to the element. If + /// there are no attributes, it shall be an empty + /// <see cref="IAttributes"/> object. The value of this object after + /// <see cref="StartElement(string, string, string, IAttributes)"/> returns is undefined</param> + /// <exception cref="SAXException">any SAX exception, possibly + /// wrapping another exception</exception> + /// <seealso cref="EndElement(string, string, string)"/> + /// <seealso cref="IAttributes"/> + /// <seealso cref="Helpers.Attributes"/> + void StartElement(string uri, string localName, string qName, IAttributes atts); + + /// <summary> + /// Receive notification of the end of an element. + /// </summary> + /// <remarks> + /// The SAX parser will invoke this method at the end of every + /// element in the XML document; there will be a corresponding + /// <see cref="StartElement(string, string, string, IAttributes)"/> event for every endElement + /// event (even when the element is empty). + /// <para/> + /// For information on the names, see <see cref="StartElement(string, string, string, IAttributes)"/>. 
+ /// </remarks> + /// <param name="uri">the Namespace URI, or the empty string if the + /// element has no Namespace URI or if Namespace + /// processing is not being performed</param> + /// <param name="localName">the local name (without prefix), or the + /// empty string if Namespace processing is not being + /// performed</param> + /// <param name="qName">the qualified XML name (with prefix), or the + /// empty string if qualified names are not available</param> + /// <exception cref="SAXException">any SAX exception, possibly + /// wrapping another exception</exception> + void EndElement(string uri, string localName, string qName); + + /// <summary> + /// Receive notification of character data. + /// </summary> + /// <remarks> + /// The Parser will call this method to report each chunk of + /// character data. SAX parsers may return all contiguous character + /// data in a single chunk, or they may split it into several + /// chunks; however, all of the characters in any single event + /// must come from the same external entity so that the Locator + /// provides useful information. + /// <para/> + /// The application must not attempt to read from the array + /// outside of the specified range. + /// <para/> + /// Individual characters may consist of more than one .NET + /// <c>char</c> value. There are two important cases where this + /// happens, because characters can't be represented in just sixteen bits. + /// In one case, characters are represented in a <em>Surrogate Pair</em>, + /// using two special Unicode values. Such characters are in the so-called + /// "Astral Planes", with a code point above U+FFFF. A second case involves + /// composite characters, such as a base character combining with one or + /// more accent characters. + /// <para/> Your code should not assume that algorithms using + /// <c>char</c>-at-a-time idioms will be working in character + /// units; in some cases they will split characters. 
This is relevant + /// wherever XML permits arbitrary characters, such as attribute values, + /// processing instruction data, and comments as well as in data reported + /// from this method. It's also generally relevant whenever .NET code + /// manipulates internationalized text; the issue isn't unique to XML. + /// <para/>Note that some parsers will report whitespace in element + /// content using the <see cref="IgnorableWhitespace(char[], int, int)"/> + /// method rather than this one (validating parsers <em>must</em> + /// do so). + /// </remarks> + /// <param name="ch">the characters from the XML document</param> + /// <param name="start">the start position in the array</param> + /// <param name="length">the number of characters to read from the array</param> + /// <exception cref="SAXException">any SAX exception, possibly + /// wrapping another exception</exception> + /// <seealso cref="IgnorableWhitespace(char[], int, int)"/> + /// <seealso cref="ILocator"/> + void Characters(char[] ch, int start, int length); + + /// <summary> + /// Receive notification of ignorable whitespace in element content. + /// </summary> + /// <remarks> + /// Validating Parsers must use this method to report each chunk + /// of whitespace in element content (see the W3C XML 1.0 + /// recommendation, section 2.10): non-validating parsers may also + /// use this method if they are capable of parsing and using + /// content models. + /// <para/> + /// SAX parsers may return all contiguous whitespace in a single + /// chunk, or they may split it into several chunks; however, all of + /// the characters in any single event must come from the same + /// external entity, so that the Locator provides useful + /// information. + /// <para/> + /// The application must not attempt to read from the array + /// outside of the specified range. 
+ /// </remarks> + /// <param name="ch">the characters from the XML document</param> + /// <param name="start">the start position in the array</param> + /// <param name="length">the number of characters to read from the array</param> + /// <exception cref="SAXException">any SAX exception, possibly + /// wrapping another exception</exception> + /// <seealso cref="Characters(char[], int, int)"/> + void IgnorableWhitespace(char[] ch, int start, int length); + + /// <summary> + /// Receive notification of a processing instruction. + /// </summary> + /// <remarks> + /// The Parser will invoke this method once for each processing + /// instruction found: note that processing instructions may occur + /// before or after the main document element. + /// <para/> + /// A SAX parser must never report an XML declaration (XML 1.0, + /// section 2.8) or a text declaration (XML 1.0, section 4.3.1) + /// using this method. + /// <para/> + /// Like <see cref="Characters(char[], int, int)"/>, processing instruction + /// data may have characters that need more than one <c>char</c> + /// value. + /// </remarks> + /// <param name="target">the processing instruction target</param> + /// <param name="data">the processing instruction data, or null if + /// none was supplied. The data does not include any + /// whitespace separating it from the target</param> + /// <exception cref="SAXException">any SAX exception, possibly + /// wrapping another exception</exception> + void ProcessingInstruction(string target, string data); + + /// <summary> + /// Receive notification of a skipped entity. + /// This is not called for entity references within markup constructs + /// such as element start tags or markup declarations. (The XML + /// recommendation requires reporting skipped external entities. + /// SAX also reports internal entity expansion/non-expansion, except + /// within markup constructs.) 
+ /// <para/> + /// The Parser will invoke this method each time the entity is + /// skipped. Non-validating processors may skip entities if they + /// have not seen the declarations (because, for example, the + /// entity was declared in an external DTD subset). All processors + /// may skip external entities, depending on the values of the + /// <a href="http://xml.org/sax/features/external-general-entities">http://xml.org/sax/features/external-general-entities</a> + /// and the <a href="http://xml.org/sax/features/external-parameter-entities">http://xml.org/sax/features/external-parameter-entities</a> + /// properties. + /// </summary> + /// <param name="name">the name of the skipped entity. If it is a + /// parameter entity, the name will begin with '%', and if + /// it is the external DTD subset, it will be the string + /// "[dtd]"</param> + /// <exception cref="SAXException">any SAX exception, possibly + /// wrapping another exception</exception> + void SkippedEntity(string name); + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/Sax/DTDHandler.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Support/Sax/DTDHandler.cs b/src/Lucene.Net.Benchmark/Support/Sax/DTDHandler.cs new file mode 100644 index 0000000..b96777b --- /dev/null +++ b/src/Lucene.Net.Benchmark/Support/Sax/DTDHandler.cs @@ -0,0 +1,100 @@ +// SAX DTD handler. +// http://www.saxproject.org +// No warranty; no copyright -- use this as you will. +// $Id: DTDHandler.java,v 1.8 2002/01/30 21:13:43 dbrownell Exp $ + + +namespace Sax +{ + /// <summary> + /// Receive notification of basic DTD-related events. + /// </summary> + /// <remarks> + /// <em>This module, both source code and documentation, is in the + /// Public Domain, and comes with <strong>NO WARRANTY</strong>.</em> + /// See <a href='http://www.saxproject.org'>http://www.saxproject.org</a> + /// for further information. 
+ /// <para/> + /// If a SAX application needs information about notations and + /// unparsed entities, then the application implements this + /// interface and registers an instance with the SAX parser using + /// the parser's setDTDHandler method. The parser uses the + /// instance to report notation and unparsed entity declarations to + /// the application. + /// <para/> + /// Note that this interface includes only those DTD events that + /// the XML recommendation <em>requires</em> processors to report: + /// notation and unparsed entity declarations. + /// <para/> + /// The SAX parser may report these events in any order, regardless + /// of the order in which the notations and unparsed entities were + /// declared; however, all DTD events must be reported after the + /// document handler's startDocument event, and before the first + /// startElement event. + /// (If the <see cref="Ext.ILexicalHandler"/> is + /// used, these events must also be reported before the endDTD event.) + /// <para/> + /// It is up to the application to store the information for + /// future use (perhaps in a hash table or object tree). + /// If the application encounters attributes of type "NOTATION", + /// "ENTITY", or "ENTITIES", it can use the information that it + /// obtained through this interface to find the entity and/or + /// notation corresponding with the attribute value. + /// </remarks> + /// <seealso cref="IXMLReader.SetDTDHandler"/> + public interface IDTDHandler + { + /// <summary> + /// Receive notification of a notation declaration event. + /// </summary> + /// <remarks> + /// It is up to the application to record the notation for later + /// reference, if necessary; + /// notations may appear as attribute values and in unparsed entity + /// declarations, and are sometimes used with processing instruction + /// target names. + /// <para/> + /// At least one of publicId and systemId must be non-null. 
+ /// If a system identifier is present, and it is a URL, the SAX + /// parser must resolve it fully before passing it to the + /// application through this event. + /// <para/> + /// There is no guarantee that the notation declaration will be + /// reported before any unparsed entities that use it. + /// </remarks> + /// <param name="name">The notation name.</param> + /// <param name="publicId">The notation's public identifier, or <c>null</c> if none was given.</param> + /// <param name="systemId">The notation's system identifier, or <c>null</c> if none was given.</param> + /// <exception cref="SAXException">Any SAX exception, possibly wrapping another exception.</exception> + /// <seealso cref="UnparsedEntityDecl(string, string, string, string)"/> + /// <seealso cref="IAttributes"/> + void NotationDecl(string name, + string publicId, + string systemId); + + /// <summary> + /// Receive notification of an unparsed entity declaration event. + /// </summary> + /// <remarks> + /// Note that the notation name corresponds to a notation + /// reported by the <see cref="NotationDecl(string, string, string)"/> event. + /// It is up to the application to record the entity for later + /// reference, if necessary; + /// unparsed entities may appear as attribute values. + /// <para/> + /// If the system identifier is a URL, the parser must resolve it + /// fully before passing it to the application. 
+ /// </remarks> + /// <exception cref="SAXException">Any SAX exception, possibly wrapping another exception.</exception> + /// <param name="name">The unparsed entity's name.</param> + /// <param name="publicId">The entity's public identifier, or null if none was given.</param> + /// <param name="systemId">The entity's system identifier.</param> + /// <param name="notationName">The name of the associated notation.</param> + /// <seealso cref="NotationDecl(string, string, string)"/> + /// <seealso cref="IAttributes"/> + void UnparsedEntityDecl(string name, + string publicId, + string systemId, + string notationName); + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/Sax/EntityResolver.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Support/Sax/EntityResolver.cs b/src/Lucene.Net.Benchmark/Support/Sax/EntityResolver.cs new file mode 100644 index 0000000..a66bedd --- /dev/null +++ b/src/Lucene.Net.Benchmark/Support/Sax/EntityResolver.cs @@ -0,0 +1,109 @@ +// SAX entity resolver. +// http://www.saxproject.org +// No warranty; no copyright -- use this as you will. +// $Id: EntityResolver.java,v 1.10 2002/01/30 21:13:44 dbrownell Exp $ + +namespace Sax +{ + /// <summary> + /// Basic interface for resolving entities. + /// </summary> + /// <remarks> + /// <em>This module, both source code and documentation, is in the + /// Public Domain, and comes with <strong>NO WARRANTY</strong>.</em> + /// See <a href='http://www.saxproject.org'>http://www.saxproject.org</a> + /// for further information. + /// <para/> + /// If a SAX application needs to implement customized handling + /// for external entities, it must implement this interface and + /// register an instance with the SAX driver using the + /// <see cref="IXMLReader.EntityResolver"/> + /// property. 
+ /// <para/> + /// The XML reader will then allow the application to intercept any + /// external entities (including the external DTD subset and external + /// parameter entities, if any) before including them. + /// <para/> + /// Many SAX applications will not need to implement this interface, + /// but it will be especially useful for applications that build + /// XML documents from databases or other specialised input sources, + /// or for applications that use URI types other than URLs. + /// <para/> + /// The following resolver would provide the application + /// with a special character stream for the entity with the system + /// identifier "http://www.myhost.com/today": + /// + /// <code> + /// public class MyResolver : IEntityResolver + /// { + /// public InputSource ResolveEntity (string publicId, string systemId) + /// { + /// if (systemId.Equals("http://www.myhost.com/today", StringComparison.Ordinal)) + /// { + /// // return a special input source + /// MyReader reader = new MyReader(); + /// return new InputSource(reader); + /// } + /// else + /// { + /// // use the default behaviour + /// return null; + /// } + /// } + /// } + /// </code> + /// <para/> + /// The application can also use this interface to redirect system + /// identifiers to local URIs or to look up replacements in a catalog + /// (possibly by using the public identifier). + /// </remarks> + /// <author>David Megginson</author> + /// <version>2.0.1 (sax2r2)</version> + /// <since>SAX 1.0</since> + /// <seealso cref="IXMLReader.EntityResolver"/> + /// <seealso cref="InputSource"/> + public interface IEntityResolver + { + /// <summary> + /// Allow the application to resolve external entities. 
+ /// </summary> + /// <remarks> + /// The parser will call this method before opening any external + /// entity except the top-level document entity. Such entities include + /// the external DTD subset and external parameter entities referenced + /// within the DTD (in either case, only if the parser reads external + /// parameter entities), and external general entities referenced + /// within the document element (if the parser reads external general + /// entities). The application may request that the parser locate + /// the entity itself, that it use an alternative URI, or that it + /// use data provided by the application (as a character or byte + /// input stream). + /// <para/> + /// Application writers can use this method to redirect external + /// system identifiers to secure and/or local URIs, to look up + /// public identifiers in a catalogue, or to read an entity from a + /// database or other input source (including, for example, a dialog + /// box). Neither XML nor SAX specifies a preferred policy for using + /// public or system IDs to resolve resources. However, SAX specifies + /// how to interpret any InputSource returned by this method, and that + /// if none is returned, then the system ID will be dereferenced as + /// a URL. + /// <para/> + /// If the system identifier is a URL, the SAX parser must + /// resolve it fully before reporting it to the application. 
+ /// </remarks> + /// <param name="publicId">The public identifier of the external entity + /// being referenced, or null if none was supplied.</param> + /// <param name="systemId">The system identifier of the external entity + /// being referenced.</param> + /// <returns>An InputSource object describing the new input source, + /// or null to request that the parser open a regular + /// URI connection to the system identifier.</returns> + /// <exception cref="SAXException">Any SAX exception, possibly wrapping another exception.</exception> + /// <exception cref="System.IO.IOException">A .NET-specific IO exception, possibly the result of creating + /// a new <see cref="System.IO.Stream"/> or <see cref="System.IO.TextReader"/> for the <see cref="InputSource"/>.</exception> + /// <seealso cref="InputSource"/> + InputSource ResolveEntity(string publicId, + string systemId); + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/Sax/ErrorHandler.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Support/Sax/ErrorHandler.cs b/src/Lucene.Net.Benchmark/Support/Sax/ErrorHandler.cs new file mode 100644 index 0000000..785e697 --- /dev/null +++ b/src/Lucene.Net.Benchmark/Support/Sax/ErrorHandler.cs @@ -0,0 +1,122 @@ +// SAX error handler. +// http://www.saxproject.org +// No warranty; no copyright -- use this as you will. +// $Id: ErrorHandler.java,v 1.10 2004/03/08 13:01:00 dmegginson Exp $ + +namespace Sax +{ + /// <summary> + /// Basic interface for SAX error handlers. + /// </summary> + /// <remarks> + /// <em>This module, both source code and documentation, is in the + /// Public Domain, and comes with <strong>NO WARRANTY</strong>.</em> + /// See <a href='http://www.saxproject.org'>http://www.saxproject.org</a> + /// for further information. 
+ /// <para/> + /// If a SAX application needs to implement customized error + /// handling, it must implement this interface and then register an + /// instance with the XML reader using the + /// <see cref="IXMLReader.ErrorHandler"/> + /// property. The parser will then report all errors and warnings + /// through this interface. + /// <para/> + /// <strong>WARNING:</strong> If an application does <em>not</em> + /// register an ErrorHandler, XML parsing errors will go unreported, + /// except that <em>SAXParseException</em>s will be thrown for fatal errors. + /// In order to detect validity errors, an ErrorHandler that does something + /// with <see cref="Error(SAXParseException)"/> calls must be registered. + /// <para/> + /// For XML processing errors, a SAX driver must use this interface + /// in preference to throwing an exception: it is up to the application + /// to decide whether to throw an exception for different types of + /// errors and warnings. Note, however, that there is no requirement that + /// the parser continue to report additional errors after a call to + /// <see cref="FatalError(SAXParseException)"/>. In other words, a SAX driver class + /// may throw an exception after reporting any fatalError. + /// Also parsers may throw appropriate exceptions for non-XML errors. + /// For example, <see cref="IXMLReader.Parse()"/> would throw + /// an <see cref="System.IO.IOException"/> for errors accessing entities or the document. + /// </remarks> + /// <since>SAX 1.0</since> + /// <author>David Megginson</author> + /// <version>2.0.1+ (sax2r3pre1)</version> + /// <seealso cref="IXMLReader.ErrorHandler"/> + /// <seealso cref="SAXParseException"/> + public interface IErrorHandler + { + /// <summary> + /// Receive notification of a warning. 
+ /// </summary> + /// <remarks> + /// SAX parsers will use this method to report conditions that + /// are not errors or fatal errors as defined by the XML + /// recommendation. The default behaviour is to take no + /// action. + /// <para/> + /// The SAX parser must continue to provide normal parsing events + /// after invoking this method: it should still be possible for the + /// application to process the document through to the end. + /// <para/> + /// Filters may use this method to report other, non-XML warnings + /// as well. + /// </remarks> + /// <param name="exception">The warning information encapsulated in a SAX parse exception.</param> + /// <exception cref="SAXException">Any SAX exception, possibly wrapping another exception.</exception> + /// <seealso cref="SAXParseException"/> + void Warning(SAXParseException exception); + + /// <summary> + /// Receive notification of a recoverable error. + /// </summary> + /// <remarks> + /// This corresponds to the definition of "error" in section 1.2 + /// of the W3C XML 1.0 Recommendation. For example, a validating + /// parser would use this callback to report the violation of a + /// validity constraint. The default behaviour is to take no + /// action. + /// <para/> + /// The SAX parser must continue to provide normal parsing + /// events after invoking this method: it should still be possible + /// for the application to process the document through to the end. + /// If the application cannot do so, then the parser should report + /// a fatal error even if the XML recommendation does not require + /// it to do so. + /// <para/> + /// Filters may use this method to report other, non-XML errors + /// as well. 
+ /// </remarks> + /// <param name="exception">The error information encapsulated in a SAX parse exception.</param> + /// <exception cref="SAXException">Any SAX exception, possibly wrapping another exception.</exception> + /// <seealso cref="SAXParseException"/> + void Error(SAXParseException exception); + + /// <summary> + /// Receive notification of a non-recoverable error. + /// </summary> + /// <remarks> + /// <strong>There is an apparent contradiction between the + /// documentation for this method and the documentation for + /// <see cref="IContentHandler.EndDocument()"/>. Until this ambiguity + /// is resolved in a future major release, clients should make no + /// assumptions about whether EndDocument() will or will not be + /// invoked when the parser has reported a FatalError() or thrown + /// an exception.</strong> + /// <para/> + /// This corresponds to the definition of "fatal error" in + /// section 1.2 of the W3C XML 1.0 Recommendation. For example, a + /// parser would use this callback to report the violation of a + /// well-formedness constraint. + /// <para/> + /// The application must assume that the document is unusable + /// after the parser has invoked this method, and should continue + /// (if at all) only for the sake of collecting additional error + /// messages: in fact, SAX parsers are free to stop reporting any + /// other events once this method has been invoked. 
+ /// </remarks> + /// <param name="exception">The error information encapsulated in a SAX parse exception.</param> + /// <exception cref="SAXException">Any SAX exception, possibly wrapping another exception.</exception> + /// <seealso cref="SAXParseException"/> + void FatalError(SAXParseException exception); + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/Sax/Ext/Attributes2.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Support/Sax/Ext/Attributes2.cs b/src/Lucene.Net.Benchmark/Support/Sax/Ext/Attributes2.cs new file mode 100644 index 0000000..87248dd --- /dev/null +++ b/src/Lucene.Net.Benchmark/Support/Sax/Ext/Attributes2.cs @@ -0,0 +1,108 @@ +// Attributes2.java - extended Attributes +// http://www.saxproject.org +// Public Domain: no warranty. +// $Id: Attributes2.java,v 1.6 2004/03/08 13:01:00 dmegginson Exp $ + +namespace Sax.Ext +{ + /// <summary> + /// SAX2 extension to augment the per-attribute information + /// provided through <see cref="IAttributes"/>. + /// If an implementation supports this extension, the attributes + /// provided in <see cref="IContentHandler"/> + /// will implement this interface, + /// and the <em>http://xml.org/sax/features/use-attributes2</em> + /// feature flag will have the value <em>true</em>. + /// <para/> + /// <em>This module, both source code and documentation, is in the + /// Public Domain, and comes with <strong>NO WARRANTY</strong>.</em> + /// <para/> + /// XMLReader implementations are not required to support this + /// information, and it is not part of core-only SAX2 distributions. + /// <para/> + /// Note that if an attribute was defaulted (<em>!IsSpecified()</em>) + /// it will of necessity also have been declared (<em>IsDeclared()</em>) + /// in the DTD. + /// Similarly if an attribute's type is anything except CDATA, then it + /// must have been declared. 
+ /// </summary> + /// <since>SAX 2.0 (extensions 1.1 alpha)</since> + /// <author>David Brownell</author> + /// <version>TBS</version> + public interface IAttributes2 : IAttributes + { + /// <summary> + /// Returns false unless the attribute was declared in the DTD. + /// This helps distinguish two kinds of attributes that SAX reports + /// as CDATA: ones that were declared (and hence are usually valid), + /// and those that were not (and which are never valid). + /// </summary> + /// <param name="index">The attribute index (zero-based).</param> + /// <returns>true if the attribute was declared in the DTD, false otherwise.</returns> + /// <exception cref="IndexOutOfRangeException">When the supplied index does not identify an attribute.</exception> + bool IsDeclared(int index); + + /// <summary> + /// Returns false unless the attribute was declared in the DTD. + /// This helps distinguish two kinds of attributes that SAX reports + /// as CDATA: ones that were declared (and hence are usually valid), + /// and those that were not (and which are never valid). + /// </summary> + /// <param name="qName">The XML qualified (prefixed) name.</param> + /// <returns>true if the attribute was declared in the DTD, false otherwise.</returns> + /// <exception cref="ArgumentException">When the supplied name does not identify an attribute.</exception> + bool IsDeclared(string qName); + + /// <summary> + /// Returns false unless the attribute was declared in the DTD. + /// This helps distinguish two kinds of attributes that SAX reports + /// as CDATA: ones that were declared (and hence are usually valid), + /// and those that were not (and which are never valid). 
+ /// </summary> + /// <remarks> + /// Remember that since DTDs do not "understand" namespaces, the + /// namespace URI associated with an attribute may not have come from + /// the DTD. The declaration will have applied to the attribute's + /// <em>qName</em>. + /// </remarks> + /// <param name="uri">The Namespace URI, or the empty string if the name has no Namespace URI.</param> + /// <param name="localName">The attribute's local name.</param> + /// <returns>true if the attribute was declared in the DTD, false otherwise.</returns> + /// <exception cref="ArgumentException">When the supplied names do not identify an attribute.</exception> + bool IsDeclared(string uri, string localName); + + /// <summary> + /// Returns true unless the attribute value was provided + /// by DTD defaulting. + /// </summary> + /// <param name="index">The attribute index (zero-based).</param> + /// <returns>true if the value was found in the XML text, false if the value was provided by DTD defaulting.</returns> + /// <exception cref="IndexOutOfRangeException">When the supplied index does not identify an attribute.</exception> + bool IsSpecified(int index); + + /// <summary> + /// Returns true unless the attribute value was provided + /// by DTD defaulting. + /// </summary> + /// <remarks> + /// Remember that since DTDs do not "understand" namespaces, the + /// namespace URI associated with an attribute may not have come from + /// the DTD. The declaration will have applied to the attribute's + /// <em>qName</em>. 
+ /// </remarks> + /// <param name="uri">The Namespace URI, or the empty string if the name has no Namespace URI.</param> + /// <param name="localName">The attribute's local name.</param> + /// <returns>true if the value was found in the XML text, false if the value was provided by DTD defaulting.</returns> + /// <exception cref="ArgumentException">When the supplied names do not identify an attribute.</exception> + bool IsSpecified(string uri, string localName); + + /// <summary> + /// Returns true unless the attribute value was provided + /// by DTD defaulting. + /// </summary> + /// <param name="qName">The XML qualified (prefixed) name.</param> + /// <returns>true if the value was found in the XML text, false if the value was provided by DTD defaulting.</returns> + /// <exception cref="ArgumentException">When the supplied name does not identify an attribute.</exception> + bool IsSpecified(string qName); + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/Sax/Ext/Attributes2Impl.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Support/Sax/Ext/Attributes2Impl.cs b/src/Lucene.Net.Benchmark/Support/Sax/Ext/Attributes2Impl.cs new file mode 100644 index 0000000..b6c30ed --- /dev/null +++ b/src/Lucene.Net.Benchmark/Support/Sax/Ext/Attributes2Impl.cs @@ -0,0 +1,277 @@ +// Attributes2Impl.java - extended AttributesImpl +// http://www.saxproject.org +// Public Domain: no warranty. +// $Id: Attributes2Impl.java,v 1.5 2004/03/08 13:01:01 dmegginson Exp $ + +using Sax.Helpers; +using System; + +namespace Sax.Ext +{ + /// <summary> + /// SAX2 extension helper for additional Attributes information, + /// implementing the <see cref="IAttributes2"/> interface. 
+ /// </summary> + /// <remarks> + /// <em>This module, both source code and documentation, is in the + /// Public Domain, and comes with <strong>NO WARRANTY</strong>.</em> + /// <para/> + /// This is not part of core-only SAX2 distributions. + /// <para/> + /// The <em>specified</em> flag for each attribute will always + /// be true, unless it has been set to false in the copy constructor + /// or using <see cref="SetSpecified(int, bool)"/>. + /// Similarly, the <em>declared</em> flag for each attribute will + /// always be false, except for defaulted attributes (<em>specified</em> + /// is false), non-CDATA attributes, or when it is set to true using + /// <see cref="SetDeclared(int, bool)"/>. + /// If you change an attribute's type by hand, you may need to modify + /// its <em>declared</em> flag to match. + /// </remarks> + /// <since>SAX 2.0 (extensions 1.1 alpha)</since> + /// <author>David Brownell</author> + /// <version>TBS</version> + public class Attributes2 : Attributes, IAttributes2 + { + private bool[] declared; + private bool[] specified; + + + /// <summary> + /// Construct a new, empty <see cref="Attributes2"/> object. + /// </summary> + public Attributes2() { } + + + /// <summary> + /// Copy an existing Attributes or Attributes2 object. + /// If the object implements Attributes2, values of the + /// <em>specified</em> and <em>declared</em> flags for each + /// attribute are copied. + /// Otherwise the flag values are defaulted to assume no DTD was used, + /// unless there is evidence to the contrary (such as attributes with + /// type other than CDATA, which must have been <em>declared</em>). 
+ /// <para/>This constructor is especially useful inside a + /// <see cref="IContentHandler.StartElement(string, string, string, IAttributes)"/> event. + /// </summary> + /// <param name="atts">The existing <see cref="IAttributes"/> object.</param> + public Attributes2(IAttributes atts) + : base(atts) + { + } + + + //////////////////////////////////////////////////////////////////// + // Implementation of Attributes2 + //////////////////////////////////////////////////////////////////// + + + /// <summary> + /// Returns the current value of the attribute's "declared" flag. + /// </summary> + /// <param name="index">The attribute index (zero-based).</param> + /// <returns>current flag value</returns> + /// <exception cref="IndexOutOfRangeException">When the supplied index does not identify an attribute.</exception> + // javadoc mostly from interface + public bool IsDeclared(int index) + { + if (index < 0 || index >= Length) + throw new IndexOutOfRangeException( + "No attribute at index: " + index); + return declared[index]; + } + + + /// <summary> + /// Returns the current value of the attribute's "declared" flag. + /// </summary> + /// <param name="uri">The Namespace URI, or the empty string if the name has no Namespace URI.</param> + /// <param name="localName">The attribute's local name.</param> + /// <returns>current flag value</returns> + /// <exception cref="ArgumentException">When the supplied names do not identify an attribute.</exception> + // javadoc mostly from interface + public bool IsDeclared(string uri, string localName) + { + int index = GetIndex(uri, localName); + + if (index < 0) + throw new ArgumentException( + "No such attribute: local=" + localName + + ", namespace=" + uri); + return declared[index]; + } + + /// <summary> + /// Returns the current value of the attribute's "declared" flag. 
+ /// </summary> + /// <param name="qName">The XML qualified (prefixed) name.</param> + /// <returns>current flag value</returns> + /// <exception cref="ArgumentException">When the supplied name does not identify an attribute.</exception> + // javadoc mostly from interface + public bool IsDeclared(string qName) + { + int index = GetIndex(qName); + + if (index < 0) + throw new ArgumentException( + "No such attribute: " + qName); + return declared[index]; + } + + /// <summary> + /// Returns the current value of an attribute's "specified" flag. 
+ /// </summary> + /// <param name="index">The attribute index (zero-based).</param> + /// <returns>current flag value</returns> + /// <exception cref="IndexOutOfRangeException">When the supplied index does not identify an attribute.</exception> + public bool IsSpecified(int index) + { + if (index < 0 || index >= Length) + throw new IndexOutOfRangeException( + "No attribute at index: " + index); + return specified[index]; + } + + /// <summary> + /// Returns the current value of an attribute's "specified" flag. + /// </summary> + /// <param name="uri">The Namespace URI, or the empty string if the name has no Namespace URI.</param> + /// <param name="localName">The attribute's local name.</param> + /// <returns>current flag value</returns> + /// <exception cref="ArgumentException">When the supplied names do not identify an attribute.</exception> + public bool IsSpecified(string uri, string localName) + { + int index = GetIndex(uri, localName); + + if (index < 0) + throw new ArgumentException( + "No such attribute: local=" + localName + + ", namespace=" + uri); + return specified[index]; + } + + /// <summary> + /// Returns the current value of an attribute's "specified" flag. + /// </summary> + /// <param name="qName">The XML qualified (prefixed) name.</param> + /// <returns>current flag value</returns> + /// <exception cref="ArgumentException">When the supplied name does not identify an attribute.</exception> + public bool IsSpecified(string qName) + { + int index = GetIndex(qName); + + if (index < 0) + throw new ArgumentException( + "No such attribute: " + qName); + return specified[index]; + } + + + //////////////////////////////////////////////////////////////////// + // Manipulators + //////////////////////////////////////////////////////////////////// + + + /// <summary> + /// Copy an entire Attributes object. 
The "specified" flags are + /// assigned as true, and "declared" flags as false (except when + /// an attribute's type is not CDATA), + /// unless the object is an Attributes2 object. + /// In that case those flag values are all copied. + /// </summary> + /// <seealso cref="Attributes.SetAttributes(IAttributes)"/> + public override void SetAttributes(IAttributes atts) + { + int length = atts.Length; + + base.SetAttributes(atts); + declared = new bool[length]; + specified = new bool[length]; + + if (atts is Attributes2) { + Attributes2 a2 = (Attributes2)atts; + for (int i = 0; i < length; i++) + { + declared[i] = a2.IsDeclared(i); + specified[i] = a2.IsSpecified(i); + } + } else { + for (int i = 0; i < length; i++) + { + declared[i] = !"CDATA".Equals(atts.GetType(i)); + specified[i] = true; + } + } + } + + /// <summary> + /// Add an attribute to the end of the list, setting its + /// "specified" flag to true. To set that flag's value + /// to false, use <see cref="SetSpecified(int, bool)"/>. + /// <p>Unless the attribute <em>type</em> is CDATA, this attribute + /// is marked as being declared in the DTD. To set that flag's value + /// to true for CDATA attributes, use <see cref="SetDeclared(int, bool)"/>. 
+ /// </summary> + /// <seealso cref="Attributes.AddAttribute(string, string, string, string, string)"/> + public override void AddAttribute(string uri, string localName, string qName, + string type, string value) + { + base.AddAttribute(uri, localName, qName, type, value); + + int length = Length; + + if (length < specified.Length) + { + bool[] newFlags; + + newFlags = new bool[length]; + System.Array.Copy(declared, 0, newFlags, 0, declared.Length); + declared = newFlags; + + newFlags = new bool[length]; + System.Array.Copy(specified, 0, newFlags, 0, specified.Length); + specified = newFlags; + } + + specified[length - 1] = true; + declared[length - 1] = !"CDATA".Equals(type); + } + + // javadoc entirely from superclass + public override void RemoveAttribute(int index) + { + int origMax = Length - 1; + + base.RemoveAttribute(index); + if (index != origMax) + { + System.Array.Copy(declared, index + 1, declared, index, + origMax - index); + System.Array.Copy(specified, index + 1, specified, index, + origMax - index); + } + } + + /// <summary> + /// Assign a value to the "declared" flag of a specific attribute. + /// This is normally needed only for attributes of type CDATA, + /// including attributes whose type is changed to or from CDATA. + /// </summary> + /// <param name="index">The index of the attribute (zero-based).</param> + /// <param name="value">The desired flag value.</param> + /// <exception cref="IndexOutOfRangeException">When the supplied index does not identify an attribute.</exception> + public virtual void SetDeclared(int index, bool value) + { + if (index < 0 || index >= Length) + throw new IndexOutOfRangeException( + "No attribute at index: " + index); + declared[index] = value; + } + + /// <summary> + /// Assign a value to the "specified" flag of a specific attribute. + /// This is the only way this flag can be cleared, except clearing + /// by initialization with the copy constructor. 
+ /// </summary> + /// <param name="index">The index of the attribute (zero-based).</param> + /// <param name="value">The desired flag value.</param> + /// <exception cref="IndexOutOfRangeException">When the supplied index does not identify an attribute.</exception> + public virtual void SetSpecified(int index, bool value) + { + if (index < 0 || index >= Length) + throw new IndexOutOfRangeException( + "No attribute at index: " + index); + specified[index] = value; + } + } +}