http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/XMLReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/XMLReader.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/XMLReader.cs new file mode 100644 index 0000000..443348d --- /dev/null +++ b/src/Lucene.Net.Benchmark/Support/TagSoup/XMLReader.cs @@ -0,0 +1,1567 @@ +// XMLWriter.java - serialize an XML document. +// Written by David Megginson, da...@megginson.com +// and placed by him into the public domain. +// Extensively modified by John Cowan for TagSoup. +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. +// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + +using Sax; +using Sax.Ext; +using Sax.Helpers; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Globalization; +using System.IO; + +namespace TagSoup +{ + /// <summary> + /// Filter to write an XML document from a SAX event stream. + /// </summary> + /// <remarks> + /// This class can be used by itself or as part of a SAX event + /// stream: it takes as input a series of SAX2 ContentHandler + /// events and uses the information in those events to write + /// an XML document. 
Since this class is a filter, it can also + /// pass the events on down a filter chain for further processing + /// (you can use the XMLWriter to take a snapshot of the current + /// state at any point in a filter chain), and it can be + /// used directly as a ContentHandler for a SAX2 XMLReader. + /// <para> + /// The client creates a document by invoking the methods for + /// standard SAX2 events, always beginning with the + /// <see cref="StartDocument()" /> method and ending with + /// the <see cref="EndDocument()" /> method. There are convenience + /// methods provided so that clients do not have to create empty + /// attribute lists or provide empty strings as parameters; for + /// example, the method invocation + /// </para> + /// <code> + /// w.StartElement("foo"); + /// </code> + /// <para>is equivalent to the regular SAX2 ContentHandler method</para> + /// <code> + /// w.StartElement("", "foo", "", new Attributes()); + /// </code> + /// <para> + /// Except that it is more efficient because it does not allocate + /// a new empty attribute list each time. 
The following code will send + /// a simple XML document to standard output: + /// </para> + /// <code> + /// XMLWriter w = new XMLWriter(); + /// w.StartDocument(); + /// w.StartElement("greeting"); + /// w.Characters("Hello, world!"); + /// w.EndElement("greeting"); + /// w.EndDocument(); + /// </code> + /// <para>The resulting document will look like this:</para> + /// <code> + /// <?xml version="1.0" standalone="yes"?> + /// <greeting>Hello, world!</greeting> + /// </code> + /// <para> + /// In fact, there is an even simpler convenience method, + /// <see cref="DataElement(string, string)"/>, designed for writing elements that + /// contain only character data, so the code to generate the + /// document could be shortened to + /// </para> + /// <code> + /// XMLWriter w = new XMLWriter(); + /// w.StartDocument(); + /// w.DataElement("greeting", "Hello, world!"); + /// w.EndDocument(); + /// </code> + /// <h2>Whitespace</h2> + /// <para> + /// According to the XML Recommendation, <em>all</em> whitespace + /// in an XML document is potentially significant to an application, + /// so this class never adds newlines or indentation. If you + /// insert three elements in a row, as in + /// </para> + /// <code> + /// w.DataElement("item", "1"); + /// w.DataElement("item", "2"); + /// w.DataElement("item", "3"); + /// </code> + /// <para>you will end up with</para> + /// <code> + /// <item>1</item><item>2</item><item>3</item> + /// </code> + /// <para> + /// You need to invoke one of the <c>Characters</c> methods + /// explicitly to add newlines or indentation. Alternatively, you + /// can use <see cref="com.megginson.sax.DataWriter DataWriter" />, which + /// is derived from this class -- it is optimized for writing + /// purely data-oriented (or field-oriented) XML, and does automatic + /// linebreaks and indentation (but does not support mixed content + /// properly). 
+ /// </para> + /// <h2>Namespace Support</h2> + /// <para> + /// The writer contains extensive support for XML Namespaces, so that + /// a client application does not have to keep track of prefixes and + /// supply <c>xmlns</c> attributes. By default, the XML writer will + /// generate Namespace declarations in the form _NS1, _NS2, etc., wherever + /// they are needed, as in the following example: + /// </para> + /// <code> + /// w.StartDocument(); + /// w.EmptyElement("http://www.foo.com/ns/", "foo"); + /// w.EndDocument(); + /// </code> + /// <para>The resulting document will look like this:</para> + /// <code> + /// <?xml version="1.0" standalone="yes"?> + /// <_NS1:foo xmlns:_NS1="http://www.foo.com/ns/"/> + /// </code> + /// <para> + /// In many cases, document authors will prefer to choose their + /// own prefixes rather than using the (ugly) default names. The + /// XML writer allows two methods for selecting prefixes: + /// </para> + /// <list type="number"> + /// <item><description>the qualified name</description></item> + /// <item><description>the <see cref="Prefix" /> property.</description></item> + /// </list> + /// <para> + /// Whenever the XML writer finds a new Namespace URI, it checks + /// to see if a qualified (prefixed) name is also available; if so + /// it attempts to use the name's prefix (as long as the prefix is + /// not already in use for another Namespace URI). 
+ /// </para> + /// <para> + /// Before writing a document, the client can also pre-map a prefix + /// to a Namespace URI with the SetPrefix method: + /// </para> + /// <code> + /// w.SetPrefix("http://www.foo.com/ns/", "foo"); + /// w.StartDocument(); + /// w.EmptyElement("http://www.foo.com/ns/", "foo"); + /// w.EndDocument(); + /// </code> + /// <para>The resulting document will look like this:</para> + /// <code> + /// <?xml version="1.0" standalone="yes"?> + /// <foo:foo xmlns:foo="http://www.foo.com/ns/"/> + /// </code> + /// <para>The default Namespace simply uses an empty string as the prefix:</para> + /// <code> + /// w.SetPrefix("http://www.foo.com/ns/", ""); + /// w.StartDocument(); + /// w.EmptyElement("http://www.foo.com/ns/", "foo"); + /// w.EndDocument(); + /// </code> + /// <para>The resulting document will look like this:</para> + /// <code> + /// <?xml version="1.0" standalone="yes"?> + /// <foo xmlns="http://www.foo.com/ns/"/> + /// </code> + /// <para> + /// By default, the XML writer will not declare a Namespace until + /// it is actually used. Sometimes, this approach will create + /// a large number of Namespace declarations, as in the following + /// example: + /// </para> + /// <code> + /// <?xml version="1.0" standalone="yes"?> + /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> + /// <rdf:Description about="http://www.foo.com/ids/books/12345"> + /// <dc:title xmlns:dc="http://www.purl.org/dc/">A Dark Night</dc:title> + /// <dc:creator xmlns:dc="http://www.purl.org/dc/">Jane Smith</dc:creator> + /// <dc:date xmlns:dc="http://www.purl.org/dc/">2000-09-09</dc:date> + /// </rdf:Description> + /// </rdf:RDF> + /// </code> + /// <para> + /// The "rdf" prefix is declared only once, because the RDF Namespace + /// is used by the root element and can be inherited by all of its + /// descendants; the "dc" prefix, on the other hand, is declared three + /// times, because no higher element uses the Namespace. 
/// To solve this
/// problem, you can instruct the XML writer to predeclare Namespaces
/// on the root element even if they are not used there:
/// </para>
/// <code>
/// w.ForceNSDecl("http://www.purl.org/dc/");
/// </code>
/// <para>
/// Now, the "dc" prefix will be declared on the root element even
/// though it's not needed there, and can be inherited by its
/// descendants:
/// </para>
/// <code>
/// <?xml version="1.0" standalone="yes"?>
/// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
///          xmlns:dc="http://www.purl.org/dc/">
///   <rdf:Description about="http://www.foo.com/ids/books/12345">
///     <dc:title>A Dark Night</dc:title>
///     <dc:creator>Jane Smith</dc:creator>
///     <dc:date>2000-09-09</dc:date>
///   </rdf:Description>
/// </rdf:RDF>
/// </code>
/// <para>
/// This approach is also useful for declaring Namespace prefixes
/// that may be used by qualified names appearing in attribute values or
/// character data.
/// </para>
/// </remarks>
/// <author>David Megginson, da...@megginson.com</author>
/// <version>.2</version>
/// <seealso cref="IXMLFilter" />
/// <seealso cref="IContentHandler" />
public class XMLWriter : XMLFilter, ILexicalHandler
{
    ////////////////////////////////////////////////////////////////////
    // Constructors.
    ////////////////////////////////////////////////////////////////////

    /// <summary>
    /// Create a new XML writer with no parent reader.
    /// <para>
    /// No output destination is supplied, so initialization receives
    /// null, which selects standard output.
    /// </para>
    /// </summary>
    public XMLWriter()
    {
        Init(null);
    }

    /// <summary>
    /// Create a new XML writer with no parent reader.
    /// <para>Output goes to the writer provided.</para>
    /// </summary>
    /// <param name="writer">
    /// The output destination, or null to use standard
    /// output.
    /// </param>
    public XMLWriter(TextWriter writer)
    {
        Init(writer);
    }

    /// <summary>
    /// Create a new XML writer.
/// <para>Use the specified XML reader as the parent.</para>
/// </summary>
/// <param name="xmlreader">
/// The parent in the filter chain, or null
/// for no parent.
/// </param>
public XMLWriter(IXMLReader xmlreader) : base(xmlreader)
{
    Init(null);
}

/// <summary>
/// Create a new XML writer that has the specified XML reader as its
/// parent and writes to the specified writer.
/// </summary>
/// <param name="xmlreader">
/// The parent in the filter chain, or null for no parent.
/// </param>
/// <param name="writer">
/// The output destination, or null to use standard output.
/// </param>
public XMLWriter(IXMLReader xmlreader, TextWriter writer) : base(xmlreader)
{
    Init(writer);
}

/// <summary>
/// Lexical event: end of a CDATA section. This implementation writes nothing.
/// </summary>
public virtual void EndCDATA()
{
}

/// <summary>
/// Lexical event: end of the DTD. This implementation writes nothing.
/// </summary>
public virtual void EndDTD()
{
}

/// <summary>
/// Lexical event: end of an entity. This implementation writes nothing.
/// </summary>
/// <param name="name">The entity name (unused).</param>
public virtual void EndEntity(string name)
{
}

/// <summary>
/// Lexical event: start of a CDATA section. This implementation writes nothing.
/// </summary>
public virtual void StartCDATA()
{
}

/// <summary>
/// Write a <c>DOCTYPE</c> declaration, at most once per writer state.
/// <para>
/// The call is ignored when <paramref name="name" /> is null or when a
/// DOCTYPE has already been written. The override identifiers
/// (<c>overridePublic</c>, <c>overrideSystem</c>), when set, replace the
/// identifiers passed in. Each identifier is quoted with an apostrophe
/// if it contains a double quote, otherwise with a double quote.
/// </para>
/// </summary>
/// <param name="name">The document type name.</param>
/// <param name="publicid">The public identifier; null or empty selects the SYSTEM form.</param>
/// <param name="systemid">The system identifier; null is treated as the empty string.</param>
public virtual void StartDTD(string name, string publicid, string systemid)
{
    // Nothing can be declared without a name, and only one DTD is ever written.
    if (name == null || hasOutputDTD)
    {
        return;
    }
    hasOutputDTD = true;

    // An override wins over the supplied identifier; a missing system id becomes "".
    string systemLiteral = overrideSystem ?? systemid ?? "";
    string publicLiteral = overridePublic ?? publicid;

    Write("<!DOCTYPE ");
    Write(name);

    if (string.IsNullOrEmpty(publicLiteral))
    {
        Write(" SYSTEM ");
    }
    else
    {
        char publicQuote = publicLiteral.IndexOf('"') >= 0 ? '\'' : '"';
        Write(" PUBLIC ");
        Write(publicQuote);
        Write(publicLiteral);
        Write(publicQuote);
        Write(' ');
    }

    char systemQuote = systemLiteral.IndexOf('"') >= 0 ? '\'' : '"';
    Write(systemQuote);
    Write(systemLiteral);
    Write(systemQuote);
    Write(">\n");
}

/// <summary>
/// Lexical event: start of an entity. This implementation writes nothing.
/// </summary>
/// <param name="name">The entity name (unused).</param>
public virtual void StartEntity(string name)
{
}

/// <summary>
/// Internal initialization method.
/// <para>All of the public constructors invoke this method.</para>
/// </summary>
/// <param name="writer">
/// The output destination, or null to use
/// standard output.
/// </param>
private void Init(TextWriter writer)
{
    // Select the destination first, then create fresh bookkeeping tables.
    SetOutput(writer);

    nsSupport = new NamespaceSupport();
    prefixTable = new Hashtable();
    forcedDeclTable = new Hashtable();
    doneDeclTable = new Hashtable();
    outputProperties = new Dictionary<string, string>();
}

/// <summary>
/// Reset the writer so it can be reused for a new document.
/// <para>
/// The element depth and the generated-prefix counter return to zero
/// and the Namespace support object is reset. It is usually a good
/// idea to invoke <see cref="Flush" /> first so that no output is lost.
/// </para>
/// <para>
/// <see cref="StartDocument" /> invokes this method before writing
/// a new document.
/// </para>
/// <para>
/// <strong>Note:</strong> the prefix table, the forced-declaration
/// table and the selected output writer are <em>not</em> touched.
/// </para>
/// </summary>
/// <seealso cref="Flush" />
public virtual void Reset()
{
    nsSupport.Reset();
    prefixCounter = 0;
    elementLevel = 0;
}

/// <summary>
/// Flush the output writer without closing it.
/// <para>
/// Useful when the whole document must reach the destination but the
/// destination should stay open. <see cref="EndDocument" /> invokes
/// this method after writing a document.
/// </para>
/// </summary>
/// <seealso cref="Reset" />
public virtual void Flush()
{
    output.Flush();
}

/// <summary>
/// Set a new output destination for the document.
/// </summary>
/// <param name="writer">
/// The output destination, or null to use
/// standard output.
/// </param>
/// <seealso cref="Flush" />
public virtual void SetOutput(TextWriter writer)
{
    // A null destination falls back to a writer over the process's standard output stream.
    output = writer ?? new StreamWriter(Console.OpenStandardOutput());
}

/// <summary>
/// Record a preferred prefix for a Namespace URI.
/// <para>
/// This only stores the preference; it does not force the Namespace
/// to be declared. Use <see cref="ForceNSDecl(string)" /> as well
/// for that.
/// </para>
/// </summary>
/// <param name="uri">
/// The Namespace URI.
/// </param>
/// <param name="prefix">
/// The preferred prefix, or "" to select
/// the default Namespace.
/// </param>
/// <seealso cref="GetPrefix" />
/// <seealso cref="ForceNSDecl(string)" />
/// <seealso cref="ForceNSDecl(string,string)" />
public virtual void SetPrefix(string uri, string prefix)
{
    prefixTable[uri] = prefix;
}

/// <summary>
/// Look up the prefix stored for a Namespace URI.
/// </summary>
/// <param name="uri">The Namespace URI.</param>
/// <returns>
/// The prefix stored in the prefix table for <paramref name="uri" />,
/// or the empty string when the table has no entry for it.
/// </returns>
/// <seealso cref="SetPrefix" />
public virtual string GetPrefix(string uri)
{
    if (!prefixTable.ContainsKey(uri))
    {
        return string.Empty;
    }
    return (string)prefixTable[uri];
}

/// <summary>
/// Force a Namespace to be declared on the root element.
/// <para>
/// By default, the XMLWriter will declare only the Namespaces
/// needed for an element; as a result, a Namespace may be
/// declared many places in a document if it is not used on the
/// root element.
/// </para>
/// <para>
/// This method forces a Namespace to be declared on the root
/// element even if it is not used there, and reduces the number
/// of xmlns attributes in the document.
/// </para>
/// </summary>
/// <param name="uri">
/// The Namespace URI to declare.
/// </param>
/// <seealso cref="ForceNSDecl(string,string)" />
/// <seealso cref="SetPrefix" />
public virtual void ForceNSDecl(string uri)
{
    forcedDeclTable[uri] = true;
}

/// <summary>
/// Force a Namespace declaration with a preferred prefix.
/// <para>
/// This is a convenience method that invokes <see cref="SetPrefix" />
/// then <see cref="ForceNSDecl(string)" />.
/// </para>
/// </summary>
/// <param name="uri">
/// The Namespace URI to declare on the root element.
/// </param>
/// <param name="prefix">
/// The preferred prefix for the Namespace, or ""
/// for the default Namespace.
/// </param>
/// <seealso cref="SetPrefix" />
/// <seealso cref="ForceNSDecl(string)" />
public virtual void ForceNSDecl(string uri, string prefix)
{
    SetPrefix(uri, prefix);
    ForceNSDecl(uri);
}

////////////////////////////////////////////////////////////////////
// Methods from Sax.IContentHandler.
////////////////////////////////////////////////////////////////////

/// <summary>
/// Write the XML declaration at the beginning of the document.
/// Pass the event on down the filter chain for further processing.
/// <para>
/// The writer is reset first. The declaration is skipped when the
/// <c>OMIT_XML_DECLARATION</c> output property is "yes"; an unset
/// property is treated as "no". The version defaults to "1.0" and
/// standalone defaults to "yes"; the encoding pseudo-attribute is
/// written only when an output encoding is set.
/// </para>
/// </summary>
/// <exception cref="SAXException">
/// If there is an error
/// writing the XML declaration, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="IContentHandler.StartDocument" />
public override void StartDocument()
{
    Reset();

    // The generic dictionary indexer throws KeyNotFoundException for a missing
    // key, so the "no" default must be applied through TryGetValue.
    string omitDeclaration;
    if (!outputProperties.TryGetValue(OMIT_XML_DECLARATION, out omitDeclaration) || omitDeclaration == null)
    {
        omitDeclaration = "no";
    }

    if (!"yes".Equals(omitDeclaration))
    {
        Write("<?xml");

        Write(" version=\"");
        Write(version ?? "1.0");
        Write("\"");

        if (!string.IsNullOrEmpty(outputEncoding))
        {
            Write(" encoding=\"");
            Write(outputEncoding);
            Write("\"");
        }

        // The declaration must be closed for an explicit standalone value
        // as well as for the default.
        Write(" standalone=\"");
        Write(standalone ?? "yes");
        Write("\"?>\n");
    }

    base.StartDocument();
}

/// <summary>
/// Write a newline at the end of the document.
/// Pass the event on down the filter chain for further processing,
/// then flush the output.
/// </summary>
/// <exception cref="SAXException">
/// If there is an error
/// writing the newline or flushing the output, or if a handler
/// further down the filter chain raises an exception.
/// </exception>
/// <seealso cref="IContentHandler.EndDocument" />
public override void EndDocument()
{
    Write('\n');
    base.EndDocument();
    try
    {
        Flush();
    }
    catch (IOException e)
    {
        // Report I/O failures as SAX exceptions, keeping the cause.
        throw new SAXException(e.Message, e);
    }
}

/// <summary>
/// Write a start tag.
/// Pass the event on down the filter chain for further processing.
/// </summary>
/// <param name="uri">
/// The Namespace URI, or the empty string if none
/// is available.
/// </param>
/// <param name="localName">
/// The element's local (unprefixed) name (required).
/// </param>
/// <param name="qName">
/// The element's qualified (prefixed) name, or the
/// empty string is none is available. This method will
/// use the qName as a template for generating a prefix
/// if necessary, but it is not guaranteed to use the
/// same qName.
/// </param>
/// <param name="atts">
/// The element's attribute list (must not be null).
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the start tag, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="IContentHandler.StartElement" />
public override void StartElement(string uri, string localName, string qName, IAttributes atts)
{
    elementLevel++;
    nsSupport.PushContext();

    // Emit a DOCTYPE ahead of the first element when one is forced and none was written yet.
    bool needsDoctype = forceDTD && !hasOutputDTD;
    if (needsDoctype)
    {
        StartDTD(localName ?? qName, "", "");
    }

    Write('<');
    WriteName(uri, localName, qName, true);
    WriteAttributes(atts);

    // Forced Namespace declarations belong on the root element only.
    if (elementLevel == 1)
    {
        ForceNSDecls();
    }
    WriteNSDecls();
    Write('>');

    // In HTML mode the content of script and style elements is written unescaped.
    if (htmlMode)
    {
        if (qName.Equals("script") || qName.Equals("style"))
        {
            cdataElement = true;
        }
    }

    base.StartElement(uri, localName, qName, atts);
}

/// <summary>
/// Write an end tag.
/// Pass the event on down the filter chain for further processing.
/// </summary>
/// <param name="uri">
/// The Namespace URI, or the empty string if none
/// is available.
/// </param>
/// <param name="localName">
/// The element's local (unprefixed) name (required).
/// </param>
/// <param name="qName">
/// The element's qualified (prefixed) name, or the
/// empty string is none is available. This method will
/// use the qName as a template for generating a prefix
/// if necessary, but it is not guaranteed to use the
/// same qName.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the end tag, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="IContentHandler.EndElement" />
public override void EndElement(string uri, string localName, string qName)
{
    // In HTML mode the void elements listed below never get an end tag.
    bool omitEndTag = false;
    if (htmlMode)
    {
        bool htmlNamespace = uri.Equals("http://www.w3.org/1999/xhtml") || uri.Equals("");
        if (htmlNamespace)
        {
            omitEndTag = qName.Equals("area") || qName.Equals("base") || qName.Equals("basefont")
                || qName.Equals("br") || qName.Equals("col") || qName.Equals("frame")
                || qName.Equals("hr") || qName.Equals("img") || qName.Equals("input")
                || qName.Equals("isindex") || qName.Equals("link") || qName.Equals("meta")
                || qName.Equals("param");
        }
    }

    if (!omitEndTag)
    {
        Write("</");
        WriteName(uri, localName, qName, true);
        Write('>');
    }

    // Closing the root element: finish its line.
    if (elementLevel == 1)
    {
        Write('\n');
    }

    cdataElement = false;
    base.EndElement(uri, localName, qName);
    nsSupport.PopContext();
    elementLevel--;
}

/// <summary>
/// Write character data.
/// Pass the event on down the filter chain for further processing.
/// <para>
/// The characters are escaped unless the writer is currently inside
/// a CDATA element, in which case they are written one by one as-is.
/// </para>
/// </summary>
/// <param name="ch">
/// The array of characters to write.
/// </param>
/// <param name="start">
/// The starting position in the array.
/// </param>
/// <param name="length">
/// The number of characters to write.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the characters, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="IContentHandler.Characters" />
public override void Characters(char[] ch, int start, int length)
{
    if (cdataElement)
    {
        int index = start;
        while (index < start + length)
        {
            Write(ch[index]);
            index++;
        }
    }
    else
    {
        WriteEsc(ch, start, length, false);
    }
    base.Characters(ch, start, length);
}

/// <summary>
/// Write ignorable whitespace.
/// Pass the event on down the filter chain for further processing.
/// </summary>
/// <param name="ch">
/// The array of characters to write.
/// </param>
/// <param name="start">
/// The starting position in the array.
/// </param>
/// <param name="length">
/// The number of characters to write.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the whitespace, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="IContentHandler.IgnorableWhitespace" />
public override void IgnorableWhitespace(char[] ch, int start, int length)
{
    // Whitespace goes through the same escaping routine as character data.
    WriteEsc(ch, start, length, false);
    base.IgnorableWhitespace(ch, start, length);
}

/// <summary>
/// Write a processing instruction.
/// Pass the event on down the filter chain for further processing.
/// <para>
/// A newline follows the instruction when it appears outside of
/// any element.
/// </para>
/// </summary>
/// <param name="target">
/// The PI target.
/// </param>
/// <param name="data">
/// The PI data.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the PI, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="IContentHandler.ProcessingInstruction" />
public override void ProcessingInstruction(string target, string data)
{
    Write("<?");
    Write(target);
    Write(' ');
    Write(data);
    Write("?>");

    bool outsideAnyElement = elementLevel < 1;
    if (outsideAnyElement)
    {
        Write('\n');
    }

    base.ProcessingInstruction(target, data);
}

/// <summary>
/// Write an empty element.
/// This method writes an empty element tag rather than a start tag
/// followed by an end tag. Both a <see cref="StartElement" />
/// and an <see cref="EndElement(string,string,string)" /> event will
/// be passed on down the filter chain.
/// </summary>
/// <param name="uri">
/// The element's Namespace URI, or the empty string
/// if the element has no Namespace or if Namespace
/// processing is not being performed.
/// </param>
/// <param name="localName">
/// The element's local name (without prefix). This
/// parameter must be provided.
/// </param>
/// <param name="qName">
/// The element's qualified name (with prefix), or
/// the empty string if none is available. This parameter
/// is strictly advisory: the writer may or may not use
/// the prefix attached.
/// </param>
/// <param name="atts">
/// The element's attribute list.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the empty tag, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="StartElement" />
/// <seealso cref="EndElement(string,string,string)" />
public virtual void EmptyElement(string uri, string localName, string qName, IAttributes atts)
{
    nsSupport.PushContext();
    Write('<');
    WriteName(uri, localName, qName, true);
    WriteAttributes(atts);

    // This method never increments elementLevel, so an empty element is the
    // root exactly when the level is still 0. Forced declarations go on the root.
    if (elementLevel == 0)
    {
        ForceNSDecls();
    }
    WriteNSDecls();
    Write("/>");

    base.StartElement(uri, localName, qName, atts);
    base.EndElement(uri, localName, qName);

    // Balance the PushContext above so this element's Namespace declarations
    // do not stay in scope for following siblings.
    nsSupport.PopContext();
}

/// <summary>
/// Start a new element without a qname or attributes.
/// <para>
/// This method will provide a default empty attribute
/// list and an empty string for the qualified name.
/// It invokes <see cref="StartElement(string, string, string, IAttributes)"/>
/// directly.
/// </para>
/// </summary>
/// <param name="uri">
/// The element's Namespace URI.
/// </param>
/// <param name="localName">
/// The element's local name.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the start tag, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="StartElement(string, string, string, IAttributes)" />
public virtual void StartElement(string uri, string localName)
{
    StartElement(uri, localName, "", EMPTY_ATTS);
}

/// <summary>
/// Start a new element without a qname, attributes or a Namespace URI.
/// <para>
/// This method will provide an empty string for the
/// Namespace URI, and empty string for the qualified name,
/// and a default empty attribute list. It invokes
/// <see cref="StartElement(string, string, string, IAttributes)" />
/// directly.
/// </para>
/// </summary>
/// <param name="localName">
/// The element's local name.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the start tag, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="StartElement(string, string, string, IAttributes)" />
public virtual void StartElement(string localName)
{
    StartElement(string.Empty, localName, string.Empty, EMPTY_ATTS);
}

/// <summary>
/// End an element without a qname.
/// <para>
/// The qualified name is passed as an empty string to
/// <see cref="EndElement(string, string, string)" />, which is
/// invoked directly.
/// </para>
/// </summary>
/// <param name="uri">
/// The element's Namespace URI.
/// </param>
/// <param name="localName">
/// The element's local name.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the end tag, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="EndElement(string, string, string)" />
public virtual void EndElement(string uri, string localName)
{
    EndElement(uri, localName, string.Empty);
}

/// <summary>
/// End an element without a Namespace URI or qname.
/// <para>
/// This method will supply an empty string for the qName
/// and an empty string for the Namespace URI.
/// It invokes <see cref="EndElement(string, string, string)" />
/// directly.
/// </para>
/// </summary>
/// <param name="localName">
/// The element's local name.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the end tag, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="EndElement(string, string, string)" />
public virtual void EndElement(string localName)
{
    EndElement(string.Empty, localName, string.Empty);
}

/// <summary>
/// Add an empty element without a qname or attributes.
/// <para>
/// The qualified name is passed as an empty string and the attribute
/// list as the shared empty list to
/// <see cref="EmptyElement(string, string, string, IAttributes)" />,
/// which is invoked directly.
/// </para>
/// </summary>
/// <param name="uri">
/// The element's Namespace URI.
/// </param>
/// <param name="localName">
/// The element's local name.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the empty tag, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="EmptyElement(string, string, string, IAttributes)" />
public virtual void EmptyElement(string uri, string localName)
{
    EmptyElement(uri, localName, string.Empty, EMPTY_ATTS);
}

/// <summary>
/// Add an empty element without a Namespace URI, qname or attributes.
/// <para>
/// This method will supply an empty string for the qname,
/// and empty string for the Namespace URI, and an empty
/// attribute list. It invokes
/// <see cref="EmptyElement(string, string, string, IAttributes)" />
/// directly.
/// </para>
/// </summary>
/// <param name="localName">
/// The element's local name.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the empty tag, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="EmptyElement(string, string, string, IAttributes)" />
public virtual void EmptyElement(string localName)
{
    EmptyElement(string.Empty, localName, string.Empty, EMPTY_ATTS);
}

/// <summary>
/// Write an element with character data content.
/// <para>
/// Convenience method that writes a complete element: start tag,
/// character data, end tag.
/// </para>
/// <para>
/// It invokes, in order,
/// <see cref="StartElement(string, string, string, IAttributes)" />,
/// <see cref="Characters(string)" /> and
/// <see cref="EndElement(string, string, string)" />.
/// </para>
/// </summary>
/// <param name="uri">
/// The element's Namespace URI.
/// </param>
/// <param name="localName">
/// The element's local name.
/// </param>
/// <param name="qName">
/// The element's default qualified name.
/// </param>
/// <param name="atts">
/// The element's attributes.
/// </param>
/// <param name="content">
/// The character data content.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the element, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="StartElement(string, string, string, IAttributes)" />
/// <seealso cref="Characters(string)" />
/// <seealso cref="EndElement(string, string, string)" />
public virtual void DataElement(string uri, string localName, string qName, IAttributes atts, string content)
{
    // Start tag, then content, then end tag; the order is the contract.
    StartElement(uri, localName, qName, atts);
    Characters(content);
    EndElement(uri, localName, qName);
}

/// <summary>
/// Write an element with character data content but no attributes.
/// <para>
/// This is a convenience method to write a complete element
/// with character data content, including the start tag
/// and end tag.
/// This method provides an empty string
/// for the qname and an empty attribute list.
/// </para>
/// <para>
/// The call is forwarded to the five-argument
/// <c>DataElement</c> overload, which invokes
/// <see cref="StartElement(string, string, string, IAttributes)" />,
/// followed by
/// <see cref="Characters(string)" />, followed by
/// <see cref="EndElement(string, string, string)" />.
/// </para>
/// </summary>
/// <param name="uri">
/// The element's Namespace URI.
/// </param>
/// <param name="localName">
/// The element's local name.
/// </param>
/// <param name="content">
/// The character data content.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the element, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="StartElement(string, string, string, IAttributes)" />
/// <seealso cref="Characters(string)" />
/// <seealso cref="EndElement(string, string, string)" />
public virtual void DataElement(string uri, string localName, string content)
{
    // No qualified name and no attributes are available in this overload.
    string noQName = string.Empty;
    DataElement(uri, localName, noQName, EMPTY_ATTS, content);
}

/// <summary>
/// Writes a complete element with character data content, given only
/// its local name.
/// <para>
/// The Namespace URI and the qualified name are both passed as the
/// empty string and the attributes as the shared empty attribute list.
/// </para>
/// <para>
/// The call is forwarded to the five-argument
/// <c>DataElement</c> overload, which invokes
/// <see cref="StartElement(string, string, string, IAttributes)" />,
/// followed by
/// <see cref="Characters(string)" />, followed by
/// <see cref="EndElement(string, string, string)" />.
/// </para>
/// </summary>
/// <param name="localName">
/// The element's local name.
/// </param>
/// <param name="content">
/// The character data content.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the element, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="StartElement(string, string, string, IAttributes)" />
/// <seealso cref="Characters(string)" />
/// <seealso cref="EndElement(string, string, string)" />
public virtual void DataElement(string localName, string content)
{
    // Only the local name and the text are known here.
    string noNamespaceUri = string.Empty;
    string noQName = string.Empty;
    DataElement(noNamespaceUri, localName, noQName, EMPTY_ATTS, content);
}

/// <summary>
/// Writes a string of character data, with XML escaping.
/// <para>
/// The string is copied into a character array and the whole array
/// is handed to <see cref="Characters(char[], int, int)" />.
/// </para>
/// </summary>
/// <param name="data">
/// The character data.
/// </param>
/// <exception cref="SAXException">
/// If there is an error
/// writing the string, or if a handler further down
/// the filter chain raises an exception.
/// </exception>
/// <seealso cref="Characters(char[], int, int)" />
public virtual void Characters(string data)
{
    char[] buffer = data.ToCharArray();
    int offset = 0;
    int count = buffer.Length;
    Characters(buffer, offset, count);
}

/// <summary>
/// Force all Namespaces to be declared.
/// This method is used on the root element to ensure that
/// the predeclared Namespaces all appear.
/// </summary>
private void ForceNSDecls()
{
    // Every key of the forced-declaration table is handed to DoPrefix as
    // the Namespace URI, with no preferred qualified name, as an element
    // name.
    foreach (object key in forcedDeclTable.Keys)
    {
        string forcedUri = (string)key;
        DoPrefix(forcedUri, null, true);
    }
}

/// <summary>
/// Determine the prefix for an element or attribute name.
/// TODO: this method probably needs some cleanup.
/// </summary>
/// <param name="uri">
/// The Namespace URI.
/// </param>
/// <param name="qName">
/// The qualified name (optional); this will be used
/// to indicate the preferred prefix if none is currently
/// bound.
/// </param>
/// <param name="isElement">
/// true if this is an element name, false
/// if it is an attribute name (which cannot use the
/// default Namespace).
/// </param>
/// <returns>
/// <c>null</c> when <paramref name="uri"/> is the empty string;
/// otherwise the prefix bound to the URI, which is the empty string
/// when the default Namespace is used.
/// </returns>
private string DoPrefix(string uri, string qName, bool isElement)
{
    string currentDefault = nsSupport.GetUri("");

    // Names in no Namespace never get a prefix. For an element, an active
    // default Namespace has to be undeclared so the name really is in no
    // Namespace.
    if (uri == "")
    {
        if (isElement && currentDefault != null)
        {
            nsSupport.DeclarePrefix("", "");
        }
        return null;
    }

    // 1. A binding that is already in scope wins. Elements may use the
    //    default Namespace when it matches.
    string candidate = (isElement && currentDefault != null && uri.Equals(currentDefault))
        ? ""
        : nsSupport.GetPrefix(uri);
    if (candidate != null)
    {
        return candidate;
    }

    // The empty prefix cannot be taken from a table for an attribute, nor
    // while a default Namespace is already declared.
    bool emptyPrefixUnusable = !isElement || currentDefault != null;

    // 2. Try the prefix this writer used for the URI before, unless it is
    //    unusable or currently bound to something.
    if (doneDeclTable.ContainsKey(uri))
    {
        candidate = (string)doneDeclTable[uri];
        if ((emptyPrefixUnusable && "".Equals(candidate)) || nsSupport.GetUri(candidate) != null)
        {
            candidate = null;
        }
    }

    // 3. Otherwise try the prefix table, under the same restrictions.
    if (candidate == null && prefixTable.ContainsKey(uri))
    {
        candidate = (string)prefixTable[uri];
        if ((emptyPrefixUnusable && "".Equals(candidate)) || nsSupport.GetUri(candidate) != null)
        {
            candidate = null;
        }
    }

    // 4. Otherwise take the hint from the qualified name, if there is one.
    if (candidate == null && !string.IsNullOrEmpty(qName))
    {
        int colon = qName.IndexOf(':');
        if (colon != -1)
        {
            candidate = qName.Substring(0, colon);
        }
        else if (isElement && currentDefault == null)
        {
            // Unprefixed element name and no default Namespace yet:
            // make this URI the default Namespace.
            candidate = "";
        }
    }

    // 5. Still nothing usable (or the candidate is already bound):
    //    generate "__NS<n>" prefixes until an unbound one is found.
    while (candidate == null || nsSupport.GetUri(candidate) != null)
    {
        prefixCounter++;
        candidate = "__NS" + prefixCounter;
    }

    nsSupport.DeclarePrefix(candidate, uri);
    doneDeclTable[uri] = candidate;
    return candidate;
}

/// <summary>
/// Write a raw character.
/// </summary>
/// <param name="c">
/// The character to write.
/// </param>
/// <exception cref="SAXException">
/// If there is an error writing
/// the character, this method will throw an IOException
/// wrapped in a SAXException.
/// </exception>
private void Write(char c)
{
    try
    {
        output.Write(c);
    }
    catch (IOException e)
    {
        // Surface I/O failures through the SAX exception type.
        throw new SAXException(e.ToString(), e);
    }
}

/// <summary>
/// Write a raw string.
/// </summary>
/// <param name="s">
/// The string to write.
/// </param>
/// <exception cref="SAXException">
/// If there is an error writing the string,
/// this method will throw an IOException wrapped in a SAXException
/// </exception>
private void Write(string s)
{
    try
    {
        output.Write(s);
    }
    catch (IOException e)
    {
        // Surface I/O failures through the SAX exception type.
        throw new SAXException(e.ToString(), e);
    }
}

/// <summary>
/// Write out an attribute list, escaping values.
/// The names will have prefixes added to them.
/// <para>
/// In HTML mode an attribute recognised by <c>BoolAttribute</c> is
/// written in minimized form (name only, no value).
/// </para>
/// </summary>
/// <param name="atts">
/// The attribute list to write.
/// </param>
/// <exception cref="SAXException">
/// If there is an error writing
/// the attribute list, this method will throw an
/// IOException wrapped in a SAXException.
/// </exception>
private void WriteAttributes(IAttributes atts)
{
    int len = atts.Length;
    for (int i = 0; i < len; i++)
    {
        char[] ch = atts.GetValue(i).ToCharArray();
        Write(' ');
        WriteName(atts.GetURI(i), atts.GetLocalName(i), atts.GetQName(i), false);
        if (htmlMode && BoolAttribute(atts.GetLocalName(i), atts.GetQName(i), atts.GetValue(i)))
        {
            // Minimized boolean attribute: skip only this attribute's
            // value. (A 'break' here would silently drop every attribute
            // that follows the boolean one.)
            continue;
        }
        Write("=\"");
        WriteEsc(ch, 0, ch.Length, true);
        Write('"');
    }
}

/// <summary>
/// Returns true if the attribute is an HTML boolean attribute, that is,
/// its value equals its own name and the name is listed in
/// <c>_bools</c>.
/// </summary>
/// <param name="localName">
/// The attribute's local name; when null or empty the name is taken
/// from <paramref name="qName"/> instead.
/// </param>
/// <param name="qName">
/// The attribute's qualified name; only the part after the first colon
/// (or all of it, when there is no colon) is used.
/// </param>
/// <param name="value">
/// The attribute's value.
/// </param>
private bool BoolAttribute(string localName, string qName, string value)
{
    string name = localName;
    if (string.IsNullOrEmpty(name))
    {
        if (qName == null)
        {
            // No name available at all: cannot be a boolean attribute.
            return false;
        }
        // Substring(int) takes everything after the colon. The previous
        // Substring(i + 1, qName.Length) passed an end index where .NET
        // expects a length and therefore always threw.
        int colon = qName.IndexOf(':');
        name = colon == -1 ? qName : qName.Substring(colon + 1);
    }
    if (!name.Equals(value))
    {
        return false;
    }
    return Array.IndexOf(_bools, name) >= 0;
}

/// <summary>
/// Write an array of data characters with escaping.
/// </summary>
/// <remarks>
/// <c>&amp;</c>, <c>&lt;</c> and <c>&gt;</c> are always replaced by
/// their predefined entity references; a double quote is replaced only
/// inside attribute values. When the writer is not in Unicode mode,
/// characters above U+007F are written as decimal character
/// references; a valid surrogate pair is written as one reference to
/// the code point it encodes.
/// </remarks>
/// <param name="ch">
/// The array of characters.
/// </param>
/// <param name="start">
/// The starting position.
/// </param>
/// <param name="length">
/// The number of characters to use.
/// </param>
/// <param name="isAttVal">
/// true if this is an attribute value literal.
/// </param>
/// <exception cref="SAXException">
/// If there is an error writing
/// the characters, this method will throw an
/// IOException wrapped in a SAXException.
/// </exception>
private void WriteEsc(char[] ch, int start, int length, bool isAttVal)
{
    int end = start + length; // exclusive
    for (int i = start; i < end; i++)
    {
        char c = ch[i];
        switch (c)
        {
            case '&':
                Write("&amp;");
                break;
            case '<':
                Write("&lt;");
                break;
            case '>':
                Write("&gt;");
                break;
            case '\"':
                if (isAttVal)
                {
                    // The value is delimited by double quotes.
                    Write("&quot;");
                }
                else
                {
                    Write('\"');
                }
                break;
            default:
                if (!unicodeMode && c > '\u007f')
                {
                    int codePoint = c;
                    // A character reference must name a code point, not a
                    // UTF-16 code unit, so fold a surrogate pair into one.
                    if (char.IsHighSurrogate(c) && i + 1 < end && char.IsLowSurrogate(ch[i + 1]))
                    {
                        codePoint = char.ConvertToUtf32(c, ch[i + 1]);
                        i++;
                    }
                    Write("&#");
                    Write(codePoint.ToString(CultureInfo.InvariantCulture));
                    Write(';');
                }
                else
                {
                    Write(c);
                }
                break;
        }
    }
}

/// <summary>
/// Write out the list of Namespace declarations.
/// <para>
/// One <c>xmlns</c> or <c>xmlns:prefix</c> attribute is written for
/// every prefix returned by the Namespace support object's
/// <c>GetDeclaredPrefixes()</c>; a prefix with no URI is declared
/// with an empty value.
/// </para>
/// </summary>
/// <exception cref="SAXException">
/// This method will throw
/// an IOException wrapped in a SAXException if
/// there is an error writing the Namespace
/// declarations.
/// </exception>
private void WriteNSDecls()
{
    IEnumerable prefixes = nsSupport.GetDeclaredPrefixes();
    foreach (string prefix in prefixes)
    {
        string uri = nsSupport.GetUri(prefix) ?? "";
        char[] ch = uri.ToCharArray();
        Write(' ');
        if ("".Equals(prefix))
        {
            // Default Namespace declaration.
            Write("xmlns=\"");
        }
        else
        {
            Write("xmlns:");
            Write(prefix);
            Write("=\"");
        }
        WriteEsc(ch, 0, ch.Length, true);
        Write('\"');
    }
}

/// <summary>
/// Write an element or attribute name.
/// </summary>
/// <param name="uri">
/// The Namespace URI.
/// </param>
/// <param name="localName">
/// The local name.
/// </param>
/// <param name="qName">
/// The prefixed name, if available, or the empty string.
/// </param>
/// <param name="isElement">
/// true if this is an element name, false if it
/// is an attribute name.
/// </param>
/// <exception cref="SAXException">
/// This method will throw an
/// IOException wrapped in a SAXException if there is
/// an error writing the name.
/// </exception>
private void WriteName(string uri, string localName, string qName, bool isElement)
{
    string prefix = DoPrefix(uri, qName, isElement);
    if (!string.IsNullOrEmpty(prefix))
    {
        Write(prefix);
        Write(':');
    }
    if (!string.IsNullOrEmpty(localName))
    {
        Write(localName);
    }
    else
    {
        // No local name: use the part of the qualified name after the
        // colon (all of it when there is no colon, since IndexOf is -1).
        int i = qName.IndexOf(':');
        Write(qName.Substring(i + 1));
    }
}

////////////////////////////////////////////////////////////////////
// Default LexicalHandler implementation
////////////////////////////////////////////////////////////////////

/// <summary>
/// Write a comment.
/// <para>
/// A space is written after any hyphen that is followed by another
/// hyphen, and after a hyphen that ends the comment text, so that
/// the output contains neither "--" inside the comment nor "--->" at
/// its end (both are forbidden by XML).
/// </para>
/// </summary>
/// <param name="ch">
/// The array holding the comment text.
/// </param>
/// <param name="start">
/// The starting position in the array.
/// </param>
/// <param name="length">
/// The number of characters to use.
/// </param>
public virtual void Comment(char[] ch, int start, int length)
{
    int end = start + length; // exclusive
    Write("<!--");
    for (int i = start; i < end; i++)
    {
        Write(ch[i]);
        // Only look at ch[i + 1] while it is still inside the requested
        // range; the previous "i + 1 <= start + length" test read one
        // character past it and could run off the end of the array.
        if (ch[i] == '-' && (i + 1 == end || ch[i + 1] == '-'))
        {
            Write(' ');
        }
    }
    Write("-->");
}


////////////////////////////////////////////////////////////////////
// Output properties
////////////////////////////////////////////////////////////////////

/// <summary>
/// Get the value of an output property.
/// </summary>
/// <param name="key">
/// The property name, e.g. <see cref="ENCODING" />.
/// </param>
/// <returns>
/// The stored value, or <c>null</c> if the property has not been set.
/// </returns>
public virtual string GetOutputProperty(string key)
{
    // Return null for an unset property instead of letting the
    // dictionary indexer throw.
    string value;
    return outputProperties.TryGetValue(key, out value) ? value : null;
}

/// <summary>
/// Set an output property and update the writer state derived from it.
/// <para>
/// The value is always stored. In addition, <see cref="ENCODING" />
/// turns Unicode mode on when the value starts with "utf" (ignoring
/// case), <see cref="METHOD" /> turns HTML mode on when the value is
/// "html", <see cref="DOCTYPE_PUBLIC" /> and
/// <see cref="DOCTYPE_SYSTEM" /> record the identifier and set the
/// force-DTD flag, and <see cref="VERSION" /> and
/// <see cref="STANDALONE" /> record their value.
/// </para>
/// </summary>
/// <param name="key">
/// The property name.
/// </param>
/// <param name="value">
/// The property value.
/// </param>
public virtual void SetOutputProperty(string key, string value)
{
    outputProperties[key] = value;
    switch (key)
    {
        case ENCODING:
            outputEncoding = value;
            // StartsWith copes with values shorter than three characters,
            // where Substring(0, 3) threw.
            unicodeMode = value != null
                && value.StartsWith("utf", StringComparison.OrdinalIgnoreCase);
            break;
        case METHOD:
            htmlMode = "html".Equals(value);
            break;
        case DOCTYPE_PUBLIC:
            overridePublic = value;
            forceDTD = true;
            break;
        case DOCTYPE_SYSTEM:
            overrideSystem = value;
            forceDTD = true;
            break;
        case VERSION:
            version = value;
            break;
        case STANDALONE:
            standalone = value;
            break;
    }
}

////////////////////////////////////////////////////////////////////
// Constants.
////////////////////////////////////////////////////////////////////

// Shared empty attribute list passed by the convenience overloads.
private readonly IAttributes EMPTY_ATTS = new Attributes();

// Output property names accepted by SetOutputProperty / GetOutputProperty.
public const string CDATA_SECTION_ELEMENTS =
    "cdata-section-elements";
public const string DOCTYPE_PUBLIC = "doctype-public";
public const string DOCTYPE_SYSTEM = "doctype-system";
public const string ENCODING = "encoding";
public const string INDENT = "indent"; // currently ignored
public const string MEDIA_TYPE = "media-type"; // currently ignored
public const string METHOD = "method"; // currently html or xml
public const string OMIT_XML_DECLARATION = "omit-xml-declaration";
public const string STANDALONE = "standalone"; // currently ignored
public const string VERSION = "version";

////////////////////////////////////////////////////////////////////
// Internal state.
////////////////////////////////////////////////////////////////////


// HTML attribute names that BoolAttribute accepts as boolean attributes.
private readonly string[] _bools = new string[]
{
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected"
};

// Namespace URI -> prefix; consulted by DoPrefix when no binding is in scope.
private Hashtable prefixTable;

// Keys are the Namespace URIs that ForceNSDecls passes to DoPrefix.
private Hashtable forcedDeclTable;

// Namespace URI -> prefix last declared for it by DoPrefix.
private Hashtable doneDeclTable;

// NOTE(review): presumably the current element nesting depth; it is not
// touched in this part of the file - confirm against the element handlers.
private int elementLevel;

// Destination of all Write calls.
private TextWriter output;

// Tracks which prefixes and URIs are currently declared.
private NamespaceSupport nsSupport;

// Suffix counter for the generated "__NS<n>" prefixes.
private int prefixCounter;

// Backing store for GetOutputProperty / SetOutputProperty.
private IDictionary<string, string> outputProperties;

// State derived from output properties by SetOutputProperty.
private bool unicodeMode;
private string outputEncoding = "";
private bool htmlMode;
private bool forceDTD;

// NOTE(review): not referenced in this part of the file; presumably
// records whether a DOCTYPE has already been written - confirm.
private bool hasOutputDTD;

private string overridePublic;
private string overrideSystem;
private string version;
private string standalone;

// NOTE(review): not referenced in this part of the file; presumably set
// while inside an element whose content is written unescaped - confirm.
private bool cdataElement;
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/definitions/html.stml ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/definitions/html.stml b/src/Lucene.Net.Benchmark/Support/TagSoup/definitions/html.stml new file mode 100644 index 0000000..4cab973 --- /dev/null +++ b/src/Lucene.Net.Benchmark/Support/TagSoup/definitions/html.stml @@ -0,0 +1,249 @@ +<!-- +// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. +// +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. +// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
-->

<!--
  State table: each <state> lists its transitions. A <tr> matches either
  a literal character (char='…') or a symbol (symbol='…', one of the
  <symbol> ids declared below) and names the action to run and the state
  to enter next. Characters that are markup in XML are written as entity
  references (&apos; &lt; &gt; &amp; &quot;) so the file stays well-formed.
-->
<statetable xmlns='http://www.ccil.org/~cowan/XML/tagsoup/stml'
            version='1.0'>

  <symbol id='EOF'/>
  <symbol id='S'/>
  <symbol id='default'/>

  <action id='A_ADUP'/>
  <action id='A_ADUP_SAVE'/>
  <action id='A_ADUP_STAGC'/>
  <action id='A_ANAME'/>
  <action id='A_ANAME_ADUP_STAGC'/>
  <action id='A_AVAL'/>
  <action id='A_AVAL_STAGC'/>
  <action id='A_CDATA'/>
  <action id='A_CMNT'/>
  <action id='A_DECL'/>
  <action id='A_ENTITY'/>
  <action id='A_ENTITY_START'/>
  <action id='A_ETAG'/>
  <action id='A_EMPTYTAG'/>
  <action id='A_ANAME_ADUP'/>
  <action id='A_GI'/>
  <action id='A_GI_STAGC'/>
  <action id='A_LT'/>
  <action id='A_LT_PCDATA'/>
  <action id='A_MINUS'/>
  <action id='A_MINUS2'/>
  <action id='A_MINUS3'/>
  <action id='A_PCDATA'/>
  <action id='A_PI'/>
  <action id='A_PITARGET'/>
  <action id='A_PITARGET_PI'/>
  <action id='A_SAVE'/>
  <action id='A_SKIP'/>
  <action id='A_SP'/>
  <action id='A_STAGC'/>
  <action id='A_UNGET'/>
  <action id='A_UNSAVE_PCDATA'/>

  <state id='S_ANAME'>
    <tr symbol='default' action='A_SAVE' newstate='S_ANAME'/>
    <tr char='=' action='A_ANAME' newstate='S_AVAL'/>
    <tr char='&gt;' action='A_ANAME_ADUP_STAGC' newstate='S_PCDATA'/>
    <tr char='/' action='A_ANAME_ADUP' newstate='S_EMPTYTAG'/>
    <tr symbol='EOF' action='A_ANAME_ADUP_STAGC' newstate='S_DONE'/>
    <tr symbol='S' action='A_ANAME' newstate='S_EQ'/>
  </state>
  <state id='S_APOS'>
    <tr symbol='default' action='A_SAVE' newstate='S_APOS'/>
    <tr char='&apos;' action='A_AVAL' newstate='S_TAGWS'/>
    <tr symbol='EOF' action='A_AVAL_STAGC' newstate='S_DONE'/>
    <tr symbol='S' action='A_SP' newstate='S_APOS'/>
  </state>
  <state id='S_AVAL'>
    <tr symbol='default' action='A_SAVE' newstate='S_STAGC'/>
    <tr char='&quot;' action='A_SKIP' newstate='S_QUOT'/>
    <tr char='&apos;' action='A_SKIP' newstate='S_APOS'/>
    <tr char='&gt;' action='A_AVAL_STAGC' newstate='S_PCDATA'/>
    <tr symbol='EOF' action='A_AVAL_STAGC' newstate='S_DONE'/>
    <tr symbol='S' action='A_SKIP' newstate='S_AVAL'/>
  </state>
  <state id='S_CDATA'>
    <tr symbol='default' action='A_SAVE' newstate='S_CDATA'/>
    <tr char='&lt;' action='A_SAVE' newstate='S_CDATA2'/>
    <tr symbol='EOF' action='A_PCDATA' newstate='S_DONE'/>
  </state>
  <state id='S_CDATA2'>
    <tr symbol='default' action='A_SAVE' newstate='S_CDATA'/>
    <tr char='/' action='A_UNSAVE_PCDATA' newstate='S_ETAG'/>
    <tr symbol='EOF' action='A_UNSAVE_PCDATA' newstate='S_DONE'/>
  </state>
  <state id='S_COM'>
    <tr symbol='default' action='A_SAVE' newstate='S_COM2'/>
    <tr char='-' action='A_SKIP' newstate='S_COM2'/>
    <tr symbol='EOF' action='A_CMNT' newstate='S_DONE'/>
  </state>
  <state id='S_COM2'>
    <tr symbol='default' action='A_SAVE' newstate='S_COM2'/>
    <tr char='-' action='A_SKIP' newstate='S_COM3'/>
    <tr symbol='EOF' action='A_CMNT' newstate='S_DONE'/>
  </state>
  <state id='S_COM3'>
    <tr symbol='default' action='A_MINUS' newstate='S_COM2'/>
    <tr char='-' action='A_SKIP' newstate='S_COM4'/>
    <tr symbol='EOF' action='A_CMNT' newstate='S_DONE'/>
  </state>
  <state id='S_COM4'>
    <tr symbol='default' action='A_MINUS2' newstate='S_COM2'/>
    <tr char='-' action='A_MINUS3' newstate='S_COM4'/>
    <tr char='&gt;' action='A_CMNT' newstate='S_PCDATA'/>
    <tr symbol='EOF' action='A_CMNT' newstate='S_DONE'/>
  </state>
  <state id='S_DECL'>
    <tr symbol='default' action='A_SAVE' newstate='S_DECL2'/>
    <tr char='-' action='A_SKIP' newstate='S_COM'/>
    <tr char='[' action='A_SKIP' newstate='S_BB'/>
    <tr char='&gt;' action='A_SKIP' newstate='S_PCDATA'/>
    <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
  </state>
  <state id='S_DECL2'>
    <tr symbol='default' action='A_SAVE' newstate='S_DECL2'/>
    <tr char='&gt;' action='A_DECL' newstate='S_PCDATA'/>
    <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
  </state>
  <state id='S_ENT'>
    <tr symbol='default' action='A_ENTITY' newstate='S_ENT'/>
    <tr symbol='EOF' action='A_ENTITY' newstate='S_DONE'/>
  </state>
  <state id='S_EQ'>
    <tr symbol='default' action='A_ADUP_SAVE' newstate='S_ANAME'/>
    <tr char='=' action='A_SKIP' newstate='S_AVAL'/>
    <tr char='&gt;' action='A_ADUP_STAGC' newstate='S_PCDATA'/>
    <tr symbol='EOF' action='A_ADUP_STAGC' newstate='S_DONE'/>
    <tr symbol='S' action='A_SKIP' newstate='S_EQ'/>
  </state>
  <state id='S_ETAG'>
    <tr symbol='default' action='A_SAVE' newstate='S_ETAG'/>
    <tr char='&gt;' action='A_ETAG' newstate='S_PCDATA'/>
    <tr symbol='EOF' action='A_ETAG' newstate='S_DONE'/>
    <tr symbol='S' action='A_SKIP' newstate='S_ETAG'/>
  </state>
  <state id='S_GI'>
    <tr symbol='default' action='A_SAVE' newstate='S_GI'/>
    <tr char='/' action='A_SKIP' newstate='S_EMPTYTAG'/>
    <tr char='&gt;' action='A_GI_STAGC' newstate='S_PCDATA'/>
    <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
    <tr symbol='S' action='A_GI' newstate='S_TAGWS'/>
  </state>
  <state id='S_NCR'>
    <tr symbol='default' action='A_ENTITY' newstate='S_NCR'/>
    <tr symbol='EOF' action='A_ENTITY' newstate='S_DONE'/>
  </state>
  <state id='S_XNCR'>
    <tr symbol='default' action='A_ENTITY' newstate='S_XNCR'/>
    <tr symbol='EOF' action='A_ENTITY' newstate='S_DONE'/>
  </state>
  <state id='S_PCDATA'>
    <tr symbol='default' action='A_SAVE' newstate='S_PCDATA'/>
    <tr char='&amp;' action='A_ENTITY_START' newstate='S_ENT'/>
    <tr char='&lt;' action='A_PCDATA' newstate='S_TAG'/>
    <tr symbol='EOF' action='A_PCDATA' newstate='S_DONE'/>
  </state>
  <state id='S_PI'>
    <tr symbol='default' action='A_SAVE' newstate='S_PI'/>
    <tr char='&gt;' action='A_PI' newstate='S_PCDATA'/>
    <tr symbol='EOF' action='A_PI' newstate='S_DONE'/>
  </state>
  <state id='S_PITARGET'>
    <tr symbol='default' action='A_SAVE' newstate='S_PITARGET'/>
    <tr char='&gt;' action='A_PITARGET_PI' newstate='S_PCDATA'/>
    <tr symbol='EOF' action='A_PITARGET_PI' newstate='S_DONE'/>
    <tr symbol='S' action='A_PITARGET' newstate='S_PI'/>
  </state>
  <state id='S_QUOT'>
    <tr symbol='default' action='A_SAVE' newstate='S_QUOT'/>
    <tr char='&quot;' action='A_AVAL' newstate='S_TAGWS'/>
    <tr symbol='EOF' action='A_AVAL_STAGC' newstate='S_DONE'/>
    <tr symbol='S' action='A_SP' newstate='S_QUOT'/>
  </state>
  <state id='S_STAGC'>
    <tr symbol='default' action='A_SAVE' newstate='S_STAGC'/>
    <tr char='&gt;' action='A_AVAL_STAGC' newstate='S_PCDATA'/>
    <tr symbol='EOF' action='A_AVAL_STAGC' newstate='S_DONE'/>
    <tr symbol='S' action='A_AVAL' newstate='S_TAGWS'/>
  </state>
  <state id='S_TAG'>
    <tr symbol='default' action='A_SAVE' newstate='S_GI'/>
    <tr char='!' action='A_SKIP' newstate='S_DECL'/>
    <tr char='/' action='A_SKIP' newstate='S_ETAG'/>
    <tr char='?' action='A_SKIP' newstate='S_PITARGET'/>
    <tr char='&lt;' action='A_SAVE' newstate='S_TAG'/>
    <tr symbol='EOF' action='A_LT_PCDATA' newstate='S_DONE'/>
    <tr symbol='S' action='A_LT' newstate='S_PCDATA'/>
  </state>
  <state id='S_TAGWS'>
    <tr symbol='default' action='A_SAVE' newstate='S_ANAME'/>
    <tr char='/' action='A_SKIP' newstate='S_EMPTYTAG'/>
    <tr char='&gt;' action='A_STAGC' newstate='S_PCDATA'/>
    <tr symbol='EOF' action='A_STAGC' newstate='S_DONE'/>
    <tr symbol='S' action='A_SKIP' newstate='S_TAGWS'/>
  </state>
  <state id='S_EMPTYTAG'>
    <tr symbol='S' action='A_SKIP' newstate='S_TAGWS'/>
    <tr symbol='default' action='A_SAVE' newstate='S_ANAME'/>
    <tr char='&gt;' action='A_EMPTYTAG' newstate='S_PCDATA'/>
  </state>
  <state id='S_BB'>
    <tr char='C' action='A_SKIP' newstate='S_BBC'/>
    <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
    <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
  </state>
  <state id='S_BBC'>
    <tr char='D' action='A_SKIP' newstate='S_BBCD'/>
    <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
    <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
  </state>
  <state id='S_BBCD'>
    <tr char='A' action='A_SKIP' newstate='S_BBCDA'/>
    <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
    <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
  </state>
  <state id='S_BBCDA'>
    <tr char='T' action='A_SKIP' newstate='S_BBCDAT'/>
    <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
    <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
  </state>
  <state id='S_BBCDAT'>
    <tr char='A' action='A_SKIP' newstate='S_BBCDATA'/>
    <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
    <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
  </state>
  <state id='S_BBCDATA'>
    <tr char='[' action='A_SKIP' newstate='S_CDSECT'/>
    <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
    <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
  </state>
  <state id='S_CDSECT'>
    <tr char=']' action='A_SAVE' newstate='S_CDSECT1'/>
    <tr symbol='default' action='A_SAVE' newstate='S_CDSECT'/>
    <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
  </state>
  <state id='S_CDSECT1'>
    <tr char=']' action='A_SAVE' newstate='S_CDSECT2'/>
    <tr symbol='default' action='A_SAVE' newstate='S_CDSECT'/>
    <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
  </state>
  <state id='S_CDSECT2'>
    <tr char='&gt;' action='A_CDATA' newstate='S_PCDATA'/>
    <tr symbol='default' action='A_SAVE' newstate='S_CDSECT'/>
    <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
  </state>
  <state id='S_DONE'/>
</statetable>