I see a problem here, in that most of the time in the regex example is spent
compiling the regex itself.
If you change the regex to be a cached static, like this:
private static IDictionary regexes = new Hashtable();
// Static constructor: builds the compiled Regex for each element name that is
// known up front and stores it in the shared cache, so the compilation cost is
// paid once per process instead of on every parsing call.
static ParsingTechnique() {
    regexes[ "firstName" ] = new Regex(
        String.Format("<{0}>(?<value>.*?)</{0}>", "firstName"),
        RegexOptions.Compiled );
    // ...and do the same again for each other element name, because we know
    // what they are; any name we do not know about here is compiled and added
    // to the cache later, the first time it is requested.
}
string[] RegExParsing(string elementName, string xml) {
ArrayList responses = new ArrayList();
Regex re = (Regex) regexes[ elementName ];
if( re == null ) {
re = new Regex(String.Format("<{0}>(?<value>.*?)</{0}>", elementName ),
RegexOptions.Compiled );
regexes[ elementName ] = re;
}
Then the regex example is much faster, as in:
Reg Ex: 00.0156250
XMLTextReader: 00.0468750
XPathNavigator: 00.0937500
Regards
John Farrow
----- Original Message -----
From: "Larry O'Brien" <[EMAIL PROTECTED]>
To: <[EMAIL PROTECTED]>
Sent: Tuesday, February 18, 2003 10:48 AM
Subject: Re: [ADVANCED-DOTNET] Regex for parsing XML - Foolish?
> XmlTextReader looks like the clear winner in terms of performance:
>
> Reg Ex: 00:00:00.3805472
> XMLTextReader: 00:00:00.1702448
> XPathNavigator: 00:00:00.3104464
>
> Thanks for the thoughts!
> Larry
>
> using System;
> using NUnit.Framework;
> using System.Xml;
> using System.Xml.XPath;
> using System.Text.RegularExpressions;
> using System.IO;
> using System.Collections;
>
> /// <summary>
> /// Rough timing comparison of three ways to pull the text of every
> /// occurrence of a named element out of an XML string: a regular
> /// expression, XmlTextReader and XPathNavigator.
> /// </summary>
> [TestFixture]
> public class ParsingTechnique
> {
>     public ParsingTechnique()
>     {
>     }
>
>     /// <summary>
>     /// Runs each technique once over the same XML and prints the elapsed
>     /// wall-clock time. Nothing is asserted; the output is informational.
>     /// </summary>
>     [Test]
>     public void TestParsingSpeed()
>     {
>         string s = //very-long XML string here
>         ParsingTechnique pt = new ParsingTechnique();
>
>         // UtcNow is used for the timestamps so that a local-time adjustment
>         // (e.g. a daylight-saving change) cannot distort the measurement.
>         DateTime start = DateTime.UtcNow;
>         pt.RegExParsing("firstName", s);
>         TimeSpan duration = DateTime.UtcNow - start;
>         Console.WriteLine("Reg Ex: " + duration);
>
>         start = DateTime.UtcNow;
>         pt.StraightReader("firstName", s);
>         duration = DateTime.UtcNow - start;
>         Console.WriteLine("XMLTextReader: " + duration);
>
>         start = DateTime.UtcNow;
>         pt.XPath("firstName", s);
>         duration = DateTime.UtcNow - start;
>         Console.WriteLine("XPathNavigator: " + duration);
>     }
>
>     /// <summary>
>     /// Extracts element values by matching
>     /// &lt;elementName&gt;...&lt;/elementName&gt; with a regular expression.
>     /// </summary>
>     /// <param name="elementName">Name of the element to look for.</param>
>     /// <param name="xml">The XML text to search.</param>
>     /// <returns>The captured content of each match, in document order.</returns>
>     string[] RegExParsing(string elementName, string xml)
>     {
>         ArrayList responses = new ArrayList();
>         // Escape the name: characters such as '.' are legal in XML names
>         // but would otherwise act as regex metacharacters and match
>         // unrelated elements.
>         string pattern = String.Format("<{0}>(?<value>.*?)</{0}>",
>             Regex.Escape(elementName));
>         Regex re = new Regex(pattern);
>         foreach (Match m in re.Matches(xml))
>         {
>             responses.Add(m.Groups["value"].Value);
>         }
>         return (string[]) responses.ToArray(typeof(string));
>     }
>
>     /// <summary>
>     /// Extracts element values by streaming through the document with
>     /// XmlTextReader and taking the first text node inside each matching
>     /// element.
>     /// </summary>
>     /// <param name="elementName">Name of the element to look for.</param>
>     /// <param name="xml">The XML text to read.</param>
>     /// <returns>
>     /// One entry per matching element, in document order; an element
>     /// with no text contributes an empty string.
>     /// </returns>
>     string[] StraightReader(string elementName, string xml)
>     {
>         ArrayList responses = new ArrayList();
>         XmlTextReader xtr = new XmlTextReader(new StringReader(xml));
>         try
>         {
>             while (xtr.Read())
>             {
>                 if (xtr.NodeType != XmlNodeType.Element
>                     || xtr.Name != elementName)
>                 {
>                     continue;
>                 }
>
>                 // <name/> has no content and no end tag to wait for.
>                 if (xtr.IsEmptyElement)
>                 {
>                     responses.Add(String.Empty);
>                     continue;
>                 }
>
>                 // Advance to the element's text, but stop at its end tag
>                 // or at end of input. Without these two exits a textless
>                 // element would either pick up a later element's text or
>                 // loop forever once Read() starts returning false.
>                 string val = String.Empty;
>                 while (xtr.Read())
>                 {
>                     if (xtr.NodeType == XmlNodeType.Text)
>                     {
>                         val = xtr.Value;
>                         break;
>                     }
>                     if (xtr.NodeType == XmlNodeType.EndElement
>                         && xtr.Name == elementName)
>                     {
>                         break;
>                     }
>                 }
>                 responses.Add(val);
>             }
>         }
>         finally
>         {
>             // Close the reader even when malformed XML makes Read() throw.
>             xtr.Close();
>         }
>         return (string[]) responses.ToArray(typeof(string));
>     }
>
>     /// <summary>
>     /// Extracts element values by loading the document into an
>     /// XPathDocument and evaluating the expression "//elementName".
>     /// </summary>
>     /// <param name="elementName">Name of the element to look for.</param>
>     /// <param name="xml">The XML text to load.</param>
>     /// <returns>The string value of each selected node, in document order.</returns>
>     string[] XPath(string elementName, string xml)
>     {
>         ArrayList responses = new ArrayList();
>         XPathDocument doc = new XPathDocument(new StringReader(xml));
>         XPathNavigator nav = doc.CreateNavigator();
>         XPathNodeIterator iter = nav.Select("//" + elementName);
>         while (iter.MoveNext())
>         {
>             responses.Add(iter.Current.Value);
>         }
>         return (string[]) responses.ToArray(typeof(string));
>     }
> }
> }