I see a problem here, in that most of the time in the regex example is spent
compiling the regex itself.

If you change the regex to be a cached static, like this:

// Cache of Regex instances keyed by element name, so that each pattern is
// constructed (and compiled) once instead of on every parsing call.
private static readonly IDictionary regexes = new Hashtable();

// Static constructor: pre-populates the cache with the regex for each
// element name that is known up front.
static ParsingTechnique() {

    regexes[ "firstName" ] = new Regex(String.Format("<{0}>(?<value>.*?)</{0}>",
        "firstName"), RegexOptions.Compiled );

    // and do again for each element name, cos we know what they are, and even
    // if we don't we add it later

}



// Cached-regex variant of RegExParsing: looks up the Regex for elementName in
// the static `regexes` table and only constructs a new one when none is stored.
// Only the opening of the method is shown in this snippet.
string[] RegExParsing(string elementName, string xml) {

ArrayList responses = new ArrayList();

// Reuse the Regex already stored for this element name, if there is one.
Regex re = (Regex) regexes[ elementName ];

if( re == null ) {

    // Nothing cached for this element name: build the compiled regex now ...
    re = new Regex(String.Format("<{0}>(?<value>.*?)</{0}>", elementName ),

    RegexOptions.Compiled );

    // ... and store it so later calls with the same element name reuse it.
    regexes[ elementName ] = re;

}

Then the regex example is much faster, as in:

Reg Ex:         00.0156250
XMLTextReader:  00.0468750
XPathNavigator: 00.0937500

Regards

John Farrow







----- Original Message -----
From: "Larry O'Brien" <[EMAIL PROTECTED]>
To: <[EMAIL PROTECTED]>
Sent: Tuesday, February 18, 2003 10:48 AM
Subject: Re: [ADVANCED-DOTNET] Regex for parsing XML - Foolish?


> XmlTextReader looks like the clear winner in terms of performance:
>
> Reg Ex:         00:00:00.3805472
> XMLTextReader:  00:00:00.1702448
> XPathNavigator: 00:00:00.3104464
>
> Thanks for the thoughts!
> Larry
>
> using System;
> using NUnit.Framework;
> using System.Xml;
> using System.Xml.XPath;
> using System.Text.RegularExpressions;
> using System.IO;
> using System.Collections;
>
>         [TestFixture]
>         public class ParsingTechnique
>         {
>                 public ParsingTechnique()
>                 {
>                 }
>
>                 [Test]
>         public void TestParsingSpeed()
>                 {
>                         string s = //very-long XML string here
>                         ParsingTechnique pt = new ParsingTechnique();
>                         DateTime start = DateTime.Now;
>                         pt.RegExParsing("firstName", s);
>                         TimeSpan duration = DateTime.Now - start;
>                         Console.WriteLine("Reg Ex: " + duration);
>                         start = DateTime.Now;
>                         pt.StraightReader("firstName", s);
>                         duration = DateTime.Now - start;
>                         Console.WriteLine("XMLTextReader: " + duration);
>                         start = DateTime.Now;
>                         pt.XPath("firstName", s);
>                         duration = DateTime.Now - start;
>                         Console.WriteLine("XPathNavigator: " +
> duration);
>                 }
>
>                 string[] RegExParsing(string elementName, string xml)
>                 {
>                         ArrayList responses = new ArrayList();
>                         Regex re = new
> Regex(String.Format("<{0}>(?<value>.*?)</{0}>", elementName));
>                         MatchCollection matches = re.Matches(xml);
>                         foreach(Match m in matches)
>                         {
>                                 string value = m.Groups["value"].Value;
>                                 responses.Add(value);
>                         }
>                         return (string[])
> responses.ToArray(typeof(string));
>                 }
>
>                 string[] StraightReader(string elementName, string xml)
>                 {
>                         ArrayList responses = new ArrayList();
>                         XmlTextReader xtr = new XmlTextReader(new
> StringReader(xml));
>                         while(xtr.Read())
>                         {
>                                 if(xtr.NodeType == XmlNodeType.Element
> && xtr.Name == elementName)
>                                 {
>                                         while(xtr.NodeType !=
> XmlNodeType.Text)
>                                         {
>                                                 xtr.Read();
>                                         }
>                                         string val = xtr.Value;
>                                         responses.Add(val);
>                                 }
>                         }
>                         xtr.Close();
>                         return (string[])
> responses.ToArray(typeof(string));
>                 }
>
>                 string[] XPath(string elementName, string  xml)
>                 {
>                         ArrayList responses = new ArrayList();
>                         XPathDocument doc = new XPathDocument(new
> StringReader(xml));
>                         XPathNavigator nav = doc.CreateNavigator();
>                         XPathNodeIterator iter = (XPathNodeIterator)
> nav.Evaluate("//"+elementName);
>                         while(iter.MoveNext())
>                         {
>                                 string val = iter.Current.Value;
>                                 responses.Add(val);
>                         }
>                         return (string[])
> responses.ToArray(typeof(string));
>                 }
>         }
> }

Reply via email to