## Keary Suska wrote:
## > > m[<table\b.*?>(.*)</table>]si;
## > The (.*) should probably be (.*?) if there is
## > any possibility of more than one table.
## Some weeks ago I built the following subroutine
## "suche_Klammerinhalt" for matching nested constructs
## (such as <table ...> tags; see the sample text below).
## It counts opening and closing tags one after the
## other and returns the content as soon as the
## balancing closing tag is found.
## It can be called recursively.

use strict;
use warnings;

## suche_Klammerinhalt($Klammer_auf, $Klammer_zu, $text)
##
## Finds the first balanced opening/closing tag pair in $text.
##
## Parameters:
##   $Klammer_auf  regex (string or qr//) matching an opening tag
##   $Klammer_zu   regex (string or qr//) matching a closing tag
##   $text         the string to scan; it is not modified
##
## Returns the list
##   ($von, $bis, $inhalt, $tag_auf, $tag_zu)
## where $von is the offset just after the outermost opening tag,
## $bis is the offset just after its balancing closing tag, $inhalt
## is the text between the two tags, and $tag_auf / $tag_zu are the
## matched tag texts.  Returns the empty list when an argument is
## undefined or no balanced pair exists.
##
## Only lexical variables are used, so the caller's $_ and package
## variables stay untouched and the routine is safe to call
## recursively (e.g. on the returned content).
sub suche_Klammerinhalt {
    my ($Klammer_auf, $Klammer_zu, $text) = @_;

    return
        unless defined $Klammer_auf
        && defined $Klammer_zu
        && defined $text;

    my $depth = 0;    ## number of currently open tags
    my $from;         ## offset just after the outermost opening tag
    my $open_tag;     ## text of the outermost opening tag

    ## The closing pattern is tried first and is the only part wrapped
    ## in a capture group owned by this routine, so "defined $1" tells
    ## closers from openers even if the caller's patterns contain
    ## groups of their own.  Only the tag itself is matched (not the
    ## rest of the line), so several tags may share one line.  /m is
    ## kept so that ^ and $ inside caller patterns stay line-based.
    while ($text =~ m/($Klammer_zu)|(?:$Klammer_auf)/gm) {
        my ($tag_start, $tag_end) = ($-[0], $+[0]);

        if (defined $1) {
            ## Closing tag.  One seen before any opening tag is stray
            ## and must not drive the counter negative.
            next if $depth == 0;
            next if --$depth > 0;

            return (
                $from,
                $tag_end,
                substr($text, $from, $tag_start - $from),
                $open_tag,
                substr($text, $tag_start, $tag_end - $tag_start),
            );
        }

        ## Opening tag: remember the outermost one only.
        if ($depth++ == 0) {
            $from     = $tag_end;
            $open_tag = substr($text, $tag_start, $tag_end - $tag_start);
        }
    }

    return;    ## no balanced pair found
}

## Sample markup with nested tables.  The pieces are joined with
## single spaces, so all tags share one line; the list layout below
## only visualises the nesting.
my $sampletext = join ' ',
    '',
    '<table 1 bgcolor="#ffffff" width="100%">',
        '<table 2 bgcolor="#dddddd" width="99%">',
        '</table>',
        '<table 3 bgcolor="#eeeeee" width="98%">',
            '<tr><td> 1 </td></tr>',
            '<tr><td> 2 </td></tr>',
            '<tr><td> 3 </td></tr>',
            '<tr><td> 4 </td></tr>',
        '</table>',
        '<table 4 bgcolor="#cccccc" width="97%">',
            '<table 5 bgcolor="#bbbbbb" width="96%">',
                '<tr><td> 1. Feld </td></tr>',
                '<tr><td> 2. Feld </td></tr>',
                '<tr><td> 3. Feld </td></tr>',
            '</table>',
        '</table>',
    '</table>',
    '<table 6 bgcolor="#aaaaaa" width="95%">',
    '</table>',
    '';

## The following ready-to-run
## sample sub call returns the balanced beginning
## and closing "<TABLE ..." tags and the expression between them.
my ($Von, $Bis, $Klammerinhalt, $Klammer_auf, $Klammer_zu) =
    suche_Klammerinhalt("<table.*?>", "</table>", $sampletext);

## ... and now print:
print " Beginning tag: $Klammer_auf \n Content: $Klammerinhalt \n Closing tag: $Klammer_zu";

## Tiny dictionary German > English:
##   von     > from,
##   bis     > to,
##   auf     > opening,
##   zu      > closing,
##   Klammer > "any of: parenthesis, brace, bracket, any symbol or
##              tag with such a meaning ... ",
##   Inhalt  > content,
##   suche   > search
##   Feld    > field
## HTH, Detlef Lindenthal