## Keary Suska wrote:
## > > m[<table\b.*?>(.*)</table>]si;
## > The (.*) should probably be (.*?) if there is
## > any possibility of more than one table.
## Some weeks ago I built the following subroutine
## "suche_Klammerinhalt" for matching nested things
## (such as <table ...> tags; see the sample below, which uses arbitrary text).
## It counts opening and closing tags one after the
## other and returns the content as soon as the
## balancing closing tag is found.
## It can be called recursively.
## suche_Klammerinhalt(OPEN_PATTERN, CLOSE_PATTERN, TEXT)
##
## Finds the first balanced pair of "brackets" (for example nested
## <table ...> ... </table> tags) in TEXT and returns what lies between them.
##
## Arguments:
##   OPEN_PATTERN   regular expression matching one opening tag
##   CLOSE_PATTERN  regular expression matching one closing tag
##   TEXT           the string to search
## Both patterns are interpolated as regular expressions; quotemeta() them
## first if they are meant literally.
##
## Returns, in list context, five values:
##   1. offset in TEXT just behind the outermost opening tag
##   2. offset in TEXT just behind its balancing closing tag
##   3. the text between those two tags (inner nested tags included)
##   4. the opening tag as it was matched
##   5. the closing tag as it was matched
## Returns an empty list if no balanced pair is found or if an argument is
## undefined.
##
## Tags are matched exactly as the patterns describe them, so several tags
## may share one line, and text following a tag on the same line belongs to
## the content rather than to the tag. A closing tag seen before any opening
## tag is ignored. Only lexical variables are used: neither $_ nor any
## package variable of the caller is modified, so the sub may safely be
## called again on the content it returned.
sub suche_Klammerinhalt {
    ## Pragmas are lexically scoped; enabling them here checks this sub
    ## without imposing strictness on the rest of the file.
    use strict;
    use warnings;

    my ($Klammer_auf, $Klammer_zu, $text) = @_;
    return
        unless defined $Klammer_auf
        && defined $Klammer_zu
        && defined $text;

    my $Z = 0;       ## current nesting depth (open tags not yet closed)
    my $von;         ## offset just behind the outermost opening tag
    my $KlammerA;    ## text of the outermost opening tag

    ## Capture group 1 wraps the closing pattern only, so "defined $1" tells
    ## which alternative matched, no matter how many groups the caller's own
    ## patterns contain. The closing alternative is tried first.
    while ($text =~ m{($Klammer_zu)|(?:$Klammer_auf)}g) {
        ## @- and @+ hold the start and end offsets of the whole match.
        my ($anfang, $ende) = ($-[0], $+[0]);
        my $ist_zu  = defined $1;
        my $Klammer = substr($text, $anfang, $ende - $anfang);

        if (!$ist_zu) {
            if ($Z == 0) {    ## outermost opening tag: content starts here
                $KlammerA = $Klammer;
                $von      = $ende;
            }
            $Z++;
            next;
        }

        next if $Z == 0;      ## stray closing tag before any opening tag
        next if --$Z > 0;     ## closes an inner pair only; keep scanning

        ## Depth is back at zero: this closing tag balances the first one.
        return (
            $von,
            $ende,
            substr($text, $von, $anfang - $von),
            $KlammerA,
            $Klammer,
        );
    }

    return;    ## no balanced pair found
}
## Sample input: nested and sibling <table ...> elements numbered 1 to 6,
## written with one tag per line.
$sampletext = '
<table 1 bgcolor="#ffffff" width="100%">
<table 2 bgcolor="#dddddd" width="99%">
</table>
<table 3 bgcolor="#eeeeee" width="98%">
<tr><td> 1 </td></tr>
<tr><td> 2 </td></tr>
<tr><td> 3 </td></tr>
<tr><td> 4 </td></tr>
</table>
<table 4 bgcolor="#cccccc" width="97%">
<table 5 bgcolor="#bbbbbb" width="96%">
<tr><td> 1. Feld </td></tr>
<tr><td> 2. Feld </td></tr>
<tr><td> 3. Feld </td></tr>
</table>
</table>
</table>
<table 6 bgcolor="#aaaaaa" width="95%">
</table> ';

## Ready-to-run demonstration: ask suche_Klammerinhalt for the first balanced
## pair of table tags in the sample. The five results are the start offset,
## the end offset, the enclosed content, the opening tag and the closing tag.
($Von, $Bis, $Klammerinhalt, $Klammer_auf, $Klammer_zu)
    = suche_Klammerinhalt("<table.*?>", "</table>", $sampletext);

## Show the opening tag, the enclosed content and the closing tag, each
## followed by a blank line except the last.
print "\n"
    . "Beginning tag: $Klammer_auf \n\n"
    . "Content: $Klammerinhalt \n\n"
    . "Closing tag: $Klammer_zu";
## Tiny dictionary German > English:
## von > from,
## bis > to,
## auf > opening,
## zu > closing,
## Klammer > "any of: parenthesis, brace, bracket, any symbol or
## tag with such a meaning ... ",
## Inhalt > content,
## suche > search
## Feld > field
## HTH, Detlef Lindenthal