##  Keary Suska wrote:
##  > >  m[<table\b.*?>(.*)</table>]si;
##  > The (.*) should probably be (.*?) if there is
##  > any possibility of more than one table.

##  Some weeks ago I built the following subroutine,
##  "suche_Klammerinhalt", for matching nested things
##  (such as <table ...> tags; see the sample below, with arbitrary text).
##  It counts opening and closing tags one after the
##  other and returns the content as soon as the
##  balancing closing tag is found.
##  It can be called recursively.

##  suche_Klammerinhalt(OPEN_PATTERN, CLOSE_PATTERN, TEXT)
##
##  Finds the first balanced pair of opening/closing tags in TEXT.
##  OPEN_PATTERN and CLOSE_PATTERN are regular expressions (strings or
##  qr// objects); they are matched case-sensitively with /m in effect.
##
##  Returns the list
##      (FROM, TO, CONTENT, OPEN_TAG, CLOSE_TAG)
##  where FROM is the offset just behind the outermost opening tag,
##  TO is the offset just behind its balancing closing tag, CONTENT is
##  the text between the two tags, and OPEN_TAG / CLOSE_TAG are the
##  matched tag texts themselves.
##  Returns the empty list if no balanced pair exists (or if an
##  argument is undefined).
##
##  Only lexical variables are used, so neither $_ nor any package
##  variable of the caller is modified.
sub suche_Klammerinhalt {
  my ($Klammer_auf, $Klammer_zu, $text) = @_;
  return
    unless defined $Klammer_auf && defined $Klammer_zu && defined $text;

  ##  The closing pattern comes first so that it wins whenever both
  ##  patterns could match at the same position.  Each alternative is
  ##  wrapped in (?:...) so that a "|" inside a caller's pattern cannot
  ##  leak into the alternation.
  my $tag_re   = qr/((?:$Klammer_zu)|(?:$Klammer_auf))/m;
  my $close_re = qr/\A(?:$Klammer_zu)/m;

  my $depth = 0;             ##  number of currently open tags
  my ($open_tag, $von);      ##  outermost opening tag and offset behind it

  while ($text =~ /$tag_re/g) {
    my $tag = $1;

    if ($tag =~ $close_re) {
      ##  A closing tag without a preceding opening tag is skipped;
      ##  counting it would make the depth negative and let a later
      ##  opening tag be reported as the "closing" one.
      next if $depth == 0;

      if (--$depth == 0) {   ##  Schlusz > balancing closing tag
        my $bis = pos($text);
        return (
          $von,
          $bis,
          substr($text, $von, $bis - $von - length($tag)),
          $open_tag,
          $tag);
      }
    }
    else {
      if ($depth++ == 0) {   ##  Anfang > outermost opening tag
        $open_tag = $tag;
        $von      = pos($text);
      }
    }
  }

  return;                    ##  no balanced pair found
}

##  Sample input: six <table ...> tags, one tag per line.  Tables 2, 3
##  and 4 are nested inside table 1, table 5 is nested inside table 4,
##  and table 6 follows after table 1 has been closed.
$sampletext = '
<table 1 bgcolor="#ffffff" width="100%">
<table 2 bgcolor="#dddddd" width="99%">
</table>
<table 3 bgcolor="#eeeeee" width="98%">
<tr><td> 1 </td></tr>
<tr><td> 2 </td></tr>
<tr><td> 3 </td></tr>
<tr><td> 4 </td></tr>
</table>
<table 4 bgcolor="#cccccc" width="97%">
<table 5 bgcolor="#bbbbbb" width="96%">
<tr><td> 1. Feld </td></tr>
<tr><td> 2. Feld </td></tr>
<tr><td> 3. Feld </td></tr>
</table>
</table>
</table>
<table 6 bgcolor="#aaaaaa" width="95%">
</table> ';

##  Ready-to-run sample call: ask for the first balanced pair of
##  "<table ..." / "</table>" tags in the sample text.  The five
##  return values are stored as: the two offsets ($Von, $Bis), the
##  text between the tags, the opening tag and the closing tag.
($Von, $Bis, $Klammerinhalt, $Klammer_auf, $Klammer_zu)
    = suche_Klammerinhalt('<table.*?>', '</table>', $sampletext);

##  Show the opening tag, the enclosed content and the closing tag,
##  each separated by an empty line.
print "\nBeginning tag: $Klammer_auf \n\n"
    . "Content: $Klammerinhalt \n\n"
    . "Closing tag: $Klammer_zu";

## Tiny dictionary German > English:
##   von > from,
##   bis > to,
##   auf > opening,
##   zu  > closing,
##   Klammer > "any of: parenthesis, brace, bracket, any symbol or
##                        tag with such a meaning ... ",
##   Inhalt > content,
##   suche > search
##   Feld > field

##   HTH,   Detlef Lindenthal

##  Reply via email to