Author: tille
Date: 2008-07-26 15:30:38 +0000 (Sat, 26 Jul 2008)
New Revision: 2326

Modified:
   trunk/community/talks/200808_debconf8/get-archive-pages
Log:
Clean up index pages


Modified: trunk/community/talks/200808_debconf8/get-archive-pages
===================================================================
--- trunk/community/talks/200808_debconf8/get-archive-pages     2008-07-26 
14:26:11 UTC (rev 2325)
+++ trunk/community/talks/200808_debconf8/get-archive-pages     2008-07-26 
15:30:38 UTC (rev 2326)
@@ -37,11 +37,32 @@
            my $uri = URI->new($url);
            my $page = $ua->get($url, Host => $uri->host );
            unless ( $page->is_success ) { next } ; # some mailing lists 
startet later ...
-           (my @data) = $page->content =~ 
m#.*<!--TNAVEND-->\n(.+)\n<hr>\n.*#gs;
-           #print "[EMAIL PROTECTED]";
+           (my @data) = $page->content =~ 
m#.*<!--TNAVEND-->\n(.+)<hr>.*<!--BNAVSTART-->.*#gs;
+           #print "$year-$month\n$data\n";
            my $datafile = "${year}-${month}" ;
            unless ( open(HTMLSNIP, ">$datafile") ) { die("Unable to open 
$datafile"); }
-           print HTMLSNIP "@data";
+           my ($content, $subject, $author) ;
+           foreach $content (@data) {
+               my @lines = split(/(\n)/, $content);
+               # print "------> @lines\n" ;
+               my $line;
+               foreach $line (@lines) {
+                   if ( $line =~ /^\s*<\/?ul>\s*$/ || 
+                         $line =~ /^\s*<\/?li>\s*$/ ||
+                         $line =~ /^\s*<li>[^<]+<\/li>\s*$/ ||
+                         $line =~ /^\s*$/) { next ; }
+                   if ( ($subject, $author) = $line =~ 
m#<li><strong>.*html">(.+)</a></strong>\s*<em>(.+)</em>#gs ) {
+                       $_ = $subject ;
+                       $_ =~ s/^Re:\s*//i ;       # Remove Re:
+                       $_ =~ s/^\[[^\]]+\]\s*// ; # Remove other list markers
+                       $_ =~ s/\s*\(fwd\)\s*//i ; # Remove (fwd)
+                       $subject = $_ ;
+                       print HTMLSNIP "$subject ; $author\n";
+                   } else {
+                       print HTMLSNIP "$line\n";
+                   }
+               }
+           }
            close HTMLSNIP ;
        }
     }


_______________________________________________
debian-med-commit mailing list
debian-med-commit@lists.alioth.debian.org
http://lists.alioth.debian.org/mailman/listinfo/debian-med-commit

Reply via email to