Author: jfs
Date: Sun May 25 13:30:16 2014
New Revision: 10417

URL: http://svn.debian.org/wsvn/?sc=1&rev=10417
Log:

- Add an option (--listinput) to read the package files to extract from standard
  input (each given with its precise location in the pool)

- Generalise how informational (INFO) messages are logged and add the option to
  include a timestamp, so that logs record when each run starts and finishes

- Do not move the whole contents of workdir, since this moves the *temporary*
  file itself too! (Consequently, it did not get unlinked and removed from the
  system, leaving a lot of cruft.)

- The --force option now removes the previous directory to ensure that all of
  its contents get replaced without errors

- Use File::Path's remove_tree and make_path and handle errors properly in both
  cases; errors from mkpath (now obsolete) were not being handled properly


Modified:
    man-cgi/extractor/manpage-extractor.pl

Modified: man-cgi/extractor/manpage-extractor.pl
URL: 
http://svn.debian.org/wsvn/man-cgi/extractor/manpage-extractor.pl?rev=10417&op=diff
==============================================================================
--- man-cgi/extractor/manpage-extractor.pl      (original)
+++ man-cgi/extractor/manpage-extractor.pl      Sun May 25 13:30:16 2014
@@ -26,12 +26,13 @@
 use File::Basename;
 use Getopt::Long;
 use File::Temp qw/tempfile/;
-use File::Path;
+use File::Path qw(make_path remove_tree);
 
 # Options
 # -d - debug
 # -f - force extraction
-# -r - read list of packages from stdin
+# -r - read names of packages from stdin
+# -l - read list of package files from stdin
 # -o directory - Output directory (defaults to './manpages-files')
 # -w directory - Work directory (defaults to './work')
 # -a architecture - Only analyse binary packages of this arch
@@ -45,11 +46,13 @@
 my $debug = '';
 my $force = '';
 my $readinput = '';
+my $listinput = '';
 my $ARCHITECTURE = "i386";
 GetOptions ("output=s" => \$OUTPUTDIR,    # string
                "workdir=s"   => \$WORKDIR,      # string
                "architecture=s"   => \$ARCHITECTURE,      # string
                "readinput"  => \$readinput,   # flag
+               "listinput"  => \$listinput,   # flag
                "force"  => \$force,   # flag
                "debug"  => \$debug)   # flag
          or die("Error in command line arguments\n");
@@ -84,6 +87,7 @@
 if ( $readinput ) {
 # Download packages and then extract
        my $mirror = shift;
+       log_info("Starting extraction of selected packages.", 1);
        while ( my $package = <STDIN> ) {
                chomp($package);
                # Obtaint a list of all packages
@@ -94,15 +98,32 @@
                        extract_package($file);
                }
                close PACK;
-               print "INFO: Finished extraction.\n";
+               log_info("Finished extraction.", 1);
+       }
+} elsif ( $listinput ) { 
+# Extract from packages provided
+       my $mirror = shift;
+       log_info("Starting extraction of selected packages.", 1);
+       while ( my $filename = <STDIN> ) {
+               chomp($filename);
+               print "DEBUG: Looking for $filename in $mirror\n" if $debug;
+               $filename = "/".$filename if ( $filename !~ /^\\/ ); # Add a 
separator if it does not exist
+               my $file = $mirror.$filename;
+               if ( -e "$file" ) {
+                       extract_package($file);
+                       log_info("Finished extraction.", 1);
+               } else {
+                       print "ERROR: Cannot find file $file";
+               }       
        }
 } else { 
 # Recursive call
        foreach my $dir (@ARGV) {
                if ( -d $dir ) {
-                       print "INFO: Starting extraction of manpages in 
'$dir'\n";
+                       my $timestamp = localtime(time);
+                       log_info("Starting extraction of manpages in '$dir'", 
1);
                        scan_directory($dir);
-                       print "INFO: Finished extraction.\n";
+                       log_info("Finished extraction.", 1);
                } else {
                        print "ERROR: Will not extract manpages from '$dir', it 
is not a directory\n";
                }
@@ -110,6 +131,18 @@
 }
 
 exit 0;
+
+# Log information with timestamp
+# TODO - generalise for ERROR and DEBUG
+sub log_info {
+       my ($message, $stamp) = @_;
+       $stamp = 0 if ! defined($stamp);
+       my $timestamp = localtime(time);
+       print "INFO: $message ";
+       print "- $timestamp" if $stamp eq 1;
+       print "\n";
+       return;
+}
 
 sub scan_directory  {
        my ($dir) =@_;
@@ -158,7 +191,7 @@
        # Note, this means that we will only analyse one binary package
        # of all the different architectures available
        if ( $arch ne $ARCHITECTURE && $arch ne "all" ) {
-               print "INFO: Skipping package file (architecture '$arch', we 
want '$ARCHITECTURE')\n" if $debug;
+               log_info("Skipping package file (architecture '$arch', we want 
'$ARCHITECTURE')\n") if $debug;
                return 0;
        }
        if ( $EXTENSION eq "dsc" and $debfile =~ /^.*?_(.*?)\.$EXTENSION$/ ) {
@@ -174,16 +207,50 @@
                $mandir = "${OUTPUTDIR}/${pooldir}/${packagename}";
        }
        if ( -e  $mandir ){
+       # Note: IF the directory contain any files, it means that either the 
package
+       # did not contain any files or that there was an error when extracting 
the manpages in
+       # previous runs
+       # TODO: Maybe its best to use an alternative mechanism to avoid going 
through
+       # the same package twice, like keeping an index of extracted packages 
in different
+       # runs and looking for the package there
                if ( ! $force ) {
-                       print "INFO: Skipping package $packagename (version 
'$version' already extracted)\n" if $debug;
+                       log_info("Skipping package $packagename (version 
'$version' already extracted)\n") if $debug;
                        return 0;
                } else {
-                       print "INFO: Forcing overwritting of package 
$packagename (version '$version' already extracted)\n" if $debug;
-               }
-       }
-       mkpath "$mandir" || die ("Could not create $mandir: $!");
-
-       print "INFO: Extracting manpages of $packagename version '$version' in 
$mandir\n";
+                       log_info("Forcing overwritting of package $packagename 
(version '$version' already extracted)\n") if $debug;
+# Remove mandir, it gets recreated again after its removal
+                       my $result = remove_tree($mandir, {verbose => $debug, 
keep_root => 0, safe => 0, error => \my $err});
+                       if (@$err) {
+                               for my $diag (@$err) {
+                                       my ($file, $message) = %$diag;
+                                       if ($file eq '') {
+                                               print "ERROR: Error removing 
$mandir - general error: $message\n";
+                                       }
+                                       else {
+                                               print "ERROR: Error removing 
$mandir - problem unlinking $file: $message\n";
+                                       }
+                               }
+                       }
+               }
+       }
+
+# Creat the mandir
+       if ( make_path ("$mandir", {verbose => $debug, mode => 0755, error => 
\my $err}) == 0 ) {
+# No directory created, check why
+               if (@$err) {
+                       for my $diag (@$err) {
+                               my ($file, $message) = %$diag;
+                               if ($file eq '') {
+                                       print "ERROR: Error creating $mandir - 
general error: $message\n";
+                               }
+                               else {
+                                       print "ERROR: Error creating $mandir - 
problem creating $file: $message\n";
+                               }
+                       }
+               }
+       }
+
+       log_info("Extracting manpages of $packagename version '$version' in 
$mandir\n");
        # You can either do a search in the binary files:
        if ( $EXTENSION eq "deb" ) {
            my $result =  extract_manpages($WORKDIR, $file, $mandir) ;
@@ -196,8 +263,10 @@
                #    if (  -e "$mandir" ) {
                #           rmdir $mandir || die ("Could not remove $mandir: 
$!");
                #    }
-               # Its best to keept it to prevent the script (when its rerun) 
to go through the same
-               # packages twice
+               # TODO: If the directory is kept it will  prevent the script 
(when its rerun) to go through the same
+               # packages twice due to the -e $mandir check above, however, 
this (needlessly) creates
+               # quite a few directories (one per manpage). It might be better 
to keep an index file
+               # of reviewed packages and check from there instead of by using 
empty directories
            }
        }       
        # Now we are done, cleanup
@@ -255,9 +324,9 @@
                # If we have a directory then move all the files in it
                # otherwise, we will return with an error 
                if ( -e "$wdir/usr/" ) {
-                       system "mv $wdir/* $dstdir" ;
+                       system "mv $wdir/usr/ $dstdir" ;
                        if ( $? != 0 ) {
-                               printf STDERR "Error moving directory $wdir to 
$dstdir: $?";
+                               printf STDERR "Error moving directory 
$wdir/usr/ to $dstdir: $?";
                                $result = 1;
                        }  else {
                                # IF we got there everything worked fine and we 
have manpages in the archive
@@ -271,8 +340,8 @@
        }
 
 # Clean up temporary files before returning
-       unlink $tempfile;
        close $tempfileh; 
+       unlink $tempfile or warn "Could not unlink $tempfile: $!";
 # And return with our result
        return $result;
 }


-- 
To UNSUBSCRIBE, email to [email protected]
with a subject of "unsubscribe". Trouble? Contact [email protected]
Archive: https://lists.debian.org/[email protected]

Reply via email to