Author: jfs
Date: Tue Nov  5 00:26:01 2013
New Revision: 10319

URL: http://svn.debian.org/wsvn/?sc=1&rev=10319
Log:
- Use getopt long (more versatile) instead of the short version.
- Make sure we only recurse through directories, reject anything else provided
- Improve debug messages for some situations 
- Do not remove directories created for packages that do not have manpages. 
That way 
  the script skeeps them faster when (re)processing the archive
- Setup a proper template for the temporary files. Also, make sure that 
temporary files are created in the WORKDIR


Modified:
    man-cgi/extractor/manpage-extractor.pl

Modified: man-cgi/extractor/manpage-extractor.pl
URL: 
http://svn.debian.org/wsvn/man-cgi/extractor/manpage-extractor.pl?rev=10319&op=diff
==============================================================================
--- man-cgi/extractor/manpage-extractor.pl      (original)
+++ man-cgi/extractor/manpage-extractor.pl      Tue Nov  5 00:26:01 2013
@@ -24,9 +24,10 @@
 
 use strict;
 use File::Basename;
-use Getopt::Std;
+use Getopt::Long;
 use File::Temp qw/tempfile/;
 use File::Path;
+
 # Options
 # -d - debug
 # -f - force extraction
@@ -34,19 +35,33 @@
 # -o directory - Output directory (defaults to './manpages-files')
 # -w directory - Work directory (defaults to './work')
 # -a architecture - Only analyse binary packages of this arch
-getopts('rdo:w:a:');
-use vars qw/$opt_r $opt_f $opt_d $opt_o $opt_w $opt_a $opt_r/;
-
-if ( $#ARGV == -1  ) {
-# TODO: fix for -r
-    die "usage: $0 [-dfs] [-o dir] [-w dir] [-a arch] <package pool 
directory>\n";
-}
-
-# TODO: Use Getopt some more?
+
+
 my $pwd = `pwd`;
 chomp $pwd;
-my $OUTPUTDIR = $opt_o || $pwd."/manpages-files";
-my $WORKDIR = $opt_w || $pwd."/work";
+# Default values for options
+my $OUTPUTDIR = $pwd."/manpages-files";
+my $WORKDIR = $pwd."/work";
+my $debug = '';
+my $force = '';
+my $readinput = '';
+my $ARCHITECTURE = "i386";
+GetOptions ("output=s" => \$OUTPUTDIR,    # string
+               "workdir=s"   => \$WORKDIR,      # string
+               "architecture=s"   => \$ARCHITECTURE,      # string
+               "readinput"  => \$readinput,   # flag
+               "force"  => \$force,   # flag
+               "debug"  => \$debug)   # flag
+         or die("Error in command line arguments\n");
+
+if ( $#ARGV == -1  ) {
+    die "usage: $0 [-dfr] [-o dir] [-w dir] [-a arch] <package pool 
directory>\n";
+}
+
+
+# TODO: Use Getopt some more?
+
+
 # Are this relative to our path?
 if ($OUTPUTDIR !~ /^\// ) {
        $OUTPUTDIR=$pwd."/".$OUTPUTDIR;
@@ -54,8 +69,8 @@
 if ($WORKDIR !~ /^\// ) {
        $WORKDIR=$pwd."/".$WORKDIR;
 }
-print STDERR "DEBUG Setting workdir to $WORKDIR\n" if $opt_d;
-print STDERR "DEBUG Setting outputdir $OUTPUTDIR\n" if $opt_d;
+print STDERR "DEBUG: Setting workdir to $WORKDIR\n" if $debug;
+print STDERR "DEBUG: Setting outputdir $OUTPUTDIR\n" if $debug;
 
 # Create directories
 if (! -e $OUTPUTDIR) {
@@ -64,29 +79,33 @@
 if (! -e $WORKDIR) {
        mkdir $WORKDIR || die ("Could not create $WORKDIR: $!");
 }
-my $EXTENSION = "deb"; # Default, do binaries
-
-if ( defined($opt_r) ) {
+my $EXTENSION = "deb"; # Default, do binary packages
+
+if ( $readinput ) {
 # Download packages and then extract
        my $mirror = shift;
        while ( my $package = <STDIN> ) {
                chomp($package);
                # Obtaint a list of all packages
-               print "Looking for package $package\n" if $opt_d;
-               open (PACK, '|', "find $mirror -name 
\"${package}_*${EXTENSION}\" -a -type f" );
+               print "DEBUG: Looking for package $package\n" if $debug;
+               open (PACK, "find $mirror -name \"${package}_*${EXTENSION}\" -a 
-type f |" );
                while ( my $file = <PACK> ) {
                        chomp $file;
                        extract_package($file);
                }
                close PACK;
-               print "Finished extraction.\n";
+               print "INFO: Finished extraction.\n";
        }
 } else { 
 # Recursive call
        foreach my $dir (@ARGV) {
-               print "Starting extraction of manpages in '$dir'\n";
-               scan_directory($dir);
-               print "Finished extraction.\n";
+               if ( -d $dir ) {
+                       print "INFO: Starting extraction of manpages in 
'$dir'\n";
+                       scan_directory($dir);
+                       print "INFO: Finished extraction.\n";
+               } else {
+                       print "ERROR: Will not extract manpages from '$dir', it 
is not a directory\n";
+               }
        }
 }
 
@@ -94,7 +113,7 @@
 
 sub scan_directory  {
        my ($dir) =@_;
-       print "DEBUG: Scanning dir $dir\n" if $opt_d;
+       print "DEBUG: Scanning dir $dir\n" if $debug;
         my $DIRFH;
 
         opendir $DIRFH, $dir || warn ("Cannot open directory $dir: $!");
@@ -115,8 +134,11 @@
 # Extract a package to the working directory by making symlinks
 # to its files
        my ($file) = @_;
-       print "DEBUG: Checking file $file\n" if $opt_d;
-       return if $file !~ /\.$EXTENSION$/;
+       print "DEBUG: Checking file $file\n" if $debug;
+       if ( $file !~ /\.$EXTENSION$/ ) {
+               print "DEBUG: Omitting file (not a package)\n" if $debug;
+               return 0;
+       }
        # The file is a deb file, extract the name of the source files
        my @sources;
        my $basedir = dirname($file);
@@ -132,35 +154,46 @@
             chomp ( $version = `dpkg-deb -f "$file" Version` );
             chomp ( $arch = `dpkg-deb -f "$file" Architecture` );
        } 
-       if ( defined($opt_a) and $arch ne $opt_a ) {
-               print "Skipping package $packagename (architecture '$arch')\n";
+
+       # Note, this means that we will only analyse one binary package
+       # of all the different architectures available
+       if ( $arch ne $ARCHITECTURE ) {
+               print "INFO: Skipping package file (architecture '$arch', we 
want '$ARCHITECTURE')\n" if $debug;
                return 0;
        }
        if ( $EXTENSION eq "dsc" and $debfile =~ /^.*?_(.*?)\.$EXTENSION$/ ) {
                        $version = $1;
        }
+
+       # Note, it might not be optimal to keep (forever) old versions of 
manpages
+       # maybe its best to add the distribution (sid, testing...) to the 
OUTPUTDIR
+       # and keep only one extracted package per release
        if ( $version ne "undefined" ) {
                $mandir = "${OUTPUTDIR}/${pooldir}/${packagename}_${version}";
        } else {
                $mandir = "${OUTPUTDIR}/${pooldir}/${packagename}";
        }
-       if ( -e  $mandir && ! $opt_f) {
-       # Note, this means that we will only analyse one binary package
-       # of all the different architectures available
-               print "Skipping package $packagename (version '$version' 
already extracted)\n";
-               return 0;
+       if ( -e  $mandir ){
+               if ( ! $force ) {
+                       print "INFO: Skipping package $packagename (version 
'$version' already extracted)\n" if $debug;
+                       return 0;
+               } else {
+                       print "INFO: Forcing overwritting of package 
$packagename (version '$version' already extracted)\n" if $debug;
+               }
        }
        mkpath "$mandir" || die ("Could not create $mandir: $!");
 
-       print "Extracting manpages of $packagename version '$version' in 
$mandir\n";
+       print "INFO: Extracting manpages of $packagename version '$version' in 
$mandir\n";
        # You can either do a search in the binary files:
        if ( $EXTENSION eq "deb" ) {
             if ( extract_manpages($WORKDIR, $file, $mandir) ) {
-            # Remove the directory, there were no manpages there
                    print "WARNING: No manpages found.\n";
-                   if (  -e "$mandir" ) {
-                           rmdir $mandir || die ("Could not remove $mandir: 
$!");
-                   }
+                       # Optionally, remove the directory, there were no 
manpages there
+               #    if (  -e "$mandir" ) {
+               #           rmdir $mandir || die ("Could not remove $mandir: 
$!");
+               #    }
+               # Its best to keept it to prevent the script (when its rerun) 
to go through the same
+               # packages twice
            }
        }       
        # Now we are done, cleanup
@@ -175,7 +208,7 @@
        # Looks for manpages in the sources
        my $result = 1;
        # Temporary file for dpkg
-       my $tempfileh = new File::Temp ( Template => "TEMPLATE.XXXXXX", DIR => 
File::Spec->tmpdir, SUFFIX => ".suf" ) or  die "Cannot create temporary file: 
$!" ;
+       my $tempfileh = new File::Temp ( Template => "DPKG-DEB.XXXXXX", DIR => 
$WORKDIR, SUFFIX => ".tmp" ) or  die "Cannot create temporary file: $!" ;
 
        my $tempfile = $tempfileh->filename;
 
@@ -185,18 +218,18 @@
        system "$command";
        if ( $? != 0 ) {
                if ($? == -1) {
-                       print STDERR "failed to execute: $!\n";
+                       print STDERR "ERROR: failed to execute: $!\n";
                } elsif ($? & 127) {
-                       printf STDERR "child died with signal %d, %s 
coredump\n",
+                       printf STDERR "ERROR: child died with signal %d, %s 
coredump\n",
                               ($? & 127),  ($? & 128) ? 'with' : 'without';
                } else {
-                       printf STDERR "child exited with value %d\n", $? >> 8;
+                       printf STDERR "ERROR: child exited with value %d\n", $? 
>> 8;
                }
                die "Error running '$command'";
        }
        $command="tar -C $wdir -xf $tempfile usr/share/man ./usr/share/man 
usr/X11R6/man ./usr/X11R6/man 2>/dev/null";
        system "$command";
-       printf STDERR "tar exited with value %d\n", $? >> 8 if $? != 0 && $? != 
( 2 << 8 );
+       printf STDERR "ERROR: tar exited with value %d\n", $? >> 8 if $? != 0 
&& $? != ( 2 << 8 );
 # Note we skip exit value '2' which happens when tar does not find any file 
according to specification
 
 # If we have a directory then move all the files in it


-- 
To UNSUBSCRIBE, email to [email protected]
with a subject of "unsubscribe". Trouble? Contact [email protected]
Archive: http://lists.debian.org/[email protected]

Reply via email to