Darkdadaah has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/96222


Change subject: Proper getopt + head/tail options
......................................................................

Proper getopt + head/tail options

- Use getopt and a clean sub
- add option to keep the first line in every file (-H)
- add option to keep the last line in every file (-T)

Change-Id: I0f59e528799d18ed722226eab931b8aa31d1cccc
---
M tools/decoupe.pl
1 file changed, 107 insertions(+), 35 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wiktionary/anagrimes refs/changes/22/96222/1

diff --git a/tools/decoupe.pl b/tools/decoupe.pl
index d3ea120..c4fb22d 100755
--- a/tools/decoupe.pl
+++ b/tools/decoupe.pl
@@ -1,49 +1,121 @@
 #!/usr/bin/perl -w
+
 use strict;
+use warnings;
+use Getopt::Std;
 
-my $file = $ARGV[0];
+our %opt;      # Getopt options
 
-die("Fichier nécessaire (argument 1)\n") if not $file;
-print "$file\n";
-open(FILE, "<$file") or die("");
+#################################################
+# Message about this program and how to use it
+sub usage
+{
+       print STDERR "[ $_[0] ]\n" if $_[0];
+       print STDERR << "EOF";
+       
+       This script split a list in smaller lists with a given max length.
+       
+       usage: $0 [-h] -f file
+       
+       -h        : this (help) message
+       
+       -i <path> : path to the list to split
+       -l <int>  : max number of elements in each subfile
 
-# Declarations
-my $max= $ARGV[1] ? $ARGV[1] : 100000;
-my $num=1;
-my $n=0;
-
-# Suffix?
-my $filename = '';
-my $suff = '';
-if ($file =~ /^(.+)(\..+?)$/) {
-       $filename = $1;
-       $suff = $2;
-}  else {
-       $filename = $file;
-       $suff = '';
+       Optional:
+       -H        : keep the first line in every file
+       -T        : keep the last line in every file
+EOF
+       exit;
 }
-my $out = $filename.'_'.$num.$suff;
 
-# First file
-open(OUT, ">$out") or die "Error";
+##################################
+# Command line options processing
+sub init()
+{
+       getopts( 'i:l:HT', \%opt ) or usage();
+       usage() if $opt{h};
+       usage("No file given (-i)") unless $opt{i};
+       usage("No max length given (-l)") unless ($opt{l} and $opt{l} > 0);
+}
 
-# Ecrit la première ligne pour chaque fichier
-my $first = <FILE>;
-print OUT $first;
+sub split_file
+{
+       my ($list_path, $max, $head, $tail) = @_;
 
-while (<FILE>) {
-       if ($n>$max) {
-               $num++;
-               $n=0;
-               close(OUT);
-               $out =  $filename.'_'.$num.$suff;
-               open(OUT, ">$out") or die "Error";
+
+       open(LIST, "<$list_path") or die("Couldn't read list file $list_path: 
$!");
+       
+       # Define iterator arguments
+       my $num=1;      # For the file name
+       my $n=0;        # For the number of lines
+
+       # If the filename has a suffix, keep the suffix while numbering the sub-files
+       my $filename = '';
+       my $suff = '';
+       if ($list_path =~ /^(.+)(\..+?)$/) {
+               $filename = $1;
+               $suff = $2;
+       }  else {
+               $filename = $list_path;
+               $suff = '';
+       }
+       my $out_path = $filename.'_'.$num.$suff;
+       
+       # Begin to write the first file
+       open(OUT, ">$out_path") or die("Couldn't write $out_path: $!");
+
+       # Write the first line in each file
+       my $first = '';
+       if ($head) {
+               $first = <LIST>;
                print OUT $first;
        }
-       print OUT $_;
        
-       $n++;
+       # Read the list file...
+       my $last = '';
+       while (my $line = <LIST>) {
+               # If we reached the max number of lines, close the current file
+               # and open a new one with the next file number
+               if ($n>$max) {
+                       # Close old file
+                       $n=0;
+                       close(OUT);
+                       
+                       # Init new file
+                       $num++;
+                       my $out_path =  $filename.'_'.$num.$suff;
+                       open(OUT, ">$out_path") or die("Couldn't write 
$out_path: $!");
+                       print OUT $first if $opt{H};
+               }
+               # Continue to write the lines from the original file
+               print OUT $line;
+               $n++;
+               $last = $line;
+       }
+       close(OUT);
+       close(LIST);
+       
+       # Write the last line for every file (except the last one)
+       if ($tail) {
+               for (my $i=1; $i < $num; $i++) {
+                       my $out_path =  $filename.'_'.$i.$suff;
+                       
+                       open(OUT, ">>$out_path") or die("Couldn't append to 
$out_path: $!");
+                       print OUT $last;
+                       close(OUT);
+               }
+       }
+       
+       return $num;
 }
-close(OUT);
-close(FILE);
+
+##################################
+# MAIN
+init();
+
+my $num = split_file($opt{i}, $opt{l}, $opt{H}, $opt{T});
+print STDERR "$num files created\n";
+
 __END__
+

-- 
To view, visit https://gerrit.wikimedia.org/r/96222
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0f59e528799d18ed722226eab931b8aa31d1cccc
Gerrit-PatchSet: 1
Gerrit-Project: wiktionary/anagrimes
Gerrit-Branch: master
Gerrit-Owner: Darkdadaah <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to