Darkdadaah has uploaded a new change for review.
https://gerrit.wikimedia.org/r/96222
Change subject: Proper getopt + head/tail options
......................................................................
Proper getopt + head/tail options
- Use getopt and a clean sub
- add option to keep the first line in every file (-H)
- add option to keep the last line in every file (-T)
Change-Id: I0f59e528799d18ed722226eab931b8aa31d1cccc
---
M tools/decoupe.pl
1 file changed, 107 insertions(+), 35 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/wiktionary/anagrimes refs/changes/22/96222/1
diff --git a/tools/decoupe.pl b/tools/decoupe.pl
index d3ea120..c4fb22d 100755
--- a/tools/decoupe.pl
+++ b/tools/decoupe.pl
@@ -1,49 +1,121 @@
#!/usr/bin/perl -w
+
use strict;
+use warnings;
+use Getopt::Std;
-my $file = $ARGV[0];
+our %opt; # Getopt options
-die("Fichier nécessaire (argument 1)\n") if not $file;
-print "$file\n";
-open(FILE, "<$file") or die("");
+#################################################
+# Message about this program and how to use it
+sub usage
+{
+ print STDERR "[ $_[0] ]\n" if $_[0];
+ print STDERR << "EOF";
+
+ This script split a list in smaller lists with a given max length.
+
+ usage: $0 [-h] -f file
+
+ -h : this (help) message
+
+ -i <path> : path to the list to split
+ -l <int> : max number of elements in each subfile
-# Declarations
-my $max= $ARGV[1] ? $ARGV[1] : 100000;
-my $num=1;
-my $n=0;
-
-# Suffix?
-my $filename = '';
-my $suff = '';
-if ($file =~ /^(.+)(\..+?)$/) {
- $filename = $1;
- $suff = $2;
-} else {
- $filename = $file;
- $suff = '';
+ Optional:
+ -H : keep the first line in every file
+ -T : keep the last line in every file
+EOF
+ exit;
}
-my $out = $filename.'_'.$num.$suff;
-# First file
-open(OUT, ">$out") or die "Error";
+##################################
+# Command line options processing
+sub init()
+{
+ getopts( 'i:l:HT', \%opt ) or usage();
+ usage() if $opt{h};
+ usage("No file given (-i)") unless $opt{i};
+ usage("No max length given (-l)") unless ($opt{l} and $opt{l} > 0);
+}
-# Ecrit la première ligne pour chaque fichier
-my $first = <FILE>;
-print OUT $first;
+sub split_file
+{
+ my ($list_path, $max, $head, $tail) = @_;
-while (<FILE>) {
- if ($n>$max) {
- $num++;
- $n=0;
- close(OUT);
- $out = $filename.'_'.$num.$suff;
- open(OUT, ">$out") or die "Error";
+
+ open(LIST, "<$list_path") or die("Couldn't read list file $list_path: $!");
+
+ # Define iterator arguments
+ my $num=1; # For the file name
+ my $n=0; # For the number of lines
+
+ # If the filename has a suffix, keep the suffix while numbering the sub-files
+ my $filename = '';
+ my $suff = '';
+ if ($list_path =~ /^(.+)(\..+?)$/) {
+ $filename = $1;
+ $suff = $2;
+ } else {
+ $filename = $list_path;
+ $suff = '';
+ }
+ my $out_path = $filename.'_'.$num.$suff;
+
+ # Begin to write the first file
+ open(OUT, ">$out_path") or die("Couldn't write $out_path: $!");
+
+ # Write the first line in each file
+ my $first = '';
+ if ($head) {
+ $first = <LIST>;
print OUT $first;
}
- print OUT $_;
- $n++;
+ # Read the list file...
+ my $last = '';
+ while (my $line = <LIST>) {
+ # If we reached the max number of lines, close the current file
+ # and open a new one with the next file number
+ if ($n>$max) {
+ # Close old file
+ $n=0;
+ close(OUT);
+
+ # Init new file
+ $num++;
+ my $out_path = $filename.'_'.$num.$suff;
+ open(OUT, ">$out_path") or die("Couldn't write $out_path: $!");
+ print OUT $first if $opt{H};
+ }
+ # Continue to write the lines from the original file
+ print OUT $line;
+ $n++;
+ $last = $line;
+ }
+ close(OUT);
+ close(LIST);
+
+ # Write the last line for every file (except the last one)
+ if ($tail) {
+ for (my $i=1; $i < $num; $i++) {
+ my $out_path = $filename.'_'.$i.$suff;
+
+ open(OUT, ">>$out_path") or die("Couldn't append to $out_path: $!");
+ print OUT $last;
+ close(OUT);
+ }
+ }
+
+ return $num;
}
-close(OUT);
-close(FILE);
+
+##################################
+# MAIN
+init();
+
+my $num = split_file($opt{i}, $opt{l}, $opt{H}, $opt{T});
+print STDERR "$num files created\n";
+
__END__
+
--
To view, visit https://gerrit.wikimedia.org/r/96222
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I0f59e528799d18ed722226eab931b8aa31d1cccc
Gerrit-PatchSet: 1
Gerrit-Project: wiktionary/anagrimes
Gerrit-Branch: master
Gerrit-Owner: Darkdadaah <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits