BryanDavis has uploaded a new change for review.
https://gerrit.wikimedia.org/r/283377
Change subject: jsub: Add a ton of comments
......................................................................
jsub: Add a ton of comments
Comment the heck out of qsub to facilitate review of its implementation
in preparation for porting to python. Comments prefixed with "bd808:"
are code review commentary on points that need further examination.
Bug: T132475
Change-Id: Ia54cf0fb01d13b18c67e8ac4441fe098c2a6e17b
---
M jobutils/bin/jsub
1 file changed, 80 insertions(+), 10 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/labs/toollabs
refs/changes/77/283377/1
diff --git a/jobutils/bin/jsub b/jobutils/bin/jsub
index 317c2ed..66bbe0f 100755
--- a/jobutils/bin/jsub
+++ b/jobutils/bin/jsub
@@ -17,6 +17,7 @@
use strict;
use warnings;
+use English;
use IPC::Run qw(harness run);
use String::ShellQuote;
@@ -25,6 +26,7 @@
my $full_commandline = join " ", $0, @ARGV;
system('/usr/local/bin/log-command-invocation', 'jsub', $full_commandline)
unless (exists ($ENV {'JOBUTILS_QSUB'}));
+# Supported qsub options and whether they take an argument or not
my %qsubargs = (
'-a' => 1, '-b' => 1, '-cwd' => 0, '-e' => 1, '-hard' => 0, '-i' => 1,
'-j' => 1,
'-l' => 1, '-now' => 1, '-N' => 1, '-o' => 1, '-p' => 1, '-q' => 1,
'-soft' => 0,
@@ -44,6 +46,8 @@
my $username = $ENV{LOGNAME} || $ENV{USER} || getpwuid($<);
my $mailto = "$username\@tools.wmflabs.org";
+# Set $script to the filename of the current process
+# (e.g. "jsub", "jstart", "qcronsub", ...)
$script = $1 if $script =~ m{/([^/]+)$};
sub memparse_kb {
@@ -63,13 +67,16 @@
# Set default umask for output files depending on whether we're called
# by a user or a tool.
-my $umask = $> >= 50000 ? oct ('007') : oct ('077');
+my $umask = $EFFECTIVE_USER_ID >= 50000 ? oct ('007') : oct ('077');
+# If called as `jstart` set the $continuous and $once flags
$continuous = $once = 1 if ($script eq 'jstart');
+# If called as `qcronsub` set the $once flag
$once = 1 if $script eq 'qcronsub';
my @options;
-my $jsubrcfilename = exists ($ENV {'JOBUTILS_JSUBRC'}) ? $ENV
{'JOBUTILS_JSUBRC'} : (getpwuid ($<)) [7] . '/.jsubrc';
+# Look for $JOBUTILS_JSUBRC or ~/.jsubrc options file with additional options
+my $jsubrcfilename = exists ($ENV {'JOBUTILS_JSUBRC'}) ? $ENV
{'JOBUTILS_JSUBRC'} : (getpwuid ($REAL_USER_ID)) [7] . '/.jsubrc';
if(open (my $rc, '<', $jsubrcfilename)) {
while(<$rc>) {
if(m/^(-[a-z]+)(?:\s+(.+)|\s*)$/) {
@@ -82,7 +89,9 @@
close ($rc);
}
+# Scan command line arguments
while($#ARGV > 0) {
+ # Stop when you hit an argument that isn't in the expected list
last unless defined $qsubargs{$ARGV[0]};
my $opt = shift;
my $optarg = undef;
@@ -95,6 +104,7 @@
if($#ARGV<0 or $ARGV[0] =~ m/^-/) {
+ # Ran out of arugments without a program to exec or have an unknown option
print STDERR <<"END"
usage: $script [options...] program [arg...]
@@ -124,9 +134,12 @@
exit 1;
}
+# Next argument is the program to run
my $exe = shift;
+# Resolve program relative to $PATH
my $prog = `/usr/bin/which $exe`;
chomp $prog;
+# Resolve all symlinks
while(-l $prog) {
my $symlink = readlink $prog;
if($symlink =~ m{^/}) {
@@ -141,25 +154,35 @@
$prog .= "/$symlink";
}
+# Check to see if we have an absolute path yet
unless($prog =~ m{^/}) {
+ # Nope. Then it must be relative to the current working directory
my $cwd = `pwd`;
chomp $cwd;
$prog = "$cwd/$exe";
+ # Collapse '/./' to '/'
$prog =~ s{/\./}{/}g;
}
+# Scan the program looking for more option directives embedded in the script
+# as '#$ -SOMETHING' comments. This mostly matches the behavior of SGE's qsub.
+# See '-C prefix_string' in `man qsub` for more ugly details.
open EXE, "<$prog" or die "$prog: $!\n";
my $shebang;
-while(read EXE, $shebang, 2) {
- last unless $shebang =~ m/^#[!\$]/;
- my $rest =<EXE>;
+while(read EXE, $shebang, 2) { # Read the next 2 characters
+ last unless $shebang =~ m/^#[!\$]/; # Stop unless we read '#!' or '#$'
+ my $rest =<EXE>; # Read through the next newline
next if $shebang eq '#!';
+ # Capture options from $rest if present
+ # Arguments look like '-FLAG' or '-OPT ARG'
next unless $rest =~ m/\s* ( -\S+ ) (?: \s+ ( [^# \n\t]+ ) )?/x;
+ # Add captured values to our growing collection of options
unshift @options, [ $1, $2 ];
}
close EXE;
-
+# Evaluate the options that we have captured from the rc file, commnad line,
+# and the script itself.
foreach my $option (@options) {
my($opt, $optval) = @$option;
if($opt eq '-mem') {
@@ -179,41 +202,60 @@
$quiet = 1;
} else {
if($opt eq '-l') {
+ # Process qsub's insane '-l resource=value,...' options
+ # If there is an 'h_vmem=...' option, remove it and set $memory
$memory = memparse_kb($1) if $optval =~ s/h_vmem=([0-9]+[mMgGkK]),?//;
+ # If there is a 'virtual_free=...' option, remove it and ...
if ($optval =~ s/virtual_free=([0-9]+[mMgGkK]),?//) {
my $vfmem = memparse_kb($1);
if ($vfmem > $memory) {
+ # If config asks for more virtual_free than h_vmem,
+ # warn the caller and bump h_vmem.
my $new_mem = $vfmem * 1.1;
print "WARNING: virtual_free=${vfmem}k is larger than
h_vmem=${memory}k. Setting h_vmem=${new_mem}k\n";
$memory = $new_mem;
}
}
+ # Continue to next option if we have chopped out all of the values in
+ # this one (skips adding to $qsargs below).
next if $optval =~ m/^,?$/;
}
+ # Add parsed option to $qsargs map
$qsargs{$opt} = $optval;
+
+ # Continue to next option if we saw '-N NAME', '-q QUEUE', or '-b y|n'
next if $opt eq '-N' or $opt eq '-q' or $opt eq '-b';
+
+ # Add option and value to argument stack
push @args, $opt;
push @args, $optval if defined $optval;
}
}
+# Name this job
my $jobname = 'unknown';
+# Take the program filename minus any extension as the job name
+# (e.g. '/home/foo/fobar.sh' -> 'foobar')
$jobname = $1 if $prog =~ m{([^/.]+)(\.[^/]*)?$};
+# If we found a '-N NAME' option earlier, use that name instead
$jobname = $qsargs{'-N'} if defined $qsargs{'-N'};
-my $err = (getpwuid ($<)) [7] . '/' . $jobname . '.err';
-my $out = (getpwuid ($<)) [7] . '/' . $jobname . '.out';
+# Decide on filenames for stderr and stdout
+# Defaults are: $HOME/$jobname.(err|out)
+my $err = (getpwuid ($REAL_USER_ID)) [7] . '/' . $jobname . '.err';
+my $out = (getpwuid ($REAL_USER_ID)) [7] . '/' . $jobname . '.out';
+# If we got an '-e /path/to/stderr' option, use it
$err = $qsargs{'-e'} if defined $qsargs{'-e'};
+# If we got an '-o /path/to/stdout' option, use it
$out = $qsargs{'-o'} if defined $qsargs{'-o'};
+# Use the same file for stderr as stdout if we saw a '-j y' option
$err = $out if defined $qsargs{'-j'} and $qsargs{'-j'} =~ m/^[yY]/;
# For STDOUT and STDERR we do the same dance:
# - If the output file exists, we do nothing (the output file being a
# directory is a subset of this).
# - Otherwise, we touch the output file once with the set umask.
-# - If -stderr is not given and the error output file is not a
-# directory, we redirect STDERR to it.
my $oldumask = umask ($umask);
if (!-e $out) {
open (my $tempfh, '>>', $out) or
@@ -225,6 +267,8 @@
die ("Couldn't touch '$err': $!");
close ($tempfh);
}
+# - If -stderr is not given and the error output file is not a
+# directory, we redirect STDERR to it.
if (!$stderr && !-d $err) {
open (STDERR, '>>', $err) or
die ("Couldn't redirect STDERR to '$err': $!");
@@ -236,37 +280,63 @@
die "\[$now\] $prog: not an executable file\n" unless -f $prog and -x $prog;
if($once) {
+ # Check to see if this named job is live on the grid if we are only supposed
+ # to run a single copy
my $running = system "/usr/bin/job", '-q', $jobname;
die "\[$now\] unable to get job status\n" if $running & 127;
$running >>= 8;
die "\[$now\] there is a job named '$jobname' already active\n" unless
$running==0;
}
+# Add default arguments for /usr/bin/qsub
$mailto = $qsargs{'-M'} if defined $qsargs{'-M'};
+# Set stderr log file for the job unless we saw a '-e /path/to/stderr' option
+# bd808: I think this is a bug as $qsargs{'-e'} is not used later
push @args, '-e', $err unless defined $qsargs{'-e'};
+# Set stdout log file for the job unless we saw a '-o /path/to/stdout' option
+# bd808: I think this is a bug as $qsargs{'-o'} is not used later
push @args, '-o', $out unless defined $qsargs{'-o'};
+# Set the email recipient for the job
push @args, '-M', $mailto;
+# Set the jobname,
+# make all resource limits hard,
+# and set the hard virtual memory limit
push @args, '-N', $jobname, '-hard', '-l', "h_vmem=${memory}k";
+# Define stdin, stdout, and stderr to be used when we run /usr/bin/qsub
my ($qsubinput, $qsuboutput, $qsuberror);
if($continuous) {
+ # Try to make a job that will automatically restart if it crashes
+ # Submit the job to the 'continuous' queue (or an explictly named queue)
push @args, '-q', (defined $qsargs{'-q'})? $qsargs{'-q'}: 'continuous';
+ # Add the '-b y|n' option if we have it (not sure why we do this)
push @args, '-b', $qsargs{'-b'} if defined $qsargs{'-b'};
+ # Write a shell script to stdin that will continue to execute our program
+ # until it exits with an exit status of zero.
+ # bd808: this feels like it could use a bit of improvement
$qsubinput = "#!/bin/bash\n" .
"while ! " . shell_quote($prog, @ARGV) . "; do\n" .
" sleep 5\n" .
"done\n";
} else {
+ # Set stdin to an empty string
$qsubinput = '';
+ # Change queue name if explictly provided (will be 'task' otherwise)
$queue = $qsargs{'-q'} if defined $qsargs{'-q'};
+ # Add queue name argument,
+ # set the binary submission flag,
+ # and add the program and its arguments
push @args, '-q', $queue, '-b', 'y', $prog, @ARGV;
}
+# Execute $JOBUTILS_QSUB or /usr/bin/qsub using the arguments we just built
my $h = harness([exists ($ENV {'JOBUTILS_QSUB'}) ? $ENV {'JOBUTILS_QSUB'} :
'/usr/bin/qsub', @args], \$qsubinput, \$qsuboutput, \$qsuberror);
if (!run ($h)) {
+ # Print stdout and stderr
print STDOUT $qsuboutput;
print STDERR $qsuberror;
+ # Exit with the same exit code as /usr/bin/qsub
exit ($h->result ());
}
--
To view, visit https://gerrit.wikimedia.org/r/283377
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia54cf0fb01d13b18c67e8ac4441fe098c2a6e17b
Gerrit-PatchSet: 1
Gerrit-Project: labs/toollabs
Gerrit-Branch: master
Gerrit-Owner: BryanDavis <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits