BryanDavis has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/283377

Change subject: jsub: Add a ton of comments
......................................................................

jsub: Add a ton of comments

Comment the heck out of qsub to facilitate review of its implementation
in preparation for porting to python. Comments prefixed with "bd808:"
are code review commentary on points that need further examination.

Bug: T132475
Change-Id: Ia54cf0fb01d13b18c67e8ac4441fe098c2a6e17b
---
M jobutils/bin/jsub
1 file changed, 80 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/toollabs 
refs/changes/77/283377/1

diff --git a/jobutils/bin/jsub b/jobutils/bin/jsub
index 317c2ed..66bbe0f 100755
--- a/jobutils/bin/jsub
+++ b/jobutils/bin/jsub
@@ -17,6 +17,7 @@
 
 use strict;
 use warnings;
+use English;
 
 use IPC::Run qw(harness run);
 use String::ShellQuote;
@@ -25,6 +26,7 @@
 my $full_commandline = join " ", $0, @ARGV;
 system('/usr/local/bin/log-command-invocation', 'jsub', $full_commandline) 
unless (exists ($ENV {'JOBUTILS_QSUB'}));
 
+# Supported qsub options and whether they take an argument or not
 my %qsubargs = (
        '-a' => 1, '-b' => 1, '-cwd' => 0, '-e' => 1, '-hard' => 0, '-i' => 1, 
'-j' => 1,
        '-l' => 1, '-now' => 1, '-N' => 1, '-o' => 1, '-p' => 1, '-q' => 1, 
'-soft' => 0,
@@ -44,6 +46,8 @@
 my $username = $ENV{LOGNAME} || $ENV{USER} || getpwuid($<);
 my $mailto = "$username\@tools.wmflabs.org";
 
+# Set $script to the filename of the current process
+# (e.g. "jsub", "jstart", "qcronsub", ...)
 $script = $1 if $script =~ m{/([^/]+)$};
 
 sub memparse_kb {
@@ -63,13 +67,16 @@
 
 # Set default umask for output files depending on whether we're called
 # by a user or a tool.
-my $umask = $> >= 50000 ? oct ('007') : oct ('077');
+my $umask = $EFFECTIVE_USER_ID >= 50000 ? oct ('007') : oct ('077');
 
+# If called as `jstart` set the $continuous and $once flags
 $continuous = $once = 1 if ($script eq 'jstart');
+# If called as `qcronsub` set the $once flag
 $once = 1 if $script eq 'qcronsub';
 
 my @options;
-my $jsubrcfilename = exists ($ENV {'JOBUTILS_JSUBRC'}) ? $ENV 
{'JOBUTILS_JSUBRC'} : (getpwuid ($<)) [7] . '/.jsubrc';
+# Look for $JOBUTILS_JSUBRC or ~/.jsubrc options file with additional options
+my $jsubrcfilename = exists ($ENV {'JOBUTILS_JSUBRC'}) ? $ENV 
{'JOBUTILS_JSUBRC'} : (getpwuid ($REAL_USER_ID)) [7] . '/.jsubrc';
 if(open (my $rc, '<', $jsubrcfilename)) {
   while(<$rc>) {
     if(m/^(-[a-z]+)(?:\s+(.+)|\s*)$/) {
@@ -82,7 +89,9 @@
   close ($rc);
 }
 
+# Scan command line arguments
 while($#ARGV > 0) {
+  # Stop when you hit an argument that isn't in the expected list
   last unless defined $qsubargs{$ARGV[0]};
   my $opt = shift;
   my $optarg = undef;
@@ -95,6 +104,7 @@
 
 
 if($#ARGV<0 or $ARGV[0] =~ m/^-/) {
+  # Ran out of arugments without a program to exec or have an unknown option
   print STDERR <<"END"
 
 usage: $script [options...] program [arg...]
@@ -124,9 +134,12 @@
   exit 1;
 }
 
+# Next argument is the program to run
 my $exe = shift;
+# Resolve program relative to $PATH
 my $prog = `/usr/bin/which $exe`;
 chomp $prog;
+# Resolve all symlinks
 while(-l $prog) {
   my $symlink = readlink $prog;
   if($symlink =~ m{^/}) {
@@ -141,25 +154,35 @@
   $prog .= "/$symlink";
 }
 
+# Check to see if we have an absolute path yet
 unless($prog =~ m{^/}) {
+  # Nope. Then it must be relative to the current working directory
   my $cwd = `pwd`;
   chomp $cwd;
   $prog = "$cwd/$exe";
+  # Collapse '/./' to '/'
   $prog =~ s{/\./}{/}g;
 }
 
+# Scan the program looking for more option directives embedded in the script
+# as '#$ -SOMETHING' comments. This mostly matches the behavior of SGE's qsub.
+# See '-C prefix_string' in `man qsub` for more ugly details.
 open EXE, "<$prog" or die "$prog: $!\n";
 my $shebang;
-while(read EXE, $shebang, 2) {
-  last unless $shebang =~ m/^#[!\$]/;
-  my $rest =<EXE>;
+while(read EXE, $shebang, 2) {        # Read the next 2 characters
+  last unless $shebang =~ m/^#[!\$]/; # Stop unless we read '#!' or '#$'
+  my $rest =<EXE>;                    # Read through the next newline
   next if $shebang eq '#!';
+  # Capture options from $rest if present
+  # Arguments look like '-FLAG' or '-OPT ARG'
   next unless $rest =~ m/\s* ( -\S+ ) (?: \s+ ( [^# \n\t]+ ) )?/x;
+  # Add captured values to our growing collection of options
   unshift @options, [ $1, $2 ];
 }
 close EXE;
 
-
+# Evaluate the options that we have captured from the rc file, commnad line,
+# and the script itself.
 foreach my $option (@options) {
   my($opt, $optval) = @$option;
   if($opt eq '-mem') {
@@ -179,41 +202,60 @@
     $quiet = 1;
   } else {
     if($opt eq '-l') {
+      # Process qsub's insane '-l resource=value,...' options
+      # If there is an 'h_vmem=...' option, remove it and set $memory
       $memory = memparse_kb($1) if $optval =~ s/h_vmem=([0-9]+[mMgGkK]),?//;
+      # If there is a 'virtual_free=...' option, remove it and ...
       if ($optval =~ s/virtual_free=([0-9]+[mMgGkK]),?//) {
         my $vfmem = memparse_kb($1);
         if ($vfmem > $memory) {
+            # If config asks for more virtual_free than h_vmem,
+            # warn the caller and bump h_vmem.
             my $new_mem = $vfmem * 1.1;
             print "WARNING: virtual_free=${vfmem}k is larger than 
h_vmem=${memory}k. Setting h_vmem=${new_mem}k\n";
             $memory = $new_mem;
         }
       }
+      # Continue to next option if we have chopped out all of the values in
+      # this one (skips adding to $qsargs below).
       next if $optval =~ m/^,?$/;
     }
+    # Add parsed option to $qsargs map
     $qsargs{$opt} = $optval;
+
+    # Continue to next option if we saw '-N NAME', '-q QUEUE', or '-b y|n'
     next if $opt eq '-N' or $opt eq '-q' or $opt eq '-b';
+
+    # Add option and value to argument stack
     push @args, $opt;
     push @args, $optval if defined $optval;
   }
 }
 
+# Name this job
 my $jobname = 'unknown';
+# Take the program filename minus any extension as the job name
+# (e.g. '/home/foo/fobar.sh' -> 'foobar')
 $jobname = $1 if $prog =~ m{([^/.]+)(\.[^/]*)?$};
+# If we found a '-N NAME' option earlier, use that name instead
 $jobname = $qsargs{'-N'} if defined $qsargs{'-N'};
 
-my $err = (getpwuid ($<)) [7] . '/' . $jobname . '.err';
-my $out = (getpwuid ($<)) [7] . '/' . $jobname . '.out';
+# Decide on filenames for stderr and stdout
+# Defaults are: $HOME/$jobname.(err|out)
+my $err = (getpwuid ($REAL_USER_ID)) [7] . '/' . $jobname . '.err';
+my $out = (getpwuid ($REAL_USER_ID)) [7] . '/' . $jobname . '.out';
 
+# If we got an '-e /path/to/stderr' option, use it
 $err = $qsargs{'-e'} if defined $qsargs{'-e'};
+# If we got an '-o /path/to/stdout' option, use it
 $out = $qsargs{'-o'} if defined $qsargs{'-o'};
+# Use the same file for stderr as stdout if we saw a '-j y' option
 $err = $out if defined $qsargs{'-j'} and $qsargs{'-j'} =~ m/^[yY]/;
 
 # For STDOUT and STDERR we do the same dance:
 # - If the output file exists, we do nothing (the output file being a
 #   directory is a subset of this).
 # - Otherwise, we touch the output file once with the set umask.
-# - If -stderr is not given and the error output file is not a
-#   directory, we redirect STDERR to it.
 my $oldumask = umask ($umask);
 if (!-e $out) {
   open (my $tempfh, '>>', $out) or
@@ -225,6 +267,8 @@
     die ("Couldn't touch '$err': $!");
   close ($tempfh);
 }
+# - If -stderr is not given and the error output file is not a
+#   directory, we redirect STDERR to it.
 if (!$stderr && !-d $err) {
   open (STDERR, '>>', $err) or
     die ("Couldn't redirect STDERR to '$err': $!");
@@ -236,37 +280,63 @@
 die "\[$now\] $prog: not an executable file\n" unless -f $prog and -x $prog;
 
 if($once) {
+  # Check to see if this named job is live on the grid if we are only supposed
+  # to run a single copy
   my $running = system "/usr/bin/job", '-q', $jobname;
   die "\[$now\] unable to get job status\n" if $running & 127;
   $running >>= 8;
   die "\[$now\] there is a job named '$jobname' already active\n" unless 
$running==0;
 }
 
+# Add default arguments for /usr/bin/qsub
 $mailto = $qsargs{'-M'} if defined $qsargs{'-M'};
+# Set stderr log file for the job unless we saw a '-e /path/to/stderr' option
+# bd808: I think this is a bug as $qsargs{'-e'} is not used later
 push @args, '-e', $err unless defined $qsargs{'-e'};
+# Set stdout log file for the job unless we saw a '-o /path/to/stdout' option
+# bd808: I think this is a bug as $qsargs{'-o'} is not used later
 push @args, '-o', $out unless defined $qsargs{'-o'};
+# Set the email recipient for the job
 push @args, '-M', $mailto;
+# Set the jobname,
+# make all resource limits hard,
+# and set the hard virtual memory limit
 push @args, '-N', $jobname, '-hard', '-l', "h_vmem=${memory}k";
 
+# Define stdin, stdout, and stderr to be used when we run /usr/bin/qsub
 my ($qsubinput, $qsuboutput, $qsuberror);
 
 if($continuous) {
+  # Try to make a job that will automatically restart if it crashes
+  # Submit the job to the 'continuous' queue (or an explictly named queue)
   push @args, '-q', (defined $qsargs{'-q'})? $qsargs{'-q'}: 'continuous';
+  # Add the '-b y|n' option if we have it (not sure why we do this)
   push @args, '-b', $qsargs{'-b'} if defined $qsargs{'-b'};
+  # Write a shell script to stdin that will continue to execute our program
+  # until it exits with an exit status of zero.
+  # bd808: this feels like it could use a bit of improvement
   $qsubinput = "#!/bin/bash\n"                                   .
                "while ! " . shell_quote($prog, @ARGV) . "; do\n" .
                "  sleep 5\n"                                     .
                "done\n";
 } else {
+  # Set stdin to an empty string
   $qsubinput = '';
+  # Change queue name if explictly provided (will be 'task' otherwise)
   $queue = $qsargs{'-q'} if defined $qsargs{'-q'};
+  # Add queue name argument,
+  # set the binary submission flag,
+  # and add the program and its arguments
   push @args, '-q', $queue, '-b', 'y', $prog, @ARGV;
 }
 
+# Execute $JOBUTILS_QSUB or /usr/bin/qsub using the arguments we just built
 my $h = harness([exists ($ENV {'JOBUTILS_QSUB'}) ? $ENV {'JOBUTILS_QSUB'} : 
'/usr/bin/qsub', @args], \$qsubinput, \$qsuboutput, \$qsuberror);
 if (!run ($h)) {
+    # Print stdout and stderr
     print STDOUT $qsuboutput;
     print STDERR $qsuberror;
+    # Exit with the same exit code as /usr/bin/qsub
     exit ($h->result ());
 }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/283377
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia54cf0fb01d13b18c67e8ac4441fe098c2a6e17b
Gerrit-PatchSet: 1
Gerrit-Project: labs/toollabs
Gerrit-Branch: master
Gerrit-Owner: BryanDavis <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to