http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/orig/Fortran/setupinterp.f
----------------------------------------------------------------------
diff --git a/climatology/clim/orig/Fortran/setupinterp.f 
b/climatology/clim/orig/Fortran/setupinterp.f
new file mode 100644
index 0000000..8eed0b7
--- /dev/null
+++ b/climatology/clim/orig/Fortran/setupinterp.f
@@ -0,0 +1,291 @@
+#include "interp.h"
+c $Revision: 2.12 $
+c NAME gaussinterp
+c 7 Dec 99     earmstrong      NASA/JPL
+c
+c DESCRIPTION
+c Gaussian interpolation program modified to map SST (HDF) data.
+c Modified from intersstdbg7day3.f (jvazquez' prog).  
+c
+c SYNOPSIS
+c setupinterp() allocates space for arrays, calls interp() which does the 
+c interpolation
+c  
+c USAGE
+c % gaussinterp infile beg_rec end_rec outfile parmfile
+
+c **** Original program comments (intersstdbg7day3.f)  ***
+c DESCRIPTION: Maps data when given the required information. Program
+c   originally written by raghu. Version 7 reads the gridded data set.
+c   Currently maps TMR water vapor.
+c
+c COMMENTS:
+c  This program wraps around in longitude for 0 to 360 maps.
+c  If a different map size is needed in longitude w/o wraparound,
+c  some lines of code need to be modified in the file.
+c
+c
+c CHANGES:
+c  2/6/97 - change fint from integer*4 to integer*2; holds residuals
+c 2/19/97 - change output to direct access file, change sl() to int*2
+c 8/8/97 - change i/o so that it reads data directly from the residual
+c          data files
+c 9/9/97 - change main program so that i/o part are subroutines
+c 9/10/97 - add header,time,lat,lon and #of maps to output file
+c 9/11/97 - add version and output file specification in runtime
+c           argument list
+c **** end Original comments ****
+
+c 12/01/99 - major modifications (ema). 
+c              - added command line read for input, output filenames
+c                and records numbers of input file read
+c              - binsum is now a subprogram
+c              - mapping params (interp.h) read in with include statement
+c              - removed some superfluous do loops
+c 12/06/99 - dynamic allocation of arrays added
+c         - read input parms from arg 5 (see 'interp.parms'
+c           for an example of format)
+c         - hdf data array size still hardcoded in interp.h
+c 12/10/99 - variable zlo set equal to command line arg for first
+c           record number in infile to process (istart). zlo 
+c           removed from parameter file (interp.parms)
+c 12/13/99 - imax, jmax calculated from xlo, xhi, ylo, and yhi
+c           imin, jmin, kmin set to 1
+c 12/16/99 - added options in parameter file for sequential
+c           or interleaved maps, and formatted or unformatted
+c           output file
+c 12/27/99 - added basename C call. program can now handle 
+c            compressed datafiles.
+c 1/4/00   - added subsetting of hdf array based on lon/lat 
+c           bounds. zlo now read from parameter file. 
+c 1/4/00   - zlo found from parse of first datafile.
+c           geo-positions of UL, LR written to output header
+
      program setupinterp

c-- Driver for the gaussian interpolation: reads command-line args and
c-- the parameter file, derives grid dimensions, allocates work arrays
c-- via malloc(), then calls interp() to do the actual interpolation.
c-- NOTE(review): no 'implicit none' -- istart, iend, zlo, ifile, iparm,
c-- itotsize, imax, jmax, etc. are all implicitly typed.
      
      integer ndatamax, x_length, y_length
      integer temp_malloc, sst_inter_malloc
      integer sst_seq_malloc, sl_malloc
      integer x_malloc, y_malloc, z_malloc, f_malloc
      integer xx_malloc, yy_malloc, zz_malloc, vsum_malloc
      integer ixmin_malloc, ixmax_malloc
      integer iymin_malloc, iymax_malloc
      integer izmin_malloc, izmax_malloc
c-- element sizes in bytes (see allocation section below)
      real*4 float_size
      integer*4 int_size

      character*80 plabel
      character*50 infile
      character*30 outfile
      character*60 parmfile
      character*10 mapformat
      character*15 outformat
      character*6 startrec
      character*6 endrec
      character*3 cday
      character*4 cyr
      character*150 datafile
      character*30 basename

c-- mapping parameters shared with interp() and the other subprograms
      common /parms/imin,jmin,kmin,
     &              xwin2,ywin2,zwin2,
     &              xh,yh,zh,          
     &              dx,dy,dz,
     &              xlo,xhi,ylo,yhi,zlo,
     &              dxd,dyd

      common /fileio/ infile,outfile,istart,
     &               iend,mapformat,outformat  
      common /hdfindex/ icolLeft,icolRight,irowUpper,irowLower
       
      common /basefile/ basename

c ...check command line arguments
c-- NOTE(review): the five getarg() calls run before the argument count
c-- is validated below; with missing args the strings come back blank.
      call getarg( 1, infile )
      call getarg( 2, startrec )
      call getarg( 3, endrec )
      call getarg( 4, parmfile )
      call getarg( 5, outfile )

      inum_args = iargc()
      if ( inum_args .ne. 5 ) then
         call usage()
         stop
      endif

c-- NOTE(review): '(i)' is a nonstandard edit descriptor (no field
c-- width); portable code would use e.g. '(i6)' or a list-directed read.
      read( startrec, '(i)' ) istart
      read( endrec, '(i)' ) iend

      if ( iend .lt. istart ) then
         write(6,*) '\n        Error: end record number
     & smaller than beginning record ! \n'
         stop
      endif

c--  zlo determined by reading first record and parsing the datafile
c--  for year and julian day using getbase()  (C call)

c-- Open input file list
c-- NOTE(review): ifile is never assigned a unit number before this
c-- open() -- it is an undefined implicit integer.  Same problem for
c-- iparm at the parameter-file open below.  RECSIZE comes from interp.h.
      open( unit=ifile,status='old',file=infile,access='direct',
     &  recl=RECSIZE,form='formatted',err=2000,iostat=ierr )

       read( ifile, '(a)', rec=istart ) datafile
       close( ifile, status='keep', err=2500, iostat=ierr )

       call getbase( datafile )        ! returns basename
c-- basename begins 'yyyyddd'; split into 4-digit year and julian day
       cday=basename( 5:7 )
       cyr=basename( 1:4 )
       read( cday,'(i3)' ) iday
       read( cyr,'(i4)' ) iyr
c-- zlo = day count since the start of 1985, adding one day after each
c-- leap year through 2004 (needs another line per leap year after 2008)
       zlo = float(iday)+float(iyr-1985)*365.
       if(iyr.gt.1988) zlo = zlo + 1.
       if(iyr.gt.1992) zlo = zlo + 1.
       if(iyr.gt.1996) zlo = zlo + 1.
       if(iyr.gt.2000) zlo = zlo + 1.
       if(iyr.gt.2004) zlo = zlo + 1.
       print *, 'zlo is ', zlo

c-- read in the parameter file 
      open( unit=iparm,status='old',form='formatted',file=parmfile,
     &     err=1000,iostat=ierr )

      read( unit=iparm, fmt=* ) ndatamax
      read( unit=iparm, fmt=* ) x_length, y_length
      read( unit=iparm, fmt=* ) kmax
      read( unit=iparm, fmt=* ) xwin2, ywin2, zwin2
      read( unit=iparm, fmt=* ) xh, yh, zh
      read( unit=iparm, fmt=* ) dx, dy, dz
      read( unit=iparm, fmt=* ) xlo, xhi
      read( unit=iparm, fmt=* ) ylo, yhi
c      read( unit=iparm, fmt=* ) zlo
c      read( unit=iparm, fmt=* ) dxd, dyd
      read( unit=iparm, fmt='(a)' ) mapformat
      read( unit=iparm, fmt='(a)' ) outformat
      close( iparm,status='keep',err=1500,iostat=ierr )

c       write(*,*) ndatamax
c       write(*,*) x_length, y_length
c       write(*,*) kmax
c       write(*,*) xwin2,ywin2, zwin2
c       write(*,*) xh, yh, zh
c       write(*,*) dx, dy, dz
c       write(*,*) xlo, xhi
c       write(*,*) ylo, yhi
c       write(*,*) zlo
cc      write(*,*) dxd, dyd
c       write(*,*) mapformat
c       write(*,*) outformat

      if( (mapformat .ne. 'interleave') .and.  
     &    (mapformat .ne. 'sequential') ) then
          print *, 'check parameter file for interleave or sequential maps !'
          stop
      endif
      if( (outformat .ne. 'formatted') .and.  
     &    (outformat .ne. 'unformatted') ) then
          print *, 'check parameter file for formatted or unformatted output !'
          stop
      endif


c-- calculate max number of interpolation "steps" based on
c-- long and lat ranges divided by interpolation interval
c-- NOTE(review): the real quotient is truncated on assignment to the
c-- implicit integers imax/jmax; ranges not divisible by dx/dy lose a step.
c      imax = ( (xhi - xlo) + 1 ) / dx
c      jmax = ( abs(yhi - ylo) + 1 ) / dy
      imax = ( abs(xhi - xlo)  ) / dx
      jmax = ( abs(yhi - ylo)  ) / dy

      print *, 'imax, jmax, kmax',  imax,jmax,kmax

      imin = 1
      jmin = 1
      kmin = 1

c-- calculate degrees per grid point
      dxd =  360. / float(x_length)
      dyd =  180. / float(y_length)

c-- find columns and rows for subsetting into hdf image array 
c-- remember @ hdfarray(1,1) -->  lon=-180, lat=90
      icolLeft = nint( ((xlo + 180) / dxd) + 1 )
      icolRight = nint( (xhi + 180) / dxd )
      irowUpper = nint( (abs(yhi - 90) / dyd) + 1 )
      irowLower = nint( (abs(ylo - 90) / dyd) )

c      print *, 'icolLeft, icolRight,irowUpper, irowLower', 
c     & icolLeft, icolRight, irowUpper, irowLower 

c-- 4 bytes for floats and ints
      float_size = 4
      int_size = 4

c-- allocate space for arrays
c-- NOTE(review): byte counts are computed with the REAL variable
c-- float_size, so itotsize holds a truncated real product; and malloc()
c-- results are stored in default integers, which truncates 64-bit
c-- pointers on LP64 systems.  None of these allocations is checked for
c-- failure (a zero return) before use.
          itotsize = x_length*y_length*float_size
      temp_malloc = malloc( %val(itotsize) )     
          itotsize = kmax*float_size
      sst_inter_malloc = malloc( %val(itotsize) )     
          itotsize = imax*float_size
      sst_seq_malloc = malloc( %val(itotsize) )     
          itotsize = imax*jmax*kmax*float_size
      sl_malloc = malloc( %val(itotsize) ) 
          itotsize =  ndatamax*float_size
      x_malloc = malloc( %val(itotsize) ) 
      y_malloc = malloc( %val(itotsize) ) 
      z_malloc = malloc( %val(itotsize) ) 
      f_malloc = malloc( %val(itotsize) ) 
          itotsize = imax*float_size
      xx_malloc = malloc( %val(itotsize) ) 
          itotsize = jmax*float_size
      yy_malloc = malloc( %val(itotsize) ) 
          itotsize = kmax*float_size
      zz_malloc = malloc( %val(itotsize) ) 
          itotsize = 2*imax*jmax*kmax*float_size
      vsum_malloc = malloc( %val(itotsize) ) 
          itotsize = ndatamax*int_size
      ixmin_malloc = malloc( %val(itotsize) ) 
      ixmax_malloc = malloc( %val(itotsize) ) 
      iymin_malloc = malloc( %val(itotsize) ) 
      iymax_malloc = malloc( %val(itotsize) ) 
      izmin_malloc = malloc( %val(itotsize) ) 
      izmax_malloc = malloc( %val(itotsize) ) 

c      print *, 'done malloc arrays\n'

c--- call interp() with 'space' variables passed call by value
      call interp( %val(temp_malloc), 
     &      %val(sst_inter_malloc), %val(sst_seq_malloc),
     &      %val(sl_malloc), %val(x_malloc), %val(y_malloc), 
     &     %val(z_malloc), %val(f_malloc), %val(xx_malloc),
     &     %val(yy_malloc), %val(zz_malloc), 
     &     %val(vsum_malloc), %val(ixmin_malloc), %val(ixmax_malloc),
     &     %val(iymin_malloc), %val(iymax_malloc), %val(izmin_malloc), 
     &     %val(izmax_malloc), ndatamax, x_length, y_length,
     &     imax, jmax, kmax )

c-- how to free memory? dunno

c-- NOTE(review): execution falls straight through into the error
c-- labels below, so all four 'Error ...' messages also print after a
c-- successful run; a 'stop' (or a goto past the labels) is missing here.

 1000 print *, 'Error opening parameter file: ', parmfile, 'error num: ', ierr
 1500 print *, 'Error closing parameter file: ', parmfile, 'error num: ', ierr
 2000 print *, 'Error opening input: ', infile, 'error num: ', ierr
 2500 print *, 'Error closing input: ', infile, 'error num: ', ierr

      end
+
c-- usage: print the command-line help for gaussinterp, then return to
c-- the caller (which stops).
c-- NOTE(review): the '\n' escapes in these literals are only expanded
c-- by compilers with C-style backslash extensions -- confirm the build
c-- flags.  XSIZE/YSIZE are compile-time constants from interp.h.
      subroutine usage()
         print *, '\n  ~~ gaussian interpolation of pathfinder SST data
     & (f77 version) ~~\n '
         print *, 'USAGE: % gaussinterp infile beg_rec end_rec parmfile outfile\n'
         print *, ' -- infile: file containing list to process
     & (ascii; fixed length records)'
         print *, ' -- beg_rec: first record (file) in infile processed'
         print *, ' -- end_rec: last record (file) in infile processed'
         print *, ' -- parmfile: input parameter file (e.g., interp.parms)'
         print *, ' -- outfile: interpolated output file (ascii or binary)'
         print *, '\n e.g., % gaussinterp list.dat 100 600 interp.parms output.dat '
         print *, '[process records 100 to 600 of list.dat; interpolated
     & result is output.dat]\n'
         print *, 'note: see interp.parms for example format of parameter file'
         print *, '      see gaussinterp.readme for more general information'
         print *, '      this executable compiled for image x and y size:', XSIZE, YSIZE
      end

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/pixelStats.py
----------------------------------------------------------------------
diff --git a/climatology/clim/pixelStats.py b/climatology/clim/pixelStats.py
new file mode 100644
index 0000000..8ac6b1a
--- /dev/null
+++ b/climatology/clim/pixelStats.py
@@ -0,0 +1,217 @@
+"""
+pixelStats.py
+
+Compute multi-epoch (multi-day) statistics for each lat/lon pixel read from 
daily Level-3 grids.
+
+Also do statistics roll-ups from daily to monthly, monthly to seasonal, 
seasonal to yearly, 
+yearly to multi-year, and multi-year to total N-year period.
+
+Simple code to be run using Spark or Dpark.
+
+"""
+
+import sys, os, urllib, re, time
+import numpy as N
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pylab as M
+from netCDF4 import Dataset, default_fillvals
+
+from variables import getVariables, close
+from split import splitByMonth
+from cache import retrieveFile, CachePath
+
+#from pyspark import SparkContext    # both imported below when needed
+#import dpark
+
+Modes = ['sequential', 'dpark', 'spark']
+
+Accumulators = ['count', 'sum', 'sumsq', 'min', 'max']
+Stats = ['count', 'mean', 'stddev', 'min', 'max']
+
+GroupByKeys = ['month', 'season', 'year', '3-year', 'total']
+
+TimeFromFilenameDOY = {'get': ('year', 'doy'), 'regex': 
re.compile(r'\/A(....)(...)')}
+
+
def pixelStats(urls, variable, nPartitions, timeFromFilename=TimeFromFilenameDOY, groupByKeys=GroupByKeys, accumulators=Accumulators,
               cachePath=CachePath, mode='dpark', modes=Modes):
    '''Compute per-pixel statistics in parallel, given a list of URLs pointing to netCDF files.

    Returns the (keys, statsDict) pair produced by statsFromAccumulators,
    or exits the process on a bad mode or groupByKey.  The dpark and spark
    branches differ only in how the RDD is built, so they share one
    map/reduce tail (the original duplicated it).
    '''
    # Validate the mode up front; the original only detected a bad mode
    # after falling through every branch.
    if mode not in modes:
        print >>sys.stderr, 'pixelStats: Unrecognized mode  "%s".  Must be in %s' % (mode, str(modes))
        sys.exit(1)

    baseKey = groupByKeys[0]
    if baseKey == 'month':
        urlsByKey = splitByMonth(urls, timeFromFilename)
    else:
        print >>sys.stderr, 'pixelStats: Unrecognized groupByKey "%s".  Must be in %s' % (baseKey, str(groupByKeys))
        sys.exit(1)

    if mode == 'sequential':
        accum = [accumulate(u, variable, accumulators) for u in urlsByKey]
        merged = reduce(combine, accum)
    else:
        if mode == 'dpark':
            import dpark
            rdd = dpark.parallelize(urlsByKey, nPartitions)          # RDD of URL lists
        else:   # mode == 'spark', guaranteed by the modes check above
            from pyspark import SparkContext
            sc = SparkContext(appName="PixelStats")
            rdd = sc.parallelize(urlsByKey, nPartitions)             # RDD of URL lists
        accum = rdd.map(lambda u: accumulate(u, variable, accumulators))   # RDD of stats accumulators
        merged = accum.reduce(combine)                                     # merged accumulators on head node
    return statsFromAccumulators(merged)                                   # final stats from accumulators
+
+
def accumulate(urls, variable, accumulators, cachePath=CachePath):
    '''Accumulate data into statistics accumulators like count, sum, sumsq, min, max, M3, M4, etc.

    urls is a (keys, urlList) pair as produced by the splitter.  Returns
    (keys, accumDict); accumDict is empty if no file could be read.
    '''
    keys, urls = urls
    accum = {}
    for url in urls:
        try:
            path = retrieveFile(url, cachePath)
        except:     # deliberate best-effort: skip unreachable files
            print >>sys.stderr, 'accumulate: Error, continuing without file %s' % url
            continue

        try:
            # dict of variable arrays by name; v is a masked array
            var, fh = getVariables(path, [variable], arrayOnly=True, set_auto_mask=True)
            v = var[variable]
            close(fh)
        except:
            print >>sys.stderr, 'accumulate: Error, cannot read variable %s from file %s' % (variable, path)
            continue

        # Lazily initialize on the FIRST SUCCESSFUL read.  The original
        # tested `i == 0`, which raised KeyError below whenever the first
        # URL in the list failed to retrieve.
        if not accum:
            for k in accumulators:
                if k == 'min':     accum[k] = default_fillvals['f8'] * N.ones(v.shape, dtype=N.float64)
                elif k == 'max':   accum[k] = -default_fillvals['f8'] * N.ones(v.shape, dtype=N.float64)
                elif k == 'count': accum[k] = N.zeros(v.shape, dtype=N.int64)
                else:
                    accum[k] = N.zeros(v.shape, dtype=N.float64)

        if 'count' in accumulators:
            accum['count'] += ~v.mask               # count valid (unmasked) pixels
        if 'min' in accumulators:
            accum['min'] = N.ma.minimum(accum['min'], v)
        if 'max' in accumulators:
            accum['max'] = N.ma.maximum(accum['max'], v)

        v = N.ma.filled(v, 0.)                      # zero-fill so sums ignore masked pixels
        if 'sum' in accumulators:
            accum['sum'] += v
        if 'sumsq' in accumulators:
            accum['sumsq'] += v*v
    return (keys, accum)
+
+
def combine(a, b):
    '''Combine two (keys, accumulatorDict) pairs by merging elementwise.

    min/max accumulators are merged with elementwise min/max; everything
    else (count, sum, sumsq, ...) is summed.  The original referenced a
    nonexistent global `accumulators` for the min/max merge, which raised
    NameError whenever those accumulators were present; membership is now
    decided from the accumulator dict's own keys.
    '''
    keys, a = a
    b = b[1]
    for k in a.keys():
        if k == 'min':
            a[k] = N.ma.minimum(a[k], b[k])
        elif k == 'max':
            a[k] = N.ma.maximum(a[k], b[k])
        else:
            a[k] += b[k]
    return (('total',), a)
+
+
def statsFromAccumulators(accum):
    '''Compute final statistics (count, mean, stddev, min, max) from accumulators.

    Pixels whose count is zero are masked in every output array.
    '''
    keys, accum = accum
    # Mask out pixels that never received a valid observation
    accum['count'] = N.ma.masked_equal(accum['count'], 0, copy=False)
    mask = accum['count'].mask
    for k in accum:
        if k != 'count':
            accum[k] = N.ma.array(accum[k], copy=False, mask=mask)

    # Compute stats (masked)
    stats = {}
    if 'count' in accum:
        stats['count'] = accum['count']
    if 'min' in accum:
        stats['min'] = accum['min']
    if 'max' in accum:
        stats['max'] = accum['max']
    if 'sum' in accum:
        stats['mean'] = accum['sum'] / accum['count']
    if 'sumsq' in accum:
        n = accum['count'].astype(N.float64)
        if 'sum' in accum:
            # Sample std dev about the mean:
            #   var = (sum(x^2) - (sum x)^2 / n) / (n - 1)
            # The original used sqrt(sumsq/(n-1)) -- the uncentered second
            # moment -- which overstates spread unless the mean is zero.
            var = (accum['sumsq'] - accum['sum'] * accum['sum'] / n) / (n - 1.)
            stats['stddev'] = N.sqrt(N.ma.maximum(var, 0.))   # clamp round-off negatives
        else:
            # Without 'sum' the mean is unknown; keep the original formula.
            stats['stddev'] = N.sqrt(accum['sumsq'] / (n - 1.))
    return (keys, stats)
+
+
def writeStats(urls, variable, stats, outFile, copyToHdfsPath=None, format='NETCDF4', cachePath=CachePath):
    '''Write out stats arrays to netCDF with some attributes.

    Coordinate dimensions/variables are copied from the first input file;
    one output variable is created per computed statistic.
    '''
    keys, stats = stats
    dout = Dataset(outFile, 'w', format=format)
    print >>sys.stderr, 'Writing %s ...' % outFile
    # Record provenance as global attributes
    dout.setncattr('variable', variable)
    dout.setncattr('urls', str(urls))
    dout.setncattr('level', str(keys))

    inFile = retrieveFile(urls[0], cachePath)
    din = Dataset(inFile, 'r')
    try:
        coordinates = din.variables[variable].getncattr('coordinates').split()
    except:
        coordinates = ('lat', 'lon')     # kludge: FIX ME

    # Copy each coordinate (lat, lon, alt, ...) as a dimension plus a
    # 1-D variable holding its values.
    for coord in coordinates:
        dout.createDimension(coord, din.variables[coord].shape[0])
        cvar = dout.createVariable(coord, din.variables[coord].dtype, (coord,))
        cvar[:] = din.variables[coord][:]

    # Write one gridded variable per statistic (count, mean, stddev, ...)
    for name in stats:
        svar = dout.createVariable(name, stats[name].dtype, coordinates)
        svar[:] = stats[name][:]

    din.close()
    dout.close()
    return outFile
+    
+
def totalStats(args):
    '''Driver: read a URL-list file, compute pixel stats, write a netCDF file.

    args: [urlFile, variable, mode, nPartitions, outFile]
    '''
    urlFile, variable, mode = args[0], args[1], args[2]
    nPartitions = int(args[3])
    outFile = args[4]
    with open(urlFile, 'r') as f:
        urls = [line.strip() for line in f]
    stats = pixelStats(urls, variable, nPartitions, mode=mode)
    return writeStats(urls, variable, stats, outFile)
+
+def main(args):
+    return totalStats(args)
+
+if __name__ == '__main__':
+    print main(sys.argv[1:])
+
+
+# python pixelStats.py urls_sst_daynight_2003_3days.txt sst sequential 1 
modis_sst_stats_test.nc
+# python pixelStats.py urls_sst_daynight_2003_4months.txt sst sequential 1 
modis_sst_stats_test.nc
+# python pixelStats.py urls_sst_daynight_2003_4months.txt sst dpark 4 
modis_sst_stats_test.nc
+# python pixelStats.py urls_sst_daynight_2003_4months.txt sst spark 4 
modis_sst_stats_test.nc
+
+# python pixelStats.py urls_sst_daynight_2003_2015.txt sst dpark 16 
modis_sst_stats.nc
+# python pixelStats.py urls_sst_daynight_2003_2015.txt sst spark 16 
modis_sst_stats.nc
+

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/plotlib.py
----------------------------------------------------------------------
diff --git a/climatology/clim/plotlib.py b/climatology/clim/plotlib.py
new file mode 100644
index 0000000..5ed46f3
--- /dev/null
+++ b/climatology/clim/plotlib.py
@@ -0,0 +1,843 @@
+#!/bin/env python
+
+import sys, os, math, types, time, datetime
+from urllib import urlopen
+from urllib import urlretrieve
+from urlparse import urlparse
+#import Numeric as N
+import numpy as N
+import numpy.ma as MA
+#import matplotlib.numerix.ma as MA
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pylab as M
+from mpl_toolkits.basemap import Basemap
+#import hdfeos
+
def echo2(*s):
    """Write the space-joined stringified arguments to stderr, newline-terminated."""
    sys.stderr.write(' '.join(map(str, s)) + '\n')

def warn(*s):
    """Emit a stderr message prefixed with the module name."""
    echo2('plotlib:', *s)

def die(*s):
    """Report a fatal error to stderr and exit the process."""
    warn('Error,', *s)
    sys.exit()
+
# Whitelists of keyword options per call family -- presumably consulted
# by validCmdOptions()/evalKeywordCmds() (defined elsewhere in this
# module) to filter **options before forwarding; confirm against those
# helpers.
CmdOptions = {'MCommand':  ['title', 'xlabel', 'ylabel',  'xlim', 'ylim', 'show'],
              'plot':      ['label', 'linewidth', 'legend', 'axis'],
              'map.plot':  ['label', 'linewidth', 'axis'],
              'map.scatter':  ['norm', 'alpha', 'linewidths', 'faceted', 'hold'],
              'savefig':   ['dpi', 'orientation']
              }
+
def imageMap(lons, lats, vals, vmin=None, vmax=None, 
             imageWidth=None, imageHeight=None, outFile=None,
             projection='cyl', cmap=M.cm.jet, makeFigure=False,
             borders=[0., -90., 360., 90.], autoBorders=True, borderSlop=10.,
             meridians=[0, 360, 60], parallels=[-60, 90, 30],
             **options
             ):
    '''Draw a filled-contour map of vals over 1-D lons/lats axes with Basemap,
    optionally saving the figure to outFile.

    vmin/vmax may be numbers, None, or the string 'auto' (same as None).
    cmap may be a matplotlib colormap object or its name as a string.
    Remaining keyword options are dispatched via evalKeywordCmds /
    validCmdOptions.
    '''
    lons = normalizeLons(lons)
    # 'auto' means "derive the limit from the data"
    if vmin == 'auto': vmin = None
    if vmax == 'auto': vmax = None
    if imageWidth is not None: makeFigure = True
    if projection is None or projection == '': projection = 'cyl'
    if cmap is None or cmap == '': cmap = M.cm.jet
    # Resolve a colormap given by name; eval on a trusted name only.
    if isinstance(cmap, types.StringType) and cmap != '':
        try:
            cmap = eval('M.cm.' + cmap)
        except:
            cmap = M.cm.jet

#    ensureItems(options, {'xlabel': 'Longitude (deg)', 'ylabel': 'Latitude (deg)', \
    ensureItems(options, { \
                     'title': 'An Image Map', 'dpi': 100,
                     'imageWidth': imageWidth or 1024, 'imageHeight': imageHeight or 768})
    if autoBorders:
        # Fit the map borders to the data extent, rounded by borderSlop
        borders = [min(lons), min(lats), max(lons), max(lats)]
        borders = roundBorders(borders, borderSlop)

    m = Basemap(borders[0], borders[1], borders[2], borders[3], \
                projection=projection, lon_0=N.average([lons[0], lons[-1]]))

    if makeFigure:
        dpi = float(options['dpi'])
        width = float(imageWidth) / dpi
        if imageHeight is None:
            height = width * m.aspect    # preserve the map's aspect ratio
        else:
            height = float(imageHeight) / dpi
        f = M.figure(figsize=(width,height)).add_axes([0.1,0.1,0.8,0.8], frameon=True)

    if vmin is not None or vmax is not None: 
        if vmin is None:
            vmin = min(vals)
        else:
            vmin = float(vmin)
        if vmax is None:
            vmax = max(vals)
        else:
            vmax = float(vmax)
        # NOTE(review): dividing by 255 makes the contour step
        # (vmax-vmin)/7650, i.e. ~7650 levels; imageMap2 uses a step of
        # (vmax-vmin)/30 for ~30 levels -- confirm which is intended.
        vrange = (vmax - vmin) / 255.
        levels = N.arange(vmin, vmax, vrange/30.)
    else:
        levels = 30    # let contourf choose ~30 levels

    x, y = m(*M.meshgrid(lons,lats))    # project the lon/lat grid to map coords
    c = m.contourf(x, y, vals, levels, cmap=cmap, colors=None)

    m.drawcoastlines()
    m.drawmeridians(range(meridians[0], meridians[1], meridians[2]), labels=[0,0,0,1])
    m.drawparallels(range(parallels[0], parallels[1], parallels[2]), labels=[1,1,1,1])
    M.colorbar(c)
    evalKeywordCmds(options)    # applies title/xlabel/... keyword options
    if outFile:
        M.savefig(outFile, **validCmdOptions(options, 'savefig'))


# all of these calls below are handled by the evalKeywordCmds line above
#    M.xlim(0,360)
#    M.xlabel(xlabel)
#    M.ylabel(ylabel)
#    M.title(title)
#    M.show()
+
+
def imageMap2(lons, lats, vals, vmin=None, vmax=None, 
             imageWidth=None, imageHeight=None, outFile=None,
             projection='cyl', cmap=M.cm.jet, makeFigure=False,
             borders=[0., -90., 360., 90.], autoBorders=True, borderSlop=10.,
             meridians=[0, 360, 60], parallels=[-60, 90, 30],
             **options
             ):
    '''Variant of imageMap that appears to take 2-D lon/lat coordinate
    arrays (nested min/max and .flat indexing below) and contours directly
    in lon/lat space; colorbar is horizontal.  Assumes lons already
    normalized -- note normalizeLons is commented out.  TODO confirm the
    intended input shapes.
    '''
#    lons = normalizeLons(lons)
    # 'auto' means "derive the limit from the data"
    if vmin == 'auto': vmin = None
    if vmax == 'auto': vmax = None
    if imageWidth is not None: makeFigure = True
    if projection is None or projection == '': projection = 'cyl'
    if cmap is None or cmap == '': cmap = M.cm.jet
    # Resolve a colormap given by name; eval on a trusted name only.
    if isinstance(cmap, types.StringType) and cmap != '':
        try:
            cmap = eval('M.cm.' + cmap)
        except:
            cmap = M.cm.jet

#    ensureItems(options, {'xlabel': 'Longitude (deg)', 'ylabel': 'Latitude (deg)', \
    ensureItems(options, { \
                     'title': 'An Image Map', 'dpi': 100,
                     'imageWidth': imageWidth or 1024, 'imageHeight': imageHeight or 768})
    if autoBorders:
        # Nested min/max: one reduction per axis of the 2-D coord arrays
        borders = [min(min(lons)), min(min(lats)), max(max(lons)), max(max(lats))]
        borders = roundBorders(borders, borderSlop)

    m = Basemap(borders[0], borders[1], borders[2], borders[3], \
                projection=projection, lon_0=N.average([lons.flat[0], lons.flat[-1]]))

    if makeFigure:
        dpi = float(options['dpi'])
        width = float(imageWidth) / dpi
        if imageHeight is None:
            height = width * m.aspect    # preserve the map's aspect ratio
        else:
            height = float(imageHeight) / dpi
        f = M.figure(figsize=(width,height)).add_axes([0.1,0.1,0.8,0.8], frameon=True)

    if vmin is not None or vmax is not None: 
        if vmin is None:
            vmin = min(min(vals))
        else:
            vmin = float(vmin)
        if vmax is None:
            vmax = max(max(vals))
        else:
            vmax = float(vmax)
        vrange = vmax - vmin
        levels = N.arange(vmin, vmax, vrange/30.)    # ~30 contour levels
    else:
        levels = 30

    # Contour directly in lon/lat coordinates (no m(...) projection call)
    c = m.contourf(lons, lats, vals, levels, cmap=cmap, colors=None)
    m.drawcoastlines()
    m.drawmeridians(range(meridians[0], meridians[1], meridians[2]), labels=[0,0,0,1])
    m.drawparallels(range(parallels[0], parallels[1], parallels[2]), labels=[1,1,1,1])
    M.colorbar(c, orientation='horizontal')
    evalKeywordCmds(options)    # applies title/xlabel/... keyword options
    if outFile: M.savefig(outFile, **validCmdOptions(options, 'savefig'))
+
+
def image2(vals, vmin=None, vmax=None, outFile=None,
           imageWidth=None, imageHeight=None, upOrDown='upper',
           cmap=M.cm.jet, makeFigure=False, **options
          ):
    '''Filled-contour plot of a 2-D array with no map projection, using
    axes that fill the whole figure; upOrDown is passed to contourf as
    origin ('upper' or 'lower').
    '''
    M.clf()
    M.axes([0, 0, 1, 1])    # axes spanning the full figure, no margins
    # 'auto' means "derive the limit from the data"
    if vmin == 'auto': vmin = None
    if vmax == 'auto': vmax = None
    if imageWidth is not None: makeFigure = True
    if cmap is None or cmap == '': cmap = M.cm.jet
    # Resolve a colormap given by name; eval on a trusted name only.
    if isinstance(cmap, types.StringType) and cmap != '':
        try:
            cmap = eval('M.cm.' + cmap)
        except:
            cmap = M.cm.jet

    if makeFigure:
        # NOTE(review): unlike imageMap, ensureItems() is not called here,
        # so options['dpi'] raises KeyError unless the caller supplies it;
        # likewise imageHeight must not be None on this path.
        dpi = float(options['dpi'])
        width = float(imageWidth) / dpi
        height = float(imageHeight) / dpi
        f = M.figure(figsize=(width,height)).add_axes([0.1,0.1,0.8,0.8], frameon=True)

    if vmin is not None or vmax is not None: 
        if vmin is None:
            vmin = min(min(vals))    # nested min: vals assumed 2-D sequence
        else:
            vmin = float(vmin)
        if vmax is None:
            vmax = max(max(vals))
        else:
            vmax = float(vmax)
        vrange = vmax - vmin
        levels = N.arange(vmin, vmax, vrange/30.)    # ~30 contour levels
    else:
        levels = 30

    M.contourf(vals, levels, cmap=cmap, origin=upOrDown)
    evalKeywordCmds(options)    # applies title/xlabel/... keyword options
    if outFile: M.savefig(outFile, **validCmdOptions(options, 'savefig'))
+
+
def marksOnMap(lons, lats, vals=None, vmin=None, vmax=None,
               imageWidth=None, imageHeight=None, outFile=None,
               projection='cyl', cmap=M.cm.jet,
               sizes=80, colors='b', marker='o', makeFigure=False,
               times=None, timeDelta=None,
               borders=[0., -90., 360., 90.], autoBorders=True, borderSlop=10.,
               meridians=[0, 360, 60], parallels=[-60, 90, 30],
               **options
               ):
    '''Scatter-plot markers on a Basemap map, optionally colored by vals.

    Fixes vs. original: the "auto scale max" warn() was logged in the
    wrong branch (when vmax was explicitly supplied), and the unused
    plotTimes computation called a bare `arange` (NameError) instead of
    N.arange.  Plot output is otherwise unchanged.
    '''
    lons = normalizeLons(lons)
    if imageWidth is not None: makeFigure = True
    if projection is None: projection = 'cyl'

    ensureItems(options, { \
                          'title': 'Markers on a Map', 'dpi': 100,
                          'imageWidth': imageWidth or 1024, 'imageHeight': imageHeight or 768})
    if autoBorders:
        # Fit the map borders to the data extent, rounded by borderSlop
        borders = [min(lons), min(lats), max(lons), max(lats)]
        borders = roundBorders(borders, borderSlop)

    m = Basemap(borders[0], borders[1], borders[2], borders[3], \
                projection=projection, lon_0=N.average([lons[0], lons[-1]]))

    if makeFigure:
        dpi = float(options['dpi'])
        width = float(imageWidth) / dpi
        if imageHeight is None:
            height = width * m.aspect    # preserve the map's aspect ratio
        else:
            height = float(imageHeight) / dpi
        f = M.figure(figsize=(width,height)).add_axes([0.1,0.1,0.8,0.8],frameon=True)

    if vals is not None: 
        # Map vals onto marker colors, auto-scaling missing limits
        if vmin is None or vmin == 'auto':
            vmin = min(vals)
            warn('auto scale min is %f' % vmin)
        else:
            vmin = float(vmin)
        if vmax is None or vmax == 'auto':
            vmax = max(vals)
            warn('auto scale max is %f' % vmax)   # was logged in the non-auto branch
        else:
            vmax = float(vmax)
        vrange = (vmax - vmin) / 255.
        colors = N.array([(val - vmin)/vrange for val in vals])

    if timeDelta is not None:
        if times is not None:
            # Round the observation times outward to timeDelta-minute ticks.
            # NOTE(review): plotTimes is computed but never used below.
            times = map(mkTime, times)
            timeDelta = float(timeDelta) * 60.
            start = roundTime(times[0], 'down', timeDelta)
            end = roundTime(times[-1], 'up', timeDelta)
            plotTimes = N.arange(start, end+timeDelta, timeDelta)   # was bare arange: NameError
        else:
            timeDelta = None

    g = m.scatter(lons, lats, s=sizes, c=colors, marker=marker, cmap=cmap,
                  vmin=vmin, vmax=vmax, **validCmdOptions(options, 'map.scatter'))

    m.drawcoastlines()
    m.drawmeridians(range(meridians[0], meridians[1], meridians[2]), labels=[0,0,0,1])
    m.drawparallels(range(parallels[0], parallels[1], parallels[2]), labels=[1,1,1,1])
    M.colorbar(g)
    evalKeywordCmds(options)    # applies title/xlabel/... keyword options
    if outFile: M.savefig(outFile, **validCmdOptions(options, 'savefig'))
+
+
def plotSwathVar(granules, variable, scaleFactor, title, outFile, filterMin=None, filterMax=None,
                 scaleMin=None, scaleMax=None, imageWidth=None, imageHeight=None,
                 plotType='map', projection='cyl', markerSize=10, **options):
    """Plot a (possibly sliced) variable from each HDF-EOS swath granule and
    combine the per-granule images into a movie (or single image) via makeMovie.

    granules:    list of swath file paths/URLs, read with the hdfeos bindings
    variable:    field name, optionally with an index expression, e.g. "Var[0,:,:]"
    scaleFactor: multiplier applied to the raw field values
    plotType:    'map' (geolocated plot) or 'hist' (histogram)
    Returns the movie/image file name produced by makeMovie.
    NOTE(review): the index expression in `variable` is eval'd -- trusted input only.
    """
    if filterMin == 'auto': filterMin = None
    if filterMax == 'auto': filterMax = None
#    files = [localize(url) for url in granules if url != 'None']
    files = granules
    imageFiles = []

    for i, file in enumerate(files):
        print 'plotSwathVar: Reading %s: %s' % (file, variable)
        localFile = localize(file, retrieve=False)
        if i == 0:
            # Assume every granule shares the structure of the first swath
            swath = hdfeos.swaths(file)[0]
#            geoFields = hdfeos.swath_geo_fields(file, swath)
        lat = hdfeos.swath_field_read(file, swath, 'Latitude')
        lon = hdfeos.swath_field_read(file, swath, 'Longitude')
###        time = hdfeos.swath_field_read(file, swath, 'Time')
###        pressure = hdfeos.swath_field_read(file, swath, '???')

        # Out-of-range coordinates signal missing values in the geolocation
        # arrays, in which case a regular image map cannot be used.
        if N.minimum.reduce(lon.flat) < -360. or N.minimum.reduce(lat.flat) < -90.:
            useImageMap = False   # have missing values in lat/lon coord variables
        else:
            useImageMap = True

        dataFields = hdfeos.swath_data_fields(file, swath)
        if '[' not in variable:
            varName = variable
        else:
            varName, slice = variable.split('[')
        if varName not in dataFields:
            die('%s not a variable in %s' % (variable, file))
        if '[' not in variable:
            var = hdfeos.swath_field_read(file, swath, variable) * float(scaleFactor)
        else:
            # Apply the index expression (e.g. vals[0,:,:]) via eval
            vals = hdfeos.swath_field_read(file, swath, varName)
            var = eval('['.join(('vals', slice)))
            var = var * float(scaleFactor)

        print 'plotSwathVar: Variable range: %f -> %f' % (min(min(var)), max(max(var)))
        if plotType != 'map' or not useImageMap:
            lat = lat.flat; lon = lon.flat; var = var.flat

        if filterMin is not None or filterMax is not None:
            # Build the condition from whichever bounds were supplied; cond is
            # True for values strictly inside the (filterMin, filterMax) interval.
            if filterMin is not None and filterMax is None:
                cond = N.greater(var, float(filterMin))
            elif filterMin is None and filterMax is not None:
                cond = N.less(var, float(filterMax))
            else:
                cond = N.logical_and(N.greater(var, float(filterMin)),
                                     N.less(var, float(filterMax)))
            # NOTE(review): masked_where HIDES cond-true points, while compress
            # KEEPS them -- the two branches have opposite polarity; confirm
            # which is intended.
            if plotType == 'map' and useImageMap:
                lat = MA.masked_where(cond, lat, copy=0)
                lon = MA.masked_where(cond, lon, copy=0)
                var = MA.masked_where(cond, var, copy=0)
            else:
                lat = N.compress(cond, lat.flat)
                lon = N.compress(cond, lon.flat)
                var = N.compress(cond, var.flat)

        if plotType == 'map':
            imageFile = localFile + '_map.png'
            if useImageMap:
                imageMap2(lon, lat, var, scaleMin, scaleMax, imageWidth, imageHeight,
                          imageFile, projection, autoBorders=False, title=title+' '+file,
                          **options)
            else:
                # Scatter-style plot tolerates swaths with missing geolocation
                marksOnMap(lon, lat, var, scaleMin, scaleMax,
                           imageWidth, imageHeight, imageFile, projection,
                           autoBorders=True, title=title+' '+file,
                           sizes=markerSize*markerSize, **options)
        elif plotType == 'hist':
            imageFile = localFile + '_aot_hist.png'
            hist(var, 50, imageFile)
        else:
            die("plotSwathVar: plotType must be 'map' or 'hist'")

        imageFiles.append(imageFile)
    return makeMovie(imageFiles, outFile)
+
+
+def imageSwathVar(granules, variable, scaleFactor, title, outFile, 
filterMin=None, filterMax=None,
+                 scaleMin=None, scaleMax=None, imageWidth=None, 
imageHeight=None,
+                 plotType='map', projection='cyl', markerSize=10, **options):
+    if filterMin == 'auto': filterMin = None
+    if filterMax == 'auto': filterMax = None
+#    files = [localize(url) for url in granules if url != 'None']
+    files = granules
+    imageFiles = []; lonLatBounds = []
+
+    for i, file in enumerate(files):
+        print 'imageSwathVar: Reading %s: %s' % (file, variable)
+        localFile = localize(file, retrieve=False)
+        if i == 0:
+            swath = hdfeos.swaths(file)[0]
+#            geoFields = hdfeos.swath_geo_fields(file, swath)
+        lat = hdfeos.swath_field_read(file, swath, 'Latitude')
+        lon = hdfeos.swath_field_read(file, swath, 'Longitude')
+###        time = hdfeos.swath_field_read(file, swath, 'Time')
+###        pressure = hdfeos.swath_field_read(file, swath, '???')
+
+        if N.minimum.reduce(lon.flat) < -360. or N.minimum.reduce(lat.flat) < 
-90.:
+            useImageMap = False   # have missing values in lat/lon coord 
variables
+        else:
+            useImageMap = True
+
+        dataFields = hdfeos.swath_data_fields(file, swath)
+        if '[' not in variable:
+            varName = variable
+        else:
+            varName, slice = variable.split('[')
+        if varName not in dataFields:
+            die('%s not a variable in %s' % (variable, file))
+        if '[' not in variable:
+            var = hdfeos.swath_field_read(file, swath, variable) * 
float(scaleFactor)
+        else:
+            vals = hdfeos.swath_field_read(file, swath, varName)
+            var = eval('['.join(('vals', slice)))
+            var = var * float(scaleFactor)
+
+        print 'imageSwathVar: Variable range: %f -> %f' % (min(min(var)), 
max(max(var)))
+        if plotType != 'map' or not useImageMap:
+            lat = lat.flat; lon = lon.flat; var = var.flat
+
+        if filterMin is not None or filterMax is not None:
+            if filterMin is not None and filterMax is None:
+                cond = N.greater(var, float(filterMin))
+            elif filterMin is None and filterMax is not None:
+                cond = N.less(var, float(filterMax))
+            else:
+                cond = N.logical_and(N.greater(var, float(filterMin)),
+                                     N.less(var, float(filterMax)))
+            if plotType == 'map' and useImageMap:
+                lat = MA.masked_where(cond, lat, copy=0)
+                lon = MA.masked_where(cond, lon, copy=0)
+                var = MA.masked_where(cond, var, copy=0)
+            else:
+                lat = N.compress(cond, lat.flat)
+                lon = N.compress(cond, lon.flat)
+                var = N.compress(cond, var.flat)
+
+        lonLatBound = (min(min(lon)), min(min(lat)), max(max(lon)), 
max(max(lat)))
+        lonLatBounds.append(lonLatBound)
+
+        if plotType == 'map':
+            imageFile = localFile + '_image.png'
+            if useImageMap:
+                upOrDown = 'upper'
+                if lat[0, 0] < lat[-1, 0]: upOrDown = 'lower'
+                if lon[0, 0] > lon[0, -1]: var = fliplr(var)
+                image2(var, scaleMin, scaleMax, imageFile, upOrDown=upOrDown, 
**options)
+#                plainImage2(var, imageFile)
+            else:
+                marksOnMap(lon, lat, var, scaleMin, scaleMax,
+                           imageWidth, imageHeight, imageFile, projection,
+                           autoBorders=True, title=title+' '+file,
+                           sizes=markerSize*markerSize, **options)
+        elif plotType == 'hist':
+            imageFile = localFile + '_aot_hist.png'
+            hist(var, 50, imageFile)
+        else:
+            die("plotSwathVar: plotType must be 'map' or 'hist'")
+
+        imageFiles.append(imageFile)
+    print "imageSwathVar results:", imageFiles
+    return (imageFiles, lonLatBounds)
+
def fliplr(a):
    """Return a left-right (column-reversed) copy of the 2-D array a."""
    return N.array(a[:, ::-1])
+
def plainImage2(var, imageFile):
    """Render the 2-D array var as a bare (axis-free) figure image and save it to imageFile."""
    M.clf()             # clear any previous figure state
    M.figimage(var)     # draw the raw array without axes or projection
    M.savefig(imageFile)
+
+
def maskNcVar(inNcFile, outNcFile, outVars=[], writeMode='include',
              conditionVar=None, filterMin=None, filterMax=None,
              varToMask=None, maskValue=-9999.):
    """Mask values of a netCDF variable that fall inside a filter interval,
    then write a subsetted copy of the file.

    inNcFile/outNcFile: input and output netCDF paths
    outVars:      variables to copy to the output (condition/masked vars are added)
    conditionVar: variable fetched as the filter condition source
    varToMask:    variable whose values are replaced by maskValue where the
                  condition holds
    filterMin/filterMax: open-interval bounds; points strictly inside are masked
    Returns the output file name (from subsetNcFile).
    NOTE(review): the condition is currently computed from varToMask, not from
    conditionVar (which is fetched but unused) -- confirm intended behavior.
    """
    nc = NC(inNcFile)
    if conditionVar is not None:
        try:
            condVar = nc.variables[conditionVar]
        except:
            die('maskNcVar: Variable %s not in ncFile %s' % (conditionVar, inNcFile))
        if varToMask is None: die('maskNcVar: Must specify variable to mask with condition.')
        try:
            var = nc.variables[varToMask]
        except:
            die('maskNcVar: Variable %s not in ncFile %s' % (varToMask, inNcFile))

        if filterMin is not None or filterMax is not None:
            if filterMin is not None and filterMax is None:
                cond = N.greater(var, float(filterMin))
            elif filterMin is None and filterMax is not None:
                cond = N.less(var, float(filterMax))
            else:
                cond = N.logical_and(N.greater(var, float(filterMin)),
                                     N.less(var, float(filterMax)))
            # Fixed: N.putmask modifies var in place and returns None, so the
            # original "var = N.putmask(...)" clobbered var with None.
            N.putmask(var, cond, float(maskValue))
        # Fixed: set.add() returns None, so the original chained
        # set(outVars).add(conditionVar).add(varToMask) raised AttributeError.
        wanted = set(outVars)
        wanted.add(conditionVar)
        wanted.add(varToMask)
        outVars = list(wanted)
    # Fixed: arguments now match subsetNcFile's declared order
    # (inNcFile, outNcFile, outVars).
    return subsetNcFile(inNcFile, outNcFile, outVars)
+
def subsetNcFile(inNcFile, outNcFile, outVars, writeMode='include'):
    """Copy a subset of variables from inNcFile to outNcFile.

    NOTE(review): this is a stub -- it opens both files but does not yet copy
    dimensions, attributes, or the outVars variables; writeMode ('include' /
    'exclude') is accepted but unused.  Returns the output file name.
    """
    if writeMode == '' or writeMode == 'auto': writeMode = 'include'
    inf = NC(inNcFile)
    outf = NC(outNcFile, 'w')
    return outNcFile
+
+
def hist(x, bins, outFile=None, **options):
    """Plot a histogram of x with the given number of bins; optionally save to outFile.

    NOTE(review): all of **options are forwarded to M.hist, including any
    savefig-only options -- confirm M.hist tolerates the extras.
    """
    if outFile: M.clf()   # start a fresh figure only when writing a file
    M.hist(x, bins, **options)
    if outFile: M.savefig(outFile, **validCmdOptions(options, 'savefig'))
+
def localize(url, retrieve=True):
    """Map a URL to a local file name (the basename of its path).

    When retrieve is True the URL is downloaded into that file in the current
    directory first; otherwise only the name is computed.
    """
    scheme, netloc, path, params, query, frag = urlparse(url)
    dir, file = os.path.split(path)
    if retrieve: urlretrieve(url, file)
    return file
+
def makeMovie(files, outFile):
    """Combine image files into a movie using ImageMagick's 'convert'.

    With multiple files, writes <outFile base>.mpg and returns its name; with a
    single file, renames it to outFile and returns outFile.
    NOTE(review): assumes files is non-empty and that the names are shell-safe
    (the command line is built by string join and run via os.system).
    """
    if len(files) > 1:
        outMovieFile = os.path.splitext(outFile)[0] + '.mpg'
        cmd = 'convert ' + ' '.join(files) + ' ' + outMovieFile
        os.system(cmd)
        warn('Wrote movie ' + outMovieFile)
        return outMovieFile
    else:
        os.rename(files[0], outFile)
        return outFile
+
def mkTime(timeStr):
    """Make a Unix time (float seconds) from a space-separated date/time string
    'YYYY MM DD HH MM SS'.

    Note: the accepted format is space-separated, matching the strptime pattern
    below -- not 'YYYY-MM-DD HH:MM:SS' as previously documented.
    """
    from time import mktime, strptime
    return mktime( strptime(timeStr, '%Y %m %d %H %M %S') )
+
def roundTime(time, resolution, direction):
    """Round a time value to the given resolution -- NOT IMPLEMENTED (returns None).

    NOTE(review): the caller in the scatter-plot code above passes
    (time, direction, timeDelta), which does not match this
    (time, resolution, direction) signature; reconcile when implementing.
    """
    pass
+
def roundBorders(borders, borderSlop=10.):
    """Round [lonMin, latMin, lonMax, latMax] outward to multiples of
    borderSlop, snapping to the map edges (0/-90/360/90) when close."""
    directions = ('down', 'down', 'up', 'up')
    edges = (0., -90., 360., 90.)
    return [roundBorder(value, direction, borderSlop, edge)
            for value, direction, edge in zip(borders, directions, edges)]
+
def roundBorder(val, direction, step, end):
    """Round val to a multiple of step ('up' -> ceil, anything else -> floor),
    snapping to `end` when the rounded value lands within step+1 of it.

    val:       border coordinate to round
    direction: 'up' or 'down'
    step:      rounding granularity (e.g. 10 degrees)
    end:       map edge to clamp against (e.g. 0., -90., 360., 90.)
    """
    # Fixed: removed the dead local `slop`, a leftover from the commented-out
    # "rounder(val/step) * step + slop" variant.
    if direction == 'up':
        rounder = math.ceil
    else:
        rounder = math.floor
    v = rounder(val / step) * step
    # Snap to the map edge so rounded borders never overshoot the valid range
    if abs(v - end) < step + 1.:
        v = end
    return v
+
def normalizeLon(lon):
    """Wrap a longitude into the [0., 360.] range (single wrap only)."""
    shift = 0.
    if lon < 0.:
        shift = 360.
    elif lon > 360.:
        shift = -360.
    return lon + shift
+
def normalizeLons(lons):
    """Wrap every longitude in lons into [0., 360.]; returns a new array."""
    wrapped = []
    for lon in lons:
        wrapped.append(normalizeLon(lon))
    return N.array(wrapped)
+
+
def plotColumns(specs, groupBy=None, outFile=None, rmsDiffFrom=None, floatFormat=None,
                colors='bgrcmyk', markers='+x^svD<4>3', **options):
    """Plot labelled columns from data files, dispatching to the grouped or the
    simple variant depending on whether a groupBy key was supplied."""
    common = (outFile, rmsDiffFrom, floatFormat, colors, markers)
    if groupBy:
        plotColumnsGrouped(specs, groupBy, *common, **options)
    else:
        plotColumnsSimple(specs, *common, **options)
+
+
+def plotColumnsSimple(specs, outFile=None, rmsDiffFrom=None, floatFormat=None,
+                colors='bgrcmyk', markers='+x^svD<4>3', **options):
+    """Plot olumns of numbers from one or more data files.
+    Each plot spec. contains a filename and a list of labelled columns:
+      e.g., ('file1', 'xlabel:1,ylabel1:4,ylabel2:2,ylabel3:13)
+    Bug:  For the moment, only have 7 different colors and 10 different 
markers.
+    """
+    ensureItems(options, {'legend': True})
+    ydataMaster = None
+    for spec in specs:
+        file, columns = spec          # each spec is a (file, columnList) pair
+        columns = columns.split(',')  # each columnList is a comma-separated 
list of named columns
+        # Each named column is a colon-separated pair or triple 
'label:integer[:style]'
+        # Column indices are one-based.
+        # Styles are concatenated one-char flags like 'go' for green circles or
+        # 'kx-' for black X's with a line.
+        fields = N.array([map(floatOrMiss, line.split()) for line in 
open(file, 'r')])
+        xcol = columns.pop(0)  # first column in list is the x axis
+        xlabel, xcol, xstyle = splitColumnSpec(xcol)
+        xdata = fields[:,xcol-1]
+        markIndex = 0
+        for ycol in columns:
+            ylabel, ycol, ystyle = splitColumnSpec(ycol)
+            if ystyle is None: ystyle = colors[markIndex] + markers[markIndex] 
           
+            ydata = fields[:,ycol-1]  # all other columns are multiple y plots
+            if rmsDiffFrom:
+                if ydataMaster is None:
+                    ydataMaster = ydata    # kludge: must be first ycol in 
first file
+                    ylabelMaster = ylabel
+                else:
+                    s = diffStats(ylabelMaster, ydataMaster, ylabel, ydata)
+                    print >>sys.stderr, s.format(floatFormat)
+                    n, mean, sigma, min, max, rms = s.calc()
+                    ylabel = ylabel + ' ' + floatFormat % rms
+            M.plot(xdata, ydata, ystyle, label=ylabel)
+            markIndex += 1
+    evalKeywordCmds(options)
+    if outFile: M.savefig(outFile)    
+
+
+def plotColumnsGrouped(specs, groupBy, outFile=None, rmsDiffFrom=None, 
floatFormat=None,
+                colors='bgrcmyk', markers='+x^svD<4>3', **options):
+    """Plot olumns of numbers from one or more data files.
+    Each plot spec. contains a filename and a list of labelled columns:
+      e.g., ('file1', 'xlabel:1,ylabel1:4,ylabel2:2,ylabel3:13)
+    Bug:  For the moment, only have 7 different colors and 10 different 
markers.
+    """
+    ensureItems(options, {'legend': True})
+    ydataMaster = None
+    for spec in specs:
+        file, columns = spec          # each spec is a (file, columnList) pair
+        columns = columns.split(',')  # each columnList is a comma-separated 
list of named columns
+        # Each named column is a colon-separated pair or triple 
'label:integer[:style]'
+        # Column indices are one-based.
+        # Styles are concatenated one-char flags like 'go' for green circles or
+        # 'kx-' for black X's with a line.
+        fields = N.array([map(floatOrMiss, line.split()) for line in 
open(file, 'r')])
+        xcol = columns.pop(0)  # first column in list is the x axis
+        xlabel, xcol, xstyle = splitColumnSpec(xcol)
+        xdata = fields[:,xcol-1]
+        markIndex = 0
+        for ycol in columns:
+            ylabel, ycol, ystyle = splitColumnSpec(ycol)
+            if ystyle is None: ystyle = colors[markIndex] + markers[markIndex] 
           
+            ydata = fields[:,ycol-1]  # all other columns are multiple y plots
+            if rmsDiffFrom:
+                if ydataMaster is None:
+                    ydataMaster = ydata    # kludge: must be first ycol in 
first file
+                    ylabelMaster = ylabel
+                else:
+                    s = diffStats(ylabelMaster, ydataMaster, ylabel, ydata)
+                    print >>sys.stderr, s.format(floatFormat)
+                    n, mean, sigma, min, max, rms = s.calc()
+                    ylabel = ylabel + ' ' + floatFormat % rms
+            M.plot(xdata, ydata, ystyle, label=ylabel)
+            markIndex += 1
+    evalKeywordCmds(options)
+    if outFile: M.savefig(outFile)    
+
+
def plotTllv(inFile, markerType='kx', outFile=None, groupBy=None, **options):
    """Plot the lat/lon locations of points from a time/lat/lon/value file."""
    # Whitespace-separated columns: 0=time, 1=lat, 2=lon, 3=value
    fields = N.array([map(float, line.split()) for line in open(inFile, 'r')])
    lons = fields[:,2]; lats = fields[:,1]
    # NOTE(review): groupBy is accepted but unused; markerType/outFile are
    # passed positionally -- verify against marksOnMap's signature.
    marksOnMap(lons, lats, markerType, outFile, \
               title='Lat/lon plot of '+inFile, **options)
+
+
def plotVtecAndJasonTracks(gtcFiles, outFile=None, names=None, makeFigure=True, show=False, **options):
    """Plot GAIM climate and assim VTEC versus JASON using at least two 'gc' files.
    First file is usually climate file, and rest are assim files.

    NOTE(review): makeFigure is accepted but unused; gtcFiles and names are
    mutated via pop(), so callers' lists are modified.
    """
    ensureItems(options, {'title': 'GAIM vs. JASON for '+gtcFiles[0], \
                          'xlabel': 'Geographic Latitude (deg)', 'ylabel': 'VTEC (TECU)'})
    if 'show' in options:
        show = True
        del options['show']
    # Top panel: VTEC curves vs. latitude, with RMS differences from JASON
    M.subplot(211)
    gtcFile = gtcFiles.pop(0)
    name = 'clim_'
    if names: name = names.pop(0)
    specs = [(gtcFile, 'latitude:2,jason:6,gim__:8,%s:13,iri__:10' % name)]
    name = 'assim'
    for i, gtcFile in enumerate(gtcFiles):
        label = name
        if len(gtcFiles) > 1: label += str(i+1)   # number the assim runs when there are several
        specs.append( (gtcFile, 'latitude:2,%s:13' % label) )
    plotColumns(specs, rmsDiffFrom='jason', floatFormat='%5.1f', **options)
    M.legend()
    
    # Bottom panel: ground-track lat/lon scatter from the first remaining file
    M.subplot(212)
    options.update({'title': 'JASON Track Plot', 'xlabel': 'Longitude (deg)', 'ylabel': 'Latitude (deg)'})
    fields = N.array([map(floatOrMiss, line.split()) for line in open(gtcFiles[0], 'r')])
    lons = fields[:,2]; lats = fields[:,1]
    marksOnMap(lons, lats, show=show, **options)
    if outFile: M.savefig(outFile)
+
+
def diffStats(name1, vals1, name2, vals2):
    """Compute difference statistics (including RMS) between two Numeric vectors.

    Returns a Stats object labelled 'name2 - name1' with the elementwise
    differences added; use its calc()/format() methods to obtain
    n, mean, sigma, min, max, rms.
    """
    from Stats import Stats
    label = name2 + ' - ' + name1
    diff = vals2 - vals1
    return Stats().label(label).addm(diff)
+
def ensureItems(d1, d2):
    """Fill dict d1 (in place) with each key/value from d2 that d1 lacks."""
    for key in d2.keys():
        d1.setdefault(key, d2[key])
+
def splitColumnSpec(s):
    """Parse a column spec 'label:integer[:style]' into [label, int, style].

    The style element is None when the spec has only two parts; the column
    index is converted to int.  Dies on a spec with no ':' separator.
    """
    parts = s.split(':')
    if len(parts) < 2:
        die('plotlib: Bad column spec. %s' % s)
    if len(parts) == 2:
        parts = parts + [None]
    parts[1] = int(parts[1])
    return parts
+
def floatOrMiss(val, missingValue=-999.):
    """Convert val to float, substituting missingValue when conversion fails."""
    try:
        return float(val)
    except:
        return missingValue
+
def floatOrStr(val):
    """Return float(val) when the conversion succeeds, otherwise val unchanged."""
    try:
        return float(val)
    except:
        return val
+
def evalKeywordCmds(options, cmdOptions=CmdOptions):
    """Execute matplotlib keyword-command options (e.g. title='...') via eval.

    For each option whose name is a recognized M.* command, build the call
    string "M.<cmd>(<args>)" and eval it.  A value of True means call with no
    arguments; any other truthy value is passed as a quoted string, plus any
    sub-options valid for that command (appended via dict2kwargs).
    NOTE(review): option values are interpolated into eval'd source -- only
    safe for trusted option dictionaries.
    """
    for option in options:
        if option in cmdOptions['MCommand']:
            args = options[option]
            if not args:
                continue            # falsy value: command explicitly disabled
            if args is True:
                args = ''
            else:
                args = "'" + args + "'"
            if option in cmdOptions:
                args += dict2kwargs( validCmdOptions(options, cmdOptions[option]) )
            try:
                eval('M.' + option + '(%s)' % args)
            except:
                # Fixed: message previously read "failed eval of keyword
                # command option failed" (doubled "failed")
                die('eval of keyword command option failed: %s=%s' % (option, args))
#        else:
#            warn('Invalid keyword option specified" %s=%s' % (option, args))
+
def validCmdOptions(options, cmd, possibleOptions=CmdOptions):
    """Return the subset of options that are valid keyword options for cmd."""
    allowed = possibleOptions[cmd]
    return dict([(name, options[name]) for name in options.keys() if name in allowed])
+
def dict2kwargs(d):
    """Render dict d as keyword-argument source text, e.g. ",a=1,b=2".

    The leading comma lets callers append the result directly after existing
    argument text when building an eval'able call string (see evalKeywordCmds).
    Fixed: elements already carry a leading comma, so they are joined with ''
    -- the original ', '.join produced malformed text like ",a=1, ,b=2" for
    dicts with more than one key.
    """
    return ''.join([',%s=%s' % (kw, d[kw]) for kw in d])
+
def csv2columns(csvFile, columns):
    """Given a CSV file and a comma-separated list of desired column names and
    types (name:type), return an array of vectors containing type-converted data.

    Type should be the name of string-to-val type-conversion function such as float, int, or str.
    If type is missing, then float conversion is assumed.
    Rows where any requested value fails to convert are skipped with a warning.
    NOTE(review): the type name is eval'd -- only use with trusted column specs;
    csvFile is fetched with urlopen, so it may be a URL or local path.
    """
    import csv
    names = []; types = []; cols = []
    for column in columns.split(','):
        if column.find(':') > 0:
            name, type = column.split(':')
        else:
            name = column; type = 'float'
        names.append(name.strip())
        types.append( eval(type.strip()) )  # get type conversion function from type string
        cols.append([])

    print csvFile
    for fields in csv.DictReader(urlopen(csvFile).readlines(), skipinitialspace=True):
        tmpColVals = []
        try:
            # Convert every requested column; a single failure skips the row
            for i, type in enumerate(types): tmpColVals.append( type(fields[names[i]]) )
        except Exception, e:
            print "Got exception coercing values: %s" % e
            continue
        for i in range(len(types)): cols[i].append(tmpColVals[i])
    return [N.array(col) for col in cols]
+
def csv2marksOnMap(csvFile, columns, title, vmin=None, vmax=None,
                   imageWidth=None, imageHeight=None, mapFile=None, projection='cyl'):
    """Read lon/lat/value columns from a CSV file and scatter-plot them on a map.

    columns names the lon, lat, and value columns in that order (see
    csv2columns).  Returns the map image file name, which defaults to
    csvFile + '.map.png'.
    """
    if mapFile is None: mapFile = csvFile + '.map.png'
    lons, lats, vals = csv2columns(csvFile, columns)
    marksOnMap(lons, lats, vals, vmin, vmax, imageWidth, imageHeight,
               mapFile, projection, title=title)
    return mapFile
+
def imageSlice(dataFile, lonSlice, latSlice, varSlice, title, vmin=None, vmax=None,
               imageWidth=None, imageHeight=None, imageFile=None,
               projection='cyl', colormap=M.cm.jet):
    """Download a netCDF or HDF-EOS grid/swath file, extract lon/lat/variable
    slices, and render them as a map image.

    For netCDF, lonSlice/latSlice/varSlice are variable names with optional
    index expressions (see evalSlice).  For HDF-EOS swaths, the standard
    Longitude/Latitude geolocation fields are assumed.  Returns the image
    file name (defaults to dataFile + '.image.png').
    """
#    if dataFile == []: dataFile = 'http://laits.gmu.edu/NWGISS_Temp_Data/WCSfCays5.hdf'
    if imageFile is None: imageFile = dataFile + '.image.png'

    print dataFile
    print imageFile
    tmpFile, headers = urlretrieve(dataFile)
#    tmpFile = 'WCSfCays5.hdf'
    # Probe the file type: try netCDF first, then HDF-EOS grid, then swath
    try:
        from Scientific.IO.NetCDF import NetCDFFile as NC
        nc = NC(tmpFile)
        fileType = 'nc'
    except:
        try:
            import hdfeos
            grids = hdfeos.grids(tmpFile)
            fileType = 'hdf_grid'
            grid = grids[0]
        except:
            try:
                swaths = hdfeos.swaths(tmpFile)
                fileType = 'hdf_swath'
                swath = swaths[0]
            except:
                raise RuntimeError, 'imageSlice: Can only slice & image netCDF or HDF grid/swath files.'
    if fileType == 'nc':
        lons = evalSlice(nc, lonSlice)
        lats = evalSlice(nc, latSlice)
        vals = evalSlice(nc, varSlice)
    elif fileType == 'hdf_grid':
        print grids
        fields = hdfeos.grid_fields(tmpFile, grid)
        print fields
        # NOTE(review): the grid branch always images the FIRST grid field;
        # lonSlice/latSlice are treated as field names and varSlice is
        # ignored -- confirm this is intended.
        field = fields[0]
        dims = hdfeos.grid_field_dims(tmpFile, grid, field)
        print dims
        lons = hdfeos.grid_field_read(tmpFile, grid, lonSlice)
        lats = hdfeos.grid_field_read(tmpFile, grid, latSlice)
        vals = hdfeos.grid_field_read(tmpFile, grid, field)
    elif fileType == 'hdf_swath':
        print swaths
        print hdfeos.swath_geo_fields(tmpFile, swath)
        print hdfeos.swath_data_fields(tmpFile, swath)
        lons = hdfeos.get_swath_field(tmpFile, swath, 'Longitude')  # assume HDFEOS conventions
        lats = hdfeos.get_swath_field(tmpFile, swath, 'Latitude')
        vals = hdfeos.get_swath_field(tmpFile, swath, varSlice)

    imageMap(lons, lats, vals, vmin, vmax, imageWidth, imageHeight, imageFile,
             title=title, projection=projection, cmap=colormap)
    return imageFile
+
def evalSlice(nc, varSlice):
    """Read a variable from an open netCDF file, applying an optional index
    expression, e.g. "Temperature[0,:,:]".

    varSlice is either a plain variable name or name + '[' + indices + ']'.
    NOTE(review): the slice text is eval'd -- only safe for trusted inputs.
    """
    if varSlice.find('[') > 0:
        varName, slice = varSlice.split('[')
        vals = nc.variables[varName]
        vals = eval('['.join(('vals', slice)))
    else:
        vals = nc.variables[varSlice][:]
    return vals
+        
+
if __name__ == '__main__':
    from sys import argv
#    lons = N.arange(0, 361, 2, N.Float)
#    lats = N.arange(-90, 91, 1, N.Float)
#    data = N.fromfunction( lambda x,y: x+y, (len(lats), len(lons)))

    outFile = 'out.png'   # only used by the commented-out demo calls below
#    imageMap(lons, lats, data, outFile)
#    marksOnMap(lons, lats, 'bx', outFile)
#    plotVtecAndJasonTracks([argv[1], argv[2]], outFile, show=True, legend=True)

    # Command line drives imageSlice: <dataFile> <lonSlice> <latSlice> <varSlice> <title> [...]
    me = argv.pop(0)                # script name (unused)
    args = map(floatOrStr, argv)    # coerce numeric-looking args to float
    imageSlice(*args)

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/reroot.py
----------------------------------------------------------------------
diff --git a/climatology/clim/reroot.py b/climatology/clim/reroot.py
new file mode 100644
index 0000000..9b7a823
--- /dev/null
+++ b/climatology/clim/reroot.py
@@ -0,0 +1,31 @@
+''' 
+ reroot.py -- Change the root of the URL for a list of files
+
+'''
+
+import sys, os
+import urlparse
+
+AIRS_DAP = 'http://airspar1.gesdisc.eosdis.nasa.gov/opendap/Aqua_AIRS'
+AIRS_FTP = 'ftp://airsl2.gesdisc.eosdis.nasa.gov/ftp/data/s4pa/Aqua_AIRS'
+# matchStart for this case is 'Aqua_AIRS'
+
+
def reroot(url, root=AIRS_DAP, matchStart='Aqua_AIRS'):
    """Replace everything before matchStart in url's path with the prefix of
    root up to matchStart, e.g. turning an FTP URL into its OPeNDAP twin.

    Raises ValueError if matchStart is absent from root or from the URL path.
    NOTE(review): the trailing [:-1] drops the last character of the path --
    presumably the newline left on URLs read from stdin; verify for callers
    that pass already-stripped URLs.
    """
    protocol, netloc, path, params, query, fragment = urlparse.urlparse(url)
    start = root[:root.index(matchStart)]
    rest = path[path.index(matchStart):-1]
    return start + rest
+
+
def main(args):
    """Read URLs from stdin and print each rerooted onto the AIRS OPeNDAP root.

    NOTE(review): args are currently ignored; root and matchStart are
    hard-coded (the commented lines show the intended arg-driven version).
    """
#    root = args[0]
#    matchStart = args[1]
    root = AIRS_DAP
    matchStart = 'Aqua_AIRS'
    for url in sys.stdin:
        print reroot(url, root, matchStart)
+        
+
+if __name__ == '__main__':
+    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/setup.py
----------------------------------------------------------------------
diff --git a/climatology/clim/setup.py b/climatology/clim/setup.py
new file mode 100644
index 0000000..8173ae6
--- /dev/null
+++ b/climatology/clim/setup.py
@@ -0,0 +1,8 @@
+
+from setuptools import setup
+
+setup(name='clim',
+      version='0.1dev0')
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/sort.py
----------------------------------------------------------------------
diff --git a/climatology/clim/sort.py b/climatology/clim/sort.py
new file mode 100644
index 0000000..615eb95
--- /dev/null
+++ b/climatology/clim/sort.py
@@ -0,0 +1,43 @@
+#
+# sort.py -- Utility routines to sort URL lists into N-day groups for 
computing climatologies.
+#
+
+import sys, os, urlparse
+
+
def sortByKeys(urls, getKeysFn):
    """Sort URLs by the keys (e.g. DOY and year) getKeysFn extracts from each.

    None entries and blank lines are dropped, and surrounding whitespace is
    stripped before key extraction.  Returns the sorted, stripped URLs.
    """
    cleaned = [u.strip() for u in urls if u is not None]
    keyed = [(getKeysFn(u), u) for u in cleaned if u != '']
    keyed.sort()
    return [pair[1] for pair in keyed]
+
+
def main(args):
    """Sort the URLs listed in the file named by args[0] (one per line) using
    ModisSst key extraction, and print the result to stdout."""
    from datasets import ModisSst
    urlFile = args[0]
    urls = open(urlFile, 'r').readlines()
    urlsSorted = sortByKeys(urls, ModisSst.getKeys)
    print '\n'.join(urlsSorted)
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
+
+
+# Get URL's for MODIS SST daily 4km netCDF files 
+# wls is a ls command for the web.  Understands FTP & HTTP root URL's and 
traverses directories.  Can also retrieve all of the matching files.
+
+# python wls.py --wildcard 'A*sst*.nc' 
ftp://podaac.jpl.nasa.gov/OceanTemperature/modis/L3/aqua/11um/v2014.0/4km/daily 
> urls
+
+# Sort by DOY, year, N/D
+# python sort.py < urls > urls_sorted
+
+# Now have URL list that is in proper order to compute N-day climatologies.
+

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/sparkTest.py
----------------------------------------------------------------------
diff --git a/climatology/clim/sparkTest.py b/climatology/clim/sparkTest.py
new file mode 100644
index 0000000..4f0cd26
--- /dev/null
+++ b/climatology/clim/sparkTest.py
@@ -0,0 +1,17 @@
#
# sparkTest.py -- word count example to test installation
#

from pyspark import SparkContext

# Classic word-count smoke test: if this script completes, Spark is working.
sc = SparkContext(appName="PythonWordCount")
#lines = sc.textFile("file:///home/code/climatology-gaussianInterp/README.md", 8)
# Use the system dictionary as a ready-made text input, split into 128 partitions
lines = sc.textFile("file:///usr/share/dict/words", 128)
counts = lines.flatMap(lambda x: x.split(' ')) \
              .map(lambda x: (x, 1)) \
              .reduceByKey(lambda x,y: x + y)

# Materialize the counts on the driver (forces the job to actually run)
output = counts.collect()
#for (word, count) in output:
#    print("%s: %i" % (word, count))

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/spatialFilter.py
----------------------------------------------------------------------
diff --git a/climatology/clim/spatialFilter.py 
b/climatology/clim/spatialFilter.py
new file mode 100644
index 0000000..192108b
--- /dev/null
+++ b/climatology/clim/spatialFilter.py
@@ -0,0 +1,36 @@
+#
+# spatialFilter routine -- Apply a fixed spatial filter (smoother) in lat/lon 
and then average over times/grids
+#
+# Calls into optimized routine in Fortran (spatialFilter_f.f).
+#
+
+import numpy as N, time
+from spatialFilter_f import spatialfilter_f
+
+
def spatialFilter(var,                      # bundle of input arrays: masked variable, coordinates
                  varNames,                 # list of names in order: primary variable, then time, lat, lon
                                            #   (corrected: the code below reads [1]=time, [2]=lat, [3]=lon,
                                            #    not "lat, lon, time" as previously documented)
                  spatialFilter,            # 3x3 filter numpy array of integers
                  normalization,            # normalization factor for filter (integer)
                  missingValue=-9999.,      # value to mark missing values in interp result
                  verbose=1,                # integer to set verbosity level
                  optimization='fortran'):  # Mode of optimization, using 'fortran' or 'cython'
    '''Apply a fixed spatial filter (smoother) in lat/lon and then average over times/grids.

    Returns (vinterp, vcount, status) where vinterp is masked wherever no
    observations contributed (vcount == 0).
    '''
    # Prepare numpy arrays for the f2py-wrapped Fortran routine
    v = var[varNames[0]][:]                     # real*4 in Fortran code, is v.dtype correct?
    vmask = N.ma.getmask(v).astype('int8')[:]   # integer*1, convert bool mask to one-byte integer for Fortran
                                                # NOTE(review): fails if v is unmasked (getmask returns scalar
                                                # False) -- confirm inputs always carry a mask array
    vtime  = var[varNames[1]][:]                # integer*4 in Fortran
    lat = var[varNames[2]][:]                   # real*4
    lon = var[varNames[3]][:]                   # real*4

    if optimization == 'fortran':
        vinterp, vcount, status = \
             spatialfilter_f(v, vmask, vtime, lat, lon,
                             spatialFilter, normalization, missingValue, verbose)
    else:
        pass    # NOTE(review): cython path not implemented; vinterp/vcount/status
                # would be undefined below -- only 'fortran' is usable

    # Mask cells where no observations contributed to the average
    vinterp = N.ma.masked_where(vcount == 0, vinterp)
    return (vinterp, vcount, status)
+

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/spatialFilter_f.f
----------------------------------------------------------------------
diff --git a/climatology/clim/spatialFilter_f.f 
b/climatology/clim/spatialFilter_f.f
new file mode 100644
index 0000000..1b705b4
--- /dev/null
+++ b/climatology/clim/spatialFilter_f.f
@@ -0,0 +1,121 @@
c
c spatialFilter routine -- Apply a fixed spatial filter (smoother) in lat/lon
c and then average over times/grids
c
c Designed to be called from python using f2py.
c
c
c Low Pass Filter = 1/9  * [1 1 1         ! normalization = 9
c                           1 1 1
c                           1 1 1]
c
c Gaussian Filter = 1/16 * [1 2 1         ! normalization = 16
c                           2 4 2
c                           1 2 1]
c
      subroutine spatialFilter_f(var, mask,
     &                         time, lat, lon,
     &                         filter, normalization,
     &                         missingValue,
     &                         verbose,
     &                         vinterp, vcount, status,
     &                         ntime, nlat, nlon)

      implicit none
      integer*4 ntime, nlat, nlon
c                 ! dimensions: # time epochs, # latitudes, # longitudes
      real*4    var(ntime, nlat, nlon)
c                 ! variable to be filtered, over ntime epochs
      integer*1 mask(ntime, nlat, nlon)
c                 ! pixel quality mask, 0 = good pixel
      integer*4 time(ntime)
c                 ! time epochs to average over
      real*4 lat(nlat)
c                 ! latitude coordinate vector
      real*4 lon(nlon)
c                 ! longitude coordinate vector
cf2py intent(in) var, mask, time, lat, lon        !! annotations for f2py processor

      integer*4 filter(3, 3)
c                 ! 3x3 filter coefficients as integers
      integer*4 normalization
c                 ! Normalization factor for the filter, divide by sum of integers
cf2py intent(in) filter, normalization

      real*4 missingValue
c                 ! value to mark missing values in filter result
      integer*4 verbose
c                 ! integer to set verbosity level
cf2py intent(in) missingValue, verbose

      real*4 vinterp(nlat, nlon)
c                 ! filtered variable, missing values not counted
      integer*4 vcount(nlat, nlon)
c                 ! count of good data, might be zero after masking
      integer*4 status
c                 ! negative status indicates error
cf2py intent(out) vinterp, vcount, status

      integer*4 iin, jin, kin
c                 ! iin,jin are stencil offsets in -1..+1; kin is the epoch index
      integer*4 i, j, fac, count
      real*4 val, sum

      write(6, *) 'Echoing inputs ...'
      write(6, *) 'ntime, nlat, nlon:', ntime, nlat, nlon
      write(6, *) 'filter:', filter
      write(6, *) 'normalization', normalization
      write(6, *) 'missingValue:', missingValue

      status = 0

      if (verbose .gt. 3) then
          write(6, *) 'time:', time
          write(6, *) 'lat:', lat
          write(6, *) 'lon:', lon
c          write(6, *) 'mask(3):', mask(3,:,:)
          write(6, *) 'var(3):', var(3,:,:)
      end if

      do i = 1, nlat
         if (verbose .gt. 1) write(6, *) lat(i)
         do j = 1, nlon
            vinterp(i,j) = 0.0
c                 ! vcount is integer; initialize with an integer literal
            vcount(i,j) = 0
            if (verbose .gt. 3) then
               write(6, *) '(i,j) = ', i, j
               write(6, *) '(lat,lon) = ', lat(i), lon(j)
            end if

            do kin = 1, ntime
               sum = 0.0
               count = 0
               do iin = -1, +1
                  if (i+iin .lt. 1 .or. i+iin .gt. nlat) cycle
                  do jin = -1, +1
                     if (j+jin .lt. 1 .or. j+jin .gt. nlon) cycle

c                 ! BUG FIX: index the neighbor pixel at (i+iin, j+jin).
c                 ! The original indexed mask/var with the bare stencil
c                 ! offsets (iin, jin), i.e. indices -1..1, which reads
c                 ! out of bounds and never touches the intended pixels.
                     if (mask(kin, i+iin, j+jin) .eq. 0) then
                        fac = filter(iin+2, jin+2)
                        val = var(kin, i+iin, j+jin)
                        sum = sum + fac * val
                        count = count + fac
                     end if
                  end do
               end do
               if (count .gt. 0) then
c                 ! filter for (i,j) pixel isn't empty
                  vinterp(i,j) = vinterp(i,j) + sum / normalization
                  vcount(i,j) = vcount(i,j) + 1
               end if
            end do
            if (vcount(i,j) .gt. 0) then
               vinterp(i,j) = vinterp(i,j) / vcount(i,j)
c              ! compute mean over number of non-empty times/grids
            else
               vinterp(i,j) = missingValue
            end if
         end do
      end do
      return
      end
+

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/spatialFilter_f.mk
----------------------------------------------------------------------
diff --git a/climatology/clim/spatialFilter_f.mk 
b/climatology/clim/spatialFilter_f.mk
new file mode 100644
index 0000000..9150b5b
--- /dev/null
+++ b/climatology/clim/spatialFilter_f.mk
@@ -0,0 +1 @@
+f2py -c -m spatialFilter_f spatialFilter_f.f

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/split.py
----------------------------------------------------------------------
diff --git a/climatology/clim/split.py b/climatology/clim/split.py
new file mode 100755
index 0000000..cf3129b
--- /dev/null
+++ b/climatology/clim/split.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python
+
+"""
+split.py == Some utility functions to split lists of URL's into chunks or time 
periods.
+
+"""
+
+import sys, os, re, json
+import datetime
+
+
def fixedSplit(seq, n):
    '''Split a sequence into fixed-length chunks of length N.  Last chunk is different length.

Yields lists of n consecutive items from seq; the final list holds any remainder.
    '''
    chunk = []
    for i, s in enumerate(seq):
        # append, not extend: each item of seq is ONE element of the chunk.
        # extend() iterated over the item, flattening strings into characters
        # and (key, urls) tuples into their members, corrupting the chunks.
        chunk.append(s)
        if (i+1) % n == 0:
            yield chunk
            chunk = []
    if len(chunk) > 0: yield chunk
+
+
def splitByMonth(seq, timeFromFilename={'get': 'doy', 'regex': re.compile(r'\/A.(....)(...)')}, transformer=None, keyed=True):
    '''Split URL's into months using regex to extract information from the filename.  Return list or keyed dictionary.'''
    # Compare the whole 'get' mode string.  The original tested
    # timeFromFilename['get'][1] -- the single character 'o' -- which can
    # never equal 'doy', so the doy->month transformer was never installed.
    if timeFromFilename['get'] == 'doy':
        # keys are (year, doy); convert day-of-year to a month key
        transformer = lambda keys: (keys[0], doy2month(*keys))
    urlsByMonth = [ku for ku in splitByKeys(seq, timeFromFilename['regex'], transformer, keyed)]
    return urlsByMonth
+
+
def splitByKeys(seq, regex, transformer=None, keyed=True):
    '''Split a sequence into chunks by a key.
The key is extracted from the string by matching a regular expression to the string and returning the matched groups.

Yields lists of strings (keyed=False) or (key, list) pairs (keyed=True);
a single all-digits key is converted to an integer key.
    '''
    regex = re.compile(regex)

    def emit(keys, chunk):
        # Package one finished chunk for yielding, shared by the in-loop
        # and final-flush paths (the original duplicated this logic and
        # dropped the final chunk when keyed with a multi-component key).
        if not keyed:
            return chunk
        if len(keys) == 1:
            try:
                return (int(keys[0]), chunk)
            except (TypeError, ValueError):
                return (keys, chunk)
        return (keys, chunk)

    chunk = []
    keys = None
    for i, s in enumerate(seq):
        s = s.strip()
        keys1 = extractKeys(s, regex, transformer)
        if i == 0:
            keys = keys1
        if keys1 != keys:
            yield emit(keys, chunk)
            chunk = [s]
            keys = keys1
        else:
            chunk.append(s)
    if len(chunk) > 0:
        yield emit(keys, chunk)
+
+
def extractKeys(s, regex, transformer=None):
    '''Extract keys from a string by matching a regular expression to the string and returning the matched groups.  Transformer functions alter the keys.

Exits the process with status 1 if the regex does not match (fatal error).
    '''
    regex = re.compile(regex)     # no-op if already compiled
    mat = regex.search(s)
    if not mat:
        # sys.stderr.write works in both python 2 and 3; the original used
        # the python-2-only 'print >>sys.stderr' statement form.
        sys.stderr.write('extractKeys: Fatal error, regex %s does not match %s\n'
                         % (regex.pattern, s))
        sys.exit(1)
    keys = mat.groups()
    if transformer is not None:
        keys = transformer(keys)
    return keys
+    
+
def splitByNDays(seq, n, regex, transformer=None, keyed=True):
    '''Split URL's into N-day chunks.'''
    # Group the URLs by day first, then batch the daily groups n at a time.
    dailyGroups = list(splitByKeys(seq, regex, transformer, keyed))
    for batch in fixedSplit(dailyGroups, n):
        yield batch
+
def splitByNDaysKeyed(seq, n, regex, transformer=None, keyed=True):
    '''Split URL's into N-day chunks.'''
    # splitByKeys yields url groups keyed by DOY (as an int, or as a tuple
    # of match groups); re-key each group by the first day of its n-day bin.
    for keys, urls in splitByKeys(seq, regex, transformer, keyed):
        try:
            day = int(keys[0])       # key is a tuple of strings
        except (TypeError, ValueError, IndexError):
            day = int(keys)          # key is already a plain integer
        binStart = ((day - 1) // n) * n + 1
        yield (binStart, urls)
+
def groupByKeys(seq):
    '''Merge multiple keys into a single key by appending lists.

Given (key, chunk) pairs, concatenate the chunks sharing a key and return
the merged (key, chunk) pairs sorted by key.
    '''
    pairs = list(seq)
    merged = {}
    for key, chunk in pairs:
        if key in merged:
            merged[key].extend(chunk)   # extend mutates in place; its None return is unused
        else:
            merged[key] = chunk
    return [(k, merged[k]) for k in sorted(merged)]
+
+
def windowSplit(seq, nEpochs, nWindow):
    '''Split a sequence (e.g. of daily files/urls) into nWindow-long chunks for climatology averaging.
The length of the window will usually be longer than the nEpochs the average is good for.
    '''
    # NOTE(review): not implemented -- intentionally a stub returning None.
    pass
+
+
+# Tests follow.
+
def test1(args):
    '''Exercise fixedSplit on a file of URLs: args = [chunkLength, urlListFile].'''
    n = int(args[0])
    fn = args[1]
    with open(fn, 'r') as f:
        # iterating the file yields lines; each chunk is n lines
        for chunk in fixedSplit(f, n):
            print ' '.join(chunk)
+
+def test2(args):
+    regex = args[0]
+    regex = re.compile(regex)
+    fn = args[1]
+    with open(fn, 'r') as f:
+        for chunk in splitByKey(f, regex):
+            print ' '.join(chunk)
+
def test3(args):
    '''Broken!  Exercise splitByNDays: args = [nDays, regex, urlListFile].'''
    nDays = int(args[0])
    regex = args[1]
    regex = re.compile(regex)
    fn = args[2]
    with open(fn, 'r') as f:
        for chunk in splitByNDays(f, nDays, regex):
            print chunk
+
def test4(args):
    '''Correct!  Exercise splitByNDays, printing one URL per line plus the chunk size.
args = [nDays, regex, urlListFile].'''
    nDays = int(args[0])
    regex = args[1]
    regex = re.compile(regex)
    fn = args[2]
    with open(fn, 'r') as f:
        for chunk in splitByNDays(f, nDays, regex):
            print
            print '\n'.join(chunk)
            print len(chunk)
+
def test5(args):
    '''Generate multi-line JSON for pyspark.  args = [nDays, regex, urlListFile].'''
    nDays = int(args[0])
    regex = args[1]          # splitByKeys compiles the pattern itself
    fn = args[2]
    with open(fn, 'r') as f:
        for chunk in splitByNDays(f, nDays, regex):
            print json.dumps(chunk)
+
def test6(args):
    '''Generate keyed split by month for spark.  args = [regex, urlListFile].'''
    regex = args[0]
    fn = args[1]
    with open(fn, 'r') as f:
        for chunk in splitByMonth(f, {'get': 'doy', 'regex': re.compile(regex)}):
            print chunk
+
+
def main(args):
    # One test is enabled at a time; uncomment the test to run.
#    test1(args)
#    test2(args)
#    test3(args)
#    test4(args)
#    test5(args)
    test6(args)

if __name__ == '__main__':
    import sys
    main(sys.argv[1:])
+
+
+# python split.py 5 '(...).L3m' urls_sst_daynight_2003_2015_sorted.txt
+
+# python split.py '\/A(....)(...)' urls_sst_daynight_2003_4months.txt
+# python split.py '\/A(....)(...)' urls_sst_daynight_2003_2015.txt

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/test/__init__.py
----------------------------------------------------------------------
diff --git a/climatology/clim/test/__init__.py 
b/climatology/clim/test/__init__.py
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/test/ccmpTest.py
----------------------------------------------------------------------
diff --git a/climatology/clim/test/ccmpTest.py 
b/climatology/clim/test/ccmpTest.py
new file mode 100644
index 0000000..b15494d
--- /dev/null
+++ b/climatology/clim/test/ccmpTest.py
@@ -0,0 +1,19 @@
+"""
+Copyright (c) 2017 Jet Propulsion Laboratory,
+California Institute of Technology.  All rights reserved
+"""
+
+import unittest
+import ClimatologySpark2
+
+
class CCMPTest(unittest.TestCase):
    def test_ccmp(self):
        """Run the CCMPWind climatology pipeline end-to-end on a local
        multicore Spark configuration.

        Renamed from 'cmmp_test': unittest only discovers methods whose
        names start with 'test', so under the original name this test
        never ran.
        """
        dsName = 'CCMPWind'
        nEpochs = '1'
        nWindow = '1'
        averager = 'pixelMean'
        sparkConfig = 'multicore,4,4'
        outHdfsPath = 'cache/clim'

        ClimatologySpark2.main([dsName, nEpochs, nWindow, averager, sparkConfig, outHdfsPath])

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/timePartitions.py
----------------------------------------------------------------------
diff --git a/climatology/clim/timePartitions.py 
b/climatology/clim/timePartitions.py
new file mode 100755
index 0000000..993d939
--- /dev/null
+++ b/climatology/clim/timePartitions.py
@@ -0,0 +1,32 @@
+"""
+ timePartitions.py
+
+ Routines to partition time ranges into segments, and time-ordered
+ file URL's into time segments.
+
+"""
+
+import os, sys
+
+
def partitionFilesByKey(paths, path2keyFn):
    """Partition a list of files (paths) into groups by a key.
The key is computed from the file path by the passed-in 'path2key' function.

For example, to group files by day or month, the key function could return a string
date like 'YYYY/MM/DD' or a month as 'YYYY/MM'.

Yields (key, [paths...]) pairs; blank paths are skipped.
    """
    # Derive the initial key lazily from the first non-blank path.  The
    # original keyed off paths[0], which raised IndexError on an empty list
    # and emitted a bogus empty group when the first path was blank.
    key = None
    groupedPaths = []
    for path in paths:
        if path.strip() == '': continue
        nextKey = path2keyFn(path)
        if key is None:
            key = nextKey
        if nextKey != key:
            yield (key, groupedPaths)
            key = nextKey
            groupedPaths = [path]
        else:
            groupedPaths.append(path)
    if len(groupedPaths) > 0: yield (key, groupedPaths)
+
+

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/util/__init__.py
----------------------------------------------------------------------
diff --git a/climatology/clim/util/__init__.py 
b/climatology/clim/util/__init__.py
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/util/array.py
----------------------------------------------------------------------
diff --git a/climatology/clim/util/array.py b/climatology/clim/util/array.py
new file mode 100755
index 0000000..bc759f3
--- /dev/null
+++ b/climatology/clim/util/array.py
@@ -0,0 +1,180 @@
+#!/bin/env python
+
+"""
+array.py -- Simple utilities for arrays:  slice, compress by mask, bundle up, 
etc.
+"""
+
+import sys, os
+import numpy as N
+#import pynio as NC
+
+
def sliceArrays(i, j, *arrays):
    """Slice all input arrays using the supplied indices, a[i:j], and return them in the
same order as input.
    """
    sliced = []
    for arr in arrays:
        sliced.append(arr[i:j])
    return sliced
+
def compressArrays(mask, *arrays):
    """Compress all input arrays using the supplied mask, a.compress(mask), and return them
in the same order as input.
    """
    compressed = []
    for arr in arrays:
        compressed.append(arr.compress(mask))
    return compressed
+
+
class BadNameError(RuntimeError): pass    # attribute name collides with a reserved name
class DimensionError(RuntimeError): pass  # dimension size is not an integer
class VariableError(RuntimeError): pass   # variable value is not a sequence/array

# Names used internally by BundleOfArrays; user attrs/dims/vars must not shadow them.
ReservedKeywords = ['dimensions', 'variables', 'attributes']
+
+
class BundleOfArrays(object):
    """Simple holder class for a bundle of arrays (variables).  Each variable is a vector
or array over the supplied dimensions, or a scalar value.  Its purpose is to synchronize
modification of many arrays, such as by slice or compress, and to provide persistence for
a bundle of variables to a file (e.g. netCDF).

Attributes, dimensions, and variables are kept in three dicts and also
mirrored onto the instance via setattr for convenient dotted access.
    """
    def __init__(self, dimensions=None, **kwargs):
        """Init object with array dimensions and scalar values."""
        self.dimensions = {}; self.variables = {}; self.attributes = {}
        self.createAttributes(**kwargs)
        if dimensions is None: dimensions = ('Record', -1)   # default dim name for vectors
        self.createDims(dimensions)                          # of unlimited dimension (-1)

    def createAttribute(self, name, val):
        # Register a scalar attribute and expose it as self.<name>.
        self.checkForBadName(name)
        self.attributes[name] = val
        setattr(self, name, val)
        return self

    def createAttributes(self, **kwargs):
        # Register several attributes at once from keyword arguments.
        # NOTE(review): iteritems() is python-2 only.
        for key, val in kwargs.iteritems():
            self.createAttribute(key, val)

    def createDim(self, name, size):
        """Create a dimension to be used in the variable arrays."""
        self.checkForBadName(name)
        if type(size) != int: raise DimensionError('Size of dimension must be an integer')
        self.dimensions[name] = size

    def createDims(self, *dims):
        """Create multiple dimensions from list of (name, size) tuples."""
        for dim in dims:
            if type(dim) == tuple:
                name, val = dim
            else:
                # assumes a dim object carries a .name attribute -- TODO confirm
                name = dim.name; val = dim
            self.createDim(name, val)
        return self

    def createVar(self, name, val, copy=False):
        """Create a variable array.  If the value is None, the variable is not created.
By default, the array is NOT copied since a copy may have just been made by slicing, etc.
If you need to make a copy of the incoming val array, use copy=True.
        """
        self.checkForBadName(name)
        if val is not None:
            try:
                n = len(val)   # len() must succeed, so scalars are rejected here
                if copy:
                    try:
                        val = val.copy()   # use copy method if it exists, e.g. numpy array
                    except:
                        val = val[:]   # copy array by slicing
            except:
                raise VariableError('Variable must be a list, vector, array, or None.')
            self.variables[name] = val
            setattr(self, name, val)

    def createVars(self, *vars):
        """Create multiple variables from list of (name, value) tuples."""
        for var in vars:
            if type(var) == list or type(var) == tuple:
                name, val = var
            else:
                # assumes a var object carries a .name attribute -- TODO confirm
                name = var.name; val = var
            self.createVar(name, val)
        return self

    def slice(self, i, j):
        """Slice all of the variable arrays as a[i:j], and return new array bundle."""
        out = self.shallowCopy()
        for key in self.variables:
            val = self.variables[key][i:j]
            out.createVar(key, val, copy=False)   # slicing already made a new object
        return out

    def compress(self, mask):
        """Compress all of the variable arrays using a mask, a.compress(mask)[i:j], and return
a new array bundle.
        """
        out = self.shallowCopy()
        for key in self.variables:
            s = self.variables[key].shape
            a = self.variables[key]
            if len(s) == 1:
                val = a.compress(mask)
            else:
                # Can't use N.compress() from old numpy version because
                # it flattens the additional dimensions (bug).
                # Temporarily, do it by lists in python (slower)
                val = N.array([a[i] for i, b in enumerate(mask) if b])
            out.createVar(key, val, copy=True)
        return out

    def shallowCopy(self):
        """Shallow copy of the object, retaining all attributes, dimensions, and variables.
*** Note:  This routine does NOT copy the arrays themselves, but slice & compress do. ***
        """
        out = BundleOfArrays()
        out.attributes = self.attributes.copy()
        # Must use copy() here or both bundles will point to same attr/dim/var dictionaries.
        # It is a shallow copy, but this is OK since attr/dim values are immutable.
        for key, val in out.attributes.iteritems(): setattr(out, key, val)
        out.dimensions = self.dimensions.copy()
        out.variables = self.variables.copy()
        # Again, shallow copy is OK, referred-to arrays are copied when slice/compress called
        for key, val in out.variables.iteritems(): setattr(out, key, val)
        return out

    def checkForBadName(self, name):
        """Ensure that name is not in reserved attribute list.
Raises exception BadNameError.
        """
        if name in ReservedKeywords:
            raise BadNameError('Attribute name, "%s", is in reserved list %s' % (name, \
                    str(ReservedKeywords)))
#        try:
#            k = getattr(self, name)
#            raise BadNameError('Attribute %s already exists.' % name)
#        except:
#            pass

    def __repr__(self):
        # Summarize the three registries; variable values are elided (keys only).
        return 'Attributes:  %s\nDimensions:  %s\nVariables:  %s' % tuple(
                   map(str, (self.attributes, self.dimensions, self.variables.keys())))
+
+
def test(args):
    # Build a 4-variable bundle and exercise shallowCopy and slice.
    n = 300
    vars = ['obsL', 'errL', 'obsP', 'errP']
    obsL = N.array(range(n))+1.; errL = N.ones(n)
    obsP = N.array(range(n))+1.; errP = N.ones(n)
    obs = BundleOfArrays(name='test', nRecords=n).createVars(*zip(vars, (obsL, errL, obsP, errP)))
    print obs
    print len(obs.obsL)
    a = obs.shallowCopy()
    print a
    print len(a.obsL)
    b = obs.slice(100, 200)    # slicing copies; originals keep length 300
    print b
    print len(b.obsL)
    print len(a.obsL)
    print len(obs.obsL)

def main(args):
    test(args)

if __name__ == '__main__':
    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/util/introspect.py
----------------------------------------------------------------------
diff --git a/climatology/clim/util/introspect.py 
b/climatology/clim/util/introspect.py
new file mode 100755
index 0000000..1fcf4bf
--- /dev/null
+++ b/climatology/clim/util/introspect.py
@@ -0,0 +1,35 @@
+#!/bin/env python
+
+"""
+introspect.py -- A few introspection utilities, most importantly myArgs().
+"""
+
+import sys, os
+from inspect import currentframe, getargvalues, getmodule
+
+
def callingFrame():
    """Return calling frame info (the frame object of this function's caller)."""
    here = currentframe()
    return here.f_back
+
def myArgs():
    """Return the arguments of the executing function (the caller of myArgs)."""
    # getargvalues returns (args, varargs, keywords, locals); element [3] is
    # the locals dict of the caller's frame, mapping argument names to values.
    return getargvalues(currentframe().f_back)[3]
+
+
def test():
    # Demonstrate frame introspection from inside a nested function.
    def func1(a, b, c):
        print currentframe()
        print getmodule(callingFrame())   # getmodule doesn't work on frame objects
        args = myArgs()                   # locals of func1, incl. a, b, c
        print args
        return a + b + c

    return func1(1, 2, 3)


def main(args):
    print test()

if __name__ == '__main__':
    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/util/plot.py
----------------------------------------------------------------------
diff --git a/climatology/clim/util/plot.py b/climatology/clim/util/plot.py
new file mode 100644
index 0000000..05254ef
--- /dev/null
+++ b/climatology/clim/util/plot.py
@@ -0,0 +1,133 @@
+
+"""
+plot.py -- Simple plotting utilities
+"""
+
+import sys, os
+from matplotlib.figure import Figure
+from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
+
def echo2(*items):
    """Write the items, space-separated, to stderr."""
    sys.stderr.write(' '.join(map(str, items)) + '\n')

def echo2n(*items):
    """Write the items, one per line, to stderr."""
    sys.stderr.write('\n'.join(map(str, items)) + '\n')

def warn(*s):
    """Write a 'plot:'-prefixed message to stderr."""
    echo2('plot:', *s)

def die(*s):
    """Write a 'plot: Error,' message to stderr and exit the process."""
    warn('Error,', *s)
    sys.exit()
+
+
def makeMovie(plotFiles, outFile, deletePlots=True):
    """Make MPG movie from a series of still images.

    plotFiles may be a list of paths or a dict whose sorted keys give the
    frame order.  Uses ImageMagick's 'convert'; optionally removes the
    stills afterwards.  Returns the output file path.
    """
    if type(plotFiles) == dict:
        orderedKeys = sorted(plotFiles.keys())
        plotFiles = [plotFiles[k] for k in orderedKeys]
    cmd = 'convert %s %s' % (' '.join(plotFiles), outFile)
    warn(cmd)
    os.system(cmd)
    warn('Wrote movie ' + outFile)
    if deletePlots:
        for still in plotFiles:
            os.unlink(still)
    return outFile
+
def createPlot():
    """Create a single-axes Agg figure; return (fig, canvas, ax)."""
    figure = Figure()
    aggCanvas = FigureCanvas(figure)
    axes = figure.add_subplot(1, 1, 1)
    return (figure, aggCanvas, axes)
+
def labelPlot(fix, canvas, ax, title,
              xlabel=None,
              ylabel=None,
              xlim=None,
              ylim=None):
    # Apply axis labels and limits to a single-axes plot.
    # NOTE(review): 'fix' looks like a typo for 'fig', and 'title' is
    # currently unused because set_title is commented out -- confirm
    # intent before renaming/enabling (renaming would break keyword callers).
#    ax.set_title(title)
    if xlabel is not None: ax.set_xlabel(xlabel)
    if ylabel is not None: ax.set_ylabel(ylabel)
    if xlim is not None: ax.set_xlim(xlim)
    if ylim is not None: ax.set_ylim(ylim)
+
+
def createTwoPlots(figure=None, canvas=None, vertical=True):
    """Create (or reuse) a figure with two subplots, stacked vertically by
    default, side by side otherwise.  Returns (fig, canvas, ax1, ax2)."""
    fig = figure if figure is not None else Figure()
    if canvas is None: canvas = FigureCanvas(fig)
    nrows, ncols = (2, 1) if vertical else (1, 2)
    ax1 = fig.add_subplot(nrows, ncols, 1)
    ax2 = fig.add_subplot(nrows, ncols, 2)
    return (fig, canvas, ax1, ax2)
+
def labelTwoPlots(fig, canvas, ax1, ax2,
                   title = None,
                   title1 = None,
                   xlabel1 = None, ylabel1 = None,
                   xlim1 = None, ylim1 = None,
                   title2 = None,
                   xlabel2 = None, ylabel2 = None,
                   xlim2 = None, ylim2 = None,
                 ):
    """Apply titles, axis labels, and limits to a two-subplot figure.
    The first subplot's title falls back to the shared 'title'."""
    if title1 is None: title1 = title
    # Table-driven: one row of settings per axes; None entries are skipped.
    settings = ((ax1, title1, xlabel1, ylabel1, xlim1, ylim1),
                (ax2, title2, xlabel2, ylabel2, xlim2, ylim2))
    for ax, t, xl, yl, xlim, ylim in settings:
        if t is not None: ax.set_title(t)
        if xl is not None: ax.set_xlabel(xl)
        if yl is not None: ax.set_ylabel(yl)
        if xlim is not None: ax.set_xlim(xlim)
        if ylim is not None: ax.set_ylim(ylim)
+
+
def createFourPlots(figure=None, canvas=None):
    """Create (or reuse) a figure with a 2x2 grid of subplots.
    Returns (fig, canvas, ax1, ax2, ax3, ax4)."""
    fig = figure if figure is not None else Figure()
    if canvas is None: canvas = FigureCanvas(fig)
    axes = [fig.add_subplot(2, 2, k) for k in (1, 2, 3, 4)]
    return (fig, canvas, axes[0], axes[1], axes[2], axes[3])
+
def labelFourPlots(fig, canvas, ax1, ax2, ax3, ax4,
                   file, sat,
                   title = None,
                   title1 = None,
                   xlabel1 = None, ylabel1 = None,
                   xlim1 = None, ylim1 = None,
                   title2 = None,
                   xlabel2 = None, ylabel2 = None,
                   xlim2 = None, ylim2 = None,
                   title3 = None,
                   xlabel3 = None, ylabel3 = None,
                   xlim3 = None, ylim3 = None,
                   title4 = None,
                   xlabel4 = None, ylabel4 = None,
                   xlim4 = None, ylim4 = None,
                 ):
    """Apply titles, axis labels, and limits to a four-subplot figure.
    The shared title defaults to '<file>: <sat>' and is used for the
    first subplot when title1 is not given."""
    if title is None: title = file + ': ' + sat
#    fig.set_title(title)
    if title1 is None: title1 = title
    # Table-driven: one row of settings per axes; None entries are skipped.
    settings = ((ax1, title1, xlabel1, ylabel1, xlim1, ylim1),
                (ax2, title2, xlabel2, ylabel2, xlim2, ylim2),
                (ax3, title3, xlabel3, ylabel3, xlim3, ylim3),
                (ax4, title4, xlabel4, ylabel4, xlim4, ylim4))
    for ax, t, xl, yl, xlim, ylim in settings:
        if t is not None: ax.set_title(t)
        if xl is not None: ax.set_xlabel(xl)
        if yl is not None: ax.set_ylabel(yl)
        if xlim is not None: ax.set_xlim(xlim)
        if ylim is not None: ax.set_ylim(ylim)
+


Reply via email to