http://git-wip-us.apache.org/repos/asf/climate/blob/96aad4bf/mccsearch/code/newPreprocessingMERG.py ---------------------------------------------------------------------- diff --git a/mccsearch/code/newPreprocessingMERG.py b/mccsearch/code/newPreprocessingMERG.py new file mode 100755 index 0000000..cedc155 --- /dev/null +++ b/mccsearch/code/newPreprocessingMERG.py @@ -0,0 +1,175 @@ + +''' + The scripts for processing MERG raw data + #****************************************************************** + ''' + +import fileinput +import glob +import os +import re +import string +import subprocess +import sys + +#----------------------- GLOBAL VARIABLES -------------------------- +# --------------------- User defined variables --------------------- +LATMIN = '-5.0' #min latitude; -ve values in the SH e.g. 5S = -5 +LATMAX = '20.0' #max latitude; -ve values in the SH e.g. 5S = -5 +LONMIN = '-30.0' #min longitude; -ve values in the WH e.g. 59.8W = -59.8 +LONMAX = '30.0' #min longitude; -ve values in the WH e.g. 59.8W = -59.8 +T_BB_MAX = '250' #warmest temp to allow in K +T_BB_MIN = '180' #coolest temp to allow in K +#------------------- End user defined Variables ------------------- + +#------------------------ End GLOBAL VARS ------------------------- + +def preprocessingMERG(MERGdirname): + ''' + Purpose:: + Utility script for unzipping and converting the merg*.Z files from Mirador to + NETCDF format. 
The files end up in a folder called mergNETCDF in the directory + where the raw MERG data is + NOTE: VERY RAW AND DIRTY + + Input:: + Directory to the location of the raw MERG files, preferably zipped + + Output:: + none + + Assumptions:: + 1 GrADS (http://www.iges.org/grads/gadoc/) and lats4D (http://opengrads.org/doc/scripts/lats4d/) + have been installed on the system and the user can access + 2 User can write files in location where script is being called + 3 the files havent been unzipped + ''' + + os.chdir((MERGdirname+'/')) + imgFilename = '' + + #Just incase the X11 server is giving problems + subprocess.call('export DISPLAY=:0.0', shell=True) + + #for files in glob.glob("*-pixel"): + for files in glob.glob("*.Z"): + fname = os.path.splitext(files)[0] + + #unzip it + bash_cmd = 'gunzip ' + files + subprocess.call(bash_cmd, shell=True) + + #determine the time from the filename + ftime = re.search('\_(.*)\_',fname).group(1) + + yy = ftime[0:4] + mm = ftime[4:6] + day = ftime[6:8] + hr = ftime [8:10] + + #TODO: must be something more pythonic! 
+ + if mm=='01': + mth = 'Jan' + if mm == '02': + mth = 'Feb' + if mm == '03': + mth = 'Mar' + if mm == '04': + mth = 'Apr' + if mm == '05': + mth = 'May' + if mm == '06': + mth = 'Jun' + if mm == '07': + mth = 'Jul' + if mm == '08': + mth = 'Aug' + if mm == '09': + mth = 'Sep' + if mm == '10': + mth = 'Oct' + if mm == '11': + mth = 'Nov' + if mm == '12': + mth = 'Dec' + + + subprocess.call('rm merg.ctl', shell=True) + subprocess.call('touch merg.ctl', shell=True) + replaceExpDset = 'echo DSET ' + fname +' >> merg.ctl' + replaceExpTdef = 'echo TDEF 99999 LINEAR '+hr+'z'+day+mth+yy +' 30mn' +' >> merg.ctl' + subprocess.call(replaceExpDset, shell=True) + subprocess.call('echo "OPTIONS yrev little_endian template" >> merg.ctl', shell=True) + subprocess.call('echo "UNDEF 330" >> merg.ctl', shell=True) + subprocess.call('echo "TITLE globally merged IR data" >> merg.ctl', shell=True) + subprocess.call('echo "XDEF 9896 LINEAR 0.0182 0.036378335" >> merg.ctl', shell=True) + subprocess.call('echo "YDEF 3298 LINEAR -59.982 0.036383683" >> merg.ctl', shell=True) + subprocess.call('echo "ZDEF 01 LEVELS 1" >> merg.ctl', shell=True) + subprocess.call(replaceExpTdef, shell=True) + subprocess.call('echo "VARS 1" >> merg.ctl', shell=True) + subprocess.call('echo "ch4 1 -1,40,1,-1 IR BT (add "75" to this value)" >> merg.ctl', shell=True) + subprocess.call('echo "ENDVARS" >> merg.ctl', shell=True) + + #generate the lats4D command for GrADS + lats4D = 'lats4d -v -q -lat ' + LATMIN +' ' + LATMAX + ' ' + '-lon ' + LONMIN + ' ' + LONMAX + ' -time '+hr+'Z'+day+mth+yy + ' -func @+75 ' + '-i merg.ctl' + ' -o ' + fname + + gradscmd = 'grads -lbc ' + '\'' +lats4D + '\'' + #run grads and lats4d command + subprocess.call(gradscmd, shell=True) + + #----- Generate GrADS images-------------- + imgFilename = hr+'Z'+day+mth+yy+'.gif' + tempMaskedImages(imgFilename) + #-------End Generate GrADS images --------- + + #remove the raw data file to save some resources + rmCmd= "rm " + fname + 
subprocess.call(rmCmd, shell=True) + + #when all the files have been converted, mv the netcdf files + subprocess.call('mkdir mergNETCDF', shell=True) + subprocess.call('mv *.nc mergNETCDF', shell=True) + #mv all images + subprocess.call('mkdir mergImgs', shell=True) + subprocess.call('mv *.gif mergImgs', shell=True) + + return + +#****************************************************************** +def tempMaskedImages(imgFilename): + ''' + Purpose:: To generate temperature-masked images for a first pass verification + + Input:: + filename for the gif file + + Output:: + None - gif images for each file of IR temperature < T_BB_MAX are generated in folder called mergImgs + + Assumptions:: + Same as for preprocessingMERG + 1 GrADS (http://www.iges.org/grads/gadoc/) and lats4D (http://opengrads.org/doc/scripts/lats4d/) + have been installed on the system and the user can access + 2 User can write files in location where script is being called + 3 the files havent been unzipped + ''' + + subprocess.call('rm tempMaskedImages.gs', shell=True) + subprocess.call('touch tempMaskedImages.gs', shell=True) + subprocess.call('echo "''\'open merg.ctl''\'" >> tempMaskedImages.gs', shell=True) + subprocess.call('echo "''\'set mpdset hires''\'" >> tempMaskedImages.gs', shell=True) + subprocess.call('echo "''\'set lat ' +LATMIN + ' ' +LATMAX +'\'" >> tempMaskedImages.gs', shell=True) + subprocess.call('echo "''\'set lon ' +LONMIN + ' ' +LONMAX +'\'" >> tempMaskedImages.gs', shell=True) + subprocess.call('echo "''\'set cint 10''\'" >> tempMaskedImages.gs', shell=True) + subprocess.call('echo "''\'set cmax '+T_BB_MAX +'\'" >> tempMaskedImages.gs', shell=True) + subprocess.call('echo "''\'set cmin '+T_BB_MIN +'\'" >> tempMaskedImages.gs', shell=True) + subprocess.call('echo "''\'d ch4+75''\'" >> tempMaskedImages.gs', shell=True) + subprocess.call('echo "''\'draw title Masked Temp @ '+imgFilename +'\'" >> tempMaskedImages.gs', shell=True) + subprocess.call('echo "''\'printim 
'+imgFilename +' x1000 y800''\'" >> tempMaskedImages.gs', shell=True) + subprocess.call('echo "''\'quit''\'" >> tempMaskedImages.gs', shell=True) + gradscmd = 'grads -blc ' + '\'run tempMaskedImages.gs''\'' + subprocess.call(gradscmd, shell=True) + return + +#******************************************************************
http://git-wip-us.apache.org/repos/asf/climate/blob/96aad4bf/mccsearch/code/preprocessingMERG.py ---------------------------------------------------------------------- diff --git a/mccsearch/code/preprocessingMERG.py b/mccsearch/code/preprocessingMERG.py new file mode 100755 index 0000000..04964a8 --- /dev/null +++ b/mccsearch/code/preprocessingMERG.py @@ -0,0 +1,125 @@ +''' +#preprocessing for MERG data +# unzips files and generates the NETCDF files using GrADS lats4D +# http://opengrads.org/doc/scripts/lats4d/ +# This script requires the merg_template.ctl resides with the folder +# TODO: generate this file +# Kim Whitehall +# Last updated: June 4th 2013 +''' +import subprocess +import fileinput +import json +import os +import glob +import re +import sys + + +def preprocessing_merg(): + ''' + Purpose:: + Utility script for unzipping and converting the merg*.Z files from Mirador to + NETCDF format + NOTE: VERY RAW AND DIRTY + Input:: + none + + Output:: + none + + Assumptions:: + 1 GrADS (http://www.iges.org/grads/gadoc/) and lats4D (http://opengrads.org/doc/scripts/lats4d/) + have been installed on the system and the user can access + 2 User can write files in location where script is being called + 3 the files havent been unzipped + ''' + os.chdir("/Users/whitehal/Documents/kimSum2013/mergData1/") + searchExpDset = "DSET " + searchExpTdef = "TDEF " + #for files in glob.glob("*-pixel"): + for files in glob.glob("*.Z"): + fname = os.path.splitext(files)[0] + + #unzip it + bash_cmd = 'gunzip ' + files + subprocess.call(bash_cmd, shell=True) + + #determine the time from the filename + ftime = re.search('\_(.*)\_',fname).group(1) + + yy = ftime[0:4] + mm = ftime[4:6] + day = ftime[6:8] + hr = ftime [8:10] + + + if mm=='01': + mth = 'Jan' + if mm == '02': + mth = 'Feb' + if mm == '03': + mth = 'Mar' + if mm == '04': + mth = 'Apr' + if mm == '05': + mth = 'May' + if mm == '06': + mth = 'Jun' + if mm == '07': + mth = 'Jul' + if mm == '08': + mth = 'Aug' + if mm == '09': 
+ mth = 'Sep' + if mm == '10': + mth = 'Oct' + if mm == '11': + mth = 'Nov' + if mm == '12': + mth = 'Dec' + + + subprocess.call('rm merg.ctl', shell=True) + subprocess.call('touch merg.ctl', shell=True) + replaceExpDset = 'echo DSET ' + fname +' >> merg.ctl' + replaceExpTdef = 'echo TDEF 99999 LINEAR '+hr+'z'+day+mth+yy +' 30mn' +' >> merg.ctl' + subprocess.call(replaceExpDset, shell=True) + subprocess.call('echo "OPTIONS yrev little_endian template" >> merg.ctl', shell=True) + subprocess.call('echo "UNDEF 330" >> merg.ctl', shell=True) + subprocess.call('echo "TITLE globally merged IR data" >> merg.ctl', shell=True) + subprocess.call('echo "XDEF 9896 LINEAR 0.0182 0.036378335" >> merg.ctl', shell=True) + subprocess.call('echo "YDEF 3298 LINEAR -59.982 0.036383683" >> merg.ctl', shell=True) + subprocess.call('echo "ZDEF 01 LEVELS 1" >> merg.ctl', shell=True) + subprocess.call(replaceExpTdef, shell=True) + subprocess.call('echo "VARS 1" >> merg.ctl', shell=True) + subprocess.call('echo "ch4 1 -1,40,1,-1 IR BT (add "75" to this value)" >> merg.ctl', shell=True) + subprocess.call('echo "ENDVARS" >> merg.ctl', shell=True) + + + + # #cp template merg.ctl + # subprocess.call('cp -f merg_template.ctl merg.ctl', shell=True) + + # #edit merg.ctl dset line to contain filename by running file + # #http://stackoverflow.com/questions/39086/search-and-replace-a-line-in-a-file-in-python + # replaceExpDset = 'DSET ' + fname + # replaceExpTdef = 'TDEF 99999 LINEAR '+hr+'z'+day+mth+yy +' 30mn' + # for line in fileinput.input("merg.ctl", inplace=True): + # if searchExpDset in line: + # line = line.replace(searchExpDset, replaceExpDset) + # if searchExpTdef in line: + # line = line.replace(searchExpTdef, replaceExpTdef) + # sys.stdout.write(line) + + #lats4d cmd + lats4D = 'lats4d -v -q -lat -5 40 -lon -90 60 -time '+hr+'Z'+day+mth+yy + ' -func @+75 ' + '-i merg.ctl' + ' -o ' + fname + + gradscmd = 'grads -blc ' + '\'' +lats4D + '\'' + #run grads and lats4d command + 
subprocess.call(gradscmd, shell=True) + + #when all the files have benn converted, mv the netcdf files + subprocess.call('mkdir mergNETCDF', shell=True) + subprocess.call('mv *.nc mergNETCDF', shell=True) + return \ No newline at end of file http://git-wip-us.apache.org/repos/asf/climate/blob/96aad4bf/mccsearch/code/process.py ---------------------------------------------------------------------- diff --git a/mccsearch/code/process.py b/mccsearch/code/process.py new file mode 100644 index 0000000..5a362ec --- /dev/null +++ b/mccsearch/code/process.py @@ -0,0 +1,1284 @@ +"""Collection of functions that process numpy arrays of varying shapes. +Functions can range from subsetting to regridding""" + +# Python Standard Libary Imports +import math +import datetime +import re +import string +import sys +import time + +# 3rd Party Imports +import Nio +import numpy as np +import numpy.ma as ma +from scipy.ndimage import map_coordinates + + +def extract_subregion_from_data_array(data, lats, lons, latmin, latmax, lonmin, lonmax): + ''' + Extract a sub-region from a data array. + + Example:: + **e.g.** the user may load a global model file, but only want to examine data over North America + This function extracts a sub-domain from the original data. + The defined sub-region must be a regular lat/lon bounding box, + but the model data may be on a non-regular grid (e.g. rotated, or Guassian grid layout). + Data are kept on the original model grid and a rectangular (in model-space) region + is extracted which contains the rectangular (in lat/lon space) user supplied region. 
+ + INPUT:: + data - 3d masked data array + lats - 2d array of latitudes corresponding to data array + lons - 2d array of longitudes corresponding to data array + latmin, latmax, lonmin, lonmax - bounding box of required region to extract + + OUTPUT:: + data2 - subset of original data array containing only data from required subregion + lats2 - subset of original latitude data array + lons2 - subset of original longitude data array + + ''' + + + + # Mask model data array to find grid points inside the supplied bounding box + whlat = (lats > latmin) & (lats < latmax) + whlon = (lons > lonmin) & (lons < lonmax) + wh = whlat & whlon + + # Find required matrix indices describing the limits of the regular lat/lon bounding box + jmax = np.where(wh == True)[0].max() + jmin = np.where(wh == True)[0].min() + imax = np.where(wh == True)[1].max() + imin = np.where(wh == True)[1].min() + + # Cut out the sub-region from the data array + data2 = ma.zeros((data.shape[0], jmax - jmin, imax - imin)) + data2 = data[:, jmin:jmax, imin:imax] + + # Cut out sub-region from lats,lons arrays + lats2 = lats[jmin:jmax, imin:imax] + lons2 = lons[jmin:jmax, imin:imax] + + return data2, lats2, lons2 + +def calc_area_mean(data, lats, lons, mymask='not set'): + ''' + Calculate Area Average of data in a masked array + + INPUT:: + data: a masked array of data (NB. only data from one time expected to be passed at once) + lats: 2d array of regularly gridded latitudes + lons: 2d array of regularly gridded longitudes + mymask: (optional) defines spatial region to do averaging over + + OUTPUT:: + area_mean: a value for the mean inside the area + + ''' + + + + # If mask not passed in, then set maks to cover whole data domain + if(mymask == 'not set'): + mymask = np.empty(data.shape) + mymask[:] = False # NB. mask means (don't show), so False everywhere means use everything. 
+ + # Dimension check on lats, lons + # Sometimes these arrays are 3d, sometimes 2d, sometimes 1d + # This bit of code just converts to the required 2d array shape + if(len(lats.shape) == 3): + lats = lats[0, :, :] + + if(len(lons.shape) == 3): + lons = lons[0, :, :] + + if(np.logical_and(len(lats.shape) == 1, len(lons.shape) == 1)): + lons, lats = np.meshgrid(lons, lats) + + # Calculate grid length (assuming regular lat/lon grid) + dlat = lats[1, 0] - lats[0, 0] + dlon = lons[0, 1] - lons[0, 0] + + # Calculates weights for each grid box + myweights = calc_area_in_grid_box(lats, dlon, dlat) + + # Create a new masked array covering just user selected area (defined in mymask) + # NB. this preserves missing data points in the observations data + subdata = ma.masked_array(data, mask=mymask) + + if(myweights.shape != subdata.shape): + myweights.resize(subdata.shape) + myweights[1:, :] = myweights[0, :] + + # Calculate weighted mean using ma.average (which takes weights) + area_mean = ma.average(subdata, weights=myweights) + + return area_mean + +def calc_area_in_grid_box(latitude, dlat, dlon): + ''' + Calculate area of regular lat-lon grid box + + INPUT:: + latitude: latitude of grid box (degrees) + dlat: grid length in latitude direction (degrees) + dlon: grid length in longitude direction (degrees) + + OUTPUT:: + A: area of the grid box + + ''' + + R = 6371000 # radius of Earth in metres + C = 2 * math.pi * R + + latitude = np.radians(latitude) + + A = (dlon * (C / 360.)) * (dlat * (C / 360.) 
* np.cos(latitude)) + + return A + +def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi= -999999999): + ''' + Perform regridding from one set of lat,lon values onto a new set (lat2,lon2) + + Input:: + q - the variable to be regridded + lat,lon - original co-ordinates corresponding to q values + lat2,lon2 - new set of latitudes and longitudes that you want to regrid q onto + order - (optional) interpolation order 1=bi-linear, 3=cubic spline + mdi - (optional) fill value for missing data (used in creation of masked array) + + Output:: + q2 - q regridded onto the new set of lat2,lon2 + + ''' + + nlat = q.shape[0] + nlon = q.shape[1] + + nlat2 = lat2.shape[0] + nlon2 = lon2.shape[1] + + # To make our lives easier down the road, let's + # turn these into arrays of x & y coords + loni = lon2.ravel() + lati = lat2.ravel() + + loni = loni.copy() # NB. it won't run unless you do this... + lati = lati.copy() + + # Now, we'll set points outside the boundaries to lie along an edge + loni[loni > lon.max()] = lon.max() + loni[loni < lon.min()] = lon.min() + + # To deal with the "hard" break, we'll have to treat y differently, + # so we're just setting the min here... + lati[lati > lat.max()] = lat.max() + lati[lati < lat.min()] = lat.min() + + + # We need to convert these to (float) indicies + # (xi should range from 0 to (nx - 1), etc) + loni = (nlon - 1) * (loni - lon.min()) / (lon.max() - lon.min()) + + # Deal with the "hard" break in the y-direction + lati = (nlat - 1) * (lati - lat.min()) / (lat.max() - lat.min()) + + # Notes on dealing with MDI when regridding data. + # Method adopted here: + # Use bilinear interpolation of data by default (but user can specify other order using order=... in call) + # Perform bilinear interpolation of data, and of mask. + # To be conservative, new grid point which contained some missing data on the old grid is set to missing data. + # -this is achieved by looking for any non-zero interpolated mask values. 
+ # To avoid issues with bilinear interpolation producing strong gradients leading into the MDI, + # set values at MDI points to mean data value so little gradient visible = not ideal, but acceptable for now. + + # Set values in MDI so that similar to surroundings so don't produce large gradients when interpolating + # Preserve MDI mask, by only changing data part of masked array object. + for shift in (-1, 1): + for axis in (0, 1): + q_shifted = np.roll(q, shift=shift, axis=axis) + idx = ~q_shifted.mask * q.mask + q.data[idx] = q_shifted[idx] + + # Now we actually interpolate + # map_coordinates does cubic interpolation by default, + # use "order=1" to preform bilinear interpolation instead... + q2 = map_coordinates(q, [lati, loni], order=order) + q2 = q2.reshape([nlat2, nlon2]) + + # Set values to missing data outside of original domain + q2 = ma.masked_array(q2, mask=np.logical_or(np.logical_or(lat2 >= lat.max(), + lat2 <= lat.min()), + np.logical_or(lon2 <= lon.min(), + lon2 >= lon.max()))) + + # Make second map using nearest neighbour interpolation -use this to determine locations with MDI and mask these + qmdi = np.zeros_like(q) + qmdi[q.mask == True] = 1. + qmdi[q.mask == False] = 0. + qmdi_r = map_coordinates(qmdi, [lati, loni], order=order) + qmdi_r = qmdi_r.reshape([nlat2, nlon2]) + mdimask = (qmdi_r != 0.0) + + # Combine missing data mask, with outside domain mask define above. + q2.mask = np.logical_or(mdimask, q2.mask) + + return q2 + +def create_mask_using_threshold(masked_array, threshold=0.5): + ''' + Routine to create a mask, depending on the proportion of times with missing data. + For each pixel, calculate proportion of times that are missing data, + **if** the proportion is above a specified threshold value, + then mark the pixel as missing data. + + Input:: + masked_array - a numpy masked array of data (assumes time on axis 0, and space on axes 1 and 2). 
+ threshold - (optional) threshold proportion above which a pixel is marked as 'missing data'. + NB. default threshold = 50% + + Output:: + mymask - a numpy array describing the mask. NB. not a masked array, just the mask itself. + + ''' + + + # try, except used as some model files don't have a full mask, but a single bool + # the except catches this situation and deals with it appropriately. + try: + nT = masked_array.mask.shape[0] + + # For each pixel, count how many times are masked. + nMasked = masked_array.mask[:, :, :].sum(axis=0) + + # Define new mask as when a pixel has over a defined threshold ratio of masked data + # e.g. if the threshold is 75%, and there are 10 times, + # then a pixel will be masked if more than 5 times are masked. + mymask = nMasked > (nT * threshold) + + except: + mymask = np.zeros_like(masked_array.data[0, :, :]) + + return mymask + +def calc_average_on_new_time_unit(data, dateList, unit='monthly'): + ''' + Routine to calculate averages on longer time units than the data exists on. + + Example:: + If the data is 6-hourly, calculate daily, or monthly means. + + Input:: + data - data values + dateList - list of python datetime structures corresponding to data values + unit - string describing time unit to average onto. + *Valid values are: 'monthly', 'daily', 'pentad','annual','decadal'* + + Output: + meanstorem - numpy masked array of data values meaned over required time period + newTimesList - a list of python datetime objects representing the data in the new averagin period + *NB.* currently set to beginning of averaging period, + **i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.** + ''' + + # First catch unknown values of time unit passed in by user + acceptable = (unit == 'full') | (unit == 'annual') | (unit == 'monthly') | (unit == 'daily') | (unit == 'pentad') + + if not acceptable: + print 'Error: unknown unit type selected for time averaging' + print ' Please check your code.' 
+ return + + # Calculate arrays of years (2007,2007), + # yearsmonths (200701,200702), + # or yearmonthdays (20070101,20070102) + # -depending on user selected averaging period. + + # Year list + if unit == 'annual': + print 'Calculating annual mean data' + timeunits = [] + + for i in np.arange(len(dateList)): + + timeunits.append(str(dateList[i].year)) + + timeunits = np.array(timeunits, dtype=int) + + # YearMonth format list + if unit == 'monthly': + print 'Calculating monthly mean data' + timeunits = [] + + for i in np.arange(len(dateList)): + timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month)) + + timeunits = np.array(timeunits, dtype=int) + + # YearMonthDay format list + if unit == 'daily': + print 'Calculating daily mean data' + timeunits = [] + + for i in np.arange(len(dateList)): + timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + \ + str("%02d" % dateList[i].day)) + + timeunits = np.array(timeunits, dtype=int) + + + # TODO: add pentad setting using Julian days? + + + # Full list: a special case + if unit == 'full': + print 'Calculating means data over full time range' + timeunits = [] + + for i in np.arange(len(dateList)): + timeunits.append(999) # i.e. we just want the same value for all times. + + timeunits = np.array(timeunits, dtype=int) + + + + # empty list to store new times + newTimesList = [] + + # Decide whether or not you need to do any time averaging. + # i.e. if data are already on required time unit then just pass data through and + # calculate and return representative datetimes. + processing_required = True + if len(timeunits) == (len(np.unique(timeunits))): + processing_required = False + + # 1D data arrays, i.e. 
time series + if data.ndim == 1: + # Create array to store the resulting data + meanstore = np.zeros(len(np.unique(timeunits))) + + # Calculate the means across each unique time unit + i = 0 + for myunit in np.unique(timeunits): + if processing_required: + datam = ma.masked_array(data, timeunits != myunit) + meanstore[i] = ma.average(datam) + + # construct new times list + smyunit = str(myunit) + if len(smyunit) == 4: # YYYY + yyyy = int(myunit[0:4]) + mm = 1 + dd = 1 + if len(smyunit) == 6: # YYYYMM + yyyy = int(smyunit[0:4]) + mm = int(smyunit[4:6]) + dd = 1 + if len(smyunit) == 8: # YYYYMMDD + yyyy = int(smyunit[0:4]) + mm = int(smyunit[4:6]) + dd = int(smyunit[6:8]) + if len(smyunit) == 3: # Full time range + # Need to set an appropriate time representing the mid-point of the entire time span + dt = dateList[-1] - dateList[0] + halfway = dateList[0] + (dt / 2) + yyyy = int(halfway.year) + mm = int(halfway.month) + dd = int(halfway.day) + + newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0)) + i = i + 1 + + # 3D data arrays + if data.ndim == 3: + + #datamask = create_mask_using_threshold(data,threshold=0.75) + + # Create array to store the resulting data + meanstore = np.zeros([len(np.unique(timeunits)), data.shape[1], data.shape[2]]) + + # Calculate the means across each unique time unit + i = 0 + datamask_store = [] + for myunit in np.unique(timeunits): + if processing_required: + + mask = np.zeros_like(data) + mask[timeunits != myunit, :, :] = 1.0 + + # Calculate missing data mask within each time unit... + datamask_at_this_timeunit = np.zeros_like(data) + datamask_at_this_timeunit[:] = create_mask_using_threshold(data[timeunits == myunit, :, :], threshold=0.75) + # Store results for masking later + datamask_store.append(datamask_at_this_timeunit[0]) + + # Calculate means for each pixel in this time unit, ignoring missing data (using masked array). 
+ datam = ma.masked_array(data, np.logical_or(mask, datamask_at_this_timeunit)) + meanstore[i, :, :] = ma.average(datam, axis=0) + + # construct new times list + smyunit = str(myunit) + if len(smyunit) == 4: # YYYY + yyyy = int(smyunit[0:4]) + mm = 1 + dd = 1 + if len(smyunit) == 6: # YYYYMM + yyyy = int(smyunit[0:4]) + mm = int(smyunit[4:6]) + dd = 1 + if len(smyunit) == 8: # YYYYMMDD + yyyy = int(smyunit[0:4]) + mm = int(smyunit[4:6]) + dd = int(smyunit[6:8]) + if len(smyunit) == 3: # Full time range + # Need to set an appropriate time representing the mid-point of the entire time span + dt = dateList[-1] - dateList[0] + halfway = dateList[0] + (dt / 2) + yyyy = int(halfway.year) + mm = int(halfway.month) + dd = int(halfway.day) + + newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0)) + + i += 1 + + if not processing_required: + meanstorem = data + + if processing_required: + # Create masked array (using missing data mask defined above) + datamask_store = np.array(datamask_store) + meanstorem = ma.masked_array(meanstore, datamask_store) + + return meanstorem, newTimesList + +def calc_running_accum_from_period_accum(data): + ''' + Routine to calculate running total accumulations from individual period accumulations. + :: + + e.g. 0,0,1,0,0,2,2,1,0,0 + -> 0,0,1,1,1,3,5,6,6,6 + + Input:: + data: numpy array with time in the first axis + + Output:: + running_acc: running accumulations + + ''' + + + running_acc = np.zeros_like(data) + + if(len(data.shape) == 1): + running_acc[0] = data[0] + + if(len(data.shape) > 1): + running_acc[0, :] = data[0, :] + + for i in np.arange(data.shape[0] - 1): + if(len(data.shape) == 1): + running_acc[i + 1] = running_acc[i] + data[i + 1] + + if(len(data.shape) > 1): + running_acc[i + 1, :] = running_acc[i, :] + data[i + 1, :] + + return running_acc + +def ignore_boundaries(data, rim=10): + ''' + Routine to mask the lateral boundary regions of model data to ignore them from calculations. 
+ + Input:: + data - a masked array of model data + rim - (optional) number of grid points to ignore + + Output:: + data - data array with boundary region masked + + ''' + + nx = data.shape[1] + ny = data.shape[0] + + rimmask = np.zeros_like(data) + for j in np.arange(rim): + rimmask[j, 0:nx - 1] = 1.0 + + for j in ny - 1 - np.arange(rim): + rimmask[j, 0:nx - 1] = 1.0 + + for i in np.arange(rim): + rimmask[0:ny - 1, i] = 1.0 + + for i in nx - 1 - np.arange(rim): + rimmask[0:ny - 1, i] = 1.0 + + data = ma.masked_array(data, mask=rimmask) + + return data + +def normalizeDatetimes(datetimes, timestep): + """ + Input:: + datetimes - list of datetime objects that need to be normalized + timestep - string of value ('daily' | 'monthly') + Output:: + normalDatetimes - list of datetime objects that have been normalized + + Normalization Rules:: + Daily data will be forced to an hour value of 00:00:00 + Monthly data will be forced to the first of the month at midnight + """ + normalDatetimes = [] + if timestep.lower() == 'monthly': + for inputDatetime in datetimes: + if inputDatetime.day != 1: + # Clean the inputDatetime + inputDatetimeString = inputDatetime.strftime('%Y%m%d') + normalInputDatetimeString = inputDatetimeString[:6] + '01' + inputDatetime = datetime.datetime.strptime(normalInputDatetimeString, '%Y%m%d') + + normalDatetimes.append(inputDatetime) + + elif timestep.lower() == 'daily': + for inputDatetime in datetimes: + if inputDatetime.hour != 0 or inputDatetime.minute != 0 or inputDatetime.second != 0: + datetimeString = inputDatetime.strftime('%Y%m%d%H%M%S') + normalDatetimeString = datetimeString[:8] + '000000' + inputDatetime = datetime.datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S') + + normalDatetimes.append(inputDatetime) + + return normalDatetimes + +def getModelTimes(modelFile, timeVarName): + ''' + TODO: Do a better job handling dates here + Routine to convert from model times ('hours since 1900...', 'days since ...') + into a python datetime 
structure + + Input:: + modelFile - path to the model tile you want to extract the times list and modelTimeStep from + timeVarName - name of the time variable in the model file + + Output:: + times - list of python datetime objects describing model data times + modelTimeStep - 'hourly','daily','monthly','annual' + ''' + + f = Nio.open_file(modelFile) + xtimes = f.variables[timeVarName] + timeFormat = xtimes.attributes['units'] + + # search to check if 'since' appears in units + try: + sinceLoc = re.search('since', timeFormat).end() + + + #KDW the below block generates and error. But the print statement, indicates that sinceLoc found something + except AttributeError: + print 'Error decoding model times: time variable attributes do not contain "since"' + raise + + units = '' + TIME_UNITS = ('minutes', 'hours', 'days', 'months', 'years') + # search for 'seconds','minutes','hours', 'days', 'months', 'years' so know units + for unit in TIME_UNITS: + if re.search(unit, timeFormat): + units = unit + break + + # cut out base time (the bit following 'since') + base_time_string = string.lstrip(timeFormat[sinceLoc:]) + + # decode base time + base_time = decodeTimeFromString(base_time_string) + + times = [] + + + + for xtime in xtimes[:]: + + # Cast time as an int ***KDW remove this so fractional xtime can be read from MERG + xtime = int(xtime) + + if units == 'minutes': + dt = datetime.timedelta(minutes=xtime) + new_time = base_time + dt + elif units == 'hours': + dt = datetime.timedelta(hours=xtime) + new_time = base_time + dt + elif units == 'days': + dt = datetime.timedelta(days=xtime) + new_time = base_time + dt + elif units == 'months': + # NB. adding months in python is complicated as month length varies and hence ambiguous. + # Perform date arithmatic manually + # Assumption: the base_date will usually be the first of the month + # NB. this method will fail if the base time is on the 29th or higher day of month + # -as can't have, e.g. Feb 31st. 
def getModelTimeStep(units, stepSize):
    '''
    Purpose::
        Translate a time-axis unit plus the interval between consecutive time
        values into a descriptive model time-step label.

    Input::
        units - time unit of the file's time coordinate:
                'minutes', 'hours', 'days', 'months' or 'years'
        stepSize - interval between consecutive time values, in those units

    Output::
        modelTimeStep - 'hourly', 'daily', 'monthly' or 'annual'

    Raises::
        Exception when (units, stepSize) does not map onto a supported interval.
    '''
    if units == 'minutes':
        if stepSize == 60:
            modelTimeStep = 'hourly'
        elif stepSize == 1440:
            modelTimeStep = 'daily'
        # 28 days through 31 days
        elif 40320 <= stepSize <= 44640:
            modelTimeStep = 'monthly'
        # 365 days through 366 days
        elif 525600 <= stepSize <= 527040:
            modelTimeStep = 'annual'
        else:
            raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)

    elif units == 'hours':
        # stepSize == 0 covers fractional-hour data (e.g. MERG) whose
        # integer-truncated difference between consecutive times is zero
        if stepSize == 0 or stepSize == 1:
            modelTimeStep = 'hourly'
        elif stepSize == 24:
            modelTimeStep = 'daily'
        elif 672 <= stepSize <= 744:
            modelTimeStep = 'monthly'
        elif 8760 <= stepSize <= 8784:
            modelTimeStep = 'annual'
        else:
            raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)

    elif units == 'days':
        if stepSize == 1:
            modelTimeStep = 'daily'
        elif 28 <= stepSize <= 31:
            modelTimeStep = 'monthly'
        elif 365 <= stepSize <= 366:
            modelTimeStep = 'annual'
        else:
            raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)

    elif units == 'months':
        if stepSize == 1:
            modelTimeStep = 'monthly'
        elif stepSize == 12:
            modelTimeStep = 'annual'
        else:
            raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)

    elif units == 'years':
        if stepSize == 1:
            modelTimeStep = 'annual'
        else:
            raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)

    else:
        # NB. kept as a tuple, matching the original message value exactly
        errorMessage = 'the time unit ', units, ' is not currently handled in this version.'
        raise Exception(errorMessage)

    return modelTimeStep

def decodeTimeFromString(time_string):
    '''
    Decodes string into a python datetime object.

    *Method:* tries a sequence of known time format possibilities (in the same
    order as the original hand-unrolled try/except chain) and returns on the
    first that parses.
    ::

      **Input:**  time_string - a string that represents a date/time

      **Output:** mytime - a python datetime object, or 0 when no format matched
                  (the 0 sentinel is preserved for existing callers)
    '''
    # Deal with times that use decimal seconds: drop the fraction and pad the
    # seconds field with a trailing zero (preserves the original behaviour).
    if '.' in time_string:
        time_string = time_string.split('.')[0] + '0'

    # Candidate formats, tried in the original order; first match wins.
    TIME_FORMATS = (
        '%Y-%m-%d %H:%M:%S',
        '%Y/%m/%d %H:%M:%S',
        '%Y%m%d %H:%M:%S',
        '%Y:%m:%d %H:%M:%S',
        '%Y%m%d%H%M%S',
        '%Y-%m-%d %H:%M',
        '%Y-%m-%d',      # KDW added
        '%Y-%m-%d %H',
    )
    for fmt in TIME_FORMATS:
        try:
            mytime = time.strptime(time_string, fmt)
            return datetime.datetime(*mytime[0:6])
        except ValueError:
            pass

    print('Error decoding time string: string does not match a predefined time format')
    return 0

def regrid_wrapper(regrid_choice, obsdata, obslats, obslons, wrfdata, wrflats, wrflons):
    '''
    Wrapper routine for regridding.

    Either regrids model to obs grid, or obs to model grid, depending on user choice.

    Inputs::
        regrid_choice - '0' = Regrid obs data onto model grid or
                        '1' = Regrid model data onto obs grid
                        (NB. compared as strings, matching the original)

        obsdata, wrfdata - data arrays (2D, or 3D with time first)
        obslats, obslons - observation lat, lon arrays
        wrflats, wrflons - model lat, lon arrays

    Output::
        rdata, rdata2 - model and obs data on the common grid
        lats, lons - latitudes and longitudes of the regridded data
    '''
    # Regrid obs data onto model grid
    if (regrid_choice == '0'):
        ndims = len(obsdata.shape)
        if (ndims == 3):
            newshape = [obsdata.shape[0], wrfdata.shape[1], wrfdata.shape[2]]
            nT = obsdata.shape[0]
        if (ndims == 2):
            newshape = [wrfdata.shape[0], wrfdata.shape[1]]
            nT = 0

        rdata = wrfdata
        lats, lons = wrflats, wrflons

        # Create a new masked array with the required dimensions
        tmp = np.zeros(newshape)
        rdata2 = ma.MaskedArray(tmp, mask=(tmp == 0))
        tmp = 0  # free up some memory

        rdata2[:] = 0.0

        if (nT > 0):
            for t in np.arange(nT):
                rdata2[t, :, :] = do_regrid(obsdata[t, :, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :])

        if (nT == 0):
            rdata2[:, :] = do_regrid(obsdata[:, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :])

    # Regrid model data onto obs grid
    if (regrid_choice == '1'):
        ndims = len(wrfdata.shape)
        if (ndims == 3):
            newshape = [wrfdata.shape[0], obsdata.shape[1], obsdata.shape[2]]
            # NOTE(review): nT is taken from obsdata here although wrfdata is the
            # array being looped over - looks like obs/model time axes are assumed
            # equal; confirm against callers before changing.
            nT = obsdata.shape[0]

        if (ndims == 2):
            newshape = [obsdata.shape[0], obsdata.shape[1]]
            nT = 0

        rdata2 = obsdata
        lats, lons = obslats, obslons

        tmp = np.zeros(newshape)
        rdata = ma.MaskedArray(tmp, mask=(tmp == 0))
        tmp = 0  # free up some memory

        rdata[:] = 0.0

        if (nT > 0):
            for t in np.arange(nT):
                rdata[t, :, :] = do_regrid(wrfdata[t, :, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :])

        if (nT == 0):
            rdata[:, :] = do_regrid(wrfdata[:, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :])

    return rdata, rdata2, lats, lons
def extract_sub_time_selection(allTimes, subTimes, data):
    '''
    Routine to extract a sub-selection of times from a data array.

    Example::
        Suppose a data array has 30 time values for daily data for a whole month,
        but you only want the data from the 5th-15th of the month.

    Input::
        allTimes - list of datetimes describing what times the data in the data array correspond to
        subTimes - the times you want to extract data from
        data - the data array (time is the first dimension)

    Output::
        subdata - subselection of the data array covering only subTimes
    '''
    # Create new array to store the subselection
    subdata = np.zeros([len(subTimes), data.shape[1], data.shape[2]])

    # Copy across each time level that is a member of the requested subselection.
    # (The original tested np.setmember1d(allTimes, subTimes) here, which ignored
    # the loop variable entirely - fixed to test the current time value.)
    subi = 0
    for i, t in enumerate(allTimes):
        if t in subTimes:
            subdata[subi, :, :] = data[i, :, :]
            subi += 1

    return subdata

def _representative_time(smyunit, dateList):
    '''
    Map a time-unit label onto the datetime representing that averaging period:
    'YYYY' -> Jan 1st of that year, 'YYYYMM' -> 1st of that month,
    'YYYYMMDD' -> that day, and the 3-character full-range flag ('999') ->
    the mid-point of the entire time span. All returned times are at 00Z.
    '''
    if len(smyunit) == 4:        # YYYY
        yyyy = int(smyunit[0:4])
        mm = 1
        dd = 1
    elif len(smyunit) == 6:      # YYYYMM
        yyyy = int(smyunit[0:4])
        mm = int(smyunit[4:6])
        dd = 1
    elif len(smyunit) == 8:      # YYYYMMDD
        yyyy = int(smyunit[0:4])
        mm = int(smyunit[4:6])
        dd = int(smyunit[6:8])
    else:                        # full time range ('999')
        halfway = dateList[0] + ((dateList[-1] - dateList[0]) / 2)
        yyyy = int(halfway.year)
        mm = int(halfway.month)
        dd = int(halfway.day)
    return datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0)

def calc_average_on_new_time_unit_K(data, dateList, unit):
    '''
    Routine to calculate averages on longer time units than the data exists on,
    e.g. if the data is 6-hourly, calculate daily or monthly means.

    Input:
        data - data values (1D time series, or 3D [time, lat, lon])
        dateList - list of python datetime structures corresponding to data values
        unit - string describing time unit to average onto:
               'full', 'annual', 'monthly', 'daily'
               ('pentad' is accepted but not yet implemented)
    Output:
        meanstorem - data values meaned over the required time period
                     (masked array in the 3D case)
        newTimesList - list of python datetime objects representing the data in
                       the new averaging period (start of each period, 00Z)

    NOTE(review): on an unknown unit this returns the four-value sentinel
    (-1, -1, -1, -1) instead of the usual two values - preserved as-is since
    existing callers may test for it.
    '''
    # Check if the user-selected temporal grid is valid. If not, EXIT
    acceptable = (unit == 'full') | (unit == 'annual') | (unit == 'monthly') | (unit == 'daily') | (unit == 'pentad')
    if not acceptable:
        print('Error: unknown unit type selected for time averaging: EXIT')
        return -1, -1, -1, -1

    # Build one integer label per input time level: YYYY / YYYYMM / YYYYMMDD,
    # or the constant 999 when averaging over the full record.
    # TODO: add pentad setting using Julian days?
    timeunits = []
    for d in dateList:
        if unit == 'annual':
            timeunits.append(str(d.year))
        elif unit == 'monthly':
            timeunits.append(str(d.year) + str("%02d" % d.month))
        elif unit == 'daily':
            timeunits.append(str(d.year) + str("%02d" % d.month) + str("%02d" % d.day))
        elif unit == 'full':
            timeunits.append(999)  # same label for every time, i.e. one mean over all
    timeunits = np.array(timeunits, dtype=int)

    # empty list to store new times
    newTimesList = []

    # If the data are already on the requested time unit, pass them through
    # unchanged and only build the representative datetimes.
    processing_required = len(timeunits) != len(np.unique(timeunits))

    # 1D data arrays, i.e. time series
    if data.ndim == 1:
        meanstore = np.zeros(len(np.unique(timeunits)))
        i = 0
        for myunit in np.unique(timeunits):
            if processing_required:
                datam = ma.masked_array(data, timeunits != myunit)
                meanstore[i] = ma.average(datam)
            # str(myunit): the original sliced the integer myunit ('myunit[0:4]'),
            # which raises TypeError - fixed by always using the string label.
            newTimesList.append(_representative_time(str(myunit), dateList))
            i = i + 1

    # 3D data arrays
    if data.ndim == 3:
        meanstore = np.zeros([len(np.unique(timeunits)), data.shape[1], data.shape[2]])
        i = 0
        datamask_store = []
        for myunit in np.unique(timeunits):
            if processing_required:
                mask = np.zeros_like(data)
                mask[timeunits != myunit, :, :] = 1.0
                # Calculate missing data mask within each time unit...
                datamask_at_this_timeunit = np.zeros_like(data)
                datamask_at_this_timeunit[:] = create_mask_using_threshold(data[timeunits == myunit, :, :], threshold=0.75)
                # Store results for masking later
                datamask_store.append(datamask_at_this_timeunit[0])
                # Mean over time for each pixel, ignoring missing data
                datam = ma.masked_array(data, np.logical_or(mask, datamask_at_this_timeunit))
                meanstore[i, :, :] = ma.average(datam, axis=0)
            newTimesList.append(_representative_time(str(myunit), dateList))
            i += 1

    if not processing_required:
        meanstorem = data
    elif data.ndim == 3:
        # Create masked array using the missing-data masks collected above
        datamask_store = np.array(datamask_store)
        meanstorem = ma.masked_array(meanstore, datamask_store)
    else:
        # 1D case: no per-pixel missing-data mask exists (the original
        # referenced an undefined datamask_store here and raised NameError)
        meanstorem = meanstore

    return meanstorem, newTimesList

def decode_model_timesK(ifile, timeVarName, file_type):
    '''
    Convert model times ('hours since 1900...', 'days since ...') into python
    datetime objects. Peter Lean, February 2011.

    Input:
        ifile - path of the model file
        timeVarName - name of the time variable in the model file
        file_type - file format string passed through to Nio.open_file
    Output:
        times - list of python datetime objects describing model data times
    '''
    f = Nio.open_file(ifile, mode='r', options=None, format=file_type)
    xtimes = f.variables[timeVarName]
    timeFormat = xtimes.attributes['units']

    # The units attribute must look like '<unit> since <base time>'
    try:
        sinceLoc = re.search('since', timeFormat).end()
    except:
        print('Error decoding model times: time var attributes do not contain "since": Exit')
        sys.exit()

    # Determine the step unit. Later matches deliberately override earlier
    # ones, mirroring the original sequence of independent searches.
    units = ''
    for candidate in ('minutes', 'hours', 'days', 'months', 'years'):
        if re.search(candidate, timeFormat):
            units = candidate

    # Base time is whatever follows 'since'
    base_time = decodeTimeFromString(timeFormat[sinceLoc:].lstrip())

    times = []
    # 9/25/2012 (J. Kim): xtimes is a NioVariable, so cast each element to a
    # plain int before doing timedelta arithmetic.
    for ttmp in xtimes[:]:
        xtime = int(ttmp)
        if units == 'minutes':
            new_time = base_time + datetime.timedelta(minutes=xtime)
        if units == 'hours':
            new_time = base_time + datetime.timedelta(hours=xtime)
        if units == 'days':
            new_time = base_time + datetime.timedelta(days=xtime)
        if units == 'months':
            # Adding months is ambiguous (month length varies), so do the date
            # arithmetic manually. NB. still fails when the base day does not
            # exist in the target month (e.g. Jan 31st + 1 month).
            # (The original could build month numbers > 12 and passed
            # base_time.second into the minutes slot - both fixed here.)
            total_months = base_time.month - 1 + xtime
            new_year = base_time.year + total_months // 12
            new_month = total_months % 12 + 1
            new_time = datetime.datetime(new_year, new_month, base_time.day,
                                         base_time.hour, base_time.minute, base_time.second)
        if units == 'years':
            # datetime.timedelta has no 'years' argument (the original raised
            # TypeError here); shift the year field directly instead.
            # NB. fails for a Feb 29th base time and a non-leap target year.
            new_time = base_time.replace(year=base_time.year + xtime)
        times.append(new_time)
    return times

def regrid_in_time(data, dateList, unit):
    '''
    Routine to calculate averages on longer time units than the data exists on,
    e.g. if the data is 6-hourly, calculate daily or monthly means.

    Jinwon Kim, Sept 30, 2012: created from calc_average_on_new_time_unit_K
    (Peter's original); instead of masked arrays, a boolean index (wh) selects
    the data within each averaging range.

    Input:
        data - data values (1D time series, or 3D [time, lat, lon])
        dateList - list of python datetime structures corresponding to data values
        unit - 'full', 'annual', 'monthly', 'daily'
               ('pentad' is accepted but not yet implemented)
    Output:
        meanstorem - data values meaned over the required time period
        newTimesList - list of python datetime objects representing the data in
                       the new averaging period (start of each period, 00Z)

    NOTE(review): returns the four-value sentinel (-1, -1, -1, -1) on an
    unknown unit, like calc_average_on_new_time_unit_K - preserved as-is.
    '''
    print('*** Begin calc_average_on_new_time_unit_KK ***')

    # Check if the user-selected temporal grid is valid. If not, EXIT
    acceptable = (unit == 'full') | (unit == 'annual') | (unit == 'monthly') | (unit == 'daily') | (unit == 'pentad')
    if not acceptable:
        print('Error: unknown unit type selected for time averaging: EXIT')
        return -1, -1, -1, -1

    # One integer label per input time level (see calc_average_on_new_time_unit_K)
    timeunits = []
    for d in dateList:
        if unit == 'annual':
            timeunits.append(str(d.year))
        elif unit == 'monthly':
            timeunits.append(str(d.year) + str("%02d" % d.month))
        elif unit == 'daily':
            timeunits.append(str(d.year) + str("%02d" % d.month) + str("%02d" % d.day))
        elif unit == 'full':
            timeunits.append(999)  # same label for every time level
    timeunits = np.array(timeunits, dtype=int)
    print('timeRegridOption=  %s' % unit)

    # empty list to store time levels after temporal regridding
    newTimesList = []

    # If the data are already on the requested time unit, just pass them through.
    processing_required = len(timeunits) != len(np.unique(timeunits))
    print('processing_required=  %s : input time steps=  %s : regridded output time steps =  %s'
          % (processing_required, len(timeunits), len(np.unique(timeunits))))
    print('data.ndim=  %s' % data.ndim)

    if data.ndim == 1:  # 1D data arrays, i.e. 1D time series
        meanstore = np.zeros(len(np.unique(timeunits)))
        i = 0
        for myunit in np.unique(timeunits):
            if processing_required:
                wh = timeunits == myunit
                meanstore[i] = ma.average(data[wh])
            # str(myunit): the original sliced the integer myunit here (TypeError)
            newTimesList.append(_representative_time(str(myunit), dateList))
            i = i + 1

    elif data.ndim == 3:  # 3D data arrays, i.e. 2D time series
        meanstore = np.zeros([len(np.unique(timeunits)), data.shape[1], data.shape[2]])
        i = 0
        for myunit in np.unique(timeunits):
            if processing_required:
                wh = timeunits == myunit
                meanstore[i, :, :] = ma.average(data[wh, :, :], axis=0)
            newTimesList.append(_representative_time(str(myunit), dateList))
            i += 1

    if not processing_required:
        meanstorem = data
    else:
        meanstorem = meanstore

    print('*** End calc_average_on_new_time_unit_KK ***')
    return meanstorem, newTimesList
