Repository: climate
Updated Branches:
  refs/heads/master 6e4920c05 -> 2104da0c9


removing files.py and process.py from the code repo


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/ad8e0c7f
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/ad8e0c7f
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/ad8e0c7f

Branch: refs/heads/master
Commit: ad8e0c7f94e6c6cc270e880f92d1bd395cc3dd49
Parents: 48352bc
Author: Kim Whitehall <[email protected]>
Authored: Tue Oct 28 08:29:07 2014 -0700
Committer: Kim Whitehall <[email protected]>
Committed: Tue Oct 28 08:29:07 2014 -0700

----------------------------------------------------------------------
 mccsearch/code/files.py     |  783 -----------------------------
 mccsearch/code/mccSearch.py |    6 +-
 mccsearch/code/process.py   | 1007 --------------------------------------
 3 files changed, 2 insertions(+), 1794 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/ad8e0c7f/mccsearch/code/files.py
----------------------------------------------------------------------
diff --git a/mccsearch/code/files.py b/mccsearch/code/files.py
deleted file mode 100644
index b238754..0000000
--- a/mccsearch/code/files.py
+++ /dev/null
@@ -1,783 +0,0 @@
-#
-#  Licensed to the Apache Software Foundation (ASF) under one or more
-#  contributor license agreements.  See the NOTICE file distributed with
-#  this work for additional information regarding copyright ownership.
-#  The ASF licenses this file to You under the Apache License, Version 2.0
-#  (the "License"); you may not use this file except in compliance with
-#  the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-"""
-Module for handling data input files.  Requires netCDF and Numpy be 
-installed.
-
-This module can easily open NetCDF, HDF and Grib files.  Search the netCDF4
-documentation for a complete list of supported formats.
-"""
-
-from os import path
-import netCDF4
-import numpy as np
-import numpy.ma as ma
-import sys
-
-from toolkit import process
-from utils import fortranfile
-from utils import misc
-
-
-VARIABLE_NAMES = {'time': ['time', 'times', 'date', 'dates', 'julian'],
-                  'latitude': ['latitude', 'lat', 'lats', 'latitudes'],
-                  'longitude': ['longitude', 'lon', 'lons', 'longitudes']
-                  }
-
-
-def findunique(seq):
-    keys = {}
-    for e in seq:
-        keys[e] = 1
-    return keys.keys()
-
-def getVariableByType(filename, variableType):
-    """
-    Function that will try to return the variable from a file based on a 
provided
-    parameter type.
-    
-    Input::
-        filename - the file to inspect
-        variableType - time | latitude | longitude
-    
-    Output::
-        variable name OR list of all variables in the file if a single variable
-        name match cannot be found.
-    """
-    try:
-        f = netCDF4.Dataset(filename, mode='r')
-    except:
-        print "netCDF4Error:", sys.exc_info()[0]
-        raise
-    
-    variableKeys = f.variables.keys()
-    f.close()
-    variableKeys = [variable.encode().lower() for variable in variableKeys]
-    variableMatch = VARIABLE_NAMES[variableType]
-
-    commonVariables = list(set(variableKeys).intersection(variableMatch)) 
-
-    if len(commonVariables) == 1:
-        return str(commonVariables[0])
-    
-    else:
-        return variableKeys
-
-def getVariableRange(filename, variableName):
-    """
-    Function to return the min and max values of the given variable in
-    the supplied filename.
-   
-    Input::
-        filename - absolute path to a file
-        variableName - variable whose min and max values should be returned
-
-    Output::
-        variableRange - tuple of order (variableMin, variableMax)
-    """
-    try:
-        f = netCDF4.Dataset(filename, mode='r')
-    except:
-        print "netCDF4Error:", sys.exc_info()[0]
-        raise
-    
-    varArray = f.variables[variableName][:]
-    return (varArray.min(), varArray.max())
-
-
-def read_data_from_file_list(filelist, myvar, timeVarName, latVarName, 
lonVarName):
-    '''
-    Read in data from a list of model files into a single data structure
-   
-    Input:
-       filelist - list of filenames (including path)
-       myvar    - string containing name of variable to load in (as it appears 
in file)
-    Output:
-       lat, lon - 2D array of latitude and longitude values
-       timestore    - list of times
-       t2store  - numpy array containing data from all files    
-   
-     NB. originally written specific for WRF netCDF output files
-         modified to make more general (Feb 2011)
-   
-      Peter Lean July 2010 
-    '''
-
-    filelist.sort()
-    filename = filelist[0]
-    # Crash nicely if 'filelist' is zero length
-    """TODO:  Throw Error instead via try Except"""
-    if len(filelist) == 0:
-        print 'Error: no files have been passed to read_data_from_file_list()'
-        sys.exit()
-
-    # Open the first file in the list to:
-    #    i) read in lats, lons
-    #    ii) find out how many timesteps in the file 
-    #        (assume same ntimes in each file in list)
-    #     -allows you to create an empty array to store variable data for all 
times
-    tmp = netCDF4.Dataset(filename, mode='r')
-    latsraw = tmp.variables[latVarName][:]
-    lonsraw = tmp.variables[lonVarName][:]
-    if(latsraw.ndim == 1):
-        lon, lat = np.meshgrid(lonsraw, latsraw)
-    if(latsraw.ndim == 2):
-        lon = lonsraw
-        lat = latsraw
-
-    timesraw = tmp.variables[timeVarName]
-    ntimes = len(timesraw)
-    
-    print 'Lats and lons read in for first file in filelist'
-
-    # Create a single empty masked array to store model data from all files
-    t2store = ma.zeros((ntimes * len(filelist), len(lat[:, 0]), len(lon[0, 
:])))
-    timestore = ma.zeros((ntimes * len(filelist))) 
-    
-    # Now load in the data for real
-    #  NB. no need to reload in the latitudes and longitudes -assume invariant
-    i = 0
-    timesaccu = 0 # a counter for number of times stored so far in t2store 
-    #  NB. this method allows for missing times in data files 
-    #      as no assumption made that same number of times in each file...
-
-
-    for i, ifile in enumerate(filelist):
-
-        #print 'Loading data from file: ',filelist[i]
-        f = netCDF4.Dataset(ifile, mode='r')
-        t2raw = ma.array(f.variables[myvar][:])
-        timesraw = f.variables[timeVarName]
-        time = timesraw[:]
-        ntimes = len(time)
-        print 'file= ', i, 'ntimes= ', ntimes, filelist[i]
-        print 't2raw shape: ', t2raw.shape
-        
-        # Flatten dimensions which needn't exist, i.e. level 
-        #   e.g. if for single level then often data have 4 dimensions, when 3 
dimensions will do.
-        #  Code requires data to have dimensions, (time,lat,lon)
-        #    i.e. remove level dimensions
-        # Remove 1d axis from the t2raw array
-        # Example: t2raw.shape == (365, 180, 360 1) <maps to (time, lat, lon, 
height)>
-        # After the squeeze you will be left with (365, 180, 360) instead
-        t2tmp = t2raw.squeeze()
-        # Nb. if this happens to be data for a single time only, then we just 
flattened it by accident
-        #     lets put it back... 
-        if t2tmp.ndim == 2:
-            t2tmp = np.expand_dims(t2tmp, 0)
-
-        t2store[timesaccu + np.arange(ntimes), :, :] = t2tmp[:, :, :]
-        timestore[timesaccu + np.arange(ntimes)] = time
-        timesaccu += ntimes
-        f.close()
-        
-    print 'Data read in successfully with dimensions: ', t2store.shape
-    
-    # TODO: search for duplicated entries (same time) and remove duplicates.
-    # Check to see if number of unique times == number of times, if so then no 
problem
-
-    if(len(np.unique(timestore)) != len(np.where(timestore != 0)[0].view())):
-        print 'WARNING: Possible duplicated times'
-
-    # Decode model times into python datetime objects. Note: timestore becomes 
a list (no more an array) here
-    timestore, _ = process.getModelTimes(filename, timeVarName)
-    
-    # Make sure latlon grid is monotonically increasing and that the domains
-    # are correct
-    lat, lon, t2store = checkLatLon(lat, lon, t2store)
-    data_dict = {'lats': lat, 'lons': lon, 'times': timestore, 'data': t2store}
-    return data_dict
-
-def select_var_from_file(myfile, fmt='not set'):
-    '''
-     Routine to act as user interface to allow users to select variable of 
interest from a file.
-     
-      Input:
-         myfile - filename
-         fmt - (optional) specify fileformat for netCDF4 if filename suffix is 
non-standard
-    
-      Output:
-         myvar - variable name in file
-    
-        Peter Lean  September 2010
-    '''
-
-    print fmt
-    
-    if fmt == 'not set':
-        f = netCDF4.Dataset(myfile, mode='r')
-    
-    if fmt != 'not set':
-        f = netCDF4.Dataset(myfile, mode='r', format=fmt)
-    
-    keylist = [key.encode().lower() for key in f.variables.keys()]
-    
-    i = 0
-    for v in keylist:
-        print '[', i, '] ', f.variables[v].long_name, ' (', v, ')'
-        i += 1
-
-    user_selection = raw_input('Please select variable : [0 -' + str(i - 1) + 
']  ')
-    
-    myvar = keylist[int(user_selection)]
-    
-    return myvar
-
-def select_var_from_wrf_file(myfile):
-    '''
-     Routine to act as user interface to allow users to select variable of 
interest from a wrf netCDF file.
-     
-      Input:
-         myfile - filename
-    
-      Output:
-         mywrfvar - variable name in wrf file
-    
-        Peter Lean  September 2010
-    '''
-
-    f = netCDF4.Dataset(myfile, mode='r', format='NETCDF4')
-    keylist = [key.encode().lower() for key in f.variables.keys()]
-
-    i = 0
-    for v in keylist:
-        try:
-            print '[', i, '] ', f.variables[v].description, ' (', v, ')'
-        except:
-            print ''
-
-        i += 1
-    
-    user_selection = raw_input('Please select WRF variable : [0 -' + str(i - 
1) + ']  ')
-    
-    mywrfvar = keylist[int(user_selection)]
-    
-    return mywrfvar
-
-def read_data_from_one_file(ifile, myvar, latVarName, lonVarName, timeVarName, 
file_type):
-    """
-    Purpose::
-        Read in data from one file at a time
-    
-    Input::   
-        filelist - list of filenames (including path)
-        myvar - string containing name of variable to load in (as it appears 
in file)s
-        lonVarName - name of the Longitude Variable
-        timeVarName - name of the Time Variable
-        fileType - type of file we are trying to parse
-        
-     Output::  
-        lat, lon - 2D arrays of latitude and longitude values
-        times - list of times
-        t2store - numpy array containing data from the file for the requested 
variable
-        varUnit - units for the variable given by t2store  
-    """           
-    f = netCDF4.Dataset(ifile, mode='r')
-    try:
-        varUnit = f.variables[myvar].units.encode().upper()
-    except:
-        varUnit = raw_input('Enter the model variable unit: \n> ').upper()
-    t2raw = ma.array(f.variables[myvar][:])
-    t2tmp = t2raw.squeeze()
-    if t2tmp.ndim == 2:
-        t2tmp = np.expand_dims(t2tmp, 0)
-        
-    lonsraw = f.variables[lonVarName][:]
-    latsraw = f.variables[latVarName][:]
-    if(latsraw.ndim == 1):
-        lon, lat = np.meshgrid(lonsraw, latsraw)
-    if(latsraw.ndim == 2):
-        lon = lonsraw
-        lat = latsraw
-    
-    f.close()
-    print '  success read_data_from_one_file: VarName=', myvar, ' Shape(Full)= 
', t2tmp.shape, ' Unit= ', varUnit
-    timestore = process.decode_model_timesK(ifile, timeVarName, file_type)
-    
-    # Make sure latlon grid is monotonically increasing and that the domains
-    # are correct
-    lat, lon, t2store = checkLatLon(lat, lon, t2tmp)
-    return lat, lon, timestore, t2store, varUnit
-
-def findTimeVariable(filename):
-    """
-     Function to find what the time variable is called in a model file.
-        Input::
-            filename - file to crack open and check for a time variable
-        Output::
-            timeName - name of the input file's time variable
-            variableNameList - list of variable names from the input filename
-    """
-    try:
-        f = netCDF4.Dataset(filename, mode='r')
-    except:
-        print("Unable to open '%s' to try and read the Time variable" % 
filename)
-        raise
-
-    variableNameList = [variable.encode() for variable in f.variables.keys()]
-    # convert all variable names into lower case
-    varNameListLowerCase = [x.lower() for x in variableNameList]
-
-    # Use "set" types for finding common variable name from in the file and 
from the list of possibilities
-    possibleTimeNames = set(['time', 'times', 'date', 'dates', 'julian'])
-    
-    # Use the sets to find the intersection where variable names are in 
possibleNames
-    timeNameSet = possibleTimeNames.intersection(varNameListLowerCase)
-    
-    if len(timeNameSet) == 0:
-        print("Unable to autodetect the Time Variable Name in the '%s'" % 
filename)
-        timeName = misc.askUserForVariableName(variableNameList, targetName 
="Time")
-    
-    else:
-        timeName = timeNameSet.pop()
-    
-    return timeName, variableNameList
-
-
-def findLatLonVarFromFile(filename):
-    """
-    Function to find what the latitude and longitude variables are called in a 
model file.
-    
-    Input:: 
-        -filename 
-    Output::
-        -latVarName
-        -lonVarName
-        -latMin 
-        -latMax
-        -lonMin
-        -lonMax
-    """
-    try:
-        f = netCDF4.Dataset(filename, mode='r')
-    except:
-        print("Unable to open '%s' to try and read the Latitude and Longitude 
variables" % filename)
-        raise
-
-    variableNameList = [variable.encode() for variable in f.variables.keys()]
-    # convert all variable names into lower case
-    varNameListLowerCase = [x.lower() for x in variableNameList]
-
-    # Use "set" types for finding common variable name from in the file and 
from the list of possibilities
-    possibleLatNames = set(['latitude', 'lat', 'lats', 'latitudes'])
-    possibleLonNames = set(['longitude', 'lon', 'lons', 'longitudes'])
-    
-    # Use the sets to find the intersection where variable names are in 
possibleNames
-    latNameSet = possibleLatNames.intersection(varNameListLowerCase)
-    lonNameSet = possibleLonNames.intersection(varNameListLowerCase)
-    
-    if len(latNameSet) == 0 or len(lonNameSet) == 0:
-        print("Unable to autodetect Latitude and/or Longitude values in the 
file")
-        latName = misc.askUserForVariableName(variableNameList, targetName 
="Latitude")
-        lonName = misc.askUserForVariableName(variableNameList, targetName 
="Longitude")
-    
-    else:
-        latName = latNameSet.pop()
-        lonName = lonNameSet.pop()
-    
-    lats = np.array(f.variables[latName][:])
-    lons = np.array(f.variables[lonName][:])
-    
-    latMin = lats.min()
-    latMax = lats.max()
-    
-    # Convert the lons from 0:360 into -180:180
-    lons[lons > 180] = lons[lons > 180] - 360.
-    lonMin = lons.min()
-    lonMax = lons.max()
-
-    return latName, lonName, latMin, latMax, lonMin, lonMax
-
-
-def read_data_from_file_list_K(filelist, myvar, timeVarName, latVarName, 
lonVarName, file_type):
-    
##################################################################################
-    # Read in data from a list of model files into a single data structure
-    # Input:   filelist - list of filenames (including path)
-    #          myvar    - string containing name of variable to load in (as it 
appears in file)
-    # Output:  lat, lon - 2D array of latitude and longitude values
-    #          times    - list of times
-    #          t2store  - numpy array containing data from all files    
-    # Modified from read_data_from_file_list to read data from multiple models 
into a 4-D array
-    # 1. The code now processes model data that completely covers the 20-yr 
period. Thus,
-    #    all model data must have the same time levels (ntimes). Unlike in the 
oroginal, ntimes
-    #    is fixed here.
-    # 2. Because one of the model data exceeds 240 mos (243 mos), the model 
data must be
-    #    truncated to the 240 mons using the ntimes determined from the first 
file.
-    
##################################################################################
-    filelist.sort()
-    nfiles = len(filelist)
-    # Crash nicely if 'filelist' is zero length
-    if nfiles == 0:
-        print 'Error: no files have been passed to read_data_from_file_list(): 
Exit'
-        sys.exit()
-
-    # Open the first file in the list to:
-    #    i)  read in lats, lons
-    #    ii) find out how many timesteps in the file (assume same ntimes in 
each file in list)
-    #     -allows you to create an empty array to store variable data for all 
times
-    tmp = netCDF4.Dataset(filelist[0], mode='r', format=file_type)
-    latsraw = tmp.variables[latVarName][:]
-    lonsraw = tmp.variables[lonVarName][:]
-    timesraw = tmp.variables[timeVarName]
-    
-    if(latsraw.ndim == 1):
-        lon, lat = np.meshgrid(lonsraw, latsraw)
-        
-    elif(latsraw.ndim == 2):
-        lon = lonsraw
-        lat = latsraw
-    ntimes = len(timesraw); nygrd = len(lat[:, 0]); nxgrd = len(lon[0, :])
-    
-    print 'Lats and lons read in for first file in filelist'
-
-    # Create a single empty masked array to store model data from all files
-    #t2store = ma.zeros((ntimes*nfiles,nygrd,nxgrd))
-    t2store = ma.zeros((nfiles, ntimes, nygrd, nxgrd))
-    #timestore=ma.zeros((ntimes*nfiles)) 
-    
-    ## Now load in the data for real
-    ##  NB. no need to reload in the latitudes and longitudes -assume invariant
-    #timesaccu=0 # a counter for number of times stored so far in t2store 
-    #  NB. this method allows for missing times in data files 
-    #      as no assumption made that same number of times in each file...
-
-    for i, ifile in enumerate(filelist):
-        #print 'Loading data from file: ',filelist[i]
-        f = netCDF4.Dataset(ifile, mode='r')
-        t2raw = ma.array(f.variables[myvar][:])
-        timesraw = f.variables[timeVarName]
-        #ntimes=len(time)
-        #print 'file= ',i,'ntimes= ',ntimes,filelist[i]
-        ## Flatten dimensions which needn't exist, i.e. level 
-        ##   e.g. if for single level then often data have 4 dimensions, when 
3 dimensions will do.
-        ##  Code requires data to have dimensions, (time,lat,lon)
-        ##    i.e. remove level dimensions
-        t2tmp = t2raw.squeeze()
-        ## Nb. if data happen to be for a single time, we flattened it by 
accident; lets put it back... 
-        if t2tmp.ndim == 2:
-            t2tmp = np.expand_dims(t2tmp, 0)
-        #t2store[timesaccu+np.arange(ntimes),:,:]=t2tmp[0:ntimes,:,:]
-        t2store[i, 0:ntimes, :, :] = t2tmp[0:ntimes, :, :]
-        #timestore[timesaccu+np.arange(ntimes)]=time
-        #timesaccu=timesaccu+ntimes
-        f.close()
-
-    print 'Data read in successfully with dimensions: ', t2store.shape
-    
-    # Decode model times into python datetime objects. Note: timestore becomes 
a list (no more an array) here
-    ifile = filelist[0]
-    timestore, _ = process.getModelTimes(ifile, timeVarName)
-
-    # Make sure latlon grid is monotonically increasing and that the domains
-    # are correct
-    lat, lon, t2store = checkLatLon(lat, lon, t2store)
-    return lat, lon, timestore, t2store
-
-def find_latlon_ranges(filelist, lat_var_name, lon_var_name):
-    # Function to return the latitude and longitude ranges of the data in a 
file,
-    # given the identifying variable names.
-    #
-    #    Input:
-    #            filelist - list of filenames (data is read in from first file 
only)
-    #            lat_var_name - variable name of the 'latitude' variable
-    #            lon_var_name - variable name of the 'longitude' variable
-    #
-    #    Output:
-    #            latMin, latMax, lonMin, lonMax - self explanatory
-    #
-    #                    Peter Lean      March 2011
-    
-    filename = filelist[0]
-    
-    try:
-        f = netCDF4.Dataset(filename, mode='r')
-        
-        lats = f.variables[lat_var_name][:]
-        latMin = lats.min()
-        latMax = lats.max()
-        
-        lons = f.variables[lon_var_name][:]
-        lons[lons > 180] = lons[lons > 180] - 360.
-        lonMin = lons.min()
-        lonMax = lons.max()
-        
-        return latMin, latMax, lonMin, lonMax
-
-    except:
-        print 'Error: there was a problem with finding the latitude and 
longitude ranges in the file'
-        print '       Please check that you specified the filename, and 
variable names correctly.'
-        
-        sys.exit()
-
-def writeBN_lola(fileName, lons, lats):
-    # write a binary data file that include longitude (1-d) and latitude (1-d) 
values
-    
-    F = fortranfile.FortranFile(fileName, mode='w')
-    ngrdY = lons.shape[0]; ngrdX = lons.shape[1]
-    tmpDat = ma.zeros(ngrdX); tmpDat[:] = lons[0, :]; F.writeReals(tmpDat)
-    tmpDat = ma.zeros(ngrdY); tmpDat[:] = lats[:, 0]; F.writeReals(tmpDat)
-    # release temporary arrays
-    tmpDat = 0
-    F.close()
-
-def writeBNdata(fileName, numOBSs, numMDLs, nT, ngrdX, ngrdY, numSubRgn, 
obsData, mdlData, obsRgnAvg, mdlRgnAvg):
-    
#(fileName,maskOption,numOBSs,numMDLs,nT,ngrdX,ngrdY,numSubRgn,obsData,mdlData,obsRgnAvg,mdlRgnAvg):
-    # write spatially- and regionally regridded data into a binary data file
-    missing = -1.e26
-    F = fortranfile.FortranFile(fileName, mode='w')
-    # construct a data array to replace mask flag with a missing value 
(missing=-1.e12) for printing
-    data = ma.zeros((nT, ngrdY, ngrdX))
-    for m in np.arange(numOBSs):
-        data[:, :, :] = obsData[m, :, :, :]; msk = data.mask
-        for n in np.arange(nT):
-            for j in np.arange(ngrdY):
-                for i in np.arange(ngrdX):
-                    if msk[n, j, i]: data[n, j, i] = missing
-
-        # write observed data. allowed to write only one row at a time
-        tmpDat = ma.zeros(ngrdX)
-        for n in np.arange(nT):
-            for j in np.arange(ngrdY):
-                tmpDat[:] = data[n, j, :]
-                F.writeReals(tmpDat)
-
-    # write model data (dep. on the number of models).
-    for m in np.arange(numMDLs):
-        data[:, :, :] = mdlData[m, :, :, :]
-        msk = data.mask
-        for n in np.arange(nT):
-            for j in np.arange(ngrdY):
-                for i in np.arange(ngrdX):
-                    if msk[n, j, i]:
-                        data[n, j, i] = missing
-
-        for n in np.arange(nT):
-            for j in np.arange(ngrdY):
-                tmpDat[:] = data[n, j, :]
-                F.writeReals(tmpDat)
-
-    data = 0     # release the array allocated for data
-    # write data in subregions
-    if numSubRgn > 0:
-        print 'Also included are the time series of the means over ', 
numSubRgn, ' areas from obs and model data'
-        tmpDat = ma.zeros(nT); print numSubRgn
-        for m in np.arange(numOBSs):
-            for n in np.arange(numSubRgn):
-                tmpDat[:] = obsRgnAvg[m, n, :]
-                F.writeReals(tmpDat)
-        for m in np.arange(numMDLs):
-            for n in np.arange(numSubRgn):
-                tmpDat[:] = mdlRgnAvg[m, n, :]
-                F.writeReals(tmpDat)
-    tmpDat = 0     # release the array allocated for tmpDat
-    F.close()
-
-def writeNCfile(fileName, numSubRgn, lons, lats, obsData, mdlData, obsRgnAvg, 
mdlRgnAvg, obsList, mdlList, subRegions):
-    # write an output file of variables up to 3 dimensions
-    # fileName: the name of the output data file
-    # numSubRgn  : the number of subregions
-    # lons[ngrdX]: longitude
-    # lats[ngrdY]: latitudes
-    # obsData[nT,ngrdY,ngrdX]: the obs time series of the entire model domain
-    # mdlData[numMDLs,nT,ngrdY,ngrdX]: the mdltime series of the entire model 
domain
-    # obsRgnAvg[numSubRgn,nT]: the obs time series for the all subregions
-    # mdlRgnAvg[numMDLs,numSubRgn,nT]: the mdl time series for the all 
subregions
-    dimO = obsData.shape[0]      # the number of obs data
-    dimM = mdlData.shape[0]      # the number of mdl data
-    dimT = mdlData.shape[1]      # the number of time levels
-    dimY = mdlData.shape[2]      # y-dimension
-    dimX = mdlData.shape[3]      # x-dimension
-    dimR = obsRgnAvg.shape[1]    # the number of subregions
-    f = netCDF4.Dataset(fileName, mode='w', format='NETCDF4')
-    print mdlRgnAvg.shape, dimM, dimR, dimT
-    #create global attributes
-    f.description = ''
-    # create dimensions
-    print 'Creating Dimensions within the NetCDF Object...'
-    f.createDimension('unity', 1)
-    f.createDimension('time', dimT)
-    f.createDimension('west_east', dimX)
-    f.createDimension('south_north', dimY)
-    f.createDimension('obs', dimO)
-    f.createDimension('models', dimM)
-        
-    # create the variable (real*4) to be written in the file
-    print 'Creating Variables...'
-    f.createVariable('lon', 'd', ('south_north', 'west_east'))
-    f.createVariable('lat', 'd', ('south_north', 'west_east'))
-    f.createVariable('oDat', 'd', ('obs', 'time', 'south_north', 'west_east'))
-    f.createVariable('mDat', 'd', ('models', 'time', 'south_north', 
'west_east'))
-    
-    if subRegions:
-        f.createDimension('regions', dimR)
-        f.createVariable('oRgn', 'd', ('obs', 'regions', 'time'))
-        f.createVariable('mRgn', 'd', ('models', 'regions', 'time'))
-        f.variables['oRgn'].varAttName = 'Observation time series: Subregions'
-        f.variables['mRgn'].varAttName = 'Model time series: Subregions'
-
-    loadDataIntoNetCDF(f, obsList, obsData, 'Observation')
-    print 'Loaded the Observations into the NetCDF'
-
-    loadDataIntoNetCDF(f, mdlList, mdlData, 'Model')
-
-    # create attributes and units for the variable
-    print 'Creating Attributes and Units...'
-    f.variables['lon'].varAttName = 'Longitudes'
-    f.variables['lon'].varUnit = 'degrees East'
-    f.variables['lat'].varAttName = 'Latitudes'
-    f.variables['lat'].varUnit = 'degrees North'
-    f.variables['oDat'].varAttName = 'Observation time series: entire domain'
-    f.variables['mDat'].varAttName = 'Model time series: entire domain'
-
-    # assign the values to the variable and write it
-    f.variables['lon'][:] = lons[:]
-    f.variables['lat'][:] = lats[:]
-    if subRegions:
-        f.variables['oRgn'][:] = obsRgnAvg[:]
-        f.variables['mRgn'][:] = mdlRgnAvg[:]
-
-    f.close()
-
-def loadDataIntoNetCDF(fileObject, datasets, dataArray, dataType):
-    """
-    Input::
-        fileObject - netCDF4 file object data will be loaded into
-        datasets - List of dataset names
-        dataArray - Multi-dimensional array of data to be loaded into the 
NetCDF file
-        dataType - String with value of either 'Model' or 'Observation'
-    Output::
-        No return value.  netCDF4 file object is updated in place
-    """
-    datasetCount = 0
-    for datasetCount, dataset in enumerate(datasets):
-        if dataType.lower() == 'observation':
-            datasetName = dataset.replace(' ','')
-        elif dataType.lower() == 'model':
-            datasetName = path.splitext(path.basename(dataset))[0]
-        print "Creating variable %s" % datasetName
-        fileObject.createVariable(datasetName, 'd', ('time', 'south_north', 
'west_east'))
-        fileObject.variables[datasetName].varAttName = 'Obseration time 
series: entire domain'
-        print 'Loading values into %s' % datasetName
-        fileObject.variables[datasetName][:] = dataArray[datasetCount,:,:,:]
-
-def checkLatLon(latsin, lonsin, datain):
-    """
-    Purpose::
-        Checks whether latitudes and longitudes are monotonically increasing
-        within the domains [-90, 90) and [-180, 180) respectively, and 
rearranges the input data
-        accordingly if they are not.
-    
-    Input::
-        latsin - Array of latitudes read from a raw netcdf file
-        lonsin - Array of longitudes read from a raw netcdf file
-        datain  - Array of data values read from a raw netcdf file.
-                   The shape is assumed to be (..., nLat, nLon).
-        
-    Output::
-        latsout - 2D array of (rearranged) latitudes
-        lonsout - 2D array of (rearranged) longitudes
-        dataout - Array of (rearranged) data
-    """
-    # Avoid unnecessary shifting if all lons are higher than 180
-    if lonsin.min() > 180:
-        lonsin -= 360
-        
-    # Make sure lats and lons are monotonically increasing
-    latsDecreasing = np.diff(latsin[:, 0]) < 0
-    lonsDecreasing = np.diff(lonsin[0]) < 0
-    
-    # If all values are decreasing then they just need to be reversed
-    latsReversed, lonsReversed = latsDecreasing.all(), lonsDecreasing.all()
-    
-    # If the lat values are unsorted then raise an exception
-    if not latsReversed and latsDecreasing.any():
-        raise ValueError('Latitudes must be monotonically increasing.')
-    
-    # Perform same checks now for lons
-    if not lonsReversed and lonsDecreasing.any():
-        raise ValueError('Longitudes must be monotonically increasing.')
-    
-    # Also check if lons go from [0, 360), and convert to [-180, 180)
-    # if necessary
-    lonsShifted = lonsin.max() > 180
-    latsout, lonsout, dataout = latsin[:], lonsin[:], datain[:]
-    # Now correct data if latlon grid needs to be shifted    
-    if latsReversed:
-        latsout = latsout[::-1]
-        dataout = dataout[..., ::-1, :]
-        
-    if lonsReversed:
-        lonsout = lonsout[..., ::-1]
-        dataout = dataout[..., ::-1]
-        
-    if lonsShifted:
-        lat1d = latsout[:, 0]
-        dataout, lon1d = shiftgrid(180, dataout, lonsout[0], start=False)
-        lonsout, latsout = np.meshgrid(lon1d, lat1d) 
-        
-    return latsout, lonsout, dataout
-    
-def shiftgrid(lon0, datain, lonsin, start= True, cyclic=360.0):
-    """
-    Purpose::
-        Shift global lat/lon grid east or west. This function is taken directly
-        from the (unreleased) basemap 1.0.7 source code as version 1.0.6 does 
not
-        currently support arrays with more than two dimensions.
-        https://github.com/matplotlib/basemap
-        
-    Input::
-        lon0 - starting longitude for shifted grid (ending longitude if 
start=False). 
-               lon0 must be on input grid (within the range of lonsin).
-        datain - original data with longitude the right-most dimension.
-        lonsin - original longitudes.
-        start  - if True, lon0 represents the starting longitude of the new 
grid. 
-                 if False, lon0 is the ending longitude. Default True.
-        cyclic - width of periodic domain (default 360)
-
-    Output:: 
-        dataout - data on shifted grid
-        lonsout - lons on shifted grid
-    """
-    if np.fabs(lonsin[-1]-lonsin[0]-cyclic) > 1.e-4:
-        # Use all data instead of raise ValueError, 'cyclic point not included'
-        start_idx = 0
-    else:
-        # If cyclic, remove the duplicate point
-        start_idx = 1
-    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
-        raise ValueError('lon0 outside of range of lonsin')
-    i0 = np.argmin(np.fabs(lonsin-lon0))
-    i0_shift = len(lonsin)-i0
-    if ma.isMA(datain):
-        dataout  = ma.zeros(datain.shape,datain.dtype)
-    else:
-        dataout  = np.zeros(datain.shape,datain.dtype)
-    if ma.isMA(lonsin):
-        lonsout = ma.zeros(lonsin.shape,lonsin.dtype)
-    else:
-        lonsout = np.zeros(lonsin.shape,lonsin.dtype)
-    if start:
-        lonsout[0:i0_shift] = lonsin[i0:]
-    else:
-        lonsout[0:i0_shift] = lonsin[i0:]-cyclic
-    dataout[...,0:i0_shift] = datain[...,i0:]
-    if start:
-        lonsout[i0_shift:] = lonsin[start_idx:i0+start_idx]+cyclic
-    else:
-        lonsout[i0_shift:] = lonsin[start_idx:i0+start_idx]
-    dataout[...,i0_shift:] = datain[...,start_idx:i0+start_idx]
-    return dataout,lonsout
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/climate/blob/ad8e0c7f/mccsearch/code/mccSearch.py
----------------------------------------------------------------------
diff --git a/mccsearch/code/mccSearch.py b/mccsearch/code/mccSearch.py
index 7bfaed6..5484df1 100644
--- a/mccsearch/code/mccSearch.py
+++ b/mccsearch/code/mccSearch.py
@@ -43,10 +43,6 @@ import matplotlib.cm as cm
 import matplotlib.colors as mcolors
 from matplotlib.ticker import FuncFormatter, FormatStrFormatter
 
-#existing modules in services
-import Nio
-#import files
-#import process
 #----------------------- GLOBAL VARIABLES --------------------------
 # --------------------- User defined variables ---------------------
 #FYI the lat lon values are not necessarily inclusive of the points given. 
These are the limits
@@ -2482,6 +2478,8 @@ def getModelTimes(xtimes, timeVarName):
         modelTimeStep - 'hourly','daily','monthly','annual'
     '''
 
+    from ocw import utils
+
     timeFormat = xtimes.units
     # search to check if 'since' appears in units
     try:

http://git-wip-us.apache.org/repos/asf/climate/blob/ad8e0c7f/mccsearch/code/process.py
----------------------------------------------------------------------
diff --git a/mccsearch/code/process.py b/mccsearch/code/process.py
deleted file mode 100644
index 7367ee7..0000000
--- a/mccsearch/code/process.py
+++ /dev/null
@@ -1,1007 +0,0 @@
-#
-#  Licensed to the Apache Software Foundation (ASF) under one or more
-#  contributor license agreements.  See the NOTICE file distributed with
-#  this work for additional information regarding copyright ownership.
-#  The ASF licenses this file to You under the Apache License, Version 2.0
-#  (the "License"); you may not use this file except in compliance with
-#  the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-"""Collection of functions that process numpy arrays of varying shapes.
-Functions can range from subsetting to regridding"""
-
-# Python Standard Libary Imports
-import math
-import datetime
-import re
-import string
-import sys
-import time
-
-# 3rd Party Imports
-import netCDF4
-import numpy as np
-import numpy.ma as ma
-
-
-
-def extract_subregion_from_data_array(data, lats, lons, latmin, latmax, 
lonmin, lonmax):
-    '''
-     Extract a sub-region from a data array.
-
-     Example::
-       **e.g.** the user may load a global model file, but only want to 
examine data over North America
-            This function extracts a sub-domain from the original data.
-            The defined sub-region must be a regular lat/lon bounding box,
-            but the model data may be on a non-regular grid (e.g. rotated, or 
Guassian grid layout).
-            Data are kept on the original model grid and a rectangular (in 
model-space) region
-            is extracted which contains the rectangular (in lat/lon space) 
user supplied region.
-
-     INPUT::
-       data - 3d masked data array
-       lats - 2d array of latitudes corresponding to data array
-       lons - 2d array of longitudes corresponding to data array
-       latmin, latmax, lonmin, lonmax - bounding box of required region to 
extract
-
-     OUTPUT::
-       data2 - subset of original data array containing only data from 
required subregion
-       lats2 - subset of original latitude data array
-       lons2 - subset of original longitude data array
-
-    '''
-
-
-
-    # Mask model data array to find grid points inside the supplied bounding 
box
-    whlat = (lats > latmin) & (lats < latmax)
-    whlon = (lons > lonmin) & (lons < lonmax)
-    wh = whlat & whlon
-
-    # Find required matrix indices describing the limits of the regular 
lat/lon bounding box
-    jmax = np.where(wh == True)[0].max()
-    jmin = np.where(wh == True)[0].min()
-    imax = np.where(wh == True)[1].max()
-    imin = np.where(wh == True)[1].min()
-
-    # Cut out the sub-region from the data array
-    data2 = ma.zeros((data.shape[0], jmax - jmin, imax - imin))
-    data2 = data[:, jmin:jmax, imin:imax]
-
-    # Cut out sub-region from lats,lons arrays
-    lats2 = lats[jmin:jmax, imin:imax]
-    lons2 = lons[jmin:jmax, imin:imax]
-
-    return data2, lats2, lons2
-
-def calc_area_mean(data, lats, lons, mymask='not set'):
-    '''
-     Calculate Area Average of data in a masked array
-
-     INPUT::
-         data:  a masked array of data (NB. only data from one time expected 
to be passed at once)
-         lats:  2d array of regularly gridded latitudes
-         lons:  2d array of regularly gridded longitudes
-         mymask:  (optional) defines spatial region to do averaging over
-
-     OUTPUT::
-         area_mean: a value for the mean inside the area
-
-    '''
-
-
-
-    # If mask not passed in, then set maks to cover whole data domain
-    if(mymask == 'not set'):
-        mymask = np.empty(data.shape)
-        mymask[:] = False        # NB. mask means (don't show), so False 
everywhere means use everything.
-
-    # Dimension check on lats, lons
-    #  Sometimes these arrays are 3d, sometimes 2d, sometimes 1d
-    #  This bit of code just converts to the required 2d array shape
-    if(len(lats.shape) == 3):
-        lats = lats[0, :, :]
-
-    if(len(lons.shape) == 3):
-        lons = lons[0, :, :]
-
-    if(np.logical_and(len(lats.shape) == 1, len(lons.shape) == 1)):
-        lons, lats = np.meshgrid(lons, lats)
-
-    # Calculate grid length (assuming regular lat/lon grid)
-    dlat = lats[1, 0] - lats[0, 0]
-    dlon = lons[0, 1] - lons[0, 0]
-
-    # Calculates weights for each grid box
-    myweights = calc_area_in_grid_box(lats, dlon, dlat)
-
-    # Create a new masked array covering just user selected area (defined in 
mymask)
-    #   NB. this preserves missing data points in the observations data
-    subdata = ma.masked_array(data, mask=mymask)
-
-    if(myweights.shape != subdata.shape):
-        myweights.resize(subdata.shape)
-        myweights[1:, :] = myweights[0, :]
-
-    # Calculate weighted mean using ma.average (which takes weights)
-    area_mean = ma.average(subdata, weights=myweights)
-
-    return area_mean
-
-def calc_area_in_grid_box(latitude, dlat, dlon):
-    '''
-     Calculate area of regular lat-lon grid box
-
-     INPUT::
-        latitude: latitude of grid box (degrees)
-        dlat:     grid length in latitude direction (degrees)
-        dlon:     grid length in longitude direction (degrees)
-
-     OUTPUT::
-        A:        area of the grid box
-
-      '''
-
-    R = 6371000  # radius of Earth in metres
-    C = 2 * math.pi * R
-
-    latitude = np.radians(latitude)
-
-    A = (dlon * (C / 360.)) * (dlat * (C / 360.) * np.cos(latitude))
-
-    return A
-
-def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi=-999999999):
-    """ 
-    This function has been moved to the ocw/dataset_processor module
-    """
-    from ocw import dataset_processor
-    q2 = dataset_processor._rcmes_spatial_regrid(q, lat, lon, lat2, lon2, 
order=1)
-
-    return q2
-
-def create_mask_using_threshold(masked_array, threshold=0.5):
-    """ 
-    .. deprecated:: 0.3-incubating
-       Use :func:`ocw.dataset_processor._rcmes_create_mask_using_threshold` 
instead.
-    """
-    from ocw import dataset_processor
-    mask = dataset_processor._rcmes_create_mask_using_threshold(masked_array, 
threshold)
-
-    return mask
-
-def calc_average_on_new_time_unit(data, dateList, unit='monthly'):
-    '''
-     Routine to calculate averages on longer time units than the data exists 
on.
-
-     Example::
-         If the data is 6-hourly, calculate daily, or monthly means.
-
-     Input::
-         data     - data values
-         dateList - list of python datetime structures corresponding to data 
values
-         unit     - string describing time unit to average onto.
-         *Valid values are: 'monthly', 'daily', 'pentad','annual','decadal'*
-
-      Output:
-         meanstorem - numpy masked array of data values meaned over required 
time period
-         newTimesList - a list of python datetime objects representing the 
data in the new averagin period
-         *NB.* currently set to beginning of averaging period,
-         **i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.**
-    '''
-
-    # First catch unknown values of time unit passed in by user
-    acceptable = (unit == 'full') | (unit == 'annual') | (unit == 'monthly') | 
(unit == 'daily') | (unit == 'pentad')
-
-    if not acceptable:
-        print 'Error: unknown unit type selected for time averaging'
-        print '       Please check your code.'
-        return
-
-    # Calculate arrays of years (2007,2007),
-    #                     yearsmonths (200701,200702),
-    #                     or yearmonthdays (20070101,20070102)
-    #  -depending on user selected averaging period.
-
-    # Year list
-    if unit == 'annual':
-        print 'Calculating annual mean data'
-        timeunits = []
-
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year))
-
-        timeunits = np.array(timeunits, dtype=int)
-
-    # YearMonth format list
-    if unit == 'monthly':
-        print 'Calculating monthly mean data'
-        timeunits = []
-
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year) + str("%02d" % 
dateList[i].month))
-
-        timeunits = np.array(timeunits, dtype=int)
-
-    # YearMonthDay format list
-    if unit == 'daily':
-        print 'Calculating daily mean data'
-        timeunits = []
-
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year) + str("%02d" % 
dateList[i].month) + \
-                             str("%02d" % dateList[i].day))
-
-        timeunits = np.array(timeunits, dtype=int)
-
-
-    # TODO: add pentad setting using Julian days?
-
-
-    # Full list: a special case
-    if unit == 'full':
-        print 'Calculating means data over full time range'
-        timeunits = []
-
-        for i in np.arange(len(dateList)):
-            timeunits.append(999)  # i.e. we just want the same value for all 
times.
-
-        timeunits = np.array(timeunits, dtype=int)
-
-
-
-    # empty list to store new times
-    newTimesList = []
-
-    # Decide whether or not you need to do any time averaging.
-    #   i.e. if data are already on required time unit then just pass data 
through and
-    #        calculate and return representative datetimes.
-    processing_required = True
-    if len(timeunits) == (len(np.unique(timeunits))):
-        processing_required = False
-
-    # 1D data arrays, i.e. time series
-    if data.ndim == 1:
-        # Create array to store the resulting data
-        meanstore = np.zeros(len(np.unique(timeunits)))
-
-        # Calculate the means across each unique time unit
-        i = 0
-        for myunit in np.unique(timeunits):
-            if processing_required:
-                datam = ma.masked_array(data, timeunits != myunit)
-                meanstore[i] = ma.average(datam)
-
-            # construct new times list
-            smyunit = str(myunit)
-            if len(smyunit) == 4:  # YYYY
-                yyyy = int(myunit[0:4])
-                mm = 1
-                dd = 1
-            if len(smyunit) == 6:  # YYYYMM
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = 1
-            if len(smyunit) == 8:  # YYYYMMDD
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = int(smyunit[6:8])
-            if len(smyunit) == 3:  # Full time range
-                # Need to set an appropriate time representing the mid-point 
of the entire time span
-                dt = dateList[-1] - dateList[0]
-                halfway = dateList[0] + (dt / 2)
-                yyyy = int(halfway.year)
-                mm = int(halfway.month)
-                dd = int(halfway.day)
-
-            newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0))
-            i = i + 1
-
-    # 3D data arrays
-    if data.ndim == 3:
-
-        #datamask = create_mask_using_threshold(data,threshold=0.75)
-
-        # Create array to store the resulting data
-        meanstore = np.zeros([len(np.unique(timeunits)), data.shape[1], 
data.shape[2]])
-
-        # Calculate the means across each unique time unit
-        i = 0
-        datamask_store = []
-        for myunit in np.unique(timeunits):
-            if processing_required:
-
-                mask = np.zeros_like(data)
-                mask[timeunits != myunit, :, :] = 1.0
-
-                # Calculate missing data mask within each time unit...
-                datamask_at_this_timeunit = np.zeros_like(data)
-                datamask_at_this_timeunit[:] = 
create_mask_using_threshold(data[timeunits == myunit, :, :], threshold=0.75)
-                # Store results for masking later
-                datamask_store.append(datamask_at_this_timeunit[0])
-
-                # Calculate means for each pixel in this time unit, ignoring 
missing data (using masked array).
-                datam = ma.masked_array(data, np.logical_or(mask, 
datamask_at_this_timeunit))
-                meanstore[i, :, :] = ma.average(datam, axis=0)
-
-            # construct new times list
-            smyunit = str(myunit)
-            if len(smyunit) == 4:  # YYYY
-                yyyy = int(smyunit[0:4])
-                mm = 1
-                dd = 1
-            if len(smyunit) == 6:  # YYYYMM
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = 1
-            if len(smyunit) == 8:  # YYYYMMDD
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = int(smyunit[6:8])
-            if len(smyunit) == 3:  # Full time range
-                # Need to set an appropriate time representing the mid-point 
of the entire time span
-                dt = dateList[-1] - dateList[0]
-                halfway = dateList[0] + (dt / 2)
-                yyyy = int(halfway.year)
-                mm = int(halfway.month)
-                dd = int(halfway.day)
-
-            newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0))
-
-            i += 1
-
-        if not processing_required:
-            meanstorem = data
-
-        if processing_required:
-            # Create masked array (using missing data mask defined above)
-            datamask_store = np.array(datamask_store)
-            meanstorem = ma.masked_array(meanstore, datamask_store)
-
-    return meanstorem, newTimesList
-
-def calc_running_accum_from_period_accum(data):
-    '''
-     Routine to calculate running total accumulations from individual period 
accumulations.
-     ::
-
-         e.g.  0,0,1,0,0,2,2,1,0,0
-            -> 0,0,1,1,1,3,5,6,6,6
-
-     Input::
-          data: numpy array with time in the first axis
-
-     Output::
-          running_acc: running accumulations
-
-    '''
-
-
-    running_acc = np.zeros_like(data)
-
-    if(len(data.shape) == 1):
-        running_acc[0] = data[0]
-
-    if(len(data.shape) > 1):
-        running_acc[0, :] = data[0, :]
-
-    for i in np.arange(data.shape[0] - 1):
-        if(len(data.shape) == 1):
-            running_acc[i + 1] = running_acc[i] + data[i + 1]
-
-        if(len(data.shape) > 1):
-            running_acc[i + 1, :] = running_acc[i, :] + data[i + 1, :]
-
-    return running_acc
-
-def ignore_boundaries(data, rim=10):
-    '''
-     Routine to mask the lateral boundary regions of model data to ignore them 
from calculations.
-
-     Input::
-         data - a masked array of model data
-         rim - (optional) number of grid points to ignore
-
-     Output::
-         data - data array with boundary region masked
-
-    '''
-
-    nx = data.shape[1]
-    ny = data.shape[0]
-
-    rimmask = np.zeros_like(data)
-    for j in np.arange(rim):
-        rimmask[j, 0:nx - 1] = 1.0
-
-    for j in ny - 1 - np.arange(rim):
-        rimmask[j, 0:nx - 1] = 1.0
-
-    for i in np.arange(rim):
-        rimmask[0:ny - 1, i] = 1.0
-
-    for i in nx - 1 - np.arange(rim):
-        rimmask[0:ny - 1, i] = 1.0
-
-    data = ma.masked_array(data, mask=rimmask)
-
-    return data
-
-def normalizeDatetimes(datetimes, timestep):
-    """ This function has been moved to the ocw/dataset_processor module """
-    from ocw import dataset_processor as dsp
-    return dsp._rcmes_normalize_datetimes(datetimes, timestep)
-
-def getModelTimes(modelFile, timeVarName):
-    '''
-    TODO:  Do a better job handling dates here
-    Routine to convert from model times ('hours since 1900...', 'days since 
...')
-    into a python datetime structure
-
-    Input::
-        modelFile - path to the model tile you want to extract the times list 
and modelTimeStep from
-        timeVarName - name of the time variable in the model file
-
-    Output::
-        times  - list of python datetime objects describing model data times
-        modelTimeStep - 'hourly','daily','monthly','annual'
-    '''
-
-    f = netCDF4.Dataset(modelFile, mode='r')
-    xtimes = f.variables[timeVarName]
-    timeFormat = xtimes.units.encode()
-
-    # search to check if 'since' appears in units
-    try:
-        sinceLoc = re.search('since', timeFormat).end()
-
-    except AttributeError:
-        print 'Error decoding model times: time variable attributes do not 
contain "since"'
-        raise
-
-    units = None
-    TIME_UNITS = ('minutes', 'hours', 'days', 'months', 'years')
-    # search for 'seconds','minutes','hours', 'days', 'months', 'years' so 
know units
-    for unit in TIME_UNITS:
-        if re.search(unit, timeFormat):
-            units = unit
-            break
-
-    # cut out base time (the bit following 'since')
-    base_time_string = string.lstrip(timeFormat[sinceLoc:])
-    # decode base time
-    base_time = decodeTimeFromString(base_time_string)
-    times = []
-
-    for xtime in xtimes[:]:
-
-        # Cast time as an int
-        #TODO: KDW this may cause problems for data that is hourly with more 
than one timestep in it
-        xtime = int(xtime)
-
-        if units == 'minutes':
-            dt = datetime.timedelta(minutes=xtime)
-            new_time = base_time + dt
-        elif units == 'hours':
-            dt = datetime.timedelta(hours=xtime)
-            new_time = base_time + dt
-        elif units == 'days':
-            dt = datetime.timedelta(days=xtime)
-            new_time = base_time + dt
-        elif units == 'months':
-            # NB. adding months in python is complicated as month length 
varies and hence ambiguous.
-            # Perform date arithmatic manually
-            #  Assumption: the base_date will usually be the first of the month
-            #              NB. this method will fail if the base time is on 
the 29th or higher day of month
-            #                      -as can't have, e.g. Feb 31st.
-            new_month = int(base_time.month + xtime % 12)
-            new_year = int(math.floor(base_time.year + xtime / 12.))
-            new_time = datetime.datetime(new_year, new_month, base_time.day, 
base_time.hour, base_time.second, 0)
-        elif units == 'years':
-            dt = datetime.timedelta(years=xtime)
-            new_time = base_time + dt
-
-        times.append(new_time)
-
-    try:
-        if len(xtimes) == 1:
-            timeStepLength = 0
-        else:
-            timeStepLength = int(xtimes[1] - xtimes[0] + 1.e-12)
-            
-        modelTimeStep = getModelTimeStep(units, timeStepLength)
-       
-        #if timeStepLength is zero do not normalize times as this would create 
an empty list for MERG (hourly) data
-        if timeStepLength != 0:
-            times = normalizeDatetimes(times, modelTimeStep) 
-    except:
-        raise
-
-   
-    return times, modelTimeStep
-
-def getModelTimeStep(units, stepSize):
-    # Time units are now determined. Determine the time intervals of input 
data (mdlTimeStep)
-
-    if units == 'minutes':
-        if stepSize == 60:
-            modelTimeStep = 'hourly'
-        elif stepSize == 1440:
-            modelTimeStep = 'daily'
-        # 28 days through 31 days
-        elif 40320 <= stepSize <= 44640:
-            modelTimeStep = 'monthly'
-        # 365 days through 366 days
-        elif 525600 <= stepSize <= 527040:
-            modelTimeStep = 'annual'
-        else:
-            raise Exception('model data time step interval exceeds the max 
time interval (annual)', units, stepSize)
-
-    elif units == 'hours':
-        #need a check for fractional hrs and only one hr i.e. stepSize=0 e.g. 
with MERG data
-        if stepSize == 0 or stepSize == 1:
-            modelTimeStep = 'hourly'
-        elif stepSize == 24:
-            modelTimeStep = 'daily'
-        elif 672 <= stepSize <= 744:
-            modelTimeStep = 'monthly'
-        elif 8760 <= stepSize <= 8784:
-            modelTimeStep = 'annual'
-        else:
-            raise Exception('model data time step interval exceeds the max 
time interval (annual)', units, stepSize)
-
-    elif units == 'days':
-        if stepSize == 1:
-            modelTimeStep = 'daily'
-        elif 28 <= stepSize <= 31:
-            modelTimeStep = 'monthly'
-        elif 365 <= stepSize <= 366:
-            modelTimeStep = 'annual'
-        else:
-            raise Exception('model data time step interval exceeds the max 
time interval (annual)', units, stepSize)
-
-    elif units == 'months':
-        if stepSize == 1:
-            modelTimeStep = 'monthly'
-        elif stepSize == 12:
-            modelTimeStep = 'annual'
-        else:
-            raise Exception('model data time step interval exceeds the max 
time interval (annual)', units, stepSize)
-
-    elif units == 'years':
-        if stepSize == 1:
-            modelTimeStep = 'annual'
-        else:
-            raise Exception('model data time step interval exceeds the max 
time interval (annual)', units, stepSize)
-
-    else:
-        errorMessage = 'the time unit ', units, ' is not currently handled in 
this version.'
-        raise Exception(errorMessage)
-
-    return modelTimeStep
-
-def decodeTimeFromString(time_string):
-    '''
-     Decodes string into a python datetime object
-     *Method:* tries a bunch of different time format possibilities and 
hopefully one of them will hit.
-     ::
-
-       **Input:**  time_string - a string that represents a date/time
-
-       **Output:** mytime - a python datetime object
-    '''
-
-    # This will deal with times that use decimal seconds
-    if '.' in time_string:
-        time_string = time_string.split('.')[0] + '0'
-    else:
-        pass
-
-    try:
-        mytime = time.strptime(time_string, '%Y-%m-%d %H:%M:%S')
-        mytime = datetime.datetime(*mytime[0:6])
-        return mytime
-
-    except ValueError:
-        pass
-
-    try:
-        mytime = time.strptime(time_string, '%Y/%m/%d %H:%M:%S')
-        mytime = datetime.datetime(*mytime[0:6])
-        return mytime
-
-    except ValueError:
-        pass
-
-    try:
-        mytime = time.strptime(time_string, '%Y%m%d %H:%M:%S')
-        mytime = datetime.datetime(*mytime[0:6])
-        return mytime
-
-    except ValueError:
-        pass
-
-    try:
-        mytime = time.strptime(time_string, '%Y:%m:%d %H:%M:%S')
-        mytime = datetime.datetime(*mytime[0:6])
-        return mytime
-
-    except ValueError:
-        pass
-
-    try:
-        mytime = time.strptime(time_string, '%Y%m%d%H%M%S')
-        mytime = datetime.datetime(*mytime[0:6])
-        return mytime
-
-    except ValueError:
-        pass
-
-    try:
-        mytime = time.strptime(time_string, '%Y-%m-%d %H:%M')
-        mytime = datetime.datetime(*mytime[0:6])
-        return mytime
-
-    except ValueError:
-        pass
-
-    try:
-        mytime = time.strptime(time_string, '%Y-%m-%d %H')
-        mytime = datetime.datetime(*mytime[0:6])
-        return mytime
-
-    except ValueError:
-        pass
-
-    try:
-        mytime = time.strptime(time_string, '%Y-%m-%d')
-        mytime = datetime.datetime(*mytime[0:6])
-        return mytime
-
-    except ValueError:
-        pass
-
-    print 'Error decoding time string: string does not match a predefined time 
format'
-    return 0
-
-def regrid_wrapper(regrid_choice, obsdata, obslats, obslons, wrfdata, wrflats, 
wrflons):
-    '''
-     Wrapper routine for regridding.
-
-     Either regrids model to obs grid, or obs to model grid, depending on user 
choice
-
-         Inputs::
-             regrid_choice - [0] = Regrid obs data onto model grid or
-                             [1] = Regrid model data onto obs grid
-
-             obsdata,wrfdata - data arrays
-             obslats,obslons - observation lat,lon arrays
-             wrflats,wrflons - model lat,lon arrays
-
-         Output::
-             rdata,rdata2 - regridded data
-                 lats,lons    - latitudes and longitudes of regridded data
-
-    '''
-
-    # Regrid obs data onto model grid
-    if(regrid_choice == '0'):
-
-        ndims = len(obsdata.shape)
-        if(ndims == 3):
-            newshape = [obsdata.shape[0], wrfdata.shape[1], wrfdata.shape[2]]
-            nT = obsdata.shape[0]
-
-        if(ndims == 2):
-            newshape = [wrfdata.shape[0], wrfdata.shape[1]]
-            nT = 0
-
-        rdata = wrfdata
-        lats, lons = wrflats, wrflons
-
-        # Create a new masked array with the required dimensions
-        tmp = np.zeros(newshape)
-        rdata2 = ma.MaskedArray(tmp, mask=(tmp == 0))
-        tmp = 0 # free up some memory
-
-        rdata2[:] = 0.0
-
-        if(nT > 0):
-            for t in np.arange(nT):
-                rdata2[t, :, :] = do_regrid(obsdata[t, :, :], obslats[:, :], 
obslons[:, :], wrflats[:, :], wrflons[:, :])
-
-        if(nT == 0):
-            rdata2[:, :] = do_regrid(obsdata[:, :], obslats[:, :], obslons[:, 
:], wrflats[:, :], wrflons[:, :])
-
-
-    # Regrid model data onto obs grid
-    if(regrid_choice == '1'):
-        ndims = len(wrfdata.shape)
-        if(ndims == 3):
-            newshape = [wrfdata.shape[0], obsdata.shape[1], obsdata.shape[2]]
-            nT = obsdata.shape[0]
-
-        if(ndims == 2):
-            newshape = [obsdata.shape[0], obsdata.shape[1]]
-            nT = 0
-
-        rdata2 = obsdata
-        lats, lons = obslats, obslons
-
-        tmp = np.zeros(newshape)
-        rdata = ma.MaskedArray(tmp, mask=(tmp == 0))
-        tmp = 0 # free up some memory
-
-        rdata[:] = 0.0
-
-        if(nT > 0):
-            for t in np.arange(nT):
-                rdata[t, :, :] = do_regrid(wrfdata[t, :, :], wrflats[:, :], 
wrflons[:, :], obslats[:, :], obslons[:, :])
-
-        if(nT == 0):
-            rdata[:, :] = do_regrid(wrfdata[:, :], wrflats[:, :], wrflons[:, 
:], obslats[:, :], obslons[:, :])
-
-
-    return rdata, rdata2, lats, lons
-
-def extract_sub_time_selection(allTimes, subTimes, data):
-    '''
-     Routine to extract a sub-selection of times from a data array.
-
-     Example::
-           Suppose a data array has 30 time values for daily data for a whole 
month,
-           but you only want the data from the 5th-15th of the month.
-
-     Input::
-           allTimes - list of datetimes describing what times the data in the 
data array correspond to
-           subTimes - the times you want to extract data from
-           data     - the data array
-
-     Output::
-           subdata     - subselection of data array
-
-    '''
-    # Create new array to store the subselection
-    subdata = np.zeros([len(subTimes), data.shape[1], data.shape[2]])
-
-    # Loop over all Times and when it is a member of the required 
subselection, copy the data across
-    i = 0     # counter for allTimes
-    subi = 0  # counter for subTimes
-    for t in allTimes:
-        if(np.in1d(allTimes, subTimes)):
-            subdata[subi, :, :] = data[i, :, :]
-            subi += 1
-        i += 1
-
-    return subdata
-
-def calc_average_on_new_time_unit_K(data, dateList, unit):
-    """ 
-    This function has been moved to the ocw/dataset_processor module
-    """
-    from ocw import dataset_processor
-    temporally_regridded_data = 
dataset_processor._rcmes_calc_average_on_new_time_unit_K(data, dateList, unit)
-    return temporally_regridded_data
-
-
-def decode_model_timesK(ifile,timeVarName,file_type):
-    
#################################################################################################
-    #  Convert model times ('hours since 1900...', 'days since ...') into a 
python datetime structure
-    #  Input:
-    #      filelist - list of model files
-    #      timeVarName - name of the time variable in the model files
-    #  Output:
-    #      times  - list of python datetime objects describing model data times
-    #     Peter Lean February 2011
-    
#################################################################################################
-    f = netCDF4.Dataset(ifile,mode='r',format=file_type)
-    xtimes = f.variables[timeVarName]
-    timeFormat = xtimes.units.encode()
-    #timeFormat = "days since 1979-01-01 00:00:00"
-    # search to check if 'since' appears in units
-    try:
-        sinceLoc = re.search('since',timeFormat).end()
-    except:
-        print 'Error decoding model times: time var attributes do not contain 
"since": Exit'
-        sys.exit()
-    # search for 'seconds','minutes','hours', 'days', 'months', 'years' so 
know units
-    # TODO:  Swap out this section for a set of if...elseif statements
-    units = ''
-    try:
-        _ = re.search('minutes',timeFormat).end()
-        units = 'minutes'
-    except:
-        pass
-    try:
-        _ = re.search('hours',timeFormat).end()
-        units = 'hours'
-    except:
-        pass
-    try:
-        _ = re.search('days',timeFormat).end()
-        units = 'days'
-    except:
-        pass
-    try:
-        _ = re.search('months',timeFormat).end()
-        units = 'months'
-    except:
-        pass
-    try:
-        _ = re.search('years',timeFormat).end()
-        units = 'years'
-    except:
-        pass
-    # cut out base time (the bit following 'since')
-    base_time_string = string.lstrip(timeFormat[sinceLoc:])
-    # decode base time
-    # 9/25/2012: datetime.timedelta has problem with the argument because 
xtimes is NioVariable.
-    # Soln (J. Kim): use a temp variable ttmp in the for loop, then convert it 
into an integer xtime.
-    base_time = decodeTimeFromString(base_time_string)
-    times=[]
-    for ttmp in xtimes[:]:
-        xtime = int(ttmp)
-        if(units=='minutes'):
-            dt = datetime.timedelta(minutes=xtime); new_time = base_time + dt
-        if(units=='hours'):
-            dt = datetime.timedelta(hours=xtime); new_time = base_time + dt
-        if(units=='days'):
-            dt = datetime.timedelta(days=xtime); new_time = base_time + dt
-        if(units=='months'):   # NB. adding months in python is complicated as 
month length varies and hence ambigous.
-            # Perform date arithmatic manually
-            #  Assumption: the base_date will usually be the first of the month
-            #              NB. this method will fail if the base time is on 
the 29th or higher day of month
-            #                      -as can't have, e.g. Feb 31st.
-            new_month = int(base_time.month + xtime % 12)
-            new_year = int(math.floor(base_time.year + xtime / 12.))
-            #print 
type(new_year),type(new_month),type(base_time.day),type(base_time.hour),type(base_time.second)
-            new_time = 
datetime.datetime(new_year,new_month,base_time.day,base_time.hour,base_time.second,0)
-        if(units=='years'):
-            dt = datetime.timedelta(years=xtime); new_time = base_time + dt
-        times.append(new_time)
-    return times
-
-
def regrid_in_time(data, dateList, unit):
    '''
     Purpose:: Average data onto a coarser time unit than it exists on,
               e.g. if the data is 6-hourly, calculate daily or monthly means.

     Input::
        data     - numpy (or numpy.ma) array of data values; time must be
                   axis 0.  Supported shapes: 1-D (time series) or
                   3-D (time, lat, lon).
        dateList - list of datetime.datetime objects, one per time step
        unit     - target averaging period: 'full', 'annual', 'monthly',
                   'daily'.  ('pentad' is accepted by the legacy validity
                   check but was never implemented; it now fails cleanly.)

     Output::
        meanstorem   - array of values averaged over each target period
                       (the input data itself when it is already on the
                       requested grid)
        newTimesList - list of datetime.datetime objects, one per averaging
                       period.  NB. set to the beginning of the averaging
                       period (e.g. Jan 1st 00Z for a January mean); for
                       'full' it is the midpoint of the whole record.

     On an unrecognised unit, prints an error and returns -1,-1,-1,-1
     (kept as-is for backward compatibility with existing callers).
    '''
    print('***  Begin regrid_in_time  ***')

    # Validate the user-selected temporal grid up front.
    if unit not in ('full', 'annual', 'monthly', 'daily', 'pentad'):
        print('Error: unknown unit type selected for time averaging: EXIT')
        return -1, -1, -1, -1
    if unit == 'pentad':
        # NOTE(review): the original code accepted 'pentad' but had no
        # labelling branch for it, so it crashed with a NameError further
        # down.  Fail explicitly instead.
        print('Error: pentad time averaging is not implemented: EXIT')
        return -1, -1, -1, -1

    # Label every time step with the averaging bucket it belongs to:
    #   annual  -> YYYY, monthly -> YYYYMM, daily -> YYYYMMDD
    #   full    -> the constant 999, i.e. one shared bucket for all steps
    #              (annual-mean climatology over the entire record).
    # "%02d" zero-pads like i2.2 in Fortran.
    if unit == 'annual':
        timeunits = [str(d.year) for d in dateList]
    elif unit == 'monthly':
        timeunits = ['%d%02d' % (d.year, d.month) for d in dateList]
    elif unit == 'daily':
        timeunits = ['%d%02d%02d' % (d.year, d.month, d.day) for d in dateList]
    else:  # 'full'
        timeunits = [999] * len(dateList)
    timeunits = np.array(timeunits, dtype=int)
    print('timeRegridOption= %s' % unit)

    # If every time step is already its own bucket, the data are already on
    # the requested grid: skip the averaging and just pass the data through
    # while still building the representative datetimes.
    uniqueUnits = np.unique(timeunits)  # sorted unique bucket labels
    processing_required = len(timeunits) != len(uniqueUnits)
    print('processing_required= %s : input time steps= %d : '
          'regridded output time steps = %d'
          % (processing_required, len(timeunits), len(uniqueUnits)))
    print('data.ndim= %d' % data.ndim)

    # One representative datetime per output bucket.
    newTimesList = [_bucket_to_datetime(str(u), dateList) for u in uniqueUnits]

    if data.ndim == 1:                       # 1-D time series
        meanstore = np.zeros(len(uniqueUnits))
        if processing_required:
            for i, myunit in enumerate(uniqueUnits):
                meanstore[i] = ma.average(data[timeunits == myunit])
    elif data.ndim == 3:                     # (time, lat, lon) 2-D fields
        meanstore = np.zeros([len(uniqueUnits), data.shape[1], data.shape[2]])
        if processing_required:
            for i, myunit in enumerate(uniqueUnits):
                meanstore[i, :, :] = ma.average(data[timeunits == myunit, :, :],
                                                axis=0)
    else:
        # The original fell through to a NameError for any other rank.
        print('Error: data must be 1-D or 3-D for time averaging: EXIT')
        return -1, -1, -1, -1

    # Bug fix vs original: meanstorem was only assigned inside the 3-D
    # branch, so 1-D input raised NameError at the return below.
    meanstorem = meanstore if processing_required else data

    print('***  End regrid_in_time  ***')
    return meanstorem, newTimesList


def _bucket_to_datetime(smyunit, dateList):
    '''
     Purpose:: Map one averaging-bucket label to its representative datetime.

     Input::
        smyunit  - bucket label as a string: 'YYYY', 'YYYYMM', 'YYYYMMDD',
                   or '999' (the whole-record bucket)
        dateList - the full list of datetimes, used only for the
                   whole-record midpoint

     Output:: a datetime.datetime at the start of the period (midpoint of
              the record for the whole-record bucket)
    '''
    if len(smyunit) == 4:       # YYYY
        return datetime.datetime(int(smyunit), 1, 1, 0, 0, 0, 0)
    if len(smyunit) == 6:       # YYYYMM
        return datetime.datetime(int(smyunit[0:4]), int(smyunit[4:6]),
                                 1, 0, 0, 0, 0)
    if len(smyunit) == 8:       # YYYYMMDD
        return datetime.datetime(int(smyunit[0:4]), int(smyunit[4:6]),
                                 int(smyunit[6:8]), 0, 0, 0, 0)
    # Full time range: a time representing the midpoint of the entire span.
    halfway = dateList[0] + (dateList[-1] - dateList[0]) / 2
    return datetime.datetime(halfway.year, halfway.month, halfway.day,
                             0, 0, 0, 0)

Reply via email to