http://git-wip-us.apache.org/repos/asf/climate/blob/8c142c35/rcmet/src/main/python/rcmes/storage/db.py ---------------------------------------------------------------------- diff --git a/rcmet/src/main/python/rcmes/storage/db.py b/rcmet/src/main/python/rcmes/storage/db.py deleted file mode 100644 index 22a1dfd..0000000 --- a/rcmet/src/main/python/rcmes/storage/db.py +++ /dev/null @@ -1,359 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Collection of functions used to interface with the database and to create netCDF file -""" -import os -import urllib2 -import re -import numpy as np -import numpy.ma as ma -import json -import netCDF4 - -from classes import RCMED -from toolkit import process -from datetime import timedelta ,datetime -from calendar import monthrange - -def reorderXYT(lons, lats, times, values): - # Re-order values in values array such that when reshaped everywhere is where it should be - # (as DB doesn't necessarily return everything in order) - order = np.lexsort((lons, lats, times)) - counter = 0 - sortedValues = np.zeros_like(values) - sortedLats = np.zeros_like(lats) - sortedLons = np.zeros_like(lons) - for i in order: - sortedValues[counter] = values[i] - sortedLats[counter] = lats[i] - sortedLons[counter] = lons[i] - counter += 1 - - return sortedValues, sortedLats, sortedLons - -def findUnique(seq, idfun=None): - """ - Function to find unique values (used in construction of unique datetime list) - NB. order preserving - Input: seq - a list of randomly ordered values - Output: result - list of ordered values - """ - if idfun is None: - def idfun(x): - return x - - seen = {}; - result = [] - - for item in seq: - marker = idfun(item) - # in old Python versions: - # if seen.has_key(marker) - # but in new ones: - if marker in seen: continue - seen[marker] = 1 - result.append(item) - return result - -def get_param_info(url): - - ''' - This function will get the general information by given URL from the parameter table. - ''' - url = url + "&info=yes" - result = urllib2.urlopen(url) - datastring = result.read() - datastring=json.loads(datastring) - database=datastring["database"] - timestep=datastring["timestep"] - realm=datastring["realm"] - instrument=datastring["instrument"] - start_date=datastring["start_date"] - end_date=datastring["end_date"] - unit=datastring["units"] - - return database, timestep, realm, instrument, start_date, end_date, unit - -def get_data(url): - - ''' - This function will get the url, query from database and will return datapoints' latitude, longitude, level, time and value. 
- ''' - - result = urllib2.urlopen(url) - datastring = result.read() - d = re.search('data: \r\n', datastring) - data = datastring[d.end():len(datastring)] - - # To create a list of all datapoints - data=data.split('\r\n') - - latitudes = [] - longitudes = [] - levels = [] - values = [] - timestamps = [] - - # To make a series of lists from datapoints - for i in range(len(data)-1): # Because the last row is empty, "len(data)-1" is used. - row=data[i].split(',') - latitudes.append(np.float32(row[0])) - longitudes.append(np.float32(row[1])) - levels.append(np.float32(row[2])) - # timestamps are strings so we will leave them alone for now - timestamps.append(row[3]) - values.append(np.float32(row[4])) - - return latitudes, longitudes, levels, values, timestamps - - -def create_netCDF(latitudes, longitudes, levels, values, timestamps, database, latMin, latMax, lonMin, lonMax, startTime, endTime, unit, netCD_fileName): - - ''' - This function will generate netCDF files. - ''' - - # To generate netCDF file from database - netcdf = netCDF4.Dataset(netCD_fileName,mode='w') - string="The netCDF file for parameter: " + database + ", latMin: " + str(latMin) + ", latMax: " + str(latMax) + ", lonMin: " + str(lonMin) + ", lonMax: " + str(lonMax) + " startTime: " + str(startTime) + " and endTime: " + str(endTime) + "." - netcdf.globalAttName = str(string) - netcdf.createDimension('dim', len(latitudes)) - latitude = netcdf.createVariable('lat', 'd', ('dim',)) - longitude = netcdf.createVariable('lon', 'd', ('dim',)) - level = netcdf.createVariable('lev', 'd', ('dim',)) - time = netcdf.createVariable('time', 'd', ('dim',)) - value = netcdf.createVariable('value', 'd', ('dim',)) - - netcdf.variables['lat'].varAttName = 'latitude' - netcdf.variables['lat'].units = 'degrees_north' - netcdf.variables['lon'].varAttName = 'longitude' - netcdf.variables['lon'].units = 'degrees_east' - netcdf.variables['time'].varAttName = 'time' - netcdf.variables['time'].units = 'hours since ' + str(startTime) - netcdf.variables['value'].varAttName = 'value' - netcdf.variables['value'].units = str(unit) - netcdf.variables['lev'].varAttName = 'level' - netcdf.variables['lev'].units = 'hPa' - - hours=[] - timeFormat = "%Y-%m-%d %H:%M:%S" - base_date=startTime - # To convert the date to hours - for t in timestamps: - date=datetime.strptime(t, timeFormat) - diff=date-base_date - hours.append(diff.days*24) - - latitude[:]=latitudes[:] - longitude[:]=longitudes[:] - level[:]=levels[:] - time[:]=hours[:] - value[:]=values[:] - netcdf.close() - -def read_netcdf(netCD_fileName): - - ''' - This function will read the existed netCDF file, convert the hours from netCDF time variable - and return latitudes, longitudes, levels, times and values. 
- ''' - # To use the created netCDF file - netcdf = netCDF4.Dataset(netCD_fileName , mode='r') - # To get all data from netCDF file - latitudes = netcdf.variables['lat'][:] - longitudes = netcdf.variables['lon'][:] - levels = netcdf.variables['lev'][:] - hours = netcdf.variables['time'][:] - values = ma.array(netcdf.variables['value'][:]) - - # To get the base date - time_unit=netcdf.variables['time'].units.encode() - time_unit=time_unit.split(' ') - base_date=time_unit[2] + " " + time_unit[3] - - netcdf.close() - - timeFormat = "%Y-%m-%d %H:%M:%S" - - # Because time in netCDF file is based on hours since a specific date, it needs to be converted to date format - times=[] - # To convert the base date to the python datetime format - base_date = datetime.strptime(base_date, timeFormat) - for each in range(len(hours)): - hour=timedelta(hours[each]/24) - eachTime=base_date + hour - times.append(str(eachTime.year) + '-' + str("%02d" % (eachTime.month)) + '-' + str("%02d" % (eachTime.day)) + ' ' + str("%02d" % (eachTime.hour)) + ':' + str("%02d" % (eachTime.minute)) + ':' + str("%02d" % (eachTime.second))) - - return latitudes, longitudes, levels, times, values - - -def improve_data(latitudes, longitudes, levels, times, values, timestep): - - # Make arrays of unique latitudes, longitudes, levels and times - uniqueLatitudes = np.unique(latitudes) - uniqueLongitudes = np.unique(longitudes) - uniqueLevels = np.unique(levels) - uniqueTimestamps = np.unique(times) - - # Calculate nx and ny - uniqueLongitudeCount = len(uniqueLongitudes) - uniqueLatitudeCount = len(uniqueLatitudes) - uniqueLevelCount = len(uniqueLevels) - uniqueTimeCount = len(uniqueTimestamps) - - values, latitudes, longitudes = reorderXYT(longitudes, latitudes, times, values) - - # Convert each unique time from strings into list of Python datetime objects - # TODO - LIST COMPS! 
- timeFormat = "%Y-%m-%d %H:%M:%S" - timesUnique = [datetime.strptime(t, timeFormat) for t in uniqueTimestamps] - timesUnique.sort() - timesUnique = process.normalizeDatetimes(timesUnique, timestep) - - # Reshape arrays - latitudes = latitudes.reshape(uniqueTimeCount, uniqueLatitudeCount, uniqueLongitudeCount, uniqueLevelCount) - longitudes = longitudes.reshape(uniqueTimeCount, uniqueLatitudeCount, uniqueLongitudeCount, uniqueLevelCount) - levels = np.array(levels).reshape(uniqueTimeCount, uniqueLatitudeCount, uniqueLongitudeCount, uniqueLevelCount) - values = values.reshape(uniqueTimeCount, uniqueLatitudeCount, uniqueLongitudeCount, uniqueLevelCount) - - # Flatten dimension if only single level - if uniqueLevelCount == 1: - values = values[:, :, :, 0] - latitudes = latitudes[0, :, :, 0] - longitudes = longitudes[0, :, :, 0] - - # Created masked array to deal with missing values - # -these make functions like values.mean(), values.max() etc ignore missing values - mdi = -9999 # TODO: extract this value from the DB retrieval metadata - mdata = ma.masked_array(values, mask=(values == mdi)) - - - return latitudes, longitudes, uniqueLevels, timesUnique, mdata - - -def extractData ( datasetID, paramID, latMin, latMax, lonMin, lonMax, userStartTime, userEndTime, cachedir, timestep ): - - """ - Main function to extract data from DB into numpy masked arrays, and also to create monthly netCDF file as cache - - Input:: - datasetID, paramID: required identifiers of data in database - latMin, latMax, lonMin, lonMax: location range to extract data for - startTime, endTime: python datetime objects describing required time range to extract - cachedir: directory path used to store temporary cache files - timestep: "daily" | "monthly" so we can be sure to query the RCMED properly - Output: - uniqueLatitudes,uniqueLongitudes: 1d-numpy array of latitude and longitude grid values - uniqueLevels: 1d-numpy array of vertical level values - timesUnique: list of python datetime objects describing times of returned data - mdata: masked numpy arrays of data values - """ - - url = RCMED.jplUrl(datasetID, paramID, latMin, latMax, lonMin, lonMax, userStartTime, userEndTime, cachedir, timestep) - - # To get the parameter's information from parameter table - database, timestep, realm, instrument, dbStartDate, dbEndDate, unit = get_param_info(url) - - # Create a directory inside the cache directory - name = [] - # activity is a fix value - activity = "obs4cmip5" - name.append(activity) - # product is a fix value - product = "observations" - name.append(product) - # realm, variable,frequency and instrument will be get from parameter table - realm = realm - name.append(realm) - variable = database - name.append(variable) - frequency = timestep - name.append(frequency) - data_structure = "grid" - name.append(data_structure) - institution = "NASA" - name.append(institution) - project = "RCMES" - name.append(project) - instrument = instrument - name.append(instrument) - version = "v1" - name.append(version) - - # Check to see whether the folder is already created for netCDF or not, then it will be created - temp_path = cachedir - for n in name: - path = os.path.join(temp_path, n) - if os.path.exists(path): - temp_path = path - pass - else: - os.mkdir(path) - temp_path = path - - processing_level = 'L3' - processing_version = "processing_version" # the processing version is still unknown and can be added later - - timeFormat = "%Y-%m-%d %H:%M:%S" - - date_list, lats, longs, uniqueLevls, uniqueTimes, vals = [], [], [], [], [], 
[] - - # To make a list (date_list) of all months available based on user time request - while userStartTime <= userEndTime: - #To get the beginning of month - beginningOfMonth = str("%04d" % userStartTime.year) + "-" + str("%02d" % userStartTime.month) + "-" + "01 00:00:00" - #To get the end of month - endOfMonth = str("%04d" % userStartTime.year) + "-" + str("%02d" % userStartTime.month) + "-" + str(monthrange(userStartTime.year,userStartTime.month)[1]) + " 00:00:00" - #To convert both beginning and end of month from string to Python datetime format - beginningOfMonth = datetime.strptime(beginningOfMonth, timeFormat) - endOfMonth = datetime.strptime(endOfMonth, timeFormat) - #To add beginning and end of month as a list to the date_list list - date_list.append([beginningOfMonth, endOfMonth]) - #To get the beginning of next month - userStartTime= endOfMonth + timedelta(days=1) - - - # To loop over all months and return data - for i, date in enumerate(date_list): - netCDF_name = variable + '_' + project + '_' + processing_level + '_' + processing_version + '_' + str(latMin) + '_' + str(latMax) + '_' + str(lonMin) + '_' + str(lonMax) + '_' + str("%04d" % date[0].year) + str("%02d" % date[0].month) + '.nc' - - # To check if netCDF file exists, then use it - if os.path.exists(path+"/"+ netCDF_name): - latitudes, longitudes, levels, times, values = read_netcdf(path + "/" + netCDF_name) - - # If the netCDF file does not exist, then create one and read it. - else: - # To just query for one year of data - print "%s of %s Database Download(s) Complete" % (i, len(date_list)) - url = RCMED.jplUrl(datasetID, paramID, latMin, latMax, lonMin, lonMax, date[0], date[1], cachedir, timestep) - - # To get data from DB - latitudes, longitudes, levels, values, timestamps = get_data(url) - create_netCDF(latitudes, longitudes, levels, values, timestamps, database, latMin, latMax, lonMin, lonMax, date[0], date[1], unit, path + "/" + netCDF_name) - - # To read from netCDF files - latitudes, longitudes, levels, times, values = read_netcdf(path + "/" + netCDF_name) - - lats=np.append(lats,latitudes) - longs=np.append(longs,longitudes) - uniqueLevls=np.append(uniqueLevls,levels) - uniqueTimes=np.append(uniqueTimes,times) - vals=np.append(vals,values) - - latitudes, longitudes, uniqueLevels, timesUnique, mdata = improve_data(lats, longs, uniqueLevls, uniqueTimes, vals, timestep) - - return latitudes, longitudes, uniqueLevels, timesUnique, mdata
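A note on the deleted db.py above: reorderXYT re-sorts the flat arrays returned by the database with np.lexsort, whose LAST key is the primary sort key (times), followed by lats and then lons, before improve_data reshapes the arrays into (time, lat, lon, level) grids. The element-by-element copy loop it used can be replaced with NumPy fancy indexing. Below is a minimal standalone sketch of that ordering, with invented toy data rather than real RCMED output:

    # Sketch of the reorderXYT ordering using NumPy fancy indexing.
    # Variable names mirror the deleted db.py; the data points are invented.
    import numpy as np

    lons   = np.array([10.0,  0.0, 10.0,  0.0])
    lats   = np.array([40.0, 40.0, 50.0, 50.0])
    times  = np.array([ 1,    1,    1,    1  ])
    values = np.array([ 1.0,  2.0,  3.0,  4.0])

    # lexsort treats the last key as primary: times, then lats, then lons,
    # matching the (lons, lats, times) key order used in reorderXYT.
    order = np.lexsort((lons, lats, times))

    # Fancy indexing replaces the counter loop in reorderXYT.
    sortedValues = values[order]
    sortedLats   = lats[order]
    sortedLons   = lons[order]

    print sortedLons    # [  0.  10.   0.  10.]
    print sortedLats    # [ 40.  40.  50.  50.]
    print sortedValues  # [ 2.  1.  4.  3.]

The result is the point order that the reshape in improve_data expects: time varying slowest, then latitude, then longitude.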
http://git-wip-us.apache.org/repos/asf/climate/blob/8c142c35/rcmet/src/main/python/rcmes/storage/files.py ---------------------------------------------------------------------- diff --git a/rcmet/src/main/python/rcmes/storage/files.py b/rcmet/src/main/python/rcmes/storage/files.py deleted file mode 100644 index b238754..0000000 --- a/rcmet/src/main/python/rcmes/storage/files.py +++ /dev/null @@ -1,783 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -""" -Module for handling data input files. Requires netCDF and Numpy be -installed. - -This module can easily open NetCDF, HDF and Grib files. Search the netCDF4 -documentation for a complete list of supported formats. -""" - -from os import path -import netCDF4 -import numpy as np -import numpy.ma as ma -import sys - -from toolkit import process -from utils import fortranfile -from utils import misc - - -VARIABLE_NAMES = {'time': ['time', 'times', 'date', 'dates', 'julian'], - 'latitude': ['latitude', 'lat', 'lats', 'latitudes'], - 'longitude': ['longitude', 'lon', 'lons', 'longitudes'] - } - - -def findunique(seq): - keys = {} - for e in seq: - keys[e] = 1 - return keys.keys() - -def getVariableByType(filename, variableType): - """ - Function that will try to return the variable from a file based on a provided - parameter type. - - Input:: - filename - the file to inspect - variableType - time | latitude | longitude - - Output:: - variable name OR list of all variables in the file if a single variable - name match cannot be found. - """ - try: - f = netCDF4.Dataset(filename, mode='r') - except: - print "netCDF4Error:", sys.exc_info()[0] - raise - - variableKeys = f.variables.keys() - f.close() - variableKeys = [variable.encode().lower() for variable in variableKeys] - variableMatch = VARIABLE_NAMES[variableType] - - commonVariables = list(set(variableKeys).intersection(variableMatch)) - - if len(commonVariables) == 1: - return str(commonVariables[0]) - - else: - return variableKeys - -def getVariableRange(filename, variableName): - """ - Function to return the min and max values of the given variable in - the supplied filename. 
- - Input:: - filename - absolute path to a file - variableName - variable whose min and max values should be returned - - Output:: - variableRange - tuple of order (variableMin, variableMax) - """ - try: - f = netCDF4.Dataset(filename, mode='r') - except: - print "netCDF4Error:", sys.exc_info()[0] - raise - - varArray = f.variables[variableName][:] - return (varArray.min(), varArray.max()) - - -def read_data_from_file_list(filelist, myvar, timeVarName, latVarName, lonVarName): - ''' - Read in data from a list of model files into a single data structure - - Input: - filelist - list of filenames (including path) - myvar - string containing name of variable to load in (as it appears in file) - Output: - lat, lon - 2D array of latitude and longitude values - timestore - list of times - t2store - numpy array containing data from all files - - NB. originally written specific for WRF netCDF output files - modified to make more general (Feb 2011) - - Peter Lean July 2010 - ''' - - filelist.sort() - filename = filelist[0] - # Crash nicely if 'filelist' is zero length - """TODO: Throw Error instead via try Except""" - if len(filelist) == 0: - print 'Error: no files have been passed to read_data_from_file_list()' - sys.exit() - - # Open the first file in the list to: - # i) read in lats, lons - # ii) find out how many timesteps in the file - # (assume same ntimes in each file in list) - # -allows you to create an empty array to store variable data for all times - tmp = netCDF4.Dataset(filename, mode='r') - latsraw = tmp.variables[latVarName][:] - lonsraw = tmp.variables[lonVarName][:] - if(latsraw.ndim == 1): - lon, lat = np.meshgrid(lonsraw, latsraw) - if(latsraw.ndim == 2): - lon = lonsraw - lat = latsraw - - timesraw = tmp.variables[timeVarName] - ntimes = len(timesraw) - - print 'Lats and lons read in for first file in filelist' - - # Create a single empty masked array to store model data from all files - t2store = ma.zeros((ntimes * len(filelist), len(lat[:, 0]), len(lon[0, :]))) - timestore = ma.zeros((ntimes * len(filelist))) - - # Now load in the data for real - # NB. no need to reload in the latitudes and longitudes -assume invariant - i = 0 - timesaccu = 0 # a counter for number of times stored so far in t2store - # NB. this method allows for missing times in data files - # as no assumption made that same number of times in each file... - - - for i, ifile in enumerate(filelist): - - #print 'Loading data from file: ',filelist[i] - f = netCDF4.Dataset(ifile, mode='r') - t2raw = ma.array(f.variables[myvar][:]) - timesraw = f.variables[timeVarName] - time = timesraw[:] - ntimes = len(time) - print 'file= ', i, 'ntimes= ', ntimes, filelist[i] - print 't2raw shape: ', t2raw.shape - - # Flatten dimensions which needn't exist, i.e. level - # e.g. if for single level then often data have 4 dimensions, when 3 dimensions will do. - # Code requires data to have dimensions, (time,lat,lon) - # i.e. remove level dimensions - # Remove 1d axis from the t2raw array - # Example: t2raw.shape == (365, 180, 360 1) <maps to (time, lat, lon, height)> - # After the squeeze you will be left with (365, 180, 360) instead - t2tmp = t2raw.squeeze() - # Nb. if this happens to be data for a single time only, then we just flattened it by accident - # lets put it back... 
- if t2tmp.ndim == 2: - t2tmp = np.expand_dims(t2tmp, 0) - - t2store[timesaccu + np.arange(ntimes), :, :] = t2tmp[:, :, :] - timestore[timesaccu + np.arange(ntimes)] = time - timesaccu += ntimes - f.close() - - print 'Data read in successfully with dimensions: ', t2store.shape - - # TODO: search for duplicated entries (same time) and remove duplicates. - # Check to see if number of unique times == number of times, if so then no problem - - if(len(np.unique(timestore)) != len(np.where(timestore != 0)[0].view())): - print 'WARNING: Possible duplicated times' - - # Decode model times into python datetime objects. Note: timestore becomes a list (no more an array) here - timestore, _ = process.getModelTimes(filename, timeVarName) - - # Make sure latlon grid is monotonically increasing and that the domains - # are correct - lat, lon, t2store = checkLatLon(lat, lon, t2store) - data_dict = {'lats': lat, 'lons': lon, 'times': timestore, 'data': t2store} - return data_dict - -def select_var_from_file(myfile, fmt='not set'): - ''' - Routine to act as user interface to allow users to select variable of interest from a file. - - Input: - myfile - filename - fmt - (optional) specify fileformat for netCDF4 if filename suffix is non-standard - - Output: - myvar - variable name in file - - Peter Lean September 2010 - ''' - - print fmt - - if fmt == 'not set': - f = netCDF4.Dataset(myfile, mode='r') - - if fmt != 'not set': - f = netCDF4.Dataset(myfile, mode='r', format=fmt) - - keylist = [key.encode().lower() for key in f.variables.keys()] - - i = 0 - for v in keylist: - print '[', i, '] ', f.variables[v].long_name, ' (', v, ')' - i += 1 - - user_selection = raw_input('Please select variable : [0 -' + str(i - 1) + '] ') - - myvar = keylist[int(user_selection)] - - return myvar - -def select_var_from_wrf_file(myfile): - ''' - Routine to act as user interface to allow users to select variable of interest from a wrf netCDF file. 
- - Input: - myfile - filename - - Output: - mywrfvar - variable name in wrf file - - Peter Lean September 2010 - ''' - - f = netCDF4.Dataset(myfile, mode='r', format='NETCDF4') - keylist = [key.encode().lower() for key in f.variables.keys()] - - i = 0 - for v in keylist: - try: - print '[', i, '] ', f.variables[v].description, ' (', v, ')' - except: - print '' - - i += 1 - - user_selection = raw_input('Please select WRF variable : [0 -' + str(i - 1) + '] ') - - mywrfvar = keylist[int(user_selection)] - - return mywrfvar - -def read_data_from_one_file(ifile, myvar, latVarName, lonVarName, timeVarName, file_type): - """ - Purpose:: - Read in data from one file at a time - - Input:: - filelist - list of filenames (including path) - myvar - string containing name of variable to load in (as it appears in file)s - lonVarName - name of the Longitude Variable - timeVarName - name of the Time Variable - fileType - type of file we are trying to parse - - Output:: - lat, lon - 2D arrays of latitude and longitude values - times - list of times - t2store - numpy array containing data from the file for the requested variable - varUnit - units for the variable given by t2store - """ - f = netCDF4.Dataset(ifile, mode='r') - try: - varUnit = f.variables[myvar].units.encode().upper() - except: - varUnit = raw_input('Enter the model variable unit: \n> ').upper() - t2raw = ma.array(f.variables[myvar][:]) - t2tmp = t2raw.squeeze() - if t2tmp.ndim == 2: - t2tmp = np.expand_dims(t2tmp, 0) - - lonsraw = f.variables[lonVarName][:] - latsraw = f.variables[latVarName][:] - if(latsraw.ndim == 1): - lon, lat = np.meshgrid(lonsraw, latsraw) - if(latsraw.ndim == 2): - lon = lonsraw - lat = latsraw - - f.close() - print ' success read_data_from_one_file: VarName=', myvar, ' Shape(Full)= ', t2tmp.shape, ' Unit= ', varUnit - timestore = process.decode_model_timesK(ifile, timeVarName, file_type) - - # Make sure latlon grid is monotonically increasing and that the domains - # are correct - lat, lon, t2store = checkLatLon(lat, lon, t2tmp) - return lat, lon, timestore, t2store, varUnit - -def findTimeVariable(filename): - """ - Function to find what the time variable is called in a model file. - Input:: - filename - file to crack open and check for a time variable - Output:: - timeName - name of the input file's time variable - variableNameList - list of variable names from the input filename - """ - try: - f = netCDF4.Dataset(filename, mode='r') - except: - print("Unable to open '%s' to try and read the Time variable" % filename) - raise - - variableNameList = [variable.encode() for variable in f.variables.keys()] - # convert all variable names into lower case - varNameListLowerCase = [x.lower() for x in variableNameList] - - # Use "set" types for finding common variable name from in the file and from the list of possibilities - possibleTimeNames = set(['time', 'times', 'date', 'dates', 'julian']) - - # Use the sets to find the intersection where variable names are in possibleNames - timeNameSet = possibleTimeNames.intersection(varNameListLowerCase) - - if len(timeNameSet) == 0: - print("Unable to autodetect the Time Variable Name in the '%s'" % filename) - timeName = misc.askUserForVariableName(variableNameList, targetName ="Time") - - else: - timeName = timeNameSet.pop() - - return timeName, variableNameList - - -def findLatLonVarFromFile(filename): - """ - Function to find what the latitude and longitude variables are called in a model file. 
- - Input:: - -filename - Output:: - -latVarName - -lonVarName - -latMin - -latMax - -lonMin - -lonMax - """ - try: - f = netCDF4.Dataset(filename, mode='r') - except: - print("Unable to open '%s' to try and read the Latitude and Longitude variables" % filename) - raise - - variableNameList = [variable.encode() for variable in f.variables.keys()] - # convert all variable names into lower case - varNameListLowerCase = [x.lower() for x in variableNameList] - - # Use "set" types for finding common variable name from in the file and from the list of possibilities - possibleLatNames = set(['latitude', 'lat', 'lats', 'latitudes']) - possibleLonNames = set(['longitude', 'lon', 'lons', 'longitudes']) - - # Use the sets to find the intersection where variable names are in possibleNames - latNameSet = possibleLatNames.intersection(varNameListLowerCase) - lonNameSet = possibleLonNames.intersection(varNameListLowerCase) - - if len(latNameSet) == 0 or len(lonNameSet) == 0: - print("Unable to autodetect Latitude and/or Longitude values in the file") - latName = misc.askUserForVariableName(variableNameList, targetName ="Latitude") - lonName = misc.askUserForVariableName(variableNameList, targetName ="Longitude") - - else: - latName = latNameSet.pop() - lonName = lonNameSet.pop() - - lats = np.array(f.variables[latName][:]) - lons = np.array(f.variables[lonName][:]) - - latMin = lats.min() - latMax = lats.max() - - # Convert the lons from 0:360 into -180:180 - lons[lons > 180] = lons[lons > 180] - 360. - lonMin = lons.min() - lonMax = lons.max() - - return latName, lonName, latMin, latMax, lonMin, lonMax - - -def read_data_from_file_list_K(filelist, myvar, timeVarName, latVarName, lonVarName, file_type): - ################################################################################## - # Read in data from a list of model files into a single data structure - # Input: filelist - list of filenames (including path) - # myvar - string containing name of variable to load in (as it appears in file) - # Output: lat, lon - 2D array of latitude and longitude values - # times - list of times - # t2store - numpy array containing data from all files - # Modified from read_data_from_file_list to read data from multiple models into a 4-D array - # 1. The code now processes model data that completely covers the 20-yr period. Thus, - # all model data must have the same time levels (ntimes). Unlike in the oroginal, ntimes - # is fixed here. - # 2. Because one of the model data exceeds 240 mos (243 mos), the model data must be - # truncated to the 240 mons using the ntimes determined from the first file. 
- ################################################################################## - filelist.sort() - nfiles = len(filelist) - # Crash nicely if 'filelist' is zero length - if nfiles == 0: - print 'Error: no files have been passed to read_data_from_file_list(): Exit' - sys.exit() - - # Open the first file in the list to: - # i) read in lats, lons - # ii) find out how many timesteps in the file (assume same ntimes in each file in list) - # -allows you to create an empty array to store variable data for all times - tmp = netCDF4.Dataset(filelist[0], mode='r', format=file_type) - latsraw = tmp.variables[latVarName][:] - lonsraw = tmp.variables[lonVarName][:] - timesraw = tmp.variables[timeVarName] - - if(latsraw.ndim == 1): - lon, lat = np.meshgrid(lonsraw, latsraw) - - elif(latsraw.ndim == 2): - lon = lonsraw - lat = latsraw - ntimes = len(timesraw); nygrd = len(lat[:, 0]); nxgrd = len(lon[0, :]) - - print 'Lats and lons read in for first file in filelist' - - # Create a single empty masked array to store model data from all files - #t2store = ma.zeros((ntimes*nfiles,nygrd,nxgrd)) - t2store = ma.zeros((nfiles, ntimes, nygrd, nxgrd)) - #timestore=ma.zeros((ntimes*nfiles)) - - ## Now load in the data for real - ## NB. no need to reload in the latitudes and longitudes -assume invariant - #timesaccu=0 # a counter for number of times stored so far in t2store - # NB. this method allows for missing times in data files - # as no assumption made that same number of times in each file... - - for i, ifile in enumerate(filelist): - #print 'Loading data from file: ',filelist[i] - f = netCDF4.Dataset(ifile, mode='r') - t2raw = ma.array(f.variables[myvar][:]) - timesraw = f.variables[timeVarName] - #ntimes=len(time) - #print 'file= ',i,'ntimes= ',ntimes,filelist[i] - ## Flatten dimensions which needn't exist, i.e. level - ## e.g. if for single level then often data have 4 dimensions, when 3 dimensions will do. - ## Code requires data to have dimensions, (time,lat,lon) - ## i.e. remove level dimensions - t2tmp = t2raw.squeeze() - ## Nb. if data happen to be for a single time, we flattened it by accident; lets put it back... - if t2tmp.ndim == 2: - t2tmp = np.expand_dims(t2tmp, 0) - #t2store[timesaccu+np.arange(ntimes),:,:]=t2tmp[0:ntimes,:,:] - t2store[i, 0:ntimes, :, :] = t2tmp[0:ntimes, :, :] - #timestore[timesaccu+np.arange(ntimes)]=time - #timesaccu=timesaccu+ntimes - f.close() - - print 'Data read in successfully with dimensions: ', t2store.shape - - # Decode model times into python datetime objects. Note: timestore becomes a list (no more an array) here - ifile = filelist[0] - timestore, _ = process.getModelTimes(ifile, timeVarName) - - # Make sure latlon grid is monotonically increasing and that the domains - # are correct - lat, lon, t2store = checkLatLon(lat, lon, t2store) - return lat, lon, timestore, t2store - -def find_latlon_ranges(filelist, lat_var_name, lon_var_name): - # Function to return the latitude and longitude ranges of the data in a file, - # given the identifying variable names. 
- # - # Input: - # filelist - list of filenames (data is read in from first file only) - # lat_var_name - variable name of the 'latitude' variable - # lon_var_name - variable name of the 'longitude' variable - # - # Output: - # latMin, latMax, lonMin, lonMax - self explanatory - # - # Peter Lean March 2011 - - filename = filelist[0] - - try: - f = netCDF4.Dataset(filename, mode='r') - - lats = f.variables[lat_var_name][:] - latMin = lats.min() - latMax = lats.max() - - lons = f.variables[lon_var_name][:] - lons[lons > 180] = lons[lons > 180] - 360. - lonMin = lons.min() - lonMax = lons.max() - - return latMin, latMax, lonMin, lonMax - - except: - print 'Error: there was a problem with finding the latitude and longitude ranges in the file' - print ' Please check that you specified the filename, and variable names correctly.' - - sys.exit() - -def writeBN_lola(fileName, lons, lats): - # write a binary data file that include longitude (1-d) and latitude (1-d) values - - F = fortranfile.FortranFile(fileName, mode='w') - ngrdY = lons.shape[0]; ngrdX = lons.shape[1] - tmpDat = ma.zeros(ngrdX); tmpDat[:] = lons[0, :]; F.writeReals(tmpDat) - tmpDat = ma.zeros(ngrdY); tmpDat[:] = lats[:, 0]; F.writeReals(tmpDat) - # release temporary arrays - tmpDat = 0 - F.close() - -def writeBNdata(fileName, numOBSs, numMDLs, nT, ngrdX, ngrdY, numSubRgn, obsData, mdlData, obsRgnAvg, mdlRgnAvg): - #(fileName,maskOption,numOBSs,numMDLs,nT,ngrdX,ngrdY,numSubRgn,obsData,mdlData,obsRgnAvg,mdlRgnAvg): - # write spatially- and regionally regridded data into a binary data file - missing = -1.e26 - F = fortranfile.FortranFile(fileName, mode='w') - # construct a data array to replace mask flag with a missing value (missing=-1.e12) for printing - data = ma.zeros((nT, ngrdY, ngrdX)) - for m in np.arange(numOBSs): - data[:, :, :] = obsData[m, :, :, :]; msk = data.mask - for n in np.arange(nT): - for j in np.arange(ngrdY): - for i in np.arange(ngrdX): - if msk[n, j, i]: data[n, j, i] = missing - - # write observed data. allowed to write only one row at a time - tmpDat = ma.zeros(ngrdX) - for n in np.arange(nT): - for j in np.arange(ngrdY): - tmpDat[:] = data[n, j, :] - F.writeReals(tmpDat) - - # write model data (dep. on the number of models). 
- for m in np.arange(numMDLs): - data[:, :, :] = mdlData[m, :, :, :] - msk = data.mask - for n in np.arange(nT): - for j in np.arange(ngrdY): - for i in np.arange(ngrdX): - if msk[n, j, i]: - data[n, j, i] = missing - - for n in np.arange(nT): - for j in np.arange(ngrdY): - tmpDat[:] = data[n, j, :] - F.writeReals(tmpDat) - - data = 0 # release the array allocated for data - # write data in subregions - if numSubRgn > 0: - print 'Also included are the time series of the means over ', numSubRgn, ' areas from obs and model data' - tmpDat = ma.zeros(nT); print numSubRgn - for m in np.arange(numOBSs): - for n in np.arange(numSubRgn): - tmpDat[:] = obsRgnAvg[m, n, :] - F.writeReals(tmpDat) - for m in np.arange(numMDLs): - for n in np.arange(numSubRgn): - tmpDat[:] = mdlRgnAvg[m, n, :] - F.writeReals(tmpDat) - tmpDat = 0 # release the array allocated for tmpDat - F.close() - -def writeNCfile(fileName, numSubRgn, lons, lats, obsData, mdlData, obsRgnAvg, mdlRgnAvg, obsList, mdlList, subRegions): - # write an output file of variables up to 3 dimensions - # fileName: the name of the output data file - # numSubRgn : the number of subregions - # lons[ngrdX]: longitude - # lats[ngrdY]: latitudes - # obsData[nT,ngrdY,ngrdX]: the obs time series of the entire model domain - # mdlData[numMDLs,nT,ngrdY,ngrdX]: the mdltime series of the entire model domain - # obsRgnAvg[numSubRgn,nT]: the obs time series for the all subregions - # mdlRgnAvg[numMDLs,numSubRgn,nT]: the mdl time series for the all subregions - dimO = obsData.shape[0] # the number of obs data - dimM = mdlData.shape[0] # the number of mdl data - dimT = mdlData.shape[1] # the number of time levels - dimY = mdlData.shape[2] # y-dimension - dimX = mdlData.shape[3] # x-dimension - dimR = obsRgnAvg.shape[1] # the number of subregions - f = netCDF4.Dataset(fileName, mode='w', format='NETCDF4') - print mdlRgnAvg.shape, dimM, dimR, dimT - #create global attributes - f.description = '' - # create dimensions - print 'Creating Dimensions within the NetCDF Object...' - f.createDimension('unity', 1) - f.createDimension('time', dimT) - f.createDimension('west_east', dimX) - f.createDimension('south_north', dimY) - f.createDimension('obs', dimO) - f.createDimension('models', dimM) - - # create the variable (real*4) to be written in the file - print 'Creating Variables...' - f.createVariable('lon', 'd', ('south_north', 'west_east')) - f.createVariable('lat', 'd', ('south_north', 'west_east')) - f.createVariable('oDat', 'd', ('obs', 'time', 'south_north', 'west_east')) - f.createVariable('mDat', 'd', ('models', 'time', 'south_north', 'west_east')) - - if subRegions: - f.createDimension('regions', dimR) - f.createVariable('oRgn', 'd', ('obs', 'regions', 'time')) - f.createVariable('mRgn', 'd', ('models', 'regions', 'time')) - f.variables['oRgn'].varAttName = 'Observation time series: Subregions' - f.variables['mRgn'].varAttName = 'Model time series: Subregions' - - loadDataIntoNetCDF(f, obsList, obsData, 'Observation') - print 'Loaded the Observations into the NetCDF' - - loadDataIntoNetCDF(f, mdlList, mdlData, 'Model') - - # create attributes and units for the variable - print 'Creating Attributes and Units...' 
- f.variables['lon'].varAttName = 'Longitudes' - f.variables['lon'].varUnit = 'degrees East' - f.variables['lat'].varAttName = 'Latitudes' - f.variables['lat'].varUnit = 'degrees North' - f.variables['oDat'].varAttName = 'Observation time series: entire domain' - f.variables['mDat'].varAttName = 'Model time series: entire domain' - - # assign the values to the variable and write it - f.variables['lon'][:] = lons[:] - f.variables['lat'][:] = lats[:] - if subRegions: - f.variables['oRgn'][:] = obsRgnAvg[:] - f.variables['mRgn'][:] = mdlRgnAvg[:] - - f.close() - -def loadDataIntoNetCDF(fileObject, datasets, dataArray, dataType): - """ - Input:: - fileObject - netCDF4 file object data will be loaded into - datasets - List of dataset names - dataArray - Multi-dimensional array of data to be loaded into the NetCDF file - dataType - String with value of either 'Model' or 'Observation' - Output:: - No return value. netCDF4 file object is updated in place - """ - datasetCount = 0 - for datasetCount, dataset in enumerate(datasets): - if dataType.lower() == 'observation': - datasetName = dataset.replace(' ','') - elif dataType.lower() == 'model': - datasetName = path.splitext(path.basename(dataset))[0] - print "Creating variable %s" % datasetName - fileObject.createVariable(datasetName, 'd', ('time', 'south_north', 'west_east')) - fileObject.variables[datasetName].varAttName = 'Obseration time series: entire domain' - print 'Loading values into %s' % datasetName - fileObject.variables[datasetName][:] = dataArray[datasetCount,:,:,:] - -def checkLatLon(latsin, lonsin, datain): - """ - Purpose:: - Checks whether latitudes and longitudes are monotonically increasing - within the domains [-90, 90) and [-180, 180) respectively, and rearranges the input data - accordingly if they are not. - - Input:: - latsin - Array of latitudes read from a raw netcdf file - lonsin - Array of longitudes read from a raw netcdf file - datain - Array of data values read from a raw netcdf file. - The shape is assumed to be (..., nLat, nLon). 
- - Output:: - latsout - 2D array of (rearranged) latitudes - lonsout - 2D array of (rearranged) longitudes - dataout - Array of (rearranged) data - """ - # Avoid unnecessary shifting if all lons are higher than 180 - if lonsin.min() > 180: - lonsin -= 360 - - # Make sure lats and lons are monotonically increasing - latsDecreasing = np.diff(latsin[:, 0]) < 0 - lonsDecreasing = np.diff(lonsin[0]) < 0 - - # If all values are decreasing then they just need to be reversed - latsReversed, lonsReversed = latsDecreasing.all(), lonsDecreasing.all() - - # If the lat values are unsorted then raise an exception - if not latsReversed and latsDecreasing.any(): - raise ValueError('Latitudes must be monotonically increasing.') - - # Perform same checks now for lons - if not lonsReversed and lonsDecreasing.any(): - raise ValueError('Longitudes must be monotonically increasing.') - - # Also check if lons go from [0, 360), and convert to [-180, 180) - # if necessary - lonsShifted = lonsin.max() > 180 - latsout, lonsout, dataout = latsin[:], lonsin[:], datain[:] - # Now correct data if latlon grid needs to be shifted - if latsReversed: - latsout = latsout[::-1] - dataout = dataout[..., ::-1, :] - - if lonsReversed: - lonsout = lonsout[..., ::-1] - dataout = dataout[..., ::-1] - - if lonsShifted: - lat1d = latsout[:, 0] - dataout, lon1d = shiftgrid(180, dataout, lonsout[0], start=False) - lonsout, latsout = np.meshgrid(lon1d, lat1d) - - return latsout, lonsout, dataout - -def shiftgrid(lon0, datain, lonsin, start= True, cyclic=360.0): - """ - Purpose:: - Shift global lat/lon grid east or west. This function is taken directly - from the (unreleased) basemap 1.0.7 source code as version 1.0.6 does not - currently support arrays with more than two dimensions. - https://github.com/matplotlib/basemap - - Input:: - lon0 - starting longitude for shifted grid (ending longitude if start=False). - lon0 must be on input grid (within the range of lonsin). - datain - original data with longitude the right-most dimension. - lonsin - original longitudes. - start - if True, lon0 represents the starting longitude of the new grid. - if False, lon0 is the ending longitude. Default True. 
- cyclic - width of periodic domain (default 360) - - Output:: - dataout - data on shifted grid - lonsout - lons on shifted grid - """ - if np.fabs(lonsin[-1]-lonsin[0]-cyclic) > 1.e-4: - # Use all data instead of raise ValueError, 'cyclic point not included' - start_idx = 0 - else: - # If cyclic, remove the duplicate point - start_idx = 1 - if lon0 < lonsin[0] or lon0 > lonsin[-1]: - raise ValueError('lon0 outside of range of lonsin') - i0 = np.argmin(np.fabs(lonsin-lon0)) - i0_shift = len(lonsin)-i0 - if ma.isMA(datain): - dataout = ma.zeros(datain.shape,datain.dtype) - else: - dataout = np.zeros(datain.shape,datain.dtype) - if ma.isMA(lonsin): - lonsout = ma.zeros(lonsin.shape,lonsin.dtype) - else: - lonsout = np.zeros(lonsin.shape,lonsin.dtype) - if start: - lonsout[0:i0_shift] = lonsin[i0:] - else: - lonsout[0:i0_shift] = lonsin[i0:]-cyclic - dataout[...,0:i0_shift] = datain[...,i0:] - if start: - lonsout[i0_shift:] = lonsin[start_idx:i0+start_idx]+cyclic - else: - lonsout[i0_shift:] = lonsin[start_idx:i0+start_idx] - dataout[...,i0_shift:] = datain[...,start_idx:i0+start_idx] - return dataout,lonsout \ No newline at end of file http://git-wip-us.apache.org/repos/asf/climate/blob/8c142c35/rcmet/src/main/python/rcmes/storage/rcmed.py ---------------------------------------------------------------------- diff --git a/rcmet/src/main/python/rcmes/storage/rcmed.py b/rcmet/src/main/python/rcmes/storage/rcmed.py deleted file mode 100644 index c99334c..0000000 --- a/rcmet/src/main/python/rcmes/storage/rcmed.py +++ /dev/null @@ -1,129 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -'''This is a collection of functions that provide the single interface to the -rcmed. Initial design will includes several functions to interact with the -available parameters within rcmed and their metadata. - -Future work includes rolling the rcmed querying code into this module as well. -''' - -import requests, json - -paramUri = 'http://rcmes.jpl.nasa.gov/bottle/rcmed/param.json' - -def getParams(uri=paramUri): - '''This will return all of the parameters from the database as - a list of dictionaries. - - If the database is not available, then the method will return None''' - # Use a get request to call the Web Service - try: - httpRequest = requests.get(uri) - except: - print "HTTPRequest failed. 
Bottle WebServer is offline" - raise - # TODO Check the request status code if it is 400 or 500 range then - # return None - # if the status code is 200 then return the request.text's param list - # http_request.status_code is an int we can inspect - paramDict = json.loads(httpRequest.text) - paramList = paramDict['param'] - - filteredParams = [] - # Filter list to remove missing data values - for param in paramList: - paramGood = True - for key, value in param.iteritems(): - if value == None: - paramGood = False - - if paramGood: - filteredParams.append(param) - else: - filteredParams.append(param) - - - return filteredParams - - - -#class Parameter(object): -# -# def __init__(self): -# self.param_query_uri = 'http://some.url' -# self.param_list = self.param_metadata() -# -# def param_metadata(self): -# '''This method will return a list of python dict's. Each dict will -# contain a complete record for each parameter from rcmed''' -# # 1. Query the Parameters Metadata Endpoint using param_query_uri -# # 2. Parse the returned data and re-format into a dict -# # 3. define self.para_met_dict -# test_list = [{"id":12, -# "description":"ERA Dataset 2 Metre Temperature", -# "type":'temp' -# }, -# {"id":13, -# "description":"ERA Dataset 2 Metre Dewpoint Temperature", -# 'type':'temp' -# }, -# {"id":14, -# "description":"TRMM Dataset HRF parameter", -# 'type':'hrf' -# } -# ] -# print "self.param_met_dict has been created" -# return test_list -# -# def get_param_by_id(self, id): -# '''This will take in a parameter id and return a single dict. Can we -# safely assume we will always hold a unique parameter id? - Currently -# this is True''' -# for p in self.param_list: -# if p['id'] == id: -# return p -# else: -# pass -# -# def get_params_by_type(self, type): -# '''This will take in a parameter type like precip, temp, pressure, etc. -# and will return a list of all the params that are of the given type.''' -# param_list = [] #empty list to collect the param dicts -# for p in self.param_list: -# if p['type'] == type: -# param_list.append(p) -# else: -# pass -# return param_list -# -# -#class ObsData(object): -# -# def __init__(self): -# self.query_url = 'http://rcmes/rcmed....' #can we merely insert the query criteria into the url attribute? -# self.param_id = 6 -# self.dataset_id = 1 -# self.lat_range = [25.4,55.0] -# self.lon_range = [0.0,10.7] -# self.time_range = [start,end] -# -# def set_param(self, param_dict): -# self.param_id = param_dict['id'] -# self.dataset_id = null -# # look up the dataset id using the parameter id and set it -# p = Parameter.get_param_by_id(id) - \ No newline at end of file http://git-wip-us.apache.org/repos/asf/climate/blob/8c142c35/rcmet/src/main/python/rcmes/toolkit/__init__.py ---------------------------------------------------------------------- diff --git a/rcmet/src/main/python/rcmes/toolkit/__init__.py b/rcmet/src/main/python/rcmes/toolkit/__init__.py deleted file mode 100644 index c980999..0000000 --- a/rcmet/src/main/python/rcmes/toolkit/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""The toolkit Package is a collection of modules that provide a set of tools -that can be used to process, analyze and plot the analysis results.""" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/climate/blob/8c142c35/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py ---------------------------------------------------------------------- diff --git a/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py b/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py deleted file mode 100644 index 0b910c4..0000000 --- a/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py +++ /dev/null @@ -1,366 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Prepare Datasets both model and observations for analysis using metrics""" - - -import numpy as np -import numpy.ma as ma -import sys, os - -from storage import db, files -import process -from utils import misc - -# TODO: swap gridBox for Domain -def prep_data(settings, obsDatasetList, gridBox, modelList): - """ - - returns numOBSs,numMDLs,nT,ngrdY,ngrdX,Times,lons,lats,obsData,modelData,obsList - - numOBSs - number of Observational Datasets. Really need to look at using len(obsDatasetList) instead - numMDLs - number of Models used. Again should use the len(modelList) instead - nT - Time value count after temporal regridding. Should use length of the time axis for a given dataset - ngrdY - size of the Y-Axis grid after spatial regridding - ngrdX - size of the X-Axis grid after spatial regridding - Times - list of python datetime objects the represent the list of time to be used in further calculations - lons - - lats - - obsData - - modelData - - obsList - - - - """ - - # TODO: Stop the object Deserialization and work on refactoring the core code here - cachedir = settings.cacheDir - workdir = settings.workDir - - # Use list comprehensions to deconstruct obsDatasetList - # ['TRMM_pr_mon', 'CRU3.1_pr'] Basically a list of Dataset NAME +'_' + parameter name - THE 'CRU*' one triggers unit conversion issues later - # the plan here is to use the obsDatasetList which contains a longName key we can use. 
- obsList = [str(x['longname']) for x in obsDatasetList] - # Also using the obsDatasetList with a key of ['dataset_id'] - obsDatasetId = [str(x['dataset_id']) for x in obsDatasetList] - # obsDatasetList ['paramter_id'] list - obsParameterId = [str(x['parameter_id']) for x in obsDatasetList] - obsTimestep = [str(x['timestep']) for x in obsDatasetList] - mdlList = [model.filename for model in modelList] - - # Since all of the model objects in the modelList have the same Varnames and Precip Flag, I am going to merely - # pull this from modelList[0] for now - modelVarName = modelList[0].varName - precipFlag = modelList[0].precipFlag - modelTimeVarName = modelList[0].timeVariable - modelLatVarName = modelList[0].latVariable - modelLonVarName = modelList[0].lonVariable - regridOption = settings.spatialGrid - timeRegridOption = settings.temporalGrid - - """ - Routine to read-in and re-grid both obs and mdl datasets. - Processes both single and multiple files of obs and mdl or combinations in a general way. - i) retrieve observations from the database - ii) load in model data - iii) spatial regridding - iv) temporal regridding - v) area-averaging - Input: - cachedir - string describing directory path - workdir - string describing directory path - obsList - string describing the observation data files - obsDatasetId - int, db dataset id - obsParameterId - int, db parameter id - latMin, latMax, lonMin, lonMax, dLat, dLon, naLats, naLons: define the evaluation/analysis domain/grid system - latMin - float - latMax - float - lonMin - float - lonMax - float - dLat - float - dLon - float - naLats - integer - naLons - integer - mdlList - string describing model file name + path - modelVarName - string describing name of variable to evaluate (as written in model file) - precipFlag - bool (is this precipitation data? True/False) - modelTimeVarName - string describing name of time variable in model file - modelLatVarName - string describing name of latitude variable in model file - modelLonVarName - string describing name of longitude variable in model file - regridOption - string: 'obs'|'model'|'user' - timeRegridOption -string: 'full'|'annual'|'monthly'|'daily' - maskOption - Boolean - - # TODO: This isn't true in the current codebase. - Instead the SubRegion's are being used. You can see that these values are not - being used in the code, at least they are not being passed in from the function - - maskLatMin - float (only used if maskOption=1) - maskLatMax - float (only used if maskOption=1) - maskLonMin - float (only used if maskOption=1) - maskLonMax - float (only used if maskOption=1) - Output: image files of plots + possibly data - Jinwon Kim, 7/11/2012 - """ - - - # check the number of obs & model data files - numOBSs = len(obsList) - numMDLs = len(mdlList) - - # assign parameters that must be preserved throughout the process - - print 'start & end time = ', settings.startDate, settings.endDate - yymm0 = settings.startDate.strftime("%Y%m") - yymm1 = settings.endDate.strftime("%Y%m") - print 'start & end eval period = ', yymm0, yymm1 - - - - #TODO: Wrap in try except blocks instead - if numMDLs < 1: - print 'No input model data file. EXIT' - sys.exit() - if numOBSs < 1: - print 'No input observation data file. 
EXIT' - sys.exit() - - ## Part 0: Setup the regrid variables based on the regridOption given - - # preparation for spatial re-gridding: define the size of horizontal array of the target interpolation grid system (ngrdX and ngrdY) - print 'regridOption in prep_data= ', regridOption - if regridOption == 'model': - ifile = mdlList[0] - typeF = 'nc' - lats, lons, mTimes = files.read_data_from_one_file(ifile, modelVarName, - modelLatVarName, - modelLonVarName, - modelTimeVarName, - typeF)[:3] - modelObject = modelList[0] - latMin = modelObject.latMin - latMax = modelObject.latMax - lonMin = modelObject.lonMin - lonMax = modelObject.lonMax - elif regridOption == 'user': - # Use the GridBox Object - latMin = gridBox.latMin - latMax = gridBox.latMax - lonMin = gridBox.lonMin - lonMax = gridBox.lonMax - naLats = gridBox.latCount - naLons = gridBox.lonCount - dLat = gridBox.latStep - dLon = gridBox.lonStep - lat = np.arange(naLats) * dLat + latMin - lon = np.arange(naLons) * dLon + lonMin - lons, lats = np.meshgrid(lon, lat) - lon = 0. - lat = 0. - else: - print "INVALID REGRID OPTION USED" - sys.exit() - - ngrdY = lats.shape[0] - ngrdX = lats.shape[1] - - regObsData = [] - - - - ## Part 1: retrieve observation data from the database and regrid them - ## NB. automatically uses local cache if already retrieved. - - for n in np.arange(numOBSs): - # spatial regridding - oLats, oLons, _, oTimes, oData = db.extractData(obsDatasetId[n], - obsParameterId[n], - latMin, latMax, - lonMin, lonMax, - settings.startDate, settings.endDate, - cachedir, obsTimestep[n]) - - # TODO: modify this if block with new metadata usage. - if precipFlag == True and obsList[n][0:3] == 'CRU': - oData = 86400.0 * oData - - nstOBSs = oData.shape[0] # note that the length of obs data can vary for different obs intervals (e.g., daily vs. monthly) - print 'Regrid OBS dataset onto the ', regridOption, ' grid system: ngrdY, ngrdX, nstOBSs= ', ngrdY, ngrdX, nstOBSs - print 'For dataset: %s' % obsList[n] - - tmpOBS = ma.zeros((nstOBSs, ngrdY, ngrdX)) - - print 'tmpOBS shape = ', tmpOBS.shape - - # OBS SPATIAL REGRIDING - for t in np.arange(nstOBSs): - tmpOBS[t, :, :] = process.do_regrid(oData[t, :, :], oLats, oLons, lats, lons) - - # TODO: Not sure this is needed with Python Garbage Collector - # The used memory should be freed when the objects are no longer referenced. If this continues to be an issue we may need to look - # at using generators where possible. - oLats = 0.0 - oLons = 0.0 # release the memory occupied by the temporary variables oLats and oLons. - - # OBS TEMPORAL REGRIDING - oData, newObsTimes = process.calc_average_on_new_time_unit_K(tmpOBS, oTimes, unit=timeRegridOption) - - tmpOBS = 0.0 - - # check the consistency of temporally regridded obs data - if n == 0: - oldObsTimes = newObsTimes - else: - if oldObsTimes != newObsTimes: - print 'temporally regridded obs data time levels do not match at ', n - 1, n - print '%s Time through Loop' % (n + 1) - print 'oldObsTimes Count: %s' % len(oldObsTimes) - print 'newObsTimes Count: %s' % len(newObsTimes) - # TODO: We need to handle these cases using Try Except Blocks or insert a sys.exit if appropriate - sys.exit() - else: - oldObsTimes = newObsTimes - # if everything's fine, append the spatially and temporally regridded data in the obs data array (obsData) - regObsData.append(oData) - - - """ all obs datasets have been read-in and regridded. convert the regridded obs data from 'list' to 'array' - also finalize 'obsTimes', the time coordinate values of the regridded obs data. 
- NOTE: using 'list_to_array' assigns values to the missing points; this has become a problem in handling the CRU data. - this problem disappears by using 'ma.array'.""" - - obsData = ma.array(regObsData) - obsTimes = newObsTimes - - # compute the simple multi-obs ensemble if multiple obs are used - if numOBSs > 1: - ensemble = np.mean(regObsData, axis=0) - regObsData.append(ensemble) - numOBSs = len(regObsData) - obsList.append('ENS-OBS') - obsData = ma.array(regObsData) # Cast to a masked array - - - ## Part 2: load in and regrid model data from file(s) - ## NOTE: the two parameters, numMDLs and numMOmx are defined to represent the number of models (w/ all 240 mos) & - ## the total number of months, respectively, in later multi-model calculations. - - typeF = 'nc' - regridMdlData = [] - # extract the model names and store them in the list 'mdlList' - mdlName = [] - mdlListReversed=[] - if len(mdlList) >1: - for element in mdlList: - mdlListReversed.append(element[::-1]) - prefix=os.path.commonprefix(mdlList) - postfix=os.path.commonprefix(mdlListReversed)[::-1] - for element in mdlList: - mdlName.append(element.replace(prefix,'').replace(postfix,'')) - else: - mdlName.append('model') - - - for n in np.arange(numMDLs): - # read model grid info, then model data - ifile = mdlList[n] - print 'ifile= ', ifile - modelLats, modelLons, mTimes, mdlDat, mvUnit = files.read_data_from_one_file(ifile, - modelVarName, - modelLatVarName, - modelLonVarName, - modelTimeVarName, - typeF) - mdlT = [] - mStep = len(mTimes) - - for i in np.arange(mStep): - mdlT.append(mTimes[i].strftime("%Y%m")) - - wh = (np.array(mdlT) >= yymm0) & (np.array(mdlT) <= yymm1) - modelTimes = list(np.array(mTimes)[wh]) - mData = mdlDat[wh, :, :] - - # determine the dimension size from the model time and latitude data. - nT = len(modelTimes) - - print ' The shape of model data to be processed= ', mData.shape, ' for the period ', min(modelTimes), max(modelTimes) - # spatial regridding of the modl data - tmpMDL = ma.zeros((nT, ngrdY, ngrdX)) - - if regridOption != 'model': - for t in np.arange(nT): - tmpMDL[t, :, :] = process.do_regrid(mData[t, :, :], modelLats, modelLons, lats, lons) - else: - tmpMDL = mData - - # temporally regrid the model data - mData, newMdlTimes = process.regrid_in_time(tmpMDL, modelTimes, unit=timeRegridOption) - tmpMDL = 0.0 - - # check data consistency for all models - if n == 0: - oldMdlTimes = newMdlTimes - else: - if oldMdlTimes != newMdlTimes: - print 'temporally regridded mdl data time levels do not match at ', n - 1, n - print len(oldMdlTimes), len(newMdlTimes) - sys.exit() - else: - oldMdlTimes = newMdlTimes - - # if everything's fine, append the spatially and temporally regridded data in the obs data array (obsData) - regridMdlData.append(mData) - - modelData = ma.array(regridMdlData) - modelTimes = newMdlTimes - - if (precipFlag == True) & (mvUnit == 'KG M-2 S-1'): - print 'convert model variable unit from mm/s to mm/day' - modelData = 86400.*modelData - - # check consistency between the time levels of the model and obs data - # this is the final update of time levels: 'Times' and 'nT' - if obsTimes != modelTimes: - print 'time levels of the obs and model data are not consistent. EXIT' - print 'obsTimes' - print obsTimes - print 'modelTimes' - print modelTimes - sys.exit() - # 'Times = modelTimes = obsTimes' has been established and modelTimes and obsTimes will not be used hereafter. 
(de-allocated) - Times = modelTimes - nT = len(modelTimes) - modelTimes = 0 - obsTimes = 0 - - print 'Reading and regridding model data are completed' - print 'numMDLs, modelData.shape= ', numMDLs, modelData.shape - - # TODO: Commented out until I can talk with Jinwon about this - # compute the simple multi-model ensemble if multiple models are evaluated - if numMDLs > 1: - model_ensemble = np.mean(regridMdlData, axis=0) - regridMdlData.append(model_ensemble) - numMDLs = len(regridMdlData) - modelData = ma.array(regridMdlData) - mdlName.append('ENS-MODEL') - print 'Eval mdl-mean timeseries for the obs periods: modelData.shape= ',modelData.shape - # GOODALE: This ensemble code should be refactored into process.py module since it's purpose is data processing - - # Processing complete - print 'data_prep is completed: both obs and mdl data are re-gridded to a common analysis grid' - return numOBSs, numMDLs, nT, ngrdY, ngrdX, Times, lons, lats, obsData, modelData, obsList, mdlName
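One detail of the deleted prep_data worth noting: when several model files are evaluated, their short names are derived by stripping the longest common prefix and the longest common suffix from the file paths, the suffix being found by running os.path.commonprefix over the reversed strings. A standalone sketch of the same trick, with made-up file names (not taken from the original run):

    # Sketch of the model-name derivation in prep_data (paths invented).
    import os

    mdlList = ['/data/rcmes/wrf_prec_1990-2009.nc',
               '/data/rcmes/crcm_prec_1990-2009.nc',
               '/data/rcmes/hrm3_prec_1990-2009.nc']

    # Longest common prefix of the paths as-is...
    prefix = os.path.commonprefix(mdlList)
    # ...and longest common suffix, via reversing each path first.
    postfix = os.path.commonprefix([p[::-1] for p in mdlList])[::-1]

    mdlName = [p.replace(prefix, '').replace(postfix, '') for p in mdlList]
    print mdlName  # ['wrf', 'crcm', 'hrm3']

Note that str.replace removes every occurrence, not just the leading prefix and trailing suffix, so the derived names are only reliable when the common parts do not also appear in the distinguishing middle of a path; the original code shares this caveat.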