http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/storage/rcmed.py
----------------------------------------------------------------------
diff --git a/src/main/python/rcmes/storage/rcmed.py b/src/main/python/rcmes/storage/rcmed.py
new file mode 100755
index 0000000..39a235c
--- /dev/null
+++ b/src/main/python/rcmes/storage/rcmed.py
@@ -0,0 +1,349 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+'''
+Classes:
+    RCMED - A class for retrieving data from the Regional Climate Model Evaluation Database (JPL).
+'''
+
+import urllib, urllib2
+import re
+import json
+import numpy as np
+import numpy.ma as ma
+from datetime import datetime
+import calendar
+#from ocw.dataset import Dataset
+
+
+URL = 'http://rcmes.jpl.nasa.gov/query-api/query.php?'
+
+
+def get_parameters_metadata():
+    '''Get the metadata of all parameters from RCMED.
+
+    :returns: Dictionary of information for each parameter stored in one list
+    :rtype: List of dictionaries
+    '''
+
+    param_info_list = []
+    url = URL + "&param_info=yes"
+    string = urllib2.urlopen(url)
+    data_string = string.read()
+    json_format_data = json.loads(data_string)
+    fields_name = json_format_data['fields_name']
+    data = json_format_data['data']
+    for row in data:
+        dic = {}
+        for name in fields_name:
+            dic[name] = row[fields_name.index(name)]
+        param_info_list.append(dic)
+
+    return param_info_list
+
+
+def _make_mask_array(values, parameter_id, parameters_metadata):
+    '''Create a masked array to deal with missing values
+
+    :param values: Numpy array of values which may contain missing values
+    :type values: Numpy array
+    :param parameter_id: Parameter's id
+    :type parameter_id: Integer
+    :param parameters_metadata: Metadata for all parameters
+    :type parameters_metadata: List of dictionaries
+
+    :returns: Masked array of values
+    :rtype: Masked array
+    '''
+
+    for each in parameters_metadata:
+        if each['parameter_id'].encode() == str(parameter_id):
+            missing_values = each['missingdataflag'].encode()
+            break
+    # Need to convert the string to the proper dtype so the mask is applied
+    if 'float' in str(values.dtype):
+        missing_values = float(missing_values)
+    if 'int' in str(values.dtype):
+        missing_values = int(missing_values)
+
+    values = ma.masked_array(values, mask=(values == missing_values))
+
+    return values
+
+
+def _reshape_values(values, unique_values):
+    '''Reshape values into a 3D array (time, latitude, longitude).
+
+    :param values: Raw values data
+    :type values: numpy array
+    :param unique_values: Tuple of unique latitudes, longitudes and times data.
+    :type unique_values: Tuple
+
+    :returns: Reshaped values data
+    :rtype: Numpy array
+    '''
+
+    lats_len = len(unique_values[0])
+    lons_len = len(unique_values[1])
+    times_len = len(unique_values[2])
+
+    values = values.reshape(times_len, lats_len, lons_len)
+
+    return values
+
+
+def _calculate_time(unique_times, time_step):
+    '''Convert each time to a datetime object.
+
+    :param unique_times: Unique time data
+    :type unique_times: String
+    :param time_step: Time step
+    :type time_step: String
+
+    :returns: Unique datetime objects of time data
+    :rtype: Numpy array
+    '''
+
+    time_format = "%Y-%m-%d %H:%M:%S"
+    unique_times = np.array([datetime.strptime(time, time_format) for time in unique_times])
+    #There is no need to sort time.
+    #This function may still be required in RCMES.
+    #unique_times.sort()
+    #This function should be moved to data_process.
+
+    return unique_times
+
+
+def _make_unique(lats, lons, times):
+    '''Find the unique values of the input data.
+
+    :param lats: lats
+    :type lats: Numpy array
+    :param lons: lons
+    :type lons: Numpy array
+    :param times: times
+    :type times: Numpy array
+
+    :returns: Unique numpy arrays of latitudes, longitudes and times
+    :rtype: Tuple
+    '''
+
+    unique_lats = np.unique(lats)
+    unique_lons = np.unique(lons)
+    unique_times = np.unique(times)
+
+    return (unique_lats, unique_lons, unique_times)
+
+
+def _get_data(url):
+    '''Retrieve data from the database.
+
+    :param url: url to query from database
+    :type url: String
+
+    :returns: Latitudes, longitudes, times and values data
+    :rtype: (Numpy array, Numpy array, Numpy array, Numpy array)
+    '''
+
+    string = urllib2.urlopen(url)
+    data_string = string.read()
+    index_of_data = re.search('data: \r\n', data_string)
+    data = data_string[index_of_data.end():len(data_string)]
+    data = data.split('\r\n')
+
+    lats = []
+    lons = []
+    #levels = []
+    values = []
+    times = []
+
+    for i in range(len(data) - 1):  # Because the last row is empty, "len(data)-1" is used.
+        row = data[i].split(',')
+        lats.append(np.float32(row[0]))
+        lons.append(np.float32(row[1]))
+        # Level is not currently supported in the Dataset class.
+        #levels.append(np.float32(row[2]))
+        times.append(row[3])
+        values.append(np.float32(row[4]))
+
+    lats = np.array(lats)
+    lons = np.array(lons)
+    times = np.array(times)
+    values = np.array(values)
+
+    return lats, lons, times, values
+
+
+def _beginning_of_date(time, time_step):
+    '''Calculate the beginning of the given time, based on the time step.
+
+    :param time: Given time
+    :type time: Datetime
+    :param time_step: Time step (monthly or daily)
+    :type time_step: String
+
+    :returns: Beginning of given time
+    :rtype: Datetime
+    '''
+
+    if time_step.lower() == 'monthly':
+        if time.day != 1:
+            start_time_string = time.strftime('%Y%m%d')
+            start_time_string = start_time_string[:6] + '01'
+            time = datetime.strptime(start_time_string, '%Y%m%d')
+            ##TODO: Replace the 3 lines above with this line:
+            ##time = datetime(time.year, time.month, 1)
+    elif time_step.lower() == 'daily':
+        if time.hour != 0 or time.minute != 0 or time.second != 0:
+            start_time_string = time.strftime('%Y%m%d%H%M%S')
+            start_time_string = start_time_string[:8] + '000000'
+            time = datetime.strptime(start_time_string, '%Y%m%d%H%M%S')
+            ##TODO: Replace the 3 lines above with this line:
+            ##time = datetime(time.year, time.month, time.day, 00, 00, 00)
+
+    return time
+
+
+def _end_of_date(time, time_step):
+    '''Calculate the end of the given time, based on the time step.
+
+    :param time: Given time
+    :type time: Datetime
+    :param time_step: Time step (monthly or daily)
+    :type time_step: String
+
+    :returns: End of given time
+    :rtype: Datetime
+    '''
+
+    last_day_of_month = calendar.monthrange(time.year, time.month)[1]
+    if time.day != last_day_of_month:
+        end_time_string = time.strftime('%Y%m%d')
+        end_time_string = end_time_string[:6] + str(last_day_of_month)
+        time = datetime.strptime(end_time_string, '%Y%m%d')
+        ##TODO: Replace the 3 lines above with this line:
+        ##time = datetime(time.year, time.month, last_day_of_month)
+    elif time_step.lower() == 'daily':
+        end_time_string = time.strftime('%Y%m%d%H%M%S')
+        end_time_string = end_time_string[:8] + '235959'
+        time = datetime.strptime(end_time_string, '%Y%m%d%H%M%S')
+        ##TODO: Replace the 3 lines above with this line:
+        ##time = datetime(time.year, time.month, time.day, 23, 59, 59)
+
+    return time
+
+
+def _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step):
+    '''Generate the url to query from the database.
+
+    :param dataset_id: Dataset id.
+    :type dataset_id: Integer
+    :param parameter_id: Parameter id
+    :type parameter_id: Integer
+    :param min_lat: Minimum latitude
+    :type min_lat: Float
+    :param max_lat: Maximum latitude
+    :type max_lat: Float
+    :param min_lon: Minimum longitude
+    :type min_lon: Float
+    :param max_lon: Maximum longitude
+    :type max_lon: Float
+    :param start_time: Start time
+    :type start_time: Datetime
+    :param end_time: End time
+    :type end_time: Datetime
+    :param time_step: Time step
+    :type time_step: String
+
+    :returns: url to query from database
+    :rtype: String
+    '''
+
+    start_time = _beginning_of_date(start_time, time_step)
+    end_time = _end_of_date(end_time, time_step)
+    start_time = start_time.strftime("%Y%m%dT%H%MZ")
+    end_time = end_time.strftime("%Y%m%dT%H%MZ")
+
+    query = [('datasetId', dataset_id), ('parameterId', parameter_id), ('latMin', min_lat), ('latMax', max_lat),
+             ('lonMin', min_lon), ('lonMax', max_lon), ('timeStart', start_time), ('timeEnd', end_time)]
+
+    query_url = urllib.urlencode(query)
+    url_request = URL + query_url
+
+    return url_request
+
+
+def _get_parameter_info(parameters_metadata, parameter_id):
+    '''Get general information for the given parameter id.
+
+    :param parameters_metadata: Metadata for all parameters
+    :type parameters_metadata: List of dictionaries
+    :param parameter_id: Parameter id
+    :type parameter_id: Integer
+
+    :returns: Database name, time step, realm, instrument, start_date, end_date and unit for given parameter
+    :rtype: (string, string, string, string, string, string, string)
+    '''
+
+    for dic in parameters_metadata:
+        if int(dic['parameter_id']) == parameter_id:
+            database = dic['database']
+            time_step = dic['timestep']
+            realm = dic['realm']
+            instrument = dic['instrument']
+            start_date = dic['start_date']
+            end_date = dic['end_date']
+            unit = dic['units']
+
+    return (database, time_step, realm, instrument, start_date, end_date, unit)
+
+
+def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time):
+    '''Get data from one database (parameter).
+
+    :param dataset_id: Dataset id.
+    :type dataset_id: Integer
+    :param parameter_id: Parameter id
+    :type parameter_id: Integer
+    :param min_lat: Minimum latitude
+    :type min_lat: Float
+    :param max_lat: Maximum latitude
+    :type max_lat: Float
+    :param min_lon: Minimum longitude
+    :type min_lon: Float
+    :param max_lon: Maximum longitude
+    :type max_lon: Float
+    :param start_time: Start time
+    :type start_time: Datetime
+    :param end_time: End time
+    :type end_time: Datetime
+
+    :returns: Dataset object
+    :rtype: Object
+    '''
+
+    parameters_metadata = get_parameters_metadata()
+    parameter_name, time_step, _, _, _, _, _ = _get_parameter_info(parameters_metadata, parameter_id)
+    url = _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step)
+    lats, lons, times, values = _get_data(url)
+
+    unique_lats_lons_times = _make_unique(lats, lons, times)
+    unique_times = _calculate_time(unique_lats_lons_times[2], time_step)
+    values = _reshape_values(values, unique_lats_lons_times)
+    values = _make_mask_array(values, parameter_id, parameters_metadata)
+
+    return Dataset(unique_lats_lons_times[0], unique_lats_lons_times[1], unique_times, values, parameter_name)
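
For reference, a minimal usage sketch of the new rcmed module (not part of this commit). The import path, dataset id and parameter id below are placeholders; real ids come from get_parameters_metadata(). Note also that 'from ocw.dataset import Dataset' is still commented out in this revision, so parameter_dataset() would raise a NameError until that import is restored.

    from datetime import datetime

    from storage import rcmed   # hypothetical import path; assumes src/main/python/rcmes is on PYTHONPATH

    # List every parameter RCMED knows about, with its database, timestep and units.
    for param in rcmed.get_parameters_metadata():
        print param['parameter_id'], param['database'], param['timestep'], param['units']

    # Fetch one parameter over a lat/lon box and time range (placeholder ids).
    # NOTE: raises NameError in this revision because the Dataset import is commented out.
    ds = rcmed.parameter_dataset(3, 36, 30.0, 45.0, -120.0, -100.0,
                                 datetime(2000, 1, 1), datetime(2000, 12, 31))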
http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/storage/rcmed.pyc ---------------------------------------------------------------------- diff --git a/src/main/python/rcmes/storage/rcmed.pyc b/src/main/python/rcmes/storage/rcmed.pyc new file mode 100644 index 0000000..5b4e858 Binary files /dev/null and b/src/main/python/rcmes/storage/rcmed.pyc differ http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/toolkit/.svn/all-wcprops ---------------------------------------------------------------------- diff --git a/src/main/python/rcmes/toolkit/.svn/all-wcprops b/src/main/python/rcmes/toolkit/.svn/all-wcprops new file mode 100755 index 0000000..70196a3 --- /dev/null +++ b/src/main/python/rcmes/toolkit/.svn/all-wcprops @@ -0,0 +1,41 @@ +K 25 +svn:wc:ra_dav:version-url +V 87 +/repos/asf/!svn/ver/1485875/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit +END +visualize.py +K 25 +svn:wc:ra_dav:version-url +V 100 +/repos/asf/!svn/ver/1476460/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/visualize.py +END +process.py +K 25 +svn:wc:ra_dav:version-url +V 98 +/repos/asf/!svn/ver/1476460/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py +END +__init__.py +K 25 +svn:wc:ra_dav:version-url +V 99 +/repos/asf/!svn/ver/1476460/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/__init__.py +END +do_data_prep.py +K 25 +svn:wc:ra_dav:version-url +V 103 +/repos/asf/!svn/ver/1485875/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py +END +plots.py +K 25 +svn:wc:ra_dav:version-url +V 96 +/repos/asf/!svn/ver/1476460/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/plots.py +END +metrics.py +K 25 +svn:wc:ra_dav:version-url +V 98 +/repos/asf/!svn/ver/1476460/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/metrics.py +END http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/toolkit/.svn/entries ---------------------------------------------------------------------- diff --git a/src/main/python/rcmes/toolkit/.svn/entries b/src/main/python/rcmes/toolkit/.svn/entries new file mode 100755 index 0000000..0aec30c --- /dev/null +++ b/src/main/python/rcmes/toolkit/.svn/entries @@ -0,0 +1,232 @@ +10 + +dir +1485921 +https://svn.apache.org/repos/asf/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit +https://svn.apache.org/repos/asf + + + +2013-05-23T22:20:38.670815Z +1485875 +goodale + + + + + + + + + + + + + + +13f79535-47bb-0310-9956-ffa450edef68 + +visualize.py +file + + + + +2013-05-24T10:13:49.000000Z +6f61f99e6bb37b883ce3bb0470cd2545 +2012-08-16T14:36:57.243978Z +1473887 +cgoodale +has-props + + + + + + + + + + + + + + + + + + + + +77 + +process.py +file + + + + +2013-05-24T10:13:49.000000Z +6172bcb1200fef493d541d93407c12ed +2013-02-20T16:20:33.453039Z +1474977 +cgoodale + + + + + + + + + + + + + + + + + + + + + +46769 + +__init__.py +file + + + + +2013-05-24T10:13:49.000000Z +8761fbbcde9579ef3c42e09923e56f93 +2012-08-16T14:36:57.243978Z +1473887 +cgoodale +has-props + + + + + + + + + + + + + + + + + + + + +148 + +do_data_prep.py +file + + + + +2013-05-24T10:13:49.000000Z +881100e684bbad5dffcaa35da3d4a216 +2013-05-23T22:20:38.670815Z +1485875 +goodale + + + + + + + + + + + + + + + + + + + + + +16200 + +plots.py +file + + + + +2013-05-24T10:13:49.000000Z +16d8de7760d609df6373d15fee4cffe6 +2013-03-05T16:58:18.846251Z +1475040 +mjjoyce + + + + + + + + + + + + + + + + + + + + + +16323 + +metrics.py +file + + + + 
+2013-05-24T10:13:49.000000Z +8dbdd12fc067e73c0ae2c5f48c7245a7 +2013-02-10T21:47:51.341146Z +1474940 +cgoodale + + + + + + + + + + + + + + + + + + + + + +58120 +
http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/toolkit/.svn/prop-base/__init__.py.svn-base ---------------------------------------------------------------------- diff --git a/src/main/python/rcmes/toolkit/.svn/prop-base/__init__.py.svn-base b/src/main/python/rcmes/toolkit/.svn/prop-base/__init__.py.svn-base new file mode 100755 index 0000000..614166f --- /dev/null +++ b/src/main/python/rcmes/toolkit/.svn/prop-base/__init__.py.svn-base @@ -0,0 +1,5 @@ +K 12 +svn:keywords +V 11 +Id Revision +END
http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/toolkit/.svn/prop-base/visualize.py.svn-base ---------------------------------------------------------------------- diff --git a/src/main/python/rcmes/toolkit/.svn/prop-base/visualize.py.svn-base b/src/main/python/rcmes/toolkit/.svn/prop-base/visualize.py.svn-base new file mode 100755 index 0000000..614166f --- /dev/null +++ b/src/main/python/rcmes/toolkit/.svn/prop-base/visualize.py.svn-base @@ -0,0 +1,5 @@ +K 12 +svn:keywords +V 11 +Id Revision +END
http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/toolkit/.svn/text-base/__init__.py.svn-base ---------------------------------------------------------------------- diff --git a/src/main/python/rcmes/toolkit/.svn/text-base/__init__.py.svn-base b/src/main/python/rcmes/toolkit/.svn/text-base/__init__.py.svn-base new file mode 100755 index 0000000..11548e6 --- /dev/null +++ b/src/main/python/rcmes/toolkit/.svn/text-base/__init__.py.svn-base @@ -0,0 +1,2 @@ +"""The toolkit Package is a collection of modules that provide a set of tools +that can be used to process, analyze and plot the analysis results.""" \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/toolkit/.svn/text-base/do_data_prep.py.svn-base
----------------------------------------------------------------------
diff --git a/src/main/python/rcmes/toolkit/.svn/text-base/do_data_prep.py.svn-base b/src/main/python/rcmes/toolkit/.svn/text-base/do_data_prep.py.svn-base
new file mode 100755
index 0000000..1b5c545
--- /dev/null
+++ b/src/main/python/rcmes/toolkit/.svn/text-base/do_data_prep.py.svn-base
@@ -0,0 +1,370 @@
+"""Prepare Datasets, both model and observations, for analysis using metrics"""
+
+
+import numpy as np
+import numpy.ma as ma
+import sys, os
+
+import Nio
+
+from storage import db, files
+import process
+from utils import misc
+
+# TODO: swap gridBox for Domain
+def prep_data(settings, obsDatasetList, gridBox, modelList):
+    """
+
+    returns numOBSs, numMDLs, nT, ngrdY, ngrdX, Times, lons, lats, obsData, modelData, obsList, mdlName
+
+    numOBSs - number of Observational Datasets.  Really need to look at using len(obsDatasetList) instead
+    numMDLs - number of Models used.  Again should use len(modelList) instead
+    nT - Time value count after temporal regridding.
+        Should use the length of the time axis for a given dataset
+    ngrdY - size of the Y-Axis grid after spatial regridding
+    ngrdX - size of the X-Axis grid after spatial regridding
+    Times - list of python datetime objects that represent the times to be used in further calculations
+    lons -
+    lats -
+    obsData -
+    modelData -
+    obsList -
+    mdlName -
+
+
+    """
+
+    # TODO: Stop the object Deserialization and work on refactoring the core code here
+    cachedir = settings.cacheDir
+    workdir = settings.workDir
+
+    # Use list comprehensions to deconstruct obsDatasetList
+    # ['TRMM_pr_mon', 'CRU3.1_pr']  Basically a list of Dataset NAME +'_' + parameter name - THE 'CRU*' one triggers unit conversion issues later
+    # The plan here is to use the obsDatasetList, which contains a longName key we can use.
+    obsList = [str(x['longname']) for x in obsDatasetList]
+    # Also using the obsDatasetList with a key of ['dataset_id']
+    obsDatasetId = [str(x['dataset_id']) for x in obsDatasetList]
+    # obsDatasetList ['parameter_id'] list
+    obsParameterId = [str(x['parameter_id']) for x in obsDatasetList]
+    obsTimestep = [str(x['timestep']) for x in obsDatasetList]
+    mdlList = [model.filename for model in modelList]
+
+    # Since all of the model objects in the modelList have the same Varnames and Precip Flag, I am going to merely
+    # pull this from modelList[0] for now
+    modelVarName = modelList[0].varName
+    precipFlag = modelList[0].precipFlag
+    modelTimeVarName = modelList[0].timeVariable
+    modelLatVarName = modelList[0].latVariable
+    modelLonVarName = modelList[0].lonVariable
+    regridOption = settings.spatialGrid
+    timeRegridOption = settings.temporalGrid
+
+    """
+    Routine to read in and re-grid both obs and mdl datasets.
+    Processes both single and multiple files of obs and mdl or combinations in a general way.
+        i)   retrieve observations from the database
+        ii)  load in model data
+        iii) temporal regridding
+        iv)  spatial regridding
+        v)   area-averaging
+    Input:
+        cachedir - string describing directory path
+        workdir - string describing directory path
+        obsList - string describing the observation data files
+        obsDatasetId - int, db dataset id
+        obsParameterId - int, db parameter id
+        latMin, latMax, lonMin, lonMax, dLat, dLon, naLats, naLons: define the evaluation/analysis domain/grid system
+        latMin - float
+        latMax - float
+        lonMin - float
+        lonMax - float
+        dLat - float
+        dLon - float
+        naLats - integer
+        naLons - integer
+        mdlList - string describing model file name + path
+        modelVarName - string describing name of variable to evaluate (as written in model file)
+        precipFlag - bool (is this precipitation data? True/False)
+        modelTimeVarName - string describing name of time variable in model file
+        modelLatVarName - string describing name of latitude variable in model file
+        modelLonVarName - string describing name of longitude variable in model file
+        regridOption - string: 'obs'|'model'|'user'
+        timeRegridOption - string: 'full'|'annual'|'monthly'|'daily'
+        maskOption - Boolean
+
+        # TODO: This isn't true in the current codebase.
+        Instead the SubRegions are being used.  You can see that these values are not
+        being used in the code, at least they are not being passed in from the function
+
+        maskLatMin - float (only used if maskOption=1)
+        maskLatMax - float (only used if maskOption=1)
+        maskLonMin - float (only used if maskOption=1)
+        maskLonMax - float (only used if maskOption=1)
+    Output: image files of plots + possibly data
+    Jinwon Kim, 7/11/2012
+    """
+
+
+    # check the number of obs & model data files
+    numOBSs = len(obsList)
+    numMDLs = len(mdlList)
+
+    # assign parameters that must be preserved throughout the process
+
+    print 'start & end time = ', settings.startDate, settings.endDate
+    yymm0 = settings.startDate.strftime("%Y%m")
+    yymm1 = settings.endDate.strftime("%Y%m")
+    print 'start & end eval period = ', yymm0, yymm1
+
+
+
+    #TODO: Wrap in try except blocks instead
+    if numMDLs < 1:
+        print 'No input model data file. EXIT'
+        sys.exit()
+    if numOBSs < 1:
+        print 'No input observation data file. EXIT'
+        sys.exit()
+
+    ## Part 1: retrieve observation data from the database and regrid them
+    ##         NB. automatically uses local cache if already retrieved.
+
+    # preparation for spatial re-gridding: define the size of the horizontal array of the target interpolation grid system (ngrdX and ngrdY)
+    print 'regridOption in prep_data= ', regridOption
+    if regridOption == 'model':
+        ifile = mdlList[0]
+        typeF = 'nc'
+        lats, lons, mTimes = files.read_lolaT_from_file(ifile, modelLatVarName, modelLonVarName, modelTimeVarName, typeF)
+        modelObject = modelList[0]
+        latMin = modelObject.latMin
+        latMax = modelObject.latMax
+        lonMin = modelObject.lonMin
+        lonMax = modelObject.lonMax
+    elif regridOption == 'user':
+        # Use the GridBox Object
+        latMin = gridBox.latMin
+        latMax = gridBox.latMax
+        lonMin = gridBox.lonMin
+        lonMax = gridBox.lonMax
+        naLats = gridBox.latCount
+        naLons = gridBox.lonCount
+        dLat = gridBox.latStep
+        dLon = gridBox.lonStep
+        lat = np.arange(naLats) * dLat + latMin
+        lon = np.arange(naLons) * dLon + lonMin
+        lons, lats = np.meshgrid(lon, lat)
+        lon = 0.
+        lat = 0.
+    else:
+        print "INVALID REGRID OPTION USED"
+        sys.exit()
+
+    ngrdY = lats.shape[0]
+    ngrdX = lats.shape[1]
+
+    regObsData = []
+
+    for n in np.arange(numOBSs):
+        # spatial regridding
+        oLats, oLons, _, oTimes, oData = db.extractData(obsDatasetId[n],
+                                                        obsParameterId[n],
+                                                        latMin, latMax,
+                                                        lonMin, lonMax,
+                                                        settings.startDate, settings.endDate,
+                                                        cachedir, obsTimestep[n])
+
+        # TODO: modify this if block with new metadata usage.
+        if precipFlag == True and obsList[n][0:3] == 'CRU':
+            oData = 86400.0 * oData
+
+        nstOBSs = oData.shape[0]  # note that the length of obs data can vary for different obs intervals (e.g., daily vs. monthly)
+        print 'Regrid OBS dataset onto the ', regridOption, ' grid system: ngrdY, ngrdX, nstOBSs= ', ngrdY, ngrdX, nstOBSs
+        print 'For dataset: %s' % obsList[n]
+
+        tmpOBS = ma.zeros((nstOBSs, ngrdY, ngrdX))
+
+        print 'tmpOBS shape = ', tmpOBS.shape
+
+        for t in np.arange(nstOBSs):
+            tmpOBS[t, :, :] = process.do_regrid(oData[t, :, :], oLats, oLons, lats, lons)
+
+        # TODO: Not sure this is needed with the Python Garbage Collector
+        # The used memory should be freed when the objects are no longer referenced.  If this continues to be an issue we may need to look
+        # at using generators where possible.
+        oLats = 0.0
+        oLons = 0.0  # release the memory occupied by the temporary variables oLats and oLons.
+
+        # temporally regrid the spatially regridded obs data
+        oData, newObsTimes = process.calc_average_on_new_time_unit_K(tmpOBS, oTimes, unit=timeRegridOption)
+
+        tmpOBS = 0.0
+
+        # check the consistency of temporally regridded obs data
+        if n == 0:
+            oldObsTimes = newObsTimes
+        else:
+            if oldObsTimes != newObsTimes:
+                print 'temporally regridded obs data time levels do not match at ', n - 1, n
+                print '%s Time through Loop' % (n + 1)
+                print 'oldObsTimes Count: %s' % len(oldObsTimes)
+                print 'newObsTimes Count: %s' % len(newObsTimes)
+                # TODO: We need to handle these cases using Try Except Blocks or insert a sys.exit if appropriate
+                sys.exit()
+            else:
+                oldObsTimes = newObsTimes
+        # if everything's fine, append the spatially and temporally regridded data to the obs data array (obsData)
+        regObsData.append(oData)
+
+
+    """ all obs datasets have been read in and regridded. convert the regridded obs data from 'list' to 'array'
+    also finalize 'obsTimes', the time coordinate values of the regridded obs data.
+    NOTE: using 'list_to_array' assigns values to the missing points; this has become a problem in handling the CRU data.
+          this problem disappears by using 'ma.array'."""
+
+    obsData = ma.array(regObsData)
+    obsTimes = newObsTimes
+    regObsData = 0
+    oldObsTimes = 0
+    nT = len(obsTimes)
+
+    # TODO: Refactor this into a function within the toolkit module
+    # compute the simple multi-obs ensemble if multiple obs are used
+    if numOBSs > 1:
+        print 'numOBSs = ', numOBSs
+        oData = obsData
+        print 'oData shape = ', oData.shape
+        obsData = ma.zeros((numOBSs + 1, nT, ngrdY, ngrdX))
+        print 'obsData shape = ', obsData.shape
+        avg = ma.zeros((nT, ngrdY, ngrdX))
+
+        for i in np.arange(numOBSs):
+            obsData[i, :, :, :] = oData[i, :, :, :]
+            avg[:, :, :] = avg[:, :, :] + oData[i, :, :, :]
+
+        avg = avg / float(numOBSs)
+        obsData[numOBSs, :, :, :] = avg[:, :, :]  # store the obs-ensemble data
+        numOBSs = numOBSs + 1  # update the number of obs data to include the obs ensemble
+        obsList.append('ENS-OBS')
+    print 'OBS regridded: ', obsData.shape
+
+
+    ## Part 2: load in and regrid model data from file(s)
+    ## NOTE: the two parameters, numMDLs and numMOmx, are defined to represent the number of models (w/ all 240 mos) &
+    ##       the total number of months, respectively, in later multi-model calculations.
+
+    typeF = 'nc'
+    regridMdlData = []
+    # extract the model names and store them in the list 'mdlName'
+    mdlName = []
+    mdlListReversed = []
+    if len(mdlList) > 1:
+        for element in mdlList:
+            mdlListReversed.append(element[::-1])
+        prefix = os.path.commonprefix(mdlList)
+        postfix = os.path.commonprefix(mdlListReversed)[::-1]
+        for element in mdlList:
+            mdlName.append(element.replace(prefix, '').replace(postfix, ''))
+    else:
+        mdlName.append('model')
+
+
+    for n in np.arange(numMDLs):
+        # read model grid info, then model data
+        ifile = mdlList[n]
+        print 'ifile= ', ifile
+        modelLats, modelLons, mTimes = files.read_lolaT_from_file(ifile, modelLatVarName, modelLonVarName, modelTimeVarName, typeF)
+        mTime, mdlDat, mvUnit = files.read_data_from_one_file(ifile, modelVarName, modelTimeVarName, modelLats, typeF)
+        mdlT = []
+        mStep = len(mTimes)
+
+        for i in np.arange(mStep):
+            mdlT.append(mTimes[i].strftime("%Y%m"))
+
+        wh = (np.array(mdlT) >= yymm0) & (np.array(mdlT) <= yymm1)
+        modelTimes = list(np.array(mTimes)[wh])
+        mData = mdlDat[wh, :, :]
+
+        # determine the dimension size from the model time and latitude data.
+        nT = len(modelTimes)
+
+        # UNUSED VARIABLES - WILL DELETE AFTER TESTING
+        # nmdlY=modelLats.shape[0]
+        # nmdlX=modelLats.shape[1]
+        #print 'nT, ngrdY, ngrdX = ',nT,ngrdY, ngrdX,min(modelTimes),max(modelTimes)
+        print ' The shape of model data to be processed= ', mData.shape, ' for the period ', min(modelTimes), max(modelTimes)
+        # spatial regridding of the model data
+        tmpMDL = ma.zeros((nT, ngrdY, ngrdX))
+
+        if regridOption != 'model':
+            for t in np.arange(nT):
+                tmpMDL[t, :, :] = process.do_regrid(mData[t, :, :], modelLats, modelLons, lats, lons)
+        else:
+            tmpMDL = mData
+
+        # temporally regrid the model data
+        mData, newMdlTimes = process.regrid_in_time(tmpMDL, modelTimes, unit=timeRegridOption)
+        tmpMDL = 0.0
+
+        # check data consistency for all models
+        if n == 0:
+            oldMdlTimes = newMdlTimes
+        else:
+            if oldMdlTimes != newMdlTimes:
+                print 'temporally regridded mdl data time levels do not match at ', n - 1, n
+                print len(oldMdlTimes), len(newMdlTimes)
+                sys.exit()
+            else:
+                oldMdlTimes = newMdlTimes
+
+        # if everything's fine, append the spatially and temporally regridded data to the model data array (modelData)
+        regridMdlData.append(mData)
+
+    modelData = ma.array(regridMdlData)
+    modelTimes = newMdlTimes
+    regridMdlData = 0
+    oldMdlTimes = 0
+    newMdlTimes = 0
+    if (precipFlag == True) & (mvUnit == 'KG M-2 S-1'):
+        print 'convert model variable unit from mm/s to mm/day'
+        modelData = 86400. * modelData
+
+    # check consistency between the time levels of the model and obs data
+    # this is the final update of time levels: 'Times' and 'nT'
+    if obsTimes != modelTimes:
+        print 'time levels of the obs and model data are not consistent. EXIT'
+        print 'obsTimes'
+        print obsTimes
+        print 'modelTimes'
+        print modelTimes
+        sys.exit()
+    # 'Times = modelTimes = obsTimes' has been established and modelTimes and obsTimes will not be used hereafter. (de-allocated)
+    Times = modelTimes
+    nT = len(modelTimes)
+    modelTimes = 0
+    obsTimes = 0
+
+    print 'Reading and regridding model data are completed'
+    print 'numMDLs, modelData.shape= ', numMDLs, modelData.shape
+
+    # TODO: Do we need to make this a user supplied flag, or do we just create an ensemble ALWAYS
+    # TODO: Add in Kyo's code here as well
+    # TODO: Commented out until I can talk with Jinwon about this
+    # compute the simple multi-model ensemble if multiple models are evaluated
+    if numMDLs > 1:
+        mdlData = modelData
+        modelData = ma.zeros((numMDLs + 1, nT, ngrdY, ngrdX))
+        avg = ma.zeros((nT, ngrdY, ngrdX))
+        for i in np.arange(numMDLs):
+            modelData[i, :, :, :] = mdlData[i, :, :, :]
+            avg[:, :, :] = avg[:, :, :] + mdlData[i, :, :, :]
+        avg = avg / float(numMDLs)
+        modelData[numMDLs, :, :, :] = avg[:, :, :]  # store the model-ensemble data
+        # THESE ARE NOT USED. WILL DELETE AFTER TESTING
+        # i0mdl=0
+        # i1mdl=numMDLs
+        numMDLs = numMDLs + 1
+        mdlList.append('ENS-MODEL')
+        print 'Eval mdl-mean timeseries for the obs periods: modelData.shape= ', modelData.shape
+    # GOODALE: This ensemble code should be refactored into the process.py module since its purpose is data processing
+
+    # Processing complete
+    print 'data_prep is completed: both obs and mdl data are re-gridded to a common analysis grid'
+    return numOBSs, numMDLs, nT, ngrdY, ngrdX, Times, lons, lats, obsData, modelData, obsList, mdlName
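
For reference, a minimal, self-contained sketch (not part of this commit) of the ensemble step that prep_data performs for both obs and models: each regridded member is copied into a larger masked array and their mean is appended as an extra 'ENS' member. The shapes and random data below are made up for illustration.

    import numpy as np
    import numpy.ma as ma

    nT, ngrdY, ngrdX = 12, 4, 5
    # Two fake regridded members with the shape the real code produces.
    regObsData = [ma.masked_invalid(np.random.rand(nT, ngrdY, ngrdX)) for _ in range(2)]
    numOBSs = len(regObsData)

    obsData = ma.zeros((numOBSs + 1, nT, ngrdY, ngrdX))
    avg = ma.zeros((nT, ngrdY, ngrdX))
    for i in range(numOBSs):
        obsData[i] = regObsData[i]
        avg = avg + regObsData[i]
    obsData[numOBSs] = avg / float(numOBSs)   # last member is the 'ENS-OBS' mean

    print obsData.shape   # (3, 12, 4, 5)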
