http://git-wip-us.apache.org/repos/asf/climate/blob/8c142c35/rcmet/src/main/python/rcmes/toolkit/metrics.py ---------------------------------------------------------------------- diff --git a/rcmet/src/main/python/rcmes/toolkit/metrics.py b/rcmet/src/main/python/rcmes/toolkit/metrics.py deleted file mode 100644 index 7fbe174..0000000 --- a/rcmet/src/main/python/rcmes/toolkit/metrics.py +++ /dev/null @@ -1,1080 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -''' -Module storing functions to calculate statistical metrics from numpy arrays -''' - -import subprocess -import os, sys -import datetime -import numpy as np -import numpy.ma as ma -import scipy.stats as stats -import matplotlib.pyplot as plt -from toolkit import plots, process -from utils import misc -from storage import files - -def calcAnnualCycleMeans(dataset1): - ''' - Purpose:: - Calculate annual cycle in terms of monthly means at every grid point. - - Input:: - dataset1 - 3d numpy array of data in (t,lat,lon) or 1d numpy array timeseries - - Output:: - means - if 3d numpy was entered, 3d (# of months,lat,lon), if 1d data entered - it is a timeseries of the data of length # of months - - ''' - data = misc.reshapeMonthlyData(dataset1) - means = data.mean(axis = 0) - return data, means - -def calcAnnualCycleMeansSubRegion(dataset1): - ''' - Purpose:: - Calculate annual cycle in terms of monthly means at every sub-region. - - Input:: - dataset1 - 2d numpy array of data in (region, t) - - Output:: - means - (region, # of months) - - ''' - nregion, nT = dataset1.shape - data = dataset1.reshape[nregion, nT/12, 12] - means = data.mean(axis = 1) - return data, means - - -def calcClimYear(dataset1): - ''' - Purpose:: - Calculate annual mean timeseries and climatology for both 2-D and point time series. - - Input:: - dataset1 - 3d numpy array of data in (t,lat,lon) or 1d numpy array timeseries - - Output:: - tSeries - if 3d numpy was entered, 3d (nYr,lat,lon), if 1d data entered - it is a timeseries of the data of length nYr - means - if 3d numpy was entered, 2d (lat,lon), if 1d data entered - it is a floating point number representing the overall mean - ''' - data = misc.reshapeMonthlyData(dataset1) - tSeries = data.mean(axis = 1) - means = tSeries.mean(axis = 0) - return tSeries, means - -def calcClimSeason(monthBegin, monthEnd, dataset1): - ''' - Purpose :: - Calculate seasonal mean montheries and climatology for both 3-D and point time series. - For example, to calculate DJF mean time series, monthBegin = 12, monthEnd =2 - This can handle monthBegin=monthEnd i.e. 
for climatology of a specific month - - Input:: - monthBegin - an integer for the beginning month (Jan =1) - monthEnd - an integer for the ending month (Jan = 1) - dataset1 - 3d numpy array of data in (t,lat,lon) or 1d numpy array montheries - - Output:: - tSeries - if 3d numpy was entered, 3d (number of years/number of years -1 if monthBegin > monthEnd,lat,lon), - if 1d data entered it is a montheries of the data of length number of years - means - if 3d numpy was entered, 2d (lat,lon), if 1d data entered - it is a floating point number representing the overall mean - ''' - if monthBegin > monthEnd: - # Offset the original array so that the the first month - # becomes monthBegin, note that this cuts off the first year of data - offset = slice(monthBegin - 1, monthBegin - 13) - data = misc.reshapeMonthlyData(dataset1[offset]) - monthIndex = slice(0, 13 - monthBegin + monthEnd) - else: - # Since monthBegin <= monthEnd, just take a slice containing those months - data = misc.reshapeMonthlyData(dataset1) - monthIndex = slice(monthBegin - 1, monthEnd) - - tSeries = data[:, monthIndex].mean(axis = 1) - means = tSeries.mean(axis = 0) - return tSeries, means - -def calcClimSeasonSubRegion(monthBegin, monthEnd, dataset1): - ''' - Purpose :: - Calculate seasonal mean montheries and climatology for both 3-D and point time series. - For example, to calculate DJF mean time series, monthBegin = 12, monthEnd =2 - This can handle monthBegin=monthEnd i.e. for climatology of a specific month - - Input:: - monthBegin - an integer for the beginning month (Jan =1) - monthEnd - an integer for the ending month (Jan = 1) - dataset1 - 3d numpy array of data in (region, t) or 1d numpy array montheries - - Output:: - tSeries - (region, number of years/number of years -1 if monthBegin > monthEnd,lat,lon), - means - (region) - ''' - nregion, nT = dataset1.shape - nYR = nT/12 - if monthBegin > monthEnd: - # Offset the original array so that the the first month - # becomes monthBegin, note that this cuts off the first year of data - offset = slice(monthBegin - 1, monthBegin - 13) - data = dataset1[:,offset].reshape([nregion,nYR-1, 12]) - monthIndex = slice(0, 13 - monthBegin + monthEnd) - else: - # Since monthBegin <= monthEnd, just take a slice containing those months - data = dataset1.reshape([nregion,nYR,12]) - monthIndex = slice(monthBegin - 1, monthEnd) - - tSeries = data[:, :, monthIndex].mean(axis = 2) - means = tSeries.mean(axis = 1) - return tSeries, means - -def calcAnnualCycleStdev(dataset1): - ''' - Purpose:: - Calculate monthly standard deviations for every grid point - - Input:: - dataset1 - 3d numpy array of data in (12* number of years,lat,lon) - - Output:: - stds - if 3d numpy was entered, 3d (12,lat,lon) - ''' - data = misc.reshapeMonthlyData(dataset1) - stds = data.std(axis = 0, ddof = 1) - return stds - -def calcAnnualCycleStdevSubRegion(dataset1): - ''' - Purpose:: - Calculate monthly standard deviations for every sub-region - - Input:: - dataset1 - 2d numpy array of data in (nregion, 12* number of years) - - Output:: - stds - (nregion, 12) - ''' - nregion, nT = data1.shape - data = dataset1.reshape([nregion, nT/12, 12]) - stds = data.std(axis = 1, ddof = 1) - return stds - -def calcAnnualCycleDomainMeans(dataset1): - ''' - Purpose:: - Calculate spatially averaged monthly climatology and standard deviation - - Input:: - dataset1 - 3d numpy array of data in (12* number of years,lat,lon) - - Output:: - means - time series (12) - ''' - data = misc.reshapeMonthlyData(dataset1) - - # Calculate the 
means, month by month - means = np.zeros(12) - for i in np.arange(12): - means[i] = data[:, i, :, :].mean() - - return means - -def calcSpatialStdevRatio(evaluationData, referenceData): - ''' - Purpose :: - Calculate the ratio of spatial standard deviation (model standard deviation)/(observed standard deviation) - - Input :: - evaluationData - model data array (lat, lon) - referenceData- observation data array (lat,lon) - - Output:: - ratio of standard deviation (a scholar) - - ''' - stdevRatio = evaluationData[(evaluationData.mask==False) & (referenceData.mask==False)].std()/ \ - referenceData[(evaluationData.mask==False) & (referenceData.mask==False)].std() - return stdevRatio - -def calcTemporalStdev(dataset1): - ''' - Purpose:: - Calculate sample standard deviations over the time - - Input:: - dataset1 - 3d numpy array of data in (time,lat,lon) - - - Output:: - stds - time series (lat, lon) - ''' - stds = dataset1.std(axis = 0, ddof = 1) - return stds - -def calcAnnualCycleDomainStdev(dataset1): - ''' - Purpose:: - Calculate sample standard deviations representing the domain in each month - - Input:: - dataset1 - 3d numpy array of data in (12* number of years,lat,lon) - - Output:: - stds - time series (12) - ''' - data = misc.reshapeMonthlyData(dataset1) - - # Calculate the standard deviation, month by months - stds = np.zeros(12) - for i in np.arange(12): - stds[i] = data[:, i, :, :].std(ddof = 1) - - return stds - -def calcBiasAveragedOverTime(evaluationData, referenceData, option): # Mean Bias - ''' - Purpose:: - Calculate the mean difference between two fields over time for each grid point. - - Input:: - referenceData - array of data - evaluationData - array of data with same dimension of referenceData - option - string indicating absolute values or not - - Output:: - bias - difference between referenceData and evaluationData averaged over the first dimension - - ''' - # Calculate mean difference between two fields over time for each grid point - # Precrocessing of both obs and model data ensures the absence of missing values - diff = evaluationData - referenceData - if(option == 'abs'): - diff = abs(diff) - bias = diff.mean(axis = 0) - return bias - - -def calcBiasAveragedOverTimeAndSigLev(evaluationData, referenceData): - ''' - Purpose:: - Calculate mean difference between two fields over time for each grid point - - Classify missing data resulting from multiple times (using threshold - data requirement) - - i.e. if the working time unit is monthly data, and we are dealing with - multiple months of data then when we show mean of several months, we need - to decide what threshold of missing data we tolerate before classifying a - data point as missing data. 
- - - Input:: - referenceData - array of data - evaluationData - array of data with same dimension of referenceData - - Output:: - bias - difference between referenceData and evaluationData averaged over the first dimension - sigLev - significance of the difference (masked array) - For example: sig[iy,ix] = 0.95 means that the observation and model is different at 95% confidence level - at X=ix and Y=iy - ''' - # If either gridcell in each data set is missing, set that cell to - # missing for the output significance level - evaluationDataMask = process.create_mask_using_threshold(evaluationData, threshold = 0.75) - referenceDataMask = process.create_mask_using_threshold(referenceData, threshold = 0.75) - - # The overall mask associated with missing data - overallMask = np.logical_or(evaluationDataMask, referenceDataMask) - - diff = evaluationData - referenceData - bias = diff.mean(axis = 0) - sigLev = 1 - stats.ttest_rel(evaluationData, referenceData, axis = 0)[1] - sigLev[overallMask] = -100. - sigLev = ma.masked_equal(sigLev, -100.) - # Set mask for bias metric using missing data in obs or model data series - # i.e. if obs contains more than threshold (e.g.50%) missing data - # then classify time average bias as missing data for that location. - bias = ma.masked_array(bias.data, overallMask) - return bias, sigLev - - -def calcBiasAveragedOverTimeAndDomain(evaluationData, referenceData): - ''' - Purpose:: - Calculate the mean difference between two fields over time and domain - Input:: - referenceData - array of data - evaluationData - array of data with same dimension of referenceData - - Output:: - bias - difference between referenceData and evaluationData averaged over time and space - - ''' - - diff = evaluationData - referenceData - - bias = diff.mean() - return bias - -def calcBias(evaluationData, referenceData): - ''' - Purpose:: - Calculate the difference between two fields at each grid point - - Input:: - referenceData - array of data - evaluationData - array of data with same dimension of referenceData - - Output:: - diff - difference between referenceData and evaluationData - - ''' - - diff = evaluationData - referenceData - return diff - -def calcRootMeanSquaredDifferenceAveragedOverTime(evaluationData, referenceData): - ''' - Purpose:: - Calculate root mean squared difference (RMS errors) averaged over time between two fields for each grid point - - Input:: - referenceData - array of data - evaluationData - array of data with same dimension of referenceData - - Output:: - rms - root mean squared difference, if the input is 1-d data, the output becomes a single floating number. - - ''' - sqdiff = (evaluationData - referenceData)** 2 - rms = np.sqrt(sqdiff.mean(axis = 0)) - return rms - - -def calcRootMeanSquaredDifferenceAveragedOverTimeAndDomain(evaluationData, referenceData): - ''' - Purpose:: - Calculate root mean squared difference (RMS errors) averaged over time and space between two fields - - Input:: - referenceData - array of data (should be 3-d array) - evaluationData - array of data with same dimension of referenceData - - Output:: - rms - root mean squared difference averaged over time and space - ''' - sqdiff = (evaluationData - referenceData)** 2 - rms = np.sqrt(sqdiff.mean()) - return rms - -def calcTemporalCorrelation(evaluationData, referenceData): - ''' - Purpose :: - Calculate the temporal correlation. - - Assumption(s) :: - The first dimension of two datasets is the time axis. 
- - Input :: - evaluationData - model data array of any shape - referenceData- observation data array of any shape - - Output:: - temporalCorelation - A 2-D array of temporal correlation coefficients at each subregion - sigLev - A 2-D array of confidence levels related to temporalCorelation - - REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp. - sigLev: the correlation between model and observation is significant at sigLev * 100 % - ''' - evaluationDataMask = process.create_mask_using_threshold(evaluationData, threshold = 0.75) - referenceDataMask = process.create_mask_using_threshold(referenceData, threshold = 0.75) - - nregion = evaluationData.shape[0] - temporalCorrelation = ma.zeros([nregion])-100. - sigLev = ma.zeros([nregion])-100. - for iregion in np.arange(nregion): - temporalCorrelation[iregion], sigLev[iregion] = stats.pearsonr(evaluationData[iregion,:], referenceData[iregion,:]) - sigLev[iregion] = 1 - sigLev[iregion] - - temporalCorrelation=ma.masked_equal(temporalCorrelation.data, -100.) - sigLev=ma.masked_equal(sigLev.data, -100.) - - return temporalCorrelation, sigLev - -def calcTemporalCorrelationSubRegion(evaluationData, referenceData): - ''' - Purpose :: - Calculate the temporal correlation. - - Assumption(s) :: - both evaluation and reference data are subregion averaged time series - - Input :: - evaluationData - model data array [region,t] - referenceData- observation data [region, t] - - Output:: - temporalCorelation - A 1-D array of temporal correlation coefficients at each grid point. - sigLev - A 1-D array of confidence levels related to temporalCorelation - - REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp. - sigLev: the correlation between model and observation is significant at sigLev * 100 % - ''' - evaluationDataMask = process.create_mask_using_threshold(evaluationData, threshold = 0.75) - referenceDataMask = process.create_mask_using_threshold(referenceData, threshold = 0.75) - - ngrdY = evaluationData.shape[1] - ngrdX = evaluationData.shape[2] - temporalCorrelation = ma.zeros([ngrdY,ngrdX])-100. - sigLev = ma.zeros([ngrdY,ngrdX])-100. - for iy in np.arange(ngrdY): - for ix in np.arange(ngrdX): - if not evaluationDataMask[iy,ix] and not referenceDataMask[iy,ix]: - t1=evaluationData[:,iy,ix] - t2=referenceData[:,iy,ix] - if t1.min()!=t1.max() and t2.min()!=t2.max(): - temporalCorrelation[iy,ix], sigLev[iy,ix]=stats.pearsonr(t1[(t1.mask==False) & (t2.mask==False)], - t2[(t1.mask==False) & (t2.mask==False)]) - sigLev[iy,ix]=1-sigLev[iy,ix] # p-value => confidence level - - temporalCorrelation=ma.masked_equal(temporalCorrelation.data, -100.) - sigLev=ma.masked_equal(sigLev.data, -100.) - - return temporalCorrelation, sigLev - -def calcPatternCorrelation(evaluationData, referenceData): - ''' - Purpose :: - Calculate the spatial correlation. 
- - Input :: - evaluationData - model data array (lat, lon) - referenceData- observation data array (lat,lon) - - Output:: - patternCorrelation - a single floating point - sigLev - a single floating point representing the confidence level - - ''' - - patternCorrelation, sigLev = stats.pearsonr(evaluationData[(evaluationData.mask==False) & (referenceData.mask==False)], - referenceData[(evaluationData.mask==False) & (referenceData.mask==False)]) - return patternCorrelation, sigLev - - -def calcPatternCorrelationEachTime(evaluationData, referenceData): - ''' - Purpose :: - Calculate the spatial correlation for each time - - Assumption(s) :: - The first dimension of two datasets is the time axis. - - Input :: - evaluationData - model data array (time,lat, lon) - referenceData- observation data array (time,lat,lon) - - Output:: - patternCorrelation - a timeseries (time) - sigLev - a time series (time) - ''' - nT = evaluationData.shape[0] - patternCorrelation = ma.zeros(nT)-100. - sigLev = ma.zeros(nT)-100. - for it in np.arange(nT): - patternCorrelation[it], sigLev[it] = calcPatternCorrelation(evaluationData[it,:,:], referenceData[it,:,:]) - - return patternCorrelation,sigLev - -def calcNashSutcliff(evaluationData, referenceData): - ''' - Assumption(s):: - Both evaluationData and referenceData are the same shape. - * lat, lon must match up - * time steps must align (i.e. months vs. months) - - Input:: - evaluationData - 3d (time, lat, lon) array of data - referenceData - 3d (time, lat, lon) array of data - - Output: - nashcor - 1d array aligned along the time dimension of the input - datasets. Time Series of Nash-Sutcliff Coefficient of efficiency - - ''' - # Flatten the spatial dimensions - data1 = evaluationData[:] - data2 = referenceData[:] - nT = data1.shape[0] - data1.shape = nT, data1.size / nT - data2.shape = nT, data2.size / nT - meanData2 = data2.mean(axis = 1) - - # meanData2 must be reshaped to 2D as to obey - # numpy broadcasting rules - meanData2.shape = nT, 1 - nashcor = 1 - ((((data2 - data1) ** 2).sum(axis = 1)) / - (((data2 - meanData2) ** 2).sum(axis = 1))) - return nashcor - -def getPdfInputValues(): - print '****PDF input values from user required **** \n' - nbins = int (raw_input('Please enter the number of bins to use. \n')) - minEdge = float(raw_input('Please enter the minimum value to use for the edge. \n')) - maxEdge = float(raw_input('Please enter the maximum value to use for the edge. \n')) - - return nbins, minEdge, maxEdge - - -def calcPdf(evaluationData, referenceData, settings=None): - ''' - Routine to calculate a normalized Probability Distribution Function with - bins set according to data range. - Equation from Perkins et al. 
2007 - - PS=sum(min(Z_O_i, Z_M_i)) where Z is the distribution (histogram of the data for either set) - called in do_rcmes_processing_sub.py - - Inputs:: - evaluationData (3D numpy array): array shape (time, lat, lon) - referenceData (3D numpy array): array shape (time, lat, lon) - settings (tuple): [optional] format (binCount, minEdge, maxEdge) - binCount (int): number of bins to use - minEdge (int|float): minimum edge - maxEdge (int|float): maximum edge - - NB, time here is the number of time values eg for time period - 199001010000 - 199201010000 - - Assumptions:: - If annual means-opt 1, was chosen, then referenceData.shape = (YearsCount,lat,lon) - - If monthly means - opt 2, was choosen, then referenceData.shape = (MonthsCount,lat,lon) - - Output:: - - one float which represents the PDF for the year - - TODO: Clean up this docstring so we have a single purpose statement - - Routine to calculate a normalised PDF with bins set according to data range. - - Input:: - 2 data arrays, modelData and obsData - - Output:: - PDF for the year - - ''' - similarityScore = 0.0 - print 'min modelData', evaluationData[:, :, :].min() - print 'max modelData', evaluationData[:, :, :].max() - print 'min obsData', referenceData[:, :, :].min() - print 'max obsData', referenceData[:, :, :].max() - - if settings == None: - nbins, minEdge, maxEdge = getPdfInputValues() - else: - nbins, minEdge, maxEdge = settings - - - mybins = np.linspace(minEdge, maxEdge, nbins) - print 'nbins is', nbins, 'mybins are', mybins - - pdfMod, edges = np.histogram(evaluationData, bins = mybins, normed = True, density = True) - print 'evaluationData distribution and edges', pdfMod, edges - pdfObs, edges = np.histogram(referenceData, bins = mybins, normed = True, density = True) - print 'referenceData distribution and edges', pdfObs, edges - - #find minimum at each bin between lists - i = 0 - for model_value in pdfMod : - print 'model_value is', model_value, 'pdfObs[', i, '] is', pdfObs[i] - if model_value < pdfObs[i]: - similarityScore += model_value - else: - similarityScore += pdfObs[i] - i += 1 - print 'similarity_score is', similarityScore - return similarityScore - - -def metrics_plots(varName, numOBS, numMDL, nT, ngrdY, ngrdX, Times, lons, - lats, obsData, mdlData, obsList, mdlList, workdir,subRegions, FoutOption): - ''' - Calculate evaluation metrics and generate plots. - ''' - ################################################################################################################## - # Routine to compute evaluation metrics and generate plots - # (1) metric calculation - # (2) plot production - # Input: - # numOBS - the number of obs data. either 1 or >2 as obs ensemble is added for multi-obs cases - # numMDL - the number of mdl data. 
either 1 or >2 as obs ensemble is added for multi-mdl cases - # nT - the length of the data in the time dimension - # ngrdY - the length of the data in Y dimension - # ngrdX - the length of the data in the X dimension - # Times - time stamps - # lons,lats - longitude & latitude values of the data domain (same for model & obs) - # obsData - obs data, either a single or multiple + obs_ensemble, interpolated onto the time- and - # grid for analysis - # mdlData - mdl data, either a single or multiple + obs_ensemble, interpolated onto the time- and - # spatial grid for analysis - #JK2.0: obsRgn - obs time series averaged for subregions: Local variable in v2.0 - #JK2.0: mdlRgn - obs time series averaged for subregions: Local variable in v2.0 - # obsList - string describing the observation data files - # mdlList - string describing model file names - # workdir - string describing the directory path for storing results and plots - #JK2.0: mdlSelect - the mdl data to be evaluated: Locally determined in v.2.0 - #JK2.0: obsSelect - the obs data to be used as the reference for evaluation: Locally determined in v.2.0 - #JK2.0: numSubRgn - the number of subregions: Locally determined in v.2.0 - #JK2.0: subRgnName - the names of subregions: Locally determined in v.2.0 - #JK2.0: rgnSelect - the region for which the area-mean time series is to be - # evaluated/plotted: Locally determined in v.2.0 - # obsParameterId - int, db parameter id. ** this is non-essential once the correct - # metadata use is implemented - # precipFlag - to be removed once the correct metadata use is implemented - # timeRegridOption - string: 'full'|'annual'|'monthly'|'daily' - # seasonalCycleOption - int (=1 if set) (probably should be bool longterm) - # metricOption - string: 'bias'|'mae'|'acc'|'pdf'|'patcor'|'rms'|'diff' - # titleOption - string describing title to use in plot graphic - # plotFileNameOption - string describing filename stub to use for plot graphic i.e. {stub}.png - # Output: image files of plots + possibly data - #****************************************************** - # JK2.0: Only the data interpolated temporally and spatially onto the analysis grid - # are transferred into this routine. The rest of processing (e.g., area-averaging, etc.) - # are to be performed in this routine. Do not overwrite obsData[numOBs,nt,ngrdY,ngrdX] & - # mdlData[numMDL,nt,ngrdY,ngrdX]. 
These are the raw, re-gridded data to be used repeatedly - # for multiple evaluation steps as desired by an evaluator - ################################################################################################################## - - ##################################################################################################### - # JK2.0: Compute evaluation metrics and plots to visualize the results - ##################################################################################################### - # (mp.001) Sub-regions for local timeseries analysis - #-------------------------------- - # Enter the location of the subrgns via screen input of data; - # modify this to add an option to read-in from data file(s) - #---------------------------------------------------------------------------------------------------- - if subRegions: - numSubRgn = len(subRegions) - subRgnName = [ x.name for x in subRegions ] - subRgnLon0 = [ x.lonMin for x in subRegions ] - subRgnLon1 = [ x.lonMax for x in subRegions ] - subRgnLat0 = [ x.latMin for x in subRegions ] - subRgnLat1 = [ x.latMax for x in subRegions ] - else: - print '' - ans = raw_input('Calculate area-mean timeseries for subregions? y/n: \n> ') - print '' - if ans == 'y': - ans = raw_input('Input subregion info interactively? y/n: \n> ') - if ans == 'y': - numSubRgn, subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 = misc.createSubRegionObjectInteractively() - else: - print 'Read subregion info from a pre-fabricated text file' - ans = raw_input('Read from a defaule file (workdir + "/sub_regions.txt")? y/n: \n> ') - if ans == 'y': - subRgnFileName = workdir + "/sub_regions.txt" - else: - subRgnFileName = raw_input('Enter the subRgnFileName to read from \n') - print 'subRgnFileName ', subRgnFileName - numSubRgn, subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 = misc.assign_subRgns_from_a_text_file(subRgnFileName) - print subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 - else: - numSubRgn = 0 - - # compute the area-mean timeseries for all subregions if subregion(s) are defined. 
- # the number of subregions is usually small and memory usage is usually not a concern - obsRgn = ma.zeros((numOBS, numSubRgn, nT)) - mdlRgn = ma.zeros((numMDL, numSubRgn, nT)) - if numSubRgn > 0: - print 'Enter area-averaging: mdlData.shape, obsData.shape ', mdlData.shape, obsData.shape - print 'Use Latitude/Longitude Mask for Area Averaging' - for n in np.arange(numSubRgn): - # Define mask using regular lat/lon box specified by users ('mask=True' defines the area to be excluded) - maskLonMin = subRgnLon0[n] - maskLonMax = subRgnLon1[n] - maskLatMin = subRgnLat0[n] - maskLatMax = subRgnLat1[n] - mask = np.logical_or(np.logical_or(lats <= maskLatMin, lats >= maskLatMax), - np.logical_or(lons <= maskLonMin, lons >= maskLonMax)) - # Calculate area-weighted averages within this region and store in a new list - for k in np.arange(numOBS): # area-average obs data - Store = [] - for t in np.arange(nT): - Store.append(process.calc_area_mean(obsData[k, t, :, :], lats, lons, mymask = mask)) - obsRgn[k, n, :] = ma.array(Store[:]) - for k in np.arange(numMDL): # area-average mdl data - Store = [] - for t in np.arange(nT): - Store.append(process.calc_area_mean(mdlData[k, t, :, :], lats, lons, mymask = mask)) - mdlRgn[k, n, :] = ma.array(Store[:]) - Store = [] # release the memory allocated by temporary vars - - #------------------------------------------------------------------------- - # (mp.002) FoutOption: The option to create a binary or netCDF file of processed - # (re-gridded and regionally-averaged) data for user-specific processing. - # This option is useful for advanced users who need more than - # the metrics and vidualization provided in the basic package. - #---------------------------------------------------------------------------------------------------- - print '' - if not FoutOption: - FoutOption = raw_input('Option for output files of obs/model data: Enter no/bn/nc \ - for no, binary, netCDF file \n> ').lower() - print '' - # write a binary file for post-processing if desired - if FoutOption == 'bn': - fileName = workdir + '/lonlat_eval_domain' + '.bn' - if(os.path.exists(fileName) == True): - cmnd = 'rm -f ' + fileName - subprocess.call(cmnd, shell=True) - files.writeBN_lola(fileName, lons, lats) - fileName = workdir + '/Tseries_' + varName + '.bn' - print "Create regridded data file ", fileName, " for offline processingr" - print 'The file includes time series of ', numOBS, ' obs and ', numMDL, \ - ' models ', nT, ' steps ', ngrdX, 'x', ngrdY, ' grids' - if(os.path.exists(fileName) == True): - cmnd = 'rm -f ' + fileName - subprocess.call(cmnd, shell=True) - files.writeBNdata(fileName, numOBS, numMDL, nT, ngrdX, ngrdY, numSubRgn, obsData, mdlData, obsRgn, mdlRgn) - # write a netCDF file for post-processing if desired - if FoutOption == 'nc': - fileName = workdir + '/'+varName+'_Tseries.nc' - if(os.path.exists(fileName) == True): - print "removing %s from the local filesystem, so it can be replaced..." 
% (fileName,) - files.writeNCfile(fileName, numSubRgn, lons, lats, obsData, mdlData, obsRgn, mdlRgn, obsList, mdlList, subRegions) - if FoutOption == 'bn': - print 'The regridded obs and model data are written in the binary file ', fileName - elif FoutOption == 'nc': - print 'The regridded obs and model data are written in the netCDF file ', fileName - - ##################################################################################################### - ###################### Metrics calculation and plotting cycle starts from here ###################### - ##################################################################################################### - - print '' - print 'OBS and MDL data have been prepared for the evaluation step' - print '' - doMetricsOption = raw_input('Want to calculate metrics and plot them? [y/n]\n> ').lower() - print '' - - #################################################### - # Terminate job if no metrics are to be calculated # - #################################################### - - neval = 0 - - while doMetricsOption == 'y': - neval += 1 - print ' ' - print 'neval= ', neval - print ' ' - #-------------------------------- - # (mp.003) Preparation - #---------------------------------------------------------------------------------------------------- - # Determine info on years (the years in the record [YR] and its number[numYR]) - yy = ma.zeros(nT, 'i') - for n in np.arange(nT): - yy[n] = Times[n].strftime("%Y") - YR = np.unique(yy) - yy = 0 - nYR = len(YR) - print 'nYR, YR = ', nYR, YR - - # Select the eval domain: over the entire domain (spatial distrib) or regional time series - anlDomain = 'n' - anlRgn = 'n' - print ' ' - analSelect = int(raw_input('Eval over domain (Enter 0) or time series of selected regions (Enter 1) \n> ')) - print ' ' - if analSelect == 0: - anlDomain = 'y' - elif analSelect == 1: - anlRgn = 'y' - else: - print 'analSelect= ', analSelect, ' is Not a valid option: CRASH' - sys.exit() - - #-------------------------------- - # (mp.004) Select the model and data to be used in the evaluation step - #---------------------------------------------------------------------------------------------------- - mdlSelect = misc.select_data(numMDL, Times, mdlList, 'mdl') - obsSelect = misc.select_data(numOBS, Times, obsList, 'obs') - mdlName = mdlList[mdlSelect] - obsName = obsList[obsSelect] - print 'selected obs and model for evaluation= ', obsName, mdlName - - - #-------------------------------- - # (mp.005) Spatial distribution analysis/Evaluation (anlDomain='y') - # All climatology variables are 2-d arrays (e.g., mClim = ma.zeros((ngrdY,ngrdX)) - #---------------------------------------------------------------------------------------------------- - if anlDomain == 'y': - # first determine the temporal properties to be evaluated - print '' - timeOption = misc.select_timOpt() - print 'timeOption=',timeOption - if timeOption == 1: - timeScale = 'Annual' - # compute the annual-mean time series and climatology. - # mTser=ma.zeros((nYR,ngrdY,ngrdX)), mClim = ma.zeros((ngrdY,ngrdX)) - mTser, mClim = calcClimYear(mdlData[mdlSelect, :, :, :]) - oTser, oClim = calcClimYear(obsData[obsSelect, :, :, :]) - elif timeOption == 2: - timeScale = 'Seasonal' - # select the timeseries and climatology for a season specifiec by a user - print ' ' - moBgn = int(raw_input('Enter the beginning month for your season. 1-12: \n> ')) - moEnd = int(raw_input('Enter the ending month for your season. 
1-12: \n> ')) - print ' ' - mTser, mClim = calcClimSeason(moBgn, moEnd, mdlData[mdlSelect, :, :, :]) - oTser, oClim = calcClimSeason(moBgn, moEnd, obsData[obsSelect, :, :, :]) - elif timeOption == 3: - timeScale = 'Monthly' - # compute the monthly-mean time series and climatology - # Note that the shapes of the output vars are: - # mTser = ma.zeros((nYR,12,ngrdY,ngrdX)) & mClim = ma.zeros((12,ngrdY,ngrdX)) - # Also same for oTser = ma.zeros((nYR,12,ngrdY,ngrdX)) &,oClim = ma.zeros((12,ngrdY,ngrdX)) - mTser, mClim = calcAnnualCycleMeans(mdlData[mdlSelect, :, :, :]) - oTser, oClim = calcAnnualCycleMeans(obsData[obsSelect, :, :, :]) - else: - # undefined process options. exit - print 'The desired temporal scale is not available this time. END the job' - sys.exit() - - #-------------------------------- - # (mp.006) Select metric to be calculated - # bias, mae, acct, accs, pcct, pccs, rmst, rmss, pdfSkillScore - #---------------------------------------------------------------------------------------------------- - print ' ' - metricOption = misc.select_metrics() - print ' ' - - # metrics below yields 2-d array, i.e., metricDat = ma.array((ngrdY,ngrdX)) - if metricOption == 'BIAS': - metricDat, sigLev = calcBiasAveragedOverTimeAndSigLev(mTser, oTser) - oStdv = calcTemporalStdev(oTser) - elif metricOption == 'MAE': - metricDat= calcBiasAveragedOverTime(mTser, oTser, 'abs') - # metrics below yields 1-d time series - elif metricOption == 'PCt': - metricDat, sigLev = calcPatternCorrelationEachTime(mTser, oTser) - # metrics below yields 2-d array, i.e., metricDat = ma.array((ngrdY,ngrdX)) - elif metricOption == 'TC': - - metricDat, sigLev = calcTemporalCorrelation(mTser, oTser) - # metrics below yields a scalar values - elif metricOption == 'PCC': - metricDat, sigLev = calcPatternCorrelation(mClim, oClim) - # metrics below yields 2-d array, i.e., metricDat = ma.array((ngrdY,ngrdX)) - elif metricOption == 'RMSt': - metricDat = calcRootMeanSquaredDifferenceAveragedOverTime(mTser, oTser) - - #-------------------------------- - # (mp.007) Plot the metrics. First, enter plot info - #---------------------------------------------------------------------------------------------------- - - # 2-d contour plots - if metricDat.ndim == 2: - # assign plot file name and delete old (un-saved) plot files - plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption - if(os.path.exists(plotFileName) == True): - cmnd = 'rm -f ' + plotFileName - subprocess.call(cmnd, shell=True) - # assign plot title - plotTitle = metricOption + '_' + varName - # Data-specific plot options: i.e. adjust model data units & set plot color bars - #cMap = 'rainbow' - cMap = plt.cm.RdBu_r - #cMap = 'BlWhRe' - #cMap = 'BlueRed' - #cMap = 'GreyWhiteGrey' - # Calculate color bar ranges for data such that same range is used - # in obs and model plots for like-with-like comparison. - obsDataMask = np.zeros_like(oClim.data[:, :]) - metricDat = ma.masked_array(metricDat, obsDataMask) - oClim = ma.masked_array(oClim, obsDataMask) - oStdv = ma.masked_array(oClim, obsDataMask) - plotDat = metricDat - mymax = plotDat.max() - mymin = plotDat.min() - if metricOption == 'BIAS': - abs_mymin = abs(mymin) - if abs_mymin <= mymax: - mymin = -mymax - else: - mymax = abs_mymin - print 'Plot bias over the model domain: data MIN/MAX= ', mymin, mymax - ans = raw_input('Do you want to use different scale for plotting? 
[y/n]\n> ').lower() - if ans == 'y': - mymin = float(raw_input('Enter the minimum plot scale \n> ')) - mymax = float(raw_input('Enter the maximum plot scale \n> ')) - wksType = 'png' - # TODO This shouldn't return anything. Handle a "status" the proper way - _ = plots.draw_cntr_map_single(plotDat, lats, lons, mymin, mymax, - plotTitle, plotFileName, cMap=cMap) - # if bias, plot also normalzied values and means: first, normalized by mean - if metricOption == 'BIAS': - print '' - makePlot = raw_input('Plot bias in terms of % of OBS mean? [y/n]\n> ').lower() - print '' - if makePlot == 'y': - plotDat = 100.*metricDat / oClim - mymax = plotDat.max() - mymin = plotDat.min() - mymn = -100. - mymx = 105. - print 'Plot mean-normalized bias: data MIN/MAX= ', mymin, mymax, \ - ' Default MIN/MAX= ', mymn, mymx - ans = raw_input('Do you want to use different scale for plotting? [y/n]\n> ').lower() - if ans == 'y': - mymin = float(raw_input('Enter the minimum plot scale \n> ')) - mymax = float(raw_input('Enter the maximum plot scale \n> ')) - else: - mymin = mymn - mymax = mymx - plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption + '_Mean' - if(os.path.exists(plotFileName) == True): - cmnd = 'rm -f ' + plotFileName - subprocess.call(cmnd, shell = True) - plotTitle = 'Bias (% MEAN)' - # TODO Again, this shouldn't return a status - _ = plots.draw_cntr_map_single(plotDat, lats, lons, mymin, mymax, - plotTitle, plotFileName, wksType, cMap) - # normalized by sigma - makePlot = raw_input('Plot bias in terms of % of interann sigma? [y/n]\n> ').lower() - if makePlot == 'y': - plotDat = 100.*metricDat / oStdv - mymax = plotDat.max() - mymin = plotDat.min() - mymn = -200. - mymx = 205. - print 'Plot STD-normalized bias: data MIN/MAX= ', mymin, mymax, ' Default MIN/MAX= ', mymn, mymx - ans = raw_input('Do you want to use different scale for plotting? [y/n]\n> ').lower() - if ans == 'y': - mymin = float(raw_input('Enter the minimum plot scale \n> ')) - mymax = float(raw_input('Enter the maximum plot scale \n> ')) - else: - mymin = mymn - mymax = mymx - plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption + '_SigT' - if(os.path.exists(plotFileName) == True): - cmnd = 'rm -f ' + plotFileName - subprocess.call(cmnd, shell = True) - plotTitle = 'Bias (% SIGMA_time)' - # TODO Hay look! A todo re. status returns! - _ = plots.draw_cntr_map_single(plotDat, lats, lons, mymin, mymax, - plotTitle, plotFileName, wksType, cMap) - # obs climatology - makePlot = raw_input('Plot observation? [y/n]\n> ').lower() - if makePlot == 'y': - if varName == 'pr': - cMap = plt.cm.jet - else: - cMap = plt.cm.jet - plotDat = oClim - mymax = plotDat.max() - mymin = plotDat.min() - print 'Plot STD-normalized bias over the model domain: data MIN/MAX= ', mymin, mymax - ans = raw_input('Do you want to use different scale for plotting? [y/n]\n> ').lower() - if ans == 'y': - mymin = float(raw_input('Enter the minimum plot scale \n> ')) - mymax = float(raw_input('Enter the maximum plot scale \n> ')) - plotFileName = workdir + '/' + timeScale + '_' + varName + '_OBS' - if(os.path.exists(plotFileName) == True): - cmnd = 'rm -f ' + plotFileName - subprocess.call(cmnd, shell=True) - plotTitle = 'OBS (mm/day)' - # TODO Yep - _ = plots.draw_cntr_map_single(plotDat, lats, lons, mymin, mymax, - plotTitle, plotFileName, wksType, cMap) - - # Repeat for another metric - print '' - print 'Evaluation completed' - print '' - doMetricsOption = raw_input('Do you want to perform another evaluation? 
[y/n]\n> ').lower() - print '' - - # metrics and plots for regional time series - elif anlRgn == 'y': - # select the region(s) for evaluation. model and obs have been selected before entering this if block - print 'There are ', numSubRgn, ' subregions. Select the subregion(s) for evaluation' - rgnSelect = misc.selectSubRegion(subRegions) - print 'selected region for evaluation= ', rgnSelect - # Select the model & obs data to be evaluated - oData = ma.zeros(nT) - mData = ma.zeros(nT) - oData = obsRgn[obsSelect, rgnSelect, :] - mData = mdlRgn[mdlSelect, rgnSelect, :] - - # compute the monthly-mean climatology to construct the annual cycle - obsTseries, obsAnnualCycle = calcAnnualCycleMeans(oData) - mdlTseries, mdlAnnualCycle = calcAnnualCycleMeans(mData) - print 'obsAnnCyc= ', obsAnnualCycle - print 'mdlAnnCyc= ', mdlAnnualCycle - - #-------------------------------- - # (mp.008) Select performance metric - #---------------------------------------------------------------------------------------------------- - #metricOption = misc.select_metrics() - # Temporarily, compute the RMSE and pattern correlation for the simulated - # and observed annual cycle based on monthly means - # TODO This aren't used. Missing something here??? - - tempRMS = calcRootMeanSquaredDifferenceAveragedOverTime(mdlAnnualCycle, obsAnnualCycle) - tempCOR, tempSIG = stats.pearsonr(mdlAnnualCycle, obsAnnualCycle) - - #-------------------------------- - # (mp.009) Plot results - #---------------------------------------------------------------------------------------------------- - # Data-specific plot options: i.e. adjust model data units & set plot color bars - colorbar = 'rainbow' - if varName == 'pr': # set color bar for prcp - colorbar = plt.cm.jet - - # 1-d data, e.g. Time series plots - plotFileName = 'anncyc_' + varName + '_' + subRgnName[rgnSelect][0:3] - if(os.path.exists(plotFileName) == True): - cmnd = 'rm -f ' + plotFileName - subprocess.call(cmnd, shell = True) - year_labels = False # for annual cycle plots - mytitle = 'Annual Cycle of ' + varName + ' at Sub-Region ' + subRgnName[rgnSelect][0:3] - # Create a list of datetimes to represent the annual cycle, one per month. - times = [] - for m in xrange(12): - times.append(datetime.datetime(2000, m + 1, 1, 0, 0, 0, 0)) - #for i in np.arange(12): - # times.append(i+1) - _ = plots.draw_time_series_plot(mdlAnnualCycle, times, plotFileName, workdir, - data2 = obsAnnualCycle, mytitle = mytitle, ytitle = 'Y', - xtitle = 'MONTH', year_labels = year_labels) - - # Repeat for another metric - doMetricsOption = raw_input('Do you want to perform another evaluation? [y/n]\n> ').lower() - - # Processing complete if a user enters 'n' for 'doMetricsOption' - print 'RCMES processing completed.'
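For reference, the core grid-point formulas implemented by the deleted metrics.py above reduce to a few lines of NumPy. The sketch below restates three of them (time-averaged bias, time-averaged RMS difference, and the Nash-Sutcliffe coefficient) outside the RCMES pipeline; it is a minimal Python 3 / plain-NumPy illustration, and the function names and synthetic test arrays are mine, not part of the original module.

import numpy as np

def bias_over_time(evaluation, reference):
    # Mean difference over the time (first) axis at each grid point,
    # as in calcBiasAveragedOverTime above.
    return (evaluation - reference).mean(axis=0)

def rms_over_time(evaluation, reference):
    # Root-mean-square difference over the time axis at each grid point,
    # as in calcRootMeanSquaredDifferenceAveragedOverTime above.
    return np.sqrt(((evaluation - reference) ** 2).mean(axis=0))

def nash_sutcliffe(evaluation, reference):
    # Nash-Sutcliffe coefficient per time step, with the spatial
    # dimensions flattened, as in calcNashSutcliff above.
    nt = evaluation.shape[0]
    e = evaluation.reshape(nt, -1)
    r = reference.reshape(nt, -1)
    r_mean = r.mean(axis=1, keepdims=True)
    return 1.0 - ((r - e) ** 2).sum(axis=1) / ((r - r_mean) ** 2).sum(axis=1)

# Example with synthetic (time, lat, lon) arrays.
rng = np.random.default_rng(0)
model = rng.normal(size=(24, 10, 12))
obs = model + rng.normal(scale=0.1, size=model.shape)
print(bias_over_time(model, obs).shape)   # (10, 12)
print(rms_over_time(model, obs).shape)    # (10, 12)
print(nash_sutcliffe(model, obs).shape)   # (24,)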
http://git-wip-us.apache.org/repos/asf/climate/blob/8c142c35/rcmet/src/main/python/rcmes/toolkit/metrics_kyo.py ---------------------------------------------------------------------- diff --git a/rcmet/src/main/python/rcmes/toolkit/metrics_kyo.py b/rcmet/src/main/python/rcmes/toolkit/metrics_kyo.py deleted file mode 100644 index cdc4c9d..0000000 --- a/rcmet/src/main/python/rcmes/toolkit/metrics_kyo.py +++ /dev/null @@ -1,718 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -''' -Module storing functions to calculate statistical metrics from numpy arrays -''' - -import subprocess -import os, sys -import datetime -import numpy as np -import numpy.ma as ma -import scipy.stats as stats -import matplotlib.pyplot as plt -from toolkit import plots, process -from ocw import plotter -from utils import misc -from storage import files - -def calcAnnualCycleMeans(dataset1): - ''' - Purpose:: - Calculate annual cycle in terms of monthly means at every grid point. - - Input:: - dataset1 - 3d numpy array of data in (t,lat,lon) or 1d numpy array timeseries - - Output:: - means - if 3d numpy was entered, 3d (# of months,lat,lon), if 1d data entered - it is a timeseries of the data of length # of months - - ''' - data = misc.reshapeMonthlyData(dataset1) - means = data.mean(axis = 0) - return data, means - -def calcAnnualCycleMeansSubRegion(dataset1): - ''' - Purpose:: - Calculate annual cycle in terms of monthly means at every sub-region. - - Input:: - dataset1 - 2d numpy array of data in (region, t) - - Output:: - means - (region, # of months) - - ''' - nregion, nT = dataset1.shape - data = dataset1.reshape([nregion, nT/12, 12]) - means = data.mean(axis = 1) - return data, means - - -def calcClimYear(dataset1): - ''' - Purpose:: - Calculate annual mean timeseries and climatology for both 2-D and point time series. - - Input:: - dataset1 - 3d numpy array of data in (t,lat,lon) or 1d numpy array timeseries - - Output:: - tSeries - if 3d numpy was entered, 3d (nYr,lat,lon), if 1d data entered - it is a timeseries of the data of length nYr - means - if 3d numpy was entered, 2d (lat,lon), if 1d data entered - it is a floating point number representing the overall mean - ''' - data = misc.reshapeMonthlyData(dataset1) - tSeries = data.mean(axis = 1) - means = tSeries.mean(axis = 0) - return tSeries, means - -def calcClimSeason(monthBegin, monthEnd, dataset1): - ''' - Purpose :: - Calculate seasonal mean montheries and climatology for both 3-D and point time series. - For example, to calculate DJF mean time series, monthBegin = 12, monthEnd =2 - This can handle monthBegin=monthEnd i.e. 
for climatology of a specific month - - Input:: - monthBegin - an integer for the beginning month (Jan =1) - monthEnd - an integer for the ending month (Jan = 1) - dataset1 - 3d numpy array of data in (t,lat,lon) or 1d numpy array montheries - - Output:: - tSeries - if 3d numpy was entered, 3d (number of years/number of years -1 if monthBegin > monthEnd,lat,lon), - if 1d data entered it is a montheries of the data of length number of years - means - if 3d numpy was entered, 2d (lat,lon), if 1d data entered - it is a floating point number representing the overall mean - ''' - if monthBegin > monthEnd: - # Offset the original array so that the the first month - # becomes monthBegin, note that this cuts off the first year of data - offset = slice(monthBegin - 1, monthBegin - 13) - data = misc.reshapeMonthlyData(dataset1[offset]) - monthIndex = slice(0, 13 - monthBegin + monthEnd) - else: - # Since monthBegin <= monthEnd, just take a slice containing those months - data = misc.reshapeMonthlyData(dataset1) - monthIndex = slice(monthBegin - 1, monthEnd) - - tSeries = data[:, monthIndex].mean(axis = 1) - means = tSeries.mean(axis = 0) - return tSeries, means - -def calcClimSeasonSubRegion(monthBegin, monthEnd, dataset1): - ''' - Purpose :: - Calculate seasonal mean montheries and climatology for both 3-D and point time series. - For example, to calculate DJF mean time series, monthBegin = 12, monthEnd =2 - This can handle monthBegin=monthEnd i.e. for climatology of a specific month - - Input:: - monthBegin - an integer for the beginning month (Jan =1) - monthEnd - an integer for the ending month (Jan = 1) - dataset1 - 3d numpy array of data in (region, t) or 1d numpy array montheries - - Output:: - tSeries - (region, number of years/number of years -1 if monthBegin > monthEnd,lat,lon), - means - (region) - ''' - nregion, nT = dataset1.shape - nYR = nT/12 - if monthBegin > monthEnd: - # Offset the original array so that the the first month - # becomes monthBegin, note that this cuts off the first year of data - offset = slice(monthBegin - 1, monthBegin - 13) - data = dataset1[:,offset].reshape([nregion,nYR-1, 12]) - monthIndex = slice(0, 13 - monthBegin + monthEnd) - else: - # Since monthBegin <= monthEnd, just take a slice containing those months - data = dataset1.reshape([nregion,nYR,12]) - monthIndex = slice(monthBegin - 1, monthEnd) - - tSeries = data[:, :, monthIndex].mean(axis = 2) - means = tSeries.mean(axis = 1) - return tSeries, means - -def calcAnnualCycleStdev(dataset1): - ''' - Purpose:: - Calculate monthly standard deviations for every grid point - - Input:: - dataset1 - 3d numpy array of data in (12* number of years,lat,lon) - - Output:: - stds - if 3d numpy was entered, 3d (12,lat,lon) - ''' - data = misc.reshapeMonthlyData(dataset1) - stds = data.std(axis = 0, ddof = 1) - return stds - -def calcAnnualCycleStdevSubRegion(dataset1): - ''' - Purpose:: - Calculate monthly standard deviations for every sub-region - - Input:: - dataset1 - 2d numpy array of data in (nregion, 12* number of years) - - Output:: - stds - (nregion, 12) - ''' - nregion, nT = dataset1.shape - data = dataset1.reshape([nregion, nT/12, 12]) - stds = data.std(axis = 1, ddof = 1) - return stds - -def calcAnnualCycleDomainMeans(dataset1): - ''' - Purpose:: - Calculate spatially averaged monthly climatology and standard deviation - - Input:: - dataset1 - 3d numpy array of data in (12* number of years,lat,lon) - - Output:: - means - time series (12) - ''' - data = misc.reshapeMonthlyData(dataset1) - - # Calculate the 
means, month by month - means = np.zeros(12) - for i in np.arange(12): - means[i] = data[:, i, :, :].mean() - - return means - -def calcSpatialStdevRatio(evaluationData, referenceData): - ''' - Purpose :: - Calculate the ratio of spatial standard deviation (model standard deviation)/(observed standard deviation) - - Input :: - evaluationData - model data array (lat, lon) - referenceData- observation data array (lat,lon) - - Output:: - ratio of standard deviation (a scholar) - - ''' - stdevRatio = evaluationData[(evaluationData.mask==False) & (referenceData.mask==False)].std()/ \ - referenceData[(evaluationData.mask==False) & (referenceData.mask==False)].std() - return stdevRatio - -def calcTemporalStdev(dataset1): - ''' - Purpose:: - Calculate sample standard deviations over the time - - Input:: - dataset1 - 3d numpy array of data in (time,lat,lon) - - - Output:: - stds - time series (lat, lon) - ''' - stds = dataset1.std(axis = 0, ddof = 1) - return stds - -def calcAnnualCycleDomainStdev(dataset1): - ''' - Purpose:: - Calculate sample standard deviations representing the domain in each month - - Input:: - dataset1 - 3d numpy array of data in (12* number of years,lat,lon) - - Output:: - stds - time series (12) - ''' - data = misc.reshapeMonthlyData(dataset1) - - # Calculate the standard deviation, month by months - stds = np.zeros(12) - for i in np.arange(12): - stds[i] = data[:, i, :, :].std(ddof = 1) - - return stds - -def calcBiasAveragedOverTime(evaluationData, referenceData, option): # Mean Bias - ''' - Purpose:: - Calculate the mean difference between two fields over time for each grid point. - - Input:: - referenceData - array of data - evaluationData - array of data with same dimension of referenceData - option - string indicating absolute values or not - - Output:: - bias - difference between referenceData and evaluationData averaged over the first dimension - - ''' - # Calculate mean difference between two fields over time for each grid point - # Precrocessing of both obs and model data ensures the absence of missing values - diff = evaluationData - referenceData - if(option == 'abs'): - diff = abs(diff) - bias = diff.mean(axis = 0) - return bias - - -def calcBiasAveragedOverTimeAndSigLev(evaluationData, referenceData): - ''' - Purpose:: - Calculate mean difference between two fields over time for each grid point - - Classify missing data resulting from multiple times (using threshold - data requirement) - - i.e. if the working time unit is monthly data, and we are dealing with - multiple months of data then when we show mean of several months, we need - to decide what threshold of missing data we tolerate before classifying a - data point as missing data. 
-
-
-    Input::
-        referenceData - array of data
-        evaluationData - array of data with the same dimensions as referenceData
-
-    Output::
-        bias - difference between referenceData and evaluationData averaged over the first dimension
-        sigLev - significance of the difference (masked array)
-        For example: sig[iy,ix] = 0.95 means that the observation and model are different at the 95% confidence level
-        at X=ix and Y=iy
-    '''
-    # If either gridcell in each data set is missing, set that cell to
-    # missing for the output significance level
-    evaluationDataMask = process.create_mask_using_threshold(evaluationData, threshold = 0.75)
-    referenceDataMask = process.create_mask_using_threshold(referenceData, threshold = 0.75)
-
-    # The overall mask associated with missing data
-    overallMask = np.logical_or(evaluationDataMask, referenceDataMask)
-
-    diff = evaluationData - referenceData
-    bias = diff.mean(axis = 0)
-    sigLev = 1 - stats.ttest_rel(evaluationData, referenceData, axis = 0)[1]
-    sigLev[overallMask] = -100.
-    sigLev = ma.masked_equal(sigLev, -100.)
-    # Set mask for bias metric using missing data in obs or model data series
-    # i.e. if obs contains more than the threshold (e.g. 50%) missing data
-    # then classify the time-average bias as missing data for that location.
-    bias = ma.masked_array(bias.data, overallMask)
-    return bias, sigLev
-
-
-def calcBiasAveragedOverTimeAndDomain(evaluationData, referenceData):
-    '''
-    Purpose::
-        Calculate the mean difference between two fields over time and domain
-    Input::
-        referenceData - array of data
-        evaluationData - array of data with the same dimensions as referenceData
-
-    Output::
-        bias - difference between referenceData and evaluationData averaged over time and space
-
-    '''
-
-    diff = evaluationData - referenceData
-
-    bias = diff.mean()
-    return bias
-
-def calcBias(evaluationData, referenceData):
-    '''
-    Purpose::
-        Calculate the difference between two fields at each grid point
-
-    Input::
-        referenceData - array of data
-        evaluationData - array of data with the same dimensions as referenceData
-
-    Output::
-        diff - difference between referenceData and evaluationData
-
-    '''
-
-    diff = evaluationData - referenceData
-    return diff
-
-def calcRootMeanSquaredDifferenceAveragedOverTime(evaluationData, referenceData):
-    '''
-    Purpose::
-        Calculate the root mean squared difference (RMS error) averaged over time between two fields for each grid point
-
-    Input::
-        referenceData - array of data
-        evaluationData - array of data with the same dimensions as referenceData
-
-    Output::
-        rms - root mean squared difference; if the input is 1-d data, the output becomes a single floating-point number
-
-    '''
-    sqdiff = (evaluationData - referenceData)** 2
-    rms = np.sqrt(sqdiff.mean(axis = 0))
-    return rms
-
-
-def calcRootMeanSquaredDifferenceAveragedOverTimeAndDomain(evaluationData, referenceData):
-    '''
-    Purpose::
-        Calculate the root mean squared difference (RMS error) averaged over time and space between two fields
-
-    Input::
-        referenceData - array of data (should be a 3-d array)
-        evaluationData - array of data with the same dimensions as referenceData
-
-    Output::
-        rms - root mean squared difference averaged over time and space
-    '''
-    sqdiff = (evaluationData - referenceData)** 2
-    rms = np.sqrt(sqdiff.mean())
-    return rms
-
-def calcTemporalCorrelation(evaluationData, referenceData):
-    '''
-    Purpose ::
-        Calculate the temporal correlation.
-
-    Assumption(s) ::
-        The first dimension of two datasets is the time axis.
-
-    Input ::
-        evaluationData - model data array of any shape
-        referenceData - observation data array of any shape
-
-    Output::
-        temporalCorrelation - a 1-D array of temporal correlation coefficients, one per subregion
-        sigLev - a 1-D array of confidence levels related to temporalCorrelation
-
-    REF: 277-281 in Statistical Methods in the Atmospheric Sciences by Wilks, 1995, Academic Press, 467 pp.
-    sigLev: the correlation between model and observation is significant at the sigLev * 100 % level
-    '''
-    evaluationDataMask = process.create_mask_using_threshold(evaluationData, threshold = 0.75)
-    referenceDataMask = process.create_mask_using_threshold(referenceData, threshold = 0.75)
-
-    nregion = evaluationData.shape[0]
-    temporalCorrelation = ma.zeros([nregion])-100.
-    sigLev = ma.zeros([nregion])-100.
-    for iregion in np.arange(nregion):
-        temporalCorrelation[iregion], sigLev[iregion] = stats.pearsonr(evaluationData[iregion,:], referenceData[iregion,:])
-        sigLev[iregion] = 1 - sigLev[iregion]
-
-    temporalCorrelation = ma.masked_equal(temporalCorrelation.data, -100.)
-    sigLev = ma.masked_equal(sigLev.data, -100.)
-
-    return temporalCorrelation, sigLev
-
-def calcTemporalCorrelationSubRegion(evaluationData, referenceData):
-    '''
-    Purpose ::
-        Calculate the temporal correlation.
-
-    Assumption(s) ::
-        Both evaluation and reference data are subregion-averaged time series.
-
-    Input ::
-        evaluationData - model data array [region, t]
-        referenceData - observation data [region, t]
-
-    Output::
-        temporalCorrelation - a single floating-point temporal correlation coefficient
-        (the associated confidence level is computed internally but not returned)
-
-    REF: 277-281 in Statistical Methods in the Atmospheric Sciences by Wilks, 1995, Academic Press, 467 pp.
-    sigLev: the correlation between model and observation is significant at the sigLev * 100 % level
-    '''
-
-    temporalCorrelation = 0.
-    sigLev = 0.
-    t1 = evaluationData[:]
-    t2 = referenceData[:]
-    if t1.min() != t1.max() and t2.min() != t2.max():
-        temporalCorrelation, sigLev = stats.pearsonr(t1, t2)
-        sigLev = 1. - sigLev  # p-value => confidence level
-
-    return temporalCorrelation
-
-def calcPatternCorrelation(evaluationData, referenceData):
-    '''
-    Purpose ::
-        Calculate the spatial correlation.
-
-    Input ::
-        evaluationData - model data array (lat, lon)
-        referenceData - observation data array (lat, lon)
-
-    Output::
-        patternCorrelation - a single floating-point correlation coefficient
-        sigLev - a single floating-point number representing the confidence level
-
-    '''
-
-    patternCorrelation, sigLev = stats.pearsonr(evaluationData[(evaluationData.mask==False) & (referenceData.mask==False)],
-                                                referenceData[(evaluationData.mask==False) & (referenceData.mask==False)])
-    return patternCorrelation, sigLev
-
-
-def calcPatternCorrelationEachTime(evaluationData, referenceData):
-    '''
-    Purpose ::
-        Calculate the spatial correlation for each time step.
-
-    Assumption(s) ::
-        The first dimension of two datasets is the time axis.
-
-    Input ::
-        evaluationData - model data array (time, lat, lon)
-        referenceData - observation data array (time, lat, lon)
-
-    Output::
-        patternCorrelation - a time series (time)
-        sigLev - a time series (time)
-    '''
-    nT = evaluationData.shape[0]
-    patternCorrelation = ma.zeros(nT)-100.
-    sigLev = ma.zeros(nT)-100.
-    for it in np.arange(nT):
-        patternCorrelation[it], sigLev[it] = calcPatternCorrelation(evaluationData[it,:,:], referenceData[it,:,:])
-
-    return patternCorrelation, sigLev
-
-def calcNashSutcliff(evaluationData, referenceData):
-    '''
-    Assumption(s)::
-        Both evaluationData and referenceData are the same shape.
-        * lat, lon must match up
-        * time steps must align (i.e. months vs. months)
-
-    Input::
-        evaluationData - 3d (time, lat, lon) array of data
-        referenceData - 3d (time, lat, lon) array of data
-
-    Output::
-        nashcor - 1d array aligned along the time dimension of the input
-        datasets; a time series of the Nash-Sutcliffe coefficient of efficiency
-
-    '''
-    # Flatten the spatial dimensions
-    data1 = evaluationData[:]
-    data2 = referenceData[:]
-    nT = data1.shape[0]
-    data1.shape = nT, data1.size / nT
-    data2.shape = nT, data2.size / nT
-    meanData2 = data2.mean(axis = 1)
-
-    # meanData2 must be reshaped to 2D so as to obey
-    # numpy broadcasting rules
-    meanData2.shape = nT, 1
-    nashcor = 1 - ((((data2 - data1) ** 2).sum(axis = 1)) /
-                   (((data2 - meanData2) ** 2).sum(axis = 1)))
-    return nashcor
-
-
-def calcPdf(evaluationData, referenceData):
-    '''
-    Routine to calculate a normalized Probability Distribution Function with
-    bins set according to data range.
-    Equation from Perkins et al. 2007:
-        PS = sum(min(Z_O_i, Z_M_i)) where Z is the distribution (histogram of the data for either set)
-    called in do_rcmes_processing_sub.py
-
-    Inputs::
-        2 arrays of data; t1 is the modelData and t2 is the 3D obsData (time, lat, lon).
-        NB, time here is the number of time values, e.g. for the time period 199001010000 - 199201010000:
-        - if annual means (opt 1) was chosen, then t2.shape = (2, lat, lon)
-        - if monthly means (opt 2) was chosen, then t2.shape = (24, lat, lon)
-
-    User inputs: number of bins to use and edges (min and max)
-
-    Output:
-        one float which represents the PDF similarity score for the year
-
-    TODO: Clean up this docstring so we have a single purpose statement
-
-    Routine to calculate a normalised PDF with bins set according to data range.
-
-    Input::
-        2 data arrays, modelData and obsData
-
-    Output::
-        PDF similarity score for the year
-
-    '''
-    # float to store the final PDF similarity score
-    similarityScore = 0.0
-
-    print 'min modelData', evaluationData[:, :, :].min()
-    print 'max modelData', evaluationData[:, :, :].max()
-    print 'min obsData', referenceData[:, :, :].min()
-    print 'max obsData', referenceData[:, :, :].max()
-    # Find a distribution for the entire dataset.
-    # Prompt the user to enter the min, max and number of bin values.
-    # The max, min info above is to help guide the user with these choices.
-    print '****PDF input values from user required **** \n'
-    nbins = int(raw_input('Please enter the number of bins to use. \n'))
-    minEdge = float(raw_input('Please enter the minimum value to use for the edge. \n'))
-    maxEdge = float(raw_input('Please enter the maximum value to use for the edge. \n'))
-
-    mybins = np.linspace(minEdge, maxEdge, nbins)
-    print 'nbins is', nbins, 'mybins are', mybins
-
-    pdfMod, edges = np.histogram(evaluationData, bins = mybins, normed = True, density = True)
-    print 'evaluationData distribution and edges', pdfMod, edges
-    pdfObs, edges = np.histogram(referenceData, bins = mybins, normed = True, density = True)
-    print 'referenceData distribution and edges', pdfObs, edges
-
-    # Find the minimum of the two distributions at each bin
-    i = 0
-    for model_value in pdfMod:
-        print 'model_value is', model_value, 'pdfObs[', i, '] is', pdfObs[i]
-        if model_value < pdfObs[i]:
-            similarityScore += model_value
-        else:
-            similarityScore += pdfObs[i]
-        i += 1
-    print 'similarity_score is', similarityScore
-    return similarityScore
-
-
-
-def calculate_metrics_and_make_plots(varName, workdir, lons, lats, obsData, mdlData, obsRgn, mdlRgn, obsList, mdlList, subRegions, \
-                                     subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1):
-    '''
-    Purpose::
-        Calculate all the metrics used in the Kim et al. [2013] paper and plot them
-
-    Input::
-        varName - evaluating variable
-        workdir -
-        lons -
-        lats -
-        obsData -
-        mdlData -
-        obsRgn -
-        mdlRgn -
-        obsList -
-        mdlList -
-        subRegions -
-        subRgnLon0, subRgnLat0 - southwest boundary of sub-regions [numSubRgn]
-        subRgnLon1, subRgnLat1 - northeast boundary of sub-regions [numSubRgn]
-    Output::
-        png files
-
-    '''
-
-
-    nobs, nt, ny, nx = obsData.shape
-    nmodel = mdlData.shape[0]
-    ### TODO: unit conversion (K to C)
-    if varName == 'temp':
-        obsData[0, :, :, :] = obsData[0, :, :, :] - 273.15
-        if subRegions:
-            obsRgn[0, :, :] = obsRgn[0, :, :] - 273.15
-    if varName == 'prec' and obsData.max() > mdlData.max()*1000.:
-        mdlData[:, :, :, :] = mdlData[:, :, :, :]*86400.
-        if subRegions:
-            mdlRgn[:, :, :] = mdlRgn[:, :, :]*86400.
-
-    oTser, oClim = calcClimYear( obsData[0, :, :, :])
-    bias_of_overall_average = ma.zeros([nmodel, ny, nx])
-    spatial_stdev_ratio = np.zeros([nmodel])
-    spatial_corr = np.zeros([nmodel])
-    mdlList.append('ENS')
-
-    for imodel in np.arange(nmodel):
-        mTser, mClim = calcClimYear( mdlData[imodel,:,:,:])
-        bias_of_overall_average[imodel,:,:] = calcBias(mClim, oClim)
-        spatial_corr[imodel], sigLev = calcPatternCorrelation(oClim, mClim)
-        spatial_stdev_ratio[imodel] = calcSpatialStdevRatio(mClim, oClim)
-    fig_return = plotter.draw_contour_map(oClim, lats, lons, workdir+'/observed_climatology_'+varName, fmt='png', gridshape=(1, 1),
-                                          clabel='', ptitle='', subtitles=obsList, cmap=None,
-                                          clevs=None, nlevs=10, parallels=None, meridians=None,
-                                          extend='neither')
-    # TODO:
-    # Be sure to update the "gridshape" argument to be the number of sub plots (rows, columns). This should be improved so that the
-    # gridshape is optimally determined for a given number of models. For example:
-    # For 3 models, a gridshape of (2,2) would be sensible:
-    #  X X
-    #  X
-    #
-    fig_return = plotter.draw_contour_map(bias_of_overall_average, lats, lons, workdir+'/bias_of_climatology_'+varName, fmt='png', gridshape=(6, 2),
-                                          clabel='', ptitle='', subtitles=mdlList, cmap=None,
-                                          clevs=None, nlevs=10, parallels=None, meridians=None,
-                                          extend='neither')
-    Taylor_data = np.array([spatial_stdev_ratio, spatial_corr]).transpose()
-
-    fig_return = plotter.draw_taylor_diagram(Taylor_data, mdlList, refname='CRU', fname = workdir+'/Taylor_'+varName, fmt='png', frameon=False)
-
-    if subRegions:
-        nseason = 2      # (0: summer and 1: winter)
-        nregion = len(subRgnLon0)
-        season_name = ['summer','winter']
-        rowlabels = ['PNw','PNe','CAn','CAs','SWw','SWe','COL','GPn','GPc','GC','GL','NE','SE','FL']
-        collabels = ['M1','M2','M3','M4','M5','M6','ENS']
-        collabels[nmodel-1] = 'ENS'
-
-        for iseason in [0,1]:
-            portrait_subregion = np.zeros([4, nregion, nmodel])
-            portrait_titles = ['(a) Normalized Bias', '(b) Normalized STDV', '(c) Normalized RMSE', '(d) Correlation']
-            if iseason == 0:
-                monthBegin = 6
-                monthEnd = 8
-            if iseason == 1:
-                monthBegin = 12
-                monthEnd = 2
-
-            obsTser, obsClim = calcClimSeasonSubRegion(monthBegin, monthEnd, obsRgn[0,:,:])
-            for imodel in np.arange(nmodel):
-                mTser, mClim = calcClimSeasonSubRegion(monthBegin, monthEnd, mdlRgn[imodel,:,:])
-                for iregion in np.arange(nregion):
-                    portrait_subregion[0,iregion,imodel] = calcBias(mClim[iregion], obsClim[iregion])/calcTemporalStdev(obsTser[iregion,:])
-                    portrait_subregion[1,iregion,imodel] = calcTemporalStdev(mTser[iregion,:])/calcTemporalStdev(obsTser[iregion,:])
-                    portrait_subregion[2,iregion,imodel] = calcRootMeanSquaredDifferenceAveragedOverTime(mTser[iregion,:], obsTser[iregion,:])/calcTemporalStdev(obsTser[iregion,:])
-                    portrait_subregion[3,iregion,imodel] = calcTemporalCorrelationSubRegion(mTser[iregion,:], obsTser[iregion,:])
-            portrait_return = plotter.draw_portrait_diagram(portrait_subregion, rowlabels, collabels[0:nmodel], workdir+'/portrait_diagram_'+season_name[iseason]+'_'+varName, fmt='png',
-                                                            gridshape=(2, 2), xlabel='', ylabel='', clabel='',
-                                                            ptitle='', subtitles=portrait_titles, cmap=None, clevs=None,
-                                                            nlevs=10, extend='neither')
-        # annual cycle
-        nmonth = 12
-        times = np.arange(nmonth)
-        data_names = [obsList[0]] + list(mdlList)
-        annual_cycle = np.zeros([nregion, nmonth, nmodel+1])
-        obsTser, annual_cycle[:, :, 0] = calcAnnualCycleMeansSubRegion(obsRgn[0,:,:])
-        obsStd = calcAnnualCycleStdevSubRegion(obsRgn[0,:,:])
-        for imodel in np.arange(nmodel):
-            mdlTser, annual_cycle[:, :, imodel+1] = calcAnnualCycleMeansSubRegion(mdlRgn[imodel, :, :])
-        # Make annual_cycle shape compatible with draw_time_series
-        annual_cycle = annual_cycle.swapaxes(1, 2)
-        tseries_return = plotter.draw_time_series(annual_cycle, times, data_names, workdir+'/time_series_'+varName, gridshape=(7, 2),
-                                                  subtitles=rowlabels, label_month=True)
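
The calcNashSutcliff routine in the deleted module flattens the spatial dimensions and reshapes the observed mean so that NumPy broadcasting works, which can be hard to follow at a glance. The following is a minimal, self-contained sketch of the same per-time-step calculation on toy arrays; it assumes only NumPy, and the array names (obs, model) are illustrative rather than part of the RCMES toolkit.

import numpy as np

# Toy data: 4 time steps on a 3 x 5 grid; names are illustrative only.
nT, nLat, nLon = 4, 3, 5
rng = np.random.RandomState(0)
obs = rng.rand(nT, nLat, nLon)                 # plays the role of referenceData
model = obs + 0.1 * rng.randn(nT, nLat, nLon)  # plays the role of evaluationData

# Flatten the spatial dimensions so each time step becomes a 1-D sample.
obs2d = obs.reshape(nT, -1)
model2d = model.reshape(nT, -1)

# Nash-Sutcliffe efficiency per time step:
#   1 - sum((obs - model)**2) / sum((obs - mean(obs))**2)
obsMean = obs2d.mean(axis=1, keepdims=True)    # kept 2-D for broadcasting
nashcor = 1.0 - ((obs2d - model2d) ** 2).sum(axis=1) / ((obs2d - obsMean) ** 2).sum(axis=1)
print(nashcor)                                 # one coefficient per time step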
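
Similarly, calcPdf reads its bin settings interactively through raw_input, which makes it awkward to exercise from scripts or tests. The sketch below shows one non-interactive way to compute the Perkins et al. (2007) similarity score it implements, PS = sum over bins of min(Z_obs, Z_model). The helper name pdf_similarity and its defaults are hypothetical and not part of the toolkit, and the histograms are normalised to relative frequencies explicitly rather than through the normed/density arguments.

import numpy as np

def pdf_similarity(model_data, obs_data, nbins=20, min_edge=None, max_edge=None):
    # Hypothetical helper, not part of the toolkit: shared bin edges for both datasets.
    if min_edge is None:
        min_edge = min(model_data.min(), obs_data.min())
    if max_edge is None:
        max_edge = max(model_data.max(), obs_data.max())
    edges = np.linspace(min_edge, max_edge, nbins + 1)
    model_hist, _ = np.histogram(model_data, bins=edges)
    obs_hist, _ = np.histogram(obs_data, bins=edges)
    # Convert counts to relative frequencies so each distribution sums to (at most) 1.
    model_freq = model_hist / float(model_data.size)
    obs_freq = obs_hist / float(obs_data.size)
    # Perkins skill score: overlap of the two distributions; 1.0 means identical.
    return np.minimum(model_freq, obs_freq).sum()

rng = np.random.RandomState(1)
print(pdf_similarity(rng.rand(1000), rng.rand(1000)))   # close to 1.0 for similar data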
