[1/5] climate git commit: CLIMATE-657 - Adding functions to calculate metrics

huikyole Tue, 18 Aug 2015 17:29:18 -0700

Repository: climate
Updated Branches:
  refs/heads/master 06161fa4a -> 3d0c32116



CLIMATE-657 - Adding functions to calculate metrics

- Remove metrics.SpatialMeanOfTemporalMeanBias
- Add metrics.SpatialPatternTaylorDiagram
- Add functions calc_bias, calc_stddev, calc_stddev_ratio, calc_correlation, calc_rmse
- Update test_metrics.py


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/892676e7
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/892676e7
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/892676e7

Branch: refs/heads/master
Commit: 892676e77b1bd599597ebaed20ab52233e06ce96
Parents: d4eeb03
Author: huikyole <[email protected]>
Authored: Tue Aug 11 18:41:12 2015 -0700
Committer: huikyole <[email protected]>
Committed: Tue Aug 11 18:41:12 2015 -0700

----------------------------------------------------------------------
 ocw/metrics.py            | 170 +++++++++++++++++++++++++++++++----------
 ocw/tests/test_metrics.py |  69 +++++++----------
 2 files changed, 155 insertions(+), 84 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/892676e7/ocw/metrics.py
----------------------------------------------------------------------
diff --git a/ocw/metrics.py b/ocw/metrics.py
index 9b9aad6..4142498 100644
--- a/ocw/metrics.py
+++ b/ocw/metrics.py
@@ -84,10 +84,37 @@ class Bias(BinaryMetric):
             reference dataset in this metric run.
         :type target_dataset: :class:`dataset.Dataset`
 
+        :param average_over_time: if True, calculated bias is averaged for the axis=0
+        :type average_over_time: 'bool'
+
         :returns: The difference between the reference and target datasets.
         :rtype: :class:`numpy.ndarray`
         '''
-        return target_dataset.values - ref_dataset.values  
+        return calc_bias(target_dataset.values,ref_dataset.values) 
+
+class SpatialPatternTaylorDiagram(BinaryMetric):
+    ''' Calculate the target to reference ratio of spatial standard deviation and pattern correlation'''
+
+    def run(self, ref_dataset, target_dataset):
+        '''Calculate two metrics to plot a Taylor diagram to compare spatial patterns
+
+        .. note::
+           Overrides BinaryMetric.run() 
+        
+        :param ref_dataset: The reference dataset to use in this metric run.
+        :type ref_dataset: :class:`dataset.Dataset`
+
+        :param target_dataset: The target dataset to evaluate against the
+            reference dataset in this metric run.
+        :type target_dataset: :class:`dataset.Dataset`
+
+        :returns: standard deviation ratio, pattern correlation coefficient
+        :rtype: :float:'float','float' 
+        '''
+        if ref_dataset.values.ndim >= 3 and target_dataset.values.ndim >= 3:
+            return calc_stddev_ratio(ref_dataset.values, target_dataset.values), calc_correlation(ref_dataset.values, target_dataset.values)
+        else:
+            print 'Please check if both reference and target datasets have time dimensions'
 
 
 class TemporalStdDev(UnaryMetric):
@@ -106,7 +133,7 @@ class TemporalStdDev(UnaryMetric):
         :returns: The temporal standard deviation of the target dataset
         :rtype: :class:`ndarray`
         '''
-        return ma.std(target_dataset.values, axis=0, ddof=1)
+        return calc_stddev(target_dataset.values, axis=0)
 
 
 class StdDevRatio(BinaryMetric):
@@ -127,7 +154,8 @@ class StdDevRatio(BinaryMetric):
 
         :returns: The standard deviation ratio of the reference and target
         '''
-        return ma.std(target_dataset.values)/ma.std(ref_dataset.values)
+       
+        return calc_stddev_ratio(ref_dataset.values, target_dataset.values)
 
 
 class PatternCorrelation(BinaryMetric):
@@ -151,7 +179,8 @@ class PatternCorrelation(BinaryMetric):
         # stats.pearsonr returns correlation_coefficient, 2-tailed p-value
         # We only care about the correlation coefficient
         # Docs at http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html
-        return mstats.pearsonr(ref_dataset.values.flatten(), target_dataset.values.flatten())[0]
+
+        return calc_correlation(ref_dataset.values, target_dataset.values)
 
 
 class TemporalCorrelation(BinaryMetric):
@@ -179,23 +208,18 @@ class TemporalCorrelation(BinaryMetric):
         '''
         num_times, num_lats, num_lons = reference_dataset.values.shape
         coefficients = ma.zeros([num_lats, num_lons])
-        levels = ma.zeros([num_lats, num_lons])
         for i in numpy.arange(num_lats):
             for j in numpy.arange(num_lons):
-                coefficients[i, j], levels[i, j] = (
-                    mstats.pearsonr(
+                coefficients[i, j] = calc_correlation(
                         reference_dataset.values[:, i, j],
-                        target_dataset.values[:, i, j]
-                    )
-                )
-                levels[i, j] = 1 - levels[i, j]
-        return coefficients, levels 
+                        target_dataset.values[:, i, j])
+        return coefficients 
 
 
 class TemporalMeanBias(BinaryMetric):
     '''Calculate the bias averaged over time.'''
 
-    def run(self, ref_dataset, target_dataset, absolute=False):
+    def run(self, ref_dataset, target_dataset):
         '''Calculate the bias averaged over time.
 
         .. note::
@@ -211,19 +235,17 @@ class TemporalMeanBias(BinaryMetric):
         :returns: The mean bias between a reference and target dataset over time.
         '''
 
-        diff = target_dataset.values - ref_dataset.values 
-        if absolute:
-            diff = abs(diff)
-        mean_bias = ma.mean(diff, axis=0)
+        return calc_bias(target_dataset.values,ref_dataset.values, average_over_time=True)
 
-        return mean_bias
 
 
-class SpatialMeanOfTemporalMeanBias(BinaryMetric):
-    '''Calculate the bias averaged over time and domain.'''
+class RMSError(BinaryMetric):
+    '''Calculate the Root Mean Square Difference (RMS Error), with the mean
+       calculated over time and space.'''
 
     def run(self, reference_dataset, target_dataset):
-        '''Calculate the bias averaged over time and domain.
+        '''Calculate the Root Mean Square Difference (RMS Error), with the mean
+           calculated over time and space.
 
         .. note::
            Overrides BinaryMetric.run()
@@ -236,35 +258,99 @@ class SpatialMeanOfTemporalMeanBias(BinaryMetric):
             reference dataset in this metric run
         :type target_dataset: :class:`dataset.Dataset`
 
-        :returns: The bias averaged over time and domain
+        :returns: The RMS error, with the mean calculated over time and space
         '''
 
-        bias = target_dataset.values - reference_dataset.values 
-        return ma.mean(bias)
+        return calc_rmse(target_dataset.values, reference_dataset.values)
 
+def calc_bias(target_array, reference_array, average_over_time = False):
+    ''' Calculate difference between two arrays
 
-class RMSError(BinaryMetric):
-    '''Calculate the Root Mean Square Difference (RMS Error), with the mean
-       calculated over time and space.'''
+    :param target_array: an array to be evaluated, as model output
+    :type target_array: :class:'numpy.ma.core.MaskedArray'
 
-    def run(self, reference_dataset, target_dataset):
-        '''Calculate the Root Mean Square Difference (RMS Error), with the mean
-           calculated over time and space.
+    :param reference_array: an array of reference dataset
+    :type reference_array: :class:'numpy.ma.core.MaskedArray'
 
-        .. note::
-           Overrides BinaryMetric.run()
+    :param average_over_time: if True, calculated bias is averaged for the axis=0
+    :type average_over_time: 'bool'
 
-        :param reference_dataset: The reference dataset to use in this metric
-            run
-        :type reference_dataset: :class:`dataset.Dataset`
+    :returns: Biases array of the target dataset
+    :rtype: :class:'numpy.ma.core.MaskedArray'
+    '''
+    
+    bias = target_array - reference_array
+    if average_over_time:
+        return ma.average(bias, axis=0)
+    else:
+        return bias
 
-        :param target_dataset: The target dataset to evaluate against the
-            reference dataset in this metric run
-        :type target_dataset: :class:`dataset.Dataset`
+def calc_stddev(array, axis=None):
+    ''' Calculate a sample standard deviation of an array along the array
 
-        :returns: The RMS error, with the mean calculated over time and space
-        '''
+    :param array: an array to calculate sample standard deviation
+    :type array: :class:'numpy.ma.core.MaskedArray'
+    
+    :param axis: Axis along which the sample standard deviation is computed.
+    :type axis: 'int'
+
+    :returns: sample standard deviation of array
+    :rtype: :class:'numpy.ma.core.MaskedArray'
+    '''
+
+    if isinstance(axis, int):
+        return ma.std(array, axis=axis, ddof=1)
+    else:
+        return ma.std(array, ddof=1)
+        
+
+def calc_stddev_ratio(target_array, reference_array):
+    ''' Calculate ratio of standard deivations of the two arrays
+
+    :param target_array: an array to be evaluated, as model output
+    :type target_array: :class:'numpy.ma.core.MaskedArray'
+
+    :param reference_array: an array of reference dataset
+    :type reference_array: :class:'numpy.ma.core.MaskedArray'
+
+    :param average_over_time: if True, calculated bias is averaged for the axis=0
+    :type average_over_time: 'bool'
+
+    :returns: (standard deviation of target_array)/(standard deviation of reference array)
+    :rtype: :class:'numpy.ma.core.MaskedArray'
+    '''
+
+    return calc_stddev(target_array)/calc_stddev(reference_array)
+
+def calc_correlation(target_array, reference_array):
+    '''Calculate the correlation coefficient between two arrays.
+
+    :param target_array: an array to be evaluated, as model output
+    :type target_array: :class:'numpy.ma.core.MaskedArray'
+
+    :param reference_array: an array of reference dataset
+    :type reference_array: :class:'numpy.ma.core.MaskedArray'
+
+    :returns: pearson's correlation coefficient between the two input arrays
+    :rtype: :class:'numpy.ma.core.MaskedArray'
+    '''
+
+    return mstats.pearsonr(reference_array.flatten(), target_array.flatten())[0]
+       
+def calc_rmse(target_array, reference_array):
+    ''' Calculate ratio of standard deivations of the two arrays
+
+    :param target_array: an array to be evaluated, as model output
+    :type target_array: :class:'numpy.ma.core.MaskedArray'
+
+    :param reference_array: an array of reference dataset
+    :type reference_array: :class:'numpy.ma.core.MaskedArray'
+
+    :param average_over_time: if True, calculated bias is averaged for the axis=0
+    :type average_over_time: 'bool'
 
-        sqdiff = (reference_dataset.values - target_dataset.values) ** 2
-        return (ma.mean(sqdiff))**0.5
+    :returns: root mean square error
+    :rtype: :class:'float'
+    '''
 
+    return (ma.mean((calc_bias(target_array, reference_array))**2))**0.5 

http://git-wip-us.apache.org/repos/asf/climate/blob/892676e7/ocw/tests/test_metrics.py
----------------------------------------------------------------------
diff --git a/ocw/tests/test_metrics.py b/ocw/tests/test_metrics.py
index 8edeaff..b61f9da 100644
--- a/ocw/tests/test_metrics.py
+++ b/ocw/tests/test_metrics.py
@@ -56,6 +56,30 @@ class TestBias(unittest.TestCase):
         expected_result.fill(-300)
         np.testing.assert_array_equal(self.bias.run(self.target_dataset, self.reference_dataset), expected_result)
 
+class TestSpatialPatternTaylorDiagram(unittest.TestCase):
+    '''Test the metrics.SpatialPatternTaylorDiagram'''
+    def setUp(self):
+        self.taylor_diagram = metrics.SpatialPatternTaylorDiagram()
+        self.ref_dataset = Dataset(
+            np.array([1., 1., 1., 1., 1.]),
+            np.array([1., 1., 1., 1., 1.]),
+            np.array([dt.datetime(2000, x, 1) for x in range(1, 13)]),
+            # Reshapped array with 300 values incremented by 5
+            np.arange(0, 1500, 5).reshape(12, 5, 5),
+            'ds1'
+        )
+
+        self.tar_dataset = Dataset(
+            np.array([1., 1., 1., 1., 1.]),
+            np.array([1., 1., 1., 1., 1.]),
+            np.array([dt.datetime(2000, x, 1) for x in range(1, 13)]),
+            # Reshapped array with 300 values incremented by 2
+            np.arange(0, 600, 2).reshape(12, 5, 5),
+            'ds2'
+        )
+
+    def test_function_run(self):
+        self.assertTrue(self.taylor_diagram.run(self.ref_dataset, self.tar_dataset), [2.5,1.0])
 
 class TestTemporalStdDev(unittest.TestCase):
     '''Test the metrics.TemporalStdDev metric.'''
@@ -161,22 +185,18 @@ class TestTemporalCorrelation(unittest.TestCase):
 
     def test_identical_inputs(self):
         expected = np.ones(25).reshape(5, 5)
-        tc, cl = self.metric.run(self.ref_dataset, self.ref_dataset)
+        tc = self.metric.run(self.ref_dataset, self.ref_dataset)
         np.testing.assert_array_equal(tc, expected)
-        np.testing.assert_array_equal(cl, expected)
 
     def test_positive_correlation(self):
         expected = np.ones(25).reshape(5, 5)
-        tc, cl = self.metric.run(self.ref_dataset, self.tgt_dataset_inc)
+        tc = self.metric.run(self.ref_dataset, self.tgt_dataset_inc)
         np.testing.assert_array_equal(tc, expected)
-        np.testing.assert_array_equal(cl, expected)
 
     def test_negative_correlation(self):
         expected_tc = np.array([-1] * 25).reshape(5, 5)
-        expected_cl = np.ones(25).reshape(5, 5)
-        tc, cl = self.metric.run(self.ref_dataset, self.tgt_dataset_dec)
+        tc = self.metric.run(self.ref_dataset, self.tgt_dataset_dec)
         np.testing.assert_array_equal(tc, expected_tc)
-        np.testing.assert_array_equal(cl, expected_cl)
 
 
 class TestTemporalMeanBias(unittest.TestCase):
@@ -208,41 +228,6 @@ class TestTemporalMeanBias(unittest.TestCase):
         expected_result.fill(-300)
         
np.testing.assert_array_equal(self.mean_bias.run(self.target_dataset,self.reference_dataset),
 expected_result)
 
-    def test_function_run_abs(self):
-        '''Test mean bias function between reference dataset and target dataset with abs as True.'''
-        expected_result = np.zeros((5, 5), dtype=np.int)
-        expected_result.fill(300)
-        np.testing.assert_array_equal(self.mean_bias.run(self.reference_dataset, self.target_dataset, True), expected_result)
-
-
-class TestSpatialMeanOfTemporalMeanBias(unittest.TestCase):
-    '''Test the metrics.SpatialMeanOfTemporalMeanBias metric.'''
-    def setUp(self):
-        # Set metric.
-        self.metric = metrics.SpatialMeanOfTemporalMeanBias()
-        # Initialize reference dataset.
-        self.ref_lats = np.array([10, 20, 30, 40, 50])
-        self.ref_lons = np.array([5, 15, 25, 35, 45])
-        self.ref_times = np.array([dt.datetime(2000, x, 1)
-                                   for x in range(1, 13)])
-        self.ref_values = np.array(range(300)).reshape(12, 5, 5)
-        self.ref_variable = "ref"
-        self.ref_dataset = Dataset(self.ref_lats, self.ref_lons,
-            self.ref_times, self.ref_values, self.ref_variable)
-        # Initialize target dataset.
-        self.tgt_lats = np.array([10, 20, 30, 40, 50])
-        self.tgt_lons = np.array([5, 15, 25, 35, 45])
-        self.tgt_times = np.array([dt.datetime(2000, x, 1)
-                                   for x in range(1, 13)])
-        self.tgt_values = np.array(range(299, -1, -1)).reshape(12, 5, 5)
-        self.tgt_variable = "tgt"
-        self.tgt_dataset = Dataset(self.tgt_lats, self.tgt_lons,
-            self.tgt_times, self.tgt_values, self.tgt_variable)
-
-    def test_function_run(self):
-        result = self.metric.run(self.ref_dataset, self.tgt_dataset)
-        self.assertEqual(result, 0.0)
-
 
 class TestRMSError(unittest.TestCase):
     '''Test the metrics.RMSError metric.'''

[1/5] climate git commit: CLIMATE-657 - Adding functions to calculate metrics

Reply via email to