Repository: climate Updated Branches: refs/heads/master 06161fa4a -> 3d0c32116
CLIMATE-657 - Adding functions to calculate metrics - Remove metrics.SpatialMeanOfTemporalMeanBias - Add metrics.SpatialPatternTaylorDiagram - Add functions calc_bias, calc_stddev, calc_stddev_ratio, calc_correlation, calc_rmse - Update test_metrics.py Project: http://git-wip-us.apache.org/repos/asf/climate/repo Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/892676e7 Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/892676e7 Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/892676e7 Branch: refs/heads/master Commit: 892676e77b1bd599597ebaed20ab52233e06ce96 Parents: d4eeb03 Author: huikyole <[email protected]> Authored: Tue Aug 11 18:41:12 2015 -0700 Committer: huikyole <[email protected]> Committed: Tue Aug 11 18:41:12 2015 -0700 ---------------------------------------------------------------------- ocw/metrics.py | 170 +++++++++++++++++++++++++++++++---------- ocw/tests/test_metrics.py | 69 +++++++---------- 2 files changed, 155 insertions(+), 84 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/climate/blob/892676e7/ocw/metrics.py ---------------------------------------------------------------------- diff --git a/ocw/metrics.py b/ocw/metrics.py index 9b9aad6..4142498 100644 --- a/ocw/metrics.py +++ b/ocw/metrics.py @@ -84,10 +84,37 @@ class Bias(BinaryMetric): reference dataset in this metric run. :type target_dataset: :class:`dataset.Dataset` + :param average_over_time: if True, calculated bias is averaged for the axis=0 + :type average_over_time: 'bool' + :returns: The difference between the reference and target datasets. 
:rtype: :class:`numpy.ndarray` ''' - return target_dataset.values - ref_dataset.values + return calc_bias(target_dataset.values,ref_dataset.values) + +class SpatialPatternTaylorDiagram(BinaryMetric): + ''' Calculate the target to reference ratio of spatial standard deviation and pattern correlation''' + + def run(self, ref_dataset, target_dataset): + '''Calculate two metrics to plot a Taylor diagram to compare spatial patterns + + .. note:: + Overrides BinaryMetric.run() + + :param ref_dataset: The reference dataset to use in this metric run. + :type ref_dataset: :class:`dataset.Dataset` + + :param target_dataset: The target dataset to evaluate against the + reference dataset in this metric run. + :type target_dataset: :class:`dataset.Dataset` + + :returns: standard deviation ratio, pattern correlation coefficient + :rtype: :float:'float','float' + ''' + if ref_dataset.values.ndim >= 3 and target_dataset.values.ndim >= 3: + return calc_stddev_ratio(ref_dataset.values, target_dataset.values), calc_correlation(ref_dataset.values, target_dataset.values) + else: + print 'Please check if both reference and target datasets have time dimensions' class TemporalStdDev(UnaryMetric): @@ -106,7 +133,7 @@ class TemporalStdDev(UnaryMetric): :returns: The temporal standard deviation of the target dataset :rtype: :class:`ndarray` ''' - return ma.std(target_dataset.values, axis=0, ddof=1) + return calc_stddev(target_dataset.values, axis=0) class StdDevRatio(BinaryMetric): @@ -127,7 +154,8 @@ class StdDevRatio(BinaryMetric): :returns: The standard deviation ratio of the reference and target ''' - return ma.std(target_dataset.values)/ma.std(ref_dataset.values) + + return calc_stddev_ratio(ref_dataset.values, target_dataset.values) class PatternCorrelation(BinaryMetric): @@ -151,7 +179,8 @@ class PatternCorrelation(BinaryMetric): # stats.pearsonr returns correlation_coefficient, 2-tailed p-value # We only care about the correlation coefficient # Docs at 
http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html - return mstats.pearsonr(ref_dataset.values.flatten(), target_dataset.values.flatten())[0] + + return calc_correlation(ref_dataset.values, target_dataset.values) class TemporalCorrelation(BinaryMetric): @@ -179,23 +208,18 @@ class TemporalCorrelation(BinaryMetric): ''' num_times, num_lats, num_lons = reference_dataset.values.shape coefficients = ma.zeros([num_lats, num_lons]) - levels = ma.zeros([num_lats, num_lons]) for i in numpy.arange(num_lats): for j in numpy.arange(num_lons): - coefficients[i, j], levels[i, j] = ( - mstats.pearsonr( + coefficients[i, j] = calc_correlation( reference_dataset.values[:, i, j], - target_dataset.values[:, i, j] - ) - ) - levels[i, j] = 1 - levels[i, j] - return coefficients, levels + target_dataset.values[:, i, j]) + return coefficients class TemporalMeanBias(BinaryMetric): '''Calculate the bias averaged over time.''' - def run(self, ref_dataset, target_dataset, absolute=False): + def run(self, ref_dataset, target_dataset): '''Calculate the bias averaged over time. .. note:: @@ -211,19 +235,17 @@ class TemporalMeanBias(BinaryMetric): :returns: The mean bias between a reference and target dataset over time. ''' - diff = target_dataset.values - ref_dataset.values - if absolute: - diff = abs(diff) - mean_bias = ma.mean(diff, axis=0) + return calc_bias(target_dataset.values,ref_dataset.values, average_over_time=True) - return mean_bias -class SpatialMeanOfTemporalMeanBias(BinaryMetric): - '''Calculate the bias averaged over time and domain.''' +class RMSError(BinaryMetric): + '''Calculate the Root Mean Square Difference (RMS Error), with the mean + calculated over time and space.''' def run(self, reference_dataset, target_dataset): - '''Calculate the bias averaged over time and domain. + '''Calculate the Root Mean Square Difference (RMS Error), with the mean + calculated over time and space. .. 
note:: Overrides BinaryMetric.run() @@ -236,35 +258,99 @@ class SpatialMeanOfTemporalMeanBias(BinaryMetric): reference dataset in this metric run :type target_dataset: :class:`dataset.Dataset` - :returns: The bias averaged over time and domain + :returns: The RMS error, with the mean calculated over time and space ''' - bias = target_dataset.values - reference_dataset.values - return ma.mean(bias) + return calc_rmse(target_dataset.values, reference_dataset.values) +def calc_bias(target_array, reference_array, average_over_time = False): + ''' Calculate difference between two arrays -class RMSError(BinaryMetric): - '''Calculate the Root Mean Square Difference (RMS Error), with the mean - calculated over time and space.''' + :param target_array: an array to be evaluated, as model output + :type target_array: :class:'numpy.ma.core.MaskedArray' - def run(self, reference_dataset, target_dataset): - '''Calculate the Root Mean Square Difference (RMS Error), with the mean - calculated over time and space. + :param reference_array: an array of reference dataset + :type reference_array: :class:'numpy.ma.core.MaskedArray' - .. 
note:: - Overrides BinaryMetric.run() + :param average_over_time: if True, calculated bias is averaged for the axis=0 + :type average_over_time: 'bool' - :param reference_dataset: The reference dataset to use in this metric - run - :type reference_dataset: :class:`dataset.Dataset` + :returns: Biases array of the target dataset + :rtype: :class:'numpy.ma.core.MaskedArray' + ''' + + bias = target_array - reference_array + if average_over_time: + return ma.average(bias, axis=0) + else: + return bias - :param target_dataset: The target dataset to evaluate against the - reference dataset in this metric run - :type target_dataset: :class:`dataset.Dataset` +def calc_stddev(array, axis=None): + ''' Calculate a sample standard deviation of an array along the array - :returns: The RMS error, with the mean calculated over time and space - ''' + :param array: an array to calculate sample standard deviation + :type array: :class:'numpy.ma.core.MaskedArray' + + :param axis: Axis along which the sample standard deviation is computed. 
+ :type axis: 'int' + + :returns: sample standard deviation of array + :rtype: :class:'numpy.ma.core.MaskedArray' + ''' + + if isinstance(axis, int): + return ma.std(array, axis=axis, ddof=1) + else: + return ma.std(array, ddof=1) + + +def calc_stddev_ratio(target_array, reference_array): + ''' Calculate ratio of standard deviations of the two arrays + + :param target_array: an array to be evaluated, as model output + :type target_array: :class:'numpy.ma.core.MaskedArray' + + :param reference_array: an array of reference dataset + :type reference_array: :class:'numpy.ma.core.MaskedArray' + + :param average_over_time: if True, calculated bias is averaged for the axis=0 + :type average_over_time: 'bool' + + :returns: (standard deviation of target_array)/(standard deviation of reference array) + :rtype: :class:'numpy.ma.core.MaskedArray' + ''' + + return calc_stddev(target_array)/calc_stddev(reference_array) + +def calc_correlation(target_array, reference_array): + '''Calculate the correlation coefficient between two arrays. 
+ + :param target_array: an array to be evaluated, as model output + :type target_array: :class:'numpy.ma.core.MaskedArray' + + :param reference_array: an array of reference dataset + :type reference_array: :class:'numpy.ma.core.MaskedArray' + + :returns: pearson's correlation coefficient between the two input arrays + :rtype: :class:'numpy.ma.core.MaskedArray' + ''' + + return mstats.pearsonr(reference_array.flatten(), target_array.flatten())[0] + +def calc_rmse(target_array, reference_array): + ''' Calculate the root mean square error between the two arrays + + :param target_array: an array to be evaluated, as model output + :type target_array: :class:'numpy.ma.core.MaskedArray' + + :param reference_array: an array of reference dataset + :type reference_array: :class:'numpy.ma.core.MaskedArray' + + :param average_over_time: if True, calculated bias is averaged for the axis=0 + :type average_over_time: 'bool' - sqdiff = (reference_dataset.values - target_dataset.values) ** 2 - return (ma.mean(sqdiff))**0.5 + :returns: root mean square error + :rtype: :class:'float' + ''' + return (ma.mean((calc_bias(target_array, reference_array))**2))**0.5 http://git-wip-us.apache.org/repos/asf/climate/blob/892676e7/ocw/tests/test_metrics.py ---------------------------------------------------------------------- diff --git a/ocw/tests/test_metrics.py b/ocw/tests/test_metrics.py index 8edeaff..b61f9da 100644 --- a/ocw/tests/test_metrics.py +++ b/ocw/tests/test_metrics.py @@ -56,6 +56,30 @@ class TestBias(unittest.TestCase): expected_result.fill(-300) np.testing.assert_array_equal(self.bias.run(self.target_dataset, self.reference_dataset), expected_result) +class TestSpatialPatternTaylorDiagram(unittest.TestCase): + '''Test the metrics.SpatialPatternTaylorDiagram''' + def setUp(self): + self.taylor_diagram = metrics.SpatialPatternTaylorDiagram() + self.ref_dataset = Dataset( + np.array([1., 1., 1., 1., 1.]), + np.array([1., 1., 1., 1., 1.]), + np.array([dt.datetime(2000, x, 1) for x in 
range(1, 13)]), + # Reshaped array with 300 values incremented by 5 + np.arange(0, 1500, 5).reshape(12, 5, 5), + 'ds1' + ) + + self.tar_dataset = Dataset( + np.array([1., 1., 1., 1., 1.]), + np.array([1., 1., 1., 1., 1.]), + np.array([dt.datetime(2000, x, 1) for x in range(1, 13)]), + # Reshaped array with 300 values incremented by 2 + np.arange(0, 600, 2).reshape(12, 5, 5), + 'ds2' + ) + + def test_function_run(self): + self.assertTrue(self.taylor_diagram.run(self.ref_dataset, self.tar_dataset), [2.5,1.0]) class TestTemporalStdDev(unittest.TestCase): '''Test the metrics.TemporalStdDev metric.''' @@ -161,22 +185,18 @@ class TestTemporalCorrelation(unittest.TestCase): def test_identical_inputs(self): expected = np.ones(25).reshape(5, 5) - tc, cl = self.metric.run(self.ref_dataset, self.ref_dataset) + tc = self.metric.run(self.ref_dataset, self.ref_dataset) np.testing.assert_array_equal(tc, expected) - np.testing.assert_array_equal(cl, expected) def test_positive_correlation(self): expected = np.ones(25).reshape(5, 5) - tc, cl = self.metric.run(self.ref_dataset, self.tgt_dataset_inc) + tc = self.metric.run(self.ref_dataset, self.tgt_dataset_inc) np.testing.assert_array_equal(tc, expected) - np.testing.assert_array_equal(cl, expected) def test_negative_correlation(self): expected_tc = np.array([-1] * 25).reshape(5, 5) - expected_cl = np.ones(25).reshape(5, 5) - tc, cl = self.metric.run(self.ref_dataset, self.tgt_dataset_dec) + tc = self.metric.run(self.ref_dataset, self.tgt_dataset_dec) np.testing.assert_array_equal(tc, expected_tc) - np.testing.assert_array_equal(cl, expected_cl) class TestTemporalMeanBias(unittest.TestCase): @@ -208,41 +228,6 @@ class TestTemporalMeanBias(unittest.TestCase): expected_result.fill(-300) np.testing.assert_array_equal(self.mean_bias.run(self.target_dataset,self.reference_dataset), expected_result) - def test_function_run_abs(self): - '''Test mean bias function between reference dataset and target dataset with abs as True.''' - 
expected_result = np.zeros((5, 5), dtype=np.int) - expected_result.fill(300) - np.testing.assert_array_equal(self.mean_bias.run(self.reference_dataset, self.target_dataset, True), expected_result) - - -class TestSpatialMeanOfTemporalMeanBias(unittest.TestCase): - '''Test the metrics.SpatialMeanOfTemporalMeanBias metric.''' - def setUp(self): - # Set metric. - self.metric = metrics.SpatialMeanOfTemporalMeanBias() - # Initialize reference dataset. - self.ref_lats = np.array([10, 20, 30, 40, 50]) - self.ref_lons = np.array([5, 15, 25, 35, 45]) - self.ref_times = np.array([dt.datetime(2000, x, 1) - for x in range(1, 13)]) - self.ref_values = np.array(range(300)).reshape(12, 5, 5) - self.ref_variable = "ref" - self.ref_dataset = Dataset(self.ref_lats, self.ref_lons, - self.ref_times, self.ref_values, self.ref_variable) - # Initialize target dataset. - self.tgt_lats = np.array([10, 20, 30, 40, 50]) - self.tgt_lons = np.array([5, 15, 25, 35, 45]) - self.tgt_times = np.array([dt.datetime(2000, x, 1) - for x in range(1, 13)]) - self.tgt_values = np.array(range(299, -1, -1)).reshape(12, 5, 5) - self.tgt_variable = "tgt" - self.tgt_dataset = Dataset(self.tgt_lats, self.tgt_lons, - self.tgt_times, self.tgt_values, self.tgt_variable) - - def test_function_run(self): - result = self.metric.run(self.ref_dataset, self.tgt_dataset) - self.assertEqual(result, 0.0) - class TestRMSError(unittest.TestCase): '''Test the metrics.RMSError metric.'''
