[
https://issues.apache.org/jira/browse/CLIMATE-88?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Alex Goodman updated CLIMATE-88:
--------------------------------
Description:
The performance for many of the functions in metrics.py can be improved by
removing some unnecessary loops and utilizing shape manipulation.
Here is some example code that benchmarks the differences between the original
and proposed methodology if you did not see it in the mailing list:
# Test script comparing two different methodologies for determining monthly
# climatology.
# Author: Alex Goodman
from timeit import Timer
import os
import numpy as np
def calcAnnualCycleStdev(dataset1, times):
    '''
    Purpose::
        Compute, for every grid point, the sample standard deviation of each
        calendar month using an explicit loop over the twelve months.
    Input::
        dataset1 - 3d numpy array of data in (time, lat, lon)
        times - array with one entry per time step; each entry is compared
                against the month numbers 1-12 to select that month's steps
    Output::
        stds - 3d numpy array (12, lat, lon); index 0 holds the result for
               month number 1, index 11 the result for month number 12
    '''
    # The time values are used directly as month numbers; no conversion is
    # performed here
    month_of_step = times

    # Pre-allocate the result: one (lat, lon) slab per calendar month
    n_lat, n_lon = dataset1.shape[1], dataset1.shape[2]
    stds = np.empty((12, n_lat, n_lon))

    # Select the time steps of each month with a boolean mask and take the
    # sample (ddof = 1) standard deviation along the time axis
    for slot, month_number in enumerate(range(1, 13)):
        selected = dataset1[month_of_step == month_number, :, :]
        stds[slot, :, :] = selected.std(axis=0, ddof=1)

    return stds
def calcAnnualCycleStdev2(dataset1, times):
    '''
    Purpose::
        Calculate monthly standard deviations for every grid point without
        looping over the months, by regrouping the time axis into
        (year, month) and reducing over the year axis.
    Input::
        dataset1 - 3d numpy array of data in (12 * number of years, lat, lon).
                   Time step j is treated as belonging to month slot j % 12,
                   so the series must consist of whole, consecutive 12-step
                   cycles. The array is not modified.
        times - unused; accepted only so the signature matches
                calcAnnualCycleStdev
    Output::
        stds - 3d numpy array (12, lat, lon) of sample (ddof = 1) standard
               deviations, one slab per position in the 12-step cycle
    Raises::
        ValueError - from numpy reshape when the length of the time axis is
                     not a multiple of 12
    '''
    nMonth, nGrdY, nGrdX = dataset1.shape

    # Regroup the time axis as (year, month). reshape returns a new array
    # object (a view whenever possible), so the caller's array keeps its
    # original shape without having to be reset afterwards. Floor division
    # keeps the year count an integer on both Python 2 and Python 3.
    byYear = dataset1.reshape(nMonth // 12, 12, nGrdY, nGrdX)

    # Reducing over the year axis leaves one standard deviation per month
    # and grid point
    stds = byYear.std(axis=0, ddof=1)

    return stds
def main():
    '''
    Purpose::
        Benchmark calcAnnualCycleStdev against calcAnnualCycleStdev2 on
        randomly generated data for several record lengths and print the
        approximate runtime of each.
    Input::
        None
    Output::
        None - the timings are printed to standard output
    '''
    nyrs = [5, 10, 20, 50, 100]
    for nyr in nyrs:
        # A parenthesised single-argument print behaves the same as the
        # Python 2 print statement and is also valid Python 3
        print('---Starting test for %d years of data---' % (nyr))

        # Generate a random numpy array for our benchmark, together with the
        # matching month numbers (1-12 repeated once per year)
        data = np.random.rand(12*nyr, 500, 500)
        month = np.tile(np.arange(1, 13), nyr)

        # Time a single call of the original, loop-based function
        print('Testing original function...')
        t = Timer(lambda: calcAnnualCycleStdev(data, month))
        t1 = t.timeit(number=1)
        print('Approximate runtime: %1.2f s' % (t1))

        # Time a single call of the revised, reshape-based function
        print('Testing revised function...')
        t = Timer(lambda: calcAnnualCycleStdev2(data, month))
        t2 = t.timeit(number=1)
        print('Approximate runtime: %1.2f s' % (t2))


if __name__ == '__main__':
    main()
was: The performance for many of the functions in metrics.py can be improved
by removing some unnecessary loops and utilizing shape manipulation.
> Performance improvements for metrics.py
> ---------------------------------------
>
> Key: CLIMATE-88
> URL: https://issues.apache.org/jira/browse/CLIMATE-88
> Project: Apache Open Climate Workbench
> Issue Type: Improvement
> Components: metrics
> Affects Versions: 0.1-incubating
> Reporter: Alex Goodman
> Assignee: Chris A. Mattmann
> Fix For: 0.1-incubating
>
> Attachments: benchmark_metrics.py
>
>
> The performance for many of the functions in metrics.py can be improved by
> removing some unnecessary loops and utilizing shape manipulation.
> Here is some example code that benchmarks the differences between the
> original and proposed methodology if you did not see it in the mailing list:
> # Test script comparing two different methodologies for determining monthly
> climatology.
> # Author: Alex Goodman
> from timeit import Timer
> import os
> import numpy as np
> def calcAnnualCycleStdev(dataset1, times):
> '''
> Purpose::
> Calculate monthly standard deviations for every grid point
> Input::
> dataset1 - 3d numpy array of data in (12* number of years,lat,lon)
> times - an array of python datetime objects
> Output::
> stds - if 3d numpy was entered, 3d (12,lat,lon)
> '''
> # Extract months from time variable
> months = times
>
> # empty array to store means
> stds = np.empty((12, dataset1.shape[1], dataset1.shape[2]))
>
> # Calculate sample standard deviation month by month (January - December)
> for i in np.arange(12):
> stds[i, :, :] = dataset1[months == i+1, :, :].std(axis = 0, ddof = 1)
>
> return stds
>
> def calcAnnualCycleStdev2(dataset1, times):
> '''
> Purpose::
> Calculate monthly standard deviations for every grid point
> Input::
> dataset1 - 3d numpy array of data in (12* number of years,lat,lon)
> times - an array of python datetime objects
> Output::
> stds - if 3d numpy was entered, 3d (12,lat,lon)
> '''
> # Extract months from time variable
> months = times
> nMonth, nGrdY, nGrdX = dataset1.shape
> # Find the std month by month as before, but this time change the
> # shape of the input array instead of a loop
> dataset1.shape = nMonth/12, 12, nGrdY, nGrdX
> stds = dataset1.std(axis = 0, ddof = 1)
> # Since numpy arrays are treated as shallow copies when passed into a
> # function, the shape must be reset as to insure that dataset1's
> # original shape is preserved after this function is called
> dataset1.shape = nMonth, nGrdX, nGrdY
>
> return stds
>
> def main():
> nyrs = [5, 10, 20, 50, 100]
> for nyr in nyrs:
> print '---Starting test for %d years of data---' %(nyr)
> # Generate a random numpy array for our benchmark
> data = np.random.rand(12*nyr, 500, 500)
> month = np.tile(np.arange(1,13), nyr)
> print 'Testing original function...'
> t = Timer(lambda: calcAnnualCycleStdev(data, month))
> t1 = t.timeit(number=1)
> print 'Approximate runtime: %1.2f s' %(t1)
>
> # Test revised function
> print 'Testing revised function...'
> t = Timer(lambda: calcAnnualCycleStdev2(data, month))
> t2 = t.timeit(number=1)
> print 'Approximate runtime: %1.2f s' %(t2)
>
> if __name__ == '__main__':
> main()
--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators
For more information on JIRA, see: http://www.atlassian.com/software/jira