Repository: climate Updated Branches: refs/heads/master 3ce4e2094 -> 0f2bc5bda
CLIMATE-813 - Fix PEP8 violations in utils module Project: http://git-wip-us.apache.org/repos/asf/climate/repo Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/6fbde124 Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/6fbde124 Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/6fbde124 Branch: refs/heads/master Commit: 6fbde124c1ac1ba0c2431fcdd4f5c023ca12cab7 Parents: 705403a Author: Ibrahim Jarif <jarifibra...@gmail.com> Authored: Sat Jun 18 15:54:19 2016 +0530 Committer: Ibrahim Jarif <jarifibra...@gmail.com> Committed: Sat Jun 18 15:56:04 2016 +0530 ---------------------------------------------------------------------- ocw/utils.py | 153 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 88 insertions(+), 65 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/climate/blob/6fbde124/ocw/utils.py ---------------------------------------------------------------------- diff --git a/ocw/utils.py b/ocw/utils.py index 37eb444..86ac842 100755 --- a/ocw/utils.py +++ b/ocw/utils.py @@ -21,12 +21,12 @@ import sys import datetime as dt import numpy as np import numpy.ma as ma -import datetime from mpl_toolkits.basemap import shiftgrid from dateutil.relativedelta import relativedelta from netCDF4 import num2date + def decode_time_values(dataset, time_var_name): ''' Decode NetCDF time values into Python datetime objects. @@ -50,7 +50,6 @@ def decode_time_values(dataset, time_var_name): time_base = parse_time_base(time_format) times = [] - arg = {} if time_units == 'months': # datetime.timedelta doesn't support a 'months' option. To remedy # this, a month == 30 days for our purposes. @@ -62,9 +61,11 @@ def decode_time_values(dataset, time_var_name): except: times_calendar = 'standard' - times = num2date(time_data[:], units=time_format, calendar=times_calendar) + times = num2date( + time_data[:], units=time_format, calendar=times_calendar) return times + def parse_time_units(time_format): ''' Parse units value from time units string. @@ -93,6 +94,7 @@ def parse_time_units(time_format): ) raise ValueError(err) + def parse_time_base(time_format): ''' Parse time base object from the time units string. @@ -119,17 +121,19 @@ def parse_time_base(time_format): '%Y/%m/%d%H:%M:%S', '%Y-%m-%d %H:%M', '%Y/%m/%d %H:%M', '%Y:%m:%d %H:%M', '%Y%m%d %H:%M', '%Y-%m-%d', '%Y/%m/%d', '%Y:%m:%d', '%Y%m%d', '%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H', - '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%SZ' + '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%SZ' ] - # Attempt to match the base time string with a possible format parsing string. + # Attempt to match the base time string with a possible format parsing + # string. for time_format in possible_time_formats: try: stripped_time = dt.datetime.strptime(base_time_string, time_format) break except ValueError: - # This exception means that the time format attempted was incorrect. - # No need to report or raise this, simply try the next one! + # This exception means that the time format attempted was + # incorrect. No need to report or raise this, + # simply try the next one! pass # If we got through the entire loop without a break, we couldn't parse the # date string with our known formats. @@ -145,6 +149,7 @@ def parse_time_base(time_format): return stripped_time + def parse_base_time_string(time_format): ''' Retrieve base time string from time data units information. @@ -169,6 +174,7 @@ def parse_base_time_string(time_format): return time_format.split('since')[1].strip() + def normalize_lat_lon_values(lats, lons, values): ''' Normalize lat/lon values @@ -182,45 +188,48 @@ def normalize_lat_lon_values(lats, lons, values): :type lons: :class:`numpy.ndarray` :param values: A 3D array of data values. - :returns: A :func:`tuple` of the form (adjust_lats, adjusted_lons, adjusted_values) + :returns: A :func:`tuple` of the form (adjust_lats, adjusted_lons, + adjusted_values) :raises ValueError: If the lat/lon values are not sorted. ''' - if lats.ndim ==1 and lons.ndim ==1: + if lats.ndim == 1 and lons.ndim == 1: # Avoid unnecessary shifting if all lons are higher than 180 if lons.min() > 180: - lons-=360 + lons -= 360 - # Make sure lats and lons are monotonically increasing - lats_decreasing = np.diff(lats) < 0 - lons_decreasing = np.diff(lons) < 0 + # Make sure lats and lons are monotonically increasing + lats_decreasing = np.diff(lats) < 0 + lons_decreasing = np.diff(lons) < 0 - # If all values are decreasing then they just need to be reversed - lats_reversed, lons_reversed = lats_decreasing.all(), lons_decreasing.all() + # If all values are decreasing then they just need to be reversed + lats_reversed, lons_reversed = (lats_decreasing.all(), + lons_decreasing.all()) - # If the lat values are unsorted then raise an exception - if not lats_reversed and lats_decreasing.any(): + # If the lat values are unsorted then raise an exception + if not lats_reversed and lats_decreasing.any(): raise ValueError('Latitudes must be sorted.') - # Perform same checks now for lons - if not lons_reversed and lons_decreasing.any(): + # Perform same checks now for lons + if not lons_reversed and lons_decreasing.any(): raise ValueError('Longitudes must be sorted.') - # Also check if lons go from [0, 360), and convert to [-180, 180) - # if necessary - lons_shifted = lons.max() > 180 - lats_out, lons_out, data_out = lats[:], lons[:], values[:] - # Now correct data if latlon grid needs to be shifted - if lats_reversed: + # Also check if lons go from [0, 360), and convert to [-180, 180) + # if necessary + lons_shifted = lons.max() > 180 + lats_out, lons_out, data_out = lats[:], lons[:], values[:] + # Now correct data if latlon grid needs to be shifted + if lats_reversed: lats_out = lats_out[::-1] data_out = data_out[..., ::-1, :] - if lons_reversed: + if lons_reversed: lons_out = lons_out[::-1] data_out = data_out[..., :, ::-1] - if lons_shifted: - data_out, lons_out = shiftgrid(180, data_out, lons_out, start=False) + if lons_shifted: + data_out, lons_out = shiftgrid( + 180, data_out, lons_out, start=False) return lats_out, lons_out, data_out else: @@ -264,19 +273,21 @@ def reshape_monthly_to_annually(dataset): return values + def calc_temporal_mean(dataset): - ''' Calculate temporal mean of dataset's values + ''' Calculate temporal mean of dataset's values - :param dataset: OCW Dataset whose first dimension is time + :param dataset: OCW Dataset whose first dimension is time :type dataset: :class:`dataset.Dataset` :returns: Mean values averaged for the first dimension (time) ''' return ma.mean(dataset.values, axis=0) + def calc_climatology_year(dataset): ''' Calculate climatology of dataset's values for each year - + :param dataset: Monthly binned Dataset object with an evenly divisible number of months. :type dataset: :class:`dataset.Dataset` @@ -295,16 +306,17 @@ def calc_climatology_year(dataset): else: # Get values reshaped to (num_year, 12, num_lats, num_lons) values = reshape_monthly_to_annually(dataset) - # Calculate mean values over year (num_year, num_lats, num_lons) + # Calculate mean values over year (num_year, num_lats, num_lons) annually_mean = values.mean(axis=1) # Calculate mean values over all years (num_lats, num_lons) total_mean = annually_mean.mean(axis=0) return annually_mean, total_mean + def calc_climatology_monthly(dataset): ''' Calculate monthly mean values for a dataset. - Follow COARDS climo stats calculation, the year can be given as 0 + Follow COARDS climo stats calculation, the year can be given as 0 but the min year allowed in Python is 1 http://www.cgd.ucar.edu/cms/eaton/netcdf/CF-20010629.htm#climatology @@ -312,9 +324,10 @@ def calc_climatology_monthly(dataset): divisible by 12 :type dataset: :class:`dataset.Dataset` - :returns: Mean values for each month of the year of shape (12, num_lats, num_lons) - and times array of datetime objects of length 12 - + :returns: Mean values for each month of the year of shape + (12, num_lats, num_lons) and times array of datetime objects + of length 12 + :raise ValueError: If the number of monthly bins is not divisible by 12 ''' @@ -326,97 +339,107 @@ def calc_climatology_monthly(dataset): raise ValueError(error) else: values = reshape_monthly_to_annually(dataset).mean(axis=0) - + # A year can commence from any month first_month = dataset.times[0].month - times = np.array([datetime.datetime(1, first_month, 1) + relativedelta(months = x) - for x in range(12)]) + times = np.array([dt.datetime(1, first_month, 1) + + relativedelta(months=x) + for x in range(12)]) return values, times + def calc_time_series(dataset): ''' Calculate time series mean values for a dataset - :param dataset: Dataset object + :param dataset: Dataset object :type dataset: :class:`dataset.Dataset` :returns: time series for the dataset of shape (nT) ''' - t_series =[] + t_series = [] for t in xrange(dataset.values.shape[0]): - t_series.append(dataset.values[t,:,:].mean()) - + t_series.append(dataset.values[t, :, :].mean()) + return t_series + def get_temporal_overlap(dataset_array): ''' Find the maximum temporal overlap across the observation and model datasets :param dataset_array: an array of OCW datasets ''' - start_time =[] - end_time =[] + start_time = [] + end_time = [] for dataset in dataset_array: start_time.append(dataset.time_range()[0]) end_time.append(dataset.time_range()[1]) return np.max(start_time), np.min(end_time) + def calc_subregion_area_mean_and_std(dataset_array, subregions): - ''' Calculate area mean and standard deviation values for a given subregions using datasets on common grid points + ''' Calculate area mean and standard deviation values for a given + subregions using datasets on common grid points :param dataset_array: An array of OCW Dataset Objects - :type list: + :type list: :param subregions: list of subregions :type subregions: :class:`numpy.ma.array` - :returns: area averaged time series for the dataset of shape (ntime, nsubregion) + :returns: area averaged time series for the dataset of shape + (ntime, nsubregion) ''' ndata = len(dataset_array) dataset0 = dataset_array[0] if dataset0.lons.ndim == 1: - lons, lats = np.meshgrid(dataset0.lons, dataset0.lats) + lons, lats = np.meshgrid(dataset0.lons, dataset0.lats) else: - lons = dataset0.lons - lats = dataset0.lats + lons = dataset0.lons + lats = dataset0.lats subregion_array = np.zeros(lons.shape) - mask_array = dataset_array[0].values[0,:].mask + mask_array = dataset_array[0].values[0, :].mask # dataset0.values.shsape[0]: length of the time dimension # spatial average - t_series =ma.zeros([ndata, dataset0.values.shape[0], len(subregions)]) + t_series = ma.zeros([ndata, dataset0.values.shape[0], len(subregions)]) # spatial standard deviation - spatial_std =ma.zeros([ndata, dataset0.values.shape[0], len(subregions)]) + spatial_std = ma.zeros([ndata, dataset0.values.shape[0], len(subregions)]) for iregion, subregion in enumerate(subregions): lat_min, lat_max, lon_min, lon_max = subregion[1] - y_index,x_index = np.where((lats >= lat_min) & (lats <= lat_max) & (lons >= lon_min) & (lons <= lon_max)) - subregion_array[y_index,x_index] = iregion+1 + y_index, x_index = np.where((lats >= lat_min) & ( + lats <= lat_max) & (lons >= lon_min) & (lons <= lon_max)) + subregion_array[y_index, x_index] = iregion + 1 for idata in np.arange(ndata): - t_series[idata, :, iregion] = ma.mean(dataset_array[idata].values[:,y_index, x_index], axis=1) - spatial_std[idata, :, iregion] = ma.std(dataset_array[idata].values[:,y_index, x_index], axis=1) - subregion_array = ma.array(subregion_array, mask=mask_array) + t_series[idata, :, iregion] = ma.mean(dataset_array[idata].values[ + :, y_index, x_index], axis=1) + spatial_std[idata, :, iregion] = ma.std( + dataset_array[idata].values[:, y_index, x_index], axis=1) + subregion_array = ma.array(subregion_array, mask=mask_array) return t_series, spatial_std, subregion_array + def calc_area_weighted_spatial_average(dataset, area_weight=False): '''Calculate area weighted average of the values in OCW dataset - :param dataset: Dataset object + :param dataset: Dataset object :type dataset: :class:`dataset.Dataset` :returns: time series for the dataset of shape (nT) ''' - if dataset.lats.ndim ==1: + if dataset.lats.ndim == 1: lons, lats = np.meshgrid(dataset.lons, dataset.lats) else: - lons = dataset.lons lats = dataset.lats - weights = np.cos(lats*np.pi/180.) + weights = np.cos(lats * np.pi / 180.) nt, ny, nx = dataset.values.shape spatial_average = ma.zeros(nt) for it in np.arange(nt): if area_weight: - spatial_average[it] = ma.average(dataset.values[it,:], weights = weights) + spatial_average[it] = ma.average( + dataset.values[it, :], weights=weights) else: - spatial_average[it] = ma.average(dataset.values[it,:]) + spatial_average[it] = ma.average(dataset.values[it, :]) return spatial_average