Repository: climate
Updated Branches:
  refs/heads/master 34bb0aaec -> 6598b9f1a
CLIMATE-903 - Adding a module to calculate the least-squares trend and standard error

- Debugging in dataset_processor
- A new function (utils.calculate_temporal_trends) to calculate the
  least-squares trend and standard error of the input time series
- A new function (plotter.fill_US_states_with_color) to visualize US NCA
  regions

Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/77674bde
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/77674bde
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/77674bde

Branch: refs/heads/master
Commit: 77674bde27814c63a2d8da32019c92d928be1e12
Parents: 34bb0aa
Author: huikyole <huiky...@argo.jpl.nasa.gov>
Authored: Thu Mar 23 10:18:27 2017 -0700
Committer: huikyole <huiky...@argo.jpl.nasa.gov>
Committed: Thu Mar 23 10:18:27 2017 -0700

----------------------------------------------------------------------
 examples/temperature_trends_over_CONUS.py | 102 +++++++++++++++++++++++++
 ocw/dataset_processor.py                  | 102 ++++++++++++++++---------
 ocw/plotter.py                            |  71 +++++++++++++++++
 ocw/utils.py                              |  95 ++++++++++++++++++++---
 4 files changed, 325 insertions(+), 45 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/climate/blob/77674bde/examples/temperature_trends_over_CONUS.py
----------------------------------------------------------------------
diff --git a/examples/temperature_trends_over_CONUS.py b/examples/temperature_trends_over_CONUS.py
new file mode 100644
index 0000000..0467291
--- /dev/null
+++ b/examples/temperature_trends_over_CONUS.py
@@ -0,0 +1,102 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import numpy as np
+import datetime
+
+# import Apache OCW dependencies
+import ocw.data_source.local as local
+from ocw.dataset import Bounds as Bounds
+import ocw.dataset_processor as dsp
+import ocw.metrics as metrics
+import ocw.plotter as plotter
+import ocw.utils as utils
+import ssl
+if hasattr(ssl, '_create_unverified_context'):
+    ssl._create_default_https_context = ssl._create_unverified_context
+
+# nClimDiv observation file
+file_obs = 'nClimDiv/nClimDiv_tave_1895-2005.nc'
+
+# temporal boundary
+start_date = datetime.datetime(1979, 12, 1)
+end_date = datetime.datetime(2005, 8, 31)
+
+month_start = 6  # June
+month_end = 8    # August
+
+regions = []
+regions.append(['WA', 'OR', 'ID'])
+regions.append(['CA', 'NV', 'AZ', 'NM', 'UT', 'CO'])
+regions.append(['MT', 'WY', 'ND', 'SD', 'NE'])
+regions.append(['KS', 'TX', 'OK'])
+regions.append(['MN', 'IA', 'MO', 'WI', 'IL', 'IN', 'MI', 'OH'])
+regions.append(['ME', 'VT', 'NH', 'MA', 'NY', 'CT', 'RI', 'NJ', 'PA', 'WV', 'DE', 'MD'])
+regions.append(['KY', 'VA', 'AR', 'AL', 'LA', 'MS', 'FL', 'GA', 'NC', 'SC', 'DC', 'TN'])
+
+plotter.fill_US_states_with_color(regions, 'NCA_seven_regions', colors=True,
+                                  region_names=['NW', 'SW', 'NGP', 'SGP', 'MW', 'NE', 'SE'])
+
+# CONUS regional boundaries
+NW_bounds = Bounds(boundary_type='us_states',
+                   us_states=regions[0])
+SW_bounds = Bounds(boundary_type='us_states',
+                   us_states=regions[1])
+NGP_bounds = Bounds(boundary_type='us_states',
+                    us_states=regions[2])
+SGP_bounds = Bounds(boundary_type='us_states',
+                    us_states=regions[3])
+MW_bounds = Bounds(boundary_type='us_states',
+                   us_states=regions[4])
+NE_bounds = Bounds(boundary_type='us_states',
+                   us_states=regions[5])
+SE_bounds = Bounds(boundary_type='us_states',
+                   us_states=regions[6])
+
+
+""" Load nClimDiv file into OCW Dataset """
+obs_dataset = local.load_file(file_obs, variable_name='tave')
+
+""" Temporal subset of obs_dataset """
+obs_dataset_subset = dsp.temporal_slice(obs_dataset,
+                                        start_time=start_date, end_time=end_date)
+obs_dataset_season = dsp.temporal_subset(obs_dataset_subset, month_start, month_end,
+                                         average_each_year=True)
+
+""" Spatial subset of obs_dataset and generate time series """
+NW_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, NW_bounds))
+SW_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, SW_bounds))
+NGP_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, NGP_bounds))
+SGP_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, SGP_bounds))
+MW_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, MW_bounds))
+NE_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, NE_bounds))
+SE_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, SE_bounds))
+
+year = np.arange(len(NW_timeseries))
+
+regional_trends = []
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, NW_timeseries)[0])
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, SW_timeseries)[0])
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, NGP_timeseries)[0])
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, SGP_timeseries)[0])
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, MW_timeseries)[0])
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, NE_timeseries)[0])
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, SE_timeseries)[0])
+
+plotter.fill_US_states_with_color(regions,
+                                  'NCA_tave_trends_JJA_1980-2005',
+                                  values=regional_trends,
+                                  region_names=['%.3f' % (10 * i) for i in regional_trends])
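The regional trend values plotted above come from an ordinary least-squares fit to each regional mean time series. As a minimal standalone sketch of that computation (synthetic data; only numpy and scipy assumed, independent of OCW):

    import numpy as np
    from scipy import stats

    # Synthetic JJA-mean temperature series: 0.02 deg/yr trend plus noise
    years = np.arange(26)
    temps = 0.02 * years + np.random.normal(scale=0.3, size=years.size)

    # slope is the trend per year; stderr is the standard error of the slope
    slope, intercept, r_value, p_value, stderr = stats.linregress(years, temps)
    print('trend: %.3f deg/decade (+/- %.3f)' % (10 * slope, 10 * stderr))

The example script multiplies each trend by 10 for the same reason when labeling regions: the fit is per year, the labels are per decade.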

http://git-wip-us.apache.org/repos/asf/climate/blob/77674bde/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index b113bc7..f8e740a 100755
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -401,23 +401,28 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma
     _are_bounds_contained_by_dataset(target_dataset, subregion)
 
     if target_dataset.lats.ndim == 2 and target_dataset.lons.ndim == 2:
-        start_time_index = np.where(
-            target_dataset.times >= subregion.start)[0][0]
-        end_time_index = np.where(
-            target_dataset.times <= subregion.end)[0][-1]
-        target_dataset = temporal_slice(
-            target_dataset, start_time_index, end_time_index)
-        nt, ny, nx = target_dataset.values.shape
+        temporal_subset = temporal_slice(
+            target_dataset, subregion.start, subregion.end)
+        nt, ny, nx = temporal_subset.values.shape
         y_index, x_index = np.where(
             (target_dataset.lats >= subregion.lat_max) | (
                 target_dataset.lats <= subregion.lat_min) |
             (target_dataset.lons >= subregion.lon_max) | (
                 target_dataset.lons <= subregion.lon_min))
         for it in np.arange(nt):
-            target_dataset.values[it, y_index, x_index] = 1.e+20
-        target_dataset.values = ma.masked_equal(
-            target_dataset.values, 1.e+20)
-        return target_dataset
+            temporal_subset.values[it, y_index, x_index] = 1.e+20
+        new_values = ma.masked_equal(
+            temporal_subset.values, 1.e+20)
+        return ds.Dataset(
+            target_dataset.lats,
+            target_dataset.lons,
+            temporal_subset.times,
+            new_values,
+            variable=target_dataset.variable,
+            units=target_dataset.units,
+            name=subregion_name,
+            origin=target_dataset.origin
+        )
     elif target_dataset.lats.ndim == 1 and target_dataset.lons.ndim == 1:
         # Get subregion indices into subregion data
@@ -466,36 +471,52 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma
         )
 
     if subregion.boundary_type == 'us_states' or subregion.boundary_type == 'countries':
-        start_time_index = np.where(
-            target_dataset.times == subregion.start)[0][0]
-        end_time_index = np.where(target_dataset.times == subregion.end)[0][0]
-        target_dataset = temporal_slice(
-            target_dataset, start_time_index, end_time_index)
-        nt, ny, nx = target_dataset.values.shape
+        temporal_subset = temporal_slice(
+            target_dataset, subregion.start, subregion.end)
         spatial_mask = utils.mask_using_shapefile_info(target_dataset.lons,
                                                        target_dataset.lats,
                                                        subregion.masked_regions,
                                                        extract=extract)
-        target_dataset.values = utils.propagate_spatial_mask_over_time(
-            target_dataset.values, mask=spatial_mask)
-        return target_dataset
+        subset_values = utils.propagate_spatial_mask_over_time(
+            temporal_subset.values, mask=spatial_mask)
+        return ds.Dataset(
+            target_dataset.lats,
+            target_dataset.lons,
+            temporal_subset.times,
+            subset_values,
+            variable=target_dataset.variable,
+            units=target_dataset.units,
+            name=subregion_name,
+            origin=target_dataset.origin
+        )
 
     if subregion.boundary_type == 'user':
+        temporal_subset = temporal_slice(
+            target_dataset, subregion.start, subregion.end)
         spatial_mask = utils.regrid_spatial_mask(target_dataset.lons,
                                                  target_dataset.lats,
                                                  subregion.mask_longitude,
                                                  subregion.mask_latitude,
                                                  subregion.mask_variable,
                                                  user_mask_values,
                                                  extract=extract)
-        target_dataset.values = utils.propagate_spatial_mask_over_time(
-            target_dataset.values, mask=spatial_mask)
-        return target_dataset
+        subset_values = utils.propagate_spatial_mask_over_time(
+            temporal_subset.values, mask=spatial_mask)
+        return ds.Dataset(
+            target_dataset.lats,
+            target_dataset.lons,
+            temporal_subset.times,
+            subset_values,
+            variable=target_dataset.variable,
+            units=target_dataset.units,
+            name=subregion_name,
+            origin=target_dataset.origin
+        )
 
 
-def temporal_slice(target_dataset, start_time_index, end_time_index):
+def temporal_slice(target_dataset, start_time, end_time):
     '''Temporally slice given dataset(s) with subregion information.
 
     This does not spatially subset the target_dataset.
 
-    :param start_time_index: time index of the start time
-    :type start_time_index: :class:`int`
+    :param start_time: start time
+    :type start_time: :class:`datetime.datetime`
 
-    :param end_time_index: time index of the end time
-    :type end_time_index: :class:`int`
+    :param end_time: end time
+    :type end_time: :class:`datetime.datetime`
 
     :param target_dataset: The Dataset object to subset.
     :type target_dataset: :class:`dataset.Dataset`
@@ -505,14 +526,23 @@ def temporal_slice(target_dataset, start_time_index, end_time_index):
 
     :raises: ValueError
     '''
-    start_date = target_dataset.times[start_time_index]
-    end_date = target_dataset.times[end_time_index]
-    timeStart = min(np.nonzero(target_dataset.times >= start_date)[0])
-    timeEnd = max(np.nonzero(target_dataset.times <= end_date)[0])
-    target_dataset.times = target_dataset.times[timeStart:timeEnd + 1]
-    target_dataset.values = target_dataset.values[timeStart:timeEnd + 1, :]
-
-    return target_dataset
+    start_time_index = np.where(
+        target_dataset.times >= start_time)[0][0]
+    end_time_index = np.where(
+        target_dataset.times <= end_time)[0][-1]
+    new_times = target_dataset.times[start_time_index:end_time_index + 1]
+    new_values = target_dataset.values[start_time_index:end_time_index + 1, :]
+
+    return ds.Dataset(
+        target_dataset.lats,
+        target_dataset.lons,
+        new_times,
+        new_values,
+        variable=target_dataset.variable,
+        units=target_dataset.units,
+        origin=target_dataset.origin
+    )
 
 
 def safe_subset(target_dataset, subregion, subregion_name=None):
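The main behavioral changes in dataset_processor.py: temporal_slice now accepts datetime objects instead of integer time indices, and both subset and temporal_slice return new Dataset objects rather than mutating their input. A hedged usage sketch (the file path and variable name are reused from the example script above):

    import datetime
    import ocw.data_source.local as local
    import ocw.dataset_processor as dsp

    dataset = local.load_file('nClimDiv/nClimDiv_tave_1895-2005.nc',
                              variable_name='tave')

    # Slice by datetime bounds; the input dataset is left untouched
    sliced = dsp.temporal_slice(dataset,
                                start_time=datetime.datetime(1979, 12, 1),
                                end_time=datetime.datetime(2005, 8, 31))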

http://git-wip-us.apache.org/repos/asf/climate/blob/77674bde/ocw/plotter.py
----------------------------------------------------------------------
diff --git a/ocw/plotter.py b/ocw/plotter.py
index 9e2e0ad..8e56b1d 100755
--- a/ocw/plotter.py
+++ b/ocw/plotter.py
@@ -18,12 +18,16 @@ from tempfile import TemporaryFile
 
 import matplotlib as mpl
 import matplotlib.pyplot as plt
+from matplotlib.patches import Polygon
+from matplotlib.collections import PatchCollection
 from mpl_toolkits.basemap import Basemap
 from mpl_toolkits.axes_grid1 import ImageGrid
 import scipy.stats.mstats as mstats
 import numpy as np
 import numpy.ma as ma
 
+import ocw.utils as utils
+
 # Set the default colormap to coolwarm
 mpl.rc('image', cmap='coolwarm')
 
@@ -1087,3 +1091,70 @@ def draw_histogram(dataset_array, data_names, fname, fmt='png', nbins=10):
                  data_max + (data_max - data_min) * 0.15])
 
     fig.savefig('%s.%s' % (fname, fmt), bbox_inches='tight', dpi=fig.dpi)
+
+
+def fill_US_states_with_color(regions, fname, fmt='png', ptitle='',
+                              colors=False, values=None, region_names=None):
+    ''' Fill the states over the contiguous US with colors.
+
+    :param regions: The list of subregions (lists of US states) to be filled
+        with different colors.
+    :type regions: :class:`list`
+    :param fname: The filename of the plot.
+    :type fname: :mod:`string`
+    :param fmt: (Optional) filetype for the output.
+    :type fmt: :mod:`string`
+    :param ptitle: (Optional) plot title.
+    :type ptitle: :mod:`string`
+    :param colors: (Optional) If True, each region is filled with a different
+        color without using values.
+    :type colors: :class:`bool`
+    :param values: (Optional) If colors is False, the fill color of each
+        region is scaled by the corresponding element of values.
+    :type values: :class:`numpy.ndarray`
+    :param region_names: (Optional) labels drawn at the center of each region.
+    :type region_names: :class:`list`
+    '''
+
+    nregion = len(regions)
+    if colors:
+        cmap = plt.cm.rainbow
+    if values is not None:
+        cmap = plt.cm.seismic
+        max_abs = np.abs(values).max()
+
+    # Set up the figure
+    fig = plt.figure()
+    fig.set_size_inches((8.5, 11.))
+    fig.dpi = 300
+    ax = fig.add_subplot(111)
+
+    # create the map
+    m = Basemap(llcrnrlon=-127, llcrnrlat=22, urcrnrlon=-65, urcrnrlat=52,
+                ax=ax)
+
+    for iregion, region in enumerate(regions):
+        shapes = utils.shapefile_boundary('us_states', region)
+        patches = []
+        lats = np.empty(0)
+        lons = np.empty(0)
+        for shape in shapes:
+            patches.append(Polygon(np.array(shape), True))
+            lons = np.append(lons, shape[:, 0])
+            lats = np.append(lats, shape[:, 1])
+        if colors:
+            color_to_fill = cmap((iregion + 0.5) / nregion)
+        if values is not None:
+            value = values[iregion]
+            color_to_fill = cmap(0.5 + np.sign(value) * abs(value) / max_abs * 0.45)
+        ax.add_collection(PatchCollection(patches, facecolor=color_to_fill))
+        if region_names:
+            ax.text(lons.mean(), lats.mean(), region_names[iregion],
+                    ha='center', va='center', fontsize=10)
+    m.drawcountries(linewidth=0.)
+
+    # Add the title
+    ax.set_title(ptitle)
+
+    # Save the figure
+    fig.savefig('%s.%s' % (fname, fmt), bbox_inches='tight', dpi=fig.dpi)
+    fig.clf()
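When values are supplied, fill_US_states_with_color centers them on the midpoint of the diverging seismic colormap, so positive and negative trends of equal magnitude get equally saturated reds and blues. The mapping in isolation (the sample values are illustrative):

    import numpy as np
    import matplotlib.pyplot as plt

    values = np.array([0.3, -0.1, 0.05])   # e.g. per-year regional trends
    max_abs = np.abs(values).max()
    cmap = plt.cm.seismic

    # 0.5 is white in 'seismic'; the 0.45 factor keeps fills inside the
    # extremes of the colormap
    fractions = 0.5 + np.sign(values) * np.abs(values) / max_abs * 0.45
    region_colors = [cmap(f) for f in fractions]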

http://git-wip-us.apache.org/repos/asf/climate/blob/77674bde/ocw/utils.py
----------------------------------------------------------------------
diff --git a/ocw/utils.py b/ocw/utils.py
index ed7f0cb..ccc4ea9 100755
--- a/ocw/utils.py
+++ b/ocw/utils.py
@@ -26,8 +26,9 @@ import numpy.ma as ma
 from mpl_toolkits.basemap import shiftgrid, Basemap
 from matplotlib.path import Path
 from dateutil.relativedelta import relativedelta
-from netCDF4 import num2date
+from netCDF4 import num2date, date2num
 import scipy.interpolate as interpolate
+import scipy.stats as stats
 from scipy.ndimage import map_coordinates
 
@@ -470,8 +471,7 @@ def shapefile_boundary(boundary_type, region_names):
     regions = []
     shapefile_dir = os.sep.join([os.path.dirname(__file__), 'shape'])
     map_read.readshapefile(os.path.join(shapefile_dir, boundary_type),
-                           boundary_type)
-
+                           boundary_type, drawbounds=False)
     # Note: The shapefile may contain countries or states with latin letters.
     # Hence, the output of readshapefile can be a mix of ascii and unicode
    # strings. Because python 2 and 3 treat them differently, we must
@@ -480,8 +480,9 @@ def shapefile_boundary(boundary_type, region_names):
         for region_name in region_names:
             region_name = _force_unicode(region_name)
             for iregion, region_info in enumerate(map_read.us_states_info):
-                state = _force_unicode(region_info['st'], 'latin-1')
-                if state == region_name:
+                state1 = _force_unicode(region_info['st'], 'latin-1')
+                state2 = _force_unicode(region_info['state'], 'latin-1')
+                if state1 == region_name or state2 == region_name:
                     regions.append(np.array(map_read.us_states[iregion]))
     elif boundary_type == 'countries':
         for region_name in region_names:
@@ -594,10 +595,19 @@ def regrid_spatial_mask(target_lon, target_lat, mask_lon, mask_lat, mask_var,
 
 def propagate_spatial_mask_over_time(data_array, mask):
     if data_array.ndim == 3 and mask.ndim == 2:
         nt = data_array.shape[0]
-        for it in np.arange(nt):
-            mask_original = data_array[it, :].mask
-            data_array.mask[it, :] = mask | mask_original
-        return data_array
+        new_data_array = ma.zeros(data_array.shape)
+        # A scalar mask (including ma.nomask) means no element is masked yet
+        if isinstance(data_array.mask, (bool, np.bool_)):
+            new_mask = mask
+            for it in np.arange(nt):
+                new_data_array[it, :] = ma.array(data_array[it, :],
+                                                 mask=new_mask)
+        else:
+            for it in np.arange(nt):
+                new_mask = data_array[it, :].mask | mask
+                new_data_array[it, :] = ma.array(data_array[it, :],
+                                                 mask=new_mask)
+
+        return new_data_array
 
 
 def convert_lat_lon_2d_array(lon, lat):
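The rewritten propagate_spatial_mask_over_time ORs a 2-D spatial mask into each time slice's own mask and returns a new array instead of writing into the input. The core numpy.ma idea, reduced to toy shapes:

    import numpy as np
    import numpy.ma as ma

    data = ma.masked_invalid(np.random.rand(4, 3, 3))   # (time, lat, lon)
    spatial_mask = np.zeros((3, 3), dtype=bool)
    spatial_mask[0, :] = True                           # hide the first row at all times

    masked = ma.zeros(data.shape)
    for it in range(data.shape[0]):
        # getmaskarray always yields a full boolean mask, even for ma.nomask
        masked[it, :] = ma.array(data[it, :],
                                 mask=ma.getmaskarray(data[it, :]) | spatial_mask)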
@@ -614,3 +624,70 @@ def _force_unicode(s, encoding='utf-8'):
         s = s.decode(encoding=encoding)
 
     return s
+
+
+def calculate_temporal_trends(dataset):
+    ''' Calculate temporal trends in dataset.values.
+
+    :param dataset: The dataset whose values are used to calculate
+        grid-point trends.
+    :type dataset: :class:`dataset.Dataset`
+
+    :returns: Arrays of the temporal trend and its standard error
+    :rtype: :class:`numpy.ma.core.MaskedArray`
+    '''
+
+    nt, ny, nx = dataset.values.shape
+    x = np.arange(nt)
+
+    trend = np.zeros([ny, nx]) - 999.
+    slope_err = np.zeros([ny, nx]) - 999.
+    for iy in np.arange(ny):
+        for ix in np.arange(nx):
+            # Fit only grid points whose time series is complete (unmasked)
+            if dataset.values[:, iy, ix].count() == nt:
+                trend[iy, ix], slope_err[iy, ix] = calculate_temporal_trend_of_time_series(
+                    x, dataset.values[:, iy, ix])
+
+    return ma.masked_equal(trend, -999.), ma.masked_equal(slope_err, -999.)
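A hedged usage sketch for the new gridded helper, continuing from the obs_dataset_season object built in the example script (names reused from there):

    import ocw.utils as utils

    # Per-grid-point trend and slope standard error; grid points with any
    # masked time steps come back masked
    trend, slope_err = utils.calculate_temporal_trends(obs_dataset_season)
    print(trend.shape)    # (ny, nx) masked arrays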
+
+
+def calculate_ensemble_temporal_trends(timeseries_array, number_of_samples=1000):
+    ''' Calculate temporal trends in an ensemble of time series.
+
+    :param timeseries_array: A list whose elements are one-dimensional numpy arrays
+    :type timeseries_array: :class:`list`
+
+    :param number_of_samples: The number of bootstrap samples drawn to
+        estimate the trend error.
+    :type number_of_samples: :class:`int`
+
+    :returns: Temporal trend of the ensemble mean and its error estimated
+        by bootstrapping
+    :rtype: :class:`float`, :class:`float`
+    '''
+
+    nmodels = len(timeseries_array)
+    nt = len(timeseries_array[0])
+    x = np.arange(nt)
+    merged_array = np.zeros([nmodels, nt])
+    sampled_trend = np.zeros(number_of_samples)
+    for imodel, timeseries in enumerate(timeseries_array):
+        merged_array[imodel, :] = timeseries[:]
+    ensemble_trend, _ = calculate_temporal_trend_of_time_series(
+        x, np.mean(merged_array, axis=0))
+
+    for isample in np.arange(number_of_samples):
+        # Resample ensemble members with replacement and refit the trend
+        index = np.random.choice(nmodels, size=nmodels, replace=True)
+        random_ensemble = np.mean(merged_array[index, :], axis=0)
+        sampled_trend[isample], _ = calculate_temporal_trend_of_time_series(
+            x, random_ensemble)
+    return ensemble_trend, np.std(sampled_trend, ddof=1)
+
+
+def calculate_temporal_trend_of_time_series(x, y):
+    ''' Calculate the least-squares trend (a) in y = ax + b and a's standard error.
+
+    :param x: time series
+    :type x: :class:`numpy.ndarray`
+
+    :param y: time series
+    :type y: :class:`numpy.ndarray`
+
+    :returns: temporal trend and standard error
+    :rtype: :class:`float`, :class:`float`
+    '''
+    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
+    # stats.linregress already returns the standard error of the slope
+    return slope, std_err
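calculate_ensemble_temporal_trends estimates the trend uncertainty by bootstrapping over ensemble members: draw members with replacement, refit the ensemble-mean trend, and take the spread of the refitted trends. The same idea stripped down to numpy and scipy (toy ensemble; the sizes are illustrative):

    import numpy as np
    from scipy import stats

    nmodels, nt, nsamples = 8, 26, 1000
    x = np.arange(nt)
    # Toy ensemble: shared 0.02/yr trend plus member-specific noise
    ensemble = 0.02 * x + np.random.normal(scale=0.3, size=(nmodels, nt))

    sampled = np.empty(nsamples)
    for i in range(nsamples):
        idx = np.random.choice(nmodels, size=nmodels, replace=True)
        sampled[i] = stats.linregress(x, ensemble[idx].mean(axis=0))[0]

    print('bootstrap standard error of the trend: %.4f' % sampled.std(ddof=1))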