http://git-wip-us.apache.org/repos/asf/climate/blob/dc091596/mccsearch/code/process.py
----------------------------------------------------------------------
diff --git a/mccsearch/code/process.py b/mccsearch/code/process.py
index 5a362ec..7367ee7 100644
--- a/mccsearch/code/process.py
+++ b/mccsearch/code/process.py
@@ -1,4 +1,20 @@
-"""Collection of functions that process numpy arrays of varying shapes.
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Collection of functions that process numpy arrays of varying shapes.
 Functions can range from subsetting to regridding"""
 
 # Python Standard Library Imports
@@ -10,38 +26,38 @@ import sys
 import time
 
 # 3rd Party Imports
-import Nio
+import netCDF4
 import numpy as np
 import numpy.ma as ma
-from scipy.ndimage import map_coordinates
+
 
 def extract_subregion_from_data_array(data, lats, lons, latmin, latmax, lonmin, lonmax):
     '''
     Extract a sub-region from a data array.
-
+
     Example::
        **e.g.** the user may load a global model file, but only want to examine data over North America
        This function extracts a sub-domain from the original data.
        The defined sub-region must be a regular lat/lon bounding box,
        but the model data may be on a non-regular grid (e.g. rotated, or Gaussian grid layout).
-    Data are kept on the original model grid and a rectangular (in model-space) region
+    Data are kept on the original model grid and a rectangular (in model-space) region
     is extracted which contains the rectangular (in lat/lon space) user supplied region.
-
+
     INPUT::
        data - 3d masked data array
        lats - 2d array of latitudes corresponding to data array
        lons - 2d array of longitudes corresponding to data array
        latmin, latmax, lonmin, lonmax - bounding box of required region to extract
-
+
     OUTPUT::
        data2 - subset of original data array containing only data from required subregion
        lats2 - subset of original latitude data array
       lons2 - subset of original longitude data array
     '''
-
-
+
+
     # Mask model data array to find grid points inside the supplied bounding box
     whlat = (lats > latmin) & (lats < latmax)
@@ -67,37 +83,37 @@ def extract_subregion_from_data_array(data, lats, lons, latmin, latmax, lonmin,
 def calc_area_mean(data, lats, lons, mymask='not set'):
     '''
     Calculate Area Average of data in a masked array
-
+
     INPUT::
        data: a masked array of data (NB. only data from one time expected to be passed at once)
        lats: 2d array of regularly gridded latitudes
        lons: 2d array of regularly gridded longitudes
        mymask: (optional) defines spatial region to do averaging over
-
+
     OUTPUT::
        area_mean: a value for the mean inside the area
     '''
-
-
+
+
     # If mask not passed in, then set mask to cover whole data domain
     if(mymask == 'not set'):
-        mymask = np.empty(data.shape)
-        mymask[:] = False  # NB. mask means (don't show), so False everywhere means use everything.
-
+        mymask = np.empty(data.shape)
+        mymask[:] = False  # NB. mask means (don't show), so False everywhere means use everything.
+
     # Dimension check on lats, lons
     # Sometimes these arrays are 3d, sometimes 2d, sometimes 1d
     # This bit of code just converts to the required 2d array shape
     if(len(lats.shape) == 3):
-        lats = lats[0, :, :]
+        lats = lats[0, :, :]
 
     if(len(lons.shape) == 3):
-        lons = lons[0, :, :]
+        lons = lons[0, :, :]
 
     if(np.logical_and(len(lats.shape) == 1, len(lons.shape) == 1)):
-        lons, lats = np.meshgrid(lons, lats)
-
+        lons, lats = np.meshgrid(lons, lats)
+
     # Calculate grid length (assuming regular lat/lon grid)
     dlat = lats[1, 0] - lats[0, 0]
     dlon = lons[0, 1] - lons[0, 0]
@@ -108,28 +124,28 @@ def calc_area_mean(data, lats, lons, mymask='not set'):
     # Create a new masked array covering just user selected area (defined in mymask)
     # NB. this preserves missing data points in the observations data
     subdata = ma.masked_array(data, mask=mymask)
-
+
     if(myweights.shape != subdata.shape):
-        myweights.resize(subdata.shape)
-        myweights[1:, :] = myweights[0, :]
+        myweights.resize(subdata.shape)
+        myweights[1:, :] = myweights[0, :]
 
     # Calculate weighted mean using ma.average (which takes weights)
     area_mean = ma.average(subdata, weights=myweights)
-
+
     return area_mean
 
 def calc_area_in_grid_box(latitude, dlat, dlon):
     '''
     Calculate area of regular lat-lon grid box
-
+
     INPUT::
        latitude: latitude of grid box (degrees)
        dlat: grid length in latitude direction (degrees)
        dlon: grid length in longitude direction (degrees)
-
+
     OUTPUT::
        A: area of the grid box
-
+
     '''
 
     R = 6371000  # radius of Earth in metres
@@ -141,422 +157,286 @@ def calc_area_in_grid_box(latitude, dlat, dlon):
     return A
 
 
-def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi= -999999999):
+def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi=-999999999):
+    """
+    This function has been moved to the ocw/dataset_processor module
+    """
+    from ocw import dataset_processor
+    q2 = dataset_processor._rcmes_spatial_regrid(q, lat, lon, lat2, lon2, order=order)
+
+    return q2
+
+def create_mask_using_threshold(masked_array, threshold=0.5):
+    """
+    .. deprecated:: 0.3-incubating
+       Use :func:`ocw.dataset_processor._rcmes_create_mask_using_threshold` instead.
+    """
+    from ocw import dataset_processor
+    mask = dataset_processor._rcmes_create_mask_using_threshold(masked_array, threshold)
+
+    return mask
+
+def calc_average_on_new_time_unit(data, dateList, unit='monthly'):
     '''
-    Perform regridding from one set of lat,lon values onto a new set (lat2,lon2)
-
+    Routine to calculate averages on longer time units than the data exists on.
+
+    Example::
+        If the data is 6-hourly, calculate daily, or monthly means.
+
     Input::
-        q - the variable to be regridded
-        lat,lon - original co-ordinates corresponding to q values
-        lat2,lon2 - new set of latitudes and longitudes that you want to regrid q onto
-        order - (optional) interpolation order 1=bi-linear, 3=cubic spline
-        mdi - (optional) fill value for missing data (used in creation of masked array)
-
-    Output::
-        q2 - q regridded onto the new set of lat2,lon2
-
+        data - data values
+        dateList - list of python datetime structures corresponding to data values
+        unit - string describing time unit to average onto.
+ *Valid values are: 'monthly', 'daily', 'pentad','annual','decadal'* + + Output: + meanstorem - numpy masked array of data values meaned over required time period + newTimesList - a list of python datetime objects representing the data in the new averagin period + *NB.* currently set to beginning of averaging period, + **i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.** ''' - nlat = q.shape[0] - nlon = q.shape[1] - - nlat2 = lat2.shape[0] - nlon2 = lon2.shape[1] - - # To make our lives easier down the road, let's - # turn these into arrays of x & y coords - loni = lon2.ravel() - lati = lat2.ravel() - - loni = loni.copy() # NB. it won't run unless you do this... - lati = lati.copy() - - # Now, we'll set points outside the boundaries to lie along an edge - loni[loni > lon.max()] = lon.max() - loni[loni < lon.min()] = lon.min() - - # To deal with the "hard" break, we'll have to treat y differently, - # so we're just setting the min here... - lati[lati > lat.max()] = lat.max() - lati[lati < lat.min()] = lat.min() - - - # We need to convert these to (float) indicies - # (xi should range from 0 to (nx - 1), etc) - loni = (nlon - 1) * (loni - lon.min()) / (lon.max() - lon.min()) - - # Deal with the "hard" break in the y-direction - lati = (nlat - 1) * (lati - lat.min()) / (lat.max() - lat.min()) - - # Notes on dealing with MDI when regridding data. - # Method adopted here: - # Use bilinear interpolation of data by default (but user can specify other order using order=... in call) - # Perform bilinear interpolation of data, and of mask. - # To be conservative, new grid point which contained some missing data on the old grid is set to missing data. - # -this is achieved by looking for any non-zero interpolated mask values. - # To avoid issues with bilinear interpolation producing strong gradients leading into the MDI, - # set values at MDI points to mean data value so little gradient visible = not ideal, but acceptable for now. - - # Set values in MDI so that similar to surroundings so don't produce large gradients when interpolating - # Preserve MDI mask, by only changing data part of masked array object. - for shift in (-1, 1): - for axis in (0, 1): - q_shifted = np.roll(q, shift=shift, axis=axis) - idx = ~q_shifted.mask * q.mask - q.data[idx] = q_shifted[idx] - - # Now we actually interpolate - # map_coordinates does cubic interpolation by default, - # use "order=1" to preform bilinear interpolation instead... - q2 = map_coordinates(q, [lati, loni], order=order) - q2 = q2.reshape([nlat2, nlon2]) - - # Set values to missing data outside of original domain - q2 = ma.masked_array(q2, mask=np.logical_or(np.logical_or(lat2 >= lat.max(), - lat2 <= lat.min()), - np.logical_or(lon2 <= lon.min(), - lon2 >= lon.max()))) - - # Make second map using nearest neighbour interpolation -use this to determine locations with MDI and mask these - qmdi = np.zeros_like(q) - qmdi[q.mask == True] = 1. - qmdi[q.mask == False] = 0. - qmdi_r = map_coordinates(qmdi, [lati, loni], order=order) - qmdi_r = qmdi_r.reshape([nlat2, nlon2]) - mdimask = (qmdi_r != 0.0) - - # Combine missing data mask, with outside domain mask define above. - q2.mask = np.logical_or(mdimask, q2.mask) + # First catch unknown values of time unit passed in by user + acceptable = (unit == 'full') | (unit == 'annual') | (unit == 'monthly') | (unit == 'daily') | (unit == 'pentad') - return q2 + if not acceptable: + print 'Error: unknown unit type selected for time averaging' + print ' Please check your code.' 
+ return -def create_mask_using_threshold(masked_array, threshold=0.5): - ''' - Routine to create a mask, depending on the proportion of times with missing data. - For each pixel, calculate proportion of times that are missing data, - **if** the proportion is above a specified threshold value, - then mark the pixel as missing data. - - Input:: - masked_array - a numpy masked array of data (assumes time on axis 0, and space on axes 1 and 2). - threshold - (optional) threshold proportion above which a pixel is marked as 'missing data'. - NB. default threshold = 50% - - Output:: - mymask - a numpy array describing the mask. NB. not a masked array, just the mask itself. + # Calculate arrays of years (2007,2007), + # yearsmonths (200701,200702), + # or yearmonthdays (20070101,20070102) + # -depending on user selected averaging period. - ''' - + # Year list + if unit == 'annual': + print 'Calculating annual mean data' + timeunits = [] - # try, except used as some model files don't have a full mask, but a single bool - # the except catches this situation and deals with it appropriately. - try: - nT = masked_array.mask.shape[0] + for i in np.arange(len(dateList)): + timeunits.append(str(dateList[i].year)) - # For each pixel, count how many times are masked. - nMasked = masked_array.mask[:, :, :].sum(axis=0) + timeunits = np.array(timeunits, dtype=int) - # Define new mask as when a pixel has over a defined threshold ratio of masked data - # e.g. if the threshold is 75%, and there are 10 times, - # then a pixel will be masked if more than 5 times are masked. - mymask = nMasked > (nT * threshold) + # YearMonth format list + if unit == 'monthly': + print 'Calculating monthly mean data' + timeunits = [] - except: - mymask = np.zeros_like(masked_array.data[0, :, :]) + for i in np.arange(len(dateList)): + timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month)) - return mymask + timeunits = np.array(timeunits, dtype=int) -def calc_average_on_new_time_unit(data, dateList, unit='monthly'): - ''' - Routine to calculate averages on longer time units than the data exists on. - - Example:: - If the data is 6-hourly, calculate daily, or monthly means. - - Input:: - data - data values - dateList - list of python datetime structures corresponding to data values - unit - string describing time unit to average onto. - *Valid values are: 'monthly', 'daily', 'pentad','annual','decadal'* - - Output: - meanstorem - numpy masked array of data values meaned over required time period - newTimesList - a list of python datetime objects representing the data in the new averagin period - *NB.* currently set to beginning of averaging period, - **i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.** - ''' + # YearMonthDay format list + if unit == 'daily': + print 'Calculating daily mean data' + timeunits = [] - # First catch unknown values of time unit passed in by user - acceptable = (unit == 'full') | (unit == 'annual') | (unit == 'monthly') | (unit == 'daily') | (unit == 'pentad') + for i in np.arange(len(dateList)): + timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + \ + str("%02d" % dateList[i].day)) - if not acceptable: - print 'Error: unknown unit type selected for time averaging' - print ' Please check your code.' - return + timeunits = np.array(timeunits, dtype=int) - # Calculate arrays of years (2007,2007), - # yearsmonths (200701,200702), - # or yearmonthdays (20070101,20070102) - # -depending on user selected averaging period. 
- # Year list - if unit == 'annual': - print 'Calculating annual mean data' - timeunits = [] + # TODO: add pentad setting using Julian days? - for i in np.arange(len(dateList)): - timeunits.append(str(dateList[i].year)) + # Full list: a special case + if unit == 'full': + print 'Calculating means data over full time range' + timeunits = [] - timeunits = np.array(timeunits, dtype=int) - - # YearMonth format list - if unit == 'monthly': - print 'Calculating monthly mean data' - timeunits = [] + for i in np.arange(len(dateList)): + timeunits.append(999) # i.e. we just want the same value for all times. - for i in np.arange(len(dateList)): - timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month)) + timeunits = np.array(timeunits, dtype=int) - timeunits = np.array(timeunits, dtype=int) - # YearMonthDay format list - if unit == 'daily': - print 'Calculating daily mean data' - timeunits = [] - - for i in np.arange(len(dateList)): - timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + \ - str("%02d" % dateList[i].day)) - timeunits = np.array(timeunits, dtype=int) + # empty list to store new times + newTimesList = [] + # Decide whether or not you need to do any time averaging. + # i.e. if data are already on required time unit then just pass data through and + # calculate and return representative datetimes. + processing_required = True + if len(timeunits) == (len(np.unique(timeunits))): + processing_required = False - # TODO: add pentad setting using Julian days? + # 1D data arrays, i.e. time series + if data.ndim == 1: + # Create array to store the resulting data + meanstore = np.zeros(len(np.unique(timeunits))) + # Calculate the means across each unique time unit + i = 0 + for myunit in np.unique(timeunits): + if processing_required: + datam = ma.masked_array(data, timeunits != myunit) + meanstore[i] = ma.average(datam) - # Full list: a special case - if unit == 'full': - print 'Calculating means data over full time range' - timeunits = [] + # construct new times list + smyunit = str(myunit) + if len(smyunit) == 4: # YYYY + yyyy = int(myunit[0:4]) + mm = 1 + dd = 1 + if len(smyunit) == 6: # YYYYMM + yyyy = int(smyunit[0:4]) + mm = int(smyunit[4:6]) + dd = 1 + if len(smyunit) == 8: # YYYYMMDD + yyyy = int(smyunit[0:4]) + mm = int(smyunit[4:6]) + dd = int(smyunit[6:8]) + if len(smyunit) == 3: # Full time range + # Need to set an appropriate time representing the mid-point of the entire time span + dt = dateList[-1] - dateList[0] + halfway = dateList[0] + (dt / 2) + yyyy = int(halfway.year) + mm = int(halfway.month) + dd = int(halfway.day) - for i in np.arange(len(dateList)): - timeunits.append(999) # i.e. we just want the same value for all times. + newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0)) + i = i + 1 - timeunits = np.array(timeunits, dtype=int) + # 3D data arrays + if data.ndim == 3: + #datamask = create_mask_using_threshold(data,threshold=0.75) + # Create array to store the resulting data + meanstore = np.zeros([len(np.unique(timeunits)), data.shape[1], data.shape[2]]) - # empty list to store new times - newTimesList = [] + # Calculate the means across each unique time unit + i = 0 + datamask_store = [] + for myunit in np.unique(timeunits): + if processing_required: - # Decide whether or not you need to do any time averaging. - # i.e. if data are already on required time unit then just pass data through and - # calculate and return representative datetimes. 
- processing_required = True - if len(timeunits) == (len(np.unique(timeunits))): - processing_required = False + mask = np.zeros_like(data) + mask[timeunits != myunit, :, :] = 1.0 - # 1D data arrays, i.e. time series - if data.ndim == 1: - # Create array to store the resulting data - meanstore = np.zeros(len(np.unique(timeunits))) - - # Calculate the means across each unique time unit - i = 0 - for myunit in np.unique(timeunits): - if processing_required: - datam = ma.masked_array(data, timeunits != myunit) - meanstore[i] = ma.average(datam) - - # construct new times list - smyunit = str(myunit) - if len(smyunit) == 4: # YYYY - yyyy = int(myunit[0:4]) - mm = 1 - dd = 1 - if len(smyunit) == 6: # YYYYMM - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = 1 - if len(smyunit) == 8: # YYYYMMDD - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = int(smyunit[6:8]) - if len(smyunit) == 3: # Full time range - # Need to set an appropriate time representing the mid-point of the entire time span - dt = dateList[-1] - dateList[0] - halfway = dateList[0] + (dt / 2) - yyyy = int(halfway.year) - mm = int(halfway.month) - dd = int(halfway.day) - - newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0)) - i = i + 1 - - # 3D data arrays - if data.ndim == 3: - - #datamask = create_mask_using_threshold(data,threshold=0.75) - - # Create array to store the resulting data - meanstore = np.zeros([len(np.unique(timeunits)), data.shape[1], data.shape[2]]) - - # Calculate the means across each unique time unit - i = 0 - datamask_store = [] - for myunit in np.unique(timeunits): - if processing_required: - - mask = np.zeros_like(data) - mask[timeunits != myunit, :, :] = 1.0 - - # Calculate missing data mask within each time unit... - datamask_at_this_timeunit = np.zeros_like(data) - datamask_at_this_timeunit[:] = create_mask_using_threshold(data[timeunits == myunit, :, :], threshold=0.75) - # Store results for masking later - datamask_store.append(datamask_at_this_timeunit[0]) - - # Calculate means for each pixel in this time unit, ignoring missing data (using masked array). - datam = ma.masked_array(data, np.logical_or(mask, datamask_at_this_timeunit)) - meanstore[i, :, :] = ma.average(datam, axis=0) - - # construct new times list - smyunit = str(myunit) - if len(smyunit) == 4: # YYYY - yyyy = int(smyunit[0:4]) - mm = 1 - dd = 1 - if len(smyunit) == 6: # YYYYMM - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = 1 - if len(smyunit) == 8: # YYYYMMDD - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = int(smyunit[6:8]) - if len(smyunit) == 3: # Full time range - # Need to set an appropriate time representing the mid-point of the entire time span - dt = dateList[-1] - dateList[0] - halfway = dateList[0] + (dt / 2) - yyyy = int(halfway.year) - mm = int(halfway.month) - dd = int(halfway.day) - - newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0)) - - i += 1 - - if not processing_required: - meanstorem = data - - if processing_required: - # Create masked array (using missing data mask defined above) - datamask_store = np.array(datamask_store) - meanstorem = ma.masked_array(meanstore, datamask_store) - - return meanstorem, newTimesList + # Calculate missing data mask within each time unit... 
+ datamask_at_this_timeunit = np.zeros_like(data) + datamask_at_this_timeunit[:] = create_mask_using_threshold(data[timeunits == myunit, :, :], threshold=0.75) + # Store results for masking later + datamask_store.append(datamask_at_this_timeunit[0]) + + # Calculate means for each pixel in this time unit, ignoring missing data (using masked array). + datam = ma.masked_array(data, np.logical_or(mask, datamask_at_this_timeunit)) + meanstore[i, :, :] = ma.average(datam, axis=0) + + # construct new times list + smyunit = str(myunit) + if len(smyunit) == 4: # YYYY + yyyy = int(smyunit[0:4]) + mm = 1 + dd = 1 + if len(smyunit) == 6: # YYYYMM + yyyy = int(smyunit[0:4]) + mm = int(smyunit[4:6]) + dd = 1 + if len(smyunit) == 8: # YYYYMMDD + yyyy = int(smyunit[0:4]) + mm = int(smyunit[4:6]) + dd = int(smyunit[6:8]) + if len(smyunit) == 3: # Full time range + # Need to set an appropriate time representing the mid-point of the entire time span + dt = dateList[-1] - dateList[0] + halfway = dateList[0] + (dt / 2) + yyyy = int(halfway.year) + mm = int(halfway.month) + dd = int(halfway.day) + + newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0)) + + i += 1 + + if not processing_required: + meanstorem = data + + if processing_required: + # Create masked array (using missing data mask defined above) + datamask_store = np.array(datamask_store) + meanstorem = ma.masked_array(meanstore, datamask_store) + + return meanstorem, newTimesList def calc_running_accum_from_period_accum(data): ''' Routine to calculate running total accumulations from individual period accumulations. :: - + e.g. 0,0,1,0,0,2,2,1,0,0 -> 0,0,1,1,1,3,5,6,6,6 - + Input:: data: numpy array with time in the first axis - + Output:: running_acc: running accumulations - + ''' - - + + running_acc = np.zeros_like(data) - + if(len(data.shape) == 1): running_acc[0] = data[0] - + if(len(data.shape) > 1): running_acc[0, :] = data[0, :] - + for i in np.arange(data.shape[0] - 1): if(len(data.shape) == 1): running_acc[i + 1] = running_acc[i] + data[i + 1] - + if(len(data.shape) > 1): running_acc[i + 1, :] = running_acc[i, :] + data[i + 1, :] - + return running_acc def ignore_boundaries(data, rim=10): ''' Routine to mask the lateral boundary regions of model data to ignore them from calculations. 
- + Input:: data - a masked array of model data rim - (optional) number of grid points to ignore - + Output:: data - data array with boundary region masked - + ''' - + nx = data.shape[1] ny = data.shape[0] - + rimmask = np.zeros_like(data) for j in np.arange(rim): rimmask[j, 0:nx - 1] = 1.0 - + for j in ny - 1 - np.arange(rim): rimmask[j, 0:nx - 1] = 1.0 - + for i in np.arange(rim): rimmask[0:ny - 1, i] = 1.0 - + for i in nx - 1 - np.arange(rim): rimmask[0:ny - 1, i] = 1.0 - + data = ma.masked_array(data, mask=rimmask) - + return data def normalizeDatetimes(datetimes, timestep): - """ - Input:: - datetimes - list of datetime objects that need to be normalized - timestep - string of value ('daily' | 'monthly') - Output:: - normalDatetimes - list of datetime objects that have been normalized - - Normalization Rules:: - Daily data will be forced to an hour value of 00:00:00 - Monthly data will be forced to the first of the month at midnight - """ - normalDatetimes = [] - if timestep.lower() == 'monthly': - for inputDatetime in datetimes: - if inputDatetime.day != 1: - # Clean the inputDatetime - inputDatetimeString = inputDatetime.strftime('%Y%m%d') - normalInputDatetimeString = inputDatetimeString[:6] + '01' - inputDatetime = datetime.datetime.strptime(normalInputDatetimeString, '%Y%m%d') - - normalDatetimes.append(inputDatetime) - - elif timestep.lower() == 'daily': - for inputDatetime in datetimes: - if inputDatetime.hour != 0 or inputDatetime.minute != 0 or inputDatetime.second != 0: - datetimeString = inputDatetime.strftime('%Y%m%d%H%M%S') - normalDatetimeString = datetimeString[:8] + '000000' - inputDatetime = datetime.datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S') - - normalDatetimes.append(inputDatetime) - - return normalDatetimes + """ This function has been moved to the ocw/dataset_processor module """ + from ocw import dataset_processor as dsp + return dsp._rcmes_normalize_datetimes(datetimes, timestep) def getModelTimes(modelFile, timeVarName): ''' @@ -573,21 +453,19 @@ def getModelTimes(modelFile, timeVarName): modelTimeStep - 'hourly','daily','monthly','annual' ''' - f = Nio.open_file(modelFile) + f = netCDF4.Dataset(modelFile, mode='r') xtimes = f.variables[timeVarName] - timeFormat = xtimes.attributes['units'] - + timeFormat = xtimes.units.encode() + # search to check if 'since' appears in units try: - sinceLoc = re.search('since', timeFormat).end() - + sinceLoc = re.search('since', timeFormat).end() - #KDW the below block generates and error. 
But the print statement, indicates that sinceLoc found something except AttributeError: - print 'Error decoding model times: time variable attributes do not contain "since"' - raise + print 'Error decoding model times: time variable attributes do not contain "since"' + raise - units = '' + units = None TIME_UNITS = ('minutes', 'hours', 'days', 'months', 'years') # search for 'seconds','minutes','hours', 'days', 'months', 'years' so know units for unit in TIME_UNITS: @@ -597,17 +475,14 @@ def getModelTimes(modelFile, timeVarName): # cut out base time (the bit following 'since') base_time_string = string.lstrip(timeFormat[sinceLoc:]) - # decode base time base_time = decodeTimeFromString(base_time_string) - times = [] - - for xtime in xtimes[:]: - - # Cast time as an int ***KDW remove this so fractional xtime can be read from MERG + + # Cast time as an int + #TODO: KDW this may cause problems for data that is hourly with more than one timestep in it xtime = int(xtime) if units == 'minutes': @@ -631,23 +506,24 @@ def getModelTimes(modelFile, timeVarName): elif units == 'years': dt = datetime.timedelta(years=xtime) new_time = base_time + dt - - #print "xtime is:", xtime, "dt is: ", dt - - times.append(new_time) - + try: - timeStepLength = int(xtimes[1] - xtimes[0] + 1.e-12) + if len(xtimes) == 1: + timeStepLength = 0 + else: + timeStepLength = int(xtimes[1] - xtimes[0] + 1.e-12) + modelTimeStep = getModelTimeStep(units, timeStepLength) - - #KDW if timeStepLength is zero do not normalize times as this would create an empty list + + #if timeStepLength is zero do not normalize times as this would create an empty list for MERG (hourly) data if timeStepLength != 0: - times = normalizeDatetimes(times, modelTimeStep) + times = normalizeDatetimes(times, modelTimeStep) except: raise + return times, modelTimeStep def getModelTimeStep(units, stepSize): @@ -661,22 +537,22 @@ def getModelTimeStep(units, stepSize): # 28 days through 31 days elif 40320 <= stepSize <= 44640: modelTimeStep = 'monthly' - # 365 days through 366 days + # 365 days through 366 days elif 525600 <= stepSize <= 527040: - modelTimeStep = 'annual' + modelTimeStep = 'annual' else: raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize) elif units == 'hours': - #need a check for fractional hrs and only one hr i.e. stepSize=0 + #need a check for fractional hrs and only one hr i.e. stepSize=0 e.g. 
with MERG data if stepSize == 0 or stepSize == 1: modelTimeStep = 'hourly' elif stepSize == 24: modelTimeStep = 'daily' elif 672 <= stepSize <= 744: - modelTimeStep = 'monthly' + modelTimeStep = 'monthly' elif 8760 <= stepSize <= 8784: - modelTimeStep = 'annual' + modelTimeStep = 'annual' else: raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize) @@ -717,7 +593,7 @@ def decodeTimeFromString(time_string): :: **Input:** time_string - a string that represents a date/time - + **Output:** mytime - a python datetime object ''' @@ -774,9 +650,9 @@ def decodeTimeFromString(time_string): except ValueError: pass -#KDW added + try: - mytime = time.strptime(time_string, '%Y-%m-%d') + mytime = time.strptime(time_string, '%Y-%m-%d %H') mytime = datetime.datetime(*mytime[0:6]) return mytime @@ -784,210 +660,286 @@ def decodeTimeFromString(time_string): pass try: - mytime = time.strptime(time_string, '%Y-%m-%d %H') + mytime = time.strptime(time_string, '%Y-%m-%d') mytime = datetime.datetime(*mytime[0:6]) return mytime except ValueError: pass - print 'Error decoding time string: string does not match a predefined time format' return 0 def regrid_wrapper(regrid_choice, obsdata, obslats, obslons, wrfdata, wrflats, wrflons): - ''' - Wrapper routine for regridding. - - Either regrids model to obs grid, or obs to model grid, depending on user choice - - Inputs:: - regrid_choice - [0] = Regrid obs data onto model grid or - [1] = Regrid model data onto obs grid - - obsdata,wrfdata - data arrays - obslats,obslons - observation lat,lon arrays - wrflats,wrflons - model lat,lon arrays - - Output:: - rdata,rdata2 - regridded data - lats,lons - latitudes and longitudes of regridded data - - ''' + ''' + Wrapper routine for regridding. 
+ + Either regrids model to obs grid, or obs to model grid, depending on user choice + + Inputs:: + regrid_choice - [0] = Regrid obs data onto model grid or + [1] = Regrid model data onto obs grid + + obsdata,wrfdata - data arrays + obslats,obslons - observation lat,lon arrays + wrflats,wrflons - model lat,lon arrays + + Output:: + rdata,rdata2 - regridded data + lats,lons - latitudes and longitudes of regridded data - # Regrid obs data onto model grid - if(regrid_choice == '0'): + ''' + + # Regrid obs data onto model grid + if(regrid_choice == '0'): + + ndims = len(obsdata.shape) + if(ndims == 3): + newshape = [obsdata.shape[0], wrfdata.shape[1], wrfdata.shape[2]] + nT = obsdata.shape[0] - ndims = len(obsdata.shape) - if(ndims == 3): - newshape = [obsdata.shape[0], wrfdata.shape[1], wrfdata.shape[2]] - nT = obsdata.shape[0] + if(ndims == 2): + newshape = [wrfdata.shape[0], wrfdata.shape[1]] + nT = 0 - if(ndims == 2): - newshape = [wrfdata.shape[0], wrfdata.shape[1]] - nT = 0 + rdata = wrfdata + lats, lons = wrflats, wrflons - rdata = wrfdata - lats, lons = wrflats, wrflons + # Create a new masked array with the required dimensions + tmp = np.zeros(newshape) + rdata2 = ma.MaskedArray(tmp, mask=(tmp == 0)) + tmp = 0 # free up some memory - # Create a new masked array with the required dimensions - tmp = np.zeros(newshape) - rdata2 = ma.MaskedArray(tmp, mask=(tmp == 0)) - tmp = 0 # free up some memory + rdata2[:] = 0.0 - rdata2[:] = 0.0 + if(nT > 0): + for t in np.arange(nT): + rdata2[t, :, :] = do_regrid(obsdata[t, :, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :]) - if(nT > 0): - for t in np.arange(nT): - rdata2[t, :, :] = do_regrid(obsdata[t, :, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :]) + if(nT == 0): + rdata2[:, :] = do_regrid(obsdata[:, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :]) - if(nT == 0): - rdata2[:, :] = do_regrid(obsdata[:, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :]) + # Regrid model data onto obs grid + if(regrid_choice == '1'): + ndims = len(wrfdata.shape) + if(ndims == 3): + newshape = [wrfdata.shape[0], obsdata.shape[1], obsdata.shape[2]] + nT = obsdata.shape[0] - # Regrid model data onto obs grid - if(regrid_choice == '1'): - ndims = len(wrfdata.shape) - if(ndims == 3): - newshape = [wrfdata.shape[0], obsdata.shape[1], obsdata.shape[2]] - nT = obsdata.shape[0] + if(ndims == 2): + newshape = [obsdata.shape[0], obsdata.shape[1]] + nT = 0 - if(ndims == 2): - newshape = [obsdata.shape[0], obsdata.shape[1]] - nT = 0 + rdata2 = obsdata + lats, lons = obslats, obslons - rdata2 = obsdata - lats, lons = obslats, obslons + tmp = np.zeros(newshape) + rdata = ma.MaskedArray(tmp, mask=(tmp == 0)) + tmp = 0 # free up some memory - tmp = np.zeros(newshape) - rdata = ma.MaskedArray(tmp, mask=(tmp == 0)) - tmp = 0 # free up some memory + rdata[:] = 0.0 - rdata[:] = 0.0 - - if(nT > 0): - for t in np.arange(nT): - rdata[t, :, :] = do_regrid(wrfdata[t, :, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :]) + if(nT > 0): + for t in np.arange(nT): + rdata[t, :, :] = do_regrid(wrfdata[t, :, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :]) - if(nT == 0): - rdata[:, :] = do_regrid(wrfdata[:, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :]) + if(nT == 0): + rdata[:, :] = do_regrid(wrfdata[:, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :]) - - return rdata, rdata2, lats, lons + + return rdata, rdata2, lats, lons def extract_sub_time_selection(allTimes, subTimes, 
data): ''' Routine to extract a sub-selection of times from a data array. Example:: - Suppose a data array has 30 time values for daily data for a whole month, + Suppose a data array has 30 time values for daily data for a whole month, but you only want the data from the 5th-15th of the month. - + Input:: allTimes - list of datetimes describing what times the data in the data array correspond to subTimes - the times you want to extract data from data - the data array - + Output:: subdata - subselection of data array - + ''' # Create new array to store the subselection subdata = np.zeros([len(subTimes), data.shape[1], data.shape[2]]) - + # Loop over all Times and when it is a member of the required subselection, copy the data across i = 0 # counter for allTimes subi = 0 # counter for subTimes for t in allTimes: - if(np.setmember1d(allTimes, subTimes)): + if(np.in1d(allTimes, subTimes)): subdata[subi, :, :] = data[i, :, :] subi += 1 i += 1 - + return subdata def calc_average_on_new_time_unit_K(data, dateList, unit): - ''' - Routine to calculate averages on longer time units than the data exists on. - e.g. if the data is 6-hourly, calculate daily, or monthly means. - - Input: - data - data values - dateList - list of python datetime structures corresponding to data values - unit - string describing time unit to average onto - e.g. 'monthly', 'daily', 'pentad','annual','decadal' - Output: - meanstorem - numpy masked array of data values meaned over required time period - newTimesList - a list of python datetime objects representing the data in the new averagin period - NB. currently set to beginning of averaging period, - i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z. - ''' + """ + This function has been moved to the ocw/dataset_processor module + """ + from ocw import dataset_processor + temporally_regridded_data = dataset_processor._rcmes_calc_average_on_new_time_unit_K(data, dateList, unit) + return temporally_regridded_data + +def decode_model_timesK(ifile,timeVarName,file_type): + ################################################################################################# + # Convert model times ('hours since 1900...', 'days since ...') into a python datetime structure + # Input: + # filelist - list of model files + # timeVarName - name of the time variable in the model files + # Output: + # times - list of python datetime objects describing model data times + # Peter Lean February 2011 + ################################################################################################# + f = netCDF4.Dataset(ifile,mode='r',format=file_type) + xtimes = f.variables[timeVarName] + timeFormat = xtimes.units.encode() + #timeFormat = "days since 1979-01-01 00:00:00" + # search to check if 'since' appears in units + try: + sinceLoc = re.search('since',timeFormat).end() + except: + print 'Error decoding model times: time var attributes do not contain "since": Exit' + sys.exit() + # search for 'seconds','minutes','hours', 'days', 'months', 'years' so know units + # TODO: Swap out this section for a set of if...elseif statements + units = '' + try: + _ = re.search('minutes',timeFormat).end() + units = 'minutes' + except: + pass + try: + _ = re.search('hours',timeFormat).end() + units = 'hours' + except: + pass + try: + _ = re.search('days',timeFormat).end() + units = 'days' + except: + pass + try: + _ = re.search('months',timeFormat).end() + units = 'months' + except: + pass + try: + _ = re.search('years',timeFormat).end() + units = 'years' + except: + pass + # cut out base time 
(the bit following 'since') + base_time_string = string.lstrip(timeFormat[sinceLoc:]) + # decode base time + # 9/25/2012: datetime.timedelta has problem with the argument because xtimes is NioVariable. + # Soln (J. Kim): use a temp variable ttmp in the for loop, then convert it into an integer xtime. + base_time = decodeTimeFromString(base_time_string) + times=[] + for ttmp in xtimes[:]: + xtime = int(ttmp) + if(units=='minutes'): + dt = datetime.timedelta(minutes=xtime); new_time = base_time + dt + if(units=='hours'): + dt = datetime.timedelta(hours=xtime); new_time = base_time + dt + if(units=='days'): + dt = datetime.timedelta(days=xtime); new_time = base_time + dt + if(units=='months'): # NB. adding months in python is complicated as month length varies and hence ambigous. + # Perform date arithmatic manually + # Assumption: the base_date will usually be the first of the month + # NB. this method will fail if the base time is on the 29th or higher day of month + # -as can't have, e.g. Feb 31st. + new_month = int(base_time.month + xtime % 12) + new_year = int(math.floor(base_time.year + xtime / 12.)) + #print type(new_year),type(new_month),type(base_time.day),type(base_time.hour),type(base_time.second) + new_time = datetime.datetime(new_year,new_month,base_time.day,base_time.hour,base_time.second,0) + if(units=='years'): + dt = datetime.timedelta(years=xtime); new_time = base_time + dt + times.append(new_time) + return times + + +def regrid_in_time(data,dateList,unit): + ################################################################################################# + # Routine to calculate averages on longer time units than the data exists on. + # e.g. if the data is 6-hourly, calculate daily, or monthly means. + # Input: + # data - data values + # dateList - list of python datetime structures corresponding to data values + # unit - string describing time unit to average onto + # e.g. 'monthly', 'daily', 'pentad','annual','decadal' + # Output: + # meanstorem - numpy masked array of data values meaned over required time period + # newTimesList - a list of python datetime objects representing the data in the new averagin period + # NB. currently set to beginning of averaging period, + # i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z. + # .............................. + # Jinwon Kim, Sept 30, 2012 + # Created from calc_average_on_new_time_unit_K, Peter's original, with the modification below: + # Instead of masked array used by Peter, use wh to defined data within the averaging range. + ################################################################################################# + + print '*** Begin calc_average_on_new_time_unit_KK ***' # Check if the user-selected temporal grid is valid. If not, EXIT acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad') if not acceptable: - print 'Error: unknown unit type selected for time averaging: EXIT' - return -1,-1,-1,-1 - - # Calculate arrays of: annual timeseries: year (2007,2007), - # monthly time series: year-month (200701,200702), - # daily timeseries: year-month-day (20070101,20070102) - # depending on user-selected averaging period. 
+ print 'Error: unknown unit type selected for time averaging: EXIT'; return -1,-1,-1,-1 - # Year list - if unit=='annual': + # Calculate time arrays of: annual (yyyy, [2007]), monthly (yyyymm, [200701,200702]), daily (yyyymmdd, [20070101,20070102]) + # "%02d" is similar to i2.2 in Fortran + if unit=='annual': # YYYY timeunits = [] for i in np.arange(len(dateList)): timeunits.append(str(dateList[i].year)) - timeunits = np.array(timeunits, dtype=int) - - # YearMonth format list - if unit=='monthly': + elif unit=='monthly': # YYYYMM timeunits = [] for i in np.arange(len(dateList)): timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month)) - timeunits = np.array(timeunits,dtype=int) - - # YearMonthDay format list - if unit=='daily': + elif unit=='daily': # YYYYMMDD timeunits = [] for i in np.arange(len(dateList)): timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day)) - timeunits = np.array(timeunits,dtype=int) - - # TODO: add pentad setting using Julian days? - - # Full list: a special case - if unit == 'full': + elif unit=='full': # Full list: a special case comment='Calculating means data over the entire time range: i.e., annual-mean climatology' timeunits = [] for i in np.arange(len(dateList)): - timeunits.append(999) # i.e. we just want the same value for all times. - timeunits = np.array(timeunits, dtype=int) + timeunits.append(999) # i.e. we just want the same value for all times. + timeunits = np.array(timeunits,dtype=int) + print 'timeRegridOption= ',unit#'; output timeunits= ',timeunits + #print 'timeRegridOption= ',unit,'; output timeunits= ',timeunits - # empty list to store new times + # empty list to store time levels after temporal regridding newTimesList = [] # Decide whether or not you need to do any time averaging. - # i.e. if data are already on required time unit then just pass data through and - # calculate and return representative datetimes. + # i.e. if data are already on required time unit then just pass data through and calculate and return representative datetimes. processing_required = True if len(timeunits)==(len(np.unique(timeunits))): processing_required = False + print 'processing_required= ',processing_required,': input time steps= ',len(timeunits),': regridded output time steps = ',len(np.unique(timeunits)) + #print 'np.unique(timeunits): ',np.unique(timeunits) + print 'data.ndim= ',data.ndim - # 1D data arrays, i.e. time series - if data.ndim==1: - # Create array to store the resulting data + if data.ndim==1: # 1D data arrays, i.e. 1D time series + # Create array to store the temporally regridded data meanstore = np.zeros(len(np.unique(timeunits))) - # Calculate the means across each unique time unit i=0 for myunit in np.unique(timeunits): if processing_required: - datam=ma.masked_array(data,timeunits!=myunit) + wh = timeunits==myunit + datam = data[wh] meanstore[i] = ma.average(datam) - - # construct new times list + # construct new times list smyunit = str(myunit) if len(smyunit)==4: # YYYY yyyy = int(myunit[0:4]) @@ -1008,32 +960,21 @@ def calc_average_on_new_time_unit_K(data, dateList, unit): yyyy = int(halfway.year) mm = int(halfway.month) dd = int(halfway.day) - newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0)) i = i+1 - # 3D data arrays - if data.ndim==3: - # datamask = create_mask_using_threshold(data,threshold=0.75) + elif data.ndim==3: # 3D data arrays, i.e. 
2D time series # Create array to store the resulting data meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]]) - # Calculate the means across each unique time unit i=0 datamask_store = [] for myunit in np.unique(timeunits): if processing_required: - mask = np.zeros_like(data) - mask[timeunits!=myunit,:,:] = 1.0 - # Calculate missing data mask within each time unit... - datamask_at_this_timeunit = np.zeros_like(data) - datamask_at_this_timeunit[:]= create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75) - # Store results for masking later - datamask_store.append(datamask_at_this_timeunit[0]) - # Calculate means for each pixel in this time unit, ignoring missing data (using masked array). - datam = ma.masked_array(data,np.logical_or(mask,datamask_at_this_timeunit)) + wh = timeunits==myunit + datam = data[wh,:,:] meanstore[i,:,:] = ma.average(datam,axis=0) - # construct new times list + # construct new times list smyunit = str(myunit) if len(smyunit)==4: # YYYY yyyy = int(smyunit[0:4]) @@ -1059,226 +1000,8 @@ def calc_average_on_new_time_unit_K(data, dateList, unit): if not processing_required: meanstorem = data + elif processing_required: + meanstorem = meanstore - if processing_required: - # Create masked array (using missing data mask defined above) - datamask_store = np.array(datamask_store) - meanstorem = ma.masked_array(meanstore, datamask_store) - + print '*** End calc_average_on_new_time_unit_KK ***' return meanstorem,newTimesList - -def decode_model_timesK(ifile,timeVarName,file_type): - ################################################################################################# - # Convert model times ('hours since 1900...', 'days since ...') into a python datetime structure - # Input: - # filelist - list of model files - # timeVarName - name of the time variable in the model files - # Output: - # times - list of python datetime objects describing model data times - # Peter Lean February 2011 - ################################################################################################# - f = Nio.open_file(ifile,mode='r',options=None,format=file_type) - xtimes = f.variables[timeVarName] - timeFormat = xtimes.attributes['units'] - #timeFormat = "days since 1979-01-01 00:00:00" - # search to check if 'since' appears in units - try: - sinceLoc = re.search('since',timeFormat).end() - except: - print 'Error decoding model times: time var attributes do not contain "since": Exit' - sys.exit() - # search for 'seconds','minutes','hours', 'days', 'months', 'years' so know units - # TODO: Swap out this section for a set of if...elseif statements - units = '' - try: - _ = re.search('minutes',timeFormat).end() - units = 'minutes' - except: - pass - try: - _ = re.search('hours',timeFormat).end() - units = 'hours' - except: - pass - try: - _ = re.search('days',timeFormat).end() - units = 'days' - except: - pass - try: - _ = re.search('months',timeFormat).end() - units = 'months' - except: - pass - try: - _ = re.search('years',timeFormat).end() - units = 'years' - except: - pass - # cut out base time (the bit following 'since') - base_time_string = string.lstrip(timeFormat[sinceLoc:]) - # decode base time - # 9/25/2012: datetime.timedelta has problem with the argument because xtimes is NioVariable. - # Soln (J. Kim): use a temp variable ttmp in the for loop, then convert it into an integer xtime. 
- base_time = decodeTimeFromString(base_time_string) - times=[] - for ttmp in xtimes[:]: - xtime = int(ttmp) - if(units=='minutes'): - dt = datetime.timedelta(minutes=xtime); new_time = base_time + dt - if(units=='hours'): - dt = datetime.timedelta(hours=xtime); new_time = base_time + dt - if(units=='days'): - dt = datetime.timedelta(days=xtime); new_time = base_time + dt - if(units=='months'): # NB. adding months in python is complicated as month length varies and hence ambigous. - # Perform date arithmatic manually - # Assumption: the base_date will usually be the first of the month - # NB. this method will fail if the base time is on the 29th or higher day of month - # -as can't have, e.g. Feb 31st. - new_month = int(base_time.month + xtime % 12) - new_year = int(math.floor(base_time.year + xtime / 12.)) - #print type(new_year),type(new_month),type(base_time.day),type(base_time.hour),type(base_time.second) - new_time = datetime.datetime(new_year,new_month,base_time.day,base_time.hour,base_time.second,0) - if(units=='years'): - dt = datetime.timedelta(years=xtime); new_time = base_time + dt - times.append(new_time) - return times - - -def regrid_in_time(data,dateList,unit): - ################################################################################################# - # Routine to calculate averages on longer time units than the data exists on. - # e.g. if the data is 6-hourly, calculate daily, or monthly means. - # Input: - # data - data values - # dateList - list of python datetime structures corresponding to data values - # unit - string describing time unit to average onto - # e.g. 'monthly', 'daily', 'pentad','annual','decadal' - # Output: - # meanstorem - numpy masked array of data values meaned over required time period - # newTimesList - a list of python datetime objects representing the data in the new averagin period - # NB. currently set to beginning of averaging period, - # i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z. - # .............................. - # Jinwon Kim, Sept 30, 2012 - # Created from calc_average_on_new_time_unit_K, Peter's original, with the modification below: - # Instead of masked array used by Peter, use wh to defined data within the averaging range. - ################################################################################################# - - print '*** Begin calc_average_on_new_time_unit_KK ***' - # Check if the user-selected temporal grid is valid. 
If not, EXIT - acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad') - if not acceptable: - print 'Error: unknown unit type selected for time averaging: EXIT'; return -1,-1,-1,-1 - - # Calculate time arrays of: annual (yyyy, [2007]), monthly (yyyymm, [200701,200702]), daily (yyyymmdd, [20070101,20070102]) - # "%02d" is similar to i2.2 in Fortran - if unit=='annual': # YYYY - timeunits = [] - for i in np.arange(len(dateList)): - timeunits.append(str(dateList[i].year)) - elif unit=='monthly': # YYYYMM - timeunits = [] - for i in np.arange(len(dateList)): - timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month)) - elif unit=='daily': # YYYYMMDD - timeunits = [] - for i in np.arange(len(dateList)): - timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day)) - elif unit=='full': # Full list: a special case - comment='Calculating means data over the entire time range: i.e., annual-mean climatology' - timeunits = [] - for i in np.arange(len(dateList)): - timeunits.append(999) # i.e. we just want the same value for all times. - timeunits = np.array(timeunits,dtype=int) - print 'timeRegridOption= ',unit#'; output timeunits= ',timeunits - #print 'timeRegridOption= ',unit,'; output timeunits= ',timeunits - - # empty list to store time levels after temporal regridding - newTimesList = [] - - # Decide whether or not you need to do any time averaging. - # i.e. if data are already on required time unit then just pass data through and calculate and return representative datetimes. - processing_required = True - if len(timeunits)==(len(np.unique(timeunits))): - processing_required = False - print 'processing_required= ',processing_required,': input time steps= ',len(timeunits),': regridded output time steps = ',len(np.unique(timeunits)) - #print 'np.unique(timeunits): ',np.unique(timeunits) - print 'data.ndim= ',data.ndim - - if data.ndim==1: # 1D data arrays, i.e. 1D time series - # Create array to store the temporally regridded data - meanstore = np.zeros(len(np.unique(timeunits))) - # Calculate the means across each unique time unit - i=0 - for myunit in np.unique(timeunits): - if processing_required: - wh = timeunits==myunit - datam = data[wh] - meanstore[i] = ma.average(datam) - # construct new times list - smyunit = str(myunit) - if len(smyunit)==4: # YYYY - yyyy = int(myunit[0:4]) - mm = 1 - dd = 1 - if len(smyunit)==6: # YYYYMM - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = 1 - if len(smyunit)==8: # YYYYMMDD - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = int(smyunit[6:8]) - if len(smyunit)==3: # Full time range - # Need to set an appropriate time representing the mid-point of the entire time span - dt = dateList[-1]-dateList[0] - halfway = dateList[0]+(dt/2) - yyyy = int(halfway.year) - mm = int(halfway.month) - dd = int(halfway.day) - newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0)) - i = i+1 - - elif data.ndim==3: # 3D data arrays, i.e. 
2D time series - # Create array to store the resulting data - meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]]) - # Calculate the means across each unique time unit - i=0 - datamask_store = [] - for myunit in np.unique(timeunits): - if processing_required: - wh = timeunits==myunit - datam = data[wh,:,:] - meanstore[i,:,:] = ma.average(datam,axis=0) - # construct new times list - smyunit = str(myunit) - if len(smyunit)==4: # YYYY - yyyy = int(smyunit[0:4]) - mm = 1 - dd = 1 - if len(smyunit)==6: # YYYYMM - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = 1 - if len(smyunit)==8: # YYYYMMDD - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = int(smyunit[6:8]) - if len(smyunit)==3: # Full time range - # Need to set an appropriate time representing the mid-point of the entire time span - dt = dateList[-1]-dateList[0] - halfway = dateList[0]+(dt/2) - yyyy = int(halfway.year) - mm = int(halfway.month) - dd = int(halfway.day) - newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0)) - i += 1 - - if not processing_required: - meanstorem = data - elif processing_required: - meanstorem = meanstore - - print '*** End calc_average_on_new_time_unit_KK ***' - return meanstorem,newTimesList
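For orientation, the user-visible pattern this patch introduces is: open files with netCDF4 rather than Nio, read variable attributes directly, and let process.py delegate its heavy lifting to ocw.dataset_processor. A minimal usage sketch of the patched module follows; the file name 'merg_sample.nc' and the variable name 'time' are hypothetical placeholders, not values taken from this commit:

    import netCDF4
    import process  # the module patched above

    # netCDF4 exposes variable attributes as Python attributes; the old
    # Nio-based code read xtimes.attributes['units'] instead.
    f = netCDF4.Dataset('merg_sample.nc', mode='r')
    time_units = f.variables['time'].units
    f.close()

    # getModelTimes() performs the same decoding internally and also infers
    # the native time step of the data ('hourly', 'daily', 'monthly', 'annual').
    times, model_time_step = process.getModelTimes('merg_sample.nc', 'time')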
http://git-wip-us.apache.org/repos/asf/climate/blob/dc091596/mccsearch/docs/mccsearch.md
----------------------------------------------------------------------
diff --git a/mccsearch/docs/mccsearch.md b/mccsearch/docs/mccsearch.md
index 466e027..6b3ab1a 100644
--- a/mccsearch/docs/mccsearch.md
+++ b/mccsearch/docs/mccsearch.md
@@ -9,14 +9,14 @@ The data is read from netCDF files into arrays with the dimensions time, latitud
 ##What is needed?
 * Python 2.7.4 (We used Anaconda 1.5.1 64 bit libraries, which installed most of the dependencies.) Other module dependencies:
-   * Nio
-   * netCDF
-   * sciPy
-   * NumPy
-   * Networkx
-   * matplotlib
-   * GrADS (We used OpenGrADS grads2 Version 2.0.1.oga.1)
-   * LATS4D
+   * Nio - https://www.pyngl.ucar.edu/Nio.shtml
+   * netCDF - http://www.unidata.ucar.edu/software/netcdf/
+   * sciPy - http://www.scipy.org/scipylib/index.html
+   * NumPy - http://www.scipy.org/scipylib/download.html
+   * Networkx - https://networkx.github.io/download.html
+   * matplotlib - http://matplotlib.org/downloads.html
+   * GrADS (We used OpenGrADS grads2 Version 2.0.1.oga.1) - http://sourceforge.net/projects/opengrads/files/
+   * LATS4D - http://sourceforge.net/projects/opengrads/files/
 
 ##Download the source code and store in a folder
 * mccSearch.py contains all the function needed
@@ -24,7 +24,7 @@ The data is read from netCDF files into arrays with the dimensions time, latitud
 * process.py contains some needed functions (from older version of Apache OCW)
 * file.py contains some needed functions (from older version of Apache OCW)
 * mainProg.py contains a sample of the general workflow of the order the modules should be called. There are three main inputs you will have to supply:
-   * mainDirStr : this is the directory where you wish all the output files – images, textfiles, clipped netCDF files – to be store
+   * mainDirStr : this is the directory where you wish all the output files – images, textfiles, clipped netCDF files – to be stored
    * TRMMdirName : this is the directory where the original TRMM data in netCDF format is stored
    * CEoriDirName : this is the directory where the original MERG data in netCDF format is stored
 * Store the GrADsScripts folder (and contents) in a separate folder in the same main directory as the source code folder.
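The three mainProg.py inputs listed above are plain directory paths. A minimal sketch with hypothetical locations (the variable names come from the list above; the paths are placeholders to substitute for your own layout):

    # Hypothetical paths; substitute your own directory layout.
    mainDirStr = '/home/user/mccsearch/output'   # images, textfiles, clipped netCDF output
    TRMMdirName = '/home/user/data/TRMM'         # original TRMM data in netCDF format
    CEoriDirName = '/home/user/data/MERG'        # original MERG data in netCDF format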