Repository: climate Updated Branches: refs/heads/master 9263dc60c -> ece260d1c
CLIMATE-687 - A new loader to read GPM precipitation data with a file list - ocw.data_source.local.load_GPM_IMERG_files is added Project: http://git-wip-us.apache.org/repos/asf/climate/repo Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/32b184d7 Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/32b184d7 Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/32b184d7 Branch: refs/heads/master Commit: 32b184d7c53d44b82d00e7ebf5da6cf46a0e8992 Parents: 9263dc6 Author: huikyole <[email protected]> Authored: Wed Oct 7 16:40:33 2015 -0700 Committer: huikyole <[email protected]> Committed: Wed Oct 7 16:40:33 2015 -0700 ---------------------------------------------------------------------- ocw/data_source/local.py | 57 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/climate/blob/32b184d7/ocw/data_source/local.py ---------------------------------------------------------------------- diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py index 60fcb50..97e482b 100644 --- a/ocw/data_source/local.py +++ b/ocw/data_source/local.py @@ -27,6 +27,7 @@ from ocw.dataset import Dataset import ocw.utils as utils import netCDF4 +import h5py import numpy import numpy.ma as ma @@ -330,3 +331,59 @@ def load_multiple_files(file_path, lat_name=lat_name, lon_name=lon_name, time_name=time_name)) return datasets, data_name + +def load_GPM_IMERG_files(file_path=None, + filename_pattern=None, + filelist=None, + variable_name='precipitationCal', + name='GPM_IMERG'): + ''' Load multiple GPM Level 3 IMERG files containing calibrated precipitation and generate an OCW Dataset object. + :param file_path: Directory to the HDF files to load. + :type file_path: :mod:`string` + :param filename_pattern: Path to the HDF files to load. 
+ :type filename_pattern: :list:`string` + :param filelist: A list of filenames + :type filelist: :list:`string` + :param variable_name: The variable name to load from the HDF file. + :type variable_name: :mod:`string` + :param name: (Optional) A name for the loaded dataset. + :type name: :mod:`string` + :returns: An OCW Dataset object with the requested variable's data from + the HDF file. + :rtype: :class:`dataset.Dataset` + :raises ValueError: + ''' + + if not filelist: + GPM_files = [] + for pattern in filename_pattern: + GPM_files.extend(glob(file_path + pattern)) + else: + GPM_files = [line.rstrip('\n') for line in open(filelist)] + + GPM_files.sort() + + file_object_first = h5py.File(GPM_files[0]) + lats = file_object_first['Grid']['lat'][:] + lons = file_object_first['Grid']['lon'][:] + + lons, lats = np.meshgrid(lons, lats) + + times = [] + nfile = len(GPM_files) + for ifile, file in enumerate(GPM_files): + print 'Reading file '+str(ifile+1)+'/'+str(nfile), file + file_object = h5py.File(file) + time_struct_parsed = strptime(file[-39:-23],"%Y%m%d-S%H%M%S") + for ihour in numpy.arange(24): + times.append(datetime(*time_struct_parsed[:6])) + values0= ma.masked_less(file_object['Grid'][variable_name][:], 0.) + if ifile == 0: + values = values0 + variable_unit = file_object.variables[variable_name].units + else: + values = numpy.concatenate((values, values0)) + file_object.close() + times = numpy.array(times) + return Dataset(lats, lons, times, values, variable_name, units=variable_unit, name=name) +
