[ https://issues.apache.org/jira/browse/CLIMATE-838?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15413869#comment-15413869 ]
ASF GitHub Bot commented on CLIMATE-838:
----------------------------------------

Github user huikyole commented on a diff in the pull request:

    https://github.com/apache/climate/pull/384#discussion_r74102524

--- Diff: RCMES/run_RCMES.py ---
@@ -51,123 +65,110 @@
 time_info = config['time']
 temporal_resolution = time_info['temporal_resolution']
+# Read time info
 start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d')
 end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d')
+# Read space info
 space_info = config['space']
 min_lat = space_info['min_lat']
 max_lat = space_info['max_lat']
 min_lon = space_info['min_lon']
 max_lon = space_info['max_lon']
+kwargs = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon,
+          'max_lon': max_lon, 'start_time': start_time, 'end_time': end_time}
-ref_data_info = config['datasets']['reference']
+# Get the dataset loader options
+obs_data_info = config['datasets']['reference']
 model_data_info = config['datasets']['targets']
-if ref_data_info['data_source'] == 'ESGF' or model_data_info['data_source'] == 'ESGF':
-    username=raw_input('Enter your ESGF OpenID:\n')
-    password=getpass(prompt='Enter your ESGF password:\n')
-
-""" Step 1: Load the reference data """
-ref_lat_name = None
-ref_lon_name = None
-if 'latitude_name' in ref_data_info.keys():
-    ref_lat_name = ref_data_info['latitude_name']
-if 'longitude_name' in ref_data_info.keys():
-    ref_lon_name = ref_data_info['longitude_name']
-print 'Loading observation dataset:\n',ref_data_info
-ref_name = ref_data_info['data_name']
-if ref_data_info['data_source'] == 'local':
-    ref_dataset = local.load_file(ref_data_info['path'],
-                                  ref_data_info['variable'], name=ref_name,
-                                  lat_name=ref_lat_name, lon_name=ref_lon_name)
-elif ref_data_info['data_source'] == 'rcmed':
-    ref_dataset = rcmed.parameter_dataset(ref_data_info['dataset_id'],
-                                          ref_data_info['parameter_id'],
-                                          min_lat, max_lat, min_lon, max_lon,
-                                          start_time, end_time)
-elif ref_data_info['data_source'] == 'ESGF':
-    ds = esgf.load_dataset(dataset_id = ref_data_info['dataset_id'],
-                           variable = ref_data_info['variable'],
-                           esgf_username=username,
-                           esgf_password=password)
-    ref_dataset = ds[0]
-else:
-    print ' '
-if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
-    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, temporal_resolution)
-if 'multiplying_factor' in ref_data_info.keys():
-    ref_dataset.values = ref_dataset.values*ref_data_info['multiplying_factor']
+
+# Extract info we don't want to put into the loader config
+# Multiplying Factor to scale obs by
+multiplying_factor = np.ones(len(obs_data_info))
+for i, info in enumerate(obs_data_info):
+    if 'multiplying_factor' in info:
+        multiplying_factor[i] = info.pop('multiplying_factor')
+
+# If models are GCMs we can skip boundary check. Probably need to find a more
+# elegant way to express this in the config file API.
+boundary_check = True
+for i, info in enumerate(model_data_info):
+    if 'boundary_check' in info:
+        boundary_check = info.pop('boundary_check')
+
+""" Step 1: Load the observation data """
+print 'Loading observation datasets:\n',obs_data_info
+obs_datasets = load_datasets_from_config(*obs_data_info, **kwargs)
+obs_names = [dataset.name for dataset in obs_datasets]
+for i, dataset in enumerate(obs_datasets):
+    if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
+        obs_datasets[i] = dsp.normalize_dataset_datetimes(dataset,
+                                                          temporal_resolution)
+
+    if multiplying_factor[i] != 1:
+        obs_datasets[i].values *= multiplying_factor[i]
 
 """ Step 2: Load model NetCDF Files into OCW Dataset Objects """
-model_lat_name = None
-model_lon_name = None
-if 'latitude_name' in model_data_info.keys():
-    model_lat_name = model_data_info['latitude_name']
-if 'longitude_name' in model_data_info.keys():
-    model_lon_name = model_data_info['longitude_name']
-boundary_check_model = True
-if 'GCM_data' in model_data_info.keys():
-    if model_data_info['GCM_data']:
-        boundary_check_model = False
-print 'Loading model datasets:\n',model_data_info
-if model_data_info['data_source'] == 'local':
-    model_datasets = local.load_multiple_files(file_path=model_data_info['path'],
-                                               variable_name=model_data_info['variable'],
-                                               lat_name=model_lat_name, lon_name=model_lon_name)
-    model_names = [dataset.name for dataset in model_datasets]
-elif model_data_info['data_source'] == 'ESGF':
-    md = esgf.load_dataset(dataset_id=model_data_info['dataset_id'],
-                           variable=model_data_info['variable'],
-                           esgf_username=username,
-                           esgf_password=password)
-    model_datasets = []
-    model_names = []
-    model_datasets.append(md[0])
-    model_names.append(model_data_info['data_name'])
-else:
-    print ' '
-    # TO DO: support RCMED
+model_datasets = load_datasets_from_config(*model_data_info, **kwargs)
+model_names = [dataset.name for dataset in model_datasets]
 if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
-    for idata,dataset in enumerate(model_datasets):
-        model_datasets[idata] = dsp.normalize_dataset_datetimes(dataset, temporal_resolution)
+    for i, dataset in enumerate(model_datasets):
+        model_datasets[i] = dsp.normalize_dataset_datetimes(dataset,
+                                                            temporal_resolution)
 
 """ Step 3: Subset the data for temporal and spatial domain """
 # Create a Bounds object to use for subsetting
 if time_info['maximum_overlap_period']:
-    start_time, end_time = utils.get_temporal_overlap([ref_dataset]+model_datasets)
+    start_time, end_time = utils.get_temporal_overlap(obs_datasets+model_datasets)
     print 'Maximum overlap period'
     print 'start_time:', start_time
     print 'end_time:', end_time
 if temporal_resolution == 'monthly' and end_time.day != 1:
-    end_time = end_time.replace(day=1)
-if ref_data_info['data_source'] == 'rcmed':
-    min_lat = np.max([min_lat, ref_dataset.lats.min()])
-    max_lat = np.min([max_lat, ref_dataset.lats.max()])
-    min_lon = np.max([min_lon, ref_dataset.lons.min()])
-    max_lon = np.min([max_lon, ref_dataset.lons.max()])
-bounds = Bounds(lat_min=min_lat, lat_max=max_lat, lon_min=min_lon, lon_max=max_lon, start=start_time, end=end_time)
-
-ref_dataset = dsp.subset(ref_dataset, bounds)
-if ref_dataset.temporal_resolution() != temporal_resolution:
-    ref_dataset = dsp.temporal_rebin(ref_dataset, temporal_resolution)
-for idata,dataset in enumerate(model_datasets):
-    model_datasets[idata] = dsp.subset(dataset, bounds)
+    end_time = end_time.replace(day=1)
+
+for i, dataset in enumerate(obs_datasets):
+    min_lat = np.max([min_lat, dataset.lats.min()])
+    max_lat = np.min([max_lat, dataset.lats.max()])
+    min_lon = np.max([min_lon, dataset.lons.min()])
+    max_lon = np.min([max_lon, dataset.lons.max()])
+
+bounds = Bounds(lat_min=min_lat,
+                lat_max=max_lat,
+                lon_min=min_lon,
+                lon_max=max_lon,
+                start=start_time,
+                end=end_time)
+
+for i, dataset in enumerate(obs_datasets):
+    obs_datasets[i] = dsp.subset(dataset, bounds)
+    if dataset.temporal_resolution() != temporal_resolution:
+        obs_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution)
+
+for i, dataset in enumerate(model_datasets):
+    model_datasets[i] = dsp.subset(dataset, bounds)
     if dataset.temporal_resolution() != temporal_resolution:
-        model_datasets[idata] = dsp.temporal_rebin(dataset, temporal_resolution)
+        model_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution)
 
-# Temporaly subset both observation and model datasets for the user specified season
+# Temporally subset both observation and model datasets
+# for the user specified season
 month_start = time_info['month_start']
 month_end = time_info['month_end']
 average_each_year = time_info['average_each_year']
-ref_dataset = dsp.temporal_subset(ref_dataset,month_start, month_end,average_each_year)
-for idata,dataset in enumerate(model_datasets):
-    model_datasets[idata] = dsp.temporal_subset(dataset,month_start, month_end,average_each_year)
+# TODO: Fully support multiple observation / reference datasets.
--- End diff --

We need to open another JIRA issue for this.

> Update configuration files and example scripts to use new DatasetLoader
> -----------------------------------------------------------------------
>
>                 Key: CLIMATE-838
>                 URL: https://issues.apache.org/jira/browse/CLIMATE-838
>             Project: Apache Open Climate Workbench
>          Issue Type: Improvement
>          Components: config, data sources
>    Affects Versions: 1.1.0
>            Reporter: Alex Goodman
>            Assignee: Huikyo Lee
>             Fix For: 1.2.0
>
>
> The new {{DatasetLoader}} class found in {{dataset_loader.py}} was designed to do the following:
> 1) Unify the loader functions for each data source in one module.
> 2) Greatly simplify the process of parsing input from configuration files into the loaders.
> 3) Allow users to easily load datasets from many different data sources in a single evaluation run.
> The unit tests I have written can test some of this functionality, but it is now time to put it into practice. This means updating the configuration files and the associated evaluation scripts (e.g. run_RCMES.py).
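For illustration, the {{load_datasets_from_config(*obs_data_info, **kwargs)}} calls in the diff above suggest a dispatch pattern along the lines sketched below. This is only a sketch: {{load_datasets_from_config}} appears in the diff, but the stub loaders here are hypothetical stand-ins for OCW's real {{local}}, {{rcmed}}, and {{esgf}} data source modules, and the actual implementation in {{dataset_loader.py}} may differ in names and signatures.

{code}
# Sketch only: a unified loader that dispatches on each config entry's
# 'data_source' key. The _load_* functions are hypothetical stand-ins.

def _load_local(**opts):
    # Stand-in for e.g. local.load_file(path, variable, ...).
    return opts  # a real loader would return an OCW Dataset

def _load_rcmed(**opts):
    # Stand-in for e.g. rcmed.parameter_dataset(...).
    return opts

def load_datasets_from_config(*dataset_infos, **subset_kwargs):
    """Load one dataset per options dict, whatever its data source."""
    loaders = {'local': _load_local, 'rcmed': _load_rcmed}
    datasets = []
    for info in dataset_infos:
        opts = dict(info)                 # don't mutate the caller's config
        opts.update(subset_kwargs)        # fold in shared space/time bounds
        source = opts.pop('data_source')  # pick the per-source loader
        datasets.append(loaders[source](**opts))
    return datasets

# Usage mirrors run_RCMES.py: one call covers all configured sources.
obs_datasets = load_datasets_from_config(
    {'data_source': 'local', 'path': 'obs.nc', 'variable': 'pcp'},
    {'data_source': 'rcmed', 'dataset_id': 3, 'parameter_id': 36},
    min_lat=25.0, max_lat=50.0, min_lon=-130.0, max_lon=-60.0)
{code}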
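Likewise, the keys the script reads ({{config['time']}}, {{config['space']}}, and {{config['datasets']['reference']}}/{{['targets']}} as lists of option dicts) imply a parsed configuration shaped roughly as follows. The concrete key names inside each dataset entry ('data_source', 'path', 'variable', and the example ID values) are assumptions for illustration based on the old loader calls, not a documented schema.

{code}
from datetime import datetime

# Hypothetical parsed configuration matching the keys run_RCMES.py reads.
config = {
    'time': {
        'temporal_resolution': 'monthly',
        'start_time': datetime(1998, 1, 1),
        'end_time': datetime(2007, 12, 31),
        'maximum_overlap_period': False,
        'month_start': 1,
        'month_end': 12,
        'average_each_year': False,
    },
    'space': {
        'min_lat': 25.0, 'max_lat': 50.0,
        'min_lon': -130.0, 'max_lon': -60.0,
    },
    'datasets': {
        # Lists of per-dataset option dicts; the script pops
        # 'multiplying_factor' (obs) and 'boundary_check' (models)
        # before unpacking the rest into load_datasets_from_config.
        'reference': [
            {'data_source': 'rcmed', 'data_name': 'TRMM',
             'dataset_id': 3, 'parameter_id': 36,
             'multiplying_factor': 1.0},
        ],
        'targets': [
            {'data_source': 'local', 'path': './models/wrf_prec.nc',
             'variable': 'pr', 'boundary_check': False},
        ],
    },
}
{code}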