Github user huikyole commented on a diff in the pull request:

    https://github.com/apache/climate/pull/384#discussion_r74102524

--- Diff: RCMES/run_RCMES.py ---
@@ -51,123 +65,110 @@
 time_info = config['time']
 temporal_resolution = time_info['temporal_resolution']
+# Read time info
 start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d')
 end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d')
+# Read space info
 space_info = config['space']
 min_lat = space_info['min_lat']
 max_lat = space_info['max_lat']
 min_lon = space_info['min_lon']
 max_lon = space_info['max_lon']
+kwargs = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon,
+          'max_lon': max_lon, 'start_time': start_time, 'end_time': end_time}
-ref_data_info = config['datasets']['reference']
+# Get the dataset loader options
+obs_data_info = config['datasets']['reference']
 model_data_info = config['datasets']['targets']
-if ref_data_info['data_source'] == 'ESGF' or model_data_info['data_source'] == 'ESGF':
-    username=raw_input('Enter your ESGF OpenID:\n')
-    password=getpass(prompt='Enter your ESGF password:\n')
-
-""" Step 1: Load the reference data """
-ref_lat_name = None
-ref_lon_name = None
-if 'latitude_name' in ref_data_info.keys():
-    ref_lat_name = ref_data_info['latitude_name']
-if 'longitude_name' in ref_data_info.keys():
-    ref_lon_name = ref_data_info['longitude_name']
-print 'Loading observation dataset:\n',ref_data_info
-ref_name = ref_data_info['data_name']
-if ref_data_info['data_source'] == 'local':
-    ref_dataset = local.load_file(ref_data_info['path'],
-                                  ref_data_info['variable'], name=ref_name,
-                                  lat_name=ref_lat_name, lon_name=ref_lon_name)
-elif ref_data_info['data_source'] == 'rcmed':
-    ref_dataset = rcmed.parameter_dataset(ref_data_info['dataset_id'],
-                                          ref_data_info['parameter_id'],
-                                          min_lat, max_lat, min_lon, max_lon,
-                                          start_time, end_time)
-elif ref_data_info['data_source'] == 'ESGF':
-    ds = esgf.load_dataset(dataset_id = ref_data_info['dataset_id'],
-                           variable = ref_data_info['variable'],
-                           esgf_username=username,
-                           esgf_password=password)
-    ref_dataset = ds[0]
-else:
-    print ' '
-if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
-    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, temporal_resolution)
-if 'multiplying_factor' in ref_data_info.keys():
-    ref_dataset.values = ref_dataset.values*ref_data_info['multiplying_factor']
+
+# Extract info we don't want to put into the loader config
+# Multiplying Factor to scale obs by
+multiplying_factor = np.ones(len(obs_data_info))
+for i, info in enumerate(obs_data_info):
+    if 'multiplying_factor' in info:
+        multiplying_factor[i] = info.pop('multiplying_factor')
+
+# If models are GCMs we can skip boundary check. Probably need to find a more
+# elegant way to express this in the config file API.
+boundary_check = True
+for i, info in enumerate(model_data_info):
+    if 'boundary_check' in info:
+        boundary_check = info.pop('boundary_check')
+
+""" Step 1: Load the observation data """
+print 'Loading observation datasets:\n',obs_data_info
+obs_datasets = load_datasets_from_config(*obs_data_info, **kwargs)
+obs_names = [dataset.name for dataset in obs_datasets]
+for i, dataset in enumerate(obs_datasets):
+    if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
+        obs_datasets[i] = dsp.normalize_dataset_datetimes(dataset,
+                                                          temporal_resolution)
+
+    if multiplying_factor[i] != 1:
+        obs_dataset.values *= multiplying_factor[i]
 """ Step 2: Load model NetCDF Files into OCW Dataset Objects """
-model_lat_name = None
-model_lon_name = None
-if 'latitude_name' in model_data_info.keys():
-    model_lat_name = model_data_info['latitude_name']
-if 'longitude_name' in model_data_info.keys():
-    model_lon_name = model_data_info['longitude_name']
-boundary_check_model = True
-if 'GCM_data' in model_data_info.keys():
-    if model_data_info['GCM_data']:
-        boundary_check_model = False
-print 'Loading model datasets:\n',model_data_info
-if model_data_info['data_source'] == 'local':
-    model_datasets = local.load_multiple_files(file_path=model_data_info['path'],
-                                               variable_name =model_data_info['variable'],
-                                               lat_name=model_lat_name, lon_name=model_lon_name)
-    model_names = [dataset.name for dataset in model_datasets]
-elif model_data_info['data_source'] == 'ESGF':
-    md = esgf.load_dataset(dataset_id=model_data_info['dataset_id'],
-                           variable=model_data_info['variable'],
-                           esgf_username=username,
-                           esgf_password=password)
-    model_datasets = []
-    model_names = []
-    model_datasets.append(md[0])
-    model_names.append(model_data_info['data_name'])
-else:
-    print ' '
-    # TO DO: support RCMED
+model_datasets = load_datasets_from_config(*model_data_info, **kwargs)
+model_names = [dataset.name for dataset in model_datasets]
 if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
-    for idata,dataset in enumerate(model_datasets):
-        model_datasets[idata] = dsp.normalize_dataset_datetimes(dataset, temporal_resolution)
+    for i, dataset in enumerate(model_datasets):
+        model_datasets[i] = dsp.normalize_dataset_datetimes(dataset,
+                                                            temporal_resolution)
 """ Step 3: Subset the data for temporal and spatial domain """
 # Create a Bounds object to use for subsetting
 if time_info['maximum_overlap_period']:
-    start_time, end_time = utils.get_temporal_overlap([ref_dataset]+model_datasets)
+    start_time, end_time = utils.get_temporal_overlap(obs_datasets+model_datasets)
     print 'Maximum overlap period'
     print 'start_time:', start_time
     print 'end_time:', end_time
 if temporal_resolution == 'monthly' and end_time.day !=1:
-    end_time = end_time.replace(day=1)
-if ref_data_info['data_source'] == 'rcmed':
-    min_lat = np.max([min_lat, ref_dataset.lats.min()])
-    max_lat = np.min([max_lat, ref_dataset.lats.max()])
-    min_lon = np.max([min_lon, ref_dataset.lons.min()])
-    max_lon = np.min([max_lon, ref_dataset.lons.max()])
-bounds = Bounds(lat_min=min_lat, lat_max=max_lat, lon_min=min_lon, lon_max=max_lon, start=start_time, end=end_time)
-
-ref_dataset = dsp.subset(ref_dataset, bounds)
-if ref_dataset.temporal_resolution() != temporal_resolution:
-    ref_dataset = dsp.temporal_rebin(ref_dataset, temporal_resolution)
-for idata,dataset in enumerate(model_datasets):
-    model_datasets[idata] = dsp.subset(dataset, bounds)
+    end_time = end_time.replace(day=1)
+
+for i, dataset in enumerate(obs_datasets):
+    min_lat = np.max([min_lat, dataset.lats.min()])
+    max_lat = np.min([max_lat, dataset.lats.max()])
+    min_lon = np.max([min_lon, dataset.lons.min()])
+    max_lon = np.min([max_lon, dataset.lons.max()])
+
+bounds = Bounds(lat_min=min_lat,
+                lat_max=max_lat,
+                lon_min=min_lon,
+                lon_max=max_lon,
+                start=start_time,
+                end=end_time)
+
+for i, dataset in enumerate(obs_datasets):
+    obs_datasets[i] = dsp.subset(dataset, bounds)
+    if dataset.temporal_resolution() != temporal_resolution:
+        obs_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution)
+
+for i, dataset in enumerate(model_datasets):
+    model_datasets[i] = dsp.subset(dataset, bounds)
     if dataset.temporal_resolution() != temporal_resolution:
-        model_datasets[idata] = dsp.temporal_rebin(dataset, temporal_resolution)
+        model_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution)
-# Temporaly subset both observation and model datasets for the user specified season
+# Temporally subset both observation and model datasets
+# for the user specified season
 month_start = time_info['month_start']
 month_end = time_info['month_end']
 average_each_year = time_info['average_each_year']
-ref_dataset = dsp.temporal_subset(ref_dataset,month_start, month_end,average_each_year)
-for idata,dataset in enumerate(model_datasets):
-    model_datasets[idata] = dsp.temporal_subset(dataset,month_start, month_end,average_each_year)
+# TODO: Fully support multiple observation / reference datasets.
--- End diff --

We need to open another JIRA issue for this.
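For context, once multiple reference datasets are fully supported, the seasonal subset step would presumably just mirror the per-dataset loops above. A rough, illustrative sketch only (not something this PR adds), reusing only names already defined in the diff:

    # Sketch: apply the seasonal subset to every observation and model dataset,
    # mirroring the existing per-dataset loops.
    for i, dataset in enumerate(obs_datasets):
        obs_datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
                                              average_each_year)
    for i, dataset in enumerate(model_datasets):
        model_datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
                                                average_each_year)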