Github user huikyole commented on a diff in the pull request:

    https://github.com/apache/climate/pull/384#discussion_r74102524

--- Diff: RCMES/run_RCMES.py ---
@@ -51,123 +65,110 @@
 time_info = config['time']
 temporal_resolution = time_info['temporal_resolution']
+# Read time info
 start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d')
 end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d')
+# Read space info
 space_info = config['space']
 min_lat = space_info['min_lat']
 max_lat = space_info['max_lat']
 min_lon = space_info['min_lon']
 max_lon = space_info['max_lon']
+kwargs = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon,
+          'max_lon': max_lon, 'start_time': start_time, 'end_time': end_time}
-ref_data_info = config['datasets']['reference']
+# Get the dataset loader options
+obs_data_info = config['datasets']['reference']
 model_data_info = config['datasets']['targets']
-if ref_data_info['data_source'] == 'ESGF' or model_data_info['data_source'] == 'ESGF':
-    username=raw_input('Enter your ESGF OpenID:\n')
-    password=getpass(prompt='Enter your ESGF password:\n')
-
-""" Step 1: Load the reference data """
-ref_lat_name = None
-ref_lon_name = None
-if 'latitude_name' in ref_data_info.keys():
-    ref_lat_name = ref_data_info['latitude_name']
-if 'longitude_name' in ref_data_info.keys():
-    ref_lon_name = ref_data_info['longitude_name']
-print 'Loading observation dataset:\n',ref_data_info
-ref_name = ref_data_info['data_name']
-if ref_data_info['data_source'] == 'local':
-    ref_dataset = local.load_file(ref_data_info['path'],
-                                  ref_data_info['variable'], name=ref_name,
-                                  lat_name=ref_lat_name, lon_name=ref_lon_name)
-elif ref_data_info['data_source'] == 'rcmed':
-    ref_dataset = rcmed.parameter_dataset(ref_data_info['dataset_id'],
-                                          ref_data_info['parameter_id'],
-                                          min_lat, max_lat, min_lon, max_lon,
-                                          start_time, end_time)
-elif ref_data_info['data_source'] == 'ESGF':
-    ds = esgf.load_dataset(dataset_id = ref_data_info['dataset_id'],
-                           variable = ref_data_info['variable'],
-                           esgf_username=username,
-                           esgf_password=password)
-    ref_dataset = ds[0]
-else:
-    print ' '
-if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
-    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, temporal_resolution)
-if 'multiplying_factor' in ref_data_info.keys():
-    ref_dataset.values = ref_dataset.values*ref_data_info['multiplying_factor']
+
+# Extract info we don't want to put into the loader config
+# Multiplying Factor to scale obs by
+multiplying_factor = np.ones(len(obs_data_info))
+for i, info in enumerate(obs_data_info):
+    if 'multiplying_factor' in info:
+        multiplying_factor[i] = info.pop('multiplying_factor')
+
+# If models are GCMs we can skip boundary check. Probably need to find a more
+# elegant way to express this in the config file API.
+boundary_check = True
+for i, info in enumerate(model_data_info):
+    if 'boundary_check' in info:
+        boundary_check = info.pop('boundary_check')
+
+""" Step 1: Load the observation data """
+print 'Loading observation datasets:\n',obs_data_info
+obs_datasets = load_datasets_from_config(*obs_data_info, **kwargs)
+obs_names = [dataset.name for dataset in obs_datasets]
+for i, dataset in enumerate(obs_datasets):
+    if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
+        obs_datasets[i] = dsp.normalize_dataset_datetimes(dataset,
+                                                          temporal_resolution)
+
+    if multiplying_factor[i] != 1:
+        obs_dataset.values *= multiplying_factor[i]
 """ Step 2: Load model NetCDF Files into OCW Dataset Objects """
-model_lat_name = None
-model_lon_name = None
-if 'latitude_name' in model_data_info.keys():
-    model_lat_name = model_data_info['latitude_name']
-if 'longitude_name' in model_data_info.keys():
-    model_lon_name = model_data_info['longitude_name']
-boundary_check_model = True
-if 'GCM_data' in model_data_info.keys():
-    if model_data_info['GCM_data']:
-        boundary_check_model = False
-print 'Loading model datasets:\n',model_data_info
-if model_data_info['data_source'] == 'local':
-    model_datasets = local.load_multiple_files(file_path=model_data_info['path'],
-                                               variable_name =model_data_info['variable'],
-                                               lat_name=model_lat_name, lon_name=model_lon_name)
-    model_names = [dataset.name for dataset in model_datasets]
-elif model_data_info['data_source'] == 'ESGF':
-    md = esgf.load_dataset(dataset_id=model_data_info['dataset_id'],
-                           variable=model_data_info['variable'],
-                           esgf_username=username,
-                           esgf_password=password)
-    model_datasets = []
-    model_names = []
-    model_datasets.append(md[0])
-    model_names.append(model_data_info['data_name'])
-else:
-    print ' '
-    # TO DO: support RCMED
+model_datasets = load_datasets_from_config(*model_data_info, **kwargs)
+model_names = [dataset.name for dataset in model_datasets]
 if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
-    for idata,dataset in enumerate(model_datasets):
-        model_datasets[idata] = dsp.normalize_dataset_datetimes(dataset, temporal_resolution)
+    for i, dataset in enumerate(model_datasets):
+        model_datasets[i] = dsp.normalize_dataset_datetimes(dataset,
+                                                            temporal_resolution)
 """ Step 3: Subset the data for temporal and spatial domain """
 # Create a Bounds object to use for subsetting
 if time_info['maximum_overlap_period']:
-    start_time, end_time = utils.get_temporal_overlap([ref_dataset]+model_datasets)
+    start_time, end_time = utils.get_temporal_overlap(obs_datasets+model_datasets)
     print 'Maximum overlap period'
     print 'start_time:', start_time
     print 'end_time:', end_time
 if temporal_resolution == 'monthly' and end_time.day !=1:
-    end_time = end_time.replace(day=1)
-if ref_data_info['data_source'] == 'rcmed':
-    min_lat = np.max([min_lat, ref_dataset.lats.min()])
-    max_lat = np.min([max_lat, ref_dataset.lats.max()])
-    min_lon = np.max([min_lon, ref_dataset.lons.min()])
-    max_lon = np.min([max_lon, ref_dataset.lons.max()])
-bounds = Bounds(lat_min=min_lat, lat_max=max_lat, lon_min=min_lon, lon_max=max_lon, start=start_time, end=end_time)
-
-ref_dataset = dsp.subset(ref_dataset, bounds)
-if ref_dataset.temporal_resolution() != temporal_resolution:
-    ref_dataset = dsp.temporal_rebin(ref_dataset, temporal_resolution)
-for idata,dataset in enumerate(model_datasets):
-    model_datasets[idata] = dsp.subset(dataset, bounds)
+    end_time = end_time.replace(day=1)
+
+for i, dataset in enumerate(obs_datasets):
+    min_lat = np.max([min_lat, dataset.lats.min()])
+    max_lat = np.min([max_lat, dataset.lats.max()])
+    min_lon = np.max([min_lon, dataset.lons.min()])
+    max_lon = np.min([max_lon, dataset.lons.max()])
+
+bounds = Bounds(lat_min=min_lat,
+                lat_max=max_lat,
+                lon_min=min_lon,
+                lon_max=max_lon,
+                start=start_time,
+                end=end_time)
+
+for i, dataset in enumerate(obs_datasets):
+    obs_datasets[i] = dsp.subset(dataset, bounds)
+    if dataset.temporal_resolution() != temporal_resolution:
+        obs_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution)
+
+for i, dataset in enumerate(model_datasets):
+    model_datasets[i] = dsp.subset(dataset, bounds)
     if dataset.temporal_resolution() != temporal_resolution:
-        model_datasets[idata] = dsp.temporal_rebin(dataset, temporal_resolution)
+        model_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution)
-# Temporaly subset both observation and model datasets for the user specified season
+# Temporally subset both observation and model datasets
+# for the user specified season
 month_start = time_info['month_start']
 month_end = time_info['month_end']
 average_each_year = time_info['average_each_year']
-ref_dataset = dsp.temporal_subset(ref_dataset,month_start, month_end,average_each_year)
-for idata,dataset in enumerate(model_datasets):
-    model_datasets[idata] = dsp.temporal_subset(dataset,month_start, month_end,average_each_year)
+# TODO: Fully support multiple observation / reference datasets.
--- End diff --

We need to open another JIRA issue for this.
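For context, once multiple reference datasets are fully supported, the seasonal subset step would presumably just mirror the per-dataset loops above. A rough, illustrative sketch only (not something this PR adds), reusing only names already defined in the diff:

    # Sketch: apply the seasonal subset to every observation and model dataset,
    # mirroring the existing per-dataset loops.
    for i, dataset in enumerate(obs_datasets):
        obs_datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
                                              average_each_year)
    for i, dataset in enumerate(model_datasets):
        model_datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
                                                average_each_year)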