Adrian - are you going to commit this? Or do you want one of us to do so for you?
On Dec 9, 2013, at 1:39 PM, Josh Hursey <jjhur...@open-mpi.org> wrote: > With the modification that Ralph mentioned below, I think the patch is good > to go. > > Thanks! > > > On Mon, Dec 9, 2013 at 2:19 PM, Ralph Castain <r...@open-mpi.org> wrote: > On Dec 9, 2013, at 10:07 AM, Ralph Castain <r...@open-mpi.org> wrote: > > > I see some things in here that concern me. First, there are variables being > > added to functions that would appear to generate "not used" warnings if ft > > is not enabled - they need to be properly protected. Second, I see > > references like this one: > > > > - (ret = orte_oob.ft_event(state)) ) { > > + if( ORTE_SUCCESS != (ret = orte_rml_oob_module.super.ft_event(state)) > > ) { > > Took me a while to grok what you were doing with the above line - it's > actually okay, but you could avoid going thru the module dereference by just > calling > > orte_rml_oob_ft_event > > No need to reference thru the module unless you want to for some reason. > > > > > > This doesn't seem right - if we are referencing the OOB, then we need to go > > directly to it. I'll have to check/correct the code, but the RML shouldn't > > even be storing a pointer to the OOB in it as there no longer is a direct > > linkage. > > > > > > On Dec 9, 2013, at 5:38 AM, Adrian Reber <adr...@lisas.de> wrote: > > > >> From: Adrian Reber <adrian.re...@hs-esslingen.de> > >> > >> These are the remaining changes to get C/R to compile again. This patch > >> includes various fixes all over the C/R code and are hard to group > >> like the previous patches. 
> >> > >> Changes from V1: > >> * explain why mca_base_component_distill_checkpoint_ready no longer works > >> * compare return result of opal functions with OPAL_* values > >> > >> Signed-off-by: Adrian Reber <adrian.re...@hs-esslingen.de> > >> --- > >> ompi/mca/bml/r2/bml_r2_ft.c | 10 +++++----- > >> opal/mca/base/mca_base_components_open.c | 9 +++++++++ > >> opal/mca/crs/self/crs_self_component.c | 16 ++++++++-------- > >> opal/tools/opal-restart/opal-restart.c | 2 +- > >> orte/mca/errmgr/base/errmgr_base_fns.c | 2 +- > >> orte/mca/ess/env/ess_env_module.c | 2 +- > >> orte/mca/plm/base/plm_base_launch_support.c | 1 + > >> orte/mca/rml/oob/rml_oob_component.c | 9 ++------- > >> orte/mca/snapc/base/snapc_base_frame.c | 4 ++-- > >> orte/mca/snapc/full/snapc_full_app.c | 15 +++++++++++++++ > >> orte/mca/snapc/full/snapc_full_component.c | 7 ++++--- > >> orte/mca/snapc/full/snapc_full_global.c | 8 ++++---- > >> orte/mca/snapc/full/snapc_full_local.c | 2 +- > >> orte/mca/sstore/stage/sstore_stage_component.c | 5 +++++ > >> 14 files changed, 59 insertions(+), 33 deletions(-) > >> > >> diff --git a/ompi/mca/bml/r2/bml_r2_ft.c b/ompi/mca/bml/r2/bml_r2_ft.c > >> index 1448c04..fc16452 100644 > >> --- a/ompi/mca/bml/r2/bml_r2_ft.c > >> +++ b/ompi/mca/bml/r2/bml_r2_ft.c > >> @@ -191,7 +191,7 @@ int mca_bml_r2_ft_event(int state) > >> > >> for(p = 0; p < (int)num_procs; ++p) { > >> if( NULL != > >> procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { > >> - OBJ_RELEASE((mca_bml_base_endpoint_t*) > >> procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]); > >> + > >> OBJ_RELEASE(procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]); > >> procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = > >> NULL; > >> } > >> > >> @@ -263,9 +263,9 @@ int mca_bml_r2_ft_event(int state) > >> mca_base_var_get_value(param_type, &btl_list, NULL, NULL); > >> opal_output_verbose(11, ompi_cr_output, > >> "Restart (New BTL MCA): <%s>\n", btl_list ? 
> >> btl_list[0] : ""); > >> - if( NULL != param_list ) { > >> - free(param_list); > >> - param_list = NULL; > >> + if( NULL != btl_list ) { > >> + free(btl_list); > >> + btl_list = NULL; > >> } > >> > >> /* > >> @@ -286,7 +286,7 @@ int mca_bml_r2_ft_event(int state) > >> > >> for(p = 0; p < (int)num_procs; ++p) { > >> if( NULL != > >> procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { > >> - OBJ_RELEASE((mca_bml_base_endpoint_t*) > >> procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]); > >> + > >> OBJ_RELEASE(procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]); > >> procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL; > >> } > >> > >> diff --git a/opal/mca/base/mca_base_components_open.c > >> b/opal/mca/base/mca_base_components_open.c > >> index e46e0f3..4568a51 100644 > >> --- a/opal/mca/base/mca_base_components_open.c > >> +++ b/opal/mca/base/mca_base_components_open.c > >> @@ -141,9 +141,18 @@ static int open_components(mca_base_framework_t > >> *framework) > >> * NTH: Logic moved to mca_base_components_filter. > >> */ > >> #if (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1) > >> +#ifdef ENABLE_FT_FIXED > >> + /* FIXME_FT > >> + * > >> + * the variable mca_base_component_distill_checkpoint_ready > >> + * was removed by commit 8181c8273c486bba59b3dead324939eac1a58b8c > >> (r28237) > >> + * "Introduce the MCA framework system. This formalizes the interface > >> frameworks must provide." 
> >> + * > >> + * */ > >> if (mca_base_component_distill_checkpoint_ready) { > >> open_only_flags |= MCA_BASE_METADATA_PARAM_CHECKPOINT; > >> } > >> +#endif /* ENABLE_FT_FIXED */ > >> #endif /* (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1) */ > >> > >> /* If mca_base_framework_register_components was called with the > >> MCA_BASE_COMPONENTS_ALL flag > >> diff --git a/opal/mca/crs/self/crs_self_component.c > >> b/opal/mca/crs/self/crs_self_component.c > >> index e0ca1ab..eb45d59 100644 > >> --- a/opal/mca/crs/self/crs_self_component.c > >> +++ b/opal/mca/crs/self/crs_self_component.c > >> @@ -90,9 +90,9 @@ static int crs_self_register (void) > >> mca_crs_self_component.super.priority = 20; > >> ret = mca_base_component_var_register > >> (&mca_crs_self_component.super.base_version, > >> "priority", "Priority of the CRS > >> self component " > >> - "(default: 20)", > >> MCA_BASE_VAR_TYPE_INT, NULL, > >> + "(default: 20)", > >> MCA_BASE_VAR_TYPE_INT, NULL, 0, > >> MCA_BASE_VAR_FLAG_SETTABLE, > >> - OPAL_INFO_LVL_9, > >> MPI_BASE_VAR_SCOPE_ALL_EQ, > >> + OPAL_INFO_LVL_9, > >> MCA_BASE_VAR_SCOPE_ALL_EQ, > >> > >> &mca_crs_self_component.super.priority); > >> if (0 > ret) { > >> return ret; > >> @@ -102,8 +102,8 @@ static int crs_self_register (void) > >> ret = mca_base_component_var_register > >> (&mca_crs_self_component.super.base_version, > >> "verbose", > >> "Verbose level for the CRS self > >> component", > >> - MCA_BASE_VAR_TYPE_INT, > >> NULL,MCA_BASE_VAR_FLAG_SETTABLE, > >> - OPAL_INFO_LVL_9, > >> MPI_BASE_VAR_SCOPE_LOCAL, > >> + MCA_BASE_VAR_TYPE_INT, NULL, > >> 0, MCA_BASE_VAR_FLAG_SETTABLE, > >> + OPAL_INFO_LVL_9, > >> MCA_BASE_VAR_SCOPE_LOCAL, > >> > >> &mca_crs_self_component.super.verbose); > >> if (0 > ret) { > >> return ret; > >> @@ -116,8 +116,8 @@ static int crs_self_register (void) > >> ret = mca_base_component_var_register > >> (&mca_crs_self_component.super.base_version, > >> "prefix", > >> "Prefix for user defined > >> callback functions", > >> - 
MCA_BASE_VAR_TYPE_STRING, > >> NULL, MCA_BASE_VAR_FLAG_SETTABLE, > >> - OPAL_INFO_LVL_9, > >> MPI_BASE_VAR_SCOPE_LOCAL, > >> + MCA_BASE_VAR_TYPE_STRING, > >> NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, > >> + OPAL_INFO_LVL_9, > >> MCA_BASE_VAR_SCOPE_LOCAL, > >> &mca_crs_self_component.prefix); > >> if (0 > ret) { > >> return ret; > >> @@ -126,8 +126,8 @@ static int crs_self_register (void) > >> ret = mca_base_component_var_register > >> (&mca_crs_self_component.super.base_version, > >> "do_restart", > >> "Start execution by calling > >> restart callback", > >> - MCA_BASE_VAR_TYPE_BOOL, NULL, > >> MCA_BASE_VAR_FLAG_SETTABLE, > >> - OPAL_INFO_LVL_9, > >> MPI_BASE_VAR_SCOPE_LOCAL, > >> + MCA_BASE_VAR_TYPE_BOOL, NULL, > >> 0, MCA_BASE_VAR_FLAG_SETTABLE, > >> + OPAL_INFO_LVL_9, > >> MCA_BASE_VAR_SCOPE_LOCAL, > >> > >> &mca_crs_self_component.do_restart); > >> return (0 > ret) ? ret : OPAL_SUCCESS; > >> } > >> diff --git a/opal/tools/opal-restart/opal-restart.c > >> b/opal/tools/opal-restart/opal-restart.c > >> index 35b7843..53da7f3 100644 > >> --- a/opal/tools/opal-restart/opal-restart.c > >> +++ b/opal/tools/opal-restart/opal-restart.c > >> @@ -247,7 +247,7 @@ main(int argc, char *argv[]) > >> * restart on this node because it doesn't have the proper checkpointer > >> * available. 
> >> */ > >> - if( OPAL_SUCCESS != (ret = opal_crs_base_open()) ) { > >> + if( OPAL_SUCCESS != (ret = opal_crs_base_open(MCA_BASE_OPEN_DEFAULT)) > >> ) { > >> opal_show_help("help-opal-restart.txt", "comp_select_failure", true, > >> "crs", ret); > >> exit_status = ret; > >> diff --git a/orte/mca/errmgr/base/errmgr_base_fns.c > >> b/orte/mca/errmgr/base/errmgr_base_fns.c > >> index 399c237..e8f41a2 100644 > >> --- a/orte/mca/errmgr/base/errmgr_base_fns.c > >> +++ b/orte/mca/errmgr/base/errmgr_base_fns.c > >> @@ -366,7 +366,7 @@ void orte_errmgr_base_migrate_state_notify(int state) > >> } > >> } > >> > >> -void orte_errmgr_base_proc_state_notify(orte_proc_state_t state, > >> orte_process_name_t *proc) > >> +int orte_errmgr_base_proc_state_notify(orte_proc_state_t state, > >> orte_process_name_t *proc) > >> { > >> if (NULL != proc) { > >> switch(state) { > >> diff --git a/orte/mca/ess/env/ess_env_module.c > >> b/orte/mca/ess/env/ess_env_module.c > >> index 6a71230..9b80099 100644 > >> --- a/orte/mca/ess/env/ess_env_module.c > >> +++ b/orte/mca/ess/env/ess_env_module.c > >> @@ -422,7 +422,7 @@ static int rte_ft_event(int state) > >> exit_status = ret; > >> goto cleanup; > >> } > >> - if (ORTE_SUCCESS != (ret = orte_db.remove(NULL, NULL))) { > >> + if (OPAL_SUCCESS != (ret = opal_db.remove(NULL, NULL))) { > >> ORTE_ERROR_LOG(ret); > >> exit_status = ret; > >> goto cleanup; > >> diff --git a/orte/mca/plm/base/plm_base_launch_support.c > >> b/orte/mca/plm/base/plm_base_launch_support.c > >> index 3deee11..6aba2c2 100644 > >> --- a/orte/mca/plm/base/plm_base_launch_support.c > >> +++ b/orte/mca/plm/base/plm_base_launch_support.c > >> @@ -333,6 +333,7 @@ void orte_plm_base_complete_setup(int fd, short args, > >> void *cbdata) > >> { > >> orte_job_t *jdata, *jdatorted; > >> orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; > >> + int rc; > >> > >> /* if we don't want to launch the apps, now is the time to leave */ > >> if (orte_do_not_launch) { > >> diff --git 
a/orte/mca/rml/oob/rml_oob_component.c > >> b/orte/mca/rml/oob/rml_oob_component.c > >> index dd539cd..b91f4a3 100644 > >> --- a/orte/mca/rml/oob/rml_oob_component.c > >> +++ b/orte/mca/rml/oob/rml_oob_component.c > >> @@ -11,11 +11,7 @@ > >> * Copyright (c) 2004-2005 The Regents of the University of California. > >> * All rights reserved. > >> * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. > >> -<<<<<<< .mine > >> - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. > >> -======= > >> * Copyright (c) 2011-2013 Los Alamos National Security, LLC. > >> ->>>>>>> .r28253 > >> * All rights reserved. > >> * $COPYRIGHT$ > >> * > >> @@ -189,8 +185,7 @@ orte_rml_oob_ft_event(int state) { > >> ; > >> } > >> > >> - if( ORTE_SUCCESS != > >> - (ret = orte_oob.ft_event(state)) ) { > >> + if( ORTE_SUCCESS != (ret = orte_rml_oob_module.super.ft_event(state)) > >> ) { > >> ORTE_ERROR_LOG(ret); > >> exit_status = ret; > >> goto cleanup; > >> @@ -212,7 +207,7 @@ orte_rml_oob_ft_event(int state) { > >> goto cleanup; > >> } > >> > >> - if( ORTE_SUCCESS != (ret = mca_oob_base_select())) { > >> + if( ORTE_SUCCESS != (ret = orte_oob_base_select())) { > >> ORTE_ERROR_LOG(ret); > >> exit_status = ret; > >> goto cleanup; > >> diff --git a/orte/mca/snapc/base/snapc_base_frame.c > >> b/orte/mca/snapc/base/snapc_base_frame.c > >> index edb8e6e..a46c77a 100644 > >> --- a/orte/mca/snapc/base/snapc_base_frame.c > >> +++ b/orte/mca/snapc/base/snapc_base_frame.c > >> @@ -79,7 +79,7 @@ static int > >> orte_snapc_base_register(mca_base_register_flag_t flags) > >> return ORTE_SUCCESS; > >> } > >> > >> -static int orte_snapc_base_close(void) > >> +int orte_snapc_base_close(void) > >> { > >> /* Close the selected component */ > >> if( NULL != orte_snapc.snapc_finalize ) { > >> @@ -93,7 +93,7 @@ static int orte_snapc_base_close(void) > >> * Function for finding and opening either all MCA components, > >> * or the one that was specifically requested via a MCA parameter. 
> >> */ > >> -static int orte_snapc_base_open(mca_base_open_flag_t flags) > >> +int orte_snapc_base_open(mca_base_open_flag_t flags) > >> { > >> /* Init the sequence (interval) number */ > >> orte_snapc_base_snapshot_seq_number = 0; > >> diff --git a/orte/mca/snapc/full/snapc_full_app.c > >> b/orte/mca/snapc/full/snapc_full_app.c > >> index 1ff036e..c7438f1 100644 > >> --- a/orte/mca/snapc/full/snapc_full_app.c > >> +++ b/orte/mca/snapc/full/snapc_full_app.c > >> @@ -99,6 +99,12 @@ static int current_cr_state = OPAL_CRS_NONE; > >> static orte_sstore_base_handle_t current_ss_handle = > >> ORTE_SSTORE_HANDLE_INVALID, last_ss_handle = ORTE_SSTORE_HANDLE_INVALID; > >> static opal_crs_base_ckpt_options_t *current_options = NULL; > >> > >> +static void snapc_full_app_callback_recv(int status, > >> + orte_process_name_t* sender, > >> + opal_buffer_t* buffer, > >> + orte_rml_tag_t tag, > >> + void* cbdata); > >> + > >> /************************ > >> * Function Definitions > >> ************************/ > >> @@ -1673,3 +1679,12 @@ int > >> app_coord_request_op(orte_snapc_base_request_op_t *datum) > >> > >> return exit_status; > >> } > >> + > >> +/* dummy implementation of a callback function to get it to compile again > >> */ > >> +static void snapc_full_app_callback_recv(int status, > >> + orte_process_name_t* sender, > >> + opal_buffer_t* buffer, > >> + orte_rml_tag_t tag, > >> + void* cbdata) > >> +{ > >> +} > >> diff --git a/orte/mca/snapc/full/snapc_full_component.c > >> b/orte/mca/snapc/full/snapc_full_component.c > >> index 7815363..b953e17 100644 > >> --- a/orte/mca/snapc/full/snapc_full_component.c > >> +++ b/orte/mca/snapc/full/snapc_full_component.c > >> @@ -32,6 +32,7 @@ const char *orte_snapc_full_component_version_string = > >> */ > >> static int snapc_full_open(void); > >> static int snapc_full_close(void); > >> +static int snapc_full_register(void); > >> > >> bool orte_snapc_full_skip_app = false; > >> bool orte_snapc_full_timing_enabled = false; > >> @@ 
-74,7 +75,7 @@ orte_snapc_full_component_t mca_snapc_full_component = { > >> } > >> }; > >> > >> -static int snaps_full_register (void) > >> +static int snapc_full_register (void) > >> { > >> mca_base_component_t *component = > >> &mca_snapc_full_component.super.base_version; > >> /* > >> @@ -129,7 +130,7 @@ static int snaps_full_register (void) > >> OPAL_INFO_LVL_9, > >> MCA_BASE_VAR_SCOPE_LOCAL, > >> > >> &orte_snapc_full_progress_meter); > >> - orte_snapc_full_progress_meter = (value % 101); > >> + orte_snapc_full_progress_meter %= 101; > >> > >> return ORTE_SUCCESS; > >> } > >> @@ -148,7 +149,7 @@ static int snapc_full_open(void) > >> } > >> > >> /* recheck the progress meter (it may have changed between register and > >> open) */ > >> - orte_snapc_full_progress_meter = (value % 101); > >> + orte_snapc_full_progress_meter %= 101; > >> > >> /* > >> * Debug Output > >> diff --git a/orte/mca/snapc/full/snapc_full_global.c > >> b/orte/mca/snapc/full/snapc_full_global.c > >> index c88c6db..9f6da34 100644 > >> --- a/orte/mca/snapc/full/snapc_full_global.c > >> +++ b/orte/mca/snapc/full/snapc_full_global.c > >> @@ -513,7 +513,7 @@ int global_coord_end_ckpt(orte_snapc_base_quiesce_t > >> *datum) > >> if( currently_migrating ) { > >> OPAL_OUTPUT_VERBOSE((10, > >> mca_snapc_full_component.super.output_handle, > >> "Global) End Ckpt: Flush the modex cached > >> data\n")); > >> - if (ORTE_SUCCESS != (ret = orte_db.remove(NULL, NULL))) { > >> + if (OPAL_SUCCESS != (ret = opal_db.remove(NULL, NULL))) { > >> ORTE_ERROR_LOG(ret); > >> exit_status = ret; > >> goto cleanup; > >> @@ -1138,7 +1138,7 @@ void snapc_full_global_orted_recv(int status, > >> OPAL_OUTPUT_VERBOSE((10, > >> mca_snapc_full_component.super.output_handle, > >> "Global) Command: Job State Update > >> (quick)")); > >> > >> - snapc_full_process_job_update_cmd(&sender, buffer, true); > >> + snapc_full_process_job_update_cmd(sender, buffer, true); > >> break; > >> > >> case ORTE_SNAPC_FULL_UPDATE_JOB_STATE_CMD: > 
>> @@ -1974,7 +1974,7 @@ static void > >> snapc_full_process_job_update_cmd(orte_process_name_t* sender, > >> > >> static int snapc_full_establish_snapshot_dir(bool empty_metadata) > >> { > >> - const char **value = NULL; > >> + char **value = NULL; > >> int idx = 0; > >> > >> /********************* > >> @@ -1998,7 +1998,7 @@ static int snapc_full_establish_snapshot_dir(bool > >> empty_metadata) > >> opal_show_help("help-orte-restart.txt", "amca_param_not_found", > >> true); > >> } > >> if( 0 < idx ) { > >> - mca_base_var_get_value (idx, &value, sizeof (value), NULL, NULL); > >> + mca_base_var_get_value (idx, &value, NULL, NULL); > >> > >> if (*value) { > >> orte_sstore.set_attr(global_snapshot.ss_handle, > >> diff --git a/orte/mca/snapc/full/snapc_full_local.c > >> b/orte/mca/snapc/full/snapc_full_local.c > >> index c0b168a..b13fce9 100644 > >> --- a/orte/mca/snapc/full/snapc_full_local.c > >> +++ b/orte/mca/snapc/full/snapc_full_local.c > >> @@ -1776,7 +1776,7 @@ static void snapc_full_local_comm_read_event(int fd, > >> short flags, void *arg) > >> if( currently_migrating && !flushed_modex ) { > >> OPAL_OUTPUT_VERBOSE((10, > >> mca_snapc_full_component.super.output_handle, > >> "Local) Read Event: Flush the modex cached > >> data\n")); > >> - if (ORTE_SUCCESS != (ret = orte_db.remove(NULL, NULL))) { > >> + if (OPAL_SUCCESS != (ret = opal_db.remove(NULL, NULL))) { > >> ORTE_ERROR_LOG(ret); > >> exit_status = ret; > >> goto cleanup; > >> diff --git a/orte/mca/sstore/stage/sstore_stage_component.c > >> b/orte/mca/sstore/stage/sstore_stage_component.c > >> index 19d7c75..aca2b46 100644 > >> --- a/orte/mca/sstore/stage/sstore_stage_component.c > >> +++ b/orte/mca/sstore/stage/sstore_stage_component.c > >> @@ -235,3 +235,8 @@ static int sstore_stage_close(void) > >> > >> return ORTE_SUCCESS; > >> } > >> + > >> +static int sstore_stage_register(void) > >> +{ > >> + return ORTE_SUCCESS; > >> +} > >> -- > >> 1.8.4.2 > >> > >> 
_______________________________________________ > >> devel mailing list > >> de...@open-mpi.org > >> http://www.open-mpi.org/mailman/listinfo.cgi/devel > > > > _______________________________________________ > devel mailing list > de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/devel > > > > -- > Joshua Hursey > Assistant Professor of Computer Science > University of Wisconsin-La Crosse > http://cs.uwlax.edu/~jjhursey > _______________________________________________ > devel mailing list > de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/devel