Re: [OMPI devel] [PATCH v2 2/2] Trying to get the C/R code to compile again. (send_*_nb)
Thanks for the review. I am re-spinning the patches and sending the new version in a few moments. On Wed, Dec 18, 2013 at 06:56:47AM -0800, Ralph Castain wrote: > In the case of the send, there really isn't any problem with just replacing > things - the non-blocking change won't impact anything, so no need to retain > the old code. People were only concerned about the recv's as those places > will require further repair, and they wanted to ensure we know where those > places are located. > > You also need to change those comparisons, however, as the return code isn't > the number of bytes sent any more - it is just ORTE_SUCCESS or else an error > code, so you should be testing for ORTE_SUCCESS == > > > > > On Dec 18, 2013, at 6:42 AM, Adrian Reber wrote: > > > From: Adrian Reber > > > > This patch changes all send/send_buffer occurrences in the C/R code > > to send_nb/send_buffer_nb. > > The old code is still there but disabled using ifdefs (ENABLE_FT_FIXED). > > The new code compiles but does not work. > > > > Changes from V1: > > * #ifdef out the code (so it is preserved for later re-design) > > * marked the broken C/R code with ENABLE_FT_FIXED > > > > Signed-off-by: Adrian Reber > > --- > > ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c| 18 +++ > > orte/mca/errmgr/base/errmgr_base_tool.c | 4 ++ > > orte/mca/rml/ftrm/rml_ftrm.h| 19 > > orte/mca/rml/ftrm/rml_ftrm_component.c | 2 - > > orte/mca/rml/ftrm/rml_ftrm_module.c | 63 > > + > > orte/mca/snapc/full/snapc_full_app.c| 20 > > orte/mca/snapc/full/snapc_full_global.c | 12 + > > orte/mca/snapc/full/snapc_full_local.c | 4 ++ > > orte/mca/sstore/central/sstore_central_app.c| 8 > > orte/mca/sstore/central/sstore_central_global.c | 4 ++ > > orte/mca/sstore/central/sstore_central_local.c | 12 + > > orte/mca/sstore/stage/sstore_stage_app.c| 8 > > orte/mca/sstore/stage/sstore_stage_global.c | 4 ++ > > orte/mca/sstore/stage/sstore_stage_local.c | 16 +++ > > orte/tools/orte-checkpoint/orte-checkpoint.c| 4 ++ > > orte/tools/orte-migrate/orte-migrate.c | 4 ++ > > 16 files changed, 130 insertions(+), 72 deletions(-) > > > > diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c > > b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c > > index cba7586..4f7bd7f 100644 > > --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c > > +++ b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c > > @@ -5102,7 +5102,11 @@ static int wait_quiesce_drained(void) > > PACK_BUFFER(buffer, response, 1, OPAL_SIZE, ""); > > > > /* JJH - Performance Optimization? - Why not post all isends, > > then wait? */ > > +#ifdef ENABLE_FT_FIXED > > +/* This is the old, now broken code */ > > if ( 0 > ( ret = > > ompi_rte_send_buffer(&(cur_peer_ref->proc_name), buffer, > > OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { > > +#endif /* ENABLE_FT_FIXED */ > > +if ( 0 > ( ret = > > ompi_rte_send_buffer_nb(&(cur_peer_ref->proc_name), buffer, > > OMPI_CRCP_COORD_BOOKMARK_TAG, orte_rml_send_callback, NULL)) ) { > > exit_status = ret; > > goto cleanup; > > } > > @@ -5303,7 +5307,11 @@ static int send_bookmarks(int peer_idx) > > PACK_BUFFER(buffer, (peer_ref->total_msgs_recvd), 1, OPAL_UINT32, > > "crcp:bkmrk: send_bookmarks: Unable to pack > > total_msgs_recvd"); > > > > +#ifdef ENABLE_FT_FIXED > > +/* This is the old, now broken code */ > > if ( 0 > ( ret = ompi_rte_send_buffer(&peer_name, buffer, > > OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { > > +#endif /* ENABLE_FT_FIXED */ > > +if ( 0 > ( ret = ompi_rte_send_buffer_nb(&peer_name, buffer, > > OMPI_CRCP_COORD_BOOKMARK_TAG, orte_rml_send_callback, NULL)) ) { > > opal_output(mca_crcp_bkmrk_component.super.output_handle, > > "crcp:bkmrk: send_bookmarks: Failed to send bookmark to > > peer %s: Return %d\n", > > OMPI_NAME_PRINT(&peer_name), > > @@ -5599,8 +5607,13 @@ static int > > do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, > > /* > > * Do the send... > > */ > > +#ifdef ENABLE_FT_FIXED > > +/* This is the old, now broken code */ > > if ( 0 > ( ret = ompi_rte_send_buffer(&peer_ref->proc_name, buffer, > > OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) > > ) { > > +#endif /* ENABLE_FT_FIXED */ > > +if ( 0 > ( ret = ompi_rte_send_buffer_nb(&peer_ref->proc_name, buffer, > > + OMPI_CRCP_COORD_BOOKMARK_TAG, > > orte_rml_send_callback, NULL)) ) { > > opal_output(mca_crcp_bkmrk_component.super.output_handle, > > "crcp:bkmrk: do_send_msg_detail: Unable to send message > > details to peer %s: Return %d\n", > > OMPI_NAME_PRINT(&peer_ref->proc_name), > > @@ -62
Re: [OMPI devel] [PATCH v2 2/2] Trying to get the C/R code to compile again. (send_*_nb)
In the case of the send, there really isn't any problem with just replacing things - the non-blocking change won't impact anything, so no need to retain the old code. People were only concerned about the recv's as those places will require further repair, and they wanted to ensure we know where those places are located. You also need to change those comparisons, however, as the return code isn't the number of bytes sent any more - it is just ORTE_SUCCESS or else an error code, so you should be testing for ORTE_SUCCESS == On Dec 18, 2013, at 6:42 AM, Adrian Reber wrote: > From: Adrian Reber > > This patch changes all send/send_buffer occurrences in the C/R code > to send_nb/send_buffer_nb. > The old code is still there but disabled using ifdefs (ENABLE_FT_FIXED). > The new code compiles but does not work. > > Changes from V1: > * #ifdef out the code (so it is preserved for later re-design) > * marked the broken C/R code with ENABLE_FT_FIXED > > Signed-off-by: Adrian Reber > --- > ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c| 18 +++ > orte/mca/errmgr/base/errmgr_base_tool.c | 4 ++ > orte/mca/rml/ftrm/rml_ftrm.h| 19 > orte/mca/rml/ftrm/rml_ftrm_component.c | 2 - > orte/mca/rml/ftrm/rml_ftrm_module.c | 63 + > orte/mca/snapc/full/snapc_full_app.c| 20 > orte/mca/snapc/full/snapc_full_global.c | 12 + > orte/mca/snapc/full/snapc_full_local.c | 4 ++ > orte/mca/sstore/central/sstore_central_app.c| 8 > orte/mca/sstore/central/sstore_central_global.c | 4 ++ > orte/mca/sstore/central/sstore_central_local.c | 12 + > orte/mca/sstore/stage/sstore_stage_app.c| 8 > orte/mca/sstore/stage/sstore_stage_global.c | 4 ++ > orte/mca/sstore/stage/sstore_stage_local.c | 16 +++ > orte/tools/orte-checkpoint/orte-checkpoint.c| 4 ++ > orte/tools/orte-migrate/orte-migrate.c | 4 ++ > 16 files changed, 130 insertions(+), 72 deletions(-) > > diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c > b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c > index cba7586..4f7bd7f 100644 > --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c > +++ b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c > @@ -5102,7 +5102,11 @@ static int wait_quiesce_drained(void) > PACK_BUFFER(buffer, response, 1, OPAL_SIZE, ""); > > /* JJH - Performance Optimization? - Why not post all isends, > then wait? */ > +#ifdef ENABLE_FT_FIXED > +/* This is the old, now broken code */ > if ( 0 > ( ret = ompi_rte_send_buffer(&(cur_peer_ref->proc_name), > buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { > +#endif /* ENABLE_FT_FIXED */ > +if ( 0 > ( ret = > ompi_rte_send_buffer_nb(&(cur_peer_ref->proc_name), buffer, > OMPI_CRCP_COORD_BOOKMARK_TAG, orte_rml_send_callback, NULL)) ) { > exit_status = ret; > goto cleanup; > } > @@ -5303,7 +5307,11 @@ static int send_bookmarks(int peer_idx) > PACK_BUFFER(buffer, (peer_ref->total_msgs_recvd), 1, OPAL_UINT32, > "crcp:bkmrk: send_bookmarks: Unable to pack > total_msgs_recvd"); > > +#ifdef ENABLE_FT_FIXED > +/* This is the old, now broken code */ > if ( 0 > ( ret = ompi_rte_send_buffer(&peer_name, buffer, > OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { > +#endif /* ENABLE_FT_FIXED */ > +if ( 0 > ( ret = ompi_rte_send_buffer_nb(&peer_name, buffer, > OMPI_CRCP_COORD_BOOKMARK_TAG, orte_rml_send_callback, NULL)) ) { > opal_output(mca_crcp_bkmrk_component.super.output_handle, > "crcp:bkmrk: send_bookmarks: Failed to send bookmark to > peer %s: Return %d\n", > OMPI_NAME_PRINT(&peer_name), > @@ -5599,8 +5607,13 @@ static int > do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, > /* > * Do the send... > */ > +#ifdef ENABLE_FT_FIXED > +/* This is the old, now broken code */ > if ( 0 > ( ret = ompi_rte_send_buffer(&peer_ref->proc_name, buffer, > OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) > { > +#endif /* ENABLE_FT_FIXED */ > +if ( 0 > ( ret = ompi_rte_send_buffer_nb(&peer_ref->proc_name, buffer, > + OMPI_CRCP_COORD_BOOKMARK_TAG, > orte_rml_send_callback, NULL)) ) { > opal_output(mca_crcp_bkmrk_component.super.output_handle, > "crcp:bkmrk: do_send_msg_detail: Unable to send message > details to peer %s: Return %d\n", > OMPI_NAME_PRINT(&peer_ref->proc_name), > @@ -6217,8 +6230,13 @@ static int > do_recv_msg_detail_resp(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, > "crcp:bkmrk: recv_msg_details: Unable to ask peer for more > messages"); > PACK_BUFFER(buffer, total_found, 1, OPAL_UINT32, > "crcp:bkmrk: recv_msg_details: Unable to ask peer for more > messages"); > +#ifdef ENABLE_FT_FIXED > +/* This
[OMPI devel] [PATCH v2 2/2] Trying to get the C/R code to compile again. (send_*_nb)
From: Adrian Reber This patch changes all send/send_buffer occurrences in the C/R code to send_nb/send_buffer_nb. The old code is still there but disabled using ifdefs (ENABLE_FT_FIXED). The new code compiles but does not work. Changes from V1: * #ifdef out the code (so it is preserved for later re-design) * marked the broken C/R code with ENABLE_FT_FIXED Signed-off-by: Adrian Reber --- ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c| 18 +++ orte/mca/errmgr/base/errmgr_base_tool.c | 4 ++ orte/mca/rml/ftrm/rml_ftrm.h| 19 orte/mca/rml/ftrm/rml_ftrm_component.c | 2 - orte/mca/rml/ftrm/rml_ftrm_module.c | 63 + orte/mca/snapc/full/snapc_full_app.c| 20 orte/mca/snapc/full/snapc_full_global.c | 12 + orte/mca/snapc/full/snapc_full_local.c | 4 ++ orte/mca/sstore/central/sstore_central_app.c| 8 orte/mca/sstore/central/sstore_central_global.c | 4 ++ orte/mca/sstore/central/sstore_central_local.c | 12 + orte/mca/sstore/stage/sstore_stage_app.c| 8 orte/mca/sstore/stage/sstore_stage_global.c | 4 ++ orte/mca/sstore/stage/sstore_stage_local.c | 16 +++ orte/tools/orte-checkpoint/orte-checkpoint.c| 4 ++ orte/tools/orte-migrate/orte-migrate.c | 4 ++ 16 files changed, 130 insertions(+), 72 deletions(-) diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c index cba7586..4f7bd7f 100644 --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c +++ b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c @@ -5102,7 +5102,11 @@ static int wait_quiesce_drained(void) PACK_BUFFER(buffer, response, 1, OPAL_SIZE, ""); /* JJH - Performance Optimization? - Why not post all isends, then wait? */ +#ifdef ENABLE_FT_FIXED +/* This is the old, now broken code */ if ( 0 > ( ret = ompi_rte_send_buffer(&(cur_peer_ref->proc_name), buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { +#endif /* ENABLE_FT_FIXED */ +if ( 0 > ( ret = ompi_rte_send_buffer_nb(&(cur_peer_ref->proc_name), buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, orte_rml_send_callback, NULL)) ) { exit_status = ret; goto cleanup; } @@ -5303,7 +5307,11 @@ static int send_bookmarks(int peer_idx) PACK_BUFFER(buffer, (peer_ref->total_msgs_recvd), 1, OPAL_UINT32, "crcp:bkmrk: send_bookmarks: Unable to pack total_msgs_recvd"); +#ifdef ENABLE_FT_FIXED +/* This is the old, now broken code */ if ( 0 > ( ret = ompi_rte_send_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { +#endif /* ENABLE_FT_FIXED */ +if ( 0 > ( ret = ompi_rte_send_buffer_nb(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, orte_rml_send_callback, NULL)) ) { opal_output(mca_crcp_bkmrk_component.super.output_handle, "crcp:bkmrk: send_bookmarks: Failed to send bookmark to peer %s: Return %d\n", OMPI_NAME_PRINT(&peer_name), @@ -5599,8 +5607,13 @@ static int do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, /* * Do the send... */ +#ifdef ENABLE_FT_FIXED +/* This is the old, now broken code */ if ( 0 > ( ret = ompi_rte_send_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { +#endif /* ENABLE_FT_FIXED */ +if ( 0 > ( ret = ompi_rte_send_buffer_nb(&peer_ref->proc_name, buffer, + OMPI_CRCP_COORD_BOOKMARK_TAG, orte_rml_send_callback, NULL)) ) { opal_output(mca_crcp_bkmrk_component.super.output_handle, "crcp:bkmrk: do_send_msg_detail: Unable to send message details to peer %s: Return %d\n", OMPI_NAME_PRINT(&peer_ref->proc_name), @@ -6217,8 +6230,13 @@ static int do_recv_msg_detail_resp(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, "crcp:bkmrk: recv_msg_details: Unable to ask peer for more messages"); PACK_BUFFER(buffer, total_found, 1, OPAL_UINT32, "crcp:bkmrk: recv_msg_details: Unable to ask peer for more messages"); +#ifdef ENABLE_FT_FIXED +/* This is the old, now broken code */ if ( 0 > ( ret = ompi_rte_send_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { +#endif /* ENABLE_FT_FIXED */ + +if ( 0 > ( ret = ompi_rte_send_buffer_nb(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, orte_rml_send_callback, NULL)) ) { opal_output(mca_crcp_bkmrk_component.super.output_handle, "crcp:bkmrk: recv_msg_detail_resp: Unable to send message detail response to peer %s: Return %d\n", OMPI_NAME_PRINT(&peer_ref->proc_name), diff --git a/orte/mca/errmgr/base/errmgr_base_tool.c b/orte/mca/errmgr/base/errmgr_base_tool.c index b982e46..e274bae 100644 --- a/orte/mca/errmgr/base/errmgr_base_tool.c +