Re: [OMPI devel] [PATCH v2 1/2] Trying to get the C/R code to compile again. (recv_*_nb)

2014-01-02 Thread Josh Hursey
(Sorry for the delay, just catching up on email after the holidays)

I agree with Ralph. You can remove the old function signatures, but keep
the places where you replace a blocking send/recv with a non-blocking
version. Then I think it is good.

Thanks,
Josh



On Wed, Dec 18, 2013 at 9:52 AM, Ralph Castain  wrote:

> Hi Adrian
>
> No point in keeping the old code for those places where you update the
> syntax of a non-blocking recv (i.e., you remove the no-longer-reqd extra
> param). I would only keep it where you have to replace a blocking recv with
> a non-blocking one as that is where the behavior will change.
>
> Other than that, it looks fine to me.
>
> On Dec 18, 2013, at 6:42 AM, Adrian Reber  wrote:
>
> > From: Adrian Reber 
> >
> > This patch changes all recv/recv_buffer occurrences in the C/R code
> > to recv_nb/recv_buffer_nb.
> > The old code is still there but disabled using ifdefs (ENABLE_FT_FIXED).
> > The new code compiles but does not work.
> >
> > Changes from V1:
> > * #ifdef out the code (so it is preserved for later re-design)
> > * marked the broken C/R code with ENABLE_FT_FIXED
> >
> > Signed-off-by: Adrian Reber 
> > ---
> > ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c| 19 ++
> > orte/mca/errmgr/base/errmgr_base_tool.c |  6 +-
> > orte/mca/rml/ftrm/rml_ftrm.h| 27 ++---
> > orte/mca/rml/ftrm/rml_ftrm_component.c  |  2 -
> > orte/mca/rml/ftrm/rml_ftrm_module.c | 78
> +++--
> > orte/mca/snapc/full/snapc_full_app.c| 12 
> > orte/mca/snapc/full/snapc_full_global.c | 25 
> > orte/mca/snapc/full/snapc_full_local.c  | 24 
> > orte/mca/sstore/central/sstore_central_app.c|  6 ++
> > orte/mca/sstore/central/sstore_central_global.c | 11 ++--
> > orte/mca/sstore/central/sstore_central_local.c  | 11 ++--
> > orte/mca/sstore/stage/sstore_stage_app.c|  5 ++
> > orte/mca/sstore/stage/sstore_stage_global.c | 11 ++--
> > orte/mca/sstore/stage/sstore_stage_local.c  | 11 ++--
> > orte/tools/orte-checkpoint/orte-checkpoint.c|  9 ++-
> > orte/tools/orte-migrate/orte-migrate.c  |  9 ++-
> > 16 files changed, 124 insertions(+), 142 deletions(-)
> >
> > diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
> b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
> > index 5d4005f..cba7586 100644
> > --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
> > +++ b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
> > @@ -4739,6 +4739,8 @@ static int ft_event_post_drain_acks(void)
> > drain_msg_ack =
> (ompi_crcp_bkmrk_pml_drain_message_ack_ref_t*)item;
> >
> > /* Post the receive */
> > +#ifdef ENABLE_FT_FIXED
> > +/* This is the old, now broken code */
> > if( OMPI_SUCCESS != (ret = ompi_rte_recv_buffer_nb(
> _msg_ack->peer,
> >
> OMPI_CRCP_COORD_BOOKMARK_TAG,
> > 0,
> > @@ -4750,6 +4752,9 @@ static int ft_event_post_drain_acks(void)
> > OMPI_NAME_PRINT(&(drain_msg_ack->peer)));
> > return ret;
> > }
> > +#endif /* ENABLE_FT_FIXED */
> > +ompi_rte_recv_buffer_nb(_msg_ack->peer,
> OMPI_CRCP_COORD_BOOKMARK_TAG,
> > +0, drain_message_ack_cbfunc, NULL);
> > }
> >
> > return OMPI_SUCCESS;
> > @@ -5330,6 +5335,8 @@ static int recv_bookmarks(int peer_idx)
> > peer_name.jobid  = OMPI_PROC_MY_NAME->jobid;
> > peer_name.vpid   = peer_idx;
> >
> > +#ifdef ENABLE_FT_FIXED
> > +/* This is the old, now broken code */
> > if ( 0 > (ret = ompi_rte_recv_buffer_nb(_name,
> > OMPI_CRCP_COORD_BOOKMARK_TAG,
> > 0,
> > @@ -5342,6 +5349,9 @@ static int recv_bookmarks(int peer_idx)
> > exit_status = ret;
> > goto cleanup;
> > }
> > +#endif /* ENABLE_FT_FIXED */
> > +ompi_rte_recv_buffer_nb(_name, OMPI_CRCP_COORD_BOOKMARK_TAG,
> > +   0, recv_bookmarks_cbfunc, NULL);
> >
> > ++total_recv_bookmarks;
> >
> > @@ -5616,6 +5626,8 @@ static int
> do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
> > /*
> >  * Recv the ACK msg
> >  */
> > +#ifdef ENABLE_FT_FIXED
> > +/* This is the old, now broken code */
> > if ( 0 > (ret = ompi_rte_recv_buffer(_ref->proc_name, buffer,
> >  OMPI_CRCP_COORD_BOOKMARK_TAG,
> 0) ) ) {
> > opal_output(mca_crcp_bkmrk_component.super.output_handle,
> > @@ -5626,6 +5638,9 @@ static int
> do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
> > exit_status = ret;
> > goto cleanup;
> > }
> > +#endif /* ENABLE_FT_FIXED */
> > +ompi_rte_recv_buffer_nb(_ref->proc_name,
> OMPI_CRCP_COORD_BOOKMARK_TAG, 0,
> > +orte_rml_recv_callback, NULL);

Re: [OMPI devel] [PATCH v2 1/2] Trying to get the C/R code to compile again. (recv_*_nb)

2013-12-18 Thread Ralph Castain
Hi Adrian

No point in keeping the old code for those places where you update the syntax 
of a non-blocking recv (i.e., you remove the no-longer-reqd extra param). I 
would only keep it where you have to replace a blocking recv with a 
non-blocking one as that is where the behavior will change.

Other than that, it looks fine to me.

On Dec 18, 2013, at 6:42 AM, Adrian Reber  wrote:

> From: Adrian Reber 
> 
> This patch changes all recv/recv_buffer occurrences in the C/R code
> to recv_nb/recv_buffer_nb.
> The old code is still there but disabled using ifdefs (ENABLE_FT_FIXED).
> The new code compiles but does not work.
> 
> Changes from V1:
> * #ifdef out the code (so it is preserved for later re-design)
> * marked the broken C/R code with ENABLE_FT_FIXED
> 
> Signed-off-by: Adrian Reber 
> ---
> ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c| 19 ++
> orte/mca/errmgr/base/errmgr_base_tool.c |  6 +-
> orte/mca/rml/ftrm/rml_ftrm.h| 27 ++---
> orte/mca/rml/ftrm/rml_ftrm_component.c  |  2 -
> orte/mca/rml/ftrm/rml_ftrm_module.c | 78 +++--
> orte/mca/snapc/full/snapc_full_app.c| 12 
> orte/mca/snapc/full/snapc_full_global.c | 25 
> orte/mca/snapc/full/snapc_full_local.c  | 24 
> orte/mca/sstore/central/sstore_central_app.c|  6 ++
> orte/mca/sstore/central/sstore_central_global.c | 11 ++--
> orte/mca/sstore/central/sstore_central_local.c  | 11 ++--
> orte/mca/sstore/stage/sstore_stage_app.c|  5 ++
> orte/mca/sstore/stage/sstore_stage_global.c | 11 ++--
> orte/mca/sstore/stage/sstore_stage_local.c  | 11 ++--
> orte/tools/orte-checkpoint/orte-checkpoint.c|  9 ++-
> orte/tools/orte-migrate/orte-migrate.c  |  9 ++-
> 16 files changed, 124 insertions(+), 142 deletions(-)
> 
> diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c 
> b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
> index 5d4005f..cba7586 100644
> --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
> +++ b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
> @@ -4739,6 +4739,8 @@ static int ft_event_post_drain_acks(void)
> drain_msg_ack = (ompi_crcp_bkmrk_pml_drain_message_ack_ref_t*)item;
> 
> /* Post the receive */
> +#ifdef ENABLE_FT_FIXED
> +/* This is the old, now broken code */
> if( OMPI_SUCCESS != (ret = ompi_rte_recv_buffer_nb( 
> _msg_ack->peer,
> 
> OMPI_CRCP_COORD_BOOKMARK_TAG,
> 0,
> @@ -4750,6 +4752,9 @@ static int ft_event_post_drain_acks(void)
> OMPI_NAME_PRINT(&(drain_msg_ack->peer)));
> return ret;
> }
> +#endif /* ENABLE_FT_FIXED */
> +ompi_rte_recv_buffer_nb(_msg_ack->peer, 
> OMPI_CRCP_COORD_BOOKMARK_TAG,
> +0, drain_message_ack_cbfunc, NULL);
> }
> 
> return OMPI_SUCCESS;
> @@ -5330,6 +5335,8 @@ static int recv_bookmarks(int peer_idx)
> peer_name.jobid  = OMPI_PROC_MY_NAME->jobid;
> peer_name.vpid   = peer_idx;
> 
> +#ifdef ENABLE_FT_FIXED
> +/* This is the old, now broken code */
> if ( 0 > (ret = ompi_rte_recv_buffer_nb(_name,
> OMPI_CRCP_COORD_BOOKMARK_TAG,
> 0,
> @@ -5342,6 +5349,9 @@ static int recv_bookmarks(int peer_idx)
> exit_status = ret;
> goto cleanup;
> }
> +#endif /* ENABLE_FT_FIXED */
> +ompi_rte_recv_buffer_nb(_name, OMPI_CRCP_COORD_BOOKMARK_TAG,
> +   0, recv_bookmarks_cbfunc, NULL);
> 
> ++total_recv_bookmarks;
> 
> @@ -5616,6 +5626,8 @@ static int 
> do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
> /*
>  * Recv the ACK msg
>  */
> +#ifdef ENABLE_FT_FIXED
> +/* This is the old, now broken code */
> if ( 0 > (ret = ompi_rte_recv_buffer(_ref->proc_name, buffer,
>  OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) 
> {
> opal_output(mca_crcp_bkmrk_component.super.output_handle,
> @@ -5626,6 +5638,9 @@ static int 
> do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
> exit_status = ret;
> goto cleanup;
> }
> +#endif /* ENABLE_FT_FIXED */
> +ompi_rte_recv_buffer_nb(_ref->proc_name, 
> OMPI_CRCP_COORD_BOOKMARK_TAG, 0,
> +orte_rml_recv_callback, NULL);
> 
> UNPACK_BUFFER(buffer, recv_response, 1, OPAL_UINT32,
>   "crcp:bkmrk: send_msg_details: Failed to unpack the ACK 
> from peer buffer.");
> @@ -5790,6 +5805,8 @@ static int 
> do_recv_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
> /*
>  * Recv the msg
>  */
> +#ifdef ENABLE_FT_FIXED
> +/* This is the old, now broken code */
> if ( 0 > (ret = ompi_rte_recv_buffer(_ref->proc_name, buffer, 
> 

[OMPI devel] [PATCH v2 1/2] Trying to get the C/R code to compile again. (recv_*_nb)

2013-12-18 Thread Adrian Reber
From: Adrian Reber 

This patch changes all recv/recv_buffer occurrences in the C/R code
to recv_nb/recv_buffer_nb.
The old code is still there but disabled using ifdefs (ENABLE_FT_FIXED).
The new code compiles but does not work.

Changes from V1:
* #ifdef out the code (so it is preserved for later re-design)
* marked the broken C/R code with ENABLE_FT_FIXED

Signed-off-by: Adrian Reber 
---
 ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c| 19 ++
 orte/mca/errmgr/base/errmgr_base_tool.c |  6 +-
 orte/mca/rml/ftrm/rml_ftrm.h| 27 ++---
 orte/mca/rml/ftrm/rml_ftrm_component.c  |  2 -
 orte/mca/rml/ftrm/rml_ftrm_module.c | 78 +++--
 orte/mca/snapc/full/snapc_full_app.c| 12 
 orte/mca/snapc/full/snapc_full_global.c | 25 
 orte/mca/snapc/full/snapc_full_local.c  | 24 
 orte/mca/sstore/central/sstore_central_app.c|  6 ++
 orte/mca/sstore/central/sstore_central_global.c | 11 ++--
 orte/mca/sstore/central/sstore_central_local.c  | 11 ++--
 orte/mca/sstore/stage/sstore_stage_app.c|  5 ++
 orte/mca/sstore/stage/sstore_stage_global.c | 11 ++--
 orte/mca/sstore/stage/sstore_stage_local.c  | 11 ++--
 orte/tools/orte-checkpoint/orte-checkpoint.c|  9 ++-
 orte/tools/orte-migrate/orte-migrate.c  |  9 ++-
 16 files changed, 124 insertions(+), 142 deletions(-)

diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c 
b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
index 5d4005f..cba7586 100644
--- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
+++ b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
@@ -4739,6 +4739,8 @@ static int ft_event_post_drain_acks(void)
 drain_msg_ack = (ompi_crcp_bkmrk_pml_drain_message_ack_ref_t*)item;

 /* Post the receive */
+#ifdef ENABLE_FT_FIXED
+/* This is the old, now broken code */
 if( OMPI_SUCCESS != (ret = ompi_rte_recv_buffer_nb( 
_msg_ack->peer,
 
OMPI_CRCP_COORD_BOOKMARK_TAG,
 0,
@@ -4750,6 +4752,9 @@ static int ft_event_post_drain_acks(void)
 OMPI_NAME_PRINT(&(drain_msg_ack->peer)));
 return ret;
 }
+#endif /* ENABLE_FT_FIXED */
+ompi_rte_recv_buffer_nb(_msg_ack->peer, 
OMPI_CRCP_COORD_BOOKMARK_TAG,
+0, drain_message_ack_cbfunc, NULL);
 }

 return OMPI_SUCCESS;
@@ -5330,6 +5335,8 @@ static int recv_bookmarks(int peer_idx)
 peer_name.jobid  = OMPI_PROC_MY_NAME->jobid;
 peer_name.vpid   = peer_idx;

+#ifdef ENABLE_FT_FIXED
+/* This is the old, now broken code */
 if ( 0 > (ret = ompi_rte_recv_buffer_nb(_name,
 OMPI_CRCP_COORD_BOOKMARK_TAG,
 0,
@@ -5342,6 +5349,9 @@ static int recv_bookmarks(int peer_idx)
 exit_status = ret;
 goto cleanup;
 }
+#endif /* ENABLE_FT_FIXED */
+ompi_rte_recv_buffer_nb(_name, OMPI_CRCP_COORD_BOOKMARK_TAG,
+   0, recv_bookmarks_cbfunc, NULL);

 ++total_recv_bookmarks;

@@ -5616,6 +5626,8 @@ static int 
do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
 /*
  * Recv the ACK msg
  */
+#ifdef ENABLE_FT_FIXED
+/* This is the old, now broken code */
 if ( 0 > (ret = ompi_rte_recv_buffer(_ref->proc_name, buffer,
  OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) {
 opal_output(mca_crcp_bkmrk_component.super.output_handle,
@@ -5626,6 +5638,9 @@ static int 
do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
 exit_status = ret;
 goto cleanup;
 }
+#endif /* ENABLE_FT_FIXED */
+ompi_rte_recv_buffer_nb(_ref->proc_name, 
OMPI_CRCP_COORD_BOOKMARK_TAG, 0,
+orte_rml_recv_callback, NULL);

 UNPACK_BUFFER(buffer, recv_response, 1, OPAL_UINT32,
   "crcp:bkmrk: send_msg_details: Failed to unpack the ACK from 
peer buffer.");
@@ -5790,6 +5805,8 @@ static int 
do_recv_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
 /*
  * Recv the msg
  */
+#ifdef ENABLE_FT_FIXED
+/* This is the old, now broken code */
 if ( 0 > (ret = ompi_rte_recv_buffer(_ref->proc_name, buffer, 
OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) {
 opal_output(mca_crcp_bkmrk_component.super.output_handle,
 "crcp:bkmrk: do_recv_msg_detail: %s <-- %s Failed to 
receive buffer from peer. Return %d\n",
@@ -5799,6 +5816,8 @@ static int 
do_recv_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
 exit_status = ret;
 goto cleanup;
 }
+#endif /* ENABLE_FT_FIXED */
+ompi_rte_recv_buffer_nb(_ref->proc_name, 
OMPI_CRCP_COORD_BOOKMARK_TAG, 0, orte_rml_recv_callback, NULL);

 /* Pull out the communicator ID */
 UNPACK_BUFFER(buffer,