Unfortunately, a lot less than expected. There are still a lot of places in the critical path where these macros can be used, and in the end we might get something like a 2-3% improvement, which is not worth the effort, I think.

At the same time, I played with the memcpy framework (manually replacing the MEMCPY macro in the datatype engine) and there I got a lot more. So, right now I think that's the way to go ...

  george.

On Jul 7, 2007, at 3:51 AM, Rainer Keller wrote:

Hi George,
just out of curiosity, how much did that get you?

CU,
Rainer

On Saturday 07 July 2007 06:31, bosi...@osl.iu.edu wrote:
Author: bosilca
Date: 2007-07-07 00:31:06 EDT (Sat, 07 Jul 2007)
New Revision: 15302
URL: https://svn.open-mpi.org/trac/ompi/changeset/15302

Log:
Add few OPAL_LIKELY/OPAL_UNLIKELY to the datatype engine.

Text files modified:
   trunk/ompi/datatype/convertor.c |    20 ++++++++++----------
   trunk/ompi/datatype/convertor.h |     9 +++++----
   2 files changed, 15 insertions(+), 14 deletions(-)

Modified: trunk/ompi/datatype/convertor.c
===================================================================== ======
=== --- trunk/ompi/datatype/convertor.c (original)
+++ trunk/ompi/datatype/convertor.c 2007-07-07 00:31:06 EDT (Sat, 07 Jul
2007) @@ -229,7 +229,7 @@
 {
OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( pConv, iov, out_size,
max_data );

-    if( pConv->flags & CONVERTOR_NO_OP ) {
+    if( OPAL_LIKELY(pConv->flags & CONVERTOR_NO_OP) ) {
         /* We are doing conversion on a contiguous datatype on a
homogeneous * environment. The convertor contain minimal informations, we
only * use the bConverted to manage the conversion.
@@ -245,7 +245,7 @@
             if( iov[i].iov_len >= pending_length ) {
                 goto complete_contiguous_data_pack;
             }
-            if( NULL == iov[i].iov_base )
+            if( OPAL_LIKELY(NULL == iov[i].iov_base) )
                 iov[i].iov_base = base_pointer;
             else
MEMCPY( iov[i].iov_base, base_pointer, iov [i].iov_len );
@@ -257,7 +257,7 @@
         return 0;
     complete_contiguous_data_pack:
         iov[i].iov_len = pending_length;
-        if( NULL == iov[i].iov_base )
+        if( OPAL_LIKELY(NULL == iov[i].iov_base) )
             iov[i].iov_base = base_pointer;
         else
             MEMCPY( iov[i].iov_base, base_pointer, iov[i].iov_len );
@@ -276,7 +276,7 @@
 {
OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( pConv, iov, out_size,
max_data );

-    if( pConv->flags & CONVERTOR_NO_OP ) {
+    if( OPAL_LIKELY(pConv->flags & CONVERTOR_NO_OP) ) {
         /* We are doing conversion on a contiguous datatype on a
homogeneous * environment. The convertor contain minimal informations, we
only * use the bConverted to manage the conversion.
@@ -340,7 +340,7 @@
     /* we save the current displacement starting from the begining
      * of this data.
      */
-    if( 0 == count ) {
+    if( OPAL_LIKELY(0 == count) ) {
         pStack[1].type     = pElems->elem.common.type;
         pStack[1].count    = pElems->elem.count;
         pStack[1].disp     = pElems->elem.disp;
@@ -403,7 +403,7 @@
         rc = ompi_convertor_create_stack_at_begining( convertor,
ompi_ddt_local_sizes ); if( 0 == (*position) ) return rc;
     }
-    if( convertor->flags & DT_FLAG_CONTIGUOUS ) {
+    if( OPAL_LIKELY(convertor->flags & DT_FLAG_CONTIGUOUS) ) {
         rc = ompi_convertor_create_stack_with_pos_contig( convertor,
(*position), ompi_ddt_local_sizes ); } else {
@@ -443,13 +443,13 @@
* completed. With this flag set the pack and unpack functions \ * will not do anything. \ */ \ - if( OPAL_UNLIKELY(0 == convertor->local_size) ) { \ + if( OPAL_UNLIKELY((0 == count) || (0 == datatype- >size)) ) { \ convertor->flags |= CONVERTOR_COMPLETED; \ convertor->remote_size = 0; \ return OMPI_SUCCESS; \ } \ \ - if( convertor->remoteArch == ompi_mpi_local_arch ) { \ + if( OPAL_LIKELY(convertor->remoteArch == ompi_mpi_local_arch) ) {
\ convertor->remote_size = convertor->local_size;             \ if(
(convertor->flags & (CONVERTOR_WITH_CHECKSUM | DT_FLAG_NO_GAPS)) ==
DT_FLAG_NO_GAPS ) { \ return OMPI_SUCCESS;
  \ @@ -599,7 +599,7 @@
     destination->local_size        = source->local_size;
     destination->remote_size       = source->remote_size;
     /* create the stack */
-    if( source->stack_size > DT_STATIC_STACK_SIZE ) {
+    if( OPAL_UNLIKELY(source->stack_size > DT_STATIC_STACK_SIZE) ) {
destination->pStack = (dt_stack_t*)malloc(sizeof (dt_stack_t) *
source->stack_size ); } else {
         destination->pStack = destination->static_stack;
@@ -607,7 +607,7 @@
     destination->stack_size = source->stack_size;

     /* initialize the stack */
-    if( 0 == copy_stack ) {
+    if( OPAL_LIKELY(0 == copy_stack) ) {
         destination->bConverted = -1;
         destination->stack_pos  = -1;
     } else {

Modified: trunk/ompi/datatype/convertor.h
===================================================================== ======
=== --- trunk/ompi/datatype/convertor.h (original)
+++ trunk/ompi/datatype/convertor.h 2007-07-07 00:31:06 EDT (Sat, 07 Jul
2007) @@ -23,6 +23,7 @@
 #include "ompi_config.h"
 #include "ompi/constants.h"
 #include "ompi/datatype/datatype.h"
+#include "opal/prefetch.h"
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
 #endif
@@ -143,7 +144,7 @@
  */
static inline int ompi_convertor_cleanup( ompi_convertor_t* convertor )
 {
-    if( convertor->stack_size > DT_STATIC_STACK_SIZE ) {
+ if( OPAL_UNLIKELY(convertor->stack_size > DT_STATIC_STACK_SIZE) ) {
         free( convertor->pStack );
         convertor->pStack     = convertor->static_stack;
         convertor->stack_size = DT_STATIC_STACK_SIZE;
@@ -256,13 +257,13 @@
     /*
* If the convertor is already at the correct position we are happy.
      */
-    if( (*position) == convertor->bConverted ) return OMPI_SUCCESS;
+    if( OPAL_LIKELY((*position) == convertor->bConverted) ) return
OMPI_SUCCESS;

     /*
* Do not allow the convertor to go outside the data boundaries. This
test include * the check for datatype with size zero as well as for
convertors with a count of zero. */
-    if( convertor->local_size <= *position) {
+    if( OPAL_UNLIKELY(convertor->local_size <= *position) ) {
         convertor->flags |= CONVERTOR_COMPLETED;
         convertor->bConverted = convertor->local_size;
         *position = convertor->bConverted;
@@ -291,7 +292,7 @@
 {
     convertor->flags |= flags;

-    if( NULL == position )
+    if( OPAL_UNLIKELY(NULL == position) )
         return OMPI_SUCCESS;
     return ompi_convertor_set_position( convertor, position );
 }
_______________________________________________
svn-full mailing list
svn-f...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/svn-full

--
----------------------------------------------------------------
Dipl.-Inf. Rainer Keller       http://www.hlrs.de/people/keller
 High Performance Computing       Tel: ++49 (0)711-685 6 5858
   Center Stuttgart (HLRS)           Fax: ++49 (0)711-685 6 5832
 POSTAL:Nobelstrasse 19                 email: kel...@hlrs.de
 ACTUAL:Allmandring 30, R.O.030            AIM:rusraink
 70550 Stuttgart
_______________________________________________
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel

Attachment: smime.p7s
Description: S/MIME cryptographic signature

Reply via email to