Steven Dake wrote:
> Honza,
> 
> Great work as usual - small nit about the todo below otherwise
> 
> Reviewed-by: Steven Dake <[email protected]>
> 
> Where you able to notice a difference with oprofile cpu utilization?
> 

Actually not too much. There were for sure difference in about 10% in
CPU usage showed in top, but numbers from oprofile was same.

> Regards
> -steve
> 
> On 05/18/2011 07:38 AM, Jan Friesse wrote:
>> X86 processors are able to handle unaligned memory access. Improve
>> performance by using that feature on i386 and x86_64 compatible
>> processors, and use old aligning code on different processors.
>> ---
>>  exec/totempg.c |   35 ++++++++++++++++++++++++++---------
>>  1 files changed, 26 insertions(+), 9 deletions(-)
>>
>> diff --git a/exec/totempg.c b/exec/totempg.c
>> index 9188958..116e5d8 100644
>> --- a/exec/totempg.c
>> +++ b/exec/totempg.c
>> @@ -115,6 +115,12 @@ struct totempg_mcast_header {
>>      short type;
>>  };
>>  
>> +#if !(defined(__i386__) || defined(__x86_64__))
>> +/*
>> + * Need align on architectures different then i386 or x86_64
>> + */
>> +#define TOTEMPG_NEED_ALIGN 1
>> +#endif
>>  
>>  /*
>>   * totempg_mcast structure
>> @@ -374,8 +380,9 @@ static inline void group_endian_convert (
>>      int i;
>>      char *aligned_msg;
>>  
>> +#ifdef TOTEMPG_NEED_ALIGN
>>      /*
>> -     * Align data structure for sparc and ia64
>> +     * Align data structure for not i386 or x86_64
>>       */
>>      if ((size_t)msg % 4 != 0) {
>>              aligned_msg = alloca(msg_len);
>> @@ -383,6 +390,9 @@ static inline void group_endian_convert (
>>      } else {
>>              aligned_msg = msg;
>>      }
>> +#else
>> +    aligned_msg = msg;
>> +#endif
>>  
>>      group_len = (unsigned short *)aligned_msg;
>>      group_len[0] = swab16(group_len[0]);
>> @@ -406,12 +416,15 @@ static inline int group_matches (
>>      char *group_name;
>>      int i;
>>      int j;
>> +#ifdef TOTEMPG_NEED_ALIGN
>>          struct iovec iovec_aligned = { NULL, 0 };
>> +#endif
>>  
>>      assert (iov_len == 1);
>>  
>> +#ifdef TOTEMPG_NEED_ALIGN
>>      /*
>> -     * Align data structure for sparc and ia64
>> +     * Align data structure for not i386 or x86_64
>>       */
>>      if ((size_t)iovec->iov_base % 4 != 0) {
>>              iovec_aligned.iov_base = alloca(iovec->iov_len);
>> @@ -419,7 +432,7 @@ static inline int group_matches (
>>              iovec_aligned.iov_len = iovec->iov_len;
>>              iovec = &iovec_aligned;
>>      }
>> -
>> +#endif
>>  
>>      group_len = (unsigned short *)iovec->iov_base;
>>      group_name = ((char *)iovec->iov_base) +
>> @@ -469,15 +482,18 @@ static inline void app_deliver_fn (
>>              group_endian_convert (msg, msg_len);
>>      }
>>  
>> -/*
>> - * TODO This function needs to be rewritten for proper alignment to avoid 
>> 3+ memory copies
>> - */
> 
> should change todo to "fragmentation/assembly need to be redesigned to
> provide aligned access in all cases to avoid memory copies on non386 archs"
> 
>> +#ifdef TOTEMPG_NEED_ALIGN
>>      /*
>> -     * Align data structure for sparc and ia64
>> +     * Align data structure for not i386 or x86_64
>>       */
>>      aligned_iovec.iov_base = alloca(msg_len);
>>      aligned_iovec.iov_len = msg_len;
>>      memcpy(aligned_iovec.iov_base, msg, msg_len);
>> +#else
>> +    aligned_iovec.iov_base = msg;
>> +    aligned_iovec.iov_len = msg_len;
>> +#endif
>> +
>>      iovec = &aligned_iovec;
>>  
>>      for (i = 0; i <= totempg_max_handle; i++) {
>> @@ -489,8 +505,9 @@ static inline void app_deliver_fn (
>>                              stripped_iovec.iov_len = iovec->iov_len - 
>> adjust_iovec;
>>                              stripped_iovec.iov_base = (char 
>> *)iovec->iov_base + adjust_iovec;
>>  
>> +#ifdef TOTEMPG_NEED_ALIGN
>>                              /*
>> -                             * Align data structure for sparc and ia64
>> +                             * Align data structure for not i386 or x86_64
>>                               */
>>                              if ((char *)iovec->iov_base + adjust_iovec % 4 
>> != 0) {
>>                                      /*
>> @@ -502,7 +519,7 @@ static inline void app_deliver_fn (
>>                                               (char *)iovec->iov_base + 
>> adjust_iovec,
>>                                              stripped_iovec.iov_len);
>>                              }
>> -
>> +#endif
>>                              instance->deliver_fn (
>>                                      nodeid,
>>                                      stripped_iovec.iov_base,
> 

_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to