This patch looks pretty good. One change that needs to be made before this patch can be merged is that the transport drivers need to be aware of their own maximum frame sizes and allocate accordingly. Ie totemrrp_malloc() doesn't need a frame size parameter, (but instead totemudp.c would allocate a frame size that is appropriate for the transport (in the case of udp, something like 10k would be safe).
Another interesting idea that may be useful for a second patch is to keep a list of freed frames to avoid the malloc/free overhead for the common cases of totemsrp. Surprisingly this is quite a significant amount of overhead (according to oprofile). A totemnet_malloc operation would then be something like: if free_list has a frame, return frame and delete frame from free list for the instance else malloc frame using transport driver totemnet_free operation would be something like: add frame to free list To do this "slabbing" properly, a conditional parameter is probably needed in the transport driver definition because totemnet_free for the iba transport actually needs to call the free function (so it can be reposted to the send queue). Regards -steve On 03/09/2011 09:45 PM, Zane Bitter wrote: > This change paves the way for eliminating a copy within the Infiniband > driver in the future by transferring responsibility for allocating and > freeing message buffers to the network driver layer. > > Tested under valgrind on a single-node cluster. > > Signed-off-by: Zane Bitter <[email protected]> > --- > exec/totemiba.c | 10 ++++++++++ > exec/totemiba.h | 4 ++++ > exec/totemnet.c | 28 ++++++++++++++++++++++++++++ > exec/totemnet.h | 4 ++++ > exec/totemrrp.c | 14 ++++++++++++++ > exec/totemrrp.h | 7 +++++++ > exec/totemsrp.c | 22 ++++++++++++++++++---- > exec/totemudp.c | 10 ++++++++++ > exec/totemudp.h | 4 ++++ > exec/totemudpu.c | 10 ++++++++++ > exec/totemudpu.h | 4 ++++ > 11 files changed, 113 insertions(+), 4 deletions(-) > > diff --git a/exec/totemiba.c b/exec/totemiba.c > index a0379ff..45b3fa9 100644 > --- a/exec/totemiba.c > +++ b/exec/totemiba.c > @@ -1317,6 +1317,16 @@ int totemiba_initialize ( > return (res); > } > > +void *totemiba_malloc (size_t size) > +{ > + return malloc(size); > +} > + > +void totemiba_free (void *ptr) > +{ > + return free(ptr); > +} > + > int totemiba_processor_count_set ( > void *iba_context, > int processor_count) > diff --git a/exec/totemiba.h b/exec/totemiba.h > index 4b2fbbf..dddfe78 100644 > --- a/exec/totemiba.h > +++ b/exec/totemiba.h > @@ -62,6 +62,10 @@ extern int totemiba_initialize ( > void (*target_set_completed) ( > void *context)); > > +extern void *totemiba_malloc (size_t size); > + > +extern void totemiba_free (void *ptr); > + > extern int totemiba_processor_count_set ( > void *iba_context, > int processor_count); > diff --git a/exec/totemnet.c b/exec/totemnet.c > index c7670f9..6283a4f 100644 > --- a/exec/totemnet.c > +++ b/exec/totemnet.c > @@ -35,6 +35,8 @@ > > #include <config.h> > > +#include <assert.h> > + > #ifdef HAVE_RDMA > #include <totemiba.h> > #endif > @@ -67,6 +69,10 @@ struct transport { > void (*target_set_completed) ( > void *context)); > > + void *(*malloc) (size_t size); > + > + void (*free) (void *ptr); > + > int (*processor_count_set) ( > void *transport_context, > int processor_count); > @@ -127,6 +133,8 @@ struct transport transport_entries[] = { > { > .name = "UDP/IP Multicast", > .initialize = totemudp_initialize, > + .malloc = totemudp_malloc, > + .free = totemudp_free, > .processor_count_set = totemudp_processor_count_set, > .token_send = totemudp_token_send, > .mcast_flush_send = totemudp_mcast_flush_send, > @@ -145,6 +153,8 @@ struct transport transport_entries[] = { > { > .name = "UDP/IP Unicast", > .initialize = totemudpu_initialize, > + .malloc = totemudpu_malloc, > + .free = totemudpu_free, > .processor_count_set = totemudpu_processor_count_set, > .token_send = totemudpu_token_send, > .mcast_flush_send = totemudpu_mcast_flush_send, > @@ -166,6 +176,8 @@ struct transport transport_entries[] = { > { > .name = "Infiniband/IP", > .initialize = totemiba_initialize, > + .malloc = totemiba_malloc, > + .free = totemiba_free, > .processor_count_set = totemiba_processor_count_set, > .token_send = totemiba_token_send, > .mcast_flush_send = totemiba_mcast_flush_send, > @@ -296,6 +308,22 @@ error_destroy: > return (-1); > } > > +void *totemnet_malloc (void *net_context, size_t size) > +{ > + struct totemnet_instance *instance = net_context; > + assert (instance != NULL); > + assert (instance->transport != NULL); > + return instance->transport->malloc (size); > +} > + > +void totemnet_free (void *net_context, void *ptr) > +{ > + struct totemnet_instance *instance = net_context; > + assert (instance != NULL); > + assert (instance->transport != NULL); > + instance->transport->free (ptr); > +} > + > int totemnet_processor_count_set ( > void *net_context, > int processor_count) > diff --git a/exec/totemnet.h b/exec/totemnet.h > index 7e6374c..a01872c 100644 > --- a/exec/totemnet.h > +++ b/exec/totemnet.h > @@ -69,6 +69,10 @@ extern int totemnet_initialize ( > void (*target_set_completed) ( > void *context)); > > +extern void *totemnet_malloc (void *net_context, size_t size); > + > +extern void totemnet_free (void *net_context, void *ptr); > + > extern int totemnet_processor_count_set ( > void *net_context, > int processor_count); > diff --git a/exec/totemrrp.c b/exec/totemrrp.c > index a8ebd08..a3320f2 100644 > --- a/exec/totemrrp.c > +++ b/exec/totemrrp.c > @@ -1665,6 +1665,20 @@ error_destroy: > return (res); > } > > +void *totemrrp_malloc (void *rrp_context, size_t size) > +{ > + struct totemrrp_instance *instance = rrp_context; > + assert (instance != NULL); > + return totemnet_malloc (instance->net_handles[0], size); > +} > + > +void totemrrp_free (void *rrp_context, void *ptr) > +{ > + struct totemrrp_instance *instance = rrp_context; > + assert (instance != NULL); > + totemnet_free (instance->net_handles[0], ptr); > +} > + > int totemrrp_processor_count_set ( > void *rrp_context, > unsigned int processor_count) > diff --git a/exec/totemrrp.h b/exec/totemrrp.h > index da79ed2..0b291cd 100644 > --- a/exec/totemrrp.h > +++ b/exec/totemrrp.h > @@ -78,6 +78,13 @@ extern int totemrrp_initialize ( > void *context) > ); > > +extern void *totemrrp_malloc( > + void *rrp_context, > + size_t size); > + > +extern void totemrrp_free( > + void *rrp_context, > + void *ptr); > > extern int totemrrp_processor_count_set ( > void *rrp_context, > diff --git a/exec/totemsrp.c b/exec/totemsrp.c > index 62794c7..3a5f1a6 100644 > --- a/exec/totemsrp.c > +++ b/exec/totemsrp.c > @@ -606,6 +606,8 @@ static void timer_function_heartbeat_timeout (void *data); > static void timer_function_token_retransmit_timeout (void *data); > static void timer_function_token_hold_retransmit_timeout (void *data); > static void timer_function_merge_detect_timeout (void *data); > +static void *totemsrp_malloc (struct totemsrp_instance *instance, size_t > size); > +static void totemsrp_free (struct totemsrp_instance *instance, void *ptr); > > void main_deliver_fn ( > void *context, > @@ -1356,6 +1358,18 @@ static void memb_set_print ( > } > #endif > > +static void *totemsrp_malloc (struct totemsrp_instance *instance, size_t > size) > +{ > + assert (instance != NULL); > + return totemrrp_malloc (instance->totemrrp_context, size); > +} > + > +static void totemsrp_free (struct totemsrp_instance *instance, void *ptr) > +{ > + assert (instance != NULL); > + totemrrp_free (instance->totemrrp_context, ptr); > +} > + > static void reset_token_retransmit_timeout (struct totemsrp_instance > *instance) > { > poll_timer_delete (instance->totemsrp_poll_handle, > @@ -2067,7 +2081,7 @@ static void memb_state_recovery_enter ( > messages_originated++; > memset (&message_item, 0, sizeof (struct message_item)); > // TODO LEAK > - message_item.mcast = malloc (FRAME_SIZE_MAX); > + message_item.mcast = totemsrp_malloc (instance, FRAME_SIZE_MAX); > assert (message_item.mcast); > message_item.mcast->header.type = MESSAGE_TYPE_MCAST; > srp_addr_copy (&message_item.mcast->system_from, > &instance->my_id); > @@ -2140,7 +2154,7 @@ int totemsrp_mcast ( > /* > * Allocate pending item > */ > - message_item.mcast = malloc (FRAME_SIZE_MAX); > + message_item.mcast = totemsrp_malloc (instance, FRAME_SIZE_MAX); > if (message_item.mcast == 0) { > goto error_mcast; > } > @@ -2278,7 +2292,7 @@ static void messages_free ( > instance->last_released + i, &ptr); > if (res == 0) { > regular_message = ptr; > - free (regular_message->mcast); > + totemsrp_free (instance, regular_message->mcast); > } > sq_items_release (&instance->regular_sort_queue, > instance->last_released + i); > @@ -3823,7 +3837,7 @@ static int message_handler_mcast ( > * Allocate new multicast memory block > */ > // TODO LEAK > - sort_queue_item.mcast = malloc (msg_len); > + sort_queue_item.mcast = totemsrp_malloc (instance, msg_len); > if (sort_queue_item.mcast == NULL) { > return (-1); /* error here is corrected by the > algorithm */ > } > diff --git a/exec/totemudp.c b/exec/totemudp.c > index b96bdbd..a0894ed 100644 > --- a/exec/totemudp.c > +++ b/exec/totemudp.c > @@ -1832,6 +1832,16 @@ int totemudp_initialize ( > return (0); > } > > +void *totemudp_malloc (size_t size) > +{ > + return malloc(size); > +} > + > +void totemudp_free (void *ptr) > +{ > + return free(ptr); > +} > + > int totemudp_processor_count_set ( > void *udp_context, > int processor_count) > diff --git a/exec/totemudp.h b/exec/totemudp.h > index 6218794..d273f31 100644 > --- a/exec/totemudp.h > +++ b/exec/totemudp.h > @@ -63,6 +63,10 @@ extern int totemudp_initialize ( > void (*target_set_completed) ( > void *context)); > > +extern void *totemudp_malloc (size_t size); > + > +extern void totemudp_free (void *ptr); > + > extern int totemudp_processor_count_set ( > void *udp_context, > int processor_count); > diff --git a/exec/totemudpu.c b/exec/totemudpu.c > index 3fad618..faf48b7 100644 > --- a/exec/totemudpu.c > +++ b/exec/totemudpu.c > @@ -1497,6 +1497,16 @@ int totemudpu_initialize ( > return (0); > } > > +void *totemudpu_malloc (size_t size) > +{ > + return malloc(size); > +} > + > +void totemudpu_free (void *ptr) > +{ > + return free(ptr); > +} > + > int totemudpu_processor_count_set ( > void *udpu_context, > int processor_count) > diff --git a/exec/totemudpu.h b/exec/totemudpu.h > index 2dcad24..865a77a 100644 > --- a/exec/totemudpu.h > +++ b/exec/totemudpu.h > @@ -63,6 +63,10 @@ extern int totemudpu_initialize ( > void (*target_set_completed) ( > void *context)); > > +extern void *totemudpu_malloc (size_t size); > + > +extern void totemudpu_free (void *ptr); > + > extern int totemudpu_processor_count_set ( > void *udpu_context, > int processor_count); > > _______________________________________________ > Openais mailing list > [email protected] > https://lists.linux-foundation.org/mailman/listinfo/openais _______________________________________________ Openais mailing list [email protected] https://lists.linux-foundation.org/mailman/listinfo/openais
