Yo George -- This commit has a bunch of indentation changes, so at first blush it's hard to tell exactly what it does.
Can you give a short explanation of what this commit does? Thanks! On Feb 19, 2010, at 2:10 AM, <bosi...@osl.iu.edu> wrote: > Author: bosilca > Date: 2010-02-19 02:10:32 EST (Fri, 19 Feb 2010) > New Revision: 22669 > URL: https://svn.open-mpi.org/trac/ompi/changeset/22669 > > Log: > Unrestricted number of interfaces. > > Text files modified: > trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c | 6 > > trunk/ompi/mca/btl/tcp/btl_tcp_proc.c | 290 > ++++++++++++++++++++++----------------- > trunk/ompi/mca/btl/tcp/btl_tcp_proc.h | 3 > > 3 files changed, 165 insertions(+), 134 deletions(-) > > Modified: trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c > ============================================================================== > --- trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c (original) > +++ trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c 2010-02-19 02:10:32 EST (Fri, > 19 Feb 2010) > @@ -315,7 +315,7 @@ > { > /* send process identifier to remote endpoint */ > mca_btl_tcp_proc_t* btl_proc = mca_btl_tcp_proc_local(); > - orte_process_name_t guid = btl_proc->proc_name; > + orte_process_name_t guid = btl_proc->proc_ompi->proc_name; > > ORTE_PROCESS_NAME_HTON(guid); > if(mca_btl_tcp_endpoint_send_blocking(btl_endpoint, &guid, sizeof(guid)) > != > @@ -479,7 +479,9 @@ > } > ORTE_PROCESS_NAME_NTOH(guid); > /* compare this to the expected values */ > - if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, > &btl_proc->proc_name, &guid)) { > + if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, > + > &btl_proc->proc_ompi->proc_name, > + &guid)) { > BTL_ERROR(("received unexpected process identifier %s", > ORTE_NAME_PRINT(&guid))); > mca_btl_tcp_endpoint_close(btl_endpoint); > > Modified: trunk/ompi/mca/btl/tcp/btl_tcp_proc.c > ============================================================================== > --- trunk/ompi/mca/btl/tcp/btl_tcp_proc.c (original) > +++ trunk/ompi/mca/btl/tcp/btl_tcp_proc.c 2010-02-19 02:10:32 EST (Fri, > 19 Feb 2010) > @@ -2,7 +2,7 @@ > * 
Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana > * University Research and Technology > * Corporation. All rights reserved. > - * Copyright (c) 2004-2008 The University of Tennessee and The University > + * Copyright (c) 2004-2010 The University of Tennessee and The University > * of Tennessee Research Foundation. All rights > * reserved. > * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, > @@ -40,17 +40,17 @@ > static void mca_btl_tcp_proc_construct(mca_btl_tcp_proc_t* proc); > static void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* proc); > > -mca_btl_tcp_interface_t* local_interfaces[MAX_KERNEL_INTERFACES]; > -mca_btl_tcp_interface_t* peer_interfaces[MAX_KERNEL_INTERFACES]; > -int local_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX]; > -int peer_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX]; > -size_t num_local_interfaces; > -size_t num_peer_interfaces; > -unsigned int *best_assignment; > -int max_assignment_weight; > -int max_assignment_cardinality; > -enum mca_btl_tcp_connection_quality **weights; > -struct mca_btl_tcp_addr_t ***best_addr; > +static mca_btl_tcp_interface_t** local_interfaces = NULL; > +static int local_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX]; > +static size_t num_local_interfaces, max_local_interfaces; > +static mca_btl_tcp_interface_t** peer_interfaces = NULL; > +static size_t num_peer_interfaces, max_peer_interfaces; > +static int peer_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX]; > +static unsigned int *best_assignment; > +static int max_assignment_weight; > +static int max_assignment_cardinality; > +static enum mca_btl_tcp_connection_quality **weights; > +static struct mca_btl_tcp_addr_t ***best_addr; > > OBJ_CLASS_INSTANCE( mca_btl_tcp_proc_t, > opal_list_item_t, > @@ -76,7 +76,7 @@ > /* remove from list of all proc instances */ > OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock); > opal_hash_table_remove_value_uint64(&mca_btl_tcp_component.tcp_procs, > - > 
orte_util_hash_name(&tcp_proc->proc_name)); > + > orte_util_hash_name(&tcp_proc->proc_ompi->proc_name)); > OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock); > > /* release resources */ > @@ -113,7 +113,6 @@ > if(NULL == btl_proc) > return NULL; > btl_proc->proc_ompi = ompi_proc; > - btl_proc->proc_name = ompi_proc->proc_name; > > /* add to hash table of all proc instance */ > opal_hash_table_set_value_uint64(&mca_btl_tcp_component.tcp_procs, > @@ -176,8 +175,6 @@ > int assignment_weight = 0; > int assignment_cardinality = 0; > > - > - > if(max_interfaces < num_peer_interfaces) { > max_interfaces = num_peer_interfaces; > } > @@ -232,64 +229,34 @@ > interface->inuse = 0; > } > > - > -/* > - * Note that this routine must be called with the lock on the process > - * already held. Insert a btl instance into the proc array and assign > - * it an address. > - */ > -int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, > - mca_btl_base_endpoint_t* btl_endpoint ) > +static mca_btl_tcp_interface_t** mca_btl_tcp_retrieve_local_interfaces(void) > { > - size_t i, j; > - struct sockaddr_storage endpoint_addr_ss, local_addr; > - int idx, rc; > - int *a = NULL; > - unsigned int perm_size; > - char **include; > - char **exclude; > - char **argv; > - bool skip; > + struct sockaddr_storage local_addr; > char local_if_name[IF_NAMESIZE]; > + char **include, **exclude, **argv; > + bool skip; > + int idx; > > - num_local_interfaces = 0; > - num_peer_interfaces = 0; > - > -#ifndef WORDS_BIGENDIAN > - /* if we are little endian and our peer is not so lucky, then we > - need to put all information sent to him in big endian (aka > - Network Byte Order) and expect all information received to > - be in NBO. Since big endian machines always send and receive > - in NBO, we don't care so much about that case. 
*/ > - if (btl_proc->proc_ompi->proc_arch & OPAL_ARCH_ISBIGENDIAN) { > - btl_endpoint->endpoint_nbo = true; > - } > -#endif > - > - /* insert into endpoint array */ > - btl_endpoint->endpoint_proc = btl_proc; > - btl_proc->proc_endpoints[btl_proc->proc_endpoint_count++] = btl_endpoint; > + if( NULL != local_interfaces ) > + return local_interfaces; > > + max_local_interfaces = MAX_KERNEL_INTERFACES; > + num_local_interfaces = 0; > + local_interfaces = (mca_btl_tcp_interface_t**)calloc( > max_local_interfaces, sizeof(mca_btl_tcp_interface_t*) ); > + if( NULL == local_interfaces ) > + return NULL; > > memset(local_kindex_to_index, -1, > sizeof(int)*MAX_KERNEL_INTERFACE_INDEX); > - memset(peer_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX); > - memset(local_interfaces, 0, sizeof(local_interfaces)); > - memset(peer_interfaces, 0, sizeof(peer_interfaces)); > > /* Collect up the list of included and excluded interfaces, if any */ > include = opal_argv_split(mca_btl_tcp_component.tcp_if_include,','); > exclude = opal_argv_split(mca_btl_tcp_component.tcp_if_exclude,','); > > /* > - * the following two blocks shout CODE DUPLICATION. 
We are aware of > - * the problem > - */ > - > - /* > * identify all kernel interfaces and the associated addresses of > * the local node > */ > - for (idx = opal_ifbegin(); idx >= 0; idx=opal_ifnext (idx)) { > + for( idx = opal_ifbegin(); idx >= 0; idx = opal_ifnext (idx) ) { > int kindex, index; > > opal_ifindextoaddr (idx, (struct sockaddr*) &local_addr, sizeof > (local_addr)); > @@ -310,10 +277,6 @@ > } > argv++; > } > - if (true == skip) { > - /* This interface is not part of the included set, so skip > it */ > - continue; > - } > } > > /* If we were given a list of excluded interfaces, then check to see > if the > @@ -333,9 +296,10 @@ > } > argv++; > } > - if(true == skip) { > - continue; > - } > + } > + if (true == skip) { > + /* This interface is not part of the requested set, so skip it */ > + continue; > } > > kindex = opal_ifindextokindex(idx); > @@ -345,48 +309,103 @@ > if(-1 == index) { > index = num_local_interfaces++; > local_kindex_to_index[kindex] = index; > + > + if( num_local_interfaces == max_local_interfaces ) { > + max_local_interfaces <<= 1; > + local_interfaces = (mca_btl_tcp_interface_t**)realloc( > local_interfaces, > + > max_local_interfaces * sizeof(mca_btl_tcp_interface_t*) ); > + if( NULL == local_interfaces ) > + return NULL; > + } > local_interfaces[index] = (mca_btl_tcp_interface_t *) > malloc(sizeof(mca_btl_tcp_interface_t)); > assert(NULL != local_interfaces[index]); > mca_btl_tcp_initialise_interface(local_interfaces[index], > kindex, index); > } > > switch(local_addr.ss_family) { > - case AF_INET: > - /* if AF is disabled, skip it completely */ > - if (4 == mca_btl_tcp_component.tcp_disable_family) { > - continue; > - } > + case AF_INET: > + /* if AF is disabled, skip it completely */ > + if (4 == mca_btl_tcp_component.tcp_disable_family) { > + continue; > + } > > - > local_interfaces[local_kindex_to_index[kindex]]->ipv4_address = > - (struct sockaddr_storage*) malloc(sizeof(local_addr)); > - > 
memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv4_address, > - &local_addr, sizeof(local_addr)); > - opal_ifindextomask(idx, > - > &local_interfaces[local_kindex_to_index[kindex]]->ipv4_netmask, > - sizeof(int)); > - break; > - case AF_INET6: > - /* if AF is disabled, skip it completely */ > - if (6 == mca_btl_tcp_component.tcp_disable_family) { > - continue; > - } > + local_interfaces[local_kindex_to_index[kindex]]->ipv4_address = > + (struct sockaddr_storage*) malloc(sizeof(local_addr)); > + > memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv4_address, > + &local_addr, sizeof(local_addr)); > + opal_ifindextomask(idx, > + > &local_interfaces[local_kindex_to_index[kindex]]->ipv4_netmask, > + sizeof(int)); > + break; > + case AF_INET6: > + /* if AF is disabled, skip it completely */ > + if (6 == mca_btl_tcp_component.tcp_disable_family) { > + continue; > + } > > - local_interfaces[local_kindex_to_index[kindex]]->ipv6_address > - = (struct sockaddr_storage*) malloc(sizeof(local_addr)); > - > memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv6_address, > - &local_addr, sizeof(local_addr)); > - opal_ifindextomask(idx, > - > &local_interfaces[local_kindex_to_index[kindex]]->ipv6_netmask, > - sizeof(int)); > - break; > - default: > - opal_output(0, "unknown address family for tcp: %d\n", > + local_interfaces[local_kindex_to_index[kindex]]->ipv6_address > + = (struct sockaddr_storage*) malloc(sizeof(local_addr)); > + > memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv6_address, > + &local_addr, sizeof(local_addr)); > + opal_ifindextomask(idx, > + > &local_interfaces[local_kindex_to_index[kindex]]->ipv6_netmask, > + sizeof(int)); > + break; > + default: > + opal_output(0, "unknown address family for tcp: %d\n", > local_addr.ss_family); > } > } > opal_argv_free(include); > opal_argv_free(exclude); > > + return local_interfaces; > +} > +/* > + * Note that this routine must be called with the lock on the process > + * already held. 
Insert a btl instance into the proc array and assign > + * it an address. > + */ > +int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, > + mca_btl_base_endpoint_t* btl_endpoint ) > +{ > + struct sockaddr_storage endpoint_addr_ss; > + unsigned int perm_size; > + int rc, *a = NULL; > + size_t i, j; > + > +#ifndef WORDS_BIGENDIAN > + /* if we are little endian and our peer is not so lucky, then we > + need to put all information sent to him in big endian (aka > + Network Byte Order) and expect all information received to > + be in NBO. Since big endian machines always send and receive > + in NBO, we don't care so much about that case. */ > + if (btl_proc->proc_ompi->proc_arch & OPAL_ARCH_ISBIGENDIAN) { > + btl_endpoint->endpoint_nbo = true; > + } > +#endif > + > + /* insert into endpoint array */ > + btl_endpoint->endpoint_proc = btl_proc; > + btl_proc->proc_endpoints[btl_proc->proc_endpoint_count++] = btl_endpoint; > + > + /* sanity checks */ > + if( NULL == local_interfaces ) { > + if( NULL == mca_btl_tcp_retrieve_local_interfaces() ) > + return OMPI_ERR_OUT_OF_RESOURCE; > + } > + if( 0 == num_local_interfaces ) { > + return OMPI_ERR_UNREACH; > + } > + > + if( NULL == peer_interfaces ) { > + max_peer_interfaces = max_local_interfaces; > + peer_interfaces = (mca_btl_tcp_interface_t**)malloc( > max_peer_interfaces * sizeof(mca_btl_tcp_interface_t*) ); > + } > + num_peer_interfaces = 0; > + memset(peer_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX); > + memset(peer_interfaces, 0, max_peer_interfaces * > sizeof(mca_btl_tcp_interface_t*)); > + > /* > * identify all kernel interfaces and the associated addresses of > * the peer > @@ -405,9 +424,16 @@ > if(-1 == index) { > index = num_peer_interfaces++; > peer_kindex_to_index[endpoint_addr->addr_ifkindex] = index; > + if( num_peer_interfaces == max_peer_interfaces ) { > + max_peer_interfaces <<= 1; > + peer_interfaces = (mca_btl_tcp_interface_t**)realloc( > peer_interfaces, > + > max_peer_interfaces * 
sizeof(mca_btl_tcp_interface_t*) ); > + if( NULL == peer_interfaces ) > + return OMPI_ERR_OUT_OF_RESOURCE; > + } > peer_interfaces[index] = (mca_btl_tcp_interface_t *) > malloc(sizeof(mca_btl_tcp_interface_t)); > mca_btl_tcp_initialise_interface(peer_interfaces[index], > - endpoint_addr->addr_ifkindex, index); > + endpoint_addr->addr_ifkindex, > index); > } > > /* > @@ -419,25 +445,25 @@ > } > > switch(endpoint_addr_ss.ss_family) { > - case AF_INET: > - peer_interfaces[index]->ipv4_address = (struct > sockaddr_storage*) malloc(sizeof(endpoint_addr_ss)); > - peer_interfaces[index]->ipv4_endpoint_addr = endpoint_addr; > - memcpy(peer_interfaces[index]->ipv4_address, > - &endpoint_addr_ss, sizeof(endpoint_addr_ss)); > - break; > - case AF_INET6: > - peer_interfaces[index]->ipv6_address = (struct > sockaddr_storage*) malloc(sizeof(endpoint_addr_ss)); > - peer_interfaces[index]->ipv6_endpoint_addr = endpoint_addr; > - memcpy(peer_interfaces[index]->ipv6_address, > - &endpoint_addr_ss, sizeof(endpoint_addr_ss)); > - break; > - default: > - opal_output(0, "unknown address family for tcp: %d\n", > - local_addr.ss_family); > - /* > - * return OMPI_UNREACH or some error, as this is not > - * good > - */ > + case AF_INET: > + peer_interfaces[index]->ipv4_address = (struct > sockaddr_storage*) malloc(sizeof(endpoint_addr_ss)); > + peer_interfaces[index]->ipv4_endpoint_addr = endpoint_addr; > + memcpy(peer_interfaces[index]->ipv4_address, > + &endpoint_addr_ss, sizeof(endpoint_addr_ss)); > + break; > + case AF_INET6: > + peer_interfaces[index]->ipv6_address = (struct > sockaddr_storage*) malloc(sizeof(endpoint_addr_ss)); > + peer_interfaces[index]->ipv6_endpoint_addr = endpoint_addr; > + memcpy(peer_interfaces[index]->ipv6_address, > + &endpoint_addr_ss, sizeof(endpoint_addr_ss)); > + break; > + default: > + opal_output(0, "unknown address family for tcp: %d\n", > + endpoint_addr_ss.ss_family); > + /* > + * return OMPI_UNREACH or some error, as this is not > + * good > + */ > } 
> } > > @@ -451,17 +477,17 @@ > } > > weights = (enum mca_btl_tcp_connection_quality**) malloc(perm_size > - * sizeof(enum mca_btl_tcp_connection_quality*)); > + * sizeof(enum > mca_btl_tcp_connection_quality*)); > > best_addr = (mca_btl_tcp_addr_t ***) malloc(perm_size > - * sizeof(mca_btl_tcp_addr_t **)); > + * sizeof(mca_btl_tcp_addr_t > **)); > for(i = 0; i < perm_size; ++i) { > weights[i] = (enum mca_btl_tcp_connection_quality*) malloc(perm_size > * > - sizeof(enum mca_btl_tcp_connection_quality)); > + > sizeof(enum mca_btl_tcp_connection_quality)); > memset(weights[i], 0, perm_size * sizeof(enum > mca_btl_tcp_connection_quality)); > > best_addr[i] = (mca_btl_tcp_addr_t **) malloc(perm_size * > - sizeof(mca_btl_tcp_addr_t *)); > + > sizeof(mca_btl_tcp_addr_t *)); > memset(best_addr[i], 0, perm_size * sizeof(mca_btl_tcp_addr_t *)); > } > > @@ -478,11 +504,11 @@ > > /* check for RFC1918 */ > if(opal_net_addr_isipv4public((struct sockaddr*) > local_interfaces[i]->ipv4_address) > - && opal_net_addr_isipv4public((struct sockaddr*) > - peer_interfaces[j]->ipv4_address)) { > + && opal_net_addr_isipv4public((struct sockaddr*) > + > peer_interfaces[j]->ipv4_address)) { > if(opal_net_samenetwork((struct sockaddr*) > local_interfaces[i]->ipv4_address, > - (struct sockaddr*) > peer_interfaces[j]->ipv4_address, > - local_interfaces[i]->ipv4_netmask)) { > + (struct sockaddr*) > peer_interfaces[j]->ipv4_address, > + > local_interfaces[i]->ipv4_netmask)) { > weights[i][j] = CQ_PUBLIC_SAME_NETWORK; > } else { > weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK; > @@ -491,8 +517,8 @@ > continue; > } else { > if(opal_net_samenetwork((struct sockaddr*) > local_interfaces[i]->ipv4_address, > - (struct sockaddr*) > peer_interfaces[j]->ipv4_address, > - local_interfaces[i]->ipv4_netmask)) { > + (struct sockaddr*) > peer_interfaces[j]->ipv4_address, > + > local_interfaces[i]->ipv4_netmask)) { > weights[i][j] = CQ_PRIVATE_SAME_NETWORK; > } else { > weights[i][j] = 
CQ_PRIVATE_DIFFERENT_NETWORK; > @@ -507,8 +533,8 @@ > if(NULL != local_interfaces[i]->ipv6_address && > NULL != peer_interfaces[j]->ipv6_address) { > if(opal_net_samenetwork((struct sockaddr*) > local_interfaces[i]->ipv6_address, > - (struct sockaddr*) > peer_interfaces[j]->ipv6_address, > - local_interfaces[i]->ipv6_netmask)) { > + (struct sockaddr*) > peer_interfaces[j]->ipv6_address, > + local_interfaces[i]->ipv6_netmask)) { > weights[i][j] = CQ_PUBLIC_SAME_NETWORK; > } else { > weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK; > @@ -539,9 +565,9 @@ > rc = OMPI_ERR_UNREACH; > for(i = 0; i < perm_size; ++i) { > if(best_assignment[i] > num_peer_interfaces > - || weights[i][best_assignment[i]] == CQ_NO_CONNECTION > - || peer_interfaces[best_assignment[i]]->inuse > - || NULL == peer_interfaces[best_assignment[i]]) { > + || weights[i][best_assignment[i]] == CQ_NO_CONNECTION > + || peer_interfaces[best_assignment[i]]->inuse > + || NULL == peer_interfaces[best_assignment[i]]) { > continue; > } > peer_interfaces[best_assignment[i]]->inuse++; > @@ -565,6 +591,9 @@ > } > free(peer_interfaces[i]); > } > + free(peer_interfaces); > + peer_interfaces = NULL; > + max_peer_interfaces = 0; > > for(i = 0; i < num_local_interfaces; ++i) { > if(NULL != local_interfaces[i]->ipv4_address) { > @@ -575,6 +604,9 @@ > } > free(local_interfaces[i]); > } > + free(local_interfaces); > + local_interfaces = NULL; > + max_local_interfaces = 0; > > free(weights); > free(best_addr); > > Modified: trunk/ompi/mca/btl/tcp/btl_tcp_proc.h > ============================================================================== > --- trunk/ompi/mca/btl/tcp/btl_tcp_proc.h (original) > +++ trunk/ompi/mca/btl/tcp/btl_tcp_proc.h 2010-02-19 02:10:32 EST (Fri, > 19 Feb 2010) > @@ -41,9 +41,6 @@ > ompi_proc_t *proc_ompi; > /**< pointer to corresponding ompi_proc_t */ > > - orte_process_name_t proc_name; > - /**< globally unique identifier for the process */ > - > struct mca_btl_tcp_addr_t* proc_addrs; > /**< array of 
addresses exported by peer */ > > _______________________________________________ > svn-full mailing list > svn-f...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/svn-full > -- Jeff Squyres jsquy...@cisco.com For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/