The current code in the TCP BTL prevents local execution on a laptop that does not expose a public IP address, because it unconditionally disqualifies all interfaces with local (loopback) addresses. This disqualification is not driven by MCA parameters; it is hard-coded deep inside the IP matching logic and applies regardless of what the user specified in the corresponding MCA parameters (if_include and/or if_exclude).
Instead, I propose that we exclude the local interface only via the if_exclude MCA parameter (both the IPv4 and IPv6 local addresses are already in the default if_exclude), and remove all the code that rejects local addresses. I propose the following patch (local addresses are accepted via the second if statement because opal_net_samenetwork returns true). If there are no complaints by Friday morning, I will push the code. Thanks, George. diff --git a/opal/mca/btl/tcp/btl_tcp_proc.c b/opal/mca/btl/tcp/btl_tcp_proc.c index a727a43..f7decc4 100644 --- a/opal/mca/btl/tcp/btl_tcp_proc.c +++ b/opal/mca/btl/tcp/btl_tcp_proc.c @@ -541,9 +541,9 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, } - for(i=0; i<proc_data->num_local_interfaces; ++i) { + for( i = 0; i < proc_data->num_local_interfaces; ++i ) { mca_btl_tcp_interface_t* local_interface = proc_data->local_interfaces[i]; - for(j=0; j<proc_data->num_peer_interfaces; ++j) { + for( j = 0; j < proc_data->num_peer_interfaces; ++j ) { /* initially, assume no connection is possible */ proc_data->weights[i][j] = CQ_NO_CONNECTION; @@ -552,19 +552,8 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, if(NULL != proc_data->local_interfaces[i]->ipv4_address && NULL != peer_interfaces[j]->ipv4_address) { - /* check for loopback */ - if ((opal_net_islocalhost((struct sockaddr *)local_interface->ipv4_address) && - !opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv4_address)) || - (opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv4_address) && - !opal_net_islocalhost((struct sockaddr *)local_interface->ipv4_address)) || - (opal_net_islocalhost((struct sockaddr *)local_interface->ipv4_address) && - !opal_ifislocal(proc_hostname))) { - - /* No connection is possible on these interfaces */ - - /* check for RFC1918 */ - } else if(opal_net_addr_isipv4public((struct sockaddr*) local_interface->ipv4_address) && - opal_net_addr_isipv4public((struct sockaddr*) peer_interfaces[j]->ipv4_address)) { + 
if(opal_net_addr_isipv4public((struct sockaddr*) local_interface->ipv4_address) && + opal_net_addr_isipv4public((struct sockaddr*) peer_interfaces[j]->ipv4_address)) { if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv4_address, (struct sockaddr*) peer_interfaces[j]->ipv4_address, local_interface->ipv4_netmask)) { @@ -574,17 +563,16 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, } proc_data->best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr; continue; + } + if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv4_address, + (struct sockaddr*) peer_interfaces[j]->ipv4_address, + local_interface->ipv4_netmask)) { + proc_data->weights[i][j] = CQ_PRIVATE_SAME_NETWORK; } else { - if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv4_address, - (struct sockaddr*) peer_interfaces[j]->ipv4_address, - local_interface->ipv4_netmask)) { - proc_data->weights[i][j] = CQ_PRIVATE_SAME_NETWORK; - } else { - proc_data->weights[i][j] = CQ_PRIVATE_DIFFERENT_NETWORK; - } - proc_data->best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr; - continue; + proc_data->weights[i][j] = CQ_PRIVATE_DIFFERENT_NETWORK; } + proc_data->best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr; + continue; } /* check state of ipv6 address pair - ipv6 is always public, @@ -593,19 +581,9 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, if(NULL != local_interface->ipv6_address && NULL != peer_interfaces[j]->ipv6_address) { - /* check for loopback */ - if ((opal_net_islocalhost((struct sockaddr *)local_interface->ipv6_address) && - !opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv6_address)) || - (opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv6_address) && - !opal_net_islocalhost((struct sockaddr *)local_interface->ipv6_address)) || - (opal_net_islocalhost((struct sockaddr *)local_interface->ipv6_address) && - !opal_ifislocal(proc_hostname))) { - - /* No connection is possible on these interfaces */ - - 
} else if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv6_address, - (struct sockaddr*) peer_interfaces[j]->ipv6_address, - local_interface->ipv6_netmask)) { + if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv6_address, + (struct sockaddr*) peer_interfaces[j]->ipv6_address, + local_interface->ipv6_netmask)) { proc_data->weights[i][j] = CQ_PUBLIC_SAME_NETWORK; } else { proc_data->weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
_______________________________________________ devel mailing list devel@lists.open-mpi.org https://rfd.newmexicoconsortium.org/mailman/listinfo/devel