Hello Michael, On 09/28/2011 06:48 AM, Michael Raab wrote:
I'm quite sure that the right socket is removed from the socket list and closed. I've prepared a quick and dirty example that should help you to reproduce the problem. The example includes our OSGExt library that contains our specific class implementations of ClusterWindow and RenderServer. Additionally there are a ClusterClient and a ClusterServer application based on the OSGExt lib included. Usage: - run startClient, startServer1, startServer2 from the run folder - by pressing '1' or '2' you can disconnect either the first or the second render server. By default the connectiontype is set to Multicast. Using this mode, client and server freeze after one server has been disconnected. If you change the connectiontype to "StreamSock" (ClusterClient: line 105), the connection is still ok, after disconnecting one server. I'm sending the complete source code off the mailing list. Hope this will help you to find the problem.
yes, it is of great help, thank you! I believe I've found the problem: GroupMCastConnection keeps two additional vectors of SocketAddress (_receiver and _waitFor) and the latter is used to determine which machine has outstanding ack packets. On a disconnect these vectors were not updated so that the connection was still waiting for ack packets from the disconnected machine. I've attached a patch that corrects this and makes your examples work for me. Would you mind giving it a try and letting me know if it fixes the problem at your end as well? Thanks!
Cheers, Carsten
Index: Source/Base/Network/Socket/OSGGroupMCastConnection.cpp =================================================================== RCS file: /cvsroot/opensg/OpenSG/Source/Base/Network/Socket/OSGGroupMCastConnection.cpp,v retrieving revision 1.19 diff -u -r1.19 OSGGroupMCastConnection.cpp --- Source/Base/Network/Socket/OSGGroupMCastConnection.cpp 5 Jun 2008 04:59:07 -0000 1.19 +++ Source/Base/Network/Socket/OSGGroupMCastConnection.cpp 28 Sep 2011 17:46:49 -0000 @@ -148,9 +148,22 @@ */ void GroupMCastConnection::disconnect(Channel channel) { + ChannelIndex index = channelToIndex(channel); + Inherited::disconnect(channel); _lock->aquire(); - _destination.erase(_destination.begin()+channelToIndex(channel)); + _destination.erase(_destination.begin() + index); + + // remove channel from _receiver/_waitFor + std::vector<SocketAddress>::iterator rIt = _receiver.begin() + index; + std::vector<SocketAddress>::iterator wIt = std::find(_waitFor.begin(), + _waitFor.end (), _receiver[index]); + + if(wIt != _waitFor.end()) + _waitFor.erase(wIt); + + _receiver.erase(rIt); + _lock->release(); } @@ -415,7 +428,8 @@ dgram[send]->setEarlySend(false); } sendId = dgram[send]->getId(); -// printf("send dgram %d at id %d\n",send,dgram[send]->getId()); + + // FLOG(("Sending dgram %d at id %d\n", send, dgram[send]->getId())); } // loop while @@ -437,9 +451,9 @@ printf("send %d end %d\n",send,end); printf("lastack %lf\n",getSystemTime() - lastAckTime); #endif - FDEBUG(("timeout count %d %d missing %d\n",count,sendId,missing[ack].size())) -// printf("%.10f timeout count %d %d missing %d\n",getSystemTime()-t1,count,sendId,missing[ack].size()); - + // FLOG(("count %d missing %d readable %d send %d end %d\n", + // count, missing[ack].size(), readable, send, end)); + ackRequest.setSize(0); ackRequest.setId(sendId); @@ -486,7 +500,11 @@ // first ack for this dgram from this receiver if(response.getResponseAck() == true) { -// printf("Ack %d from 
%s:%d\n",response.getId(),fromAddress.getHost().c_str(),fromAddress.getPort()); + // FLOG(("Ack %d from %s:%d\n", + // response.getId(), + // fromAddress.getHost().c_str(), + // fromAddress.getPort())); + for(m = ack ; dgram[m]->getId() != response.getId() ; m=(m+1) % _windowSize) @@ -500,13 +518,19 @@ continue; lastNak = response.getId(); lastNakTime = getSystemTime(); - FDEBUG(("Nack %d from %s:%d\n",response.getId(),fromAddress.getHost().c_str(),fromAddress.getPort())); -// printf("Nack %d from %s:%d\n",response.getId(),fromAddress.getHost().c_str(),fromAddress.getPort()); + + // FLOG(("Nack %d from %s:%d\n", + // response.getId(), + // fromAddress.getHost().c_str(), + // fromAddress.getPort())); + // retransmit for(m = ack ; m != send && dgram[m]->getId() != response.getId() ; m = (m+1) % _windowSize); - send = m; + { + send = m; + } } } Index: Source/Base/Network/Socket/OSGPointMCastConnection.cpp =================================================================== RCS file: /cvsroot/opensg/OpenSG/Source/Base/Network/Socket/OSGPointMCastConnection.cpp,v retrieving revision 1.11 diff -u -r1.11 OSGPointMCastConnection.cpp --- Source/Base/Network/Socket/OSGPointMCastConnection.cpp 5 Sep 2008 08:21:33 -0000 1.11 +++ Source/Base/Network/Socket/OSGPointMCastConnection.cpp 28 Sep 2011 17:46:49 -0000 @@ -518,6 +518,9 @@ ignoreT = getSystemTime(); do { + if(_recvQueueThreadStop) + return true; + while(!recvNextDgram(dgram)) { if(_recvQueueThreadStop) Index: Source/Base/Network/Socket/OSGPointSockConnection.cpp =================================================================== RCS file: /cvsroot/opensg/OpenSG/Source/Base/Network/Socket/OSGPointSockConnection.cpp,v retrieving revision 1.12 diff -u -r1.12 OSGPointSockConnection.cpp --- Source/Base/Network/Socket/OSGPointSockConnection.cpp 12 Apr 2010 15:00:10 -0000 1.12 +++ Source/Base/Network/Socket/OSGPointSockConnection.cpp 28 Sep 2011 17:46:49 -0000 @@ -281,7 +281,7 @@ } catch(SocketError &e) { - throw 
ReadError(e.what()); + throw WriteError(e.what()); } }
------------------------------------------------------------------------------ All the data continuously generated in your IT infrastructure contains a definitive record of customers, application performance, security threats, fraudulent activity and more. Splunk takes this data and makes sense of it. Business sense. IT sense. Common sense. http://p.sf.net/sfu/splunk-d2dcopy1
_______________________________________________ Opensg-users mailing list Opensg-users@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensg-users