I’m guessing, from the lack of comments in the code and the failure of the
following small test program, that SCTP is unsupported even in basic form.
To observe the failure, compile the code below, run it with e.g. dmtcp_launch
./a.out 1500 5 5, and then issue dmtcp_command -bc from a separate shell.
The program waits 4 seconds before connecting any sockets.
Behavior varies with when during execution I take the checkpoint. If I take it
before any sockets are connected, then a restart works fine. If I take it
after sockets are connected, restart fails:
[40000] ERROR at connectionrewirer.cpp:114 in doReconnect;
REASON='JASSERT(_real_connect(fd, (sockaddr*) &remoteAddr.addr, remoteAddr.len)
== 0) failed'
id = 24aa32f81b91247f-40000-48aee2af7b128f(99007)
(strerror((*__errno_location ()))) = Connection refused
Message: failed to restore connection
a.out (40000): Terminating...
Derek Beatty
dbea...@acm.org <mailto:dbea...@acm.org>
// Test program for SCTP communication
// g++ -ggdb sctp-ipv4-test.cc -pthread
#include <iostream>
#include <string>
#include <thread>
#include <vector>
#include <netdb.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/types.h>
#include <sys/socket.h>
// Port number parsed from argv[1] in main(); used for the master's bind address.
int port;
// The unparsed argv[1] string; worker() passes it to getaddrinfo() as the service.
char* portStr;
// Number of messages each worker sends (one per iteration of its send loop).
int numLoops;
// Number of worker threads main() starts; also passed to listen() as the backlog.
int numClients;
using namespace std;
// Write one line of text to the standard log stream (std::clog) and flush it.
//
// log() is called concurrently from the worker threads and from main(), so the
// text and its trailing newline are emitted in a single insertion instead of
// as separate `<< s << endl` steps, which let another thread's output land
// between a message and its newline.
// NOTE(review): the C++ standard does not promise that one insertion is
// atomic with respect to other threads; this narrows the window, it does not
// replace a mutex.
void log(std::string s)
{
  s += '\n';
  std::clog << s << std::flush;
}
// Report a fatal error on std::clog and terminate the process with status 1.
//
// The line printed is "error: " followed by s. If errno is non-zero when die()
// is entered, ": " and the strerror() text for that errno are appended.
void die(std::string s)
{
  const std::string why =
      errno ? std::string(": ") + strerror(errno) : std::string();
  std::clog << "error: " << s << why << std::endl;
  exit(1);
}
// Client thread body: connect to the master over SCTP and send numLoops messages.
//
// w is the worker's index; it only labels log lines and message payloads.
// The worker sleeps 4 seconds, creates an IPv4 SCTP stream socket, resolves
// "localhost" at the service given by portStr, connects, and then sends one
// NUL-terminated "w:<w> i:<i>" message per second. Afterwards it shuts the
// socket down and closes it. Any failure ends the whole process via die().
void worker(int w)
{
  log("start worker " + std::to_string(w));
  sleep(4);

  const int sock = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);
  if (sock < 0) die("worker socket");
  log("worker " + std::to_string(w) + " fd " + std::to_string(sock));

  // Resolve IPv4 only: the socket above is AF_INET, so an IPv6 answer for
  // "localhost" (which AF_UNSPEC permits) could not be used with connect().
  addrinfo hints = {};
  hints.ai_family = AF_INET;
  hints.ai_socktype = SOCK_STREAM;
  hints.ai_protocol = IPPROTO_SCTP;

  addrinfo *result = nullptr;
  const int errcode = getaddrinfo("localhost", portStr, &hints, &result);
  if (errcode) {
    // die() appends strerror(errno) whenever errno is set; that text is only
    // meaningful for EAI_SYSTEM, so clear errno for every other failure code.
    if (errcode != EAI_SYSTEM) errno = 0;
    die(std::string("worker getaddrinfo: ") + gai_strerror(errcode));
  }

  const int rc = connect(sock, result->ai_addr, result->ai_addrlen);
  const int connectErrno = errno;  // keep connect()'s errno across freeaddrinfo()
  freeaddrinfo(result);
  if (rc < 0) {
    errno = connectErrno;
    die("worker connect");
  }
  log("worker " + std::to_string(w) + " connected");

  for (int i = 0; i < numLoops; i++) {
    sleep(1);
    const std::string s = "w:" + std::to_string(w) + " i:" + std::to_string(i);
    // The +1 sends the terminating NUL so the receiver can delimit messages.
    if (send(sock, s.c_str(), 1 + s.size(), MSG_NOSIGNAL) < 0) die("worker send");
    log("sent " + s);
  }

  if (shutdown(sock, SHUT_RDWR) < 0) die("worker shutdown");
  if (close(sock) < 0) die("worker close");
  log("end worker " + std::to_string(w));
}
int main(int argc, char* argv[])
{
try {
if (argc != 4) die("usage: sctp-test port numLoops numClients");
port= atoi((portStr= argv[1])); if (port<1024) die("bad port");
numLoops= atoi(argv[2]); if (numLoops<1) die("bad numLoops");
numClients= atoi(argv[3]); if (numClients<1) die("bad numClients");
vector<thread> workers;
for (int w=0; w < numClients; w++) {
workers.emplace_back([w](){ worker(w); });
}
int sock= socket(AF_INET, SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_SCTP);
if (sock<0) die("master socket");
log("master fd " + to_string(sock));
sockaddr_in addr = {
.sin_family = AF_INET,
.sin_port = htons(port),
.sin_addr = { INADDR_ANY },
};
if (bind(sock, (sockaddr *)&addr, sizeof addr) < 0) die("bind");
if (listen(sock, numClients) <0) die("listen");
vector<pollfd> pollvec;
pollvec.push_back({sock, POLLIN, 0});
sleep(3);
log("start polling");
bool connected= false;
while (pollvec.size()>1 || !connected) {
if (poll(pollvec.data(), pollvec.size(), -1) < 0) die("poll");
for (auto pfd = pollvec.begin(); pfd != pollvec.end(); ++pfd) {
if (!pfd->revents) continue;
if (pfd->fd == sock) {
sockaddr_in addr;
socklen_t addr_len = sizeof addr;
int connected_fd = accept(sock, (sockaddr *)&addr, &addr_len);
if (connected_fd < 0) {
if (errno==EAGAIN||errno==EWOULDBLOCK) break;
die("accept");
}
log("connected fd " + to_string(connected_fd));
pollvec.push_back({connected_fd, POLLIN, 0});
connected= true;
break;
} else {
char data[16388 + 6];
ssize_t len = recv(pfd->fd, data, sizeof data, 0);
if (len<0) die("recv fd " +to_string(pfd->fd));
if (len == 0) {
log("disconnected fd " + to_string(pfd->fd));
if (close(pfd->fd)<0) die("master close");
pollvec.erase(pfd);
break;
}
char* p= data;
log(string("got ")+p);
}
}
}
log("end polling");
for (auto& w: workers) {
w.join();
}
log("end joining");
}
catch (exception& e) {
die(e.what());
}
return 0;
}
_______________________________________________
Dmtcp-forum mailing list
Dmtcp-forum@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dmtcp-forum