Update: dmtcp-2.6.1~rc1 on centos7 gives several diagnostics if I try to take a 
checkpoint after sockets are connected.

[40000] WARNING at socketconnection.cpp:568 in recvPeerInformation; 
REASON='JWARNING(false) failed'
     _fds[0] = 4
     _localInode = 0
     _remoteInode = 0
Message: DMTCP detected an "external" connect socket.The socket will be 
restored as a dead socket. Try
searching for the "external" process with _remoteInode using
"netstat -pae | grep <_remoteInode>" or
"ss -axp | grep <_remoteInode>".
...
[40000] WARNING at kernelbufferdrainer.cpp:125 in onTimeoutInterval; 
REASON='JWARNING(false) failed'
     _dataSockets[i]->socket().sockfd() = 6
     buffer.size() = 0
     WARN_INTERVAL_SEC = 10


derek.bea...@synopsys.com<mailto:derek.bea...@synopsys.com>
ZeBu R&D team, Synopsys, Austin, Texas (CDT, UTC-05:00)
Urgent? Please phone<tel:+15126511517> 61517  +15126511517

From: Derek Beatty via Dmtcp-forum <dmtcp-forum@lists.sourceforge.net>
Sent: Thursday, October 22, 2020 11:04 AM
To: dmtcp-forum@lists.sourceforge.net
Subject: [Dmtcp-forum] SCTP unsupported?

I’m guessing, from lack of comments in the code and failure on the following 
small test program, that SCTP even in basic form is unsupported.

To observe the failure, compile the code below, run it with e.g.
"dmtcp_launch ./a.out 1500 5 5", and issue "dmtcp_command -bc" in a separate shell.

The program waits 4 seconds before connecting any sockets.

Behavior varies with when during execution I take the checkpoint.  If I take it 
before any sockets are connected, then a restart works fine.  If I take it 
after sockets are connected, restart fails:

[40000] ERROR at connectionrewirer.cpp:114 in doReconnect; 
REASON='JASSERT(_real_connect(fd, (sockaddr*) &remoteAddr.addr, remoteAddr.len) 
== 0) failed'
     id = 24aa32f81b91247f-40000-48aee2af7b128f(99007)
     (strerror((*__errno_location ()))) = Connection refused
Message: failed to restore connection
a.out (40000): Terminating...


Derek Beatty
dbea...@acm.org<mailto:dbea...@acm.org>



// Test program for SCTP communication
// g++ -ggdb sctp-ipv4-test.cc -pthread


#include <iostream>
#include <string>
#include <thread>
#include <vector>
#include <netdb.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/types.h>
#include <sys/socket.h>

// Command-line configuration: filled in by main(), read by main() and worker().
int port;        // argv[1] converted with atoi(); main() binds the listening socket to it
char* portStr;   // argv[1] as text; worker() passes it to getaddrinfo() as the service
int numLoops;    // argv[2]: number of messages each worker sends
int numClients;  // argv[3]: number of worker threads main() starts; also the listen() backlog

// The code below uses standard-library names (string, clog, thread, ...) unqualified.
using namespace std;

// Write one line of progress output to clog and flush it.
//
// main() and the worker threads log concurrently. The text and its newline
// are therefore emitted in a single insertion, so a line is less likely to be
// separated from its line end by output from another thread than when the
// text and the newline are written as two separate stream operations.
//
// s: the message, without a trailing newline.
void log(std::string s)
{
  s += '\n';
  std::clog << s << std::flush;
}

// Report a fatal error on clog and terminate the process with exit status 1.
//
// The line written is "error: <s>", followed by ": <strerror(errno)>" when
// errno is non-zero on entry. Callers that do not want an errno description
// appended must clear errno before calling.
//
// s: description of what failed.
void die(std::string s)
{
  // Snapshot errno before doing any other work, so the value reported is the
  // one the caller saw.
  const int err = errno;

  std::string line = "error: " + s;
  if (err) {
    line += ": ";
    line += strerror(err);
  }
  line += '\n';

  // One insertion for the whole line: die() can run on a worker thread while
  // other threads are still logging, and separate writes could be interleaved
  // with their output.
  std::clog << line << std::flush;
  exit(1);
}

// Client side of the test; main() runs one instance per worker thread.
//
// Sleeps 4 seconds, connects an SCTP stream socket to "localhost" on the port
// named by portStr, sends numLoops messages of the form "w:<w> i:<i>"
// (terminating NUL included) one second apart, then shuts the socket down and
// closes it. Every failure is reported through die(), which ends the process.
//
// w: worker index; only used to label log lines and message contents.
void worker(int w)
{
  log("start worker " + std::to_string(w));
  sleep(4);  // the program waits before connecting any sockets

  int sock = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);
  if (sock < 0) die("worker socket");
  log("worker " + std::to_string(w) + " fd " + std::to_string(sock));

  // Value-initialise and assign instead of using a designated initializer:
  // that syntax is only standard from C++20 on and depends on the platform's
  // field order for addrinfo.
  addrinfo hints = {};
  // Must match the AF_INET socket above. With AF_UNSPEC the resolver may
  // return an IPv6 address for "localhost", which connect() on an IPv4 socket
  // cannot use.
  hints.ai_family = AF_INET;
  hints.ai_socktype = SOCK_STREAM;
  hints.ai_protocol = IPPROTO_SCTP;

  addrinfo *result = NULL;
  int errcode = getaddrinfo("localhost", portStr, &hints, &result);
  if (errcode) {
    // die() appends strerror(errno) when errno is non-zero. That text is only
    // meaningful for EAI_SYSTEM; otherwise clear errno so no unrelated
    // description is attached to the resolver error.
    if (errcode != EAI_SYSTEM) errno = 0;
    die(std::string("worker getaddrinfo: ") + gai_strerror(errcode));
  }

  const int rc = connect(sock, result->ai_addr, result->ai_addrlen);
  const int connectErrno = errno;
  freeaddrinfo(result);  // the address list is not needed after connect()
  if (rc < 0) {
    errno = connectErrno;  // report connect()'s error, not anything set since
    die("worker connect");
  }
  log("worker " + std::to_string(w) + " connected");

  for (int i = 0; i < numLoops; i++) {
    sleep(1);
    const std::string msg = "w:" + std::to_string(w) + " i:" + std::to_string(i);
    // 1+size(): the terminating NUL is sent too, so the receiver can treat the
    // payload as a C string.
    if (send(sock, msg.c_str(), 1 + msg.size(), MSG_NOSIGNAL) < 0) die("worker send");
    log("sent " + msg);
  }

  if (shutdown(sock, SHUT_RDWR) < 0) die("worker shutdown");
  if (close(sock) < 0) die("worker close");
  log("end worker " + std::to_string(w));
}

int main(int argc, char* argv[])
{
  try {
    if (argc != 4) die("usage: sctp-test port numLoops numClients");
    port= atoi((portStr= argv[1])); if (port<1024) die("bad port");
    numLoops= atoi(argv[2]); if (numLoops<1) die("bad numLoops");
    numClients= atoi(argv[3]); if (numClients<1) die("bad numClients");
    vector<thread> workers;
    for (int w=0; w < numClients; w++) {
      workers.emplace_back([w](){ worker(w); });
    }
    int sock= socket(AF_INET, SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_SCTP);
    if (sock<0) die("master socket");
    log("master fd " + to_string(sock));
    sockaddr_in addr = {
      .sin_family = AF_INET,
      .sin_port = htons(port),
      .sin_addr = { INADDR_ANY },
    };
    if (bind(sock, (sockaddr *)&addr, sizeof addr) < 0) die("bind");
    if (listen(sock, numClients) <0) die("listen");

    vector<pollfd> pollvec;
    pollvec.push_back({sock, POLLIN, 0});

    sleep(3);
    log("start polling");
    bool connected= false;
    while (pollvec.size()>1 || !connected) {
      if (poll(pollvec.data(), pollvec.size(), -1) < 0) die("poll");
      for (auto pfd = pollvec.begin(); pfd != pollvec.end(); ++pfd) {
        if (!pfd->revents) continue;
        if (pfd->fd == sock) {
          sockaddr_in addr;
          socklen_t addr_len = sizeof addr;
          int connected_fd = accept(sock, (sockaddr *)&addr, &addr_len);
          if (connected_fd < 0) {
            if (errno==EAGAIN||errno==EWOULDBLOCK) break;
            die("accept");
          }
          log("connected fd " + to_string(connected_fd));
          pollvec.push_back({connected_fd, POLLIN, 0});
          connected= true;
          break;
        } else {
          char data[16388 + 6];
          ssize_t len = recv(pfd->fd, data, sizeof data, 0);
          if (len<0) die("recv fd " +to_string(pfd->fd));
          if (len == 0) {
            log("disconnected fd " + to_string(pfd->fd));
            if (close(pfd->fd)<0) die("master close");
            pollvec.erase(pfd);
            break;
          }
          char* p= data;
          log(string("got ")+p);
        }
      }
    }
    log("end polling");

    for (auto& w: workers) {
      w.join();
    }
    log("end joining");
  }
  catch (exception& e) {
    die(e.what());
  }
  return 0;
}
_______________________________________________
Dmtcp-forum mailing list
Dmtcp-forum@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dmtcp-forum

Reply via email to