/*
 * Latency under load - a better measure of link performance.
 *
 * (c) 2011 Jonathan "Chromatix" Morton.
 */

#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/time.h>

// Wall-clock second captured once at the start of main().  gettime()
// subtracts it from every reading, so timestamps are relative to program start
// (presumably to keep the double values small and precise -- confirm).
static time_t epoch;

#include <gsl/gsl_rng.h>

// Process-wide random number generator, allocated in main().
// fork_rng() draws seeds from it while holding `mutex`; client() draws the
// client ID from it before any worker thread exists.
static gsl_rng *global_rng = NULL;

#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <sys/wait.h>
#include <signal.h>
#include <poll.h>

// Network parameters: PORT is passed to getaddrinfo() by both client() and
// server(); BACKLOG is the queue length passed to listen() in server().
#define PORT "4122"  // the port users will be connecting to: 0x101A
#define BACKLOG 10   // how many pending connections queue will hold

#include <pthread.h>

// The one lock used throughout this file.  It guards the client reservation
// state below (reserve_client/release_client), draws from global_rng in
// fork_rng(), and updates of flow_id/chug_mask in get_flow_id()/cancel_flow().
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
// Server: ID of the client currently being served, 0 when idle.
// Client: our own randomly chosen ID, sent at the start of every connection.
uint64_t client_id = 0;
// Server: number of connections currently registered for client_id.
uint32_t client_count = 0;

/*
 * Register one more connection for the client identified by `id`.
 *
 * The server serves a single client at a time: if no client is registered
 * (client_id == 0) the caller's ID becomes the current one; if a different
 * client is registered, this function polls once per second until the slot
 * becomes free.  Every successful call must be paired with release_client().
 *
 * `id` must be non-zero, since 0 is the "nobody is being served" marker.
 */
static void reserve_client(uint64_t id)
{
	while(1) {
		pthread_mutex_lock(&mutex);
		if(!client_id)
			client_id = id;
		if(client_id == id) {
			client_count++;
			// uint64_t/uint32_t are not guaranteed to be unsigned long long/unsigned int,
			// so convert explicitly to the types the format string asks for.
			printf("Now serving %llX (%u)...\n", (unsigned long long) id, (unsigned int) client_count);
			pthread_mutex_unlock(&mutex);
			return;
		}
		pthread_mutex_unlock(&mutex);

		// somebody else is being served; try again later
		sleep(1);
	}
}

/*
 * Drop one connection previously registered with reserve_client(id).
 *
 * When the last connection of the current client goes away the server becomes
 * idle again (client_id is reset to 0) so that a waiting client can take over.
 * Releasing an ID that is not the one currently being served is treated as an
 * internal error and aborts the process.
 */
static void release_client(uint64_t id)
{
	pthread_mutex_lock(&mutex);
	if(client_id != id)
		abort();
	if(--client_count == 0) {
		client_id = 0;
		// uint64_t is not guaranteed to be unsigned long long; convert for %llX
		printf("Finished serving %llX.\n", (unsigned long long) id);
	}
	pthread_mutex_unlock(&mutex);
}

// Current wall-clock time in seconds (with microsecond resolution), measured
// relative to the `epoch` second recorded at program start.
static inline double gettime(void)
{
	struct timeval tv;
	double whole_seconds, fraction;

	gettimeofday(&tv, NULL);

	whole_seconds = tv.tv_sec - epoch;
	fraction = tv.tv_usec * 0.000001;

	return whole_seconds + fraction;
}

// Table-driven, right-shifting checksum built around the constant 0xEDB88320.
// This CRC32 is probably not standards compliant: the lookup table is derived
// from a simplified per-bit formula rather than the usual bitwise division.
// Start with iv=~0 and end with iv = ~iv.
//
// The table is filled in on the first call; main() makes a dummy call with
// len == 0 so that this happens before any worker thread is started.
static uint32_t crc32(uint32_t iv, uint8_t * buf, uint32_t len)
{
	static const uint32_t poly = 0xEDB88320;
	static uint32_t tab[256] = { 0 };
	static bool inited = 0;

	if(!inited) {
		uint32_t entry, bit;

		for(entry = 0; entry < 256; entry++) {
			uint32_t value = 0;

			// bit b of the table index contributes the constant shifted right by (7 - b)
			for(bit = 0; bit < 8; bit++) {
				if(entry & (1u << bit))
					value ^= poly >> (7 - bit);
			}

			tab[entry] = value;
		}
		inited = 1;
	}

	// consume the input one byte at a time; buf is never touched when len == 0
	while(len--) {
		uint8_t byte = *buf++;

		iv = tab[(iv ^ byte) & 0xFF] ^ (iv >> 8);
	}

	return iv;
}

// Create a private generator for the calling thread, seeded with the next
// value of the shared global generator.  Only the draw from global_rng needs
// the lock; the new generator is not visible to anyone else yet.
// The caller owns the result and releases it with gsl_rng_free().
static gsl_rng* fork_rng(void)
{
	gsl_rng *child = gsl_rng_alloc(gsl_rng_taus);
	unsigned long seed;

	pthread_mutex_lock(&mutex);
	seed = gsl_rng_get(global_rng);
	pthread_mutex_unlock(&mutex);

	gsl_rng_set(child, seed);
	return child;
}

// State shared between the pingpong thread, the bulk-flow threads and client().
// NOTE(review): apart from the two masks these are plain variables that are
// read and written from different threads without holding `mutex`.

// Set by pingpong() once the minimum RTT has been reported; client() waits for it.
static bool settled = 0;
// Set by client() to make pingpong() leave its loop.
static bool finished = 0;
// Smallest and largest round-trip time seen by pingpong(), in seconds.
static double minRTT = 0;
static double maxRTT = 0;
// gettime() value at which maxRTT was last raised; chug() uses it as an "event" time.
static double when_maxRTT = 0;

// Next flow number handed out by get_flow_id(); wraps around at 32.
static uint8_t flow_id = 0;
// Mask most recently received from the peer by pingpong() (the peer's chug_mask).
static volatile uint32_t spew_mask = 0;
// One bit per flow registered through get_flow_id() and not yet cancelled;
// pingpong() sends it to the peer with every ping.
static volatile uint32_t chug_mask = 0;

/*
 * Latency probe: bounce a 4-byte message back and forth over `sockfd` until
 * `finished` is set or the connection fails.
 *
 * Each message carries the local chug_mask; the mask received from the peer
 * is stored in spew_mask.  Round-trip times are folded into minRTT, maxRTT
 * and when_maxRTT, and `settled` is raised once more than 16 pings and more
 * than 5 seconds have passed.
 *
 *   sockfd  connected stream socket; closed before returning
 *   server  true: send the first ping immediately;
 *           false: wait for the peer's first message before replying
 *
 * Always returns NULL (the signature matches the other flow handlers).
 */
static void* pingpong(int sockfd, bool server)
{
	double lastPing = 0, now = 0, rtt = 0, firstPing = 0;
	uint64_t numPings = 0;

	finished = settled = 0;
	minRTT = maxRTT = when_maxRTT = 0;

	while(!finished) {
		uint32_t incoming = 0;

		if(numPings || server) {
			// copy to a plain local: send() must not be handed a volatile object
			uint32_t outgoing = chug_mask;

			// send() returns ssize_t; comparing it against an unsigned sizeof
			// would turn the -1 error return into a huge positive value.
			if(send(sockfd, &outgoing, sizeof(outgoing), 0) != (ssize_t) sizeof(outgoing))
				break;
			lastPing = gettime();
		}

		if(recv(sockfd, &incoming, sizeof(incoming), MSG_WAITALL) != (ssize_t) sizeof(incoming))
			break;
		spew_mask = incoming;
		now = gettime();

		if(numPings) {
			rtt = now - lastPing;

			if(rtt < minRTT || numPings == 1)
				minRTT = rtt;

			if(rtt > maxRTT) {
				maxRTT = rtt;
				when_maxRTT = now;
			}

			if(!settled && numPings > 16 && now-firstPing > 5.0) {
				printf("MinRTT: %.1fms\n", minRTT * 1000);
				settled = 1;
			}
		} else {
			// the very first message only establishes the time base
			firstPing = now;
			if(!server)
				printf("Server responding, beginning test...\n");
		}

		numPings++;
	}

	// with the control channel gone, no flow may keep running on its account
	spew_mask = chug_mask = 0;
	close(sockfd);
	return NULL;
}

/*
 * Allocate the next flow number (0..31, handed out round-robin) and mark the
 * flow as active by setting its bit in chug_mask.
 * The caller clears the bit again with cancel_flow().
 */
static uint8_t get_flow_id(void)
{
	uint8_t fid;

	pthread_mutex_lock(&mutex);
	fid = flow_id;
	// shift an unsigned 32-bit one: `1 << 31` would overflow a signed int
	chug_mask |= UINT32_C(1) << fid;
	flow_id = (flow_id+1) % 32;
	pthread_mutex_unlock(&mutex);

	return fid;
}

/*
 * Mark flow `fid` (a value previously returned by get_flow_id(), i.e. 0..31)
 * as no longer active by clearing its bit in chug_mask.
 * Calling it again for an already cancelled flow has no further effect.
 */
static void cancel_flow(uint8_t fid)
{
	pthread_mutex_lock(&mutex);
	// shift an unsigned 32-bit one: `1 << 31` would overflow a signed int
	chug_mask &= ~(UINT32_C(1) << fid);
	pthread_mutex_unlock(&mutex);
}

// Per-flow result record.  chug() fills it in and sends it over the flow's own
// socket; spew() receives it.  Both fields travel in network byte order
// (htonl/ntohl) and are kept in host byte order in memory.
typedef struct
{
//	uint32_t crc;    // disabled together with the CRC checks in spew()/chug()
	uint32_t Bps;    // bytes per second, will saturate at 4GB/s per flow.
	uint32_t smooth; // fixed point 24.8, units are Hz
} stats_packet;

/*
 * Bulk sender: push random data down `sockfd` for as long as any flow is
 * active on either side (spew_mask/chug_mask non-zero) and for two more
 * seconds after that (initially up to 60 seconds while waiting for the masks
 * to become non-zero), then wait for the receiver's statistics packet.
 *
 *   sockfd  connected stream socket; closed before returning
 *   server  true when running inside the server.  A failed send() is fatal
 *           (abort) for the client but only ends this flow on the server.
 *
 * Returns a heap-allocated stats_packet in host byte order which the caller
 * must free().  It is all zeroes when no statistics were received.
 */
static void* spew(int sockfd, bool server)
{
	gsl_rng *rng = fork_rng();
	uint32_t buffer[16384];
	size_t i, words;
	stats_packet *stats = calloc(1, sizeof *stats);
	ssize_t rv;
	double now = gettime();
	double finishTime = now + 60;

	// callers dereference the result, so running out of memory cannot be papered over
	if(!stats) {
		perror("calloc");
		abort();
	}

	for(i = 0; i < sizeof buffer / sizeof buffer[0]; i++)
		buffer[i] = gsl_rng_get(rng);

	while(spew_mask || chug_mask || gettime() < finishTime) {
		rv = send(sockfd, buffer, sizeof buffer, 0);
		if(rv < 0) {
			printf("Hard shutdown of spew()!\n");
			if(!server)
				abort();
			goto bail;
		}

		// replace the words that were (at least partly) consumed with fresh random data
		words = ((size_t) rv + 3) / 4;
		for(i = 0; i < words; i++)
			buffer[i] = gsl_rng_get(rng);

		// keep pushing the deadline out while any flow is still active
		now = gettime();
		if(spew_mask | chug_mask)
			finishTime = now + 2;
	}

	// wait for the stats packet and convert it to host byte order
	if(recv(sockfd, stats, sizeof *stats, MSG_WAITALL) != (ssize_t) sizeof *stats) {
		memset(stats, 0, sizeof *stats);
		goto bail;
	}
	stats->Bps = ntohl(stats->Bps);
	stats->smooth = ntohl(stats->smooth);

bail:
	gsl_rng_free(rng);
	close(sockfd);
	return stats;
}

// Convert a double to uint32_t, clamping to [0, UINT32_MAX] instead of running
// into undefined behaviour for out-of-range values.  NaN maps to 0.
static uint32_t saturate_u32(double value)
{
	if(!(value > 0))
		return 0;
	if(value >= (double) UINT32_MAX)
		return UINT32_MAX;
	return (uint32_t) value;
}

/*
 * Bulk receiver: read from `sockfd` while measuring goodput and the longest
 * gap between two reads, decide when the measurement has run long enough,
 * and report the result to the sender.
 *
 * The flow registers itself in chug_mask.  Once nothing noteworthy (new
 * maximum goodput, new maximum gap, new maximum RTT) has happened for 60
 * seconds and enough data has been transferred -- or after 600 seconds in any
 * case -- the flow cancels itself but keeps receiving until all flows on both
 * sides are done.  Only then is the statistics packet sent.
 *
 *   sockfd  connected stream socket; drained and closed before returning
 *   server  unused; present so that all flow handlers share one signature
 *
 * Returns a heap-allocated stats_packet in host byte order which the caller
 * must free().  It is all zeroes when no statistics could be delivered.
 */
static void* chug(int sockfd, bool server)
{
	uint8_t fid = get_flow_id();
	uint8_t buffer[65536];
	uint64_t bytes = 0;
	// sizeof *stats, not sizeof(stats): the latter is only the size of a pointer
	stats_packet *stats = calloc(1, sizeof *stats);
	double startTime = gettime();
	double now = 0, then = 0, elapsed = 0;
	double maxWait = 0;
	double goodput = 0, maxGoodput = 0;
	double whenEvent = startTime;
	bool cancelled = false;
	ssize_t rv;

	(void) server;

	// callers dereference the result, so running out of memory cannot be papered over
	if(!stats) {
		perror("calloc");
		abort();
	}

	while(spew_mask | chug_mask) {
		rv = recv(sockfd, buffer, sizeof buffer, 0);

		now = gettime();

		if(rv <= 0)  // no more data to receive
			break;

		bytes += (uint64_t) rv;

		// skip the update if the clock has not advanced, rather than dividing by zero
		elapsed = now - startTime;
		if(elapsed > 0)
			goodput = bytes / elapsed;

		// gaps are only tracked once the first MiB has arrived
		if(bytes > 1024*1024) {
			double wait = now-then;
			if(wait > maxWait) {
				maxWait = wait;
				whenEvent = now;
			}
		}
		then = now;

		if(when_maxRTT > whenEvent)
			whenEvent = when_maxRTT;

		if(goodput > maxGoodput) {
			maxGoodput = goodput;
			whenEvent = now;
		}

		if(!cancelled && (((now - whenEvent) > 60 && bytes > (maxGoodput * maxRTT * 100) && bytes > (maxGoodput * maxWait * 100)) || (now - startTime) > 600)) {
			// Maybe time to stop, so signal this as appropriate
			// carry on receiving data until it stops
			cancel_flow(fid);
			cancelled = true;
		}
	}

	// Report only if we ended the flow ourselves and every other flow is done
	// too; otherwise the zeroes from calloc() stand for "no result".
	if(cancelled && !(chug_mask | spew_mask)) {
		stats_packet wire;

		elapsed = now - startTime;
		stats->Bps = saturate_u32(elapsed > 0 ? bytes / elapsed : 0);
		// smoothness is the reciprocal of the longest gap, as 24.8 fixed point
		stats->smooth = saturate_u32(maxWait > 0 ? 256 / maxWait : (double) UINT32_MAX);

		wire.Bps = htonl(stats->Bps);
		wire.smooth = htonl(stats->smooth);

		// send() returns ssize_t; compare as signed so that -1 is recognised
		if(send(sockfd, &wire, sizeof wire, 0) != (ssize_t) sizeof wire)
			stats->Bps = stats->smooth = 0;
	}

	// drain the connection to make the network quiescent and avoid RST packet bursts
	cancel_flow(fid);
	while(recv(sockfd, buffer, sizeof buffer, 0) > 0)
		;

	close(sockfd);
	return stats;
}

/*
 * Client-side thread entry point for an upload flow.
 * `arg` carries the connected socket descriptor (an int smuggled through the
 * void pointer).  Announces our client ID, asks the server to receive
 * (command 3), then sends bulk data.
 * Returns the stats_packet produced by spew(); the joiner owns it.
 */
static void* spew_conn(void *arg)
{
	// go through intptr_t: a direct pointer-to-int cast changes size on LP64
	int sockfd = (int)(intptr_t) arg;
	uint8_t cmd = 3; // tell server to chug

	// A failed handshake is not handled here; it shows up as a failing
	// send() inside spew() right afterwards.
	send(sockfd, &client_id, sizeof(client_id), 0);
	send(sockfd, &cmd, sizeof(cmd), 0);

	return spew(sockfd, 0);
}

/*
 * Client-side thread entry point for a download flow.
 * `arg` carries the connected socket descriptor (an int smuggled through the
 * void pointer).  Announces our client ID, asks the server to send
 * (command 2), then receives bulk data.
 * Returns the stats_packet produced by chug(); the joiner owns it.
 */
static void* chug_conn(void *arg)
{
	// go through intptr_t: a direct pointer-to-int cast changes size on LP64
	int sockfd = (int)(intptr_t) arg;
	uint8_t cmd = 2; // tell server to spew

	// A failed handshake is not handled here; chug() will simply see the
	// connection end without data and report an all-zero result.
	send(sockfd, &client_id, sizeof(client_id), 0);
	send(sockfd, &cmd, sizeof(cmd), 0);

	return chug(sockfd, 0);
}

/*
 * Client-side thread entry point for the latency probe.
 * `arg` carries the connected socket descriptor (an int smuggled through the
 * void pointer).  Announces our client ID, asks the server to start pinging
 * (command 1), then answers pings until told to finish.
 * Returns NULL, like pingpong().
 */
static void* pingpong_conn(void *arg)
{
	// go through intptr_t: a direct pointer-to-int cast changes size on LP64
	int sockfd = (int)(intptr_t) arg;
	uint8_t cmd = 1; // tell server to start pinging when ready

	// A failed handshake is not handled here; pingpong() ends as soon as its
	// first recv() fails.
	send(sockfd, &client_id, sizeof(client_id), 0);
	send(sockfd, &cmd, sizeof(cmd), 0);

	return pingpong(sockfd, 0);
}

/*
 * Client mode: run the complete measurement against `hostname`.
 *
 *  1. Resolve the host and connect; this first connection becomes the
 *     pingpong (latency) channel, served by its own thread.
 *  2. Wait until the pingpong thread reports a settled minimum RTT.
 *  3. Run each scenario from the tables below: `scenario_flows` parallel bulk
 *     connections, of which the first `scenario_uplds` are uploads and the
 *     rest downloads, started one second apart.
 *  4. Print per-scenario and overall results.
 *
 * Any setup failure is fatal (abort), as elsewhere in this program.
 */
static void client(char *hostname)
{
	int sockfd = -1;
	struct addrinfo hints, *servinfo, *p;
	char s[INET6_ADDRSTRLEN];
	int rv;
	pthread_t pingpong_thread;
	// both tables are terminated by a scenario with zero flows
	const uint8_t scenario_flows[] = { 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 32, 32, 32, 32, 32, 0 };
	const uint8_t scenario_uplds[] = { 0, 1, 0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 3, 4,  0,  1, 16, 31, 32, 0 };
	int scenario, upScenarios = 0, dnScenarios = 0;
	double flowSmoothness = 0, upCapacity = 0, dnCapacity = 0;

	memset(&hints, 0, sizeof hints);
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;

	if((rv = getaddrinfo(hostname, PORT, &hints, &servinfo)) != 0) {
		fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rv));
		abort();
	}

	// loop through all the results and connect to the first we can
	for(p = servinfo; p != NULL; p = p->ai_next) {
		if((sockfd = socket(p->ai_family, p->ai_socktype, p->ai_protocol)) == -1) {
			perror("socket");
			continue;
		}

		if(connect(sockfd, p->ai_addr, p->ai_addrlen) == -1) {
			// report first: close() may overwrite errno
			perror("connect");
			close(sockfd);
			continue;
		}

		break;
	}

	// we will reuse p for the parallel connections
	if(!p)
		abort();

	// 0 means "no client" on the server side, so never pick it as our ID
	do {
		client_id = (((uint64_t) gsl_rng_get(global_rng)) << 32) | gsl_rng_get(global_rng);
	} while(client_id == 0);
	printf("Selected client ID %llX\n", (unsigned long long) client_id);

	if(getnameinfo(p->ai_addr, p->ai_addrlen, s, sizeof(s), NULL, 0, NI_NUMERICHOST) != 0)
		strcpy(s, "(unknown)");
	printf("Connected to %s, waiting for response...\n", s);

	// pthread_create() returns an error number (never negative) and leaves errno alone
	rv = pthread_create(&pingpong_thread, NULL, pingpong_conn, (void*)(intptr_t) sockfd);
	if(rv != 0) {
		close(sockfd);
		fprintf(stderr, "pthread_create: %s\n", strerror(rv));
		abort();
	}

	while(!settled)
		sleep(1);

	for(scenario=0; scenario_flows[scenario]; scenario++) {
		pthread_t bulk_thread[32] = {0};
		int flow, flows = scenario_flows[scenario], ups = scenario_uplds[scenario], dns = flows-ups;
		double upCap = 0, dnCap = 0, smoothness = 0;
		uint32_t worstSmooth = 0;

		printf("Scenario %d: %d uploads, %d downloads... ", scenario+1, ups, dns);
		fflush(stdout);

		spew_mask = chug_mask = 0;

		for(flow=0; flow < flows; flow++) {
			if((sockfd = socket(p->ai_family, p->ai_socktype, p->ai_protocol)) == -1) {
				perror("socket");
				abort();
			}

			if(connect(sockfd, p->ai_addr, p->ai_addrlen) == -1) {
				perror("connect");
				close(sockfd);
				abort();
			}

			rv = pthread_create(&bulk_thread[flow], NULL, (flow < ups) ? spew_conn : chug_conn, (void*)(intptr_t) sockfd);
			if(rv != 0) {
				close(sockfd);
				fprintf(stderr, "pthread_create: %s\n", strerror(rv));
				abort();
			}

			sleep(1);
		}

		// Wait for test to finish and collect the statistics from the threads.
		// We multiply the *harmonic* mean of the individual goodputs by the number of flows, in order to incorporate mutual fairness into the measure.
		// We also search for the worst smoothness value.
		// The maxRTT of the pingpong thread is already kept globally.
		for(flow=0; flow < flows; flow++) {
			// pthread_join() wants a void **; a stats_packet ** is not compatible
			void *result = NULL;
			stats_packet *flowStats;

			rv = pthread_join(bulk_thread[flow], &result);
			if(rv != 0 || !result) {
				fprintf(stderr, "pthread_join: %s\n", rv ? strerror(rv) : "flow returned no statistics");
				abort();
			}
			flowStats = result;

			// a failed flow reports 0 bytes/s, which drives the harmonic mean to zero
			if(flow < ups)
				upCap += 1.0 / flowStats->Bps;
			else
				dnCap += 1.0 / flowStats->Bps;

			if(!flow || worstSmooth > flowStats->smooth)
				worstSmooth = flowStats->smooth;

			// the flow thread allocated the packet; we are its final owner
			free(flowStats);
		}

		if(ups) {
			upCap = ups*ups/upCap;
			printf("%u KiB/s up, ", (uint32_t)(upCap / 1024));
			upCapacity += 1.0 / upCap;
			upScenarios++;
		}
		if(dns) {
			dnCap = dns*dns/dnCap;
			printf("%u KiB/s down, ", (uint32_t)(dnCap / 1024));
			dnCapacity += 1.0 / dnCap;
			dnScenarios++;
		}

		// smoothness arrives as 24.8 fixed point
		smoothness = worstSmooth / 256.0;
		printf("%.2f Hz smoothness\n", smoothness);
		if(!scenario || smoothness < flowSmoothness)
			flowSmoothness = smoothness;
	}

	// no more connections will be made, so the address list can go
	freeaddrinfo(servinfo);

	printf("\nOVERALL:\n");

	finished = 1;
	pthread_join(pingpong_thread, NULL);

	// overall capacities are again harmonic means, this time over the scenarios
	printf("    Upload Capacity: %u KiB/s\n", (unsigned int) floor((upScenarios / 1024.0) / upCapacity));
	printf("  Download Capacity: %u KiB/s\n", (unsigned int) floor((dnScenarios / 1024.0) / dnCapacity));
	printf("Link Responsiveness: %u Hz\n", (unsigned int) floor(1.0/maxRTT));
	printf("    Flow Smoothness: %u Hz\n", (unsigned int) floor(flowSmoothness));
}

/*
 * Server-side thread entry point, one (detached) thread per accepted
 * connection.  `arg` carries the socket descriptor.
 *
 * Protocol: the peer first sends its 64-bit client ID, then a one-byte
 * command selecting the role of this connection:
 *   1 = pingpong, 2 = server sends bulk data, 3 = server receives bulk data.
 * The connection waits in reserve_client() until the server is free for that
 * client.
 *
 * Socket ownership: pingpong(), spew() and chug() close the descriptor
 * themselves.  This function closes it only on the paths where none of them
 * ran -- closing it a second time could hit a descriptor that another thread
 * has been handed in the meantime.
 *
 * Always returns NULL.
 */
static void* server_conn(void *arg)
{
	// go through intptr_t: a direct pointer-to-int cast changes size on LP64
	int sockfd = (int)(intptr_t) arg;
	uint64_t id = 0;
	uint8_t cmd = 0;

	pthread_detach(pthread_self());

	if(recv(sockfd, &id, sizeof(id), MSG_WAITALL) != (ssize_t) sizeof(id)) {
		fprintf(stderr, "Failed to receive client ID!\n");
		close(sockfd);
		return NULL;
	}

	// ID 0 is the "nobody is being served" marker; accepting it from the
	// network would corrupt the reservation bookkeeping.
	if(id == 0) {
		fprintf(stderr, "Rejecting invalid client ID 0!\n");
		close(sockfd);
		return NULL;
	}

	reserve_client(id);

	if(recv(sockfd, &cmd, sizeof(cmd), MSG_WAITALL) != (ssize_t) sizeof(cmd)) {
		close(sockfd);
	} else {
		switch(cmd) {
			case 1: pingpong(sockfd, 1); break;
			case 2: free(spew(sockfd, 1)); break;
			case 3: free(chug(sockfd, 1)); break;
			default:
				fprintf(stderr, "Unknown command %u!\n", (unsigned int) cmd);
				close(sockfd);
				break;
		}
	}

	release_client(id);
	return NULL;
}

/*
 * Server mode: bind a listening socket on PORT (IPv6 is tried first, then
 * whatever getaddrinfo() offers) and hand every accepted connection to a new
 * server_conn() thread.  Never returns; setup failures abort the process.
 */
static void server(void)
{
	int sockfd = -1;                      // listening socket
	struct addrinfo hints, *servinfo, *p;
	struct sockaddr_storage their_addr;   // connector's address information
	char s[INET6_ADDRSTRLEN];
	int rv;
	int yes = 1;

	memset(&hints, 0, sizeof hints);
	hints.ai_family = AF_INET6;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_PASSIVE;	// use my IP

	// try IPv6 first, then fall back to generic if necessary
	if((rv = getaddrinfo(NULL, PORT, &hints, &servinfo)) != 0) {
		hints.ai_family = AF_UNSPEC;
		if((rv = getaddrinfo(NULL, PORT, &hints, &servinfo)) != 0) {
			fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rv));
			abort();
		}
	}

	// loop through all the results and bind to the first we can
	for(p = servinfo; p != NULL; p = p->ai_next) {
		if((sockfd = socket(p->ai_family, p->ai_socktype, p->ai_protocol)) == -1) {
			perror("socket");
			continue;
		}

		if(setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(int)) == -1) {
			perror("setsockopt");
			abort();
		}

		if(bind(sockfd, p->ai_addr, p->ai_addrlen) == -1) {
			// report first: close() may overwrite errno
			perror("bind");
			close(sockfd);
		} else {
			break;
		}
	}

	if(p == NULL) {
		fprintf(stderr, "server: failed to bind listening socket\n");
		abort();
	}

	if(listen(sockfd, BACKLOG) == -1) {
		perror("listen");
		abort();
	}

	if(getnameinfo(p->ai_addr, p->ai_addrlen, s, sizeof(s), NULL, 0, NI_NUMERICHOST) != 0)
		strcpy(s, "(unknown)");
	printf("Server mode: listening on %s (port %s)\n", s, PORT);

	freeaddrinfo(servinfo);		// all done with this structure

	// Start accepting connections, spawning a thread for each one
	while(1) {  // main accept() loop
		socklen_t sin_size = sizeof(their_addr);
		int new_fd = accept(sockfd, (struct sockaddr *)&their_addr, &sin_size);
		pthread_t new_thread;

		if(new_fd == -1) {
			perror("accept");
			continue;
		}
		if(getnameinfo((struct sockaddr *)&their_addr, sin_size, s, sizeof(s), NULL, 0, NI_NUMERICHOST) != 0)
			strcpy(s, "(unknown)");
		printf("Connection from %s\n", s);

		// pthread_create() returns an error number (never negative) and leaves
		// errno alone; the thread detaches itself, so no handle is kept.
		rv = pthread_create(&new_thread, NULL, server_conn, (void*)(intptr_t) new_fd);
		if(rv != 0) {
			close(new_fd);
			fprintf(stderr, "pthread_create: %s\n", strerror(rv));
		}
	}
}

/*
 * Entry point.  Without arguments the program runs as a server; with a single
 * argument that does not start with '-' or '/' it runs as a client against
 * that host.  Anything else prints the usage text and exits with status 1.
 */
int main(int argc, char *argv[])
{
	FILE *fp = fopen("/dev/urandom", "rb");
	struct timeval now;
	bool seeded = false;

	// time base for gettime()
	gettimeofday(&now, NULL);
	epoch = now.tv_sec;

	// we want to know when a connection closes, not be core-dumped
	signal(SIGPIPE, SIG_IGN);
	// NOTE(review): presumably meant to survive divisions by zero in the
	// statistics; POSIX leaves the behaviour undefined if a real arithmetic
	// fault is ignored -- confirm whether this is still wanted.
	signal(SIGFPE, SIG_IGN);

	if(fp) {
		// only trust the seed if it was read completely
		seeded = (fread(&gsl_rng_default_seed, sizeof(gsl_rng_default_seed), 1, fp) == 1);
		fclose(fp);
	}
	if(!seeded) {
		// for systems which don't have such a convenient randomness source
		gsl_rng_default_seed = now.tv_sec ^ now.tv_usec;
	}

	global_rng = gsl_rng_alloc(gsl_rng_taus);

	// build the CRC table now, while we are still single-threaded
	crc32(0, NULL, 0);

	if(argc == 1) {
		server();
	} else if(argc == 2 && argv[1][0] != '-' && argv[1][0] != '/') {
		client(argv[1]);
	} else {
		fprintf(stderr, "Usage: %s <host> (client mode)\n       %s        (server mode)\n", argv[0], argv[0]);
		return 1;
	}

	return 0;
}
