/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Authors: Waiman Long <longman@redhat.com>
 *
 * This is the driver program for running the lock test.
 */
#define	_GNU_SOURCE
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <limits.h>
#include <locale.h>
#include <math.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * locktest debugfs files
 *
 * The count, ratio, type and latency files are written by this program to
 * pass the test parameters; the etime and status files are read back to
 * obtain the results.  The names are kept as macros because they are
 * concatenated into string literals in the error messages.
 */
#define	DEBUGFS_ROOT		"/sys/kernel/debug/locktest"
#define	DEBUGFS_CCOUNT		DEBUGFS_ROOT "/c_count"
#define	DEBUGFS_ICOUNT		DEBUGFS_ROOT "/i_count"
#define	DEBUGFS_LCOUNT		DEBUGFS_ROOT "/l_count"
#define	DEBUGFS_PCOUNT		DEBUGFS_ROOT "/p_count"
#define	DEBUGFS_RRATIO		DEBUGFS_ROOT "/r_percent"
#define	DEBUGFS_LOCKTYPE	DEBUGFS_ROOT "/locktype"
#define	DEBUGFS_LOADTYPE	DEBUGFS_ROOT "/loadtype"
#define	DEBUGFS_ETIME		DEBUGFS_ROOT "/etime"
#define	DEBUGFS_STATUS		DEBUGFS_ROOT "/status"
#define	DEBUGFS_LATENCY		DEBUGFS_ROOT "/latency"

/*
 * Lock types
 *
 * The selected value is written to the locktype debugfs file and is also
 * used as the index into lockname[].  LOCK_ISPIN and LOCK_MSPIN have no
 * -l keyword; they can only be selected by giving the number to -l.
 */
#define	LOCK_SPIN	0
#define	LOCK_RWLOCK	1
#define	LOCK_MUTEX	2
#define	LOCK_RWSEM	3
#define LOCK_ISPIN	4
#define LOCK_MSPIN	5
#define LOCK_TRYRWSEM   6
#define	LOCK_MAX	7
#define	LOCK_OTHER	LOCK_MAX

/*
 * Timing tests
 *
 * Selected with -t.  run_timetest() writes the chosen value to the
 * locktype debugfs file, so these numbers share a namespace with the
 * LOCK_* values above.
 */
#define TIME_PAUSE	10
#define TIME_RMB	11
#define TIME_MB		12

/*
 * Load types
 *
 * Selected with -L and written to the loadtype debugfs file.
 */
#define	LOAD_STANDALONE	0
#define	LOAD_EMBEDDED	1

/*
 * Help text printed by print_usage().
 *
 * The text is used as a printf-style format string whose single %s
 * conversion receives the program name, so every literal percent sign
 * has to be written as %%.
 */
static const char usage[] = "\
Usage: %s [-h] [-v] [-c <load count>] [-i <iterations>]\n\
	[-l {spin|rw|mutex|rwsem|tryrwsem|<n>} ] [-L {0|1}]\n\
	[-n <thread count>[-<max thread count>]]\n\
	[-p <pause count>] [-r <read%%>] [-t <time-test>]\n\
	[-T] [-s <cpu>] [-x <cpu-inc>]\n\
where -c - amount of load in critical section (default = 1)\n\
      -h - print this help message\n\
      -i - iteration count(+) or time (- in s)\n\
      -l - lock type name or number (default = spin)\n\
      -L - load type (0: standalone, 1: embedded)\n\
      -n - thread count (a single value or a range)\n\
      -p - amount of pause between critical sections (default = 1)\n\
      -r - reader percentage (for rwlock and rwsem, default = 50%%)\n\
      -s - starting CPU number (default = 0)\n\
      -t - perform timing test (pause, mb, rmb)\n\
           other options ignored except -i\n\
      -T - enable latency measurement\n\
      -v - verbose flag\n\
      -x - CPU number increment (default = 1)\n\
\n\
A negative -r means number of readers.\n\
A negative -i means fixed execution time in seconds.\n\
The -i value may carry a K or M suffix (x1,000 or x1,000,000).\n\
";

/*
 * Test parameters.  They are set from the command line in main() and used
 * by run_locktest() and run_timetest().  The start flag is the exception:
 * it is set by run_locktest() and polled by the worker threads.
 */
static int iterations = 5000000;	/* Iteration count (< 0: run time in s) */
static int rpercent   = 50;		/* Reader percentage (< 0: reader count) */
static int loadcnt    = 1;		/* Load count		*/
static int pausecnt   = 1;		/* Pause count		*/
static int threads    = 2;		/* Thread count		*/
static int maxthreads = 2;		/* Maximum thread count */
static int locktype   = LOCK_SPIN;	/* Lock type		*/
static int loadtype   = LOAD_STANDALONE;/* Load type		*/
static int startcpu   = 0;		/* Start CPU number	*/
static int cpuinc     = 1;		/* CPU number increment	*/
static int timetest;			/* Timing test type	*/
static int start;			/* 1 - start, -1 - exit */
static int latency;			/* Set by -T; written to the latency file */
static int verbose;			/* Set by -v; enables per-thread output */

/*
 * Printable lock names, indexed by the LOCK_* lock type value and listed
 * in ascending index order.
 */
static char *lockname[LOCK_MAX] = {
	[LOCK_SPIN]     = "spinlock",
	[LOCK_RWLOCK]   = "rwlock",
	[LOCK_MUTEX]    = "mutex",
	[LOCK_RWSEM]    = "rwsem",
	[LOCK_ISPIN]    = "spinirq",
	[LOCK_MSPIN]    = "mix-spinlock",
	[LOCK_TRYRWSEM] = "tryrwsem",
};

/*
 * Per-thread state, one array element per worker thread.  The array is
 * allocated in main(); the result fields are filled in by
 * locktest_thread() from what it reads out of the etime debugfs file.
 */
static struct tdata {
	pthread_t	pthread;	/* Pthread handle	 */
	int	 	done;		/* Done flag		 */
	int		iterations;	/* Iterations		 */
	int		reader;		/* Nonzero if flagged 'r' (reader) */
	long		latency_max;	/* Reported maximum latency */
	long		latency_avg;	/* Reported average latency */
	unsigned long	etime;		/* Reported elapsed time */
} *tdata;

/*
 * Print the usage message to stderr, with cmd substituted for the program
 * name, and terminate the program with exit status 1.  Does not return.
 */
static void print_usage(char *cmd)
{
	FILE *out = stderr;

	fprintf(out, usage, cmd);
	exit(1);
}

/*
 * Write an integer, formatted as decimal text, to an existing file.
 *
 * file  - path of the file to write; it is opened write-only and is not
 *         created if missing
 * value - number to write
 *
 * An open failure or a short write is reported on stderr and terminates
 * the program with exit status 1.
 */
static void write_debugfs(char *file, int value)
{
	char text[80];
	int  nbytes = sprintf(text, "%d", value);
	int  fd = open(file, O_WRONLY);

	if (fd < 0) {
		fprintf(stderr, "Error: Can't open %s!\n", file);
		exit(1);
	}

	if (write(fd, text, nbytes) != nbytes) {
		fprintf(stderr, "Error: debugfs file %s write error!\n", file);
		exit(1);
	}

	close(fd);
}

/*
 * Body of one locktest worker thread.
 *
 * dummy - the thread's index into tdata[], passed as an integer cast to a
 *         pointer
 *
 * The thread polls the start flag until it becomes positive, then reads
 * the etime debugfs file once and parses the text into tdata[tid]:
 * elapsed time, iteration count, a reader flag ('r' means reader) and,
 * when present, the average and maximum latency.
 * NOTE(review): the read presumably blocks while the kernel module runs
 * the test - confirm against the module source.
 *
 * Returns NULL once the results are stored, or (void *)-1 if the start
 * flag turned negative before the test began.  An open or read failure is
 * reported on stderr and terminates the whole program.
 */
static void *locktest_thread(void *dummy)
{
	int  tid = (long)dummy;
	char buf[256];
	char *ptr;
	ssize_t nread;
	int  fd;

	/*
	 * Wait until instructed to start
	 */
	while (start <= 0) {
		/*
		 * Return a value instead of the address of a local: the
		 * stack is gone by the time a joiner could look at it.
		 */
		if (start < 0)
			return (void *)-1L;
		usleep(1);
	}

	fd = open(DEBUGFS_ETIME, O_RDONLY);
	if (fd < 0) {
		fprintf(stderr, "Thread %d error: Can't open " DEBUGFS_ETIME
			"!\n", tid);
		exit(1);
	}

	/*
	 * Leave room for a terminator: the strto*() parsers below need a
	 * proper C string and read() does not supply one.
	 */
	nread = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (nread <= 0) {
		fprintf(stderr, "Thread %d error: debugfs file " DEBUGFS_ETIME
			" read error!\n", tid);
		exit(1);
	}
	buf[nread] = '\0';

	tdata[tid].etime = strtoul(buf, &ptr, 10);
	tdata[tid].iterations = strtol(ptr, &ptr, 10);
	tdata[tid].latency_max = tdata[tid].latency_avg = 0;

	while (isspace((unsigned char)*ptr))
		ptr++;

	/*
	 * Get the reader flag, stepping over the flag character but never
	 * over the end of the string.
	 */
	tdata[tid].reader = (*ptr == 'r');
	if (*ptr)
		ptr++;

	if (*ptr) {
		tdata[tid].latency_avg = strtoul(ptr, &ptr, 10);
		tdata[tid].latency_max = strtoul(ptr, &ptr, 10);
	}
	tdata[tid].done = 1;
	return NULL;
}

static void run_locktest(void)
{
	int i;
	unsigned long mean, min, max;
	unsigned long rmin, rmax, wmin, wmax;
	double sd;	/* Standard deviation */
	double pcpurate;
	cpu_set_t set;
	int nreaders = 0;	/* # of reader only thread */
	int nstarved = 0;
	int fixedtime = 0;
	char *kilo;
	unsigned long rtotal, wtotal;

	/*
	 * Set up the debugfs files
	 */
	write_debugfs(DEBUGFS_ICOUNT  , iterations);
	write_debugfs(DEBUGFS_CCOUNT  , threads);
	write_debugfs(DEBUGFS_RRATIO  , rpercent);
	write_debugfs(DEBUGFS_LOCKTYPE, locktype);
	write_debugfs(DEBUGFS_LOADTYPE, loadtype);
	write_debugfs(DEBUGFS_LCOUNT  , loadcnt);
	write_debugfs(DEBUGFS_PCOUNT  , pausecnt);
	write_debugfs(DEBUGFS_LATENCY , latency);

	/*
	 * Create the locktest threads
	 */
	for (i = 0 ; i < threads ; i++) {
		tdata[i].done  = 0;
		tdata[i].etime = 0;
		if (pthread_create(&tdata[i].pthread, NULL, locktest_thread,
				  (void *)(long)i)) {
			start = -1;
			perror("pthread_create");
			exit(1);
		}
	}

	CPU_ZERO(&set);
	for (i = 0 ; i < threads ; i++) {
		int cpu = startcpu + cpuinc * i;

		CPU_SET(cpu, &set);
		if (pthread_setaffinity_np(tdata[i].pthread,
			sizeof(set), &set) < 0) {
			start = -1;
			perror("pthread_setaffinity_np");
			exit(1);
		}
		CPU_CLR(cpu, &set);
	}
	usleep(50);	/* Wait for the threads to be migrated */

	start = 1;
	for (i = 0 ; i < threads ; i++)
		pthread_join(tdata[i].pthread, NULL);

	/*
	 * Compute the mean & standard deviation of the execution time
	 */
	min = rmin = wmin = INT_MAX;
	max = rmax = wmax = 0;
	rtotal = wtotal = 0;
	for (i = 0, mean = 0 ; i < threads ; i++) {
		mean += tdata[i].etime;
		if (tdata[i].etime > max)
			max = tdata[i].etime;
		if (tdata[i].etime < min)
			min = tdata[i].etime;
		if (tdata[i].reader) {
			nreaders++;
			rtotal += tdata[i].iterations;
			if (tdata[i].iterations > rmax)
				rmax = tdata[i].iterations;
			else if (tdata[i].iterations < rmin)
				rmin = tdata[i].iterations;
		} else {
			wtotal += tdata[i].iterations;
			if (tdata[i].iterations > wmax)
				wmax = tdata[i].iterations;
			else if (tdata[i].iterations < wmin)
				wmin = tdata[i].iterations;
		}
	}
	mean = (mean + threads/2)/threads;

	for (i = 0, sd = 0.0 ; i < threads ; i++)
		sd += (tdata[i].etime - mean)*(tdata[i].etime - mean);
	sd = sqrt(sd/threads);

	if (iterations < 0) {
		fixedtime = -iterations;

		/*
		 * Recompute average iterations
		 */
		for (iterations = i = 0; i < threads; i++)
			iterations += tdata[i].iterations;
		iterations /= threads;
	}

	/*
	 * Skip min/max if only 1 thread.
	 */
	if (threads == 1)
		goto skip;

	printf("Threads = %d", threads);
	if (fixedtime)
		printf(", Min/Mean/Max = %'d/%'d/%'d\n",
		       wmin, (wtotal + threads/2)/threads, wmax);
	else
		printf(", Min/Mean/Max = %'.1f/%'.1f/%'.1f ms, SD = %.2f\n",
		       min/1000.0, mean/1000.0, max/1000.0, sd/1000);
skip:
	/*
	 * Compute per-cpu locking rate in kop/s or op/s.
	 */
	pcpurate = ((double)iterations)/mean*1000;
	kilo = "k";
	if (pcpurate < 10.0) {
		pcpurate *= 1000;
		kilo = "";
	}
	printf("Threads = %d, Total Rate = %'.0f %sop/s; "
	       "Percpu Rate = %'.0f %sop/s\n",
	      threads, pcpurate * threads, kilo, pcpurate, kilo);
	if (nreaders && fixedtime) {
		int nwriters = threads - nreaders;

		printf("%d readers, Iterations Min/Mean/Max = %'d/%'d/%'d\n",
			nreaders, rmin, (rtotal + nreaders/2)/nreaders, rmax);
		printf("%d writers, Iterations Min/Mean/Max = %'d/%'d/%'d\n",
			nwriters, wmin, (wtotal + nwriters/2)/nwriters, wmax);
	}

	if (verbose) {
		long ravg, rmax, wavg, wmax;

		ravg = rmax = wavg = wmax = 0;
		/*
		 * List the individual thread execution times
		 */
		for (i = 0 ; i < threads ; i++) {
			printf("Thread %2d(%c): time = %'.1f ms", i, 
				tdata[i].reader ? 'r' : 'w',
				tdata[i].etime/1000.0);

			if (tdata[i].iterations > 0)
				printf(", iterations = %'d",
					tdata[i].iterations);
			else if (tdata[i].iterations < 0)
				printf(", runtime = %ds",
					-tdata[i].iterations);

			if (tdata[i].latency_avg) {
				printf(", avg = %'ld, max = %'ld", 
					tdata[i].latency_avg,
					tdata[i].latency_max);
				if (tdata[i].reader) {
					ravg += tdata[i].latency_avg;
					if (tdata[i].latency_max > rmax)
						rmax = tdata[i].latency_max;
				} else {
					wavg += tdata[i].latency_avg;
					if (tdata[i].latency_max > wmax)
						wmax = tdata[i].latency_max;
				}
			}
			printf("\n");
		}
		if (tdata[0].latency_avg) {
			if (nreaders)
				ravg /= nreaders;
			if (threads - nreaders)
				wavg /= threads - nreaders;
			if (rmax)
				printf("Reader: avg = %'ldns, max = %'ldns\n",
					ravg, rmax);
			if (wmax)
				printf("Writer: avg = %'ldns, max = %'ldns\n",
					wavg, wmax);
		}
	}
}

/*
 * Run the timing test selected with -t and print the time per operation.
 *
 * The iteration count and the timing test type are written to the
 * debugfs files (the type goes into the locktype file).  The elapsed time
 * is then read back from the etime file, divided by the iteration count
 * and scaled by 1000 to give the reported nanosecond figure.
 *
 * An open or read failure is reported on stderr and terminates the
 * program with exit status 1.
 */
static void run_timetest(void)
{
	char buf[80];
	int  fd;
	ssize_t nread;
	unsigned long etime;
	double ns;

	/*
	 * Set up the debugfs files
	 */
	write_debugfs(DEBUGFS_ICOUNT  , iterations);
	write_debugfs(DEBUGFS_LOCKTYPE, timetest);

	fd = open(DEBUGFS_ETIME, O_RDONLY);
	if (fd < 0) {
		fprintf(stderr, "Error: Can't open " DEBUGFS_ETIME "!\n");
		exit(1);
	}

	/*
	 * Leave room for a terminator: strtoul() needs a proper C string
	 * and read() does not supply one.
	 */
	nread = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (nread <= 0) {
		fprintf(stderr, "Error: debugfs file " DEBUGFS_ETIME
			" read error!\n");
		exit(1);
	}
	buf[nread] = '\0';

	etime = strtoul(buf, NULL, 10);
	ns = ((double)etime) * 1000 / iterations;
	printf("%s timing test = %.2f ns\n",
	      (timetest == TIME_PAUSE) ? "pause" :
	      (timetest == TIME_RMB  ) ? "rmb"   : "mb", ns);
}

/*
 * Program entry point.
 *
 * Parses the command line into the global test parameters, checks that
 * the locktest debugfs directory exists and that the effective user is
 * root, prints the parameters and raises the scheduling priority.  It
 * then runs either the timing test (-t) or one lock test for every thread
 * count from threads to maxthreads.  In verbose mode the status debugfs
 * file is copied to stdout at the end.
 *
 * Returns 0 on success.  Invalid options print the usage text; that and
 * every other detected error terminate the program with exit status 1.
 */
int main(int argc, char *argv[])
{
	int c;
	char *cmd = argv[0];
	char *end;
	struct stat statbuf;

	setlocale(LC_NUMERIC, "en_US");
	while ((c = getopt(argc, argv, "bBc:hi:l:L:n:p:r:s:t:Tvx:")) != -1) {
		switch (c) {
		case 'c':
			loadcnt = atoi(optarg);
			break;
		case 'h':
			print_usage(cmd);
			break;
		case 'i': {
			long val = strtol(optarg, &end, 0);
			long mult = 1;

			if ((*end == 'M') || (*end == 'm'))
				mult = 1000000;
			else if ((*end == 'K') || (*end == 'k'))
				mult = 1000;
			/*
			 * Reject a missing number and anything that does
			 * not fit into an int once scaled.
			 */
			if ((end == optarg) || (val > INT_MAX / mult) ||
			    (val < INT_MIN / mult))
				print_usage(cmd);
			iterations = (int)(val * mult);
			break;
		}
		case 'l':
			if (strcmp(optarg, "spin") == 0)
				locktype = LOCK_SPIN;
			else if (strcmp(optarg, "rw") == 0)
				locktype = LOCK_RWLOCK;
			else if (strcmp(optarg, "mutex") == 0)
				locktype = LOCK_MUTEX;
			else if (strcmp(optarg, "rwsem") == 0)
				locktype = LOCK_RWSEM;
			else if (strcmp(optarg, "tryrwsem") == 0)
				locktype = LOCK_TRYRWSEM;
			else if (isdigit((unsigned char)*optarg))
				locktype = atoi(optarg);
			else
				print_usage(cmd);
			break;
		case 'L':
			loadtype = atoi(optarg);
			break;
		case 'n': {
			long lo = strtol(optarg, &end, 0);
			long hi = lo;

			if (*end == '-')
				hi = strtol(end + 1, NULL, 0);
			/*
			 * At least one thread is required: the statistics
			 * divide by the thread count.
			 */
			if ((lo < 1) || (hi < lo) || (hi > INT_MAX))
				print_usage(cmd);
			threads = (int)lo;
			maxthreads = (int)hi;
			break;
		}
		case 'p':
			pausecnt = atoi(optarg);
			break;
		case 'r':
			rpercent = atoi(optarg);
			break;
		case 's':
			startcpu = atoi(optarg);
			break;
		case 't':
			if (strcmp(optarg, "pause") == 0)
				timetest = TIME_PAUSE;
			else if (strcmp(optarg, "rmb") == 0)
				timetest = TIME_RMB;
			else if (strcmp(optarg, "mb") == 0)
				timetest = TIME_MB;
			else
				print_usage(cmd);
			break;
		case 'T':
			latency = 1;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'x':
			cpuinc = atoi(optarg);
			break;
		default:
			print_usage(cmd);
			break;
		}
	}

	/*
	 * Check if the debugfs locktest directory is present
	 */
	if ((stat(DEBUGFS_ROOT, &statbuf) < 0) || !S_ISDIR(statbuf.st_mode)) {
		fprintf(stderr, "Error: " DEBUGFS_ROOT
			" directory doesn't exist!\n"
			"Please load the locktest kernel module.\n");
		exit(1);
	}

	/*
	 * Check if root is running it
	 */
	if (geteuid() != 0) {
		fprintf(stderr,
			"Error: this program needs to be run by root!\n");
		exit(1);
	}

	/*
	 * Print test parameters
	 */
	printf("\nRunning locktest with %s [",
	       (locktype < LOCK_MAX) ? lockname[locktype] : "other");
	if (iterations >= 0)
		printf("iterations = %d", iterations);
	else
		printf("runtime = %ds", -iterations);

	if ((locktype == LOCK_RWLOCK) || (locktype == LOCK_RWSEM) ||
	    (locktype == LOCK_TRYRWSEM)) {
		if (rpercent >= 0)
			printf(", r%% = %d%%", rpercent);
		else
			printf(", %dr/%dw", -rpercent, threads + rpercent);
	}
	if (loadcnt >= 0)
		printf(", load = %d", loadcnt);
	else
		printf(", load = %dus", -loadcnt);
	printf("]\n");

	/*
	 * Increase scheduling priority to reduce run-to-run variation.
	 * nice() may return -1 on success, so errno decides.
	 */
	errno = 0;
	if ((nice(-20) == -1) && (errno != 0)) {
		perror("nice");
		exit(1);
	}

	/*
	 * Allocate thread data
	 */
	tdata = calloc(maxthreads, sizeof(*tdata));
	if (tdata == NULL) {
		perror("calloc");
		exit(1);
	}

	if (timetest) {
		run_timetest();
		exit(0);
	}

	run_locktest();
	while (++threads <= maxthreads) {
		usleep(10);
		run_locktest();
	}

	/*
	 * Dump content of the status file if in verbose mode.  The copy
	 * goes through stdio so that it stays in order with the printf()
	 * output above when stdout is not a terminal.  A missing status
	 * file is not an error.
	 */
	if (verbose) {
		char buf[1024];
		ssize_t len;
		int fd = open(DEBUGFS_STATUS, O_RDONLY);

		if (fd >= 0) {
			while ((len = read(fd, buf, sizeof(buf))) > 0)
				fwrite(buf, 1, (size_t)len, stdout);
			close(fd);
		}
	}
	free(tdata);
	return 0;
}
