/*
 * pp.c: ping-pong threads benchmark
 *
 * Originally published in 
 * Multithreading in the Solaris Operating Environment
 * A Technical White Paper
 * Sun Microsystems 2002
 * 
 * Minor changes to work with Linux 
 * by Ian Wienand <ianw@gelato.unsw.edu.au>
 */

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/time.h>

/*
 * a ping-pong player
 *
 * One instance per player thread; every table embeds two of them.
 */
typedef struct {
	int table;	/* index of this player's table in the global tables array */
	int player;	/* seat at the table, 0 or 1; player 0 serves */
	int count;	/* moves this player has completed so far */
	pthread_mutex_t blocks[2];	/* turn hand-off locks, indexed by move parity (count % 2) in player() */
	pthread_t thread;	/* thread id filled in by pthread_create() */
} player_t;

/*
 * a ping-pong table on which a rally is played
 *
 * The field order and the trailing pad determine the in-memory layout of
 * the tables array, so they should not be rearranged casually.
 */
typedef struct {
	int target;	/* each player stops once its count reaches this value */
	int sleepms;	/* > 0: sleep this many ms after each move; -1: take the global bottleneck lock instead */
	player_t players[2];	/* the two opponents at this table */
	/*
	 * NOTE(review): 40 is a fixed constant; whether it actually rounds
	 * the struct up to a cache-line multiple depends on the platform's
	 * pthread type sizes - confirm for the target system.
	 */
	char pad[40];		/* avoids false cache sharing */
} table_t;

/*
 * a barrier used to synchronise and time players
 *
 * Threads calling barrier_wait() are held until `count` arrivals have
 * reached `target`.
 */
typedef struct {
	pthread_mutex_t mx;	/* protects count */
	pthread_cond_t cv;	/* waited on until count reaches target */
	int target;	/* number of arrivals that releases the barrier */
	int count;	/* arrivals so far */
} barrier_t;

/* global arena of ping-pong tables (allocated by setup_tables()) */
static table_t *tables;
/*
 * global lock used to create a bottleneck: every player takes and
 * releases it after each move when sleepms is -1
 */
static pthread_mutex_t bottleneck;
/* player synchronisation */
static barrier_t setup_barrier;	/* all players ready */
static barrier_t begin_barrier;	/* all games begin */
static barrier_t end_barrier;	/* all games ended */
/* global pthread attributes - must call attr_init() before using! */
static pthread_mutexattr_t mxattr;
static pthread_condattr_t cvattr;
static pthread_attr_t ptattr;
/* forward references */
static void *player(void *arg);
static void setup_tables(int n, int target, int sleepms);
static void attr_init(int pthread_process, int pthread_scope,
		      long stacksize);
static void barrier_init(barrier_t * b, int target);
static void barrier_wait(barrier_t * b);

/* for getopt(3C) */
extern char *optarg;
extern int optind;

/* verbose output flag; incremented once per -v on the command line */
static int verbose = 0;

/*
 * Parse the command line, create the tables and their player threads,
 * then time the setup phase and the rally phase.
 *
 * Options:
 *   -v                  verbose: print the configuration and the timings
 *   -i <target>         moves each player makes (default 1000000)
 *   -n <ntables>        number of tables, two threads each (default 1)
 *   -z <sleepms>        per-move sleep in ms; -1 selects the global
 *                       bottleneck mutex instead (default 0)
 *   -p private|shared   pshared attribute for mutexes and condvars
 *   -s process|system   thread contention scope
 *   -S <stacksize>      thread stack size; 0 keeps the default
 *
 * Prints the usage text and exits with status 1 on a bad option or a
 * table count below 1; returns 0 otherwise.  Timings are only printed
 * when -v is given.
 */
int main(int argc, char *argv[])
{
	int c;
	struct timeval t0, t1, tdiff;
	int ntables = 1;
	int target = 1000000;
	int sleepms = 0;
	int pthread_scope = PTHREAD_SCOPE_PROCESS;
	int pthread_process = PTHREAD_PROCESS_PRIVATE;
	long stacksize = 0L;
	int errflg = 0;

	/* getopt() signals the end of the options with -1 */
	while ((c = getopt(argc, argv, "?vi:n:z:p:s:c:S:")) != -1) {
		switch (c) {
		case 'v':
			verbose++;
			break;
		case 'i':
			target = atoi(optarg);
			break;
		case 'n':
			ntables = atoi(optarg);
			break;
		case 'z':
			sleepms = atoi(optarg);
			break;
		case 'p':
			if (strcmp(optarg, "shared") == 0) {
				pthread_process = PTHREAD_PROCESS_SHARED;
			} else if (strcmp(optarg, "private") == 0) {
				/*
				 * This must set the pshared value, not the
				 * contention scope.
				 */
				pthread_process = PTHREAD_PROCESS_PRIVATE;
			} else {
				errflg++;
			}
			break;
		case 's':
			if (strcmp(optarg, "system") == 0) {
				pthread_scope = PTHREAD_SCOPE_SYSTEM;
			} else if (strcmp(optarg, "process") == 0) {
				pthread_scope = PTHREAD_SCOPE_PROCESS;
			} else {
				errflg++;
			}
			break;
		case 'S':
			stacksize = atol(optarg);
			break;
		case '?':
		default:
			/*
			 * Unknown option, missing argument, or -c, which
			 * the option string lists but nothing handles.
			 */
			errflg++;
			break;
		}
	}
	/*
	 * Without at least one table there is nothing to run, and the
	 * arena allocation in setup_tables() would be asked for a
	 * non-positive size.
	 */
	if (ntables < 1) {
		errflg++;
	}
	if (errflg > 0) {
		(void) printf("usage: pp [-v] [-i <target>] [-n <ntables>]"
			      " [-z <sleepms>]\n"
			      "[-p private|shared] [-s process|system]\n"
			      "[-S <stacksize>]\n");
		exit(1);
	}
	if (verbose > 0) {
		(void) printf("\nPING-PONG CONFIGURATION:\n\n"
			      "target (-i) = %d\n"
			      "ntables (-n) = %d\n"
			      "sleepms (-z) = %d\n"
			      "pthread_scope (-s) = %s\n"
			      "pthread_process (-p) = %s\n"
			      "stacksize (-S) = %ld\n\n",
			      target, ntables, sleepms,
			      (pthread_scope ==
			       PTHREAD_SCOPE_PROCESS) ? "process" :
			      "system",
			      (pthread_process ==
			       PTHREAD_PROCESS_PRIVATE) ? "private" :
			      "shared", stacksize);
	}

	/* best to do this first! */
	attr_init(pthread_process, pthread_scope, stacksize);
	/* initialise bottleneck */
	(void) pthread_mutex_init(&bottleneck, &mxattr);
	/*
	 * initialise synchronisation and timing points; every barrier is
	 * sized for all player threads plus this main thread
	 */
	barrier_init(&setup_barrier, (2 * ntables) + 1);
	barrier_init(&begin_barrier, (2 * ntables) + 1);
	barrier_init(&end_barrier, (2 * ntables) + 1);
	/* initialise all games */
	gettimeofday(&t0, NULL);
	setup_tables(ntables, target, sleepms);
	/* wait for all players to be ready */
	barrier_wait(&setup_barrier);
	if (verbose) {
		gettimeofday(&t1, NULL);
		timersub(&t1, &t0, &tdiff);
		(void) printf("%d threads initialised in %.5gs\n",
			      ntables * 2,
			      tdiff.tv_sec + tdiff.tv_usec * 1e-6);
	}
	/* start all games */
	gettimeofday(&t0, NULL);
	barrier_wait(&begin_barrier);
	/* wait for all games to complete */
	barrier_wait(&end_barrier);
	if (verbose) {
		gettimeofday(&t1, NULL);
		timersub(&t1, &t0, &tdiff);
		(void) printf("%d games completed in %.5gs\n", ntables,
			      tdiff.tv_sec + tdiff.tv_usec * 1e-6);

	}
	return (0);
}

/*
 * build and populate the tables
 */

static void setup_tables(int n, int target, int sleepms)
{
	int i, j;
	int res;
	tables =
	    (void *) mmap(NULL, n * sizeof(table_t),
			  PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
			  -1, 0L);
	if (tables == (table_t *) (-1)) {
		exit(1);
	}
	for (i = 0; i < n; i++) {
		tables[i].target = target;
		tables[i].sleepms = sleepms;
		for (j = 0; j < 2; j++) {
			tables[i].players[j].table = i;
			tables[i].players[j].player = j;
			tables[i].players[j].count = 0;
			(void)
			    pthread_mutex_init(&
					       (tables[i].players[j].
						blocks[0]), &mxattr);
			(void)
			    pthread_mutex_init(&
					       (tables[i].players[j].
						blocks[1]), &mxattr);
			res =
			    pthread_create(&(tables[i].players[j].thread),
					   &ptattr, player,
					   &(tables[i].players[j]));
			if (res != 0) {
				perror("pthread_create");
				exit(1);
			}
		}
	}
}

/*
 * a ping-pong player
 *
 * Thread start routine.  `arg` is the player_t slot this thread plays
 * for; the opponent is the other slot at the same table.  The thread
 * checks in at the setup barrier, takes its opening locks, waits at the
 * begin barrier, then plays moves until its own count reaches the
 * table's target, and finally checks in at the end barrier.
 *
 * Each move is a hand-off through the two players' `blocks` mutexes;
 * the order of the lock and unlock calls is what makes the players
 * alternate, so it must not be rearranged.
 *
 * Always returns NULL.
 */
static void *player(void *arg)
{
	player_t *us, *them;
	table_t *table;
	struct timespec ts;
	us = (player_t *) arg;
	table = &(tables[us->table]);
	/* the opponent is the other seat (0 <-> 1) at our table */
	them = &(table->players[(us->player + 1) % 2]);
	barrier_wait(&setup_barrier);
	/* player 0 always serves */
	if (us->player == 0) {
		/* hold both of the opponent's blocks until the game starts */
		(void) pthread_mutex_lock(&(them->blocks[0]));
		(void) pthread_mutex_lock(&(them->blocks[1]));
		barrier_wait(&begin_barrier);
		/* serve! */
		(void) pthread_mutex_unlock(&(them->blocks[0]));
	} else {
		/*
		 * hold the server's blocks[0] so its first move has to wait
		 * for our first move to release it
		 */
		(void) pthread_mutex_lock(&(them->blocks[0]));
		barrier_wait(&begin_barrier);
	}

	while (us->count < table->target) {
		/* wait to be unblocked */
		(void) pthread_mutex_lock(&(us->blocks[us->count % 2]));
		/* block their next + 1 move */
		(void)
		    pthread_mutex_lock(&
				       (them->
					blocks[(us->count +
						us->player) % 2]));
		/* let them block us again */
		(void) pthread_mutex_unlock(&(us->blocks[us->count % 2]));
		/* unblock their next move */
		(void)
		    pthread_mutex_unlock(&
					 (them->
					  blocks[(us->count + us->player +
						  1) % 2]));
		us->count++;
		if (table->sleepms == -1) {
			/* -1 means: contend on the single global lock instead of sleeping */
			(void) pthread_mutex_lock(&bottleneck);
			(void) pthread_mutex_unlock(&bottleneck);
		} else if (table->sleepms > 0) {
			/* split the millisecond delay into seconds + nanoseconds */
			ts.tv_sec = table->sleepms / 1000;
			ts.tv_nsec = (table->sleepms % 1000) * 1000000;
			(void) nanosleep(&ts, NULL);
		}
	}

	barrier_wait(&end_barrier);
	return (NULL);
}

   /*
    * simple, non-spinning barrier wait mechanism
    *
    * Registers the caller's arrival at barrier b.  The arrival that
    * brings the count up to the target releases the mutex and then
    * wakes every waiter; every earlier arrival sleeps on the condition
    * variable until the count has reached the target.
    */

static void barrier_wait(barrier_t * b)
{
	int is_last;

	(void) pthread_mutex_lock(&b->mx);
	b->count += 1;
	is_last = (b->count >= b->target);
	if (!is_last) {
		/* re-check after every wakeup: wakeups may be spurious */
		while (b->count < b->target) {
			(void) pthread_cond_wait(&b->cv, &b->mx);
		}
	}
	(void) pthread_mutex_unlock(&b->mx);
	if (is_last) {
		/* the final arrival wakes everybody, after dropping the lock */
		(void) pthread_cond_broadcast(&b->cv);
	}
}

   /*
    * initialise a barrier
    *
    * Sets barrier b up to release once `target` threads have called
    * barrier_wait() on it.  The mutex and condition variable are
    * created with the global attributes, so attr_init() must have run
    * first.  Must not be called while any thread is waiting on b.
    */

static void barrier_init(barrier_t * b, int target)
{
	/* nobody has arrived yet */
	b->count = 0;
	b->target = target;

	(void) pthread_mutex_init(&b->mx, &mxattr);
	(void) pthread_cond_init(&b->cv, &cvattr);
}

   /*
    * initialise the global pthread attributes
    *
    * Prepares mxattr, cvattr and ptattr for every later mutex,
    * condition variable and thread creation in this file.
    *
    *   pthread_process  pshared setting applied to mutexes and condvars
    *   pthread_scope    contention scope applied to new threads
    *   stacksize        thread stack size; values <= 0 keep the default
    */

static void
attr_init(int pthread_process, int pthread_scope, long stacksize)
{
	/* thread attributes */
	(void) pthread_attr_init(&ptattr);
	(void) pthread_attr_setscope(&ptattr, pthread_scope);

	/* mutex attributes */
	(void) pthread_mutexattr_init(&mxattr);
	(void) pthread_mutexattr_setpshared(&mxattr, pthread_process);

	/* condition variable attributes */
	(void) pthread_condattr_init(&cvattr);
	(void) pthread_condattr_setpshared(&cvattr, pthread_process);

	if (stacksize <= 0) {
		return;
	}
	(void) pthread_attr_setstacksize(&ptattr, stacksize);
}

