/*
 * Copyright (c) 2010 Philip Frey, Systems Group ETH Zurich.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/*
 * Connection management.
 */

#include <stdlib.h>
#include <string.h>
#include <arpa/inet.h>
#include <netdb.h>

#include <dlfcn.h>

#include "iwarp.h"
#include "iwarp_debug.h"

/*
 * Port stored in the source address of the cm id of a listen context.
 *
 * The value is returned exactly as it is stored in the socket address; no
 * byte-order conversion is applied. The sockaddr flavour (IPv4/IPv6) is
 * selected by the address family of the source address.
 *
 * @ctx is evaluated more than once - do not pass expressions with side effects.
 */
#define listen_ctx_to_port(ctx)												   \
((ctx)->cm_id->route.addr.src_addr.sa_family == PF_INET6 ?					   \
((struct sockaddr_in6 *)&(ctx)->cm_id->route.addr.src_addr)->sin6_port :	   \
((struct sockaddr_in *)&(ctx)->cm_id->route.addr.src_addr)->sin_port)

/*
 * Convert the source address of the cm id of a listen context into its textual
 * form.
 *
 * @ctx:		context whose cm_id->route.addr.src_addr is converted
 * @addr_str:	output buffer of at least INET6_ADDRSTRLEN bytes
 *
 * The binary address is taken from the sockaddr flavour matching the address
 * family (sin6_addr for IPv6, sin_addr otherwise). Evaluates to the return
 * value of inet_ntop(), i.e. @addr_str on success and NULL on failure.
 *
 * @ctx is evaluated more than once - do not pass expressions with side effects.
 */
#define listen_ctx_to_addr(ctx, addr_str)									   \
inet_ntop((ctx)->cm_id->route.addr.src_addr.sa_family,						   \
(ctx)->cm_id->route.addr.src_addr.sa_family == AF_INET6 ?					   \
(const void *)&((struct sockaddr_in6 *)										   \
				&(ctx)->cm_id->route.addr.src_addr)->sin6_addr :			   \
(const void *)&((struct sockaddr_in *)										   \
				&(ctx)->cm_id->route.addr.src_addr)->sin_addr,				   \
(addr_str), INET6_ADDRSTRLEN)

/*
 * Stop flag for the cm thread: cm_thread() leaves its event loop once this is
 * non-zero. The flag is tested before each blocking event fetch and again
 * right after the fetch returns.
 */
int stop_cm_thread = 0;

////////////////////////////////////////////////////////////////////////////////
// EXTERNAL METHODS
////////////////////////////////////////////////////////////////////////////////

/*
 * Defined in another translation unit. Called by iw_ctx_free() when
 * ctx_conn->bufadv_valid is set to clean up the memory exchange
 * infrastructure of the connection context.
 */
extern void bufadv_free(
		IN OUT 	struct iw_ctx_conn *ctx_conn);
/*
 * Defined in another translation unit. Called by iw_ctx_alloc() with the verbs
 * context of the cm id once the qp has been created.
 * NOTE(review): presumably starts (or references) the async event thread for
 * that device - confirm against the definition.
 */
extern void create_aeq_thread(
		IN		struct ibv_context *ibv_ctx);
/*
 * Defined in another translation unit. Called by iw_ctx_free() with the verbs
 * context of the cm id; counterpart of create_aeq_thread().
 */
extern void destroy_aeq_thread(
		IN		struct ibv_context *ibv_ctx);


////////////////////////////////////////////////////////////////////////////////
// PRIVATE HELPER METHODS
////////////////////////////////////////////////////////////////////////////////

/*
 * Duplicate the private data carried by a cm event into the connection
 * context.
 *
 * The data is copied into a freshly calloc'ed buffer which is stored in
 * ctx_conn->priv_data_in; its size is stored in ctx_conn->priv_data_in_len.
 * An event without private data leaves the context untouched.
 *
 * @event:		cm event whose param.conn private data is duplicated
 * @ctx_conn:	connection context receiving the copy
 *
 * Return the number of bytes copied (0 if the event carries no private data);
 * -1 if the context is NULL or the buffer could not be allocated.
 */
static int private_data_copy(
		IN 		const struct rdma_cm_event	*event,
		IN OUT	struct iw_ctx_conn			*ctx_conn)
{
	int nbytes;

	/* nothing can be stored without a context */
	if (ctx_null(ctx_conn)) {
		return -1;
	}

	/* an event without private data is not an error */
	nbytes = event->param.conn.private_data_len;
	if (!nbytes) {
		dprint(DBG_CM, LOG_INFO, "no inbound private data");
		return 0;
	}

	/* the context gets its own copy of the event's private data */
	ctx_conn->priv_data_in = calloc(1, nbytes);
	if (ctx_conn->priv_data_in == NULL) {
		dprint(DBG_ON, LOG_ERROR, "out of memory: inbound priv data too big");
		return -1;
	}
	ctx_conn->priv_data_in_len = nbytes;
	memcpy(ctx_conn->priv_data_in, event->param.conn.private_data, nbytes);

	dprint(DBG_CM, LOG_INFO, "inbound private data copied (%d bytes)",
			nbytes);

	return nbytes;
}


/*
 * Fill in the parameters used for connecting/accepting.
 *
 * The structure is zeroed, the library defaults (RESPONDER_RESOURCES,
 * INITIATOR_DEPTH, RETRY_COUNT) are applied and the outbound private data is
 * attached if a non-NULL buffer with a non-zero length is given. The private
 * data are referenced, not copied.
 *
 * @conn_param:			structure to fill in (nothing happens if it is NULL)
 * @priv_data_out:		outbound private data or NULL
 * @priv_data_out_len:	length of the outbound private data in bytes
 */
static void set_connection_parameters(
		IN OUT	struct rdma_conn_param	*conn_param,
		IN		const void				*priv_data_out,
		IN		uint8_t					 priv_data_out_len)
{
	if (conn_param == NULL) {
		dprint(DBG_ON, LOG_ERROR, "conn_param pointer is NULL");
		return;
	}

	memset(conn_param, 0, sizeof(struct rdma_conn_param));

	/* default connection parameters */
	//TODO: this should be modifiable!
	conn_param->retry_count			= RETRY_COUNT;
	conn_param->initiator_depth		= INITIATOR_DEPTH;
	conn_param->responder_resources	= RESPONDER_RESOURCES;

	/* private data are optional */
	if (priv_data_out == NULL || priv_data_out_len == 0) {
		dprint(DBG_CM, LOG_INFO, "no outbound private data");
		return;
	}

	conn_param->private_data_len = priv_data_out_len;
	conn_param->private_data = priv_data_out;
	dprint(DBG_CM, LOG_INFO, "outbound private data set (%d bytes)",
			priv_data_out_len);
}


////////////////////////////////////////////////////////////////////////////////
// ATOMIC VARIABLE (FOR REFERENCE COUNTING)
////////////////////////////////////////////////////////////////////////////////

/*
 * Read atomic variable.
 *
 * Expands to a plain access of the counter member; no atomic builtin or
 * memory barrier is involved.
 */
#define atomic_read(v) ((v)->counter)

/*
 * Set atomic variable.
 *
 * Expands to a plain assignment to the counter member (the expression
 * evaluates to the assigned value); no atomic builtin or memory barrier is
 * involved.
 */
#define atomic_set(v,i) (((v)->counter) = (i))

/*
 * Atomically decrement the counter of @v by one and report whether it dropped
 * to zero.
 *
 * Return 1 if the counter is zero after the decrement; 0 otherwise.
 */
static inline int atomic_sub_and_test(
		IN OUT atomic_t *v)
{
	return __sync_sub_and_fetch(&v->counter, 1) == 0;
}

/*
 * Atomically increment the counter of @v by one. The resulting value is not
 * reported to the caller.
 */
static inline void atomic_inc(
		IN OUT atomic_t *v)
{
	(void)__sync_add_and_fetch(&v->counter, 1);
}


////////////////////////////////////////////////////////////////////////////////
// LINKED LIST (FOR CONNECTION REQUESTS)
////////////////////////////////////////////////////////////////////////////////

/*
 * Enqueue a connection request at the tail of the singly linked request list
 * of a listen context and post the semaphore of the listen context.
 *
 * The cm id of the event and a calloc'ed copy of its private data (if any) are
 * stored in a newly allocated request container.
 *
 * @ctx_listen:	the listen context on which the event was received
 * @cm_event:	the event containing the connection request; must be of type
 *				RDMA_CM_EVENT_CONNECT_REQUEST
 *
 * Return 0 on success; -1 on invalid input or if memory is exhausted.
 */
static int put_request(
		IN OUT	struct iw_ctx_listen *ctx_listen,
		IN		const struct rdma_cm_event *cm_event)
{
	struct iw_conn_req	*req;

	/* validate the arguments */
	if (ctx_listen == NULL) {
		dprint(DBG_ON, LOG_ERROR, "listen context is NULL");
		return -1;
	}
	if (cm_event == NULL) {
		dprint(DBG_ON, LOG_ERROR, "cm event is NULL");
		return -1;
	}
	if (cm_event->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
		dprint(DBG_ON, LOG_ERROR, "wrong cm event type");
		return -1;
	}

	/* zero-initialized container: next and priv_data start out as NULL */
	req = calloc(1, sizeof(struct iw_conn_req));
	if (req == NULL) {
		dprint(DBG_ON, LOG_ERROR, "out of memory");
		return -1;
	}

	/* remember the cm id of the new connection */
	req->cm_id = cm_event->id;

	/* keep a private copy of the inbound private data */
	req->priv_data_len = cm_event->param.conn.private_data_len;
	if (req->priv_data_len > 0) {
		req->priv_data = calloc(1, req->priv_data_len);
		if (req->priv_data == NULL) {
			dprint(DBG_ON, LOG_ERROR, "out of memory");
			free(req);
			return -1;
		}
		memcpy(req->priv_data, cm_event->param.conn.private_data,
				req->priv_data_len);
	}

	/* link the request in at the tail and wake up a waiter */
	pthread_mutex_lock(&ctx_listen->list_mutex);
	if (ctx_listen->head) {
		/* non-empty list: chain behind the current tail */
		ctx_listen->tail->next = req;
	} else {
		/* empty list: the request is head and tail at once */
		ctx_listen->head = req;
	}
	ctx_listen->tail = req;
	sem_post(&ctx_listen->sem);
	pthread_mutex_unlock(&ctx_listen->list_mutex);

	return 0;
}

/*
 * Remove a pending connection request from the head of the singly linked list
 * of the listen context.
 *
 * Blocks on the semaphore of the listen context until a request is available.
 * The data from the connection request will be assigned to the connection
 * context given: the cm id is taken over, the private data (if any) is copied
 * into a malloc'ed buffer stored in ctx_conn->priv_data_in and the context is
 * put into state IWARP_CONN_REQ. The request container and its private data
 * buffer are released in any case once the request has been dequeued.
 *
 * @ctx_listen:	listen context to get next connection request from
 * @ctx_conn:	uninitialized connection context
 *
 * Return 0 on success; -1 otherwise.
 */
static int get_request(
		IN OUT	struct iw_ctx_listen *ctx_listen,
		IN OUT	struct iw_ctx_conn *ctx_conn)
{
	int					 ret = 0;
	struct iw_conn_req	*conn_req;

	/* sanitize input */
	if (!ctx_listen) {
		dprint(DBG_ON, LOG_ERROR, "listen context is NULL");
		return -1;
	}
	if (!ctx_conn) {
		dprint(DBG_ON, LOG_ERROR, "connection context is NULL");
		return -1;
	}

	/* get head of the list */
	sem_wait_safe(&ctx_listen->sem);
	pthread_mutex_lock(&ctx_listen->list_mutex);
	conn_req = ctx_listen->head;
	if (conn_req) {
		ctx_listen->head = conn_req->next;
		if (!ctx_listen->head) {
			/* we just removed the last element */
			ctx_listen->tail = NULL;
		}
	}
	pthread_mutex_unlock(&ctx_listen->list_mutex);

	/* semaphore and list out of sync: do not dereference an empty head */
	if (!conn_req) {
		dprint(DBG_ON, LOG_ERROR, "connection request list is empty");
		return -1;
	}

	/* set connection context members */
	ctx_conn->cm_id = conn_req->cm_id;
	ctx_conn->priv_data_in_len = conn_req->priv_data_len;
	if (conn_req->priv_data_len) {
		ctx_conn->priv_data_in = malloc(conn_req->priv_data_len);
		if (!ctx_conn->priv_data_in) {
			/* the list mutex has already been released above - it must not
			   be unlocked a second time here */
			dprint(DBG_ON, LOG_ERROR, "out of memory");
			ctx_conn->priv_data_in_len = 0;
			ret = -1;
			goto out;
		}
		memcpy(ctx_conn->priv_data_in, conn_req->priv_data,
				conn_req->priv_data_len);
	}
	ctx_conn->cm_id->context = ctx_conn;
	ctx_conn->ctx_listen = ctx_listen;
	ctx_conn->state = IWARP_CONN_REQ;

out:
	/* the private data was copied, so the request's own buffer (allocated by
	   put_request()) has to be released together with the container */
	free(conn_req->priv_data);
	free(conn_req);

	return ret;
}


////////////////////////////////////////////////////////////////////////////////
// PRIVATE METHODS
////////////////////////////////////////////////////////////////////////////////

/* forward declaration: lib_init() starts the cm thread, which is defined
   further down in this file */
static void *cm_thread(void *arg);

/*
 * Resolve the symbol @sym in the rdma library opened by lib_init() and store
 * it in lib->ops.s_<sym>. Jumps to the sym_err label of lib_init() if the
 * symbol cannot be resolved.
 *
 * Only usable inside lib_init() (relies on its lib_handle local and sym_err
 * label).
 */
#define IW_LOAD_SYM(sym)													   \
	do {																	   \
		if (!(lib->ops.s_##sym = dlsym(lib_handle, #sym))) {				   \
			goto sym_err;													   \
		}																	   \
	} while (0)

/*
 * Init iWARP library:
 * - open rdma library (OFED)
 * - calloc global library structure (lib)
 * - resolve all rdma cm / ibv entry points used by this library
 * - create event channel
 * - init aeq context list
 * - start cm thread
 *
 * On failure everything acquired so far is released again and lib is reset to
 * NULL.
 *
 * Return 0 on success or if the lib already exists; -1 otherwise.
 */
static int lib_init(void)
{
	int		 ret;
	void	*lib_handle;

	if (lib) {
		dprint(DBG_CM, LOG_WARNING, "context exists - doing nothing");
		return 0;
	}

	/* open shared rdma library */
	lib_handle = dlopen(RDMA_LIB_PATH, RTLD_LAZY);
	if (!lib_handle) {
		dprint(DBG_ON, LOG_ERROR, "failed to load rdma library from %s: %s",
				RDMA_LIB_PATH, dlerror());
		return -1;
	}

	/* allocate context */
	lib = calloc(1, sizeof(struct iw_lib));
	if (!lib) {
		dprint(DBG_ON, LOG_ERROR, "out of memory");
		dlclose(lib_handle);
		return -1;
	}
	lib->lib_rdma_handle = lib_handle;

	/* CM function pointers */
	IW_LOAD_SYM(rdma_create_event_channel);
	IW_LOAD_SYM(rdma_destroy_event_channel);
	IW_LOAD_SYM(rdma_get_cm_event);
	IW_LOAD_SYM(rdma_ack_cm_event);
	IW_LOAD_SYM(rdma_create_id);
	IW_LOAD_SYM(rdma_destroy_id);
	IW_LOAD_SYM(rdma_create_qp);
	IW_LOAD_SYM(rdma_destroy_qp);
	IW_LOAD_SYM(rdma_resolve_addr);
	IW_LOAD_SYM(rdma_resolve_route);
	IW_LOAD_SYM(rdma_bind_addr);
	IW_LOAD_SYM(rdma_connect);
	IW_LOAD_SYM(rdma_listen);
	IW_LOAD_SYM(rdma_accept);
	IW_LOAD_SYM(rdma_reject);
	IW_LOAD_SYM(rdma_disconnect);

	/* IBV */
	IW_LOAD_SYM(ibv_alloc_pd);
	IW_LOAD_SYM(ibv_dealloc_pd);
	IW_LOAD_SYM(ibv_create_comp_channel);
	IW_LOAD_SYM(ibv_destroy_comp_channel);
	IW_LOAD_SYM(ibv_create_cq);
	IW_LOAD_SYM(ibv_get_cq_event);
	IW_LOAD_SYM(ibv_ack_cq_events);
	IW_LOAD_SYM(ibv_destroy_cq);
	IW_LOAD_SYM(ibv_create_srq);
	IW_LOAD_SYM(ibv_destroy_srq);
	IW_LOAD_SYM(ibv_reg_mr);
	IW_LOAD_SYM(ibv_dereg_mr);
	IW_LOAD_SYM(ibv_query_qp);
	IW_LOAD_SYM(ibv_get_async_event);
	IW_LOAD_SYM(ibv_ack_async_event);

	/* create rdma event channel */
	lib->event_channel = (*lib->ops.s_rdma_create_event_channel)();
	if (!lib->event_channel) {
		dprint(DBG_ON, LOG_ERROR, "failed to create rdma event channel (%m)");
		goto free_lib;
	}

	/* init aeq context list (before the cm thread is started so that the
	   global context is complete by the time another thread can see it) */
	lib->aeq_ctx_list = NULL;
	pthread_mutex_init(&lib->aeq_ctx_list_mutex, NULL);

	ret = pthread_create(&lib->cm_thread, NULL, cm_thread, NULL);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to start cm thread");
		goto destroy_channel;
	}

	dprint(DBG_CM, LOG_INFO, "shared context created");

	return 0;

destroy_channel:
	/* must happen before dlclose(): the function pointer lives in the lib */
	pthread_mutex_destroy(&lib->aeq_ctx_list_mutex);
	(*lib->ops.s_rdma_destroy_event_channel)(lib->event_channel);
	goto free_lib;

sym_err:
	dprint(DBG_ON, LOG_ERROR, "dlsym() failed");
free_lib:
	dlclose(lib_handle);
	free(lib);
	lib = NULL;

	return -1;

}

#undef IW_LOAD_SYM


/*
 * Resolve the remote address and the route to the remote host.
 *
 * This method is called by the active side before connecting in order to bind
 * the rdma device which will be used for the connection.
 *
 * The @ctx_conn must be in state IWARP_INVALID before this call and is set to
 * IWARP_ROUTE_RES on success (the state changes are made by the cm event
 * dispatcher; this function waits for them on the context semaphore).
 *
 * @remote_host		the remote address or hostname (resolved as IPv4)
 * @remote_port:	the remote port
 * @ctx_conn:		the iwarp connection context
 *
 * Only the active side should call this function.
 *
 * Return 0 on success; -1 otherwise.
 */
static int addr_route_resolve(
		IN		const char			*remote_host,
		IN		const char			*remote_port,
		IN OUT	struct iw_ctx_conn	*ctx_conn)
{
	int					 ret;
	struct sockaddr_in	 srv_addr;
	struct addrinfo		*res;
	struct addrinfo 	 hints = {
			.ai_family = AF_INET,
			.ai_socktype = SOCK_STREAM
	};

	if (!lib) {
		dprint(DBG_ON, LOG_ERROR, "shared context should exist at this point");
		return -1;
	}

	if (!ctx_state(IWARP_INVALID, ctx_conn)) {
		return -1;
	}

	if (ctx_conn->type != CTX_CONNECT) {
		dprint(DBG_ON, LOG_WARNING, "only connect context should not need to "
				"resolve address and route - did you use the wrong context?");
		return -1;
	}

	/* look up the responder (remote) address */
	ret = getaddrinfo(remote_host, remote_port, &hints, &res);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to get address info for %s:%s (%s)",
				remote_host, remote_port, gai_strerror(ret));
		return -1;
	}
	memset(&srv_addr, 0, sizeof(srv_addr));
	srv_addr.sin_addr.s_addr =
		((struct sockaddr_in*)res->ai_addr)->sin_addr.s_addr;
	srv_addr.sin_family = AF_INET;
	/* NOTE(review): the port is stored without htons(); whether that is
	   intended depends on how the listening side fills in its port, which is
	   not visible here - confirm before changing */
	srv_addr.sin_port = strtol(remote_port, NULL, 0);

	/* the address has been copied: release the lookup result right away so
	   that it is not leaked by any of the error returns below */
	freeaddrinfo(res);

	/* resolve responder address (timeout: 1s) */
	ret = (lib->ops.s_rdma_resolve_addr)(ctx_conn->cm_id, NULL,
			(struct sockaddr*)&srv_addr, 1000);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to resolve responder address (%m)");
		return -1;
	}

	/* wait for address resolved event */
	dprint(DBG_CM, LOG_INFO, "waiting for ADDR_RESOLVED event");
	sem_wait_safe(&ctx_conn->sem);
	if (!ctx_state(IWARP_ADDR_RES, ctx_conn)) {
		return -1;
	}

	/* resolve route to responder (timeout: 1s) */
	ret = (*lib->ops.s_rdma_resolve_route)(ctx_conn->cm_id, 1000);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to resolve route (%m)");
		return -1;
	}

	/* wait for route resolved event */
	dprint(DBG_CM, LOG_INFO, "waiting for ROUTE_RESOLVED event");
	sem_wait_safe(&ctx_conn->sem);
	if (!ctx_state(IWARP_ROUTE_RES, ctx_conn)) {
		return -1;
	}

	dprint(DBG_CM, LOG_INFO, "address and route resolved");

	return 0;

}


/*
 * Dispatch the connection management event.
 *
 * Each event has an indirect pointer to the corresponding iwarp context to
 * which the event belongs (event->id->context). Each iwarp context has a
 * semaphore which is used to wait for an event and wake up on its arrival.
 * Depending on the event the context state is updated and the waiter is
 * notified either through a callback registered at the context or through
 * the semaphore.
 *
 * @event:	the event to dispatch
 *
 * Return 0 on success; -1 otherwise.
 */
static int cm_event_dispatcher(
		IN struct rdma_cm_event *event)
{
	int						 ret;
	struct iw_ctx_conn		*ctx_conn;
	struct iw_ctx_listen	*ctx_listen;

	if (event->status == 0) {
		dprint(DBG_CM, LOG_INFO, "%s", rdma_evt_str(event->event));
	} else {
		dprint(DBG_ON, LOG_ERROR, "%s", rdma_evt_str(event->event));
	}

	ctx_conn = (struct iw_ctx_conn*)event->id->context;
	if (ctx_null(ctx_conn)) {
		return -1;
	}

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		ctx_conn->state = IWARP_ADDR_RES;
		sem_post(&ctx_conn->sem);
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ctx_conn->state = IWARP_ROUTE_RES;
		sem_post(&ctx_conn->sem);
		break;

	/* need further handling */
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		/* the request belongs to the listen context of the listening id;
		   ctx_conn is not touched for this event */
		ctx_listen = (struct iw_ctx_listen*)event->listen_id->context;
		if (!ctx_listen) {
			dprint(DBG_ON, LOG_ERROR, "listen context is NULL");
			return -1;
		}
		if (ctx_listen->connect) {
			/* callback function */
			(*ctx_listen->connect)(event);
		} else {
			/* enqueue the connection request at the listen context */
			if (put_request(ctx_listen, event)) {
				return -1;
			}
		}
		break;

	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		//we never saw this...
		dprint(DBG_ON, LOG_WARNING, "CONNECT_RESPONSE event arrived");
		sem_post(&ctx_conn->sem);
		break;

	case RDMA_CM_EVENT_ESTABLISHED:
		ctx_conn->state = IWARP_CONNECTED;
		if (ctx_conn->type == CTX_CONNECT) {
			/* active side: copy private data from responder */
			ret = private_data_copy(event, ctx_conn);
			if (ret == -1) {
				ctx_conn->state = IWARP_ERROR;
				sem_post(&ctx_conn->sem);
				return -1;
			}
			sem_post(&ctx_conn->sem);
		} else {
			/* passive side */
			if (ctx_conn->ctx_listen->connect) {
				/* assume nobody is waiting on the semaphore since accept has
				   not been called through this library */
				if (ctx_conn->established) {
					(*ctx_conn->established)(ctx_conn);
				}
			} else {
				/* iw_accept() is waiting on the semaphore */
				sem_post(&ctx_conn->sem);
			}
		}
		break;

	case RDMA_CM_EVENT_REJECTED:
		/* we get this only at the active side (initiator) */
		ctx_conn->state = IWARP_REJECTED;
		/* private_data_copy() returns the number of bytes copied on success;
		   only -1 is an error. A reject carrying private data must therefore
		   stay in state IWARP_REJECTED. */
		ret = private_data_copy(event, ctx_conn);
		if (ret == -1) {
			ctx_conn->state = IWARP_ERROR;
			sem_post(&ctx_conn->sem);
			return -1;
		}
		sem_post(&ctx_conn->sem);
		break;

	case RDMA_CM_EVENT_DISCONNECTED:
		ctx_conn->state = IWARP_DISCONNECTED;
		if (ctx_conn->disconnect) {
			/* callback function */
			(*ctx_conn->disconnect)(ctx_conn);
		} else {
			/* notification through semaphore */
			sem_post(&ctx_conn->sem);
		}
		break;

	case RDMA_CM_EVENT_MULTICAST_JOIN:
		dprint(DBG_ON, LOG_ERROR, "multicast not implemented!");
		sem_post(&ctx_conn->sem);
		break;

	/* errors */
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		ctx_conn->state = IWARP_ERROR;
		if (ctx_conn->error) {
			/* callback function */
			(*ctx_conn->error)(ctx_conn);
		} else {
			/* notification through semaphore */
			sem_post(&ctx_conn->sem);
		}
		break;

	default:
		dprint(DBG_ON, LOG_ERROR, "unknown cm event (this should not happen!)");
		sem_post(&ctx_conn->sem);

		return -1;

	}

	return 0;

}


/*
 * The connection management event handler.
 * It waits for events on the global event channel, takes a private copy of
 * each event (including its private data), acknowledges the event and then
 * forwards the copy to the dispatcher.
 *
 * The private data has to be copied before the acknowledgement: the copied
 * event structure only holds a pointer to it and the memory behind that
 * pointer belongs to the event which is released by the acknowledgement.
 *
 * The thread can be stopped by setting the global variable stop_cm_thread to 1.
 *
 * @arg:	unused
 *
 * Always returns NULL.
 */
static void *cm_thread(void *arg)
{
	int						 ret;
	size_t					 priv_len;
	struct rdma_cm_event	*event = NULL;
	struct rdma_cm_event	 my_event;
	/* big enough for any length the copy below can see (it is clamped) */
	uint8_t					 priv_data[256];

	(void)arg;

	lib->cm_thread_running = 1;

	while (!stop_cm_thread) {
		ret = (*lib->ops.s_rdma_get_cm_event)(lib->event_channel, &event);
		if (stop_cm_thread) {
			/* only acknowledge an event that was actually delivered */
			if (!ret && event) {
				(*lib->ops.s_rdma_ack_cm_event)(event);
			}
			return NULL;
		}
		if (ret) {
			dprint(DBG_ON, LOG_ERROR, "failed to get cm event (%m)");
			lib->cm_thread_running = 0;
			return NULL;
		}

		/* private copy of the event and of the private data it points to */
		memcpy(&my_event, event, sizeof(struct rdma_cm_event));
		priv_len = my_event.param.conn.private_data_len;
		if (my_event.param.conn.private_data && priv_len > 0) {
			if (priv_len > sizeof(priv_data)) {
				priv_len = sizeof(priv_data);
			}
			memcpy(priv_data, my_event.param.conn.private_data, priv_len);
			my_event.param.conn.private_data = priv_data;
		}

		ret = (*lib->ops.s_rdma_ack_cm_event)(event);
		if (ret) {
			dprint(DBG_ON, LOG_ERROR, "failed to ack cm event (%m)");
			lib->cm_thread_running = 0;
			return NULL;
		}
		cm_event_dispatcher(&my_event);
	}

	return NULL;

}

////////////////////////////////////////////////////////////////////////////////
// INTERFACE METHODS
////////////////////////////////////////////////////////////////////////////////

/*
 * Allocate all resources of a connection context: event semaphore, cm id
 * (connect-contexts only), protection domain, send/receive completion
 * channels and cqs, optional srq and the qp. The global library context is
 * created on first use.
 *
 * A connect-context (CTX_CONNECT) must be in state IWARP_INVALID; it is
 * zeroed (only type and state are kept), gets a new cm id and has address and
 * route to @remote_host:@remote_port resolved. An accept-context (CTX_ACCEPT)
 * must be in state IWARP_CONN_REQ and already carry its cm id.
 *
 * @remote_host:	remote address or hostname (connect-contexts only)
 * @remote_port:	remote port (connect-contexts only)
 * @qp_cap:			requested qp capacities or NULL for the library defaults;
 *					if given, the values are clamped to the library maxima in
 *					place
 * @shared_pd:		pd to share (its reference count is incremented) or NULL
 *					to create a new one
 * @create_srq:		non-zero to create a new srq for this context
 * @existing_srq:	srq to share (its reference count is incremented); only
 *					used if @create_srq is zero
 * @ctx_conn:		the connection context to set up
 *
 * On success the context is in state IWARP_VALID. On failure everything this
 * call acquired is released again and the corresponding context members are
 * reset to NULL; the semaphore and cm id of an accept-context are left alone.
 *
 * Return 0 on success; -1 otherwise.
 */
int iw_ctx_alloc(
		IN		const char*			 remote_host,
		IN		const char*			 remote_port,
		IN OUT	struct ibv_qp_cap	*qp_cap,
		IN OUT	struct iw_pd		*shared_pd,
		IN		int					 create_srq,
		IN		struct iw_srq		*existing_srq,
		IN OUT	struct iw_ctx_conn 	*ctx_conn)
{
	int							ret;
	struct ibv_qp_init_attr		qp_init_attr;
	struct ibv_srq_init_attr	srq_init_attr;

	/* create global context if necessary */
	if (!lib) {
		ret = lib_init();
		if (ret) {
			dprint(DBG_ON, LOG_ERROR, "failed to create global context");
			return -1;
		}
	}

	if (ctx_null(ctx_conn)) {
		return -1;
	}

	/* check that the context is in the correct state */
	if (ctx_conn->type == CTX_ACCEPT && !ctx_state(IWARP_CONN_REQ, ctx_conn)) {
		dprint(DBG_ON, LOG_ERROR, "accept-context must be in state "
				"IWARP_CONN_REQ befor allocation");
		return -1;
	} else if (ctx_conn->type == CTX_CONNECT &&
			!ctx_state(IWARP_INVALID, ctx_conn)) {
		dprint(DBG_ON, LOG_ERROR, "connect-context must be in state "
				"IWARP_INVALID before allocation");
		return -1;
	}
	if (ctx_conn->state == IWARP_VALID) {
		dprint(DBG_CM, LOG_WARNING, "connection context is already valid");
		return 0;
	}

	/* validate the context type before anything is acquired so that the
	   early error returns do not leave an initialized semaphore behind */
	if (ctx_conn->type == CTX_ACCEPT) {

		if (!ctx_conn->cm_id) {
			dprint(DBG_ON, LOG_ERROR, "accept-context: cm id is NULL");
			return -1;
		}
		/* cm id is already bound to device */

	} else if (ctx_conn->type == CTX_CONNECT) {

		/* 0-init active side context (passive side does this at iw_listen());
		   this has to happen before the semaphore is initialized and must not
		   lose the type and state that were just verified */
		memset(ctx_conn, 0, sizeof(struct iw_ctx_conn));
		ctx_conn->type = CTX_CONNECT;
		ctx_conn->state = IWARP_INVALID;

	} else {
		dprint(DBG_ON, LOG_ERROR, "invalid context type: %d", ctx_conn->type);
		return -1;
	}

	memset(&qp_init_attr, 0, sizeof(qp_init_attr));
	memset(&srq_init_attr, 0, sizeof(srq_init_attr));

	/* init connection event semaphore */
	ret = sem_init(&ctx_conn->sem, 0, 0);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "context semaphore init failed (%m)");
		return -1;
	}

	/* do the binding if necessary */
	if (ctx_conn->type == CTX_CONNECT) {

		/* create local cm id (passiv side does this at iw_listen()) */
		ret = (*lib->ops.s_rdma_create_id)(lib->event_channel,
				&ctx_conn->cm_id, ctx_conn, RDMA_PS_TCP);
		if (ret) {
			dprint(DBG_ON, LOG_ERROR, "failed to create local cm id (%m)");
			goto destroy_sem;
		}

		/* cm_id has to be bound to device */
		ret = addr_route_resolve(remote_host, remote_port, ctx_conn);
		if (ret) {
			goto destroy_id;
		}

	}

	/* setup pd */
	if (!shared_pd) {
		/* create new pd */
		ctx_conn->iw_pd = malloc(sizeof(struct iw_pd));
		if (!ctx_conn->iw_pd) {
			dprint(DBG_ON, LOG_ERROR, "out of memory");
			goto destroy_id;
		}
		ctx_conn->iw_pd->pd =
			(*lib->ops.s_ibv_alloc_pd)(ctx_conn->cm_id->verbs);
		if (!ctx_conn->iw_pd->pd) {
			dprint(DBG_ON, LOG_ERROR, "failed to alloc pd (%m)");
			goto free_pd;
		}
		atomic_set(&ctx_conn->iw_pd->ref, 1);
	} else {
		/* use existing, shared pd */
		ctx_conn->iw_pd = shared_pd;
		atomic_inc(&ctx_conn->iw_pd->ref);
	}

	/* setup send completion event channel */
	ctx_conn->send_comp_channel = (*lib->ops.s_ibv_create_comp_channel)
									(ctx_conn->cm_id->verbs);
	if (!ctx_conn->send_comp_channel) {
		dprint(DBG_ON, LOG_ERROR, "failed to create send completion channel "
				"(%m)");
		goto dealloc_pd;
	}

	/* setup receive completion event channel */
	ctx_conn->recv_comp_channel = (*lib->ops.s_ibv_create_comp_channel)
									(ctx_conn->cm_id->verbs);
	if (!ctx_conn->recv_comp_channel) {
		dprint(DBG_ON, LOG_ERROR, "failed to create recv completion channel "
				"(%m)");
		goto destroy_send_comp_channel;
	}

	/* check/set the qp capacities */
	if (!qp_cap) {
		qp_init_attr.cap.max_inline_data = MAX_INLINE_DATA;
		qp_init_attr.cap.max_send_wr = MAX_SEND_WR;
		qp_init_attr.cap.max_recv_wr = MAX_RECV_WR;
		qp_init_attr.cap.max_send_sge = MAX_SEND_SGE;
		qp_init_attr.cap.max_recv_sge = MAX_RECV_SGE;
	} else {
		/* clamp the caller's wishes to the library maxima (in place, so the
		   caller sees what was actually used) */
		qp_cap->max_inline_data =
			min(qp_cap->max_inline_data, MAX_INLINE_DATA);
		qp_cap->max_send_wr =
			min(qp_cap->max_send_wr, MAX_SEND_WR);
		qp_cap->max_recv_wr =
			min(qp_cap->max_recv_wr, MAX_RECV_WR);
		qp_cap->max_send_sge =
			min(qp_cap->max_send_sge, MAX_SEND_SGE);
		qp_cap->max_recv_sge =
			min(qp_cap->max_recv_sge, MAX_RECV_SGE);
		qp_init_attr.cap.max_inline_data = qp_cap->max_inline_data;
		qp_init_attr.cap.max_send_wr = qp_cap->max_send_wr;
		qp_init_attr.cap.max_recv_wr = qp_cap->max_recv_wr;
		qp_init_attr.cap.max_send_sge = qp_cap->max_send_sge;
		qp_init_attr.cap.max_recv_sge = qp_cap->max_recv_sge;
	}

	/* setup send cq */
	ctx_conn->send_cq = (*lib->ops.s_ibv_create_cq)(ctx_conn->cm_id->verbs,
			qp_init_attr.cap.max_send_wr, ctx_conn, ctx_conn->send_comp_channel,
			0);
	if (!ctx_conn->send_cq) {
		dprint(DBG_ON, LOG_ERROR, "failed to create send cq (%m)");
		goto destroy_recv_comp_channel;
	}
	/* request event notification for the next event on send cq */
	ret = ibv_req_notify_cq(ctx_conn->send_cq, 0);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to request notifications on send"
				"cq (%m)");
		goto destroy_send_cq;
	}

	/* setup receive cq */
	ctx_conn->receive_cq = (*lib->ops.s_ibv_create_cq)(ctx_conn->cm_id->verbs,
			qp_init_attr.cap.max_recv_wr, ctx_conn, ctx_conn->recv_comp_channel,
			0);
	if (!ctx_conn->receive_cq) {
		dprint(DBG_ON, LOG_ERROR, "failed to create receive cq (%m)");
		goto destroy_send_cq;
	}
	/* request event notification for the next event on receive cq */
	ret = ibv_req_notify_cq(ctx_conn->receive_cq, 0);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to request notifications on receive"
				"cq (%m)");
		goto destroy_recv_cq;
	}

	/* setup srq */
	if (create_srq) {
		/* create new srq */
		if (existing_srq) {
			dprint(DBG_ON, LOG_WARNING, "requested to create a new srq although"
					" a pointer to an existing one is given - using new srq");
		}
		ctx_conn->iw_srq = malloc(sizeof(struct iw_srq));
		if (!ctx_conn->iw_srq) {
			dprint(DBG_ON, LOG_ERROR, "out of memory");
			goto destroy_recv_cq;
		}
		//TODO: the attributes here are arbitrary and should be an argument
		srq_init_attr.attr.max_wr = qp_init_attr.cap.max_recv_wr;
		srq_init_attr.attr.max_sge = qp_init_attr.cap.max_recv_sge;
		srq_init_attr.attr.srq_limit = srq_init_attr.attr.max_wr/10; // 10%
		srq_init_attr.srq_context = ctx_conn;
		ctx_conn->iw_srq->srq =
			(*lib->ops.s_ibv_create_srq)(ctx_conn->iw_pd->pd, &srq_init_attr);
		if (!ctx_conn->iw_srq->srq) {
			dprint(DBG_ON, LOG_ERROR, "failed to create srq");
			goto free_srq;
		}
		atomic_set(&ctx_conn->iw_srq->ref, 1);
	} else {
		if (existing_srq) {
			/* use existing srq */
			ctx_conn->iw_srq = existing_srq;
			atomic_inc(&ctx_conn->iw_srq->ref);
		}
	}

	/* setup qp */
	qp_init_attr.qp_context = ctx_conn;
	qp_init_attr.qp_type = IBV_QPT_RC;
	qp_init_attr.recv_cq = ctx_conn->receive_cq;
	qp_init_attr.send_cq = ctx_conn->send_cq;
	qp_init_attr.sq_sig_all = 0;
	qp_init_attr.srq = ctx_conn->iw_srq ? ctx_conn->iw_srq->srq : NULL;
	qp_init_attr.xrc_domain = 0;
	ret = (*lib->ops.s_rdma_create_qp)(ctx_conn->cm_id, ctx_conn->iw_pd->pd,
			&qp_init_attr);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to create qp (%m)");
		goto destroy_srq;
	}

	create_aeq_thread(ctx_conn->cm_id->verbs);

	ctx_conn->qp = ctx_conn->cm_id->qp;

	ctx_conn->bufadv_valid = 0;

	ctx_conn->state = IWARP_VALID;

	dprint(DBG_CM, LOG_INFO, "iwarp connection context created");

	return 0;

	/* Undo everything in case of an error. Every member that is released is
	   reset to NULL so that a later iw_ctx_free() on the same context does
	   not release it a second time. */
destroy_srq:
	if (create_srq) {
		(*lib->ops.s_ibv_destroy_srq)(ctx_conn->iw_srq->srq);
	} else if (existing_srq) {
		/* give back the srq reference taken above (not a pd reference) */
		atomic_sub_and_test(&ctx_conn->iw_srq->ref);
		ctx_conn->iw_srq = NULL;
	}
free_srq:
	if (create_srq) {
		free(ctx_conn->iw_srq);
		ctx_conn->iw_srq = NULL;
	}
destroy_recv_cq:
	(*lib->ops.s_ibv_destroy_cq)(ctx_conn->receive_cq);
	ctx_conn->receive_cq = NULL;
destroy_send_cq:
	(*lib->ops.s_ibv_destroy_cq)(ctx_conn->send_cq);
	ctx_conn->send_cq = NULL;
destroy_recv_comp_channel:
	(*lib->ops.s_ibv_destroy_comp_channel)(ctx_conn->recv_comp_channel);
	ctx_conn->recv_comp_channel = NULL;
destroy_send_comp_channel:
	(*lib->ops.s_ibv_destroy_comp_channel)(ctx_conn->send_comp_channel);
	ctx_conn->send_comp_channel = NULL;
dealloc_pd:
	if (!shared_pd) {
		(*lib->ops.s_ibv_dealloc_pd)(ctx_conn->iw_pd->pd);
	} else {
		atomic_sub_and_test(&ctx_conn->iw_pd->ref);
	}
free_pd:
	if (!shared_pd) {
		free(ctx_conn->iw_pd);
	}
	ctx_conn->iw_pd = NULL;
destroy_id:
	if (ctx_conn->type == CTX_CONNECT) {
		(*lib->ops.s_rdma_destroy_id)(ctx_conn->cm_id);
		ctx_conn->cm_id = NULL;
	}
destroy_sem:
	if (ctx_conn->type == CTX_CONNECT) {
		sem_destroy(&ctx_conn->sem);
	}
	return -1;

}


/*
 * Release all resources held by a connection context: qp, srq and pd (the
 * latter two only when the last reference is dropped), cqs (after
 * acknowledging outstanding cq events), completion channels, the memory
 * exchange infrastructure, inbound private data, the cm id and the event
 * semaphore. Released members are reset to NULL and the context is put into
 * state IWARP_INVALID.
 *
 * @ctx_conn:	the connection context to tear down
 *
 * Return 0 on success or if @ctx_conn is NULL; -1 if the global context or
 * the cm id is missing, or if destroying the srq, a cq, a completion channel
 * or the pd failed (failures of the cm id and semaphore destruction are only
 * logged).
 */
int iw_ctx_free(
		IN OUT 	struct iw_ctx_conn	*ctx_conn)
{
	int					 ret, failure = 0;
	struct ibv_context	*verbs;

	if (ctx_null(ctx_conn)) {
		return 0;
	}

	/* every release below goes through the global function table */
	if (!lib) {
		dprint(DBG_ON, LOG_ERROR, "global context should exist here");
		return -1;
	}

	if (!ctx_conn->cm_id) {
		if (ctx_conn->type == CTX_ACCEPT) {
			dprint(DBG_ON, LOG_ERROR, "accept-context: remote id is NULL");
		} else {
			dprint(DBG_ON, LOG_ERROR, "connect-context: local id is NULL");
		}
		return -1;
	}

	/* remember the device context now: it is needed after the cm id has been
	   destroyed and must not be read through the destroyed id */
	verbs = ctx_conn->cm_id->verbs;

	/* qp */
	if (ctx_conn->qp) {
		(*lib->ops.s_rdma_destroy_qp)(ctx_conn->cm_id);
		ctx_conn->qp = NULL;
	}

	/* srq (destroyed by whoever drops the last reference) */
	if (ctx_conn->iw_srq) {
		if (atomic_sub_and_test(&ctx_conn->iw_srq->ref)) {
			ret = (*lib->ops.s_ibv_destroy_srq)(ctx_conn->iw_srq->srq);
			if (ret) {
				dprint(DBG_ON, LOG_ERROR, "failed to deallocate srq (%m)");
				failure = 1;
			}
			free(ctx_conn->iw_srq);
		}
		ctx_conn->iw_srq = NULL;
	}

	/* receive cq */
	if (ctx_conn->receive_cq) {
		if (ctx_conn->rcq_acks) {
			/* outstanding events have to be acked before the cq goes away */
			(*lib->ops.s_ibv_ack_cq_events)(ctx_conn->receive_cq,
					ctx_conn->rcq_acks);
			ctx_conn->rcq_acks = 0;
		}
		ret = (*lib->ops.s_ibv_destroy_cq)(ctx_conn->receive_cq);
		if (ret) {
			dprint(DBG_ON, LOG_ERROR, "failed to destroy receive cq (%m)");
			failure = 1;
		}
		ctx_conn->receive_cq = NULL;
	}

	/* send cq */
	if (ctx_conn->send_cq) {
		if (ctx_conn->scq_acks) {
			/* outstanding events have to be acked before the cq goes away */
			(*lib->ops.s_ibv_ack_cq_events)(ctx_conn->send_cq,
					ctx_conn->scq_acks);
			ctx_conn->scq_acks = 0;
		}
		ret = (*lib->ops.s_ibv_destroy_cq)(ctx_conn->send_cq);
		if (ret) {
			dprint(DBG_ON, LOG_ERROR, "failed to destroy send cq (%m)");
			failure = 1;
		}
		ctx_conn->send_cq = NULL;
	}

	/* receive completion channel */
	if (ctx_conn->recv_comp_channel) {
		ret = (*lib->ops.s_ibv_destroy_comp_channel)(
				ctx_conn->recv_comp_channel);
		if (ret) {
			dprint(DBG_ON, LOG_ERROR, "failed to destroy recv completion"
					" channel (%m)");
			failure = 1;
		}
		ctx_conn->recv_comp_channel = NULL;
	}

	/* send completion channel */
	if (ctx_conn->send_comp_channel) {
		ret = (*lib->ops.s_ibv_destroy_comp_channel)(
				ctx_conn->send_comp_channel);
		if (ret) {
			dprint(DBG_ON, LOG_ERROR, "failed to destroy send completion"
					" channel (%m)");
			failure = 1;
		}
		ctx_conn->send_comp_channel = NULL;
	}

	/* cleanup mem exchange infrastructure if it exists */
	if (ctx_conn->bufadv_valid) {
		bufadv_free(ctx_conn);
	}

	/* pd (deallocated by whoever drops the last reference) */
	if (ctx_conn->iw_pd) {
		if (atomic_sub_and_test(&ctx_conn->iw_pd->ref)) {
			ret = (*lib->ops.s_ibv_dealloc_pd)(ctx_conn->iw_pd->pd);
			if (ret) {
				dprint(DBG_ON, LOG_ERROR, "failed to deallocate pd (%m)");
				failure = 1;
			}
			free(ctx_conn->iw_pd);
		}
	}
	ctx_conn->iw_pd = NULL;

	/* private data */
	if (ctx_conn->priv_data_in && ctx_conn->priv_data_in_len > 0) {
		free(ctx_conn->priv_data_in);
		ctx_conn->priv_data_in = NULL;
		ctx_conn->priv_data_in_len = -1;
	}

	/* cm id */
	ret = (*lib->ops.s_rdma_destroy_id)(ctx_conn->cm_id);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to destroy connection id (%m)");
	}
	ctx_conn->cm_id = NULL;

	/* semaphore */
	ret = sem_destroy(&ctx_conn->sem);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to destroy context semaphore (%m)");
	}

	destroy_aeq_thread(verbs);

	/* invalidate context */
	ctx_conn->state = IWARP_INVALID;

	dprint(DBG_CM, LOG_INFO, "iwarp connection context destroyed");

	return -failure;

}


/*
 * iw_connect - issue a connection request on a connect-type context and wait
 * for the outcome.
 *
 * priv_data, priv_data_len:
 *     handed to set_connection_parameters() to fill the rdma_conn_param that
 *     accompanies the request.
 * ctx_conn:
 *     must be non-NULL, in IWARP_VALID state, must not be of type CTX_ACCEPT
 *     and must carry a cm id.
 *
 * Returns 0 once the context is in IWARP_CONNECTED state. Returns -1
 * otherwise; if the failure happens after the argument checks (connect call
 * failed or the context did not reach IWARP_CONNECTED) the context is put
 * into IWARP_ERROR state.
 */
int iw_connect(
		IN		const void			*priv_data,
		IN		uint8_t				 priv_data_len,
		IN OUT	struct iw_ctx_conn	*ctx_conn)
{
	int						 ret;
	struct rdma_conn_param	 conn_param;
	char					*priv_data_in;
	char					 loc_ip[INET6_ADDRSTRLEN], rem_ip[INET6_ADDRSTRLEN];

	if (ctx_null(ctx_conn)) {
		return -1;
	}

	/* the iwarp connection context must be in IWARP_VALID state now */
	if (!ctx_state(IWARP_VALID, ctx_conn)) {
		dprint(DBG_ON, LOG_ERROR, "did you forget to allocate the context?");
		return -1;
	}

	if (ctx_conn->type == CTX_ACCEPT) {
		dprint(DBG_ON, LOG_ERROR, "cannot connect using an accept-context; did"
				" you set the type-property of the context you passed?");
		return -1;
	}

	/* make sure the global context exists */
	if (!lib) {
		dprint(DBG_ON, LOG_ERROR, "global context should exist here");
		return -1;
	}

	/* the cm id is handed to the connect call below, so it has to exist */
	if (!ctx_conn->cm_id) {
		dprint(DBG_ON, LOG_ERROR, "connect-context: local cm id is NULL");
		return -1;
	}

	/* CONNECT TO THE SERVER **************************************************/

	/* set connection parameters */
	set_connection_parameters(&conn_param, priv_data, priv_data_len);

	/* connection request */
	ret = (*lib->ops.s_rdma_connect)(ctx_conn->cm_id, &conn_param);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to connect to remote host %s:%d"
				" (%d:%s)",	iw_ctx_get_remote_ip(ctx_conn),
				iw_ctx_get_remote_port(ctx_conn), ret, strerror(-ret));
		goto error;
	}

	/*
	 * await connection response; the semaphore is presumably posted by the
	 * connection event handling code once the context state has been updated
	 */
	dprint(DBG_CM, LOG_INFO, "waiting for connection response");
	sem_wait_safe(&ctx_conn->sem);
	if (!ctx_state(IWARP_CONNECTED, ctx_conn)) {
		dprint(DBG_ON, LOG_ERROR, "iwarp connection failed");
		/* response might be a reject with private data (assume string) */
		if (ctx_conn->state == IWARP_REJECTED) {
			dprint(DBG_ON, LOG_ERROR, "connection rejected");
			/* a negative length is used as "no data" marker elsewhere */
			if (ctx_conn->priv_data_in && ctx_conn->priv_data_in_len > 0) {
				size_t len = (size_t)ctx_conn->priv_data_in_len;

				/* print private inbound data (if any) */
				dprint(DBG_ON, LOG_ERROR, "inbound private data (%d bytes):",
						ctx_conn->priv_data_in_len);
				/*
				 * Copy exactly len bytes into a zeroed buffer of len + 1
				 * bytes: the copy is always terminated and the inbound data
				 * is never read beyond its announced length, even if the
				 * peer did not send a terminating NUL.
				 */
				priv_data_in = calloc(1, len + 1);
				if (priv_data_in) {
					memcpy(priv_data_in, ctx_conn->priv_data_in, len);
					dprint(DBG_ON, LOG_ERROR, " \"%s\"", priv_data_in);
					free(priv_data_in);
					priv_data_in = NULL;
				} else {
					dprint(DBG_ON, LOG_ERROR, "out of memory while printing"
							" inbound private data");
				}
			}
		}
		goto error;
	}

	/*
	 * the ip getters return pointers to static buffers, so take copies
	 * before both are used in a single print statement
	 */
	strcpy(loc_ip, iw_ctx_get_local_ip(ctx_conn));
	strcpy(rem_ip, iw_ctx_get_remote_ip(ctx_conn));
	dprint(DBG_CM, LOG_INFO, "iwarp connection established [%s:%d <--> %s:%d]",
				loc_ip, iw_ctx_get_local_port(ctx_conn),
				rem_ip, iw_ctx_get_remote_port(ctx_conn));

	return 0;

error:
	ctx_conn->state = IWARP_ERROR;

	return -1;

}


/*
 * iw_open - set up a listen context and start listening for connection
 * requests.
 *
 * local_ip, local_port:
 *     resolved with getaddrinfo() (IPv4 only, see hints) to obtain the
 *     address to bind to. local_port must not be NULL because it is also
 *     parsed numerically.
 * backlog:
 *     passed on unchanged to the listen call.
 * ctx_listen:
 *     caller provided storage; it is zeroed and initialized here.
 *
 * The global library context is created on demand via lib_init().
 *
 * Returns 0 on success and -1 on failure. On failure every resource that was
 * acquired here (semaphore, cm id, address list) is released again.
 */
int iw_open(
		IN		const char				*local_ip,
		IN		const char				*local_port,
		IN		int						 backlog,
		IN OUT	struct iw_ctx_listen	*ctx_listen)
{
	int 				 ret;
	struct sockaddr_in 	 srv_addr;
	struct addrinfo		*res = NULL;
	struct addrinfo 	 hints = {
			.ai_family = AF_INET,
			.ai_socktype = SOCK_STREAM
	};

	if (ctx_listen == NULL) {
		dprint(DBG_ON, LOG_ERROR, "listen context is NULL");
		return -1;
	}

	/* the port string is fed to strtol() below and must therefore exist */
	if (local_port == NULL) {
		dprint(DBG_ON, LOG_ERROR, "local port is NULL");
		return -1;
	}

	if (!lib) {
		ret = lib_init();
		if (ret) {
			dprint(DBG_ON, LOG_ERROR, "failed to create global context");
			return -1;
		}
	}

	memset(ctx_listen, 0, sizeof(struct iw_ctx_listen));
	ret = sem_init(&ctx_listen->sem, 0, 0);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to init semaphore");
		return -1;
	}

	ret = (*lib->ops.s_rdma_create_id)(lib->event_channel, &ctx_listen->cm_id,
			ctx_listen, RDMA_PS_TCP);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to create listen cm id (%m)");
		goto destroy_semaphore;
	}

	/* bind local addr/port to RDMA address */
	ret = getaddrinfo(local_ip, local_port, &hints, &res);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to get address info for %s:%s (%s)",
				local_ip ? local_ip : "(null)", local_port,
				gai_strerror(ret));
		/* cm id and semaphore exist already and must not be leaked */
		goto destroy_id;
	}
	memset(&srv_addr, 0, sizeof(srv_addr));
	srv_addr.sin_addr.s_addr =
		((struct sockaddr_in*)res->ai_addr)->sin_addr.s_addr;
	/* only the address was needed; release the list right away */
	freeaddrinfo(res);
	res = NULL;
	srv_addr.sin_family = AF_INET;
	/*
	 * NOTE(review): the port is stored without htons(). The port getters and
	 * debug prints in this file also use sin_port without byte order
	 * conversion, so this is left as is - confirm against the connecting
	 * side before changing it.
	 */
	srv_addr.sin_port = strtol(local_port, NULL, 0);
	ret = (*lib->ops.s_rdma_bind_addr)(ctx_listen->cm_id,
			(struct sockaddr*)&srv_addr);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "binding local addr failed (%m)");
		goto destroy_id;
	}

	/* start listening on the bound addr/port */
	ret = (*lib->ops.s_rdma_listen)(ctx_listen->cm_id, backlog);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "listening failed (%m)");
		goto destroy_id;
	}

	/* pthread_mutex_init() reports failures through its return value */
	ret = pthread_mutex_init(&ctx_listen->list_mutex, NULL);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to init list mutex (%s)",
				strerror(ret));
		goto destroy_id;
	}

	return 0;

destroy_id:
	(*lib->ops.s_rdma_destroy_id)(ctx_listen->cm_id);
	/* do not leave a dangling id behind for a later iw_close() */
	ctx_listen->cm_id = NULL;
destroy_semaphore:
	sem_destroy(&ctx_listen->sem);

	return -1;

}


/*
 * iw_close - tear down a listen context that was set up by iw_open().
 *
 * Destroys the listen cm id, the list mutex and the semaphore of the
 * context. The cm id pointer is cleared afterwards so that a repeated call
 * is rejected instead of destroying the id twice.
 *
 * Returns 0 on success. Returns -1 if the context, the global library
 * context or the cm id is missing, or if destroying the cm id fails (mutex
 * and semaphore are left untouched in that case).
 */
int iw_close(
		IN OUT	struct iw_ctx_listen *ctx_listen)
{
	int ret;

	if (!ctx_listen) {
		dprint(DBG_ON, LOG_ERROR, "listen context is NULL");
		return -1;
	}

	/* the destroy call is reached through the global context */
	if (!lib) {
		dprint(DBG_ON, LOG_ERROR, "global context should exist here");
		return -1;
	}

	if (!ctx_listen->cm_id) {
		dprint(DBG_ON, LOG_ERROR, "listen cm id is NULL");
		return -1;
	}

	ret = (*lib->ops.s_rdma_destroy_id)(ctx_listen->cm_id);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to destroy listen cm id");
		return -1;
	}
	ctx_listen->cm_id = NULL;

	pthread_mutex_destroy(&ctx_listen->list_mutex);

	/* counterpart of the sem_init() done in iw_open() */
	ret = sem_destroy(&ctx_listen->sem);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to destroy listen semaphore (%m)");
	}

	return 0;

}


/*
 * iw_listen - obtain the next connection request for a listen context.
 *
 * ctx_listen:
 *     listen context; must be non-NULL and carry a cm id (its bound address
 *     is printed and the request is fetched from it via get_request()).
 * ctx_conn:
 *     must be non-NULL and in IWARP_INVALID state. It is zeroed and marked
 *     as CTX_ACCEPT before get_request() is called on it.
 *
 * All arguments and the global library context are validated before
 * ctx_conn is modified.
 *
 * Returns 0 if ctx_conn is in IWARP_CONN_REQ state after get_request(),
 * -1 otherwise.
 */
int iw_listen(
		IN OUT	struct iw_ctx_listen	*ctx_listen,
		IN OUT	struct iw_ctx_conn		*ctx_conn)
{
	char	addr_str[INET6_ADDRSTRLEN];

	if (ctx_null(ctx_conn)) {
		return -1;
	}

	if (!ctx_state(IWARP_INVALID, ctx_conn)) {
		dprint(DBG_ON, LOG_ERROR, "ctx_conn is expected to be in IWARP_INVALID "
				"state before this call");
		return -1;
	}

	if (!ctx_listen) {
		dprint(DBG_ON, LOG_ERROR, "listen context is NULL");
		return -1;
	}

	/* the address/port macros below dereference the listen cm id */
	if (!ctx_listen->cm_id) {
		dprint(DBG_ON, LOG_ERROR, "listen cm id is NULL");
		return -1;
	}

	/* make sure that the global context exists; do not continue without */
	if (!lib) {
		dprint(DBG_ON, LOG_ERROR, "global context should exist here");
		return -1;
	}

	/* 0-init passive side context (active side does this at iw_ctx_alloc()) */
	memset(ctx_conn, 0, sizeof(struct iw_ctx_conn));
	ctx_conn->type = CTX_ACCEPT;

	/* get connection request from the listen context or wait for one */
	listen_ctx_to_addr(ctx_listen, addr_str);
	dprint(DBG_CM, LOG_INFO, "listening for CONNECT_REQUEST event at %s:%d",
			addr_str, listen_ctx_to_port(ctx_listen));
	get_request(ctx_listen, ctx_conn);
	if (!ctx_state(IWARP_CONN_REQ, ctx_conn)) {
		dprint(DBG_ON, LOG_ERROR, "failed to listen for connection request");
		return -1;
	}

	dprint(DBG_CM, LOG_INFO, "connection request received");

	return 0;

}


/*
 * iw_accept - accept a pending connection request on an accept-type context
 * and wait until the connection is established.
 *
 * priv_data, priv_data_len:
 *     handed to set_connection_parameters() to fill the rdma_conn_param that
 *     accompanies the accept.
 * ctx_conn:
 *     must be non-NULL, in IWARP_VALID state, of type CTX_ACCEPT and must
 *     carry a cm id.
 *
 * Returns 0 once the context is in IWARP_CONNECTED state. Returns -1
 * otherwise; except for a NULL context, every failure puts the context into
 * IWARP_ERROR state.
 */
int iw_accept(
		IN		const void			*priv_data,
		IN		uint8_t				 priv_data_len,
		IN OUT 	struct iw_ctx_conn	*ctx_conn)
{
	int 					ret;
	struct rdma_conn_param	conn_param;
	char					loc_ip[INET6_ADDRSTRLEN], rem_ip[INET6_ADDRSTRLEN];

	if (ctx_null(ctx_conn)) {
		return -1;
	}

	/* the accept call is reached through the global context */
	if (!lib) {
		dprint(DBG_ON, LOG_ERROR, "global context should exist here");
		goto error;
	}

	if (!ctx_state(IWARP_VALID, ctx_conn)) {
		dprint(DBG_ON, LOG_ERROR, "iwarp context must be in IWARP_VALID"
				" state; did you forget to call iw_ctx_alloc()?");
		goto error;
	}

	if (ctx_conn->type != CTX_ACCEPT) {
		dprint(DBG_ON, LOG_ERROR, "wrong context type");
		goto error;
	}

	if (!ctx_conn->cm_id) {
		dprint(DBG_ON, LOG_ERROR, "remote id is NULL");
		goto error;
	}

	/* set the connection parameters */
	set_connection_parameters(&conn_param, priv_data, priv_data_len);

	/* accept the connection */
	ret = (*lib->ops.s_rdma_accept)(ctx_conn->cm_id, &conn_param);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "failed to accept connection (%m)");
		goto error;
	}
	dprint(DBG_CM, LOG_INFO, "waiting for ESTABLISHED event");
	sem_wait_safe(&ctx_conn->sem);
	if (!ctx_state(IWARP_CONNECTED, ctx_conn)) {
		dprint(DBG_ON, LOG_ERROR, "failed to accept connection");
		goto error;
	}

	/*
	 * the ip getters return pointers to static buffers, so take copies
	 * before both are used in a single print statement
	 */
	strcpy(loc_ip, iw_ctx_get_local_ip(ctx_conn));
	strcpy(rem_ip, iw_ctx_get_remote_ip(ctx_conn));
	dprint(DBG_CM, LOG_INFO, "iwarp connection established [%s:%d <--> %s:%d]",
			loc_ip, iw_ctx_get_local_port(ctx_conn),
			rem_ip, iw_ctx_get_remote_port(ctx_conn));

	return 0;

error:
	ctx_conn->state = IWARP_ERROR;

	return -1;

}


/*
 * iw_reject - reject a pending connection request on an accept-type context.
 *
 * priv_data, priv_data_len:
 *     passed on unchanged to the reject call.
 * ctx_conn:
 *     must be non-NULL, in IWARP_VALID state, of type CTX_ACCEPT and must
 *     carry a cm id.
 *
 * Returns 0 on success and leaves the context in IWARP_DISCONNECTED state.
 * Returns -1 otherwise; except for a NULL context, every failure puts the
 * context into IWARP_ERROR state.
 */
int iw_reject(
		IN		const void			*priv_data,
		IN		uint8_t				 priv_data_len,
		IN OUT	struct iw_ctx_conn	*ctx_conn)
{
	int ret;

	if (ctx_null(ctx_conn)) {
		return -1;
	}

	/* the reject call is reached through the global context */
	if (!lib) {
		dprint(DBG_ON, LOG_ERROR, "global context should exist here");
		goto error;
	}

	if (!ctx_state(IWARP_VALID, ctx_conn)) {
		dprint(DBG_ON, LOG_ERROR, "connection context must be in IWARP_VALID"
				" state; did you forget to call iw_ctx_alloc()?");
		goto error;
	}

	if (ctx_conn->type != CTX_ACCEPT) {
		dprint(DBG_ON, LOG_ERROR, "wrong context type");
		goto error;
	}

	if (!ctx_conn->cm_id) {
		dprint(DBG_ON, LOG_ERROR, "remote id is NULL");
		goto error;
	}

	ret = (*lib->ops.s_rdma_reject)(ctx_conn->cm_id, priv_data,
			priv_data_len);
	if (ret) {
		dprint(DBG_ON, LOG_ERROR, "rejecting connection failed (%m)");
		goto error;
	}

	dprint(DBG_CM, LOG_INFO, "connection rejected");

	ctx_conn->state = IWARP_DISCONNECTED;

	//TODO: is there a DISCONNECT event or something now?

	return 0;

error:
	ctx_conn->state = IWARP_ERROR;

	return -1;

}


/*
 * iw_await_disconnect - block until the connection context has reached
 * IWARP_DISCONNECTED state.
 *
 * If the context is already in IWARP_DISCONNECTED state the call returns
 * immediately. Otherwise the context has to be in IWARP_CONNECTED state and
 * the call waits on the context semaphore.
 *
 * Returns 0 once the context is disconnected. Returns -1 for a NULL context,
 * or - with the context put into IWARP_ERROR state - if the context was not
 * connected or did not end up disconnected after the wait.
 */
int iw_await_disconnect(
		IN OUT	struct iw_ctx_conn *ctx_conn)
{
	if (ctx_null(ctx_conn)) {
		return -1;
	}

	/* nothing to wait for if the disconnect has been received already */
	if (ctx_conn->state != IWARP_DISCONNECTED) {

		if (!ctx_state(IWARP_CONNECTED, ctx_conn)) {
			dprint(DBG_ON, LOG_ERROR, "cannot disconnect an iwarp connection"
					" context that is not in IWARP_CONNECTED state");
			ctx_conn->state = IWARP_ERROR;
			return -1;
		}

		dprint(DBG_CM, LOG_INFO, "waiting for disconnect request");
		sem_wait_safe(&ctx_conn->sem);

		/* woken up for something other than a disconnect */
		if (!ctx_state(IWARP_DISCONNECTED, ctx_conn)) {
			ctx_conn->state = IWARP_ERROR;
			return -1;
		}
	}

	dprint(DBG_CM, LOG_INFO, "disconnect request received");

	return 0;

}


/*
 * iw_disconnect - actively close an established iwarp connection.
 *
 * The global library context has to exist, the connection context has to be
 * in IWARP_CONNECTED state and has to carry a cm id; the disconnect call is
 * then issued on that cm id.
 *
 * Returns 0 on success and leaves the context in IWARP_DISCONNECTED state.
 * Returns -1 otherwise; except for a NULL context, every failure puts the
 * context into IWARP_ERROR state.
 */
int iw_disconnect(
		IN OUT	struct iw_ctx_conn *ctx_conn)
{
	if (ctx_null(ctx_conn)) {
		return -1;
	}

	/* each branch handles one failure reason; the last one is the success */
	if (!lib) {
		dprint(DBG_ON, LOG_ERROR, "shared conntext is NULL");
	} else if (!ctx_state(IWARP_CONNECTED, ctx_conn)) {
		dprint(DBG_ON, LOG_ERROR, "cannot disconnect an iwarp connection"
				" context that is not in IWARP_CONNECTED state");
	} else if (ctx_conn->cm_id == NULL) {
		/* tell the user which kind of context is lacking its cm id */
		if (ctx_conn->type != CTX_ACCEPT) {
			dprint(DBG_ON, LOG_ERROR, "connect-context: local cm id is NULL");
		} else {
			dprint(DBG_ON, LOG_ERROR, "accept-context: remote cm id is NULL");
		}
	} else if ((*lib->ops.s_rdma_disconnect)(ctx_conn->cm_id)) {
		dprint(DBG_ON, LOG_ERROR, "disconnect failed (%m)");
	} else {
		//TODO: is there a disconnect event here?

		dprint(DBG_CM, LOG_INFO, "iwarp connection closed");

		ctx_conn->state = IWARP_DISCONNECTED;

		return 0;
	}

	/* common failure exit */
	ctx_conn->state = IWARP_ERROR;

	return -1;
}


/*
 * iw_ctx_get_local_port - return the port stored in the source address of
 * the context's cm id.
 *
 * The value is returned exactly as stored in the socket address (no byte
 * order conversion is applied). Both IPv4 and IPv6 source addresses are
 * handled. Returns 0 if the context or its cm id is missing.
 */
in_port_t iw_ctx_get_local_port(
		IN	const struct iw_ctx_conn	*ctx_conn)
{
	const struct rdma_cm_id	*cm;

	if (ctx_null(ctx_conn) || !ctx_conn->cm_id) {
		return 0;
	}

	cm = ctx_conn->cm_id;

	/* the port field location depends on the address family */
	if (cm->route.addr.src_addr.sa_family == PF_INET6) {
		return ((const struct sockaddr_in6 *)
				&cm->route.addr.src_addr)->sin6_port;
	}

	return ((const struct sockaddr_in *) &cm->route.addr.src_addr)->sin_port;

}


/*
 * iw_ctx_get_remote_port - return the port stored in the destination address
 * of the context's cm id.
 *
 * The value is returned exactly as stored in the socket address (no byte
 * order conversion is applied). Both IPv4 and IPv6 destination addresses are
 * handled. Returns 0 if the context or its cm id is missing.
 */
in_port_t iw_ctx_get_remote_port(
		IN	const struct iw_ctx_conn	*ctx_conn)
{
	const struct rdma_cm_id	*cm;

	if (ctx_null(ctx_conn) || !ctx_conn->cm_id) {
		return 0;
	}

	cm = ctx_conn->cm_id;

	/* the port field location depends on the address family */
	if (cm->route.addr.dst_addr.sa_family == PF_INET6) {
		return ((const struct sockaddr_in6 *)
				&cm->route.addr.dst_addr)->sin6_port;
	}

	return ((const struct sockaddr_in *) &cm->route.addr.dst_addr)->sin_port;

}


/*
 * iw_ctx_get_local_ip - return the textual form of the source address of the
 * context's cm id.
 *
 * Both IPv4 and IPv6 source addresses are handled; the binary address is
 * taken from the field that matches the address family.
 *
 * The returned string lives in a static buffer local to this function: it is
 * overwritten by the next call and must be copied if it has to be kept.
 * Returns an empty string if the context or its cm id is missing or if the
 * address cannot be converted.
 */
char* iw_ctx_get_local_ip(
		IN	const struct iw_ctx_conn	*ctx_conn)
{
	static char	 addr[INET6_ADDRSTRLEN];
	const void	*bin_addr;
	int			 family;

	if (ctx_null(ctx_conn) || !ctx_conn->cm_id) {
		dprint(DBG_ON, LOG_ERROR, "invalid iwarp connection context");
		return "";
	}

	family = ctx_conn->cm_id->route.addr.src_addr.sa_family;

	/*
	 * sin_addr and sin6_addr sit at different offsets within their socket
	 * address structures, so pick the one that belongs to the family
	 */
	if (family == AF_INET6) {
		bin_addr = &((const struct sockaddr_in6 *)
				&ctx_conn->cm_id->route.addr.src_addr)->sin6_addr;
	} else {
		bin_addr = &((const struct sockaddr_in *)
				&ctx_conn->cm_id->route.addr.src_addr)->sin_addr;
	}

	if (!inet_ntop(family, bin_addr, addr, sizeof(addr))) {
		dprint(DBG_ON, LOG_ERROR, "failed to create print representation of"
				" the local IP address");
		return "";
	}

	return addr;

}


/*
 * iw_ctx_get_remote_ip - return the textual form of the destination address
 * of the context's cm id.
 *
 * Both IPv4 and IPv6 destination addresses are handled; the binary address
 * is taken from the field that matches the address family.
 *
 * The returned string lives in a static buffer local to this function: it is
 * overwritten by the next call and must be copied if it has to be kept.
 * Returns an empty string if the context or its cm id is missing or if the
 * address cannot be converted.
 */
char* iw_ctx_get_remote_ip(
		IN	const struct iw_ctx_conn	*ctx_conn)
{
	static char	 addr[INET6_ADDRSTRLEN];
	const void	*bin_addr;
	int			 family;

	if (ctx_null(ctx_conn) || !ctx_conn->cm_id) {
		dprint(DBG_ON, LOG_ERROR, "invalid iwarp connection context");
		return "";
	}

	family = ctx_conn->cm_id->route.addr.dst_addr.sa_family;

	/*
	 * sin_addr and sin6_addr sit at different offsets within their socket
	 * address structures, so pick the one that belongs to the family
	 */
	if (family == AF_INET6) {
		bin_addr = &((const struct sockaddr_in6 *)
				&ctx_conn->cm_id->route.addr.dst_addr)->sin6_addr;
	} else {
		bin_addr = &((const struct sockaddr_in *)
				&ctx_conn->cm_id->route.addr.dst_addr)->sin_addr;
	}

	if (!inet_ntop(family, bin_addr, addr, sizeof(addr))) {
		dprint(DBG_ON, LOG_ERROR, "failed to create print representation of"
				" the remote address");
		return "";
	}

	return addr;

}
