/*
 * Copyright (c) 2010 Philip Frey, Systems Group ETH Zurich.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/*
 * iWARP objects.
 */

#ifndef IWARP_OBJS_H_
#define IWARP_OBJS_H_

#include <stdint.h>
#include <pthread.h>
#include <semaphore.h>
#include <rdma/rdma_cma.h>

/*
 * Upper limit for pending acknowledgements.
 * NOTE(review): presumably the bound for the scq_acks/rcq_acks counters of
 * struct iw_ctx_conn ("pending acks") -- confirm against the code that uses
 * this macro.
 */
#define MAX_PENDING_ACKS 16384

/*
 * Atomic operations support (requires gcc version 4.1 or later).
 *
 * Wrapper around a single int counter; used as the reference counter of
 * struct iw_pd and struct iw_srq.
 * NOTE(review): the gcc requirement suggests the counter is meant to be
 * modified through gcc's atomic builtins; 'volatile' on its own does not make
 * a plain read-modify-write atomic -- confirm at the call sites.
 */
typedef struct {
	volatile int counter;
} atomic_t;


/*
 * State of an iWARP connection context.
 *
 * The enumerators are numbered consecutively from 0.
 * Update iw_state_str() on changes to this enum!
 */
enum iw_state {
	IWARP_INVALID      = 0,	/* nothing initialised */
	IWARP_ADDR_RES     = 1,	/* address resolved */
	IWARP_ROUTE_RES    = 2,	/* route resolved */
	IWARP_CONN_REQ     = 3,	/* connection request received */
	IWARP_VALID        = 4,	/* qp etc initialised but not yet connected */
	IWARP_REJECTED     = 5,	/* connection response was 'reject' */
	IWARP_CONNECTED    = 6,	/* connection established */
	IWARP_DISCONNECTED = 7,	/* disconnect event received */
	IWARP_ERROR        = 8	/* error event received */
};


/*
 * Handle for a local memory region: a local buffer together with the
 * memory region (struct ibv_mr) on that buffer.
 */
struct iw_lmr {
	void					*buf;			/* local buffer */
	struct ibv_mr			*mr;			/* local memory region on buf */
	uint32_t				 len;			/* total length of buf */
	enum ibv_access_flags	 access;		/* protection settings of mr */
};

/*
 * Handle for a remote memory region: address, length and tag of a buffer
 * on the remote side.
 */
struct iw_rmr {
	uint64_t	addr;			/* remote address */
	uint32_t	len;			/* length of remote buffer */
	uint32_t	rkey;			/* remote buffer tag */
};


/*
 * Scatter-gather list: pointer to the elements plus the element count.
 */
struct iw_sgl {
	struct ibv_sge	*sg_list;	/* list of scatter-gather elements */
	int				 num_sge;	/* number of elements in the list */
};

/*
 * Protection domain: the OFED protection domain plus a reference counter so
 * that it can be shared.
 */
struct iw_pd {
	struct ibv_pd	*pd;	/* OFED protection domain */
	atomic_t		 ref;	/* reference counter for sharing */
};

/*
 * Shared receive queue: the OFED srq plus a reference counter.
 */
struct iw_srq {
	struct ibv_srq	*srq;	/* OFED srq */
	atomic_t		 ref;	/* reference counter */
};

/*
 * Completion queue type.
 * NOTE(review): going by the send_cq/receive_cq members of struct
 * iw_ctx_conn, IW_SCQ presumably selects the send cq and IW_RCQ the receive
 * cq -- confirm at the call sites.
 */
enum iw_cq_type {
	IW_SCQ = 0,
	IW_RCQ = 1
};


/*
 * Operations offered by the shared OpenFabrics RDMA library.
 *
 * Table of function pointers. Every member is named after an rdma_* or ibv_*
 * library function, prefixed with "s_". An instance of this table is
 * embedded in struct iw_lib (member 'ops') next to 'lib_rdma_handle'.
 * NOTE(review): presumably the pointers are resolved at run time from the
 * library referenced by lib_rdma_handle -- confirm in the loader code.
 */
struct rdma_ops {

	/* Connection Management (rdma_* entry points) */
	struct rdma_event_channel* (*s_rdma_create_event_channel)(void);
	void (*s_rdma_destroy_event_channel)(struct rdma_event_channel *channel);
	int (*s_rdma_get_cm_event)(struct rdma_event_channel *channel,
			struct rdma_cm_event **event);
	int (*s_rdma_ack_cm_event)(struct rdma_cm_event *event);
	int (*s_rdma_create_id)(struct rdma_event_channel *channel,
			struct rdma_cm_id **id, void *context, enum rdma_port_space ps);
	int (*s_rdma_destroy_id)(struct rdma_cm_id *id);
	int (*s_rdma_create_qp)(struct rdma_cm_id *id, struct ibv_pd *pd,
			struct ibv_qp_init_attr *qp_init_attr);
	void (*s_rdma_destroy_qp)(struct rdma_cm_id *id);
	int (*s_rdma_resolve_addr)(struct rdma_cm_id *id, struct sockaddr *src_addr,
			struct sockaddr *dst_addr, int timeout_ms);
	int (*s_rdma_resolve_route)(struct rdma_cm_id *id, int timeout_ms);
	int (*s_rdma_bind_addr)(struct rdma_cm_id *id, struct sockaddr *addr);
	int (*s_rdma_connect)(struct rdma_cm_id *id,
			struct rdma_conn_param *conn_param);
	int (*s_rdma_listen)(struct rdma_cm_id *id, int backlog);
	int (*s_rdma_accept)(struct rdma_cm_id *id,
			struct rdma_conn_param *conn_param);
	int (*s_rdma_reject)(struct rdma_cm_id *id, const void *private_data,
			uint8_t private_data_len);
	int (*s_rdma_disconnect)(struct rdma_cm_id *id);

	/*
	 * Infiniband Verbs (ibv_* entry points): protection domains, completion
	 * channels and queues, shared receive queues, memory registration,
	 * queue pair query and asynchronous events.
	 */
	struct ibv_pd* (*s_ibv_alloc_pd)(struct ibv_context *context);
	int (*s_ibv_dealloc_pd)(struct ibv_pd *pd);
	struct ibv_comp_channel* (*s_ibv_create_comp_channel)(struct ibv_context
			*context);
	int (*s_ibv_destroy_comp_channel)(struct ibv_comp_channel *channel);
	struct ibv_cq* (*s_ibv_create_cq)(struct ibv_context *context, int cqe,
			void *cq_context, struct ibv_comp_channel *channel,
			int comp_vector);
	int (*s_ibv_destroy_cq)(struct ibv_cq *cq);
	int (*s_ibv_get_cq_event)(struct ibv_comp_channel *channel,
			struct ibv_cq **cq, void **cq_context);
	void (*s_ibv_ack_cq_events)(struct ibv_cq *cq, unsigned int nevents);
	struct ibv_srq* (*s_ibv_create_srq)(struct ibv_pd *pd,
			struct ibv_srq_init_attr *srq_init_attr);
	int (*s_ibv_destroy_srq)(struct ibv_srq *srq);
	struct ibv_mr* (*s_ibv_reg_mr)(struct ibv_pd *pd, void *addr,
			size_t length, enum ibv_access_flags access);
	int (*s_ibv_dereg_mr)(struct ibv_mr *mr);
	int (*s_ibv_query_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr,
			enum ibv_qp_attr_mask attr_mask,
			struct ibv_qp_init_attr *init_attr);
	int (*s_ibv_get_async_event)(struct ibv_context *context,
			struct ibv_async_event *event);
	void (*s_ibv_ack_async_event)(struct ibv_async_event *event);

};

/*
 * Infiniband verbs context list.
 *
 * Singly linked list node holding one struct ibv_context; struct iw_lib
 * keeps a pointer of this type in 'aeq_ctx_list' ("async event queue
 * thread").
 */
struct iw_ctx_aeq {
	/*
	 * NOTE(review): presumably a reference count for this entry; it is a
	 * plain int, so it likely relies on iw_lib.aeq_ctx_list_mutex for
	 * protection -- confirm.
	 */
	int					 ref;
	/* NOTE(review): presumably the async event thread serving ibv_ctx */
	pthread_t			 thread;
	struct iw_ctx_aeq	*next;		/* next entry of the list */
	struct ibv_context	*ibv_ctx;	/* verbs context of this entry */
};

/*
 * iwarp library context: connection management state, the list of verbs
 * contexts for the async event queue thread(s), and the RDMA library
 * function table.
 */
struct iw_lib {
	/* connection management */
	struct rdma_event_channel	*event_channel;
	pthread_t					 cm_thread;
	int							 cm_thread_running;	/* flag for cm_thread */

	/* async event queue thread: list head (struct iw_ctx_aeq) and its mutex */
	struct iw_ctx_aeq			*aeq_ctx_list;
	pthread_mutex_t				 aeq_ctx_list_mutex;

	/*
	 * function pointers for rdma_* and ibv_* (see struct rdma_ops)
	 * NOTE(review): lib_rdma_handle is presumably the handle of the
	 * dynamically loaded RDMA library -- confirm in the loader code.
	 */
	void						*lib_rdma_handle;
	struct rdma_ops				 ops;
};


/*
 * Connection context type:
 * connect = active side, accept = passive side.
 */
enum iw_ctx_conn_type {
	CTX_CONNECT = 0,	/* active side */
	CTX_ACCEPT  = 1		/* passive side */
};

/*
 * Per-connection iwarp context.
 *
 * Bundles the CM id, the connection state, the private data received from
 * the peer, the verbs objects and the buffer advertisement infrastructure
 * of one connection, plus the callbacks registered for it.
 */
struct iw_ctx_conn {
	/* connection id */
	struct rdma_cm_id		*cm_id;
	sem_t					 sem;

	/* corresponding listen context */
	struct iw_ctx_listen	*ctx_listen;

	/* connection state */
	enum iw_state			 state;
	enum iw_ctx_conn_type	 type;

	/*
	 * Private data received from the remote host at MPA handshake.
	 * Documented convention: if priv_data_in_len is set to -1,
	 * *priv_data_in is invalid.
	 * NOTE(review): the length is a uint8_t, so assigning -1 stores 255 and
	 * a comparison of the field against the int constant -1 is never true
	 * (the field is promoted to int 255). Confirm how users test for the
	 * "invalid" marker.
	 */
	uint8_t					 priv_data_in_len;
	void					*priv_data_in;

	/* IB verbs */
	struct iw_pd			*iw_pd;
	struct ibv_comp_channel	*send_comp_channel;
	struct ibv_comp_channel	*recv_comp_channel;
	struct ibv_cq			*send_cq;
	struct ibv_cq			*receive_cq;
	struct ibv_qp			*qp;
	struct iw_srq			*iw_srq;
	uint32_t				 scq_acks;	/* pending acks */
	uint32_t				 rcq_acks;	/* pending acks */

	/* buffer advertisement infrastructure */
	int						 bufadv_valid;
	struct ibv_sge			*bufadv_rx_sgl;
	struct ibv_sge			*bufadv_tx_sgl;
	struct iw_lmr			*bufadv_rx;
	struct iw_lmr			*bufadv_tx;

	/* callback functions; each receives this connection context */
	void (*disconnect)(struct iw_ctx_conn *ctx_conn);
	void (*error)(struct iw_ctx_conn *ctx_conn);
	void (*established)(struct iw_ctx_conn *ctx_conn);
};


/*
 * Listening connection context.
 *
 * Holds the listen id, a semaphore, the list of connection requests that
 * are still pending (struct iw_conn_req nodes between head and tail, with
 * list_mutex declared alongside them) and the connect callback.
 */
struct iw_ctx_listen {
	struct rdma_cm_id	*cm_id;			/* listen id */
	sem_t				 sem;

	/* list of pending connection requests */
	struct iw_conn_req	*head;
	struct iw_conn_req	*tail;
	pthread_mutex_t		 list_mutex;

	/* callback functions */
	void (*connect)(struct rdma_cm_event *cm_event);
};

/*
 * Linked list of connection requests.
 *
 * One node per request; struct iw_ctx_listen keeps the head and tail
 * pointers of the list of pending requests.
 */
struct iw_conn_req {
	struct iw_conn_req	*next;			/* next request in the list */
	struct rdma_cm_id	*cm_id;			/* CM id belonging to this request */
	void				*priv_data;		/* private data of this request */
	uint8_t				 priv_data_len;	/* length of priv_data */
};


#endif /*IWARP_OBJS_H_*/
