Here is a simple ULP to test transferring pages between
machines on an InfiniBand network. I modified Sean's cmtest to be a
standalone thread that listens for incoming connections (ib_cm_spts.ko).
A separate module (spts) registers itself so that incoming connections
can be initialized. This code at least managed to work for connecting a
small number of nodes together (fewer than 10), i.e. n(n-1)/2 connections.
However, something still goes wrong after MAX_CQ + 1 transfers, just
like the cmtest code. If anyone has any ideas on what is wrong with what
I am doing here, that would be helpful. It must be something really
simple I just don't understand about the IB stack.
Specifically, when I create the CQ I pass in SPTS_IB_CQ_SIZE:
node->recv_cq = ib_create_cq(node->device, spts_comp_handler,
spts_event_handler, node, SPTS_IB_CQ_SIZE);
and then I set qp_attr.cap.max_send_wr = SPTS_IB_CQ_SIZE.
After that initialization, I fill the QP using ib_post_recv().
When I get to message number SPTS_IB_CQ_SIZE + 1, everything stops
transferring. I'm not sure what the correct method is to "clear" out entries
so more buffers can be posted to receive into.
Enjoy,
Jeff
Signed-off-by: Jeff Carr <[EMAIL PROTECTED]>
diff -Naur spts.old/Kconfig trunk/src/linux-kernel/infiniband/ulp/spts/Kconfig
--- spts.old/Kconfig 1969-12-31 16:00:00.000000000 -0800
+++ trunk/src/linux-kernel/infiniband/ulp/spts/Kconfig 2005-06-21
09:34:03.000000000 -0700
@@ -0,0 +1,7 @@
+config INFINIBAND_SPTS
+ tristate "A Simple Page Transfer Scheme over InfiniBand (SPiTS)"
+ depends on INFINIBAND
+ ---help---
+ All this does is let you establish a simple connection
+ between two infiniband hosts so you can do 2 way
+ data transfers.
diff -Naur spts.old/Makefile trunk/src/linux-kernel/infiniband/ulp/spts/Makefile
--- spts.old/Makefile 1969-12-31 16:00:00.000000000 -0800
+++ trunk/src/linux-kernel/infiniband/ulp/spts/Makefile 2005-06-21
09:34:31.000000000 -0700
@@ -0,0 +1,18 @@
+EXTRA_CFLAGS += -Idrivers/infiniband/include
+
+obj-$(CONFIG_INFINIBAND_SPTS) += ib_spts.o
+ib_spts-y := spts.o
+
+obj-$(CONFIG_INFINIBAND_SPTS) += ib_cm_spts.o
+ib_cm_spts-y := cm_spts.o
+
+obj-$(CONFIG_INFINIBAND_SPTS) += ib_cm_spts_client.o
+ib_cm_spts_client-y := client_start.o
+
+all:
+ make -C $(BUILDDIR) SUBDIRS=$(PWD) modules
+
+clean:
+ rm -rf .tmp_versions
+ rm -f *.o *.ko .*.o.cmd .*.o.d .*.ko.cmd *.mod.c
+
diff -Naur spts.old/client_start.c
trunk/src/linux-kernel/infiniband/ulp/spts/client_start.c
--- spts.old/client_start.c 1969-12-31 16:00:00.000000000 -0800
+++ trunk/src/linux-kernel/infiniband/ulp/spts/client_start.c 2005-06-21
09:33:51.000000000 -0700
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2005 Linux Machines Inc. All rights reserved.
+ *
+ * This software is available to you under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include "spts.h"
+
+MODULE_AUTHOR("Jeff Carr");
+MODULE_DESCRIPTION("Trigger a client connection");
+MODULE_LICENSE("GPL");
+
+void ib_cm_spts_conn_client(int slid, int dlid);
+
+static int slid = 0;
+static int dlid = 0;
+
+module_param(slid, int, 0444);
+module_param(dlid, int, 0444);
+
+MODULE_PARM_DESC(slid, "Source LID to use for connection.");
+MODULE_PARM_DESC(dlid, "Destination LID to use for connection.");
+
+/*
+ * Module entry point: register the SPTS connection hooks with the CM
+ * listener module, then start a client connection from slid to dlid.
+ *
+ * The hooks are registered before the connection attempt because
+ * ib_cm_spts_conn_client() goes through ib_cm_init1(), which fails when
+ * no init1 hook has been registered.
+ *
+ * Always returns 0; ib_cm_spts_conn_client() returns void, so a failed
+ * connection attempt is only visible in the kernel log.
+ */
+static int __init client_start_init(void)
+{
+	printk("client_start_init() slid = %d, dlid = %d\n", slid, dlid);
+	ib_cm_reg_init_conn(&spts_init_conn, &spts_post_recvs);
+	ib_cm_spts_conn_client(slid, dlid);
+	return 0;
+}
+
+/* Module exit hook; intentionally does nothing. */
+static void __exit client_start_cleanup(void)
+{
+}
+
+module_init(client_start_init);
+module_exit(client_start_cleanup);
diff -Naur spts.old/cm_spts.c
trunk/src/linux-kernel/infiniband/ulp/spts/cm_spts.c
--- spts.old/cm_spts.c 1969-12-31 16:00:00.000000000 -0800
+++ trunk/src/linux-kernel/infiniband/ulp/spts/cm_spts.c 2005-06-21
09:33:51.000000000 -0700
@@ -0,0 +1,497 @@
+/*
+ * A Simple Page Transfer Scheme "SPTS" listener
+ *
+ *
+ * Copyright (c) 2005 Linux Machines Inc. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree.
+ *
+ *
+ * This self contained kernel module will listen, establish,
+ * destroy and manage connections.
+ *
+ * I wrote this module so I could tuck away all the details
+ * of making and watching connections somewhere from SPiTS.
+ *
+ * I tried to make this simple and generic enough that it might
+ * be a good reference point for anyone else that might be
+ * in the same Infiniband boat.
+ *
+ * -- Jeff Carr <[EMAIL PROTECTED]>
+ *
+ */
+
+#include <asm/semaphore.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/time.h>
+#include <linux/workqueue.h>
+#include <asm/uaccess.h>
+#include <linux/delay.h>
+
+#include <ib_verbs.h>
+#include <ib_cm.h>
+
+#include "spts.h"
+
+MODULE_AUTHOR("Jeff Carr");
+MODULE_DESCRIPTION("A Simple CM listener intended for use with SPTS");
+MODULE_LICENSE("GPL");
+
+DECLARE_MUTEX(ib_cm_spts_lock);
+
+static void ib_cm_spts_add_one (struct ib_device *device);
+static void ib_cm_spts_remove_one (struct ib_device *device);
+
+static struct ib_client ib_cm_spts_client = {
+ .name = "ib_cm_spts",
+ .add = ib_cm_spts_add_one,
+ .remove = ib_cm_spts_remove_one
+};
+
+struct cm_spts {
+ struct workqueue_struct *wq;
+ struct work_struct work;
+ atomic_t connects_left;
+ atomic_t disconnects_left;
+ struct semaphore sem;
+ wait_queue_head_t wait;
+};
+
+static struct cm_spts test;
+struct ib_cm_id *listen_id;
+struct ib_device *mydevice;
+static int port_num = 1;
+ib_cm_conn_init1 conn_init1 = NULL;
+ib_cm_conn_init2 conn_init2 = NULL;
+
+/*
+ * Walk the connection's QP through INIT and then RTR, using the
+ * attributes that ib_cm_init_qp_attr() supplies for cm_id.
+ *
+ * cm_id->context must point at the struct spts_ib_connection that owns
+ * the QP.  'event' is unused.
+ *
+ * Returns 0 on success, or the error returned by the failing
+ * ib_cm_init_qp_attr()/ib_modify_qp() call.
+ */
+int ib_cm_spts_modify_to_rtr(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+	struct spts_ib_connection *node = cm_id->context;
+	struct ib_qp_attr qp_attr;
+	int qp_attr_mask, ret;
+
+	/* Start from a zeroed block so no stack garbage reaches the verbs. */
+	memset(&qp_attr, 0, sizeof qp_attr);
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+	if (ret) {
+		printk("ib_cm_spts: failed to init QP attr for INIT: %d\n", ret);
+		return ret;
+	}
+	ret = ib_modify_qp(node->qp, &qp_attr, qp_attr_mask);
+	if (ret) {
+		printk("ib_cm_spts: failed to modify QP to INIT: %d\n", ret);
+		return ret;
+	}
+
+	qp_attr.qp_state = IB_QPS_RTR;
+	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+	if (ret) {
+		printk("ib_cm_spts: failed to init QP attr for RTR: %d\n", ret);
+		return ret;
+	}
+	/*
+	 * Overrides the receive PSN filled in by the CM with the local QP
+	 * number.  NOTE(review): inherited from cmtest; confirm it matches
+	 * the starting_psn the peer advertises.
+	 */
+	qp_attr.rq_psn = node->qp->qp_num;
+	ret = ib_modify_qp(node->qp, &qp_attr, qp_attr_mask);
+	if (ret) {
+		printk("ib_cm_spts: failed to modify QP to RTR: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+/*
+ * Move the connection's QP to RTS using the attributes that
+ * ib_cm_init_qp_attr() supplies for cm_id.  'event' is unused.
+ *
+ * Returns 0 on success or the error from the call that failed.
+ */
+int ib_cm_spts_modify_to_rts(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+	struct spts_ib_connection *owner = cm_id->context;
+	struct ib_qp_attr attr;
+	int mask;
+	int err;
+
+	attr.qp_state = IB_QPS_RTS;
+
+	err = ib_cm_init_qp_attr(cm_id, &attr, &mask);
+	if (err) {
+		printk("ib_cm_spts: failed to init QP attr for RTS: %d\n", err);
+		return err;
+	}
+
+	err = ib_modify_qp(owner->qp, &attr, mask);
+	if (err)
+		printk("ib_cm_spts: failed to modify QP to RTS: %d\n", err);
+
+	return err;
+}
+
+/*
+ * Attach a connection object to cm_id if it does not have one yet.
+ *
+ * A NULL cm_id->context marks a cm_id that still has to be set up.  In
+ * that case the registered init1 hook is asked for a new connection on
+ * 'mydevice', and the cm_id and the connection are linked to each other.
+ *
+ * Returns 0 if cm_id already had, or now has, a connection.  Returns -1
+ * if no init1 hook is registered, no device has been recorded yet, or the
+ * hook could not provide a connection.
+ */
+int ib_cm_init1(struct ib_cm_id *cm_id)
+{
+	struct spts_ib_connection *anode;
+
+	DEBUGK("conn_init1: START cm_id == %p\n", cm_id);
+	DEBUGK("conn_init1: START cm_id->context == %p\n", cm_id->context);
+
+	if (cm_id->context) {
+		DEBUGK("conn_init1: already is initialized\n");
+		return 0;
+	}
+
+	if (!conn_init1) {
+		printk("conn_init1: no one registered an init function yet!\n");
+		return -1;
+	}
+	/* mydevice is only set once ib_cm_spts_add_one() has run. */
+	if (!mydevice) {
+		printk("conn_init1: no IB device has been added yet!\n");
+		return -1;
+	}
+
+	anode = conn_init1(mydevice);
+	if (!anode) {
+		DEBUGK("conn_init1: AHHHHH! anode is still NULL\n");
+		return -1;
+	}
+	cm_id->context = anode;
+	anode->cm_id = cm_id;
+
+	DEBUGK("conn_init1: END cm_id == %p\n", cm_id);
+	DEBUGK("conn_init1: END cm_id->context == %p\n", cm_id->context);
+	return 0;
+}
+
+/*
+ * Handles IB_CM_REP_RECEIVED: runs on the client side of a connection
+ * after the server has responded.
+ *
+ * Moves the QP to RTR and then RTS, calls the registered init2 hook on
+ * the connection, and finally sends the RTU.  If any step fails a REJ
+ * (IB_CM_REJ_UNSUPPORTED) is sent instead.
+ */
+static void ib_cm_spts_rep_handler(struct ib_cm_id *cm_id,
+				   struct ib_cm_event *event)
+{
+	struct spts_ib_connection *anode = cm_id->context;
+	int ret;
+
+	ret = ib_cm_spts_modify_to_rtr(cm_id, event);
+	if (ret)
+		goto fail;
+
+	ret = ib_cm_spts_modify_to_rts(cm_id, event);
+	if (ret)
+		goto fail;
+
+	if (!conn_init2) {
+		printk("ib_cm_spts: no one registered an init2 function yet!\n");
+		goto fail;
+	}
+	ret = conn_init2(anode);
+	if (ret)
+		goto fail;
+
+	ret = ib_send_cm_rtu(cm_id, NULL, 0);
+	if (ret) {
+		printk("ib_cm_spts: failed to send CM RTU: %d\n", ret);
+		goto fail;
+	}
+	return;
+fail:
+	printk("ib_cm_spts_rep_handler() failing connection reply\n");
+	ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0);
+}
+
+/*
+ * Handles IB_CM_REQ_RECEIVED: runs on the listening side to establish a
+ * connection.
+ *
+ * Moves the QP to RTR, calls the registered init2 hook on the connection,
+ * and answers with a REP that echoes the requester's responder_resources,
+ * initiator_depth, flow_control and rnr_retry_count.  If any step fails a
+ * REJ (IB_CM_REJ_UNSUPPORTED) is sent instead.
+ */
+static void ib_cm_spts_req_handler(struct ib_cm_id *cm_id,
+				   struct ib_cm_event *event)
+{
+	struct spts_ib_connection *anode = cm_id->context;
+	struct ib_cm_req_event_param *req;
+	struct ib_cm_rep_param rep;
+	int ret;
+
+	DEBUGK("ib_cm_spts_req_handler() START\n");
+
+	ret = ib_cm_spts_modify_to_rtr(cm_id, event);
+	if (ret)
+		goto fail;
+
+	DEBUGK("ib_cm_spts_req_handler() about to conn_init2()\n");
+	if (!conn_init2) {
+		printk("ib_cm_spts: no one registered an init2 function yet!\n");
+		goto fail;
+	}
+	ret = conn_init2(anode);
+	if (ret)
+		goto fail;
+
+	req = &event->param.req_rcvd;
+	memset(&rep, 0, sizeof rep);
+	rep.qp_num = anode->qp->qp_num;
+	rep.srq = (anode->qp->srq != NULL);
+	rep.starting_psn = anode->qp->qp_num;
+	rep.responder_resources = req->responder_resources;
+	rep.initiator_depth = req->initiator_depth;
+	rep.target_ack_delay = 20;
+	rep.flow_control = req->flow_control;
+	rep.rnr_retry_count = req->rnr_retry_count;
+
+	DEBUGK("ib_cm_spts_req_handler() about to ib_send_cm_rep()\n");
+	ret = ib_send_cm_rep(cm_id, &rep);
+	if (ret) {
+		printk("ib_cm_spts_req_handler() failed to send CM REP: %d\n", ret);
+		goto fail;
+	}
+	DEBUGK("ib_cm_spts_req_handler done\n");
+	return;
+fail:
+	printk("ib_cm_spts_req_handler() failing connection request\n");
+	ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0);
+
+	// don't take this out
+	if (atomic_dec_and_test(&test.connects_left))
+		wake_up(&test.wait);
+}
+
+/*
+ * Event handler handed to ib_create_cm_id() for both the listening and
+ * the client cm_ids; it is called each time an event occurs on any
+ * connection.
+ *
+ * Every event first goes through ib_cm_init1(), which attaches a new
+ * connection object to a cm_id that has none.  REQ, REP, RTU and DREQ
+ * events are acted on; the remaining events are only logged.
+ *
+ * Returns 0, or -1 if ib_cm_init1() failed.
+ */
+static int ib_cm_spts_handler(struct ib_cm_id *cm_id,
+			      struct ib_cm_event *event)
+{
+	// check to see there is already a connection for this cm_id
+	// If not, then this makes a new one.
+	if (ib_cm_init1(cm_id)) {
+		printk("ib_cm_init1() FAILED\n");
+		printk("event = 0x%08X\n", (int) event->event);
+		return -1;
+	}
+
+	switch (event->event) {
+	case IB_CM_REQ_RECEIVED:
+		printk("ib_cm_spts: IB_CM_REQ_RECEIVED\n");
+		ib_cm_spts_req_handler(cm_id, event);
+		break;
+	case IB_CM_REP_RECEIVED:
+		printk("ib_cm_spts: IB_CM_REP_RECEIVED\n");
+		ib_cm_spts_rep_handler(cm_id, event);
+		break;
+	case IB_CM_RTU_RECEIVED:
+		printk("ib_cm_spts: IB_CM_RTU_RECEIVED\n");
+		/* Failure is already logged by ib_cm_spts_modify_to_rts(). */
+		ib_cm_spts_modify_to_rts(cm_id, event);
+		break;
+	case IB_CM_DREQ_RECEIVED:
+		printk("ib_cm_spts: IB_CM_DREQ_RECEIVED\n");
+		ib_send_cm_drep(cm_id, NULL, 0);
+		break;
+	case IB_CM_DREP_RECEIVED:
+		printk("ib_cm_spts: UNHANDLED IB_CM_DREP_RECEIVED\n");
+		break;
+	case IB_CM_TIMEWAIT_EXIT:
+		printk("ib_cm_spts: UNHANDLED IB_CM_TIMEWAIT_EXIT\n");
+		break;
+	case IB_CM_REQ_ERROR:
+		printk("ib_cm_spts: UNHANDLED IB_CM_REQ_ERROR\n");
+		break;
+	default:
+		printk("ib_cm_spts: UNKNOWN event = 0x%08X\n", (int) event->event);
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Completion callback handed to ib_sa_path_rec_get().
+ *
+ * 'context' is the connection that issued the query.  When status is 0
+ * the returned path record is copied into it.  The status is always
+ * stored in query_status, and waiters on test.wait are woken so that
+ * ib_cm_spts_query_for_path() can continue.
+ */
+static void ib_cm_spts_query_handler(int status, struct ib_sa_path_rec *resp,
+				     void *context)
+{
+	struct spts_ib_connection *requester = context;
+
+	if (status == 0)
+		requester->path_rec = *resp;
+	requester->query_status = status;
+
+	wake_up(&test.wait);
+}
+
+/*
+ * Look up the path record between slid and dlid and store it in
+ * anode->path_rec.
+ *
+ * Issues an SA path query with a 5000 timeout value and sleeps on
+ * test.wait until ib_cm_spts_query_handler() stores a status <= 0.  A
+ * query that ends with -ETIMEDOUT is reissued while fewer than four
+ * attempts have been made.
+ *
+ * Returns 0 on success, otherwise the error from ib_sa_path_rec_get() or
+ * the status reported to the query handler.
+ */
+static int ib_cm_spts_query_for_path(
+	struct spts_ib_connection *anode, int slid, int dlid)
+{
+	int ret, retries = 0;
+	struct ib_sa_query *query;
+
+	anode->path_rec.dlid = cpu_to_be16(dlid);
+	anode->path_rec.slid = cpu_to_be16(slid);
+	anode->path_rec.numb_path = 1;
+
+	do {
+		/* Positive means "still pending"; the handler overwrites it. */
+		anode->query_status = 1;
+		ret = ib_sa_path_rec_get(anode->device, port_num,
+					 &anode->path_rec,
+					 IB_SA_PATH_REC_DLID |
+					 IB_SA_PATH_REC_SLID |
+					 IB_SA_PATH_REC_NUMB_PATH, 5000,
+					 GFP_KERNEL, ib_cm_spts_query_handler,
+					 anode, &query);
+		if (ret < 0) {
+			printk("ib_cm_spts: ib_sa_path_rec_get failed: %d\n", ret);
+			goto out;
+		}
+
+		wait_event(test.wait, anode->query_status <= 0);
+		ret = anode->query_status;
+		retries++;
+	} while (ret == -ETIMEDOUT && retries <= 3);
+out:
+	return ret;
+}
+
+/*
+ * Start an active (client) connection from slid to dlid.
+ *
+ * Creates a cm_id, attaches a connection to it through ib_cm_init1(),
+ * resolves the path record and sends a CM REQ to service id LISTEN_NUM.
+ * The rest of the handshake runs in ib_cm_spts_handler().
+ *
+ * On any failure the cm_id created here is destroyed again so it is not
+ * leaked.  The QP and CQs made by the init1 hook are not released here.
+ * NOTE(review): this module has no teardown hook for them; confirm who
+ * should own that.
+ *
+ * This should be changed to return useful values; for now failures are
+ * only reported through printk.
+ */
+void ib_cm_spts_conn_client(int slid, int dlid)
+{
+	struct spts_ib_connection *anode = NULL;
+	struct ib_cm_req_param req;
+	struct ib_cm_id *cm_id;
+	int ret;
+
+	DEBUGK("ib_cm_spts_conn_client() START\n");
+	cm_id = ib_create_cm_id(ib_cm_spts_handler, NULL);
+	if (IS_ERR(cm_id)) {
+		ret = PTR_ERR(cm_id);
+		printk("ib_cm_spts: failed to create cm_id: %d\n", ret);
+		return;
+	}
+
+	if (ib_cm_init1(cm_id)) {
+		printk("ib_cm_init1() FAILED\n");
+		goto fail;
+	}
+
+	anode = cm_id->context;
+
+	DEBUGK("ib_cm_spts_conn_client() about to query_for_path\n");
+	ret = ib_cm_spts_query_for_path(anode, slid, dlid);
+	if (ret) {
+		printk("ib_cm_spts: failed path record query: %d\n", ret);
+		goto fail;
+	}
+
+	memset(&req, 0, sizeof req);
+	req.primary_path = &anode->path_rec;
+	req.service_id = LISTEN_NUM;
+	req.responder_resources = 1;
+	req.initiator_depth = 1;
+	req.remote_cm_response_timeout = 20;
+	req.local_cm_response_timeout = 20;
+	req.retry_count = 5;
+	req.max_cm_retries = 5;
+
+	req.qp_num = anode->qp->qp_num;
+	req.qp_type = anode->qp->qp_type;
+	req.srq = (anode->qp->srq != NULL);
+	req.starting_psn = anode->qp->qp_num;
+	ret = ib_send_cm_req(anode->cm_id, &req);
+	if (ret) {
+		printk("ib_cm_spts_conn_client() failure sending REQ: %d\n", ret);
+		goto fail;
+	}
+	DEBUGK("ib_cm_spts_conn_client END\n");
+	return;
+
+fail:
+	/* Drop the back-pointer before the cm_id goes away. */
+	if (anode)
+		anode->cm_id = NULL;
+	ib_destroy_cm_id(cm_id);
+}
+EXPORT_SYMBOL(ib_cm_spts_conn_client);
+
+/*
+ * Work item that creates the listening cm_id and starts listening on
+ * service id LISTEN_NUM.  'data' is unused.
+ *
+ * It first blocks in down() on ib_cm_spts_lock, which ib_cm_spts_init()
+ * has already taken; ib_cm_spts_add_one() releases it with up().
+ *
+ * If the cm_id cannot be created, listen_id is reset to NULL so that no
+ * ERR_PTR value is left in the global.  If ib_cm_listen() fails the
+ * cm_id is kept, as before, and is destroyed at module exit.
+ */
+static void ib_cm_spts_listen(void *data)
+{
+	int ret;
+
+	DEBUGK("listen() START\n");
+	down(&ib_cm_spts_lock);
+	DEBUGK("listen() after down(&ib_cm_spts_lock)\n");
+
+	listen_id = ib_create_cm_id(ib_cm_spts_handler, &test);
+	if (IS_ERR(listen_id)) {
+		ret = PTR_ERR(listen_id);
+		listen_id = NULL;
+		printk("listen() WATCH: listen request failed: %d\n", ret);
+		return;
+	}
+
+	/*
+	 * ib_cm_init1() treats a NULL context as "connection not set up
+	 * yet", so the listener's context is cleared before listening.
+	 * NOTE(review): presumably cm_ids created for incoming REQs inherit
+	 * this context - confirm against ib_cm.
+	 */
+	listen_id->context = NULL;
+	DEBUGK("listen() listen_id == %p\n", listen_id);
+	DEBUGK("listen() listen_id->context == %p\n", listen_id->context);
+
+	DEBUGK("listen() about to ib_cm_listen()\n");
+	ret = ib_cm_listen(listen_id, LISTEN_NUM, 0);
+	DEBUGK("listen() finished ib_cm_listen() ret == %d\n", ret);
+	if (ret) {
+		printk("listen() WATCH: failure trying to listen: %d\n", ret);
+	}
+	DEBUGK("listen() END\n");
+}
+
+/*
+ * Registered as the .add hook of ib_cm_spts_client.
+ *
+ * Records the device in the global 'mydevice', stores &test as this
+ * client's data for the device, and ups ib_cm_spts_lock, the semaphore
+ * that ib_cm_spts_listen() blocks on.
+ */
+static void ib_cm_spts_add_one(struct ib_device *device)
+{
+ printk("ib_cm_spts_add_one() START\n");
+ // overwritten on every call, so only the most recent device is kept
+ mydevice = device;
+ printk("ib_cm_spts_add_one() ib_set_client_data\n");
+ ib_set_client_data(device, &ib_cm_spts_client, &test);
+ // lets the down() in ib_cm_spts_listen() proceed
+ up(&ib_cm_spts_lock);
+ printk("ib_cm_spts_add_one() END\n");
+}
+
+/*
+ * Registered as the .remove hook of ib_cm_spts_client.
+ *
+ * Only logs; nothing is torn down.  The END message is printed only when
+ * the client data stored for the device is &test.
+ * TODO: figure out what this really needs to do.
+ */
+static void ib_cm_spts_remove_one(struct ib_device *device)
+{
+	void *client_data;
+
+	printk("ib_cm_spts_remove_one START\n");
+	client_data = ib_get_client_data(device, &ib_cm_spts_client);
+	if (client_data == &test)
+		printk("ib_cm_spts_remove_one END\n");
+}
+
+/*
+ * Create the "ib_cm_spts" workqueue and queue ib_cm_spts_listen() on it.
+ *
+ * Returns silently, without initialising test.work or test.wait, when the
+ * workqueue cannot be created.
+ */
+static void ib_cm_spts_start_listen(void)
+{
+	struct workqueue_struct *wq;
+
+	DEBUGK("ib_cm_spts_start_listen() START\n");
+	wq = create_workqueue("ib_cm_spts");
+	test.wq = wq;
+	if (wq == NULL)
+		return;
+
+	DEBUGK("ib_cm_spts_start_listen() INIT_WORK(ib_cm_spts_listen)\n");
+	init_waitqueue_head(&test.wait);
+	INIT_WORK(&test.work, ib_cm_spts_listen, &test);
+
+	DEBUGK("ib_cm_spts_start_listen() about to queue work\n");
+	queue_work(wq, &test.work);
+	DEBUGK("ib_cm_spts_start_listen() FINISHED\n");
+}
+
+/*
+ * Use this to register the initialization functions used to
+ * establish new connections.
+ *
+ * init1() should create the QP and CQs; it is called from ib_cm_init1()
+ * with the device and returns the new connection (NULL on failure).
+ * init2() should do ib_post_recv(); it is called from the REQ and REP
+ * handlers and returns 0 on success.
+ *
+ * The two hooks are stored in module-level globals without any locking,
+ * and a later call replaces the earlier registration.  Always returns 0.
+ */
+int ib_cm_reg_init_conn( ib_cm_conn_init1 init1, ib_cm_conn_init2 init2 )
+{
+ conn_init1 = init1;
+ conn_init2 = init2;
+ return 0;
+}
+EXPORT_SYMBOL(ib_cm_reg_init_conn);
+
+/*
+ * Module init.
+ *
+ * Takes ib_cm_spts_lock first, so that the listen work item queued by
+ * ib_cm_spts_start_listen() blocks in its own down() until
+ * ib_cm_spts_add_one() calls up().  The IB client is registered last.
+ *
+ * Always returns 0: the result of ib_register_client() is not checked and
+ * ib_cm_spts_start_listen() cannot report a failure.
+ */
+static int __init ib_cm_spts_init(void)
+{
+ listen_id = NULL;
+ memset(&test, 0, sizeof test);
+ // held until ib_cm_spts_add_one() ups it; gates ib_cm_spts_listen()
+ down(&ib_cm_spts_lock);
+ ib_cm_spts_start_listen();
+ DEBUGK("ib_cm_spts_init() starting ib_cm_spts_init()\n");
+ ib_register_client(&ib_cm_spts_client);
+ return 0;
+}
+
+/*
+ * Module exit: stop the listener and unregister the IB client.
+ *
+ * test.wq is NULL when ib_cm_spts_start_listen() could not create the
+ * workqueue; in that case test.wait was never initialised either, so the
+ * wake-up and the workqueue teardown are skipped together.  listen_id is
+ * destroyed only when it holds a real cm_id (not NULL and not an ERR_PTR
+ * left by a failed ib_create_cm_id()).
+ */
+static void __exit ib_cm_spts_cleanup(void)
+{
+	if (test.wq) {
+		wake_up(&test.wait);
+		flush_workqueue(test.wq);
+		destroy_workqueue(test.wq);
+		test.wq = NULL;
+	}
+	if (listen_id && !IS_ERR(listen_id)) {
+		ib_destroy_cm_id(listen_id);
+		listen_id = NULL;
+	}
+	ib_unregister_client(&ib_cm_spts_client);
+}
+
+module_init(ib_cm_spts_init);
+module_exit(ib_cm_spts_cleanup);
diff -Naur spts.old/spts.c trunk/src/linux-kernel/infiniband/ulp/spts/spts.c
--- spts.old/spts.c 1969-12-31 16:00:00.000000000 -0800
+++ trunk/src/linux-kernel/infiniband/ulp/spts/spts.c 2005-06-21
09:33:51.000000000 -0700
@@ -0,0 +1,551 @@
+/*
+ *
+ * SPTS - A Simple Page Transfer Scheme (spits messages between smart hosts)
+ *
+ * Copyright (c) 2005 Linux Machines. All rights reserved.
+ *
+ * This software is available to you under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree.
+ *
+ *
+ * It does what it says: it lets you send messages between machines
+ * on an Infiniband network.
+ *
+ * It uses/requires Sean Hefty's Infiniband Connection Manager (ib_cm.ko)
+ * to establish connections.
+ *
+ * Jeff Carr <[EMAIL PROTECTED]>
+ *
+ */
+
+#include <asm/semaphore.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/time.h>
+#include <linux/workqueue.h>
+
+#include <ib_cm.h>
+
+#include "spts.h"
+// #include "spts_proc_test.h"
+
+MODULE_AUTHOR("Jeff Carr");
+MODULE_DESCRIPTION("SPTS - A Simple Page Tranfser Scheme");
+MODULE_LICENSE("GPL");
+
+struct spts_msg smsg[SPTS_MSG_COUNT]; // send buffers
+struct spts_msg rmsg[SPTS_MSG_COUNT]; // recieve buffers
+struct spts_ib_connection conn[SPTS_MAX_CONNECTIONS];
+// struct ib_send_wr send_wr, *bad_send_wr;
+// struct ib_send_wr send_wr, *send_wr_failure;
+struct ib_sge sge;
+
+/*
+ * Asynchronous event callback.  It is passed to ib_create_cq() and set as
+ * the QP event handler in spts_init_node().  It only logs that it ran;
+ * 'event' and 'data' are ignored.
+ */
+void spts_event_handler(struct ib_event *event, void *data)
+{
+	printk("WATCH spts_event_handler()\n");
+}
+EXPORT_SYMBOL(spts_event_handler);
+
+/*
+ * Completion callback passed to ib_create_cq() for both the send and the
+ * receive CQ.
+ *
+ * It only logs that it ran; it does not poll the CQ.  Completions are
+ * fetched with ib_poll_cq() in spts_poll().
+ */
+void spts_comp_handler(struct ib_cq *cq, void *cq_context)
+{
+ printk("WATCH spts_comp_handler()\n");
+}
+EXPORT_SYMBOL(spts_comp_handler);
+
+/*
+ * Post a single receive work request on qp that points at msg's buffer.
+ *
+ * The request has one scatter/gather entry covering SPTS_MSG_SIZE bytes
+ * at msg->addr, using msg->mr's lkey.  wr_id is always 0, so completions
+ * cannot be matched to a buffer by id.
+ *
+ * Returns 0 on success or the error from ib_post_recv().
+ */
+int spts_post_recv(struct ib_qp *qp, struct spts_msg *msg)
+{
+	struct ib_recv_wr recv_wr, *recv_failure;
+	struct ib_sge sge;
+	int ret;
+
+	/* this is the actual size of the recv buffer in bytes */
+	sge.length = SPTS_MSG_SIZE;
+	sge.lkey = msg->mr->lkey;
+	sge.addr = msg->addr;
+
+	recv_wr.next = NULL;
+	recv_wr.sg_list = &sge;
+	recv_wr.num_sge = 1;
+	/* always 0 for now; could later identify the connection/buffer */
+	recv_wr.wr_id = 0;
+
+	ret = ib_post_recv(qp, &recv_wr, &recv_failure);
+	if (ret) {
+		printk("spts_post_recv() failed ib_post_recv == %d\n", ret);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(spts_post_recv);
+
+/*
+ * Post SPTS_IB_CQ_SIZE - 4 receive buffers on the node's QP.
+ *
+ * Buffers are taken from the global rmsg[] ring starting at index 0,
+ * wrapping at SPTS_MSG_COUNT.  On success node->status is set to 1, which
+ * the rest of the module uses as "connection is initialized".
+ *
+ * Returns 0 on success, or -1 as soon as one post fails (status is then
+ * left untouched).
+ */
+int spts_post_recvs(struct spts_ib_connection *node)
+{
+	int posted;
+
+	DEBUGK("spts_post_recvs() START\n");
+
+	for (posted = 0; posted < SPTS_IB_CQ_SIZE - 4; posted++) {
+		struct spts_msg *buf = &rmsg[posted % SPTS_MSG_COUNT];
+
+		if (spts_post_recv(node->qp, buf) != 0)
+			return -1;
+	}
+
+	node->status = 1;
+	DEBUGK("spts_post_recvs() END node->status is set now\n");
+	return 0;
+}
+EXPORT_SYMBOL(spts_post_recvs);
+
+/*
+ * Prepare the SPTS_MSG_COUNT message buffers in msgs[] for use by the HCA.
+ *
+ * For each message a DMA memory region is obtained on pd and the buffer
+ * msgs[i].mem (allocated by the caller) is DMA-mapped.  The mapping is
+ * bidirectional because the same routine prepares both the send and the
+ * receive buffers.
+ *
+ * On failure everything set up by this call is undone and msgs[i].mr is
+ * left NULL.  The buffers themselves are NOT freed here; they belong to
+ * the caller and are freed in spts_cleanup().
+ *
+ * Returns 0 on success or the error from ib_get_dma_mr().
+ */
+int spts_create_messages(struct ib_device *device,
+			 struct ib_pd *pd, struct spts_msg *msgs)
+{
+	int i, ret;
+
+	for (i = 0; i < SPTS_MSG_COUNT; ++i) {
+		msgs[i].mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
+		if (IS_ERR(msgs[i].mr)) {
+			ret = PTR_ERR(msgs[i].mr);
+			msgs[i].mr = NULL;
+			printk("spts_create_messages() failed to get DMA MR: %d\n", ret);
+			goto unwind;
+		}
+
+		msgs[i].addr = dma_map_single(device->dma_device,
+				msgs[i].mem, SPTS_MSG_SIZE, DMA_BIDIRECTIONAL);
+		/* the macro takes a pointer to the structure */
+		pci_unmap_addr_set(&msgs[i], mapping, msgs[i].addr);
+	}
+	DEBUGK("spts_create_messages() END\n");
+	return 0;
+
+unwind:
+	/* release what the earlier iterations acquired */
+	while (--i >= 0) {
+		dma_unmap_single(device->dma_device, msgs[i].addr,
+				 SPTS_MSG_SIZE, DMA_BIDIRECTIONAL);
+		ib_dereg_mr(msgs[i].mr);
+		msgs[i].mr = NULL;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(spts_create_messages);
+
+/*
+ * Post one IB_WR_SEND work request carrying msg on the QP of connection
+ * 'dest'.  This is a plain send, not an RDMA read/write.
+ *
+ * The request is posted signaled.  The QP is created with
+ * IB_SIGNAL_REQ_WR, so an unsignaled send never produces a completion;
+ * without completions to poll, the send queue slots are never handed back
+ * and posting stops working once max_send_wr requests have been queued.
+ * The completions are reaped from the send CQ in spts_poll().
+ *
+ * Returns the result of ib_post_send(), or -1 when dest is out of range
+ * or the connection is not initialized.
+ */
+int spts_send_single_msg(int dest, struct spts_msg *msg)
+{
+	struct ib_send_wr send_wr, *send_wr_bad;
+	struct ib_sge sge;
+
+	if (dest < 0 || dest >= SPTS_MAX_CONNECTIONS) {
+		printk("WARNING: connection %d is out of range\n", dest);
+		return -1;
+	}
+	if (conn[dest].status == 0) {
+		printk("WARNING: this connection was never initialized\n");
+		return -1;
+	}
+
+	/* this is the actual size of the send buffer in bytes */
+	sge.length = SPTS_MSG_SIZE;
+	sge.addr = msg->addr;
+	sge.lkey = msg->mr->lkey;
+
+	/* zero the request so fields that are not set below are defined */
+	memset(&send_wr, 0, sizeof send_wr);
+	send_wr.next = NULL;
+	send_wr.sg_list = &sge;
+	send_wr.num_sge = 1;
+	send_wr.opcode = IB_WR_SEND;
+	send_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+	send_wr.wr_id = 0;
+
+	return ib_post_send(conn[dest].qp, &send_wr, &send_wr_bad);
+}
+
+/*
+ * Debugging hook for dumping the contents of a message.
+ *
+ * Currently a no-op: the body that printed the first and last words of
+ * msg->mem through DEBUGK was disabled.  The function is kept so that
+ * callers and the exported symbol stay valid.
+ */
+void spts_dump_page(struct spts_msg *msg)
+{
+}
+EXPORT_SYMBOL(spts_dump_page);
+
+/*
+ * Create the receive CQ, the send CQ and the RC QP for a connection.
+ *
+ * node->device and node->pd must already be set.  Both CQs get
+ * SPTS_IB_CQ_SIZE entries, and the QP gets SPTS_IB_CQ_SIZE send and
+ * receive work requests with one SGE each.
+ *
+ * On failure every object created so far is destroyed exactly once and
+ * the matching node fields are reset to NULL, so that a later
+ * spts_destroy_node() never sees an ERR_PTR or a stale handle.
+ *
+ * Returns 0 on success or the error from the failing verbs call.
+ */
+int spts_init_node(struct spts_ib_connection *node)
+{
+	struct ib_qp_init_attr qp_attr;
+	int ret;
+
+	node->recv_cq = ib_create_cq(node->device, spts_comp_handler,
+				     spts_event_handler, node, SPTS_IB_CQ_SIZE);
+	if (IS_ERR(node->recv_cq)) {
+		ret = PTR_ERR(node->recv_cq);
+		printk("spts_init_node() unable to create CQ: %d\n", ret);
+		goto err_recv_cq;
+	}
+	DEBUGK("spts_init_node recv_cq ib_create_cq()\n");
+
+	node->send_cq = ib_create_cq(node->device, spts_comp_handler,
+				     spts_event_handler, node, SPTS_IB_CQ_SIZE);
+	if (IS_ERR(node->send_cq)) {
+		ret = PTR_ERR(node->send_cq);
+		printk("spts_init_node() unable to create CQ: %d\n", ret);
+		goto err_send_cq;
+	}
+	DEBUGK("spts_init_node ib_create_send_cq()\n");
+
+	memset(&qp_attr, 0, sizeof qp_attr);
+	qp_attr.event_handler = spts_event_handler;
+	qp_attr.cap.max_send_wr = SPTS_IB_CQ_SIZE;
+	qp_attr.cap.max_recv_wr = SPTS_IB_CQ_SIZE;
+	qp_attr.cap.max_send_sge = 1;
+	qp_attr.cap.max_recv_sge = 1;
+	/* only work requests flagged IB_SEND_SIGNALED generate completions */
+	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+	qp_attr.qp_type = IB_QPT_RC;
+	qp_attr.send_cq = node->send_cq;
+	qp_attr.recv_cq = node->recv_cq;
+
+	node->qp = ib_create_qp(node->pd, &qp_attr);
+	if (IS_ERR(node->qp)) {
+		ret = PTR_ERR(node->qp);
+		printk("spts_init_node() unable to create QP: %d\n", ret);
+		goto err_qp;
+	}
+	DEBUGK("spts_init_node ib_create_qp()\n");
+	return 0;
+
+err_qp:
+	node->qp = NULL;
+	ib_destroy_cq(node->send_cq);
+err_send_cq:
+	node->send_cq = NULL;
+	ib_destroy_cq(node->recv_cq);
+err_recv_cq:
+	node->recv_cq = NULL;
+	return ret;
+}
+EXPORT_SYMBOL(spts_init_node);
+
+/*
+ * init1 hook: claim the first connection slot whose status is 0 and
+ * create its IB resources on 'device'.
+ *
+ * All connections share one protection domain.  It is allocated, together
+ * with the memory regions for rmsg[] and smsg[], the first time through
+ * and found again later via conn[0].pd.
+ *
+ * Returns the connection, or NULL when no slot is free or a resource
+ * could not be created.
+ *
+ * NOTE(review): a slot only counts as taken once status is set, which
+ * happens later in spts_post_recvs(); two set-ups that overlap can be
+ * handed the same slot.
+ */
+struct spts_ib_connection *spts_init_conn(struct ib_device *device)
+{
+	int ret;
+	int i = 0;
+
+	DEBUGK("spts_init_conn() START i == %d\n", i);
+
+	for (i = 0; i < SPTS_MAX_CONNECTIONS; ++i) {
+		if (conn[i].status)
+			continue;
+
+		conn[i].device = device;
+
+		if (conn[0].pd) {
+			DEBUGK("spts_init_conn() just use the already allocated PD\n");
+			conn[i].pd = conn[0].pd;
+		} else {
+			conn[i].pd = ib_alloc_pd(conn[i].device);
+			if (IS_ERR(conn[i].pd)) {
+				ret = PTR_ERR(conn[i].pd);
+				/* never keep (or free) an ERR_PTR as the PD */
+				conn[i].pd = NULL;
+				printk("spts_init_conn() failed to alloc PD: %d\n", ret);
+				return NULL;
+			}
+			DEBUGK("spts_init_conn() ib_alloc_pd() ok\n");
+
+			/*
+			 * NOTE(review): if either call fails the PD stays
+			 * allocated, so the next attempt takes the branch
+			 * above and skips message set-up.
+			 */
+			if (spts_create_messages(conn[i].device, conn[i].pd, rmsg))
+				return NULL;
+			if (spts_create_messages(conn[i].device, conn[i].pd, smsg))
+				return NULL;
+		}
+
+		ret = spts_init_node(&conn[i]);
+		if (ret) {
+			printk("spts_init_conn() unable to create test nodes: %d\n", ret);
+			return NULL;
+		}
+
+		DEBUGK("spts_init_conn() END\n");
+		return &conn[i];
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(spts_init_conn);
+
+
+/*
+ * Destroy the QP and both CQs of a connection.
+ *
+ * Safe to call on a node that was never, or only partly, initialized, and
+ * safe to call more than once: handles that are NULL or ERR_PTR values
+ * are skipped, and every handle is cleared after it has been destroyed.
+ * The QP goes first because it refers to the CQs.
+ *
+ * Not sure if this is all that is needed to destroy a "node"; the shared
+ * PD and the message MRs are not touched here.
+ */
+void spts_destroy_node(struct spts_ib_connection *node)
+{
+	DEBUGK("spts_destroy_node()\n");
+	if (!node) {
+		DEBUGK("BAD spts_destroy_node node == NULL\n");
+		return;
+	}
+	if (node->qp && !IS_ERR(node->qp))
+		ib_destroy_qp(node->qp);
+	node->qp = NULL;
+
+	if (node->send_cq && !IS_ERR(node->send_cq))
+		ib_destroy_cq(node->send_cq);
+	node->send_cq = NULL;
+
+	if (node->recv_cq && !IS_ERR(node->recv_cq))
+		ib_destroy_cq(node->recv_cq);
+	node->recv_cq = NULL;
+}
+EXPORT_SYMBOL(spts_destroy_node);
+
+/*
+ * spts_send_single()
+ *
+ * Copies len bytes from buf into the connection's next send buffer, stores
+ * len in the trailing 'size' word of the message and posts the send.
+ *
+ * dest        the connection to send the buffer over
+ * char *buf   the memory address of the data
+ * int len     the length of the data to send; must fit in the payload
+ *             part of struct spts_simple_msg (SPTS_MSG_SIZE minus the
+ *             size word)
+ *
+ * Returns len if the send was posted.  Returns -1 for an invalid dest,
+ * an uninitialized connection or a bad length, and the error from
+ * spts_send_single_msg() if posting failed; in that case the send buffer
+ * index is not advanced.
+ *
+ * TODO: break the transfer up if len > the max msg size
+ * TODO: pass in *conn instead of *qp
+ */
+int spts_send_single(int dest, char *buf, int len)
+{
+	int ret, i;
+	struct spts_simple_msg *wrap;
+
+	if (dest < 0 || dest >= SPTS_MAX_CONNECTIONS) {
+		printk("WARNING: connection %d is out of range\n", dest);
+		return -1;
+	}
+	if (conn[dest].status == 0) {
+		printk("WARNING: this connection was never initialized\n");
+		return -1;
+	}
+	/* the size word occupies the end of the buffer; do not overwrite it */
+	if (!buf || len < 0 || len > (int) sizeof(wrap->msg)) {
+		printk("spts_send_single() invalid length %d\n", len);
+		return -1;
+	}
+
+	i = conn[dest].cur_send_buf;
+	memcpy(smsg[i].mem, buf, len);
+	wrap = (struct spts_simple_msg *) smsg[i].mem;
+	wrap->size = len;
+
+	ret = spts_send_single_msg(dest, &smsg[i]);
+	DEBUGK("spts_send_single_msg() returned %d\n", ret);
+	if (ret)
+		return ret < 0 ? ret : -1;
+
+	/* smsg[] is the global pool shared by every connection */
+	spts_dump_page(&smsg[i]);
+	conn[dest].cur_send_buf += 1;
+	if (conn[dest].cur_send_buf == SPTS_MSG_COUNT)
+		conn[dest].cur_send_buf = 0;
+	return len;
+}
+EXPORT_SYMBOL(spts_send_single);
+
+/*
+ * spts_send_all()
+ *
+ * Sends the same message to every connection whose status is set.
+ *
+ * Returns the result of the last spts_send_single() call, or 0 when no
+ * connection is initialized.  Results of earlier sends are not kept.
+ */
+int spts_send_all(char *buf, int len)
+{
+	int ret = 0;
+	int i;
+
+	DEBUGK("spts_send_all() START\n");
+
+	for (i = 0; i < SPTS_MAX_CONNECTIONS; ++i) {
+		if (!conn[i].status)
+			continue;
+
+		ret = spts_send_single(i, buf, len);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(spts_send_all);
+
+/*
+ * spts_poll()
+ *
+ * Polls the CQs of connection 'dest' and passes back newly received
+ * buffers.
+ *
+ * The send CQ is drained completely so that finished send work requests
+ * are retired.  Up to SPTS_MAX_QUEUE_POLL receive completions are then
+ * taken.  For every receive completion a fresh receive is posted, so the
+ * receive queue does not run dry once the buffers posted by
+ * spts_post_recvs() have all been used.
+ *
+ * Receives are posted in ring order over rmsg[].  spts_post_recvs() posts
+ * the first SPTS_IB_CQ_SIZE - 4 buffers, so the buffer to post next is
+ * that many slots ahead of the one just completed.  The completed buffer
+ * itself is only reused a few messages later.
+ *
+ * *msgs is set to the buffer at the connection's current receive index
+ * (the first buffer of this batch).
+ *
+ * Returns the number of receive completions (a negative ib_poll_cq()
+ * result is passed through), or -1 when dest is out of range or the
+ * connection is not initialized.  A return equal to SPTS_MAX_QUEUE_POLL
+ * means more may be waiting.
+ *
+ * NOTE(review): rmsg[] is shared by all connections; with more than one
+ * active connection the ring positions of different QPs overlap.
+ */
+int spts_poll(int dest, struct spts_msg **msgs)
+{
+	struct ib_wc wc[SPTS_MAX_QUEUE_POLL];
+	int send_cq = 0, recv_cq;
+	int n, i, j, refill;
+
+	if (dest < 0 || dest >= SPTS_MAX_CONNECTIONS) {
+		printk("WARNING: connection %d is out of range\n", dest);
+		return -1;
+	}
+	if (conn[dest].status == 0) {
+		printk("WARNING: this connection was never initialized\n");
+		return -1;
+	}
+
+	/* reap every finished send; wc contents are not needed for sends */
+	while ((n = ib_poll_cq(conn[dest].send_cq, SPTS_MAX_QUEUE_POLL, wc)) > 0)
+		send_cq += n;
+
+	recv_cq = ib_poll_cq(conn[dest].recv_cq, SPTS_MAX_QUEUE_POLL, wc);
+	*msgs = &rmsg[conn[dest].cur_recv_buf];
+	DEBUGK("spts_poll() SEND CQ: %d, RECV CQ: %d\n", send_cq, recv_cq);
+	if (recv_cq >= SPTS_MSG_COUNT)
+		printk("WARNING: SPTS overflowing\n");
+
+	for (i = 0; i < recv_cq; ++i) {
+		j = conn[dest].cur_recv_buf;
+		if (wc[i].status != IB_WC_SUCCESS)
+			printk("spts_poll() receive completion status %d\n",
+			       (int) wc[i].status);
+		spts_dump_page(&rmsg[j]);
+
+		/* replace the consumed receive with the next ring buffer */
+		refill = (j + SPTS_IB_CQ_SIZE - 4) % SPTS_MSG_COUNT;
+		if (spts_post_recv(conn[dest].qp, &rmsg[refill]))
+			printk("spts_poll() could not repost receive buffer %d\n",
+			       refill);
+
+		conn[dest].cur_recv_buf += 1;
+		if (conn[dest].cur_recv_buf == SPTS_MSG_COUNT)
+			conn[dest].cur_recv_buf = 0;
+	}
+	/* debugging aid; NOTE(review): not defined in this patch */
+	mthca_dump_qp(conn[dest].qp);
+	return recv_cq;
+}
+EXPORT_SYMBOL(spts_poll);
+
+/*
+ * spts_poll_all()
+ *
+ * Polls the initialized connections in index order and stops at the
+ * first one that has something to report.
+ *
+ * Returns the positive count from spts_poll() for the first connection
+ * that has received messages (with *msgs set by that call), -1 as soon as
+ * a connection reports an error, or 0 when every receive queue is empty.
+ */
+int spts_poll_all(struct spts_msg **msgs)
+{
+	int recvcq, i;
+
+	DEBUGK("spts_poll_all() START\n");
+
+	for (i = 0; i < SPTS_MAX_CONNECTIONS; ++i) {
+		if (!conn[i].status)
+			continue;
+
+		recvcq = spts_poll(i, msgs);
+		if (recvcq > 0) {
+			DEBUGK("spts_poll_all() recvcq == %d, i == %d\n", recvcq, i);
+			return recvcq;
+		} else if (recvcq < 0) {
+			DEBUGK("spts_poll_all() FAILURE: An error occurred on connection %d\n", i);
+			DEBUGK("spts_poll_all() should kill this connection here %d\n", i);
+			return -1;
+		} else {
+			DEBUGK("spts_poll_all() RECV queue empty for i == %d\n", i);
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(spts_poll_all);
+
+/*
+ * Module init: set up the message buffers and fill them with known
+ * values so that transfers can be checked, then register the connection
+ * hooks with the CM listener module.
+ *
+ * Receive buffer i is filled with byte value i and send buffer i with
+ * 0xA0 + i.  Every connection slot gets its id and pointers to the shared
+ * smsg[]/rmsg[] pools.
+ *
+ * Returns 0 on success.  Returns -ENOMEM if a buffer cannot be allocated;
+ * all buffers allocated up to that point are freed again.
+ */
+static int __init spts_init(void)
+{
+	int i;
+
+	memset(&smsg, 0, sizeof smsg);
+	memset(&rmsg, 0, sizeof rmsg);
+	memset(&conn, 0, sizeof conn);
+
+	/* status and the buffer indexes are already 0 after the memset */
+	for (i = 0; i < SPTS_MAX_CONNECTIONS; ++i) {
+		conn[i].id = i;
+		conn[i].smsg = smsg;
+		conn[i].rmsg = rmsg;
+	}
+
+	printk("spts_init() TEST: sizeof conn == %d\n", (int) sizeof conn);
+	printk("spts_init() TEST: conn[0].status == %d\n", conn[0].status);
+	printk("spts_init() TEST: conn[1].status == %d\n", conn[1].status);
+
+	for (i = 0; i < SPTS_MSG_COUNT; ++i) {
+		rmsg[i].mem = kmalloc(SPTS_MSG_SIZE, GFP_KERNEL);
+		if (!rmsg[i].mem)
+			goto nomem;
+		memset(rmsg[i].mem, i, SPTS_MSG_SIZE);
+	}
+
+	for (i = 0; i < SPTS_MSG_COUNT; ++i) {
+		smsg[i].mem = kmalloc(SPTS_MSG_SIZE, GFP_KERNEL);
+		if (!smsg[i].mem)
+			goto nomem;
+		memset(smsg[i].mem, 0xA0 + i, SPTS_MSG_SIZE);
+	}
+
+	ib_cm_reg_init_conn(&spts_init_conn, &spts_post_recvs);
+	printk("Finished spts_init() -- allocated recieve and send buffers\n");
+	return 0;
+
+nomem:
+	printk("spts_init: failed message allocation\n");
+	/* slots not reached are still NULL, and kfree(NULL) is a no-op */
+	for (i = 0; i < SPTS_MSG_COUNT; ++i) {
+		kfree(rmsg[i].mem);
+		rmsg[i].mem = NULL;
+		kfree(smsg[i].mem);
+		smsg[i].mem = NULL;
+	}
+	return -ENOMEM;
+}
+
+/*
+ * Module exit: release what the module still holds.
+ *
+ * Order matters.  The QPs and CQs of every connection slot are destroyed
+ * first, so the HCA no longer uses the buffers.  Then the memory regions
+ * and the shared protection domain (kept in conn[0].pd) are released.
+ * The message buffers are freed last.
+ *
+ * spts_destroy_node() is called for every slot, not only for those whose
+ * status is 0: live connections are the ones that actually hold a QP.
+ *
+ * NOTE(review): the DMA mappings made in spts_create_messages() are not
+ * unmapped here.
+ */
+static void __exit spts_cleanup(void)
+{
+	int i;
+
+	for (i = 0; i < SPTS_MAX_CONNECTIONS; ++i) {
+		DEBUGK("spts_destroy_node() i == %d\n", i);
+		spts_destroy_node(&conn[i]);
+		conn[i].status = 0;
+	}
+
+	for (i = 0; i < SPTS_MSG_COUNT; ++i) {
+		if (rmsg[i].mr && !IS_ERR(rmsg[i].mr))
+			ib_dereg_mr(rmsg[i].mr);
+		rmsg[i].mr = NULL;
+		if (smsg[i].mr && !IS_ERR(smsg[i].mr))
+			ib_dereg_mr(smsg[i].mr);
+		smsg[i].mr = NULL;
+	}
+
+	if (conn[0].pd && !IS_ERR(conn[0].pd))
+		ib_dealloc_pd(conn[0].pd);
+	for (i = 0; i < SPTS_MAX_CONNECTIONS; ++i)
+		conn[i].pd = NULL;
+
+	for (i = 0; i < SPTS_MSG_COUNT; ++i) {
+		kfree(rmsg[i].mem);
+		rmsg[i].mem = NULL;
+		kfree(smsg[i].mem);
+		smsg[i].mem = NULL;
+	}
+
+	printk("spts_cleanup() END\n");
+}
+
+module_init(spts_init);
+module_exit(spts_cleanup);
+
diff -Naur spts.old/spts.h trunk/src/linux-kernel/infiniband/ulp/spts/spts.h
--- spts.old/spts.h 1969-12-31 16:00:00.000000000 -0800
+++ trunk/src/linux-kernel/infiniband/ulp/spts/spts.h 2005-06-21
09:33:55.000000000 -0700
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2005 Linux Machines Inc. All rights reserved.
+ *
+ * This software is available to you under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree.
+ *
+ */
+
+#include <asm/semaphore.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/time.h>
+#include <linux/workqueue.h>
+
+#include <ib_cm.h>
+
+// #define DEBUGK(fmt, args...) printk( KERN_DEBUG fmt, ## args)
+#define DEBUGK(fmt, args...) /* tracing compiled out; swap with the line above to enable */
+
+// 0x2000 for MSG_COUNT & IB_CQ_SIZE seems to be the max...
+// otherwise the server panics "unable to create QP"
+
+#define SPTS_MSG_COUNT 0x10 // how many page buffers to allocate
+#define SPTS_MSG_SIZE 0x1000 // the size of each message to send (aka MTU)
+ // breaks over IB right now at 0x1000
+#define SPTS_IB_CQ_SIZE 0x10 // how big to make the CQ
+#define SPTS_MAX_CONNECTIONS 8 // max simultaneous connections
+#define SPTS_MAX_QUEUE_POLL 1 // max completion buffers to grab
+
+#define LISTEN_NUM 0x1000 // the number to listen on
+
+/*
+ * Simple Page Transfer Scheme ("spits") message struct.
+ *
+ * This is the raw primitive struct used to create a pool
+ * of pages in memory that can be used for transfers.
+ *
+ */
+
+struct spts_msg {
+ int size; // message size
+ int s; // source
+ int d; // destination
+ // struct ib_recv_wr recv_wr;
+ // struct ib_send_wr send_wr;
+ // struct ib_send_wr *send_wr_bad;
+ // struct ib_sge sge;
+ void *mem; // the message buffer itself; spts_init() kmalloc()s SPTS_MSG_SIZE bytes for it
+ // char mem[SPTS_MSG_SIZE];
+ u64 addr; // address for the IB HCA (presumably the DMA address of mem -- confirm)
+ struct ib_mr *mr; // the IB memory region (MR)
+ DECLARE_PCI_UNMAP_ADDR(mapping) // NOTE(review): presumably the unmap cookie for mem -- confirm
+};
+
+/*
+ * Message wrapper struct for simple things: (SPTS_MSG_SIZE / 4) - 1
+ * payload words plus one size word, i.e. SPTS_MSG_SIZE bytes in total.
+ */
+
+struct spts_simple_msg {
+ u32 msg[(SPTS_MSG_SIZE >> 2)-1]; // payload words
+ u32 size; // NOTE(review): presumably the amount of valid data in msg[] -- confirm units
+};
+
+/*
+ * Contains InfiniBand-specific info about the state
+ * of the connection between two nodes.
+ *
+ */
+struct spts_ib_connection {
+ int id; // connection number; spts_init() assigns conn[n].id = n for n = 0, 1
+ struct ib_cm_id *cm_id;
+ struct ib_device *device;
+ struct ib_pd *pd;
+ struct ib_qp *qp;
+ struct ib_cq *send_cq;
+ struct ib_cq *recv_cq;
+ struct spts_msg *smsg; // send buffers
+ struct spts_msg *rmsg; // receive buffers
+ int cur_recv_buf; // set to 0 in spts_init(); presumably the next rmsg[] index -- confirm
+ int cur_send_buf; // set to 0 in spts_init(); presumably the next smsg[] index -- confirm
+ int status; // spts_cleanup() skips this slot when status is non-zero
+
+ /* cm info */
+ struct ib_sa_path_rec path_rec;
+ int query_status;
+};
+
+typedef struct spts_ib_connection *(*ib_cm_conn_init1)(struct ib_device *device);
+typedef int (*ib_cm_conn_init2)(struct spts_ib_connection *conn);
+/* Takes the two callback types above; spts_init() passes spts_init_conn and spts_post_recvs. */
+int ib_cm_reg_init_conn( ib_cm_conn_init1 init1, ib_cm_conn_init2 init2 );
+/* NOTE(review): the mthca_* helpers are only declared here -- presumably provided by the mthca driver; confirm. */
+void mthca_dump_qp(struct ib_qp *ibqp);
+int mthca_get_sq_free(struct ib_qp *ibqp);
+int mthca_get_rq_free(struct ib_qp *ibqp);
+/* Buffer handling, node teardown, and the event/completion handlers. */
+int spts_post_recv(struct ib_qp *qp, struct spts_msg *msg );
+int spts_create_messages(struct ib_device *device, struct ib_pd *pd, struct spts_msg *msgs);
+void spts_dump_page( struct spts_msg *msg );
+void spts_destroy_node(struct spts_ib_connection *node);
+void spts_event_handler(struct ib_event *event, void *data);
+void spts_comp_handler(struct ib_cq *cq, void *cq_context);
+
+// int spts_modify_to_rtr(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+// int spts_modify_to_rts(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+
+int spts_poll_all( struct spts_msg **msgs);
+int spts_poll( int dest, struct spts_msg **msgs);
+int spts_send_all(char *buf, int len);
+int spts_send_single(int dest, char *buf, int len);
+int spts_init_node(struct spts_ib_connection *node);
+void spts_init_proc(void);
+
+struct spts_ib_connection *spts_init_conn(struct ib_device *dev);
+int spts_post_recvs(struct spts_ib_connection *node);
+void ib_cm_spts_conn_client(int slid, int dlid);
_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general
To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general