This patch adds support for Xsigo logical "links", including the Xsigo Directory Service (XDS). From the host's perspective, the XDS provides the list of XCMs (Xsigo Configuration Managers) assigned to the host for a given IB port. The XDS is first located via a standard SA ServiceRecord query.
Signed-off-by: Hal Rosenstock <[EMAIL PROTECTED]> --- drivers/infiniband/ulp/xsigo/xscore/ib_if.c | 837 +++++++++++++++++++++++ drivers/infiniband/ulp/xsigo/xscore/ib_if.h | 119 ++++ drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h | 82 +++ 3 files changed, 1038 insertions(+), 0 deletions(-) create mode 100644 drivers/infiniband/ulp/xsigo/xscore/ib_if.c create mode 100644 drivers/infiniband/ulp/xsigo/xscore/ib_if.h create mode 100644 drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h diff --git a/drivers/infiniband/ulp/xsigo/xscore/ib_if.c b/drivers/infiniband/ulp/xsigo/xscore/ib_if.c new file mode 100644 index 0000000..52f1c13 --- /dev/null +++ b/drivers/infiniband/ulp/xsigo/xscore/ib_if.c @@ -0,0 +1,837 @@ +/* + * Copyright (c) 2006-2008 Xsigo Systems Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/utsname.h> +#include <rdma/ib_cache.h> + +#include "xs_core.h" +#include "ib_if.h" +#include "xcpm_export.h" +#include "xcpm_priv.h" +#include "xsmp.h" + +static struct kmem_cache *ib_if_cachep = NULL; +extern struct kmem_cache *xsmp_cachep; + +/* Used to track outstanding reads and writes at the time of a disconnect */ +static atomic_t msg_refcount; + +extern struct workqueue_struct *xcpm_wq; +extern struct xcpm_info *xcpm; + +static int ib_if_port_setup(struct ib_device *device, int port_num, + struct ib_pd *pd, struct ib_mr *mr, + struct ib_port_info *ib_port, u32 *xds_handle); +static void xds_query_callback(u32 handle, void *context, + int status, struct ib_xds_mad *xds_mad); +static void recv_comp_handler(struct ib_cq *cq, void *cq_context); +static void send_comp_handler(struct ib_cq *cq, void *cq_context); + +static struct ib_sa_client sa_client; + +int ib_if_init(struct ib_client *ibclient) +{ + int ret; + + ib_sa_register_client(&sa_client); + + ret = ib_register_client(ibclient); + if (!ret) + atomic_set(&msg_refcount, 0); + + return ret; +} + +void ib_if_exit(struct ib_client *ibclient) +{ + ib_unregister_client(ibclient); + ib_sa_unregister_client(&sa_client); +} + +/* + * Post a query to the XDS to obtain a list of XCMs assigned to the + * server on this port + */ +int ib_if_query_xds(struct ib_port_info *ib_port) +{ + struct ib_port_attr port_attr; + struct xds_request request; + int ret = 0; + int xds_dlid; + + atomic_inc(&ib_port->refcount); + + if (ib_port->port_down) { + ret = -EAGAIN; + goto ib_if_query_exit2; + } + + xcpm_debug(KERN_INFO, "Querying XDS on port %d\n", ib_port->port_num); + + /* 
Determine the XDS lid to use for the query */ + xds_dlid = ib_port->xds_dlid; + xcpm_debug(KERN_INFO, "XDS query to lid %d\n", xds_dlid); + + /* We have communicated with the XDS (just about to) */ + ib_port->queried = 1; + + port_attr = xsigo_ib_get_port_attr(ib_port->device, ib_port->port_num); + if (port_attr.state != IB_PORT_ACTIVE) { + xcpm_debug(KERN_WARNING, "Port %d not active\n", + ib_port->port_num); + ret = -EAGAIN; + goto ib_if_query_exit2; + } + + if (ib_port->xds_handle != XSIGO_IB_ERROR_HANDLE) { + xsigo_ib_delete_xds_context(ib_port->xds_handle); + ib_port->xds_handle = XSIGO_IB_ERROR_HANDLE; + } + + /* Re-initialize the port, in case any parameters changed */ + ib_if_port_setup(ib_port->device, ib_port->port_num, + ib_port->pd, ib_port->mr, ib_port, + &ib_port->xds_handle); + + + memset(&request, 0, sizeof(request)); + + if (ib_port->xds_handle == XSIGO_IB_ERROR_HANDLE) { + xcpm_debug(KERN_ERR, "Trying to query on an uninitialized " + "query context\n"); + ret = -EINVAL; + goto ib_if_query_exit2; + } + + request.server_record.vm_id = 0; + request.server_record.port_id = cpu_to_be64(ib_port->guid); + + strncpy(request.hostname, init_utsname()->nodename, + XSIGO_MAX_HOSTNAME); + + ret = xsigo_ib_query_xds(ib_port->xds_handle, (u8 *) &request, + sizeof(request), xds_dlid); + if (!ret) + goto ib_if_query_exit; + +ib_if_query_exit2: + if (!atomic_dec_return(&ib_port->refcount)) + wake_up(&xcpm_wait); +ib_if_query_exit: + return ret; +} + +/* Parse the XCM records and send out an XCM list */ +static int parse_and_dispatch_xcmlist(struct ib_port_info *ib_port, + struct ib_xds_mad *xds_mad) +{ + u8 *data = xds_mad->data; + struct server_info sinfo; + int count; + struct xcm_list list; + + /* Get the server info record */ + memcpy((u8 *) &sinfo, data, sizeof(sinfo)); + data += sizeof(sinfo); + sinfo.port_id = cpu_to_be64(sinfo.port_id); + + /* List of XCFMs */ + memcpy((u8 *) &list, data, sizeof(list)); + + if (list.count * sizeof(struct xcfm_record) > + 
((u8 *) xds_mad + sizeof(struct ib_xds_mad) - data) || + list.count > MAX_XCFM_COUNT) { + printk(KERN_ERR PFX "Number of XCFM records '%d' is too high '%d'\n", + list.count, MAX_XCFM_COUNT); + goto dispatch_exit; + } + + if (list.count && list.xcm_version != XCM_REC_VERSION) { + printk(KERN_ERR PFX "xcm_version '%d' mismatch, expected '%d'\n", + list.xcm_version, XCM_REC_VERSION); + goto dispatch_exit; + } + + xcpm_debug(KERN_INFO, + "SA response: %d records (port %d) xcm_version %d\n", + list.count, ib_port->port_num, list.xcm_version); + + for (count = 0; count < list.count; count++) { + list.xcms[count].xcm_lid = cpu_to_be16(list.xcms[count].xcm_lid); + /* Keep the GID components in the network byte order */ + + xcpm_debug(KERN_INFO, "Count %d, xcm_lid %d, port_id 0x%Lx\n", + count, list.xcms[count].xcm_lid, + cpu_to_be64(list.xcms[count].port_id)); + } + + allocate_port_and_links(ib_port, &list); + +dispatch_exit: + return list.count; +} + +static void xds_query_callback(u32 handle, void *context, + int status, struct ib_xds_mad *xds_mad) +{ + struct ib_port_info *ib_port = (struct ib_port_info *) context; + + if (status == IB_WC_SUCCESS) { + if (parse_and_dispatch_xcmlist(ib_port, xds_mad) == 0) + ib_port->fast_poll = 1; + else + ib_port->fast_poll = 0; + } else { + xcpm_debug(KERN_ERR, + "Error sending query XDS MAD, status %d (port %d)\n", + status, ib_port->port_num); + ib_port->fast_poll = 0; + } + + if (!atomic_dec_return(&ib_port->refcount)) + wake_up(&xcpm_wait); +} + +/* + * Port sweeping: check all ports periodically to see if + * any of the links need to be reconnected + */ +static void port_sweep_handler(struct work_struct *work) +{ + struct ib_port_info *ib_port = container_of(work, struct ib_port_info, + port_sweep_work.work); + + /* Whether we need to query the SA */ + if (ib_port->queried) + return; + + ib_port->queried = 1; + + ib_if_sa_query_xds(ib_port); +} + +static int ib_if_port_setup(struct ib_device *device, int port_num, + struct ib_pd 
*pd, struct ib_mr *mr, + struct ib_port_info *ib_port, u32 *xds_handle) +{ + int ret; + struct xsigo_ib_query_info query_info = { + .device = device, + .port_num = port_num, + .mgmt_class = XSIGO_MGMT_CLASS, + .mgmt_class_version = XSIGO_MGMT_CLASS_VERSION, + .attr_id = __constant_cpu_to_be16(IB_MAD_ATTR_XCM_REQUEST), + .context = ib_port, + .callback = &xds_query_callback + }; + struct ib_device_attr dev_attr; + + xcpm_debug(KERN_INFO, "ib_if_port_init port %d...\n", port_num); + + ib_port->device = device; + ib_port->port_num = port_num; + ib_port->pd = pd; + ib_port->mr = mr; + + ib_port->gid = xsigo_ib_get_port_gid(device, port_num); + + /* Link 'guid' stored in host byte order like all other fields */ + ib_port->guid = be64_to_cpu(ib_port->gid.global.interface_id); + ib_port->lid = xsigo_ib_get_port_lid(device, port_num); + + ret = ib_query_device(device, &dev_attr); + if (ret) { + printk(KERN_ERR PFX "ib_query_device %s failed %d\n", + device->name, ret); + ib_port->fw_ver = 0; + ib_port->hw_ver = 0; + ib_port->vendor_part_id = 0; + ret = 0; + } else { + ib_port->fw_ver = dev_attr.fw_ver; + ib_port->hw_ver = dev_attr.hw_ver; + ib_port->vendor_part_id = dev_attr.vendor_part_id; + } + + *xds_handle = xsigo_ib_create_xds_context(&query_info); + if (*xds_handle == XSIGO_IB_ERROR_HANDLE) { + printk(KERN_ERR PFX "xsigo_ib_create_xds_context failed on %s port %d\n", + device->name, port_num); + ret = -EINVAL; + goto port_setup_exit; + } + +port_setup_exit: + return ret; +} + +/* XDS query implementation */ +void service_rec_callback(int status, struct ib_sa_service_rec *resp, + void *context) +{ + struct ib_port_info *ib_port = (struct ib_port_info *) context; + + xcpm_debug(KERN_INFO, "Service rec callback, resp: %p, status: %d\n", + resp, status); + + if (!resp || status) { + xcpm_debug(KERN_WARNING, "Error %d during SA XDS query\n", status); + ib_port->fast_poll = 0; + } else { + ib_port->xds_dlid = cpu_to_be16(resp->data16[0]); + + xcpm_debug(KERN_INFO, "XDS lid 
%d\n", ib_port->xds_dlid); + + ib_if_query_xds(ib_port); + } + + schedule_port_sweep(ib_port, ib_port->fast_poll); + + if (!atomic_dec_return(&ib_port->refcount)) + wake_up(&xcpm_wait); +} + +int ib_if_sa_query_xds(struct ib_port_info *ib_port) +{ + struct ib_sa_service_rec service_rec; + struct ib_sa_query *query; + struct ib_port_attr port_attr; + int ret; + + xcpm_debug(KERN_INFO, "SA XDS query on port %d\n", ib_port->port_num); + + atomic_inc(&ib_port->refcount); + + port_attr = xsigo_ib_get_port_attr(ib_port->device, ib_port->port_num); + if (port_attr.state != IB_PORT_ACTIVE) { + xcpm_debug(KERN_WARNING, "Port %d not active\n", + ib_port->port_num); + ret = -EAGAIN; + goto xds_query_exit; + } + + memset(&service_rec, 0, sizeof(service_rec)); + strcpy(service_rec.name, XSIGO_XDS_STRING); + + ret = ib_sa_service_rec_query(&sa_client, ib_port->device, ib_port->port_num, + IB_MGMT_METHOD_GET, &service_rec, + IB_SA_SERVICE_REC_SERVICE_NAME, 1000, + GFP_ATOMIC, &service_rec_callback, + ib_port, &query); + + xcpm_debug(KERN_INFO, "ib_sa_service_rec_query, return value: %d\n", ret); + +xds_query_exit: + if (ret) { + if (!atomic_dec_return(&ib_port->refcount)) + wake_up(&xcpm_wait); + schedule_port_sweep(ib_port, ib_port->fast_poll); + } + + return ret; +} + +/* + * Initialize the link: the IB specific part: + * setup ib_link_info: queue pair, CQ + * Initialize the 'ib_port_info' structure + */ +int ib_if_port_init(struct ib_device *device, int port_num, struct ib_pd *pd, + struct ib_mr *mr, struct ib_port_info *ib_port, + u32 *xds_handle) +{ + ib_port->port_down = 0; + + INIT_DELAYED_WORK(&ib_port->port_sweep_work, &port_sweep_handler); + + atomic_set(&ib_port->refcount, 0); + + ib_port->fast_poll = 0; + + return ib_if_port_setup(device, port_num, pd, mr, ib_port, xds_handle); +} + +void ib_if_port_exit(struct ib_port_info *ib_port) +{ + xcpm_debug(KERN_INFO, "ib_if_port_exit...\n"); + + ib_port->port_down = 1; + + /* Wait if we are in the init stage */ + if 
(!wait_event_timeout(xcpm_wait, + !atomic_read(&ib_port->refcount), 10 * HZ)) + xcpm_debug(KERN_WARNING, + "Warning: Timed out waiting for the reference count\n"); + + if (ib_port->xds_handle != XSIGO_IB_ERROR_HANDLE) { + xsigo_ib_delete_xds_context(ib_port->xds_handle); + ib_port->xds_handle = XSIGO_IB_ERROR_HANDLE; + } + + /* No more port sweeping */ + cancel_delayed_work(&ib_port->port_sweep_work); + flush_workqueue(xcpm_wq); + cancel_delayed_work(&ib_port->port_sweep_work); +} + +int ib_if_link_match(struct xcfm_record *pxcm, struct ib_link_info *iblink) +{ + if (pxcm == NULL || iblink == NULL) { + xcpm_debug(KERN_WARNING, "Null params\n"); + return 0; + } + + return (pxcm->xcm_lid == iblink->link_xcm.xcm_lid && + pxcm->port_id == iblink->link_xcm.port_id); +} + +/* + * Initialize the details of the logical link + * Initialize the 'ib_link_info' structure + */ +void ib_if_link_init(int link_index, struct xcfm_record *pxcm, + struct ib_port_info *pport, struct ib_link_info *iblink) +{ + /* Assign HCA and port and all other parameters of the XCM */ + iblink->link_xcm = *pxcm; + + /* The port on which the link exists */ + iblink->port = pport; + + iblink->link_index = link_index; + iblink->handle = XSIGO_IB_ERROR_HANDLE; + iblink->connected = 0; +} + +/* Free any buffers for sends that did not complete */ +static void ib_if_free_posted_bufs(u32 handle) +{ + int count; + + if (handle == XSIGO_IB_ERROR_HANDLE) { + xcpm_debug(KERN_INFO, "Incorrect handle\n"); + return; + } + + for (count = 0; count < NUM_ELEMENTS; count++) { + if (xsigo_ib_send_buf_posted(handle, count)) { + void *buf = xsigo_ib_get_send_buf_address(handle, count); + + xsigo_ib_unmap_send_buf(handle, count); + xcpm_debug(KERN_WARNING, + "Freeing posted buffer: handle %d, index %d\n", + handle, count); + + if (buf) { + if (xsmp_is_local_msg(buf)) + kmem_cache_free(xsmp_cachep, buf); + else + kfree(buf); + } + } + } +} + +void ib_if_link_exit(struct ib_link_info *iblink) +{ + int time_rem; + + 
xcpm_debug(KERN_INFO, "enter (handle %d)...\n", iblink->handle); + + if (iblink->handle != XSIGO_IB_ERROR_HANDLE) { + iblink->connected = 0; + + time_rem = wait_event_timeout(xcpm_wait, + !atomic_read(&msg_refcount), + 10 * HZ); + if (!time_rem) + xcpm_debug(KERN_WARNING, + "Timed out waiting for the reference count, " + "value %d\n", atomic_read(&msg_refcount)); + + ib_if_free_posted_bufs(iblink->handle); + + xsigo_ib_disconnect(iblink->handle); + iblink->handle = XSIGO_IB_ERROR_HANDLE; + } + + xcpm_debug(KERN_INFO, "exit...\n"); +} + +static void ib_if_connect_callback(u32 handle, void *context, + enum xsigo_ib_connect_status status) +{ + struct ib_link_info *iblink = (struct ib_link_info *) context; + int link_index; + struct ib_port_info *ib_port; + + xcpm_debug(KERN_INFO, "Connection status: %d\n", status); + + if (!iblink) { + xcpm_debug(KERN_WARNING, "null context\n"); + goto callback_exit; + } + + ib_port = iblink->port; + if (!ib_port) { + xcpm_debug(KERN_WARNING, "Uninitialized link\n"); + goto callback_exit; + } + + link_index = iblink->link_index; + + if (status != XSIGO_IB_SUCCESS) { + iblink->connected = 0; + xcpm_debug(KERN_WARNING, "Link %d not connected (status 0x%x)\n", + link_index, status); + + /* + * We want to time out on the link if the other side + * disconnects and cleanup the service resources also + */ + if (status != XSIGO_IB_DISCONNECT_RECEIVED) + bring_down_link(link_index); + + goto callback_end; + } + + xcpm_debug(KERN_INFO, "Link %d: IB connection established, " + "starting XSMP connection sequence...\n", link_index); + iblink->connected = 1; + startup_link(link_index, ib_port->fw_ver, ib_port->hw_ver, + ib_port->vendor_part_id); + +callback_end: + atomic_dec(&iblink->port->refcount); + wake_up(&xcpm_wait); +callback_exit: + return; +} + +/* Connect to the XCM and update the corresponding link_info structure */ +int ib_if_link_connect(int link_index, struct ib_link_info *iblink) +{ + u32 handle = XSIGO_IB_ERROR_HANDLE; + u16 
port_lid; + int ret; + struct xsigo_ib_connect_info connect_info; + union ib_gid gid; + + memset(&connect_info, 0, sizeof(connect_info)); + + connect_info.device = iblink->port->device; + connect_info.port_num = iblink->port->port_num; + connect_info.num_recv_elements = NUM_ELEMENTS; + connect_info.num_send_elements = NUM_ELEMENTS; + connect_info.recv_buf_size = MAX_BUF_SIZE; + connect_info.send_handler = &send_comp_handler; + connect_info.recv_handler = &recv_comp_handler; + connect_info.private_data = NULL; + connect_info.private_data_len = 0; + connect_info.pd = iblink->port->pd; + connect_info.mr = iblink->port->mr; + connect_info.callback = &ib_if_connect_callback; + connect_info.context = (void *) iblink; + connect_info.no_rdma = 1; + + connect_info.dgid.global.interface_id = iblink->link_xcm.port_id; + connect_info.sgid = iblink->port->gid; + connect_info.dlid = cpu_to_be16(iblink->link_xcm.xcm_lid); + port_lid = iblink->port->lid; + connect_info.slid = cpu_to_be16(port_lid); + + ret = ib_get_cached_gid(iblink->port->device, iblink->port->port_num, + 0, &gid); + if (ret) + connect_info.dgid.global.subnet_prefix = cpu_to_be64(DEFAULT_SUBNET_PREFIX); + else + connect_info.dgid.global.subnet_prefix = gid.global.subnet_prefix; + + connect_info.service_id = cpu_to_be64(SERVICE_ID); + + xcpm_debug(KERN_INFO, "GIDs: local <0x%Lx 0x%Lx>, " + "remote <0x%Lx 0x%Lx> service_id <0x%llx>\n", + be64_to_cpu(connect_info.sgid.global.subnet_prefix), + be64_to_cpu(connect_info.sgid.global.interface_id), + be64_to_cpu(connect_info.dgid.global.subnet_prefix), + be64_to_cpu(connect_info.dgid.global.interface_id), + be64_to_cpu(connect_info.service_id)); + + xcpm_debug(KERN_INFO, "LIDs: local <0x%x>, remote <0x%x>\n", + be16_to_cpu(connect_info.slid), + be16_to_cpu(connect_info.dlid)); + + atomic_inc(&iblink->port->refcount); + + handle = xsigo_ib_connect(&connect_info); + iblink->handle = handle; + if (handle == XSIGO_IB_ERROR_HANDLE) { + xcpm_debug(KERN_ERR, + "Error getting a 
handle for link %d\n", + link_index); + if (!atomic_dec_return(&iblink->port->refcount)) + wake_up(&xcpm_wait); + ret = -EINVAL; + } else { + xcpm_debug(KERN_INFO, + "Retrieved a handle <0x%x> for link %d\n", + handle, link_index); + + ret = 0; + } + + return ret; +} + +struct ib_cq *ib_if_get_recv_cq(struct ib_link_info *link) +{ + if (!link->connected) + return NULL; + + return xsigo_ib_get_recv_cq(link->handle); +} + +/* Receive message handling */ + +/* + * Completion queue handler for all queue pairs for all links + * 'cq_context' is the link number on which the message came on + * 'cq' is the completion queue associated with the link + */ +void recv_comp_handler(struct ib_cq *cq, void *cq_context) +{ + int handle = (unsigned long) cq_context; + struct ib_link_info *ib_link = xsigo_ib_get_handle_context(handle); + int link; + + if (!ib_link) + return; + + link = ib_link->link_index; + + /* Schedule 'work' to handle the events if the link is up */ + if (xcpm->links[link].link_state != LINK_DOWN && + xcpm->links[link].link_state != LINK_DEAD && + !atomic_read(&xcpm->xcpm_down)) + queue_work(xcpm_wq, &xcpm->links[link].msg_dispatch_work); +} + +/* Actually handle the messages received from the XCM */ +void ib_if_recv_comp_handler(struct ib_link_info *iblink, struct ib_cq *cq) +{ + struct ib_wc *wcp = kmem_cache_alloc(ib_if_cachep, GFP_ATOMIC); + u32 conn_handle; + int count, num_completions; + + if (!wcp) { + printk(KERN_ERR PFX "%s cache allocation failed\n", __FUNCTION__); + return; + } + + atomic_inc(&msg_refcount); + + if (!iblink) { + xcpm_debug(KERN_WARNING, "Warning: iblink is null\n"); + goto recv_exit; + } + + if (!cq) { + xcpm_debug(KERN_WARNING, "Null CQ, probably a deleted handle\n"); + goto recv_exit; + } + + /* Check the link first before handling messages */ + if (!iblink->connected) { + xcpm_debug(KERN_WARNING, + "Message received for a non-existent link (probably " + "a phantom CQ), ignoring...\n"); + goto recv_exit; + } + + conn_handle = 
iblink->handle; + + num_completions = xsigo_ib_get_completions(conn_handle, RECV_CQ, + NUM_ELEMENTS, wcp); + + for (count = 0; count < num_completions; count++) { + int index = wcp[count].wr_id; + struct ib_wc *wc = &wcp[count]; + u8 *buf; + int length; + + if (wc->status != IB_WC_SUCCESS) { + if (wc->status != IB_WC_WR_FLUSH_ERR) + xcpm_debug(KERN_ERR, + "Receive error: id %d, " + "status %d (handle %d)\n", + index, wc->status, conn_handle); + continue; + } + + if (iblink->connected) { + buf = xsigo_ib_get_recv_buf_address(conn_handle, index); + length = wc->byte_len; + xsigo_ib_unmap_recv_buf(conn_handle, index); + process_incoming_msg(iblink->link_index, buf, length); + } + + if (iblink->connected) + /* + * Enqueue the receive buffer back to + * get further messages + */ + xsigo_ib_post_receive(conn_handle, index); + else { + xcpm_debug(KERN_WARNING, + "Handle %d disconnected, skipping buffer " + "posting\n", conn_handle); + continue; + } + } + + if (iblink->connected) + xsigo_ib_arm_cq(conn_handle, RECV_CQ); + else + xcpm_debug(KERN_WARNING, + "Handle %d disconnected, skipping CQ arming\n", + conn_handle); + +recv_exit: + atomic_dec(&msg_refcount); + wake_up(&xcpm_wait); + kmem_cache_free(ib_if_cachep, wcp); +} + +/* End - handling of receive messages */ + +/* Handling sends */ + +/* Send message completion handler */ +static void send_comp_handler(struct ib_cq *send_cq, void *cq_context) +{ + struct ib_wc *wcp = kmem_cache_alloc(ib_if_cachep, GFP_ATOMIC); + u32 conn_handle = (u32)(unsigned long) cq_context; + struct ib_link_info *iblink; + int count = 0, num_completions; + + if (!wcp) { + printk(KERN_ERR PFX "%s cache allocation failed\n", __FUNCTION__); + return; + } + + iblink = xsigo_ib_get_handle_context(conn_handle); + if (!iblink) { + xcpm_debug(KERN_WARNING, "Warning: iblink is null\n"); + goto send_exit; + } + + num_completions = xsigo_ib_get_completions(conn_handle, SEND_CQ, + NUM_ELEMENTS, wcp); + + for (count = 0; count < num_completions; count++) { 
+ int index = wcp[count].wr_id; + struct ib_wc *wc = &wcp[count]; + void *buf; + + if (wc->status != IB_WC_SUCCESS) { + if (wc->status != IB_WC_WR_FLUSH_ERR) + xcpm_debug(KERN_ERR, + "Send error: id %d, " + "status %d (handle %d)\n", + index, wc->status, conn_handle); + } + + buf = xsigo_ib_get_send_buf_address(conn_handle, index); + xsigo_ib_unmap_send_buf(conn_handle, index); + + if (buf) { + if (xsmp_is_local_msg(buf)) + kmem_cache_free(xsmp_cachep, buf); + else + kfree(buf); + } + + atomic_dec(&msg_refcount); + wake_up(&xcpm_wait); + } + + /* + * Accept further notifications + * This is unconditional unlike the recv completion handler + */ + xsigo_ib_arm_cq(conn_handle, SEND_CQ); + +send_exit: + kmem_cache_free(ib_if_cachep, wcp); +} + +/* Send out a message on the queue pair */ +int ib_if_send_msg(struct ib_link_info *iblink, u8 *data, int length) +{ + int ret; + + if (!iblink->connected) { + xcpm_debug(KERN_WARNING, "Link not connected, discarding send\n"); + ret = -EINVAL; + goto ib_if_send_msg_exit; + } + + xcpm_debug(KERN_DEBUG, "Dispatching a send (handle %d, length %d)...\n", + iblink->handle, length); + + atomic_inc(&msg_refcount); + + /* A check should have been made for the link status before this point */ + ret = xsigo_ib_send_msg(iblink->handle, data, length); + +ib_if_send_msg_exit: + return ret; +} + +/* Memory pools */ +int alloc_ib_if_mem_pool(void) +{ + ib_if_cachep = kmem_cache_create("xscore_xcpm_ib_if_cache", + sizeof(struct ib_wc) * NUM_ELEMENTS, 0, + 0, NULL); + + return (ib_if_cachep == NULL) ? -1 : 0; +} + +void dealloc_ib_if_mem_pool(void) +{ + if (ib_if_cachep) + kmem_cache_destroy(ib_if_cachep); +} diff --git a/drivers/infiniband/ulp/xsigo/xscore/ib_if.h b/drivers/infiniband/ulp/xsigo/xscore/ib_if.h new file mode 100644 index 0000000..8a6cb56 --- /dev/null +++ b/drivers/infiniband/ulp/xsigo/xscore/ib_if.h @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2006-2008 Xsigo Systems Inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef __IB_IF_H__ +#define __IB_IF_H__ + +#include <rdma/ib_verbs.h> +#include <rdma/ib_mad.h> +#include <linux/err.h> +#include <linux/dma-mapping.h> +#include <linux/types.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_mad.h> +#include <rdma/ib_cm.h> +#include <asm/byteorder.h> + +#include "xsigoib.h" +#include "ib_if_xds.h" + +#define MAD_TIMEOUT_MS 1000 + +#define NUM_ELEMENTS 288 + +#define MAX_BUF_SIZE 1024 +#define XSIGO_XDS_STRING "XSIGOXDS" + +struct link_msg_buf { + void *vaddr; + u64 dma_addr; + int length; + int posted; +}; + +/* Parameters for the logical link */ +struct ib_link_info { + struct ib_port_info *port; /* The port used on the HCA */ + struct xcfm_record link_xcm; /* Remote endpoint: the XCM */ + u32 link_index; /* Index referenced by the XCPM core */ + u32 handle; /* XsigoIB connection handle */ + + /* + * State of the IB connection + * We need this separate from the handle value because + * sometimes we get messages on stale handle values + */ + int connected; +}; + +struct ib_port_info { + u64 guid; /* Host byte order */ + u16 lid; /* Host byte order */ + struct ib_device *device; /* The HCA that the port belongs to */ + int port_num; + struct ib_pd *pd; + struct ib_mr *mr; + union ib_gid gid; + u32 xds_handle; + int queried; /* 0 => Query the XDS */ + u64 fw_ver; + u32 vendor_part_id; + u32 hw_ver; + struct delayed_work port_sweep_work; + int used; + atomic_t refcount; + int port_down; + u16 xds_dlid; + int fast_poll; +} __attribute__ ((packed)); + +int ib_if_port_init(struct ib_device *device, int port_num, + struct ib_pd *pd, struct ib_mr *mr, + struct ib_port_info *ib_port, u32 *xds_handle); +void ib_if_port_exit(struct ib_port_info *ib_port); +void ib_if_link_init(int link_index, struct xcfm_record *pxcm, + struct ib_port_info *pport, struct ib_link_info *plink); +void ib_if_link_exit(struct ib_link_info *iblink); +int ib_if_link_connect(int port_index, struct ib_link_info *iblink); +int ib_if_post_receive(struct 
ib_link_info *ib_link, int id); +int ib_if_send_msg(struct ib_link_info *ib_link, u8 *data, int length); +int ib_if_init(struct ib_client *ibclient); +void ib_if_exit(struct ib_client *ibclient); +void ib_if_recv_comp_handler(struct ib_link_info *iblink, struct ib_cq *cq); +struct ib_cq *ib_if_get_recv_cq(struct ib_link_info *link); +int ib_if_link_match(struct xcfm_record *pxcm, struct ib_link_info *iblink); +int alloc_ib_if_mem_pool(void); +void dealloc_ib_if_mem_pool(void); +int ib_if_sa_query_xds(struct ib_port_info *); + +#endif /* __IB_IF_H__ */ diff --git a/drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h b/drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h new file mode 100644 index 0000000..3baa603 --- /dev/null +++ b/drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2006-2008 Xsigo Systems Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __IB_IF_XDS_H__ +#define __IB_IF_XDS_H__ + +#include <rdma/ib_verbs.h> +#include <rdma/ib_mad.h> + +#define XCM_REC_VERSION 1 +#define MAX_XCFM_COUNT 8 + +#define XSIGO_MGMT_CLASS 0x0B +#define XSIGO_MGMT_CLASS_VERSION 0x02 + +#define IB_MAD_ATTR_XCM_REQUEST 0xB002 + +#define XSIGO_MGMT_METHOD_GET IB_MGMT_METHOD_GET +#define XSIGO_MGMT_METHOD_SET IB_MGMT_METHOD_SET + +#define XSIGO_MAX_HOSTNAME 65 + +struct xcfm_record { + u64 port_id; + u16 xcm_lid; /* lid of the XCM port */ + u8 reserved[10]; +} __attribute__ ((packed)); + +struct xcm_list { + u8 count; + u8 xcm_version; + u8 reserved[2]; + struct xcfm_record xcms[MAX_XCFM_COUNT]; +}; + +struct server_info { + u32 vm_id; + u64 port_id; +} __attribute__ ((packed)); + +struct xds_request { + struct server_info server_record; + char hostname[XSIGO_MAX_HOSTNAME]; +} __attribute__ ((packed)); + +struct ib_xds_mad { + struct ib_mad_hdr mad_hdr; + u8 reserved[IB_MGMT_SA_HDR - IB_MGMT_MAD_HDR]; + u8 data[IB_MGMT_SA_DATA]; +} __attribute__ ((packed)); + +#endif /*__IB_IF_XDS_H__ */ -- 1.5.2 _______________________________________________ ewg mailing list ewg@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg