This patch set provides a means of communicating internal data from IB modules
to userspace. It is composed of three components:
1. The main ib_netlink module (ib_netlink.ko), which is independent of the IB modules.
2. "Plug-in" modules, one per client IB module (only ib_netlink_rdma_cm.ko for now).
They depend on (1) and (3). Their role is to keep (1) and (3) independent of
each other, as well as to choose which callback to call, based on the requested op.
The latter doesn't actually happen in ib_netlink_rdma_cm.ko yet because, at the moment,
only one callback is implemented.
3. Additional callbacks, which are implemented inside existing IB modules
(only rdma_cm for now).
They add no dependencies, and existing flows stay untouched.
At the moment the implementation is basic and generic.
ib_netlink uses the standard netlink module and defines a new netlink unit
(NETLINK_INFINIBAND) in netlink.h.
Upon receiving a request from userspace, it finds the target client using a
registration mechanism, allocates a raw buffer (skbuff) for the client IB module
to write its data into, and then forwards the result back.
The size of the buffer space to be allocated is returned by the IB module,
which is also responsible for writing no more than the given size.
The exact format of the returned data is unknown to ib_netlink itself.
It is shared between the kernel and userspace in the form of common headers.
The current choice of format is for reasons of simplicity.
The output and source of a quick-and-dirty userspace demo application are
included below for reference.
Sample output:
Type Device Port PID Net_dev Src Address Dst Address
Space State QPN
IB mthca0 1 27404 ib0 192.168.168.3/7174 N/A
TCP LISTEN 0
IB mthca0 2 27415 ib1 192.168.2.3/7174 N/A
TCP LISTEN 0
IB mthca0 1 30 ib0 192.168.168.3/7174 192.168.168.2/57354
TCP CONNECT 590854
IB mthca0 2 15 ib1 192.168.2.3/7174 192.168.2.4/33290
TCP CONNECT 590855
Source:
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <linux/netlink.h>
#include "rdma_cma.h"
#include "ib_netlink.h"
#include <sys/ioctl.h>
#include <net/if.h>
#include <net/if_arp.h>
/* Payload bytes reserved after the netlink header in the message buffer. */
#define MAX_PAYLOAD 1024
/* Netlink addresses: src_addr is bound to our socket, dest_addr is the send target. */
struct sockaddr_nl src_addr, dest_addr;
/* Message buffer; main() allocates it and uses it for both request and reply. */
struct nlmsghdr *nlh = NULL;
/* msghdr/iovec pair handed to sendmsg() and recvmsg() in main(). */
struct msghdr msg;
struct iovec iov;
/* Netlink socket descriptor opened by main(). */
int sock_fd;
/* Start of the reply payload, as returned by NLMSG_DATA(nlh). */
struct rdma_cm_stats *stats;
/* Records currently being printed while main() walks the reply. */
struct rdma_cm_device_stats *cur_device_stats;
struct rdma_cm_id_stats *cur_id_stats;
/* Read cursor into the reply; advanced past each record as it is consumed. */
void *buff_head;
/* Loop counters used by main(): i over devices, j over ids of a device. */
int i, j;
/*
 * Resolve a network interface index to its name via the SIOCGIFNAME ioctl.
 *
 * index: interface index to look up; 0 is reported as "N/A".
 *
 * Returns a pointer to the interface name, or the literal "N/A" when the
 * index is 0 or the lookup fails. The name lives in a static buffer that is
 * overwritten by the next call, so the function is not reentrant and the
 * result must be consumed (or copied) before calling again.
 */
char *get_ifname(int index)
{
	static struct ifreq req;
	int sock;
	int rc;

	/* Answer before opening a socket that would otherwise go unused. */
	if (index == 0) {
		return "N/A";
	}

	sock = socket(AF_INET, SOCK_DGRAM, 0);
	if (sock < 0) {
		fprintf(stderr, "socket failed while resolving index %d\n", index);
		return "N/A";
	}

	/* Start from a clean request so no name from a previous call lingers. */
	memset(&req, 0, sizeof(req));
	req.ifr_ifindex = index;

	rc = ioctl(sock, SIOCGIFNAME, &req);
	/* The socket is only a handle for the ioctl; release it on every path. */
	close(sock);

	if (rc < 0) {
		fprintf(stderr, "SIOCGIFNAME failed for index %d\n", index);
		return "N/A";
	}
	return req.ifr_name;
}
/*
 * Translate a cma_state value into the text printed in the "State" column.
 * Any value without a dedicated label yields "N/A".
 */
static const char *format_cma_state(enum cma_state s)
{
	const char *label;

	switch (s) {
	case CMA_IDLE:
		label = "IDLE";
		break;
	case CMA_ADDR_QUERY:
		label = "ADDR_QUERY";
		break;
	case CMA_ADDR_RESOLVED:
		label = "ADDR_RESOLVED";
		break;
	case CMA_ROUTE_QUERY:
		label = "ROUTE_QUERY";
		break;
	case CMA_ROUTE_RESOLVED:
		label = "ROUTE_RESOLVED";
		break;
	case CMA_CONNECT:
		label = "CONNECT";
		break;
	case CMA_DISCONNECT:
		label = "DISCONNECT";
		break;
	case CMA_ADDR_BOUND:
		label = "ADDR_BOUND";
		break;
	case CMA_LISTEN:
		label = "LISTEN";
		break;
	case CMA_DEVICE_REMOVAL:
		label = "DEVICE_REMOVAL";
		break;
	case CMA_DESTROYING:
		label = "DESTROYING";
		break;
	default:
		label = "N/A";
		break;
	}

	return label;
}
/*
 * Translate an rdma_port_space value into the text printed in the "Space"
 * column. Any value without a dedicated label yields "N/A".
 */
static const char *format_port_space(enum rdma_port_space ps)
{
	const char *label;

	switch (ps) {
	case RDMA_PS_SDP:
		label = "SDP";
		break;
	case RDMA_PS_IPOIB:
		label = "IPOIB";
		break;
	case RDMA_PS_TCP:
		label = "TCP";
		break;
	case RDMA_PS_UDP:
		label = "UDP";
		break;
	default:
		label = "N/A";
		break;
	}

	return label;
}
/*
 * Translate a node type into the text printed in the "Type" column.
 * The value is matched against the ARPHRD_* hardware-type constants:
 * ARPHRD_INFINIBAND gives "IB", ARPHRD_ETHER gives "IW", anything else "N/A".
 */
static const char *format_node_type(enum rdma_node_type nt)
{
	if (nt == ARPHRD_INFINIBAND)
		return "IB";

	if (nt == ARPHRD_ETHER)
		return "IW";

	return "N/A";
}
/*
 * Render a socket address as "a.b.c.d/port" into buff.
 *
 * addr: address to print; it is read as a struct sockaddr_in without
 *       inspecting the address family.
 * buff: destination for the NUL-terminated text. No length is passed in and
 *       no bound is enforced, so the caller must supply enough room.
 *
 * An all-zero IPv4 address is rendered as "N/A". Always returns 0.
 */
static int format_address(struct sockaddr *addr, char *buff)
{
	struct sockaddr_in *ipv4 = (struct sockaddr_in *)addr;

	/* Nothing meaningful to show for an unset address. */
	if (!ipv4->sin_addr.s_addr) {
		sprintf(buff, "N/A");
		return 0;
	}

	sprintf(buff, "%s/%d", inet_ntoa(ipv4->sin_addr),
		ntohs(ipv4->sin_port));
	return 0;
}
/* Return non-zero when at least `need` bytes lie between pos and end. */
static int reply_has_room(const void *pos, const void *end, size_t need)
{
	return (size_t)((const char *)end - (const char *)pos) >= need;
}

/*
 * Request the RDMA CM statistics from the kernel over a NETLINK_INFINIBAND
 * socket and print them as a table, one header line per reported device
 * followed by one line per id of that device.
 *
 * The reply is walked as: struct rdma_cm_stats, then for each device a
 * struct rdma_cm_device_stats followed by its struct rdma_cm_id_stats
 * records. Every record is checked against the number of bytes actually
 * received before it is read, so a short or truncated reply cannot cause
 * reads past the received data.
 *
 * Returns 0 on success and -1 on any failure; diagnostics go to stderr.
 */
int main()
{
	char tmp_buff[64];
	const char *reply_end;
	ssize_t received;
	int rc = -1;

	sock_fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_INFINIBAND);
	if (sock_fd < 0) {
		fprintf(stderr, "Failed to create socket. Error: %s (%d)\n",
			strerror(errno), errno);
		return -1;
	}

	memset(&src_addr, 0, sizeof(src_addr));
	src_addr.nl_family = AF_NETLINK;
	src_addr.nl_pid = getpid();	/* self pid */
	src_addr.nl_groups = 0;		/* not in mcast groups */
	if (bind(sock_fd, (struct sockaddr *)&src_addr, sizeof(src_addr)) < 0) {
		fprintf(stderr, "Failed to bind socket. Error: %s (%d)\n",
			strerror(errno), errno);
		goto out_close;
	}

	memset(&dest_addr, 0, sizeof(dest_addr));
	dest_addr.nl_family = AF_NETLINK;
	dest_addr.nl_pid = 0;		/* For Linux Kernel */
	dest_addr.nl_groups = 0;	/* unicast */

	/* Zeroed allocation: no uninitialised bytes are sent to the kernel. */
	nlh = calloc(1, NLMSG_SPACE(MAX_PAYLOAD));
	if (nlh == NULL) {
		fprintf(stderr, "Failed to allocate the message buffer\n");
		goto out_close;
	}

	/* Fill the netlink message header */
	nlh->nlmsg_len = NLMSG_SPACE(MAX_PAYLOAD);
	nlh->nlmsg_pid = getpid();	/* self pid */
	nlh->nlmsg_flags = NLM_F_REQUEST;
	nlh->nlmsg_type = IBNL_GET_TYPE(IBNL_RDMA_CM, IBNL_RDMA_CM_STATS);

	iov.iov_base = (void *)nlh;
	iov.iov_len = nlh->nlmsg_len;
	memset(&msg, 0, sizeof(msg));
	msg.msg_name = (void *)&dest_addr;
	msg.msg_namelen = sizeof(dest_addr);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	if (sendmsg(sock_fd, &msg, 0) < 0) {
		fprintf(stderr, "Failed to send request. Error: %s (%d)\n",
			strerror(errno), errno);
		goto out_free;
	}

	/* Reuse the same buffer (and iov) for the reply. */
	memset(nlh, 0, NLMSG_SPACE(MAX_PAYLOAD));
	received = recvmsg(sock_fd, &msg, 0);
	if (received < 0) {
		fprintf(stderr, "Failed to receive reply. Error: %s (%d)\n",
			strerror(errno), errno);
		goto out_free;
	}
	if (msg.msg_flags & MSG_TRUNC) {
		fprintf(stderr,
			"Reply was larger than the receive buffer; output may be incomplete\n");
	}
	if (received < (ssize_t)NLMSG_HDRLEN) {
		fprintf(stderr, "Reply is too short to hold a netlink header\n");
		goto out_free;
	}
	if (nlh->nlmsg_type == NLMSG_ERROR) {
		if (received >= (ssize_t)NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
			struct nlmsgerr *nl_err = NLMSG_DATA(nlh);

			fprintf(stderr, "Kernel reported a netlink error: %s (%d)\n",
				strerror(-nl_err->error), -nl_err->error);
		} else {
			fprintf(stderr, "Kernel reported a netlink error\n");
		}
		goto out_free;
	}

	/* Only bytes that were actually received may be parsed. */
	reply_end = (const char *)nlh + received;

	stats = NLMSG_DATA(nlh);
	if (!reply_has_room(stats, reply_end, sizeof(*stats))) {
		fprintf(stderr, "Reply is too short to hold the statistics header\n");
		goto out_free;
	}
	buff_head = stats + 1;

	for (i = 0; i < stats->num_devices; i++) {
		if (!reply_has_room(buff_head, reply_end,
				    sizeof(*cur_device_stats))) {
			fprintf(stderr, "Reply ended before device record %d\n", i);
			goto out_free;
		}
		cur_device_stats = buff_head;
		buff_head = cur_device_stats + 1;

		printf("%-5s %-8s %-5s %-6s %-10s %-20s %-20s %-6s %-15s %-8s \n",
		       "Type", "Device", "Port", "PID", "Net_dev", "Src Address",
		       "Dst Address", "Space", "State", "QPN");

		for (j = 0; j < cur_device_stats->num_ids; j++) {
			if (!reply_has_room(buff_head, reply_end,
					    sizeof(*cur_id_stats))) {
				fprintf(stderr,
					"Reply ended before id record %d of device record %d\n",
					j, i);
				goto out_free;
			}
			cur_id_stats = buff_head;
			buff_head = cur_id_stats + 1;

			printf("%-5s %-8s %-5d %-6u %-10s ",
			       format_node_type(cur_id_stats->nt),
			       cur_device_stats->name,
			       cur_id_stats->port_num,
			       cur_id_stats->pid,
			       get_ifname(cur_id_stats->bound_dev_if));
			format_address(&cur_id_stats->local_addr, tmp_buff);
			printf("%-20s ", tmp_buff);
			format_address(&cur_id_stats->remote_addr, tmp_buff);
			printf("%-20s ", tmp_buff);
			printf("%-6s %-15s %-8d \n",
			       format_port_space(cur_id_stats->ps),
			       format_cma_state(cur_id_stats->cma_state),
			       cur_id_stats->qp_num);
		}
	}
	rc = 0;

out_free:
	free(nlh);
	nlh = NULL;
out_close:
	close(sock_fd);
	return rc;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html