On Thu, Dec 10, 2009 at 10:33:53AM -0700, Jason Gunthorpe wrote:
> Could you prepare this based on Roland's tree? This patch won't apply.
> 

I quote two patches, one for libibverbs based on 74638ac, and the
other for libmlx4 based on 444f634. I changed the padding handling as
you requested for libibverbs. You also need to apply a patch to the
kernel driver to match the new values for link_layer. I put it here
too.

libibverbs:


diff --git a/examples/devinfo.c b/examples/devinfo.c
index 84f95c7..393ec04 100644
--- a/examples/devinfo.c
+++ b/examples/devinfo.c
@@ -184,6 +184,19 @@ static int print_all_port_gids(struct ibv_context *ctx, 
uint8_t port_num, int tb
        return rc;
 }
 
+static const char *link_layer_str(uint8_t link_layer)
+{
+       switch (link_layer) {
+       case IBV_LINK_LAYER_UNSPECIFIED:
+       case IBV_LINK_LAYER_INFINIBAND:
+               return "IB";
+       case IBV_LINK_LAYER_ETHERNET:
+               return "Ethernet";
+       default:
+               return "Unknown";
+       }
+}
+
 static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 {
        struct ibv_context *ctx;
@@ -284,6 +297,7 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t 
ib_port)
                printf("\t\t\tsm_lid:\t\t\t%d\n", port_attr.sm_lid);
                printf("\t\t\tport_lid:\t\t%d\n", port_attr.lid);
                printf("\t\t\tport_lmc:\t\t0x%02x\n", port_attr.lmc);
+               printf("\t\t\tlink_layer:\t\t%s\n", 
link_layer_str(port_attr.link_layer));
 
                if (verbose) {
                        printf("\t\t\tmax_msg_sz:\t\t0x%x\n", 
port_attr.max_msg_sz);
diff --git a/examples/pingpong.c b/examples/pingpong.c
index b916f59..d4a46e4 100644
--- a/examples/pingpong.c
+++ b/examples/pingpong.c
@@ -31,6 +31,8 @@
  */
 
 #include "pingpong.h"
+#include <arpa/inet.h>
+#include <stdlib.h>
 
 enum ibv_mtu pp_mtu_to_enum(int mtu)
 {
@@ -53,3 +55,10 @@ uint16_t pp_get_local_lid(struct ibv_context *context, int 
port)
 
        return attr.lid;
 }
+
+int pp_get_port_info(struct ibv_context *context, int port,
+                    struct ibv_port_attr *attr)
+{
+       return ibv_query_port(context, port, attr);
+}
+
diff --git a/examples/pingpong.h b/examples/pingpong.h
index 71d7c3f..16d3466 100644
--- a/examples/pingpong.h
+++ b/examples/pingpong.h
@@ -37,5 +37,7 @@
 
 enum ibv_mtu pp_mtu_to_enum(int mtu);
 uint16_t pp_get_local_lid(struct ibv_context *context, int port);
+int pp_get_port_info(struct ibv_context *context, int port,
+                    struct ibv_port_attr *attr);
 
 #endif /* IBV_PINGPONG_H */
diff --git a/examples/rc_pingpong.c b/examples/rc_pingpong.c
index fa969e0..4d0bd0d 100644
--- a/examples/rc_pingpong.c
+++ b/examples/rc_pingpong.c
@@ -67,6 +67,8 @@ struct pingpong_context {
        int                      size;
        int                      rx_depth;
        int                      pending;
+       struct ibv_port_attr     portinfo;
+       union ibv_gid            dgid;
 };
 
 struct pingpong_dest {
@@ -94,6 +96,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int 
port, int my_psn,
                        .port_num       = port
                }
        };
+
+       if (ctx->dgid.global.interface_id) {
+               attr.ah_attr.is_global = 1;
+               attr.ah_attr.grh.hop_limit = 1;
+               attr.ah_attr.grh.dgid = ctx->dgid;
+       }
        if (ibv_modify_qp(ctx->qp, &attr,
                          IBV_QP_STATE              |
                          IBV_QP_AV                 |
@@ -289,11 +297,11 @@ out:
 
 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int 
size,
                                            int rx_depth, int port,
-                                           int use_event)
+                                           int use_event, int is_server)
 {
        struct pingpong_context *ctx;
 
-       ctx = malloc(sizeof *ctx);
+       ctx = calloc(1, sizeof *ctx);
        if (!ctx)
                return NULL;
 
@@ -306,7 +314,7 @@ static struct pingpong_context *pp_init_ctx(struct 
ibv_device *ib_dev, int size,
                return NULL;
        }
 
-       memset(ctx->buf, 0, size);
+       memset(ctx->buf, 0x7b + is_server, size);
 
        ctx->context = ibv_open_device(ib_dev);
        if (!ctx->context) {
@@ -481,6 +489,7 @@ static void usage(const char *argv0)
        printf("  -n, --iters=<iters>    number of exchanges (default 1000)\n");
        printf("  -l, --sl=<sl>          service level value\n");
        printf("  -e, --events           sleep on CQ events (default poll)\n");
+       printf("  -g, --gid=<remote gid> gid of the other port\n");
 }
 
 int main(int argc, char *argv[])
@@ -504,6 +513,7 @@ int main(int argc, char *argv[])
        int                      rcnt, scnt;
        int                      num_cq_events = 0;
        int                      sl = 0;
+       char                    *grh = NULL;
 
        srand48(getpid() * time(NULL));
 
@@ -520,10 +530,11 @@ int main(int argc, char *argv[])
                        { .name = "iters",    .has_arg = 1, .val = 'n' },
                        { .name = "sl",       .has_arg = 1, .val = 'l' },
                        { .name = "events",   .has_arg = 0, .val = 'e' },
+                       { .name = "gid",      .has_arg = 1, .val = 'g' },
                        { 0 }
                };
 
-               c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:e", long_options, 
NULL);
+               c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:", 
long_options, NULL);
                if (c == -1)
                        break;
 
@@ -576,6 +587,10 @@ int main(int argc, char *argv[])
                        ++use_event;
                        break;
 
+               case 'g':
+                       grh = strdupa(optarg);
+                       break;
+
                default:
                        usage(argv[0]);
                        return 1;
@@ -615,7 +630,7 @@ int main(int argc, char *argv[])
                }
        }
 
-       ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event);
+       ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event, 
!servername);
        if (!ctx)
                return 1;
 
@@ -631,17 +646,31 @@ int main(int argc, char *argv[])
                        return 1;
                }
 
-       my_dest.lid = pp_get_local_lid(ctx->context, ib_port);
-       my_dest.qpn = ctx->qp->qp_num;
-       my_dest.psn = lrand48() & 0xffffff;
-       if (!my_dest.lid) {
-               fprintf(stderr, "Couldn't get local LID\n");
+
+       if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+               fprintf(stderr, "Couldn't get port info\n");
                return 1;
        }
 
+       my_dest.lid = ctx->portinfo.lid;
+       if (ctx->portinfo.link_layer == IBV_LINK_LAYER_ETHERNET) {
+               if (!grh) {
+                       fprintf(stderr, "Must supply remote gid\n");
+                       return 1;
+               }
+               inet_pton(AF_INET6, grh, &ctx->dgid);
+       } else {
+               if (!my_dest.lid) {
+                       fprintf(stderr, "Couldn't get local LID\n");
+                       return 1;
+               }
+       }
+       my_dest.qpn = ctx->qp->qp_num;
+       my_dest.psn = lrand48() & 0xffffff;
        printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
               my_dest.lid, my_dest.qpn, my_dest.psn);
 
+
        if (servername)
                rem_dest = pp_client_exch_dest(servername, port, &my_dest);
        else
@@ -706,6 +735,7 @@ int main(int argc, char *argv[])
                                        fprintf(stderr, "poll CQ failed %d\n", 
ne);
                                        return 1;
                                }
+
                        } while (!use_event && ne < 1);
 
                        for (i = 0; i < ne; ++i) {
diff --git a/examples/srq_pingpong.c b/examples/srq_pingpong.c
index 1e36c57..eda9013 100644
--- a/examples/srq_pingpong.c
+++ b/examples/srq_pingpong.c
@@ -71,6 +71,8 @@ struct pingpong_context {
        int                      num_qp;
        int                      rx_depth;
        int                      pending[MAX_QP];
+       struct ibv_port_attr     portinfo;
+       union ibv_gid            dgid;
 };
 
 struct pingpong_dest {
@@ -101,6 +103,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, 
int port, enum ibv_mtu m
                                .port_num       = port
                        }
                };
+
+               if (ctx->dgid.global.interface_id) {
+                       attr.ah_attr.is_global = 1;
+                       attr.ah_attr.grh.hop_limit = 1;
+                       attr.ah_attr.grh.dgid = ctx->dgid;
+               }
                if (ibv_modify_qp(ctx->qp[i], &attr,
                                  IBV_QP_STATE              |
                                  IBV_QP_AV                 |
@@ -327,7 +335,7 @@ static struct pingpong_context *pp_init_ctx(struct 
ibv_device *ib_dev, int size,
        struct pingpong_context *ctx;
        int i;
 
-       ctx = malloc(sizeof *ctx);
+       ctx = calloc(1, sizeof *ctx);
        if (!ctx)
                return NULL;
 
@@ -551,6 +559,7 @@ static void usage(const char *argv0)
        printf("  -n, --iters=<iters>    number of exchanges per QP(default 
1000)\n");
        printf("  -l, --sl=<sl>          service level value\n");
        printf("  -e, --events           sleep on CQ events (default poll)\n");
+       printf("  -g, --gid=<remote gid> gid of the other port\n");
 }
 
 int main(int argc, char *argv[])
@@ -578,6 +587,7 @@ int main(int argc, char *argv[])
        int                      i;
        int                      num_cq_events = 0;
        int                      sl = 0;
+       char                    *grh = NULL;
 
        srand48(getpid() * time(NULL));
 
@@ -595,10 +605,11 @@ int main(int argc, char *argv[])
                        { .name = "iters",    .has_arg = 1, .val = 'n' },
                        { .name = "sl",       .has_arg = 1, .val = 'l' },
                        { .name = "events",   .has_arg = 0, .val = 'e' },
+                       { .name = "gid",      .has_arg = 1, .val = 'g' },
                        { 0 }
                };
 
-               c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:e", 
long_options, NULL);
+               c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:", 
long_options, NULL);
                if (c == -1)
                        break;
 
@@ -655,6 +666,10 @@ int main(int argc, char *argv[])
                        ++use_event;
                        break;
 
+               case 'g':
+                       grh = strdupa(optarg);
+                       break;
+
                default:
                        usage(argv[0]);
                        return 1;
@@ -722,13 +737,25 @@ int main(int argc, char *argv[])
 
        memset(my_dest, 0, sizeof my_dest);
 
+       if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+               fprintf(stderr, "Couldn't get port info\n");
+               return 1;
+       }
        for (i = 0; i < num_qp; ++i) {
                my_dest[i].qpn = ctx->qp[i]->qp_num;
                my_dest[i].psn = lrand48() & 0xffffff;
-               my_dest[i].lid = pp_get_local_lid(ctx->context, ib_port);
-               if (!my_dest[i].lid) {
-                       fprintf(stderr, "Couldn't get local LID\n");
-                       return 1;
+               if (ctx->portinfo.link_layer == IBV_LINK_LAYER_ETHERNET) {
+                       if (!grh) {
+                               fprintf(stderr, "Must supply remote gid\n");
+                               return 1;
+                       }
+                       inet_pton(AF_INET6, grh, &ctx->dgid);
+               } else {
+                       my_dest[i].lid = ctx->portinfo.lid;
+                       if (!my_dest[i].lid) {
+                               fprintf(stderr, "Couldn't get local LID\n");
+                               return 1;
+                       }
                }
 
                printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
diff --git a/examples/uc_pingpong.c b/examples/uc_pingpong.c
index 6f31247..2bc7da5 100644
--- a/examples/uc_pingpong.c
+++ b/examples/uc_pingpong.c
@@ -67,6 +67,8 @@ struct pingpong_context {
        int                      size;
        int                      rx_depth;
        int                      pending;
+       struct ibv_port_attr     portinfo;
+       union ibv_gid            dgid;
 };
 
 struct pingpong_dest {
@@ -92,6 +94,13 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int 
port, int my_psn,
                        .port_num       = port
                }
        };
+
+       if (ctx->dgid.global.interface_id) {
+               attr.ah_attr.is_global = 1;
+               attr.ah_attr.grh.hop_limit = 1;
+               attr.ah_attr.grh.dgid = ctx->dgid;
+       }
+
        if (ibv_modify_qp(ctx->qp, &attr,
                          IBV_QP_STATE              |
                          IBV_QP_AV                 |
@@ -281,7 +290,7 @@ static struct pingpong_context *pp_init_ctx(struct 
ibv_device *ib_dev, int size,
 {
        struct pingpong_context *ctx;
 
-       ctx = malloc(sizeof *ctx);
+       ctx = calloc(1, sizeof *ctx);
        if (!ctx)
                return NULL;
 
@@ -469,6 +478,7 @@ static void usage(const char *argv0)
        printf("  -n, --iters=<iters>    number of exchanges (default 1000)\n");
        printf("  -l, --sl=<sl>          service level value\n");
        printf("  -e, --events           sleep on CQ events (default poll)\n");
+       printf("  -g, --gid=<remote gid> gid of the other port\n");
 }
 
 int main(int argc, char *argv[])
@@ -492,6 +502,7 @@ int main(int argc, char *argv[])
        int                      rcnt, scnt;
        int                      num_cq_events = 0;
        int                      sl = 0;
+       char                    *grh = NULL;
 
        srand48(getpid() * time(NULL));
 
@@ -508,10 +519,11 @@ int main(int argc, char *argv[])
                        { .name = "iters",    .has_arg = 1, .val = 'n' },
                        { .name = "sl",       .has_arg = 1, .val = 'l' },
                        { .name = "events",   .has_arg = 0, .val = 'e' },
+                       { .name = "gid",      .has_arg = 1, .val = 'g' },
                        { 0 }
                };
 
-               c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:e", long_options, 
NULL);
+               c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:", 
long_options, NULL);
                if (c == -1)
                        break;
 
@@ -564,6 +576,10 @@ int main(int argc, char *argv[])
                        ++use_event;
                        break;
 
+               case 'g':
+                       grh = strdupa(optarg);
+                       break;
+
                default:
                        usage(argv[0]);
                        return 1;
@@ -619,14 +635,27 @@ int main(int argc, char *argv[])
                        return 1;
                }
 
-       my_dest.lid = pp_get_local_lid(ctx->context, ib_port);
-       my_dest.qpn = ctx->qp->qp_num;
-       my_dest.psn = lrand48() & 0xffffff;
-       if (!my_dest.lid) {
-               fprintf(stderr, "Couldn't get local LID\n");
+       if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+               fprintf(stderr, "Couldn't get port info\n");
                return 1;
        }
 
+       my_dest.lid = ctx->portinfo.lid;
+       if (ctx->portinfo.link_layer == IBV_LINK_LAYER_ETHERNET) {
+               if (!grh) {
+                       fprintf(stderr, "Must supply remote gid\n");
+                       return 1;
+               }
+               inet_pton(AF_INET6, grh, &ctx->dgid);
+       } else {
+               if (!my_dest.lid) {
+                       fprintf(stderr, "Couldn't get local LID\n");
+                       return 1;
+               }
+       }
+       my_dest.qpn = ctx->qp->qp_num;
+       my_dest.psn = lrand48() & 0xffffff;
+
        printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
               my_dest.lid, my_dest.qpn, my_dest.psn);
 
diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
index 6f10212..e30d6d6 100644
--- a/examples/ud_pingpong.c
+++ b/examples/ud_pingpong.c
@@ -68,6 +68,8 @@ struct pingpong_context {
        int                      size;
        int                      rx_depth;
        int                      pending;
+       struct ibv_port_attr     portinfo;
+       union ibv_gid            dgid;
 };
 
 struct pingpong_dest {
@@ -105,6 +107,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, 
int port, int my_psn,
                return 1;
        }
 
+       if (ctx->dgid.global.interface_id) {
+               ah_attr.is_global = 1;
+               ah_attr.grh.hop_limit = 1;
+               ah_attr.grh.dgid = ctx->dgid;
+       }
+
        ctx->ah = ibv_create_ah(ctx->pd, &ah_attr);
        if (!ctx->ah) {
                fprintf(stderr, "Failed to create AH\n");
@@ -478,6 +486,7 @@ static void usage(const char *argv0)
        printf("  -r, --rx-depth=<dep>   number of receives to post at a time 
(default 500)\n");
        printf("  -n, --iters=<iters>    number of exchanges (default 1000)\n");
        printf("  -e, --events           sleep on CQ events (default poll)\n");
+       printf("  -g, --gid              specify remote gid\n");
 }
 
 int main(int argc, char *argv[])
@@ -500,6 +509,7 @@ int main(int argc, char *argv[])
        int                      rcnt, scnt;
        int                      num_cq_events = 0;
        int                      sl = 0;
+       char                    *gid = NULL;
 
        srand48(getpid() * time(NULL));
 
@@ -515,10 +525,11 @@ int main(int argc, char *argv[])
                        { .name = "iters",    .has_arg = 1, .val = 'n' },
                        { .name = "sl",       .has_arg = 1, .val = 'l' },
                        { .name = "events",   .has_arg = 0, .val = 'e' },
+                       { .name = "gid",      .has_arg = 1, .val = 'g' },
                        { 0 }
                };
 
-               c = getopt_long(argc, argv, "p:d:i:s:r:n:l:e", long_options, 
NULL);
+               c = getopt_long(argc, argv, "p:d:i:s:r:n:l:eg:", long_options, 
NULL);
                if (c == -1)
                        break;
 
@@ -563,6 +574,10 @@ int main(int argc, char *argv[])
                        ++use_event;
                        break;
 
+               case 'g':
+                        gid = strdupa(optarg);
+                       break;
+
                default:
                        usage(argv[0]);
                        return 1;
@@ -618,12 +633,25 @@ int main(int argc, char *argv[])
                        return 1;
                }
 
-       my_dest.lid = pp_get_local_lid(ctx->context, ib_port);
+       if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+               fprintf(stderr, "Couldn't get port info\n");
+               return 1;
+       }
+       my_dest.lid = ctx->portinfo.lid;
+
        my_dest.qpn = ctx->qp->qp_num;
        my_dest.psn = lrand48() & 0xffffff;
-       if (!my_dest.lid) {
-               fprintf(stderr, "Couldn't get local LID\n");
-               return 1;
+       if (ctx->portinfo.link_layer == IBV_LINK_LAYER_ETHERNET) {
+               if (!my_dest.lid) {
+                       fprintf(stderr, "Couldn't get local LID\n");
+                       return 1;
+               }
+       } else {
+               if (!gid) {
+                       fprintf(stderr, "must specify remote GID\n");
+                       return 1;
+               }
+               inet_pton(AF_INET6, gid, &ctx->dgid);
        }
 
        printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h
index 9a81416..8d7c2c6 100644
--- a/include/infiniband/driver.h
+++ b/include/infiniband/driver.h
@@ -131,6 +131,7 @@ int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
 int ibv_cmd_destroy_ah(struct ibv_ah *ah);
 int ibv_cmd_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t 
lid);
 int ibv_cmd_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t 
lid);
+int ibv_cmd_get_mac(struct ibv_pd *pd, uint8_t port, uint8_t *gid, uint8_t 
*mac);
 
 int ibv_dontfork_range(void *base, size_t size);
 int ibv_dofork_range(void *base, size_t size);
diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
index 0db083a..8ef8844 100644
--- a/include/infiniband/kern-abi.h
+++ b/include/infiniband/kern-abi.h
@@ -46,7 +46,7 @@
  * The minimum and maximum kernel ABI that we can handle.
  */
 #define IB_USER_VERBS_MIN_ABI_VERSION  1
-#define IB_USER_VERBS_MAX_ABI_VERSION  6
+#define IB_USER_VERBS_MAX_ABI_VERSION  7
 
 enum {
        IB_USER_VERBS_CMD_GET_CONTEXT,
@@ -85,7 +85,8 @@ enum {
        IB_USER_VERBS_CMD_MODIFY_SRQ,
        IB_USER_VERBS_CMD_QUERY_SRQ,
        IB_USER_VERBS_CMD_DESTROY_SRQ,
-       IB_USER_VERBS_CMD_POST_SRQ_RECV
+       IB_USER_VERBS_CMD_POST_SRQ_RECV,
+       IB_USER_VERBS_CMD_GET_MAC
 };
 
 /*
@@ -223,7 +224,8 @@ struct ibv_query_port_resp {
        __u8  active_width;
        __u8  active_speed;
        __u8  phys_state;
-       __u8  reserved[3];
+       __u8  link_layer;
+       __u8  reserved[2];
 };
 
 struct ibv_alloc_pd {
@@ -798,6 +800,7 @@ enum {
        IB_USER_VERBS_CMD_QUERY_SRQ_V2,
        IB_USER_VERBS_CMD_DESTROY_SRQ_V2,
        IB_USER_VERBS_CMD_POST_SRQ_RECV_V2,
+       IB_USER_VERBS_CMD_GET_MAC_V2 = -1,
        /*
         * Set commands that didn't exist to -1 so our compile-time
         * trick opcodes in IBV_INIT_CMD() doesn't break.
@@ -878,4 +881,20 @@ struct ibv_create_srq_resp_v5 {
        __u32 srq_handle;
 };
 
+struct ibv_get_mac {
+       __u32 command;
+       __u16 in_words;
+       __u16 out_words;
+       __u64 response;
+       __u32 pd_handle;
+       __u8  port;
+       __u8  reserved[3];
+       __u8  dgid[16];
+};
+
+struct ibv_get_mac_resp {
+       __u8    mac[6];
+       __u16   reserved;
+};
+
 #endif /* KERN_ABI_H */
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index 0f1cb2e..ecdbc6f 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -38,6 +38,7 @@
 
 #include <stdint.h>
 #include <pthread.h>
+#include <string.h>
 
 #ifdef __cplusplus
 #  define BEGIN_C_DECLS extern "C" {
@@ -61,6 +62,7 @@ union ibv_gid {
                uint64_t        subnet_prefix;
                uint64_t        interface_id;
        } global;
+       uint32_t                dwords[4];
 };
 
 enum ibv_node_type {
@@ -161,6 +163,12 @@ enum ibv_port_state {
        IBV_PORT_ACTIVE_DEFER   = 5
 };
 
+enum {
+       IBV_LINK_LAYER_UNSPECIFIED,
+       IBV_LINK_LAYER_INFINIBAND,
+       IBV_LINK_LAYER_ETHERNET,
+};
+
 struct ibv_port_attr {
        enum ibv_port_state     state;
        enum ibv_mtu            max_mtu;
@@ -181,6 +189,8 @@ struct ibv_port_attr {
        uint8_t                 active_width;
        uint8_t                 active_speed;
        uint8_t                 phys_state;
+       uint8_t                 link_layer;
+       uint8_t                 pad;
 };
 
 enum ibv_event_type {
@@ -693,6 +703,16 @@ struct ibv_context {
        void                   *abi_compat;
 };
 
+static inline int ___ibv_query_port(struct ibv_context *context,
+                                   uint8_t port_num,
+                                   struct ibv_port_attr *port_attr)
+{
+       port_attr->link_layer = IBV_LINK_LAYER_UNSPECIFIED;
+       port_attr->pad = 0;
+
+       return context->ops.query_port(context, port_num, port_attr);
+}
+
 /**
  * ibv_get_device_list - Get list of IB devices currently available
  * @num_devices: optional.  if non-NULL, set to the number of devices
@@ -1097,4 +1117,7 @@ END_C_DECLS
 
 #  undef __attribute_const
 
+#define ibv_query_port(context, port_num, port_attr) \
+       ___ibv_query_port(context, port_num, port_attr)
+
 #endif /* INFINIBAND_VERBS_H */
diff --git a/src/cmd.c b/src/cmd.c
index cbd5288..5183d59 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -162,6 +162,7 @@ int ibv_cmd_query_device(struct ibv_context *context,
        return 0;
 }
 
+#include <stdio.h>
 int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
                       struct ibv_port_attr *port_attr,
                       struct ibv_query_port *cmd, size_t cmd_size)
@@ -196,6 +197,7 @@ int ibv_cmd_query_port(struct ibv_context *context, uint8_t 
port_num,
        port_attr->active_width    = resp.active_width;
        port_attr->active_speed    = resp.active_speed;
        port_attr->phys_state      = resp.phys_state;
+       port_attr->link_layer      = resp.link_layer;
 
        return 0;
 }
@@ -1122,3 +1124,22 @@ int ibv_cmd_detach_mcast(struct ibv_qp *qp, const union 
ibv_gid *gid, uint16_t l
 
        return 0;
 }
+
+int ibv_cmd_get_mac(struct ibv_pd *pd, uint8_t port, uint8_t *gid, uint8_t 
*mac)
+{
+       struct ibv_get_mac cmd;
+       struct ibv_get_mac_resp resp;
+
+       IBV_INIT_CMD_RESP(&cmd, sizeof cmd, GET_MAC, &resp, sizeof resp);
+       memcpy(cmd.dgid, gid, sizeof cmd.dgid);
+       cmd.pd_handle = pd->handle;
+       cmd.port = port;
+
+       if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+               return errno;
+
+       memcpy(mac, resp.mac, 6);
+
+       return 0;
+}
+
diff --git a/src/libibverbs.map b/src/libibverbs.map
index 1827da0..1688e73 100644
--- a/src/libibverbs.map
+++ b/src/libibverbs.map
@@ -64,6 +64,7 @@ IBVERBS_1.0 {
                ibv_cmd_destroy_ah;
                ibv_cmd_attach_mcast;
                ibv_cmd_detach_mcast;
+               ibv_cmd_get_mac;
                ibv_copy_qp_attr_from_kern;
                ibv_copy_path_rec_from_kern;
                ibv_copy_path_rec_to_kern;
diff --git a/src/verbs.c b/src/verbs.c
index ba3c0a4..2b175b6 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -86,6 +86,7 @@ default_symver(__ibv_query_device, ibv_query_device);
 int __ibv_query_port(struct ibv_context *context, uint8_t port_num,
                     struct ibv_port_attr *port_attr)
 {
+       port_attr->link_layer = IBV_LINK_LAYER_UNSPECIFIED;
        return context->ops.query_port(context, port_num, port_attr);
 }
 default_symver(__ibv_query_port, ibv_query_port);



libmlx4:


diff --git a/src/mlx4.h b/src/mlx4.h
index 4445998..661255b 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -236,11 +236,14 @@ struct mlx4_av {
        uint8_t                         hop_limit;
        uint32_t                        sl_tclass_flowlabel;
        uint8_t                         dgid[16];
+       uint8_t                         mac[8];
 };
 
 struct mlx4_ah {
        struct ibv_ah                   ibv_ah;
        struct mlx4_av                  av;
+       uint16_t                        vlan;
+       uint8_t                         mac[6];
 };
 
 static inline unsigned long align(unsigned long val, unsigned long align)
diff --git a/src/qp.c b/src/qp.c
index d194ae3..cd8fab0 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -143,6 +143,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg 
*dseg,
        memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
        dseg->dqpn = htonl(wr->wr.ud.remote_qpn);
        dseg->qkey = htonl(wr->wr.ud.remote_qkey);
+       dseg->vlan = htons(to_mah(wr->wr.ud.ah)->vlan);
+       memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->mac, 6);
 }
 
 static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg)
diff --git a/src/verbs.c b/src/verbs.c
index 1ac1362..667ef68 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -614,9 +614,21 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
        return 0;
 }
 
+static int mcast_mac(uint8_t *mac)
+{
+       int i;
+       uint8_t val = 0xff;
+
+       for (i = 0; i < 6; ++i)
+               val &= mac[i];
+
+       return val == 0xff;
+}
+
 struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
 {
        struct mlx4_ah *ah;
+       struct ibv_port_attr port_attr;
 
        ah = malloc(sizeof *ah);
        if (!ah)
@@ -642,7 +654,24 @@ struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct 
ibv_ah_attr *attr)
                memcpy(ah->av.dgid, attr->grh.dgid.raw, 16);
        }
 
+       if (ibv_query_port(pd->context, attr->port_num, &port_attr))
+               goto err;
+
+       if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
+               if (ibv_cmd_get_mac(pd, attr->port_num, ah->av.dgid, ah->mac))
+                       goto err;
+
+               ah->vlan = 0;
+               if (mcast_mac(ah->mac))
+                       ah->av.dlid = htons(0xc000);
+
+       }
+
+
        return &ah->ibv_ah;
+err:
+       free(ah);
+       return NULL;
 }
 
 int mlx4_destroy_ah(struct ibv_ah *ah)
diff --git a/src/wqe.h b/src/wqe.h
index 6f7f309..bbd22ba 100644
--- a/src/wqe.h
+++ b/src/wqe.h
@@ -78,7 +78,8 @@ struct mlx4_wqe_datagram_seg {
        uint32_t                av[8];
        uint32_t                dqpn;
        uint32_t                qkey;
-       uint32_t                reserved[2];
+       uint16_t                vlan;
+       uint8_t                 mac[6];
 };
 
 struct mlx4_wqe_data_seg {


kernel driver:

diff --git a/drivers/infiniband/core/uverbs_cmd.c 
b/drivers/infiniband/core/uverbs_cmd.c
index 012aadf..d592bd2 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -452,7 +452,8 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
        resp.active_width    = attr.active_width;
        resp.active_speed    = attr.active_speed;
        resp.phys_state      = attr.phys_state;
-       resp.transport       = attr.transport;
+       resp.transport       = attr.transport == RDMA_TRANSPORT_RDMAOE ?
+               IB_LINK_LAYER_ETHERNET : IB_LINK_LAYER_INFINIBAND;
 
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp))
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index bf6e860..57653b7 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -73,6 +73,12 @@ enum rdma_transport_type {
        RDMA_TRANSPORT_RDMAOE
 };
 
+enum {
+       IV_LINK_LAYER_UNSPECIFIED,
+       IB_LINK_LAYER_INFINIBAND,
+       IB_LINK_LAYER_ETHERNET,
+};
+
 enum ib_device_cap_flags {
        IB_DEVICE_RESIZE_MAX_WR         = 1,
        IB_DEVICE_BAD_PKEY_CNTR         = (1<<1),
-- 
1.6.5.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to