[PATCH 0/2] Fix OPA_VNIC issues with debug kernel

2017-06-14 Thread Vishwanathapura, Niranjana
Hi Doug,
Here are a couple of OPA_VNIC bug fixes.
As OPA_VNIC is already included for 4.12, it would be great
to get them into the RC release.

Thanks,
Niranjana

Vishwanathapura, Niranjana (2):
  IB/opa_vnic: Use GFP_ATOMIC while sending trap
  IB/opa_vnic: Use spinlock instead of mutex for stats_lock

 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c| 4 ++--
 drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h   | 2 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c | 8 +++-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c   | 2 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c | 8 
 5 files changed, 11 insertions(+), 13 deletions(-)

-- 
1.8.3.1



[PATCH rdma-next v2 12/12] IB/hfi1: VNIC SDMA support

2017-04-12 Thread Vishwanathapura, Niranjana
HFI1 VNIC SDMA support enables transmission of VNIC packets over SDMA.
Map VNIC queues to SDMA engines and support halting and wakeup of the
VNIC queues.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 drivers/infiniband/hw/hfi1/Makefile|   2 +-
 drivers/infiniband/hw/hfi1/hfi.h   |   1 +
 drivers/infiniband/hw/hfi1/init.c  |   1 +
 drivers/infiniband/hw/hfi1/vnic.h  |  28 +++
 drivers/infiniband/hw/hfi1/vnic_main.c |  24 ++-
 drivers/infiniband/hw/hfi1/vnic_sdma.c | 323 +
 6 files changed, 376 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_sdma.c

diff --git a/drivers/infiniband/hw/hfi1/Makefile 
b/drivers/infiniband/hw/hfi1/Makefile
index 2280538..88085f6 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
-   verbs_txreq.o vnic_main.o
+   verbs_txreq.o vnic_main.o vnic_sdma.o
 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
 
 CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index a12bb46..2862b14 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -834,6 +834,7 @@ struct hfi1_asic_data {
 /* Virtual NIC information */
 struct hfi1_vnic_data {
struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
+   struct kmem_cache *txreq_cache;
u8 num_vports;
struct idr vesw_idr;
u8 rmt_start;
diff --git a/drivers/infiniband/hw/hfi1/init.c 
b/drivers/infiniband/hw/hfi1/init.c
index de2eec4..b4c7e04 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -681,6 +681,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
dd->process_pio_send = hfi1_verbs_send_pio;
dd->process_dma_send = hfi1_verbs_send_dma;
dd->pio_inline_send = pio_copy;
+   dd->process_vnic_dma_send = hfi1_vnic_send_dma;
 
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
diff --git a/drivers/infiniband/hw/hfi1/vnic.h 
b/drivers/infiniband/hw/hfi1/vnic.h
index 9bed40d..e2c4552 100644
--- a/drivers/infiniband/hw/hfi1/vnic.h
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -49,6 +49,7 @@
 
 #include 
 #include "hfi.h"
+#include "sdma.h"
 
 #define HFI1_VNIC_MAX_TXQ 16
 #define HFI1_VNIC_MAX_PAD 12
@@ -85,6 +86,26 @@
 #define HFI1_VNIC_MAX_QUEUE 16
 
 /**
+ * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
+ * @dd - device data pointer
+ * @sde - sdma engine
+ * @vinfo - vnic info pointer
+ * @wait - iowait structure
+ * @stx - sdma tx request
+ * @state - vnic Tx ring SDMA state
+ * @q_idx - vnic Tx queue index
+ */
+struct hfi1_vnic_sdma {
+   struct hfi1_devdata *dd;
+   struct sdma_engine  *sde;
+   struct hfi1_vnic_vport_info *vinfo;
+   struct iowait wait;
+   struct sdma_txreq stx;
+   unsigned int state;
+   u8 q_idx;
+};
+
+/**
  * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue
  * @idx: queue index
  * @vinfo: pointer to vport information
@@ -111,6 +132,7 @@ struct hfi1_vnic_rx_queue {
  * @vesw_id: virtual switch id
  * @rxq: Array of receive queues
  * @stats: per queue stats
+ * @sdma: VNIC SDMA structure per TXQ
  */
 struct hfi1_vnic_vport_info {
struct hfi1_devdata *dd;
@@ -126,6 +148,7 @@ struct hfi1_vnic_vport_info {
struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT];
 
struct opa_vnic_stats  stats[HFI1_VNIC_MAX_QUEUE];
+   struct hfi1_vnic_sdma  sdma[HFI1_VNIC_MAX_TXQ];
 };
 
 #define v_dbg(format, arg...) \
@@ -138,8 +161,13 @@ struct hfi1_vnic_vport_info {
 /* vnic hfi1 internal functions */
 void hfi1_vnic_setup(struct hfi1_devdata *dd);
 void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
 
 void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet);
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo);
+bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+   u8 q_idx);
 
 /* vnic rdma netdev operations */
 struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c 
b/drivers/infiniband/hw/hfi1/vnic_main.c
index 32d91b6..392f4d5 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -406,6 +406,10 @@ static void hfi1_vnic_maybe_stop_tx(struct 
hfi1_vnic_vport_info *vinfo,
u8 q_idx)
 {
netif_stop_subqueue(vinfo->netdev, q_idx);
+   if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
+   return;
+

[PATCH rdma-next v2 05/12] IB/opa-vnic: VNIC Ethernet Management (EM) structure definitions

2017-04-12 Thread Vishwanathapura, Niranjana
Define VNIC EM MAD structures and the associated macros. These structures
are used for information exchange between VNIC EM agent (EMA) on the host
and the Ethernet manager. These include the virtual ethernet switch (vesw)
port information, vesw port mac table, summary and error counters,
vesw port interface mac lists and the EMA trap.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
---
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h   | 423 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|  33 ++
 2 files changed, 456 insertions(+)

diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
index 176fca9..c025cde 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -52,6 +52,28 @@
  * and decapsulation of Ethernet packets
  */
 
+#include 
+#include 
+
+/* EMA class version */
+#define OPA_EMA_CLASS_VERSION   0x80
+
+/*
+ * Define the Intel vendor management class for OPA
+ * ETHERNET MANAGEMENT
+ */
+#define OPA_MGMT_CLASS_INTEL_EMA0x34
+
+/* EM attribute IDs */
+#define OPA_EM_ATTR_CLASS_PORT_INFO 0x0001
+#define OPA_EM_ATTR_VESWPORT_INFO   0x0011
+#define OPA_EM_ATTR_VESWPORT_MAC_ENTRIES0x0012
+#define OPA_EM_ATTR_IFACE_UCAST_MACS0x0013
+#define OPA_EM_ATTR_IFACE_MCAST_MACS0x0014
+#define OPA_EM_ATTR_DELETE_VESW 0x0015
+#define OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS   0x0020
+#define OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS 0x0022
+
 /* VNIC configured and operational state values */
 #define OPA_VNIC_STATE_DROP_ALL0x1
 #define OPA_VNIC_STATE_FORWARDING  0x3
@@ -59,4 +81,405 @@
 #define OPA_VESW_MAX_NUM_DEF_PORT   16
 #define OPA_VNIC_MAX_NUM_PCP8
 
+#define OPA_VNIC_EMA_DATA(OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR)
+
+/* Defines for vendor specific notice(trap) attributes */
+#define OPA_INTEL_EMA_NOTICE_TYPE_INFO 0x04
+
+/* INTEL OUI */
+#define INTEL_OUI_1 0x00
+#define INTEL_OUI_2 0x06
+#define INTEL_OUI_3 0x6a
+
+/* Trap opcodes sent from VNIC */
+#define OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1
+#define OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2
+#define OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3
+
+#define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd)  (!!((dlid_sd) & 0x20))
+#define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd)((dlid_sd) >> 8)
+
+/**
+ * struct opa_vesw_info - OPA vnic switch information
+ * @fabric_id: 10-bit fabric id
+ * @vesw_id: 12-bit virtual ethernet switch id
+ * @def_port_mask: bitmask of default ports
+ * @pkey: partition key
+ * @u_mcast_dlid: unknown multicast dlid
+ * @u_ucast_dlid: array of unknown unicast dlids
+ * @eth_mtu: MTUs for each vlan PCP
+ * @eth_mtu_non_vlan: MTU for non vlan packets
+ */
+struct opa_vesw_info {
+   __be16  fabric_id;
+   __be16  vesw_id;
+
+   u8  rsvd0[6];
+   __be16  def_port_mask;
+
+   u8  rsvd1[2];
+   __be16  pkey;
+
+   u8  rsvd2[4];
+   __be32  u_mcast_dlid;
+   __be32  u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+   u8  rsvd3[44];
+   __be16  eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+   __be16  eth_mtu_non_vlan;
+   u8  rsvd4[2];
+} __packed;
+
+/**
+ * struct opa_per_veswport_info - OPA vnic per port information
+ * @port_num: port number
+ * @eth_link_status: current ethernet link state
+ * @base_mac_addr: base mac address
+ * @config_state: configured port state
+ * @oper_state: operational port state
+ * @max_mac_tbl_ent: max number of mac table entries
+ * @max_smac_ent: max smac entries in mac table
+ * @mac_tbl_digest: mac table digest
+ * @encap_slid: base slid for the port
+ * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets
+ * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets
+ * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets
+ * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets
+ * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets
+ * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets
+ * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets
+ * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets
+ * @uc_macs_gen_count: generation count for unicast macs list
+ * @mc_macs_gen_count: generation count for multicast macs list
+ */
+struct opa_per_veswport_info {
+   __be32  port_num;
+
+   u8  eth_link_status;
+   u8  rsvd0[3];
+
+   u8  base_mac_addr[ETH_ALEN];
+   u8  config_state;
+   u8  oper_state;
+
+   __be16  max_mac_tbl_ent;
+   __be16  max_smac_ent;
+   __be32  mac_tbl_digest;
+   u8  rsvd1[4];
+
+   __be32  encap_slid;
+
+   u8  pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+   u8  pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+   u8

[PATCH rdma-next v2 06/12] IB/opa-vnic: VNIC statistics support

2017-04-12 Thread Vishwanathapura, Niranjana
OPA VNIC driver statistics support maintains various counters including
standard netdev counters and the Ethernet manager defined counters.
Add the Ethtool hook to read the counters.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c | 110 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|   4 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  |  18 
 3 files changed, 132 insertions(+)

diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
index b74f6ad..a98948c 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -53,9 +53,119 @@
 
 #include "opa_vnic_internal.h"
 
+enum {NETDEV_STATS, VNIC_STATS};
+
+struct vnic_stats {
+   char stat_string[ETH_GSTRING_LEN];
+   struct {
+   int sizeof_stat;
+   int stat_offset;
+   };
+};
+
+#define VNIC_STAT(m){ FIELD_SIZEOF(struct opa_vnic_stats, m),   \
+ offsetof(struct opa_vnic_stats, m) }
+
+static struct vnic_stats vnic_gstrings_stats[] = {
+   /* NETDEV stats */
+   {"rx_packets", VNIC_STAT(netstats.rx_packets)},
+   {"tx_packets", VNIC_STAT(netstats.tx_packets)},
+   {"rx_bytes", VNIC_STAT(netstats.rx_bytes)},
+   {"tx_bytes", VNIC_STAT(netstats.tx_bytes)},
+   {"rx_errors", VNIC_STAT(netstats.rx_errors)},
+   {"tx_errors", VNIC_STAT(netstats.tx_errors)},
+   {"rx_dropped", VNIC_STAT(netstats.rx_dropped)},
+   {"tx_dropped", VNIC_STAT(netstats.tx_dropped)},
+
+   /* SUMMARY counters */
+   {"tx_unicast", VNIC_STAT(tx_grp.unicast)},
+   {"tx_mcastbcast", VNIC_STAT(tx_grp.mcastbcast)},
+   {"tx_untagged", VNIC_STAT(tx_grp.untagged)},
+   {"tx_vlan", VNIC_STAT(tx_grp.vlan)},
+
+   {"tx_64_size", VNIC_STAT(tx_grp.s_64)},
+   {"tx_65_127", VNIC_STAT(tx_grp.s_65_127)},
+   {"tx_128_255", VNIC_STAT(tx_grp.s_128_255)},
+   {"tx_256_511", VNIC_STAT(tx_grp.s_256_511)},
+   {"tx_512_1023", VNIC_STAT(tx_grp.s_512_1023)},
+   {"tx_1024_1518", VNIC_STAT(tx_grp.s_1024_1518)},
+   {"tx_1519_max", VNIC_STAT(tx_grp.s_1519_max)},
+
+   {"rx_unicast", VNIC_STAT(rx_grp.unicast)},
+   {"rx_mcastbcast", VNIC_STAT(rx_grp.mcastbcast)},
+   {"rx_untagged", VNIC_STAT(rx_grp.untagged)},
+   {"rx_vlan", VNIC_STAT(rx_grp.vlan)},
+
+   {"rx_64_size", VNIC_STAT(rx_grp.s_64)},
+   {"rx_65_127", VNIC_STAT(rx_grp.s_65_127)},
+   {"rx_128_255", VNIC_STAT(rx_grp.s_128_255)},
+   {"rx_256_511", VNIC_STAT(rx_grp.s_256_511)},
+   {"rx_512_1023", VNIC_STAT(rx_grp.s_512_1023)},
+   {"rx_1024_1518", VNIC_STAT(rx_grp.s_1024_1518)},
+   {"rx_1519_max", VNIC_STAT(rx_grp.s_1519_max)},
+
+   /* ERROR counters */
+   {"rx_fifo_errors", VNIC_STAT(netstats.rx_fifo_errors)},
+   {"rx_length_errors", VNIC_STAT(netstats.rx_length_errors)},
+
+   {"tx_fifo_errors", VNIC_STAT(netstats.tx_fifo_errors)},
+   {"tx_carrier_errors", VNIC_STAT(netstats.tx_carrier_errors)},
+
+   {"tx_dlid_zero", VNIC_STAT(tx_dlid_zero)},
+   {"tx_drop_state", VNIC_STAT(tx_drop_state)},
+   {"rx_drop_state", VNIC_STAT(rx_drop_state)},
+   {"rx_oversize", VNIC_STAT(rx_oversize)},
+   {"rx_runt", VNIC_STAT(rx_runt)},
+};
+
+#define VNIC_STATS_LEN  ARRAY_SIZE(vnic_gstrings_stats)
+
+/* vnic_get_sset_count - get string set count */
+static int vnic_get_sset_count(struct net_device *netdev, int sset)
+{
+   return (sset == ETH_SS_STATS) ? VNIC_STATS_LEN : -EOPNOTSUPP;
+}
+
+/* vnic_get_ethtool_stats - get statistics */
+static void vnic_get_ethtool_stats(struct net_device *netdev,
+  struct ethtool_stats *stats, u64 *data)
+{
+   struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+   struct opa_vnic_stats vstats;
+   int i;
+
+   memset(&vstats, 0, sizeof(vstats));
+   mutex_lock(&adapter->stats_lock);
+   adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+   for (i = 0; i < VNIC_STATS_LEN; i++) {
+   char *p = (char *)&vstats + vnic_gstrings_stats[i].stat_offset;
+
+   data[i] = (vnic_gstrings_stats[i].sizeof_stat ==
+  sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+   }
+   mutex_unlock(&adapter->stats_lock);
+}
+
+/* vnic_get_strings - get strings */
+static void vnic_get_strings(struct net_device *netdev, u32 stringset, u8 
*data)
+{
+   int i;
+
+   if (stringset != ETH_SS_STATS)
+   return;
+
+   for (i = 0; i < VNIC_STATS_LEN; i++)
+   memcpy(data + i * ETH_GSTRING_LEN,
+  vnic_gstrings_stats[i].stat_string,
+  ETH_GSTRING_LEN);
+}
+
 /* ethtool ops */
 static const struct ethtool_ops opa_vnic_ethtool_ops = {
.get_link

[PATCH rdma-next v2 02/12] IB/opa-vnic: RDMA NETDEV interface

2017-04-12 Thread Vishwanathapura, Niranjana
Add rdma netdev interface to ib device structure allowing rdma netdev
devices to be allocated by ib clients.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 include/rdma/ib_verbs.h | 33 +
 1 file changed, 33 insertions(+)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3a8e058..5c6b8c0 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -55,6 +55,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1877,6 +1878,24 @@ struct ib_port_immutable {
u32   max_mad_size;
 };
 
+/* rdma netdev type - specifies protocol type */
+enum rdma_netdev_t {
+   RDMA_NETDEV_OPA_VNIC
+};
+
+/**
+ * struct rdma_netdev - rdma netdev
+ * For cases where netstack interfacing is required.
+ */
+struct rdma_netdev {
+   void  *clnt_priv;
+   struct ib_device  *hca;
+   u8 port_num;
+
+   /* control functions */
+   void (*set_id)(struct net_device *netdev, int id);
+};
+
 struct ib_device {
char  name[IB_DEVICE_NAME_MAX];
 
@@ -2127,6 +2146,20 @@ struct ib_device {
   struct 
ib_rwq_ind_table_init_attr *init_attr,
   struct ib_udata 
*udata);
int(*destroy_rwq_ind_table)(struct 
ib_rwq_ind_table *wq_ind_table);
+   /**
+* rdma netdev operations
+*
+* Driver implementing alloc_rdma_netdev must return -EOPNOTSUPP if it
+* doesn't support the specified rdma netdev type.
+*/
+   struct net_device *(*alloc_rdma_netdev)(
+   struct ib_device *device,
+   u8 port_num,
+   enum rdma_netdev_t type,
+   const char *name,
+   unsigned char name_assign_type,
+   void (*setup)(struct net_device *));
+   void (*free_rdma_netdev)(struct net_device *netdev);
 
struct module   *owner;
struct devicedev;
-- 
1.8.3.1



[PATCH rdma-next v2 04/12] IB/opa-vnic: Virtual Network Interface Controller (VNIC) netdev

2017-04-12 Thread Vishwanathapura, Niranjana
OPA VNIC netdev function supports Ethernet functionality over Omni-Path
fabric by encapsulating Ethernet packets inside Omni-Path packet header.
It allocates a rdma netdev device and interfaces with the network stack to
provide standard Ethernet network interfaces. It overrides HFI1 device's
netdev operations where it is required.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Sudeep Dutt 
Signed-off-by: Andrzej Kacprowski 
---
 MAINTAINERS|   7 +
 drivers/infiniband/Kconfig |   1 +
 drivers/infiniband/ulp/Makefile|   1 +
 drivers/infiniband/ulp/opa_vnic/Kconfig|   8 +
 drivers/infiniband/ulp/opa_vnic/Makefile   |   6 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c   | 239 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h   |  62 ++
 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c |  65 ++
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h| 186 
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  | 227 +++
 10 files changed, 802 insertions(+)
 create mode 100644 drivers/infiniband/ulp/opa_vnic/Kconfig
 create mode 100644 drivers/infiniband/ulp/opa_vnic/Makefile
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c

diff --git a/MAINTAINERS b/MAINTAINERS
index c776906..fc32256 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5843,6 +5843,13 @@ F:   drivers/block/cciss*
 F: include/linux/cciss_ioctl.h
 F: include/uapi/linux/cciss_ioctl.h
 
+OPA-VNIC DRIVER
+M: Dennis Dalessandro 
+M: Niranjana Vishwanathapura 
+L: linux-r...@vger.kernel.org
+S: Supported
+F: drivers/infiniband/ulp/opa_vnic
+
 HFI1 DRIVER
 M: Mike Marciniszyn 
 M: Dennis Dalessandro 
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 66f8602..234fe01 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,6 +85,7 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
 source "drivers/infiniband/ulp/iser/Kconfig"
 source "drivers/infiniband/ulp/isert/Kconfig"
 
+source "drivers/infiniband/ulp/opa_vnic/Kconfig"
 source "drivers/infiniband/sw/rdmavt/Kconfig"
 source "drivers/infiniband/sw/rxe/Kconfig"
 
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
index f3c7dcf..c28af18 100644
--- a/drivers/infiniband/ulp/Makefile
+++ b/drivers/infiniband/ulp/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_INFINIBAND_SRP)+= srp/
 obj-$(CONFIG_INFINIBAND_SRPT)  += srpt/
 obj-$(CONFIG_INFINIBAND_ISER)  += iser/
 obj-$(CONFIG_INFINIBAND_ISERT) += isert/
+obj-$(CONFIG_INFINIBAND_OPA_VNIC)  += opa_vnic/
diff --git a/drivers/infiniband/ulp/opa_vnic/Kconfig 
b/drivers/infiniband/ulp/opa_vnic/Kconfig
new file mode 100644
index 000..48132ab
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Kconfig
@@ -0,0 +1,8 @@
+config INFINIBAND_OPA_VNIC
+   tristate "Intel OPA VNIC support"
+   depends on X86_64 && INFINIBAND
+   ---help---
+   This is Omni-Path (OPA) Virtual Network Interface Controller (VNIC)
+   driver for Ethernet over Omni-Path feature. It implements the HW
+   independent VNIC functionality. It interfaces with Linux stack for
+   data path and IB MAD for the control path.
diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile 
b/drivers/infiniband/ulp/opa_vnic/Makefile
new file mode 100644
index 000..975c313
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -0,0 +1,6 @@
+# Makefile - Intel Omni-Path Virtual Network Controller driver
+# Copyright(c) 2017, Intel Corporation.
+#
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
+
+opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
new file mode 100644
index 000..c74d02a
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR

[PATCH rdma-next v2 07/12] IB/opa-vnic: VNIC MAC table support

2017-04-12 Thread Vishwanathapura, Niranjana
OPA VNIC MAC table contains the MAC address to DLID mappings provided by
the Ethernet manager. During transmission, the MAC table provides the MAC
address to DLID translation. Implement MAC table using simple hash list.
Also provide support to update/query the MAC table by Ethernet manager.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
---
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c   | 236 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|  51 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  |   4 +
 3 files changed, 291 insertions(+)

diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
index c74d02a..2e8fee9 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -96,6 +96,238 @@ static inline void opa_vnic_make_header(u8 *hdr, u32 slid, 
u32 dlid, u16 len,
memcpy(hdr, h, OPA_VNIC_HDR_LEN);
 }
 
+/*
+ * Using a simple hash table for mac table implementation with the last octet
+ * of mac address as a key.
+ */
+static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl)
+{
+   struct opa_vnic_mac_tbl_node *node;
+   struct hlist_node *tmp;
+   int bkt;
+
+   if (!mactbl)
+   return;
+
+   vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) {
+   hash_del(&node->hlist);
+   kfree(node);
+   }
+   kfree(mactbl);
+}
+
+static struct hlist_head *opa_vnic_alloc_mac_tbl(void)
+{
+   u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE;
+   struct hlist_head *mactbl;
+
+   mactbl = kzalloc(size, GFP_KERNEL);
+   if (!mactbl)
+   return ERR_PTR(-ENOMEM);
+
+   vnic_hash_init(mactbl);
+   return mactbl;
+}
+
+/* opa_vnic_release_mac_tbl - empty and free the mac table */
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter)
+{
+   struct hlist_head *mactbl;
+
+   mutex_lock(&adapter->mactbl_lock);
+   mactbl = rcu_access_pointer(adapter->mactbl);
+   rcu_assign_pointer(adapter->mactbl, NULL);
+   synchronize_rcu();
+   opa_vnic_free_mac_tbl(mactbl);
+   mutex_unlock(&adapter->mactbl_lock);
+}
+
+/*
+ * opa_vnic_query_mac_tbl - query the mac table for a section
+ *
+ * This function implements query of specific function of the mac table.
+ * The function also expects the requested range to be valid.
+ */
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+   struct opa_veswport_mactable *tbl)
+{
+   struct opa_vnic_mac_tbl_node *node;
+   struct hlist_head *mactbl;
+   int bkt;
+   u16 loffset, lnum_entries;
+
+   rcu_read_lock();
+   mactbl = rcu_dereference(adapter->mactbl);
+   if (!mactbl)
+   goto get_mac_done;
+
+   loffset = be16_to_cpu(tbl->offset);
+   lnum_entries = be16_to_cpu(tbl->num_entries);
+
+   vnic_hash_for_each(mactbl, bkt, node, hlist) {
+   struct __opa_vnic_mactable_entry *nentry = &node->entry;
+   struct opa_veswport_mactable_entry *entry;
+
+   if ((node->index < loffset) ||
+   (node->index >= (loffset + lnum_entries)))
+   continue;
+
+   /* populate entry in the tbl corresponding to the index */
+   entry = &tbl->tbl_entries[node->index - loffset];
+   memcpy(entry->mac_addr, nentry->mac_addr,
+  ARRAY_SIZE(entry->mac_addr));
+   memcpy(entry->mac_addr_mask, nentry->mac_addr_mask,
+  ARRAY_SIZE(entry->mac_addr_mask));
+   entry->dlid_sd = cpu_to_be32(nentry->dlid_sd);
+   }
+   tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest);
+get_mac_done:
+   rcu_read_unlock();
+}
+
+/*
+ * opa_vnic_update_mac_tbl - update mac table section
+ *
+ * This function updates the specified section of the mac table.
+ * The procedure includes following steps.
+ *  - Allocate a new mac (hash) table.
+ *  - Add the specified entries to the new table.
+ *(except the ones that are requested to be deleted).
+ *  - Add all the other entries from the old mac table.
+ *  - If there is a failure, free the new table and return.
+ *  - Switch to the new table.
+ *  - Free the old table and return.
+ *
+ * The function also expects the requested range to be valid.
+ */
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+   struct opa_veswport_mactable *tbl)
+{
+   struct opa_vnic_mac_tbl_node *node, *new_node;
+   struct hlist_head *new_mactbl, *old_mactbl;
+   int i, bkt, rc = 0;
+   u8 key;
+   u16 loffset, lnum_entries;
+
+   mutex_lock(&adapter->mactbl_lock);
+   /* allocate new mac table */
+   new_mactbl = opa_vnic_alloc_mac_tbl();
+   if (IS_ERR(

[PATCH rdma-next v2 10/12] IB/hfi1: OPA_VNIC RDMA netdev support

2017-04-12 Thread Vishwanathapura, Niranjana
Add support to create and free OPA_VNIC rdma netdev devices.
Implement netstack interface functionality including xmit_skb,
receive side NAPI etc. Also implement rdma netdev control functions.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andrzej Kacprowski 
---
 drivers/infiniband/hw/hfi1/Makefile|   2 +-
 drivers/infiniband/hw/hfi1/driver.c|  25 +-
 drivers/infiniband/hw/hfi1/hfi.h   |  27 +-
 drivers/infiniband/hw/hfi1/init.c  |   9 +-
 drivers/infiniband/hw/hfi1/vnic.h  | 153 
 drivers/infiniband/hw/hfi1/vnic_main.c | 644 +
 6 files changed, 853 insertions(+), 7 deletions(-)
 create mode 100644 drivers/infiniband/hw/hfi1/vnic.h
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_main.c

diff --git a/drivers/infiniband/hw/hfi1/Makefile 
b/drivers/infiniband/hw/hfi1/Makefile
index 0cf97a0..2280538 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
-   verbs_txreq.o
+   verbs_txreq.o vnic_main.o
 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
 
 CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/driver.c 
b/drivers/infiniband/hw/hfi1/driver.c
index 64bdbce..e4dc6a5 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -60,6 +60,7 @@
 #include "qp.h"
 #include "sdma.h"
 #include "debugfs.h"
+#include "vnic.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -1381,15 +1382,31 @@ int process_receive_ib(struct hfi1_packet *packet)
return RHF_RCV_CONTINUE;
 }
 
+static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
+{
+   /* Packet received in VNIC context via RSM */
+   if (packet->rcd->is_vnic)
+   return true;
+
+   if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) &&
+   (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR))
+   return true;
+
+   return false;
+}
+
 int process_receive_bypass(struct hfi1_packet *packet)
 {
struct hfi1_devdata *dd = packet->rcd->dd;
 
-   if (unlikely(rhf_err_flags(packet->rhf)))
+   if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
+   } else if (hfi1_is_vnic_packet(packet)) {
+   hfi1_vnic_bypass_rcv(packet);
+   return RHF_RCV_CONTINUE;
+   }
 
-   dd_dev_err(dd,
-  "Bypass packets are not supported in normal operation. 
Dropping\n");
+   dd_dev_err(dd, "Unsupported bypass packet. Dropping\n");
incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
u64 *flits = packet->ebuf;
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index a31638c..f85e8f4 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,7 @@
 #ifndef _HFI1_KERNEL_H
 #define _HFI1_KERNEL_H
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -337,6 +337,12 @@ struct hfi1_ctxtdata {
 * packets with the wrong interrupt handler.
 */
int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+
+   /* Indicates that this is vnic context */
+   bool is_vnic;
+
+   /* vnic queue index this context is mapped to */
+   u8 vnic_q_idx;
 };
 
 /*
@@ -808,6 +814,19 @@ struct hfi1_asic_data {
struct hfi1_i2c_bus *i2c_bus1;
 };
 
+/*
+ * Number of VNIC contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_NUM_VNIC_CTXT   8
+
+/* Virtual NIC information */
+struct hfi1_vnic_data {
+   struct idr vesw_idr;
+};
+
+struct hfi1_vnic_vport_info;
+
 /* device data struct now contains only "general per-device" info.
  * fields related to a physical IB port are in a hfi1_pportdata struct.
  */
@@ -1115,6 +1134,9 @@ struct hfi1_devdata {
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+   int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx,
+s

[PATCH rdma-next v2 11/12] IB/hfi1: Virtual Network Interface Controller (VNIC) HW support

2017-04-12 Thread Vishwanathapura, Niranjana
HFI1 HW specific support for VNIC functionality.
Dynamically allocate a set of contexts for VNIC when the first vnic
port is instantiated. Allocate VNIC contexts from user contexts pool
and return them back to the same pool while freeing up. Set aside
enough MSI-X interrupts for VNIC contexts and assign them when the
contexts are allocated. On the receive side, use an RSM rule to
spread TCP/UDP streams among VNIC contexts.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andrzej Kacprowski 
---
 drivers/infiniband/hw/hfi1/aspm.h |  15 +-
 drivers/infiniband/hw/hfi1/chip.c | 291 +-
 drivers/infiniband/hw/hfi1/chip.h |   2 +
 drivers/infiniband/hw/hfi1/debugfs.c  |   8 +-
 drivers/infiniband/hw/hfi1/driver.c   |  52 --
 drivers/infiniband/hw/hfi1/file_ops.c |  27 ++-
 drivers/infiniband/hw/hfi1/hfi.h  |  29 ++-
 drivers/infiniband/hw/hfi1/init.c |  29 +--
 drivers/infiniband/hw/hfi1/mad.c  |  10 +-
 drivers/infiniband/hw/hfi1/pio.c  |  19 +-
 drivers/infiniband/hw/hfi1/pio.h  |   8 +-
 drivers/infiniband/hw/hfi1/sysfs.c|   4 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c |   8 +-
 drivers/infiniband/hw/hfi1/user_pages.c   |   5 +-
 drivers/infiniband/hw/hfi1/verbs.c|   6 +-
 drivers/infiniband/hw/hfi1/vnic.h |   3 +
 drivers/infiniband/hw/hfi1/vnic_main.c| 245 -
 include/rdma/opa_port_info.h  |   3 +-
 18 files changed, 660 insertions(+), 104 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/aspm.h 
b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3..794e681 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long 
data)
spin_unlock_irqrestore(&rcd->aspm_lock, flags);
 }
 
-/* Disable interrupt processing for verbs contexts when PSM contexts are open 
*/
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
 static inline void aspm_disable_all(struct hfi1_devdata *dd)
 {
struct hfi1_ctxtdata *rcd;
unsigned long flags;
unsigned i;
 
-   for (i = 0; i < dd->first_user_ctxt; i++) {
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
del_timer_sync(&rcd->aspm_timer);
spin_lock_irqsave(&rcd->aspm_lock, flags);
@@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd)
if (aspm_mode != ASPM_MODE_DYNAMIC)
return;
 
-   for (i = 0; i < dd->first_user_ctxt; i++) {
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
spin_lock_irqsave(&rcd->aspm_lock, flags);
rcd->aspm_intr_enable = true;
@@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
(unsigned long)rcd);
rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
aspm_mode == ASPM_MODE_DYNAMIC &&
-   rcd->ctxt < rcd->dd->first_user_ctxt;
+   rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
 }
 
 static inline void aspm_init(struct hfi1_devdata *dd)
@@ -286,7 +289,7 @@ static inline void aspm_init(struct hfi1_devdata *dd)
spin_lock_init(&dd->aspm_lock);
dd->aspm_supported = aspm_hw_l1_supported(dd);
 
-   for (i = 0; i < dd->first_user_ctxt; i++)
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++)
aspm_ctx_init(dd->rcd[i]);
 
/* Start with ASPM disabled */
diff --git a/drivers/infiniband/hw/hfi1/chip.c 
b/drivers/infiniband/hw/hfi1/chip.c
index 79a316a..e520929 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -126,9 +126,16 @@ struct flag_table {
 #define DEFAULT_KRCVQS   2
 #define MIN_KERNEL_KCTXTS 2
 #define FIRST_KERNEL_KCTXT1
-/* sizes for both the QP and RSM map tables */
-#define NUM_MAP_ENTRIES256
-#define NUM_MAP_REGS 32
+
+/*
+ * RSM instance allocation
+ *   0 - Verbs
+ *   1 - User Fecn Handling
+ *   2 - Vnic
+ */
+#define RSM_INS_VERBS 0
+#define RSM_INS_FECN  1
+#define RSM_INS_VNIC  2
 
 /* Bit offset into the GUID which carries HFI id information */
 #define GUID_HFI_INDEX_SHIFT 39
@@ -139,8 +146,7 @@ struct flag_table {
 #define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
 #define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
 
-/* RSM fields */
-
+/* RSM fields for Verbs */
 /* packet type */
 #define IB_PACKET_TYPE   

[PATCH rdma-next v2 08/12] IB/opa-vnic: VNIC Ethernet Management Agent (VEMA) interface

2017-04-12 Thread Vishwanathapura, Niranjana
OPA VNIC EMA interface functions are the management interfaces to the OPA
VNIC netdev. Add support to add and remove VNIC ports. Implement the
required GET/SET management interface functions and processing of new
management information. Add support to send trap notifications upon various
events like interface status change, unicast/multicast mac list update and
mac address change.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
---
 drivers/infiniband/ulp/opa_vnic/Makefile   |   3 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h   |   4 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|  44 +++
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  | 142 +++-
 .../infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c  | 390 +
 5 files changed, 581 insertions(+), 2 deletions(-)
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c

diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile 
b/drivers/infiniband/ulp/opa_vnic/Makefile
index 975c313..e8d1ea1 100644
--- a/drivers/infiniband/ulp/opa_vnic/Makefile
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -3,4 +3,5 @@
 #
 obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
 
-opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o
+opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \
+  opa_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
index c025cde..4c434b9 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -99,6 +99,10 @@
 #define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd)  (!!((dlid_sd) & 0x20))
 #define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd)((dlid_sd) >> 8)
 
+/* VNIC Ethernet link status */
+#define OPA_VNIC_ETH_LINK_UP 1
+#define OPA_VNIC_ETH_LINK_DOWN   2
+
 /**
  * struct opa_vesw_info - OPA vnic switch information
  * @fabric_id: 10-bit fabric id
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
index bec4866..b49f5d7 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -161,14 +161,28 @@ struct __opa_veswport_trap {
 } __packed;
 
 /**
+ * struct opa_vnic_ctrl_port - OPA virtual NIC control port
+ * @ibdev: pointer to ib device
+ * @ops: opa vnic control operations
+ */
+struct opa_vnic_ctrl_port {
+   struct ib_device   *ibdev;
+   struct opa_vnic_ctrl_ops   *ops;
+};
+
+/**
  * struct opa_vnic_adapter - OPA VNIC netdev private data structure
  * @netdev: pointer to associated netdev
  * @ibdev: ib device
+ * @cport: pointer to opa vnic control port
  * @rn_ops: rdma netdev's net_device_ops
  * @port_num: OPA port number
  * @vport_num: vesw port number
  * @lock: adapter lock
  * @info: virtual ethernet switch port information
+ * @vema_mac_addr: mac address configured by vema
+ * @umac_hash: unicast maclist hash
+ * @mmac_hash: multicast maclist hash
  * @mactbl: hash table of MAC entries
  * @mactbl_lock: mac table lock
  * @stats_lock: statistics lock
@@ -177,6 +191,7 @@ struct __opa_veswport_trap {
 struct opa_vnic_adapter {
struct net_device *netdev;
struct ib_device  *ibdev;
+   struct opa_vnic_ctrl_port *cport;
const struct net_device_ops   *rn_ops;
 
u8 port_num;
@@ -186,6 +201,9 @@ struct opa_vnic_adapter {
struct mutex lock;
 
struct __opa_veswport_info  info;
+   u8  vema_mac_addr[ETH_ALEN];
+   u32 umac_hash;
+   u32 mmac_hash;
struct hlist_head  __rcu   *mactbl;
 
/* Lock used to protect updates to mac table */
@@ -225,6 +243,11 @@ struct opa_vnic_mac_tbl_node {
 #define v_warn(format, arg...) \
netdev_warn(adapter->netdev, format, ## arg)
 
+#define c_err(format, arg...) \
+   dev_err(&cport->ibdev->dev, format, ## arg)
+#define c_info(format, arg...) \
+   dev_info(&cport->ibdev->dev, format, ## arg)
+
 /* The maximum allowed entries in the mac table */
 #define OPA_VNIC_MAC_TBL_MAX_ENTRIES  2048
 /* Limit of smac entries in mac table */
@@ -264,11 +287,32 @@ struct opa_vnic_adapter *opa_vnic_add_netdev(struct 
ib_device *ibdev,
 void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
 u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
 u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff 
*skb);
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter);
 void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter);
 void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
struct opa_veswport_mactable *tbl);
 int opa_vnic_update_mac_tbl(struct opa_vnic_

[PATCH rdma-next v2 09/12] IB/opa-vnic: VNIC Ethernet Management Agent (VEMA) function

2017-04-12 Thread Vishwanathapura, Niranjana
OPA VEMA function interfaces with the Infiniband MAD stack to exchange the
management information packets with the Ethernet Manager (EM).
It interfaces with the OPA VNIC netdev function to SET/GET the management
information. The information exchanged with the EM includes class port
details, encapsulation configuration, various counters, unicast and
multicast MAC list and the MAC table. It also supports sending traps
to the EM.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sudeep Dutt 
---
 drivers/infiniband/ulp/opa_vnic/Makefile   |2 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c |   12 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|   17 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c| 1078 
 .../infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c  |2 +-
 5 files changed, 1106 insertions(+), 5 deletions(-)
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c

diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile 
b/drivers/infiniband/ulp/opa_vnic/Makefile
index e8d1ea1..8061b28 100644
--- a/drivers/infiniband/ulp/opa_vnic/Makefile
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -4,4 +4,4 @@
 obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
 
 opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \
-  opa_vnic_vema_iface.o
+  opa_vnic_vema.o opa_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
index a98948c..d66540e 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -120,6 +120,17 @@ struct vnic_stats {
 
 #define VNIC_STATS_LEN  ARRAY_SIZE(vnic_gstrings_stats)
 
+/* vnic_get_drvinfo - get driver info */
+static void vnic_get_drvinfo(struct net_device *netdev,
+struct ethtool_drvinfo *drvinfo)
+{
+   strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver));
+   strlcpy(drvinfo->version, opa_vnic_driver_version,
+   sizeof(drvinfo->version));
+   strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
+   sizeof(drvinfo->bus_info));
+}
+
 /* vnic_get_sset_count - get string set count */
 static int vnic_get_sset_count(struct net_device *netdev, int sset)
 {
@@ -162,6 +173,7 @@ static void vnic_get_strings(struct net_device *netdev, u32 
stringset, u8 *data)
 
 /* ethtool ops */
 static const struct ethtool_ops opa_vnic_ethtool_ops = {
+   .get_drvinfo = vnic_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_strings = vnic_get_strings,
.get_sset_count = vnic_get_sset_count,
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
index b49f5d7..6bba886 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -164,10 +164,12 @@ struct __opa_veswport_trap {
  * struct opa_vnic_ctrl_port - OPA virtual NIC control port
  * @ibdev: pointer to ib device
  * @ops: opa vnic control operations
+ * @num_ports: number of opa ports
  */
 struct opa_vnic_ctrl_port {
struct ib_device   *ibdev;
struct opa_vnic_ctrl_ops   *ops;
+   u8  num_ports;
 };
 
 /**
@@ -187,6 +189,8 @@ struct opa_vnic_ctrl_port {
  * @mactbl_lock: mac table lock
  * @stats_lock: statistics lock
  * @flow_tbl: flow to default port redirection table
+ * @trap_timeout: trap timeout
+ * @trap_count: no. of traps allowed within timeout period
  */
 struct opa_vnic_adapter {
struct net_device *netdev;
@@ -213,6 +217,9 @@ struct opa_vnic_adapter {
struct mutex stats_lock;
 
u8 flow_tbl[OPA_VNIC_FLOW_TBL_SIZE];
+
+   unsigned long trap_timeout;
+   u8trap_count;
 };
 
 /* Same as opa_veswport_mactable_entry, but without bitwise attribute */
@@ -247,6 +254,8 @@ struct opa_vnic_mac_tbl_node {
dev_err(&cport->ibdev->dev, format, ## arg)
 #define c_info(format, arg...) \
dev_info(&cport->ibdev->dev, format, ## arg)
+#define c_dbg(format, arg...) \
+   dev_dbg(&cport->ibdev->dev, format, ## arg)
 
 /* The maximum allowed entries in the mac table */
 #define OPA_VNIC_MAC_TBL_MAX_ENTRIES  2048
@@ -281,6 +290,9 @@ struct opa_vnic_mac_tbl_node {
!obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++)   \
hlist_for_each_entry(obj, &name[bkt], member)
 
+extern char opa_vnic_driver_name[];
+extern const char opa_vnic_driver_version[];
+
 struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
 u8 port_num, u8 vport_num);
 void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter);
@@ -310,9 +322,8 @@ void opa_vnic_get_per_veswport_info(struct opa_vni

[PATCH rdma-next v2 01/12] IB/opa-vnic: Virtual Network Interface Controller (VNIC) documentation

2017-04-12 Thread Vishwanathapura, Niranjana
Add OPA VNIC design document explaining the VNIC architecture and the
driver design.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 Documentation/infiniband/opa_vnic.txt | 153 ++
 1 file changed, 153 insertions(+)
 create mode 100644 Documentation/infiniband/opa_vnic.txt

diff --git a/Documentation/infiniband/opa_vnic.txt 
b/Documentation/infiniband/opa_vnic.txt
new file mode 100644
index 000..282e17b
--- /dev/null
+++ b/Documentation/infiniband/opa_vnic.txt
@@ -0,0 +1,153 @@
+Intel Omni-Path (OPA) Virtual Network Interface Controller (VNIC) feature
+supports Ethernet functionality over Omni-Path fabric by encapsulating
+the Ethernet packets between HFI nodes.
+
+Architecture
+============
+The patterns of exchanges of Omni-Path encapsulated Ethernet packets
+involve one or more virtual Ethernet switches overlaid on the Omni-Path
+fabric topology. A subset of HFI nodes on the Omni-Path fabric are
+permitted to exchange encapsulated Ethernet packets across a particular
+virtual Ethernet switch. The virtual Ethernet switches are logical
+abstractions achieved by configuring the HFI nodes on the fabric for
+header generation and processing. In the simplest configuration all HFI
+nodes across the fabric exchange encapsulated Ethernet packets over a
+single virtual Ethernet switch. A virtual Ethernet switch is effectively
+an independent Ethernet network. The configuration is performed by an
+Ethernet Manager (EM) which is part of the trusted Fabric Manager (FM)
+application. HFI nodes can have multiple VNICs each connected to a
+different virtual Ethernet switch. The below diagram presents a case
+of two virtual Ethernet switches with two HFI nodes.
+
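+                             +-------------------+
+                             |      Subnet/      |
+                             |     Ethernet      |
+                             |      Manager      |
+                             +-------------------+
+                                /          /
+                              /           /
+                            /            /
+                          /             /
++-----------------------------+  +------------------------------+
+|  Virtual Ethernet Switch    |  |  Virtual Ethernet Switch     |
+|  +---------+    +---------+ |  | +---------+    +---------+   |
+|  | VPORT   |    |  VPORT  | |  | |  VPORT  |    |  VPORT  |   |
++--+---------+----+---------+-+  +-+---------+----+---------+---+
+         |                 \        /                 |
+         |                   \    /                   |
+         |                     \/                     |
+         |                    /  \                    |
+         |                  /      \                  |
+     +-----------+------------+  +-----------+------------+
+     |   VNIC    |    VNIC    |  |    VNIC   |    VNIC    |
+     +-----------+------------+  +-----------+------------+
+     |          HFI           |  |          HFI           |
+     +------------------------+  +------------------------+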
+
+
+The Omni-Path encapsulated Ethernet packet format is as described below.
+
+Bits  Field
+
+Quad Word 0:
+0-19  SLID (lower 20 bits)
+20-30 Length (in Quad Words)
+31BECN bit
+32-51 DLID (lower 20 bits)
+52-56 SC (Service Class)
+57-59 RC (Routing Control)
+60FECN bit
+61-62 L2 (=10, 16B format)
+63LT (=1, Link Transfer Head Flit)
+
+Quad Word 1:
+0-7   L4 type (=0x78 ETHERNET)
+8-11  SLID[23:20]
+12-15 DLID[23:20]
+16-31 PKEY
+32-47 Entropy
+48-63 Reserved
+
+Quad Word 2:
+0-15  Reserved
+16-31 L4 header
+32-63 Ethernet Packet
+
+Quad Words 3 to N-1:
+0-63  Ethernet packet (pad extended)
+
+Quad Word N (last):
+0-23  Ethernet packet (pad extended)
+24-55 ICRC
+56-61 Tail
+62-63 LT (=01, Link Transfer Tail Flit)
+
+Ethernet packet is padded on the transmit side to ensure that the VNIC OPA
+packet is quad word aligned. The 'Tail' field contains the number of bytes
+padded. On the receive side the 'Tail' field is read and the padding is
+removed (along with ICRC, Tail and OPA header) before passing packet up
+the network stack.
+
+The L4 header field contains the virtual Ethernet switch id the VNIC port
+belongs to. On the receive side, this field is used to de-multiplex the
+received VNIC packets to different VNIC ports.
+
+Driver Design
+=============
+Intel OPA VNIC software design is presented in the below diagram.
+OPA VNIC functionality has a HW dependent component and a HW
+independent component.
+
+The support has been added for IB device to allocate and free the RDMA
+netdev devices. The RDMA netdev supports interfacing with the network
+stack thus creating standard network interfaces. OPA_VNIC is an RDMA
+netdev device type.
+
+The HW dependent VNIC functionality is part of

[PATCH rdma-next v2 00/12] Omni-Path Virtual Network Interface Controller (VNIC)

2017-04-12 Thread Vishwanathapura, Niranjana
urce allocation/management for VNIC functionality.
It interfaces with the network stack and implements the required
net_device_ops functions. It expects Omni-Path encapsulated Ethernet
packets in the transmit path and provides HW access to them. It strips
the Omni-Path header from the received packets before passing them up
the network stack. It also implements the RDMA netdev control operations.

The OPA VNIC module implements the HW independent VNIC functionality.
It consists of two parts. The VNIC Ethernet Management Agent (VEMA)
registers itself with IB core as an IB client and interfaces with the
IB MAD stack. It exchanges the management information with the Ethernet
Manager (EM) and the VNIC netdev. The VNIC netdev part allocates and frees
the OPA_VNIC RDMA netdev devices. It overrides the net_device_ops functions
set by HW dependent VNIC driver where required to accommodate any control
operation. It also handles the encapsulation of Ethernet packets with an
Omni-Path header in the transmit path. For each VNIC interface, the
information required for encapsulation is configured by the EM via VEMA MAD
interface. It also passes any control information to the HW dependent driver
by invoking the RDMA netdev control operations.

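+-------------------+ +----------------------+
|                   | |       Linux          |
|     IB MAD        | |      Network         |
|                   | |       Stack          |
+-------------------+ +----------------------+
         |               |          |
         |               |          |
+----------------------------+      |
|                            |      |
|      OPA VNIC Module       |      |
|  (OPA VNIC RDMA Netdev     |      |
|     & EMA functions)       |      |
|                            |      |
+----------------------------+      |
            |                       |
            |                       |
   +------------------+             |
   |     IB core      |             |
   +------------------+             |
            |                       |
            |                       |
+--------------------------------------------+
|                                            |
|      HFI1 Driver with VNIC support         |
|                                            |
+--------------------------------------------+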


Vishwanathapura, Niranjana (12):
  IB/opa-vnic: Virtual Network Interface Controller (VNIC) documentation
  IB/opa-vnic: RDMA NETDEV interface
  IB/opa-vnic: Virtual Network Interface Controller (VNIC) interface
  IB/opa-vnic: Virtual Network Interface Controller (VNIC) netdev
  IB/opa-vnic: VNIC Ethernet Management (EM) structure definitions
  IB/opa-vnic: VNIC statistics support
  IB/opa-vnic: VNIC MAC table support
  IB/opa-vnic: VNIC Ethernet Management Agent (VEMA) interface
  IB/opa-vnic: VNIC Ethernet Management Agent (VEMA) function
  IB/hfi1: OPA_VNIC RDMA netdev support
  IB/hfi1: Virtual Network Interface Controller (VNIC) HW support
  IB/hfi1: VNIC SDMA support

 Documentation/infiniband/opa_vnic.txt  |  153 +++
 MAINTAINERS|7 +
 drivers/infiniband/Kconfig |1 +
 drivers/infiniband/hw/hfi1/Makefile|2 +-
 drivers/infiniband/hw/hfi1/aspm.h  |   15 +-
 drivers/infiniband/hw/hfi1/chip.c  |  291 +-
 drivers/infiniband/hw/hfi1/chip.h  |2 +
 drivers/infiniband/hw/hfi1/debugfs.c   |8 +-
 drivers/infiniband/hw/hfi1/driver.c|   77 +-
 drivers/infiniband/hw/hfi1/file_ops.c  |   27 +-
 drivers/infiniband/hw/hfi1/hfi.h   |   57 +-
 drivers/infiniband/hw/hfi1/init.c  |   39 +-
 drivers/infiniband/hw/hfi1/mad.c   |   10 +-
 drivers/infiniband/hw/hfi1/pio.c   |   19 +-
 drivers/infiniband/hw/hfi1/pio.h   |8 +-
 drivers/infiniband/hw/hfi1/sysfs.c |4 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c  |8 +-
 drivers/infiniband/hw/hfi1/user_pages.c|5 +-
 drivers/infiniband/hw/hfi1/verbs.c |6 +-
 drivers/infiniband/hw/hfi1/vnic.h  |  184 
 drivers/infiniband/hw/hfi1/vnic_main.c |  907 
 drivers/infiniband/hw/hfi1/vnic_sdma.c |  323 ++
 drivers/infiniband/ulp/Makefile|1 +
 drivers/infiniband/ulp/opa_vnic/Kconfig|8 +
 drivers/infiniband/ulp/opa_vnic/Makefile   |7 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c   |  475 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h   |  489 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c |  187 +++

[PATCH rdma-next v2 03/12] IB/opa-vnic: Virtual Network Interface Controller (VNIC) interface

2017-04-12 Thread Vishwanathapura, Niranjana
Define OPA VNIC interface between hardware independent VNIC
functionality and the hardware dependent VNIC functionality.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 include/rdma/ib_verbs.h |   1 +
 include/rdma/opa_vnic.h | 141 
 2 files changed, 142 insertions(+)
 create mode 100644 include/rdma/opa_vnic.h

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5c6b8c0..88abef8 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -225,6 +225,7 @@ enum ib_device_cap_flags {
IB_DEVICE_VIRTUAL_FUNCTION  = (1ULL << 33),
/* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
IB_DEVICE_RAW_SCATTER_FCS   = (1ULL << 34),
+   IB_DEVICE_RDMA_NETDEV_OPA_VNIC  = (1ULL << 35),
 };
 
 enum ib_signature_prot_cap {
diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h
new file mode 100644
index 000..39d6890
--- /dev/null
+++ b/include/rdma/opa_vnic.h
@@ -0,0 +1,141 @@
+#ifndef _OPA_VNIC_H
+#define _OPA_VNIC_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in
+ *the documentation and/or other materials provided with the
+ *distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *contributors may be used to endorse or promote products derived
+ *from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains Intel Omni-Path (OPA) Virtual Network Interface
+ * Controller (VNIC) specific declarations.
+ */
+
+#include <rdma/ib_verbs.h>
+
+/* VNIC uses 16B header format */
+#define OPA_VNIC_L2_TYPE0x2
+
+/* 16 header bytes + 2 reserved bytes */
+#define OPA_VNIC_L2_HDR_LEN   (16 + 2)
+
+#define OPA_VNIC_L4_HDR_LEN   2
+
+#define OPA_VNIC_HDR_LEN  (OPA_VNIC_L2_HDR_LEN + \
+  OPA_VNIC_L4_HDR_LEN)
+
+#define OPA_VNIC_L4_ETHR  0x78
+
+#define OPA_VNIC_ICRC_LEN   4
+#define OPA_VNIC_TAIL_LEN   1
+#define OPA_VNIC_ICRC_TAIL_LEN  (OPA_VNIC_ICRC_LEN + OPA_VNIC_TAIL_LEN)
+
+#define OPA_VNIC_SKB_MDATA_LEN 4
+#define OPA_VNIC_SKB_MDATA_ENCAP_ERR   0x1
+
+/* opa vnic rdma netdev's private data structure */
+struct opa_vnic_rdma_netdev {
+   struct rdma_netdev rn;  /* keep this first */
+   /* followed by device private data */
+   char *dev_priv[0];
+};
+
+static inline void *opa_vnic_priv(const struct net_device *dev)
+{
+   struct rdma_netdev *rn = netdev_priv(dev);
+
+   return rn->clnt_priv;
+}
+
+static inline void *opa_vnic_dev_priv(const struct net_device *dev)
+{
+   struct opa_vnic_rdma_netdev *oparn = netdev_priv(dev);
+
+   return oparn->dev_priv;
+}
+
+/* opa_vnic skb meta data structure */
+struct opa_vnic_skb_mdata {
+   u8 vl;
+   u8 entropy;
+   u8 flags;
+   u8 rsvd;
+} __packed;
+
+/* OPA VNIC group statistics */
+struct opa_vnic_grp_stats {
+   u64 unicast;
+   u64 mcastbcast;
+   u64 untagged;
+   u64 vlan;
+   u64 s_64;
+   u64 s_65_127;
+   u64 s_128_255;
+   u64 s_256_511;
+

Re: [PATCH rdma-next v1 10/12] IB/hfi1: OPA_VNIC RDMA netdev support

2017-04-12 Thread Vishwanathapura, Niranjana

On Wed, Apr 12, 2017 at 09:56:21AM -0600, Jason Gunthorpe wrote:

On Tue, Apr 11, 2017 at 11:40:05PM -0700, Vishwanathapura, Niranjana wrote:

Add support to create and free OPA_VNIC rdma netdev devices.
Implement netstack interface functionality including xmit_skb,
receive side NAPI etc. Also implement rdma netdev control functions.


Now that you have all this infrastructure, and Erez has produced a
ipoib patch to use it, are you going to look at implementing the ipoib
rdma_netdev variant too?


For hfi1? Yah, I can give it a try later once both these patch series get 
accepted (to avoid any rework or duplicate work).


Niranjana





Re: [PATCH rdma-next v1 04/12] IB/opa-vnic: Virtual Network Interface Controller (VNIC) netdev

2017-04-12 Thread Vishwanathapura, Niranjana

On Wed, Apr 12, 2017 at 10:08:30AM +0300, Leon Romanovsky wrote:

+#define v_dbg(format, arg...) \
+   netdev_dbg(adapter->netdev, format, ## arg)
+#define v_err(format, arg...) \
+   netdev_err(adapter->netdev, format, ## arg)
+#define v_info(format, arg...) \
+   netdev_info(adapter->netdev, format, ## arg)
+#define v_warn(format, arg...) \
+   netdev_warn(adapter->netdev, format, ## arg)
+


IMHO, these wrappers are redundant.



Using same constructs as some Intel standard ethernet drivers.


+/* opa_netdev_open - activate network interface */
+static int opa_netdev_open(struct net_device *netdev)
+{
+   struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+   int rc;
+
+   rc = adapter->rn_ops->ndo_open(adapter->netdev);
+   if (rc) {
+   v_dbg("open failed %d\n", rc);
+   return rc;
+   }
+
+   v_info("opened\n");


All these v_info are achieved by tracepoints (function tracer).



Some of these messages are useful for analysing reported logs.
Let me change these opened/closed messages to debug level.


+
+   netdev = ibdev->alloc_rdma_netdev(ibdev, port_num,
+ RDMA_NETDEV_OPA_VNIC,
+ "veth%d", NET_NAME_UNKNOWN,
+ ether_setup);
+   if (!netdev)
+   return ERR_PTR(-ENOMEM);
+   else if (IS_ERR(netdev))
+   return ERR_CAST(netdev);
+





Erez and Jason came to this code for IPoIB, it is better to have same
error handling for all alloc_rdma_netdev callers.
+   if (hca->alloc_rdma_netdev) {
+   dev = hca->alloc_rdma_netdev(hca, port,
+RDMA_NETDEV_IPOIB, name,
+NET_NAME_UNKNOWN,
+ipoib_setup_common);
+   if (IS_ERR_OR_NULL(dev) && PTR_ERR(dev) != -EOPNOTSUPP)
+   return NULL;
+   }




IPoIB handles EOPNOTSUPP differently (by assigning default operations).
It is not applicable to OPA VNIC, hence it just returns the error code.

I just noticed that IPoIB is using EOPNOTSUPP, however OPA VNIC is using 
ENOTSUPP. EOPNOTSUPP seems to be widely used, so I will change OPA VNIC to use 
the same. Will also document this requirement in ib_verbs.h where this function 
is defined.


Niranjana



[PATCH rdma-next v1 04/12] IB/opa-vnic: Virtual Network Interface Controller (VNIC) netdev

2017-04-11 Thread Vishwanathapura, Niranjana
OPA VNIC netdev function supports Ethernet functionality over Omni-Path
fabric by encapsulating Ethernet packets inside Omni-Path packet header.
It allocates a rdma netdev device and interfaces with the network stack to
provide standard Ethernet network interfaces. It overrides HFI1 device's
netdev operations where it is required.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Sudeep Dutt 
Signed-off-by: Andrzej Kacprowski 
---
 MAINTAINERS|   7 +
 drivers/infiniband/Kconfig |   1 +
 drivers/infiniband/ulp/Makefile|   1 +
 drivers/infiniband/ulp/opa_vnic/Kconfig|   8 +
 drivers/infiniband/ulp/opa_vnic/Makefile   |   6 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c   | 239 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h   |  62 ++
 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c |  65 ++
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h| 186 
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  | 229 
 10 files changed, 804 insertions(+)
 create mode 100644 drivers/infiniband/ulp/opa_vnic/Kconfig
 create mode 100644 drivers/infiniband/ulp/opa_vnic/Makefile
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c

diff --git a/MAINTAINERS b/MAINTAINERS
index c776906..fc32256 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5843,6 +5843,13 @@ F:   drivers/block/cciss*
 F: include/linux/cciss_ioctl.h
 F: include/uapi/linux/cciss_ioctl.h
 
+OPA-VNIC DRIVER
+M: Dennis Dalessandro 
+M: Niranjana Vishwanathapura 
+L: linux-r...@vger.kernel.org
+S: Supported
+F: drivers/infiniband/ulp/opa_vnic
+
 HFI1 DRIVER
 M: Mike Marciniszyn 
 M: Dennis Dalessandro 
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 66f8602..234fe01 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,6 +85,7 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
 source "drivers/infiniband/ulp/iser/Kconfig"
 source "drivers/infiniband/ulp/isert/Kconfig"
 
+source "drivers/infiniband/ulp/opa_vnic/Kconfig"
 source "drivers/infiniband/sw/rdmavt/Kconfig"
 source "drivers/infiniband/sw/rxe/Kconfig"
 
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
index f3c7dcf..c28af18 100644
--- a/drivers/infiniband/ulp/Makefile
+++ b/drivers/infiniband/ulp/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_INFINIBAND_SRP)+= srp/
 obj-$(CONFIG_INFINIBAND_SRPT)  += srpt/
 obj-$(CONFIG_INFINIBAND_ISER)  += iser/
 obj-$(CONFIG_INFINIBAND_ISERT) += isert/
+obj-$(CONFIG_INFINIBAND_OPA_VNIC)  += opa_vnic/
diff --git a/drivers/infiniband/ulp/opa_vnic/Kconfig 
b/drivers/infiniband/ulp/opa_vnic/Kconfig
new file mode 100644
index 000..48132ab
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Kconfig
@@ -0,0 +1,8 @@
+config INFINIBAND_OPA_VNIC
+   tristate "Intel OPA VNIC support"
+   depends on X86_64 && INFINIBAND
+   ---help---
+   This is Omni-Path (OPA) Virtual Network Interface Controller (VNIC)
+   driver for Ethernet over Omni-Path feature. It implements the HW
+   independent VNIC functionality. It interfaces with Linux stack for
+   data path and IB MAD for the control path.
diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile 
b/drivers/infiniband/ulp/opa_vnic/Makefile
new file mode 100644
index 000..975c313
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -0,0 +1,6 @@
+# Makefile - Intel Omni-Path Virtual Network Controller driver
+# Copyright(c) 2017, Intel Corporation.
+#
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
+
+opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
new file mode 100644
index 000..c74d02a
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FO

[PATCH rdma-next v1 05/12] IB/opa-vnic: VNIC Ethernet Management (EM) structure definitions

2017-04-11 Thread Vishwanathapura, Niranjana
Define VNIC EM MAD structures and the associated macros. These structures
are used for information exchange between VNIC EM agent (EMA) on the host
and the Ethernet manager. These include the virtual ethernet switch (vesw)
port information, vesw port mac table, summary and error counters,
vesw port interface mac lists and the EMA trap.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
---
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h   | 423 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|  33 ++
 2 files changed, 456 insertions(+)

diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
index 176fca9..c025cde 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -52,6 +52,28 @@
  * and decapsulation of Ethernet packets
  */
 
+#include 
+#include 
+
+/* EMA class version */
+#define OPA_EMA_CLASS_VERSION   0x80
+
+/*
+ * Define the Intel vendor management class for OPA
+ * ETHERNET MANAGEMENT
+ */
+#define OPA_MGMT_CLASS_INTEL_EMA0x34
+
+/* EM attribute IDs */
+#define OPA_EM_ATTR_CLASS_PORT_INFO 0x0001
+#define OPA_EM_ATTR_VESWPORT_INFO   0x0011
+#define OPA_EM_ATTR_VESWPORT_MAC_ENTRIES0x0012
+#define OPA_EM_ATTR_IFACE_UCAST_MACS0x0013
+#define OPA_EM_ATTR_IFACE_MCAST_MACS0x0014
+#define OPA_EM_ATTR_DELETE_VESW 0x0015
+#define OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS   0x0020
+#define OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS 0x0022
+
 /* VNIC configured and operational state values */
 #define OPA_VNIC_STATE_DROP_ALL0x1
 #define OPA_VNIC_STATE_FORWARDING  0x3
@@ -59,4 +81,405 @@
 #define OPA_VESW_MAX_NUM_DEF_PORT   16
 #define OPA_VNIC_MAX_NUM_PCP8
 
+#define OPA_VNIC_EMA_DATA(OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR)
+
+/* Defines for vendor specific notice(trap) attributes */
+#define OPA_INTEL_EMA_NOTICE_TYPE_INFO 0x04
+
+/* INTEL OUI */
+#define INTEL_OUI_1 0x00
+#define INTEL_OUI_2 0x06
+#define INTEL_OUI_3 0x6a
+
+/* Trap opcodes sent from VNIC */
+#define OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1
+#define OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2
+#define OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3
+
+#define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd)  (!!((dlid_sd) & 0x20))
+#define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd)((dlid_sd) >> 8)
+
+/**
+ * struct opa_vesw_info - OPA vnic switch information
+ * @fabric_id: 10-bit fabric id
+ * @vesw_id: 12-bit virtual ethernet switch id
+ * @def_port_mask: bitmask of default ports
+ * @pkey: partition key
+ * @u_mcast_dlid: unknown multicast dlid
+ * @u_ucast_dlid: array of unknown unicast dlids
+ * @eth_mtu: MTUs for each vlan PCP
+ * @eth_mtu_non_vlan: MTU for non vlan packets
+ */
+struct opa_vesw_info {
+   __be16  fabric_id;
+   __be16  vesw_id;
+
+   u8  rsvd0[6];
+   __be16  def_port_mask;
+
+   u8  rsvd1[2];
+   __be16  pkey;
+
+   u8  rsvd2[4];
+   __be32  u_mcast_dlid;
+   __be32  u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+   u8  rsvd3[44];
+   __be16  eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+   __be16  eth_mtu_non_vlan;
+   u8  rsvd4[2];
+} __packed;
+
+/**
+ * struct opa_per_veswport_info - OPA vnic per port information
+ * @port_num: port number
+ * @eth_link_status: current ethernet link state
+ * @base_mac_addr: base mac address
+ * @config_state: configured port state
+ * @oper_state: operational port state
+ * @max_mac_tbl_ent: max number of mac table entries
+ * @max_smac_ent: max smac entries in mac table
+ * @mac_tbl_digest: mac table digest
+ * @encap_slid: base slid for the port
+ * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets
+ * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets
+ * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets
+ * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets
+ * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets
+ * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets
+ * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets
+ * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets
+ * @uc_macs_gen_count: generation count for unicast macs list
+ * @mc_macs_gen_count: generation count for multicast macs list
+ */
+struct opa_per_veswport_info {
+   __be32  port_num;
+
+   u8  eth_link_status;
+   u8  rsvd0[3];
+
+   u8  base_mac_addr[ETH_ALEN];
+   u8  config_state;
+   u8  oper_state;
+
+   __be16  max_mac_tbl_ent;
+   __be16  max_smac_ent;
+   __be32  mac_tbl_digest;
+   u8  rsvd1[4];
+
+   __be32  encap_slid;
+
+   u8  pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+   u8  pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+   u8

[PATCH rdma-next v1 02/12] IB/opa-vnic: RDMA NETDEV interface

2017-04-11 Thread Vishwanathapura, Niranjana
Add rdma netdev interface to ib device structure allowing rdma netdev
devices to be allocated by ib clients.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 include/rdma/ib_verbs.h | 28 
 1 file changed, 28 insertions(+)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3a8e058..1064459 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -55,6 +55,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1877,6 +1878,24 @@ struct ib_port_immutable {
u32   max_mad_size;
 };
 
+/* rdma netdev type - specifies protocol type */
+enum rdma_netdev_t {
+   RDMA_NETDEV_OPA_VNIC
+};
+
+/**
+ * struct rdma_netdev - rdma netdev
+ * For cases where netstack interfacing is required.
+ */
+struct rdma_netdev {
+   void  *clnt_priv;
+   struct ib_device  *hca;
+   u8 port_num;
+
+   /* control functions */
+   void (*set_id)(struct net_device *netdev, int id);
+};
+
 struct ib_device {
char  name[IB_DEVICE_NAME_MAX];
 
@@ -2127,6 +2146,15 @@ struct ib_device {
   struct 
ib_rwq_ind_table_init_attr *init_attr,
   struct ib_udata 
*udata);
int(*destroy_rwq_ind_table)(struct 
ib_rwq_ind_table *wq_ind_table);
+   /* rdma netdev operations */
+   struct net_device *(*alloc_rdma_netdev)(
+   struct ib_device *device,
+   u8 port_num,
+   enum rdma_netdev_t type,
+   const char *name,
+   unsigned char name_assign_type,
+   void (*setup)(struct net_device *));
+   void (*free_rdma_netdev)(struct net_device *netdev);
 
struct module   *owner;
struct devicedev;
-- 
1.8.3.1



[PATCH rdma-next v1 10/12] IB/hfi1: OPA_VNIC RDMA netdev support

2017-04-11 Thread Vishwanathapura, Niranjana
Add support to create and free OPA_VNIC rdma netdev devices.
Implement netstack interface functionality including xmit_skb,
receive side NAPI etc. Also implement rdma netdev control functions.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andrzej Kacprowski 
---
 drivers/infiniband/hw/hfi1/Makefile|   2 +-
 drivers/infiniband/hw/hfi1/driver.c|  25 +-
 drivers/infiniband/hw/hfi1/hfi.h   |  27 +-
 drivers/infiniband/hw/hfi1/init.c  |   9 +-
 drivers/infiniband/hw/hfi1/vnic.h  | 153 
 drivers/infiniband/hw/hfi1/vnic_main.c | 644 +
 6 files changed, 853 insertions(+), 7 deletions(-)
 create mode 100644 drivers/infiniband/hw/hfi1/vnic.h
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_main.c

diff --git a/drivers/infiniband/hw/hfi1/Makefile 
b/drivers/infiniband/hw/hfi1/Makefile
index 0cf97a0..2280538 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
-   verbs_txreq.o
+   verbs_txreq.o vnic_main.o
 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
 
 CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/driver.c 
b/drivers/infiniband/hw/hfi1/driver.c
index 64bdbce..e4dc6a5 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -60,6 +60,7 @@
 #include "qp.h"
 #include "sdma.h"
 #include "debugfs.h"
+#include "vnic.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -1381,15 +1382,31 @@ int process_receive_ib(struct hfi1_packet *packet)
return RHF_RCV_CONTINUE;
 }
 
+static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
+{
+   /* Packet received in VNIC context via RSM */
+   if (packet->rcd->is_vnic)
+   return true;
+
+   if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) &&
+   (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR))
+   return true;
+
+   return false;
+}
+
 int process_receive_bypass(struct hfi1_packet *packet)
 {
struct hfi1_devdata *dd = packet->rcd->dd;
 
-   if (unlikely(rhf_err_flags(packet->rhf)))
+   if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
+   } else if (hfi1_is_vnic_packet(packet)) {
+   hfi1_vnic_bypass_rcv(packet);
+   return RHF_RCV_CONTINUE;
+   }
 
-   dd_dev_err(dd,
-  "Bypass packets are not supported in normal operation. 
Dropping\n");
+   dd_dev_err(dd, "Unsupported bypass packet. Dropping\n");
incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
u64 *flits = packet->ebuf;
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index a31638c..f85e8f4 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,7 @@
 #ifndef _HFI1_KERNEL_H
 #define _HFI1_KERNEL_H
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -337,6 +337,12 @@ struct hfi1_ctxtdata {
 * packets with the wrong interrupt handler.
 */
int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+
+   /* Indicates that this is vnic context */
+   bool is_vnic;
+
+   /* vnic queue index this context is mapped to */
+   u8 vnic_q_idx;
 };
 
 /*
@@ -808,6 +814,19 @@ struct hfi1_asic_data {
struct hfi1_i2c_bus *i2c_bus1;
 };
 
+/*
+ * Number of VNIC contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_NUM_VNIC_CTXT   8
+
+/* Virtual NIC information */
+struct hfi1_vnic_data {
+   struct idr vesw_idr;
+};
+
+struct hfi1_vnic_vport_info;
+
 /* device data struct now contains only "general per-device" info.
  * fields related to a physical IB port are in a hfi1_pportdata struct.
  */
@@ -1115,6 +1134,9 @@ struct hfi1_devdata {
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+   int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx,
+s

[PATCH rdma-next v1 08/12] IB/opa-vnic: VNIC Ethernet Management Agent (VEMA) interface

2017-04-11 Thread Vishwanathapura, Niranjana
OPA VNIC EMA interface functions are the management interfaces to the OPA
VNIC netdev. Add support to add and remove VNIC ports. Implement the
required GET/SET management interface functions and processing of new
management information. Add support to send trap notifications upon various
events like interface status change, unicast/multicast mac list update and
mac address change.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
---
 drivers/infiniband/ulp/opa_vnic/Makefile   |   3 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h   |   4 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|  44 +++
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  | 142 +++-
 .../infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c  | 390 +
 5 files changed, 581 insertions(+), 2 deletions(-)
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c

diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile 
b/drivers/infiniband/ulp/opa_vnic/Makefile
index 975c313..e8d1ea1 100644
--- a/drivers/infiniband/ulp/opa_vnic/Makefile
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -3,4 +3,5 @@
 #
 obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
 
-opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o
+opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \
+  opa_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
index c025cde..4c434b9 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -99,6 +99,10 @@
 #define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd)  (!!((dlid_sd) & 0x20))
 #define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd)((dlid_sd) >> 8)
 
+/* VNIC Ethernet link status */
+#define OPA_VNIC_ETH_LINK_UP 1
+#define OPA_VNIC_ETH_LINK_DOWN   2
+
 /**
  * struct opa_vesw_info - OPA vnic switch information
  * @fabric_id: 10-bit fabric id
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
index bec4866..b49f5d7 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -161,14 +161,28 @@ struct __opa_veswport_trap {
 } __packed;
 
 /**
+ * struct opa_vnic_ctrl_port - OPA virtual NIC control port
+ * @ibdev: pointer to ib device
+ * @ops: opa vnic control operations
+ */
+struct opa_vnic_ctrl_port {
+   struct ib_device   *ibdev;
+   struct opa_vnic_ctrl_ops   *ops;
+};
+
+/**
  * struct opa_vnic_adapter - OPA VNIC netdev private data structure
  * @netdev: pointer to associated netdev
  * @ibdev: ib device
+ * @cport: pointer to opa vnic control port
  * @rn_ops: rdma netdev's net_device_ops
  * @port_num: OPA port number
  * @vport_num: vesw port number
  * @lock: adapter lock
  * @info: virtual ethernet switch port information
+ * @vema_mac_addr: mac address configured by vema
+ * @umac_hash: unicast maclist hash
+ * @mmac_hash: multicast maclist hash
  * @mactbl: hash table of MAC entries
  * @mactbl_lock: mac table lock
  * @stats_lock: statistics lock
@@ -177,6 +191,7 @@ struct __opa_veswport_trap {
 struct opa_vnic_adapter {
struct net_device *netdev;
struct ib_device  *ibdev;
+   struct opa_vnic_ctrl_port *cport;
const struct net_device_ops   *rn_ops;
 
u8 port_num;
@@ -186,6 +201,9 @@ struct opa_vnic_adapter {
struct mutex lock;
 
struct __opa_veswport_info  info;
+   u8  vema_mac_addr[ETH_ALEN];
+   u32 umac_hash;
+   u32 mmac_hash;
struct hlist_head  __rcu   *mactbl;
 
/* Lock used to protect updates to mac table */
@@ -225,6 +243,11 @@ struct opa_vnic_mac_tbl_node {
 #define v_warn(format, arg...) \
netdev_warn(adapter->netdev, format, ## arg)
 
+#define c_err(format, arg...) \
+   dev_err(&cport->ibdev->dev, format, ## arg)
+#define c_info(format, arg...) \
+   dev_info(&cport->ibdev->dev, format, ## arg)
+
 /* The maximum allowed entries in the mac table */
 #define OPA_VNIC_MAC_TBL_MAX_ENTRIES  2048
 /* Limit of smac entries in mac table */
@@ -264,11 +287,32 @@ struct opa_vnic_adapter *opa_vnic_add_netdev(struct 
ib_device *ibdev,
 void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
 u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
 u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff 
*skb);
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter);
 void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter);
 void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
struct opa_veswport_mactable *tbl);
 int opa_vnic_update_mac_tbl(struct opa_vnic_

[PATCH rdma-next v1 12/12] IB/hfi1: VNIC SDMA support

2017-04-11 Thread Vishwanathapura, Niranjana
HFI1 VNIC SDMA support enables transmission of VNIC packets over SDMA.
Map VNIC queues to SDMA engines and support halting and wakeup of the
VNIC queues.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 drivers/infiniband/hw/hfi1/Makefile|   2 +-
 drivers/infiniband/hw/hfi1/hfi.h   |   1 +
 drivers/infiniband/hw/hfi1/init.c  |   1 +
 drivers/infiniband/hw/hfi1/vnic.h  |  28 +++
 drivers/infiniband/hw/hfi1/vnic_main.c |  24 ++-
 drivers/infiniband/hw/hfi1/vnic_sdma.c | 323 +
 6 files changed, 376 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_sdma.c

diff --git a/drivers/infiniband/hw/hfi1/Makefile 
b/drivers/infiniband/hw/hfi1/Makefile
index 2280538..88085f6 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
-   verbs_txreq.o vnic_main.o
+   verbs_txreq.o vnic_main.o vnic_sdma.o
 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
 
 CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index a12bb46..2862b14 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -834,6 +834,7 @@ struct hfi1_asic_data {
 /* Virtual NIC information */
 struct hfi1_vnic_data {
struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
+   struct kmem_cache *txreq_cache;
u8 num_vports;
struct idr vesw_idr;
u8 rmt_start;
diff --git a/drivers/infiniband/hw/hfi1/init.c 
b/drivers/infiniband/hw/hfi1/init.c
index de2eec4..b4c7e04 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -681,6 +681,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
dd->process_pio_send = hfi1_verbs_send_pio;
dd->process_dma_send = hfi1_verbs_send_dma;
dd->pio_inline_send = pio_copy;
+   dd->process_vnic_dma_send = hfi1_vnic_send_dma;
 
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
diff --git a/drivers/infiniband/hw/hfi1/vnic.h 
b/drivers/infiniband/hw/hfi1/vnic.h
index 9bed40d..e2c4552 100644
--- a/drivers/infiniband/hw/hfi1/vnic.h
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -49,6 +49,7 @@
 
 #include 
 #include "hfi.h"
+#include "sdma.h"
 
 #define HFI1_VNIC_MAX_TXQ 16
 #define HFI1_VNIC_MAX_PAD 12
@@ -85,6 +86,26 @@
 #define HFI1_VNIC_MAX_QUEUE 16
 
 /**
+ * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
+ * @dd - device data pointer
+ * @sde - sdma engine
+ * @vinfo - vnic info pointer
+ * @wait - iowait structure
+ * @stx - sdma tx request
+ * @state - vnic Tx ring SDMA state
+ * @q_idx - vnic Tx queue index
+ */
+struct hfi1_vnic_sdma {
+   struct hfi1_devdata *dd;
+   struct sdma_engine  *sde;
+   struct hfi1_vnic_vport_info *vinfo;
+   struct iowait wait;
+   struct sdma_txreq stx;
+   unsigned int state;
+   u8 q_idx;
+};
+
+/**
  * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue
  * @idx: queue index
  * @vinfo: pointer to vport information
@@ -111,6 +132,7 @@ struct hfi1_vnic_rx_queue {
  * @vesw_id: virtual switch id
  * @rxq: Array of receive queues
  * @stats: per queue stats
+ * @sdma: VNIC SDMA structure per TXQ
  */
 struct hfi1_vnic_vport_info {
struct hfi1_devdata *dd;
@@ -126,6 +148,7 @@ struct hfi1_vnic_vport_info {
struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT];
 
struct opa_vnic_stats  stats[HFI1_VNIC_MAX_QUEUE];
+   struct hfi1_vnic_sdma  sdma[HFI1_VNIC_MAX_TXQ];
 };
 
 #define v_dbg(format, arg...) \
@@ -138,8 +161,13 @@ struct hfi1_vnic_vport_info {
 /* vnic hfi1 internal functions */
 void hfi1_vnic_setup(struct hfi1_devdata *dd);
 void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
 
 void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet);
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo);
+bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+   u8 q_idx);
 
 /* vnic rdma netdev operations */
 struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c 
b/drivers/infiniband/hw/hfi1/vnic_main.c
index 1846d7c..6d0a4b1 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -406,6 +406,10 @@ static void hfi1_vnic_maybe_stop_tx(struct 
hfi1_vnic_vport_info *vinfo,
u8 q_idx)
 {
netif_stop_subqueue(vinfo->netdev, q_idx);
+   if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
+   return;
+

[PATCH rdma-next v1 01/12] IB/opa-vnic: Virtual Network Interface Controller (VNIC) documentation

2017-04-11 Thread Vishwanathapura, Niranjana
Add OPA VNIC design document explaining the VNIC architecture and the
driver design.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 Documentation/infiniband/opa_vnic.txt | 153 ++
 1 file changed, 153 insertions(+)
 create mode 100644 Documentation/infiniband/opa_vnic.txt

diff --git a/Documentation/infiniband/opa_vnic.txt 
b/Documentation/infiniband/opa_vnic.txt
new file mode 100644
index 000..282e17b
--- /dev/null
+++ b/Documentation/infiniband/opa_vnic.txt
@@ -0,0 +1,153 @@
+Intel Omni-Path (OPA) Virtual Network Interface Controller (VNIC) feature
+supports Ethernet functionality over Omni-Path fabric by encapsulating
+the Ethernet packets between HFI nodes.
+
+Architecture
+============
+The patterns of exchanges of Omni-Path encapsulated Ethernet packets
+involve one or more virtual Ethernet switches overlaid on the Omni-Path
+fabric topology. A subset of HFI nodes on the Omni-Path fabric are
+permitted to exchange encapsulated Ethernet packets across a particular
+virtual Ethernet switch. The virtual Ethernet switches are logical
+abstractions achieved by configuring the HFI nodes on the fabric for
+header generation and processing. In the simplest configuration all HFI
+nodes across the fabric exchange encapsulated Ethernet packets over a
+single virtual Ethernet switch. A virtual Ethernet switch is effectively
+an independent Ethernet network. The configuration is performed by an
+Ethernet Manager (EM) which is part of the trusted Fabric Manager (FM)
+application. HFI nodes can have multiple VNICs each connected to a
+different virtual Ethernet switch. The below diagram presents a case
+of two virtual Ethernet switches with two HFI nodes.
+
+ +---+
+ |  Subnet/  |
+ | Ethernet  |
+ |  Manager  |
+ +---+
+/  /
+  /   /
+//
+  / /
++-+  +--+
+|  Virtual Ethernet Switch|  |  Virtual Ethernet Switch |
+|  +-++-+ |  | +-++-+   |
+|  | VPORT   ||  VPORT  | |  | |  VPORT  ||  VPORT  |   |
++--+-++-+-+  +-+-++-+---+
+ | \/ |
+ |   \/   |
+ | \/ |
+ |/  \|
+ |  /  \  |
+ +---++  +---++
+ |   VNIC|VNIC|  |VNIC   |VNIC|
+ +---++  +---++
+ |  HFI   |  |  HFI   |
+ ++  ++
+
+
+The Omni-Path encapsulated Ethernet packet format is as described below.
+
+Bits  Field
+
+Quad Word 0:
+0-19  SLID (lower 20 bits)
+20-30 Length (in Quad Words)
+31BECN bit
+32-51 DLID (lower 20 bits)
+52-56 SC (Service Class)
+57-59 RC (Routing Control)
+60FECN bit
+61-62 L2 (=10, 16B format)
+63LT (=1, Link Transfer Head Flit)
+
+Quad Word 1:
+0-7   L4 type (=0x78 ETHERNET)
+8-11  SLID[23:20]
+12-15 DLID[23:20]
+16-31 PKEY
+32-47 Entropy
+48-63 Reserved
+
+Quad Word 2:
+0-15  Reserved
+16-31 L4 header
+32-63 Ethernet Packet
+
+Quad Words 3 to N-1:
+0-63  Ethernet packet (pad extended)
+
+Quad Word N (last):
+0-23  Ethernet packet (pad extended)
+24-55 ICRC
+56-61 Tail
+62-63 LT (=01, Link Transfer Tail Flit)
+
+Ethernet packet is padded on the transmit side to ensure that the VNIC OPA
+packet is quad word aligned. The 'Tail' field contains the number of bytes
+padded. On the receive side the 'Tail' field is read and the padding is
+removed (along with ICRC, Tail and OPA header) before passing packet up
+the network stack.
+
+The L4 header field contains the virtual Ethernet switch id the VNIC port
+belongs to. On the receive side, this field is used to de-multiplex the
+received VNIC packets to different VNIC ports.
+
+Driver Design
+=============
+Intel OPA VNIC software design is presented in the below diagram.
+OPA VNIC functionality has a HW dependent component and a HW
+independent component.
+
+Support has been added for the IB device to allocate and free the RDMA
+netdev devices. The RDMA netdev supports interfacing with the network
+stack thus creating standard network interfaces. OPA_VNIC is an RDMA
+netdev device type.
+
+The HW dependent VNIC functionality is part of

[PATCH rdma-next v1 11/12] IB/hfi1: Virtual Network Interface Controller (VNIC) HW support

2017-04-11 Thread Vishwanathapura, Niranjana
HFI1 HW specific support for VNIC functionality.
Dynamically allocate a set of contexts for VNIC when the first vnic
port is instantiated. Allocate VNIC contexts from user contexts pool
and return them back to the same pool while freeing up. Set aside
enough MSI-X interrupts for VNIC contexts and assign them when the
contexts are allocated. On the receive side, use an RSM rule to
spread TCP/UDP streams among VNIC contexts.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andrzej Kacprowski 
---
 drivers/infiniband/hw/hfi1/aspm.h |  15 +-
 drivers/infiniband/hw/hfi1/chip.c | 291 +-
 drivers/infiniband/hw/hfi1/chip.h |   2 +
 drivers/infiniband/hw/hfi1/debugfs.c  |   8 +-
 drivers/infiniband/hw/hfi1/driver.c   |  52 --
 drivers/infiniband/hw/hfi1/file_ops.c |  27 ++-
 drivers/infiniband/hw/hfi1/hfi.h  |  29 ++-
 drivers/infiniband/hw/hfi1/init.c |  29 +--
 drivers/infiniband/hw/hfi1/mad.c  |  10 +-
 drivers/infiniband/hw/hfi1/pio.c  |  19 +-
 drivers/infiniband/hw/hfi1/pio.h  |   8 +-
 drivers/infiniband/hw/hfi1/sysfs.c|   4 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c |   8 +-
 drivers/infiniband/hw/hfi1/user_pages.c   |   5 +-
 drivers/infiniband/hw/hfi1/verbs.c|   6 +-
 drivers/infiniband/hw/hfi1/vnic.h |   3 +
 drivers/infiniband/hw/hfi1/vnic_main.c| 245 -
 include/rdma/opa_port_info.h  |   3 +-
 18 files changed, 660 insertions(+), 104 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/aspm.h 
b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3..794e681 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long 
data)
spin_unlock_irqrestore(&rcd->aspm_lock, flags);
 }
 
-/* Disable interrupt processing for verbs contexts when PSM contexts are open 
*/
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
 static inline void aspm_disable_all(struct hfi1_devdata *dd)
 {
struct hfi1_ctxtdata *rcd;
unsigned long flags;
unsigned i;
 
-   for (i = 0; i < dd->first_user_ctxt; i++) {
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
del_timer_sync(&rcd->aspm_timer);
spin_lock_irqsave(&rcd->aspm_lock, flags);
@@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd)
if (aspm_mode != ASPM_MODE_DYNAMIC)
return;
 
-   for (i = 0; i < dd->first_user_ctxt; i++) {
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
spin_lock_irqsave(&rcd->aspm_lock, flags);
rcd->aspm_intr_enable = true;
@@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
(unsigned long)rcd);
rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
aspm_mode == ASPM_MODE_DYNAMIC &&
-   rcd->ctxt < rcd->dd->first_user_ctxt;
+   rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
 }
 
 static inline void aspm_init(struct hfi1_devdata *dd)
@@ -286,7 +289,7 @@ static inline void aspm_init(struct hfi1_devdata *dd)
spin_lock_init(&dd->aspm_lock);
dd->aspm_supported = aspm_hw_l1_supported(dd);
 
-   for (i = 0; i < dd->first_user_ctxt; i++)
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++)
aspm_ctx_init(dd->rcd[i]);
 
/* Start with ASPM disabled */
diff --git a/drivers/infiniband/hw/hfi1/chip.c 
b/drivers/infiniband/hw/hfi1/chip.c
index 79a316a..e520929 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -126,9 +126,16 @@ struct flag_table {
 #define DEFAULT_KRCVQS   2
 #define MIN_KERNEL_KCTXTS 2
 #define FIRST_KERNEL_KCTXT1
-/* sizes for both the QP and RSM map tables */
-#define NUM_MAP_ENTRIES256
-#define NUM_MAP_REGS 32
+
+/*
+ * RSM instance allocation
+ *   0 - Verbs
+ *   1 - User Fecn Handling
+ *   2 - Vnic
+ */
+#define RSM_INS_VERBS 0
+#define RSM_INS_FECN  1
+#define RSM_INS_VNIC  2
 
 /* Bit offset into the GUID which carries HFI id information */
 #define GUID_HFI_INDEX_SHIFT 39
@@ -139,8 +146,7 @@ struct flag_table {
 #define is_emulator_p(dd) dd)->irev) & 0xf) == 3)
 #define is_emulator_s(dd) dd)->irev) & 0xf) == 4)
 
-/* RSM fields */
-
+/* RSM fields for Verbs */
 /* packet type */
 #define IB_PACKET_TYPE   

[PATCH rdma-next v1 06/12] IB/opa-vnic: VNIC statistics support

2017-04-11 Thread Vishwanathapura, Niranjana
OPA VNIC driver statistics support maintains various counters including
standard netdev counters and the Ethernet manager defined counters.
Add the Ethtool hook to read the counters.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c | 110 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|   4 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  |  18 
 3 files changed, 132 insertions(+)

diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
index b74f6ad..a98948c 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -53,9 +53,119 @@
 
 #include "opa_vnic_internal.h"
 
+enum {NETDEV_STATS, VNIC_STATS};
+
+struct vnic_stats {
+   char stat_string[ETH_GSTRING_LEN];
+   struct {
+   int sizeof_stat;
+   int stat_offset;
+   };
+};
+
+#define VNIC_STAT(m){ FIELD_SIZEOF(struct opa_vnic_stats, m),   \
+ offsetof(struct opa_vnic_stats, m) }
+
+static struct vnic_stats vnic_gstrings_stats[] = {
+   /* NETDEV stats */
+   {"rx_packets", VNIC_STAT(netstats.rx_packets)},
+   {"tx_packets", VNIC_STAT(netstats.tx_packets)},
+   {"rx_bytes", VNIC_STAT(netstats.rx_bytes)},
+   {"tx_bytes", VNIC_STAT(netstats.tx_bytes)},
+   {"rx_errors", VNIC_STAT(netstats.rx_errors)},
+   {"tx_errors", VNIC_STAT(netstats.tx_errors)},
+   {"rx_dropped", VNIC_STAT(netstats.rx_dropped)},
+   {"tx_dropped", VNIC_STAT(netstats.tx_dropped)},
+
+   /* SUMMARY counters */
+   {"tx_unicast", VNIC_STAT(tx_grp.unicast)},
+   {"tx_mcastbcast", VNIC_STAT(tx_grp.mcastbcast)},
+   {"tx_untagged", VNIC_STAT(tx_grp.untagged)},
+   {"tx_vlan", VNIC_STAT(tx_grp.vlan)},
+
+   {"tx_64_size", VNIC_STAT(tx_grp.s_64)},
+   {"tx_65_127", VNIC_STAT(tx_grp.s_65_127)},
+   {"tx_128_255", VNIC_STAT(tx_grp.s_128_255)},
+   {"tx_256_511", VNIC_STAT(tx_grp.s_256_511)},
+   {"tx_512_1023", VNIC_STAT(tx_grp.s_512_1023)},
+   {"tx_1024_1518", VNIC_STAT(tx_grp.s_1024_1518)},
+   {"tx_1519_max", VNIC_STAT(tx_grp.s_1519_max)},
+
+   {"rx_unicast", VNIC_STAT(rx_grp.unicast)},
+   {"rx_mcastbcast", VNIC_STAT(rx_grp.mcastbcast)},
+   {"rx_untagged", VNIC_STAT(rx_grp.untagged)},
+   {"rx_vlan", VNIC_STAT(rx_grp.vlan)},
+
+   {"rx_64_size", VNIC_STAT(rx_grp.s_64)},
+   {"rx_65_127", VNIC_STAT(rx_grp.s_65_127)},
+   {"rx_128_255", VNIC_STAT(rx_grp.s_128_255)},
+   {"rx_256_511", VNIC_STAT(rx_grp.s_256_511)},
+   {"rx_512_1023", VNIC_STAT(rx_grp.s_512_1023)},
+   {"rx_1024_1518", VNIC_STAT(rx_grp.s_1024_1518)},
+   {"rx_1519_max", VNIC_STAT(rx_grp.s_1519_max)},
+
+   /* ERROR counters */
+   {"rx_fifo_errors", VNIC_STAT(netstats.rx_fifo_errors)},
+   {"rx_length_errors", VNIC_STAT(netstats.rx_length_errors)},
+
+   {"tx_fifo_errors", VNIC_STAT(netstats.tx_fifo_errors)},
+   {"tx_carrier_errors", VNIC_STAT(netstats.tx_carrier_errors)},
+
+   {"tx_dlid_zero", VNIC_STAT(tx_dlid_zero)},
+   {"tx_drop_state", VNIC_STAT(tx_drop_state)},
+   {"rx_drop_state", VNIC_STAT(rx_drop_state)},
+   {"rx_oversize", VNIC_STAT(rx_oversize)},
+   {"rx_runt", VNIC_STAT(rx_runt)},
+};
+
+#define VNIC_STATS_LEN  ARRAY_SIZE(vnic_gstrings_stats)
+
+/* vnic_get_sset_count - get string set count */
+static int vnic_get_sset_count(struct net_device *netdev, int sset)
+{
+   return (sset == ETH_SS_STATS) ? VNIC_STATS_LEN : -EOPNOTSUPP;
+}
+
+/* vnic_get_ethtool_stats - get statistics */
+static void vnic_get_ethtool_stats(struct net_device *netdev,
+  struct ethtool_stats *stats, u64 *data)
+{
+   struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+   struct opa_vnic_stats vstats;
+   int i;
+
+   memset(&vstats, 0, sizeof(vstats));
+   mutex_lock(&adapter->stats_lock);
+   adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+   for (i = 0; i < VNIC_STATS_LEN; i++) {
+   char *p = (char *)&vstats + vnic_gstrings_stats[i].stat_offset;
+
+   data[i] = (vnic_gstrings_stats[i].sizeof_stat ==
+  sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+   }
+   mutex_unlock(&adapter->stats_lock);
+}
+
+/* vnic_get_strings - get strings */
+static void vnic_get_strings(struct net_device *netdev, u32 stringset, u8 
*data)
+{
+   int i;
+
+   if (stringset != ETH_SS_STATS)
+   return;
+
+   for (i = 0; i < VNIC_STATS_LEN; i++)
+   memcpy(data + i * ETH_GSTRING_LEN,
+  vnic_gstrings_stats[i].stat_string,
+  ETH_GSTRING_LEN);
+}
+
 /* ethtool ops */
 static const struct ethtool_ops opa_vnic_ethtool_ops = {
.get_link

[PATCH rdma-next v1 07/12] IB/opa-vnic: VNIC MAC table support

2017-04-11 Thread Vishwanathapura, Niranjana
OPA VNIC MAC table contains the MAC address to DLID mappings provided by
the Ethernet manager. During transmission, the MAC table provides the MAC
address to DLID translation. Implement MAC table using simple hash list.
Also provide support to update/query the MAC table by Ethernet manager.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
---
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c   | 236 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|  51 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  |   4 +
 3 files changed, 291 insertions(+)

diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
index c74d02a..2e8fee9 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -96,6 +96,238 @@ static inline void opa_vnic_make_header(u8 *hdr, u32 slid, 
u32 dlid, u16 len,
memcpy(hdr, h, OPA_VNIC_HDR_LEN);
 }
 
+/*
+ * Using a simple hash table for mac table implementation with the last octet
+ * of mac address as a key.
+ */
+static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl)
+{
+   struct opa_vnic_mac_tbl_node *node;
+   struct hlist_node *tmp;
+   int bkt;
+
+   if (!mactbl)
+   return;
+
+   vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) {
+   hash_del(&node->hlist);
+   kfree(node);
+   }
+   kfree(mactbl);
+}
+
+static struct hlist_head *opa_vnic_alloc_mac_tbl(void)
+{
+   u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE;
+   struct hlist_head *mactbl;
+
+   mactbl = kzalloc(size, GFP_KERNEL);
+   if (!mactbl)
+   return ERR_PTR(-ENOMEM);
+
+   vnic_hash_init(mactbl);
+   return mactbl;
+}
+
+/* opa_vnic_release_mac_tbl - empty and free the mac table */
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter)
+{
+   struct hlist_head *mactbl;
+
+   mutex_lock(&adapter->mactbl_lock);
+   mactbl = rcu_access_pointer(adapter->mactbl);
+   rcu_assign_pointer(adapter->mactbl, NULL);
+   synchronize_rcu();
+   opa_vnic_free_mac_tbl(mactbl);
+   mutex_unlock(&adapter->mactbl_lock);
+}
+
+/*
+ * opa_vnic_query_mac_tbl - query the mac table for a section
+ *
+ * This function implements query of specific function of the mac table.
+ * The function also expects the requested range to be valid.
+ */
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+   struct opa_veswport_mactable *tbl)
+{
+   struct opa_vnic_mac_tbl_node *node;
+   struct hlist_head *mactbl;
+   int bkt;
+   u16 loffset, lnum_entries;
+
+   rcu_read_lock();
+   mactbl = rcu_dereference(adapter->mactbl);
+   if (!mactbl)
+   goto get_mac_done;
+
+   loffset = be16_to_cpu(tbl->offset);
+   lnum_entries = be16_to_cpu(tbl->num_entries);
+
+   vnic_hash_for_each(mactbl, bkt, node, hlist) {
+   struct __opa_vnic_mactable_entry *nentry = &node->entry;
+   struct opa_veswport_mactable_entry *entry;
+
+   if ((node->index < loffset) ||
+   (node->index >= (loffset + lnum_entries)))
+   continue;
+
+   /* populate entry in the tbl corresponding to the index */
+   entry = &tbl->tbl_entries[node->index - loffset];
+   memcpy(entry->mac_addr, nentry->mac_addr,
+  ARRAY_SIZE(entry->mac_addr));
+   memcpy(entry->mac_addr_mask, nentry->mac_addr_mask,
+  ARRAY_SIZE(entry->mac_addr_mask));
+   entry->dlid_sd = cpu_to_be32(nentry->dlid_sd);
+   }
+   tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest);
+get_mac_done:
+   rcu_read_unlock();
+}
+
+/*
+ * opa_vnic_update_mac_tbl - update mac table section
+ *
+ * This function updates the specified section of the mac table.
+ * The procedure includes following steps.
+ *  - Allocate a new mac (hash) table.
+ *  - Add the specified entries to the new table.
+ *(except the ones that are requested to be deleted).
+ *  - Add all the other entries from the old mac table.
+ *  - If there is a failure, free the new table and return.
+ *  - Switch to the new table.
+ *  - Free the old table and return.
+ *
+ * The function also expects the requested range to be valid.
+ */
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+   struct opa_veswport_mactable *tbl)
+{
+   struct opa_vnic_mac_tbl_node *node, *new_node;
+   struct hlist_head *new_mactbl, *old_mactbl;
+   int i, bkt, rc = 0;
+   u8 key;
+   u16 loffset, lnum_entries;
+
+   mutex_lock(&adapter->mactbl_lock);
+   /* allocate new mac table */
+   new_mactbl = opa_vnic_alloc_mac_tbl();
+   if (IS_ERR(

[PATCH rdma-next v1 03/12] IB/opa-vnic: Virtual Network Interface Controller (VNIC) interface

2017-04-11 Thread Vishwanathapura, Niranjana
Define OPA VNIC interface between hardware independent VNIC
functionality and the hardware dependent VNIC functionality.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 include/rdma/ib_verbs.h |   1 +
 include/rdma/opa_vnic.h | 141 
 2 files changed, 142 insertions(+)
 create mode 100644 include/rdma/opa_vnic.h

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 1064459..654b353 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -225,6 +225,7 @@ enum ib_device_cap_flags {
IB_DEVICE_VIRTUAL_FUNCTION  = (1ULL << 33),
/* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
IB_DEVICE_RAW_SCATTER_FCS   = (1ULL << 34),
+   IB_DEVICE_RDMA_NETDEV_OPA_VNIC  = (1ULL << 35),
 };
 
 enum ib_signature_prot_cap {
diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h
new file mode 100644
index 000..39d6890
--- /dev/null
+++ b/include/rdma/opa_vnic.h
@@ -0,0 +1,141 @@
+#ifndef _OPA_VNIC_H
+#define _OPA_VNIC_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in
+ *the documentation and/or other materials provided with the
+ *distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *contributors may be used to endorse or promote products derived
+ *from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains Intel Omni-Path (OPA) Virtual Network Interface
+ * Controller (VNIC) specific declarations.
+ */
+
+#include 
+
+/* VNIC uses 16B header format */
+#define OPA_VNIC_L2_TYPE0x2
+
+/* 16 header bytes + 2 reserved bytes */
+#define OPA_VNIC_L2_HDR_LEN   (16 + 2)
+
+#define OPA_VNIC_L4_HDR_LEN   2
+
+#define OPA_VNIC_HDR_LEN  (OPA_VNIC_L2_HDR_LEN + \
+  OPA_VNIC_L4_HDR_LEN)
+
+#define OPA_VNIC_L4_ETHR  0x78
+
+#define OPA_VNIC_ICRC_LEN   4
+#define OPA_VNIC_TAIL_LEN   1
+#define OPA_VNIC_ICRC_TAIL_LEN  (OPA_VNIC_ICRC_LEN + OPA_VNIC_TAIL_LEN)
+
+#define OPA_VNIC_SKB_MDATA_LEN 4
+#define OPA_VNIC_SKB_MDATA_ENCAP_ERR   0x1
+
+/* opa vnic rdma netdev's private data structure */
+struct opa_vnic_rdma_netdev {
+   struct rdma_netdev rn;  /* keep this first */
+   /* followed by device private data */
+   char *dev_priv[0];
+};
+
+static inline void *opa_vnic_priv(const struct net_device *dev)
+{
+   struct rdma_netdev *rn = netdev_priv(dev);
+
+   return rn->clnt_priv;
+}
+
+static inline void *opa_vnic_dev_priv(const struct net_device *dev)
+{
+   struct opa_vnic_rdma_netdev *oparn = netdev_priv(dev);
+
+   return oparn->dev_priv;
+}
+
+/* opa_vnic skb meta data structrue */
+struct opa_vnic_skb_mdata {
+   u8 vl;
+   u8 entropy;
+   u8 flags;
+   u8 rsvd;
+} __packed;
+
+/* OPA VNIC group statistics */
+struct opa_vnic_grp_stats {
+   u64 unicast;
+   u64 mcastbcast;
+   u64 untagged;
+   u64 vlan;
+   u64 s_64;
+   u64 s_65_127;
+   u64 s_128_255;
+   u64 s_256_511;
+

[PATCH rdma-next v1 00/12] Omni-Path Virtual Network Interface Controller (VNIC)

2017-04-11 Thread Vishwanathapura, Niranjana
[...] It expects Omni-Path encapsulated Ethernet
packets in the transmit path and provides HW access to them. It strips
the Omni-Path header from the received packets before passing them up
the network stack. It also implements the RDMA netdev control operations.

The OPA VNIC module implements the HW independent VNIC functionality.
It consists of two parts. The VNIC Ethernet Management Agent (VEMA)
registers itself with IB core as an IB client and interfaces with the
IB MAD stack. It exchanges the management information with the Ethernet
Manager (EM) and the VNIC netdev. The VNIC netdev part allocates and frees
the OPA_VNIC RDMA netdev devices. It overrides the net_device_ops functions
set by HW dependent VNIC driver where required to accommodate any control
operation. It also handles the encapsulation of Ethernet packets with an
Omni-Path header in the transmit path. For each VNIC interface, the
information required for encapsulation is configured by the EM via VEMA MAD
interface. It also passes any control information to the HW dependent driver
by invoking the RDMA netdev control operations.

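+-------------------+ +----------------------+
|                   | |       Linux          |
|     IB MAD        | |      Network         |
|                   | |       Stack          |
+-------------------+ +----------------------+
          |               |          |
          |               |          |
+----------------------------+       |
|                            |       |
|      OPA VNIC Module       |       |
|  (OPA VNIC RDMA Netdev     |       |
|     & EMA functions)       |       |
|                            |       |
+----------------------------+       |
            |                        |
            |                        |
   +------------------+              |
   |     IB core      |              |
   +------------------+              |
            |                        |
            |                        |
+--------------------------------------------+
|                                            |
|      HFI1 Driver with VNIC support         |
|                                            |
+--------------------------------------------+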


Vishwanathapura, Niranjana (12):
  IB/opa-vnic: Virtual Network Interface Controller (VNIC) documentation
  IB/opa-vnic: RDMA NETDEV interface
  IB/opa-vnic: Virtual Network Interface Controller (VNIC) interface
  IB/opa-vnic: Virtual Network Interface Controller (VNIC) netdev
  IB/opa-vnic: VNIC Ethernet Management (EM) structure definitions
  IB/opa-vnic: VNIC statistics support
  IB/opa-vnic: VNIC MAC table support
  IB/opa-vnic: VNIC Ethernet Management Agent (VEMA) interface
  IB/opa-vnic: VNIC Ethernet Management Agent (VEMA) function
  IB/hfi1: OPA_VNIC RDMA netdev support
  IB/hfi1: Virtual Network Interface Controller (VNIC) HW support
  IB/hfi1: VNIC SDMA support

 Documentation/infiniband/opa_vnic.txt  |  153 +++
 MAINTAINERS|7 +
 drivers/infiniband/Kconfig |1 +
 drivers/infiniband/hw/hfi1/Makefile|2 +-
 drivers/infiniband/hw/hfi1/aspm.h  |   15 +-
 drivers/infiniband/hw/hfi1/chip.c  |  291 +-
 drivers/infiniband/hw/hfi1/chip.h  |2 +
 drivers/infiniband/hw/hfi1/debugfs.c   |8 +-
 drivers/infiniband/hw/hfi1/driver.c|   77 +-
 drivers/infiniband/hw/hfi1/file_ops.c  |   27 +-
 drivers/infiniband/hw/hfi1/hfi.h   |   57 +-
 drivers/infiniband/hw/hfi1/init.c  |   39 +-
 drivers/infiniband/hw/hfi1/mad.c   |   10 +-
 drivers/infiniband/hw/hfi1/pio.c   |   19 +-
 drivers/infiniband/hw/hfi1/pio.h   |8 +-
 drivers/infiniband/hw/hfi1/sysfs.c |4 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c  |8 +-
 drivers/infiniband/hw/hfi1/user_pages.c|5 +-
 drivers/infiniband/hw/hfi1/verbs.c |6 +-
 drivers/infiniband/hw/hfi1/vnic.h  |  184 
 drivers/infiniband/hw/hfi1/vnic_main.c |  907 
 drivers/infiniband/hw/hfi1/vnic_sdma.c |  323 ++
 drivers/infiniband/ulp/Makefile|1 +
 drivers/infiniband/ulp/opa_vnic/Kconfig|8 +
 drivers/infiniband/ulp/opa_vnic/Makefile   |7 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c   |  475 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h   |  489 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c |  187 
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|  329 ++
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  |  391 +++
 drivers/infiniband/ulp/opa_vni

[PATCH rdma-next v1 09/12] IB/opa-vnic: VNIC Ethernet Management Agent (VEMA) function

2017-04-11 Thread Vishwanathapura, Niranjana
OPA VEMA function interfaces with the Infiniband MAD stack to exchange the
management information packets with the Ethernet Manager (EM).
It interfaces with the OPA VNIC netdev function to SET/GET the management
information. The information exchanged with the EM includes class port
details, encapsulation configuration, various counters, unicast and
multicast MAC list and the MAC table. It also supports sending traps
to the EM.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sudeep Dutt 
---
 drivers/infiniband/ulp/opa_vnic/Makefile   |2 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c |   12 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|   17 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c| 1078 
 .../infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c  |2 +-
 5 files changed, 1106 insertions(+), 5 deletions(-)
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c

diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile 
b/drivers/infiniband/ulp/opa_vnic/Makefile
index e8d1ea1..8061b28 100644
--- a/drivers/infiniband/ulp/opa_vnic/Makefile
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -4,4 +4,4 @@
 obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
 
 opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \
-  opa_vnic_vema_iface.o
+  opa_vnic_vema.o opa_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
index a98948c..d66540e 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -120,6 +120,17 @@ struct vnic_stats {
 
 #define VNIC_STATS_LEN  ARRAY_SIZE(vnic_gstrings_stats)
 
+/* vnic_get_drvinfo - get driver info */
+static void vnic_get_drvinfo(struct net_device *netdev,
+struct ethtool_drvinfo *drvinfo)
+{
+   strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver));
+   strlcpy(drvinfo->version, opa_vnic_driver_version,
+   sizeof(drvinfo->version));
+   strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
+   sizeof(drvinfo->bus_info));
+}
+
 /* vnic_get_sset_count - get string set count */
 static int vnic_get_sset_count(struct net_device *netdev, int sset)
 {
@@ -162,6 +173,7 @@ static void vnic_get_strings(struct net_device *netdev, u32 
stringset, u8 *data)
 
 /* ethtool ops */
 static const struct ethtool_ops opa_vnic_ethtool_ops = {
+   .get_drvinfo = vnic_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_strings = vnic_get_strings,
.get_sset_count = vnic_get_sset_count,
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
index b49f5d7..6bba886 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -164,10 +164,12 @@ struct __opa_veswport_trap {
  * struct opa_vnic_ctrl_port - OPA virtual NIC control port
  * @ibdev: pointer to ib device
  * @ops: opa vnic control operations
+ * @num_ports: number of opa ports
  */
 struct opa_vnic_ctrl_port {
struct ib_device   *ibdev;
struct opa_vnic_ctrl_ops   *ops;
+   u8  num_ports;
 };
 
 /**
@@ -187,6 +189,8 @@ struct opa_vnic_ctrl_port {
  * @mactbl_lock: mac table lock
  * @stats_lock: statistics lock
  * @flow_tbl: flow to default port redirection table
+ * @trap_timeout: trap timeout
+ * @trap_count: no. of traps allowed within timeout period
  */
 struct opa_vnic_adapter {
struct net_device *netdev;
@@ -213,6 +217,9 @@ struct opa_vnic_adapter {
struct mutex stats_lock;
 
u8 flow_tbl[OPA_VNIC_FLOW_TBL_SIZE];
+
+   unsigned long trap_timeout;
+   u8trap_count;
 };
 
 /* Same as opa_veswport_mactable_entry, but without bitwise attribute */
@@ -247,6 +254,8 @@ struct opa_vnic_mac_tbl_node {
dev_err(&cport->ibdev->dev, format, ## arg)
 #define c_info(format, arg...) \
dev_info(&cport->ibdev->dev, format, ## arg)
+#define c_dbg(format, arg...) \
+   dev_dbg(&cport->ibdev->dev, format, ## arg)
 
 /* The maximum allowed entries in the mac table */
 #define OPA_VNIC_MAC_TBL_MAX_ENTRIES  2048
@@ -281,6 +290,9 @@ struct opa_vnic_mac_tbl_node {
!obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++)   \
hlist_for_each_entry(obj, &name[bkt], member)
 
+extern char opa_vnic_driver_name[];
+extern const char opa_vnic_driver_version[];
+
 struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
 u8 port_num, u8 vport_num);
 void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter);
@@ -310,9 +322,8 @@ void opa_vnic_get_per_veswport_info(struct opa_vni

Re: [RFC v1 for accelerated IPoIB 04/25] IB/verb: Add ipoib_options struct and API

2017-03-15 Thread Vishwanathapura, Niranjana

On Wed, Mar 15, 2017 at 08:30:43AM +0200, Leon Romanovsky wrote:

On Tue, Mar 14, 2017 at 10:11:49AM -0600, Jason Gunthorpe wrote:

On Tue, Mar 14, 2017 at 12:01:09AM -0700, Vishwanathapura, Niranjana wrote:
> On Mon, Mar 13, 2017 at 02:01:36PM -0600, Jason Gunthorpe wrote:
> >>+  /* multicast */
> >>+  int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
> >>+  union ib_gid *gid, u16 lid, int set_qkey);
> >>+  int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
> >>+  union ib_gid *gid, u16 lid);
> >
> >It would make more sense to store the struct ib_device pointer in the
> >struct rdma_netdev.
> >
>
> Agree that it shouldn't be a function parameter.
> For opa_vnic, I found it convenient to store ib_device pointer in client and
> device private structures as those will be available in most places anyhow.

If vnic uses it too, then let's add the ib_device and port num to
rdma_netdev itself?


Agree, at the end this rdma_netdev is intended for the drivers/infiniband
and it is better to have this binding (rdma_netdev and ib_device) as early as 
possible.



I agree with adding ibdev and port num to rdma_netdev.

Niranjana



Re: [RFC v1 for accelerated IPoIB 04/25] IB/verb: Add ipoib_options struct and API

2017-03-14 Thread Vishwanathapura, Niranjana

On Mon, Mar 13, 2017 at 02:01:36PM -0600, Jason Gunthorpe wrote:

+   /* multicast */
+   int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
+   union ib_gid *gid, u16 lid, int set_qkey);
+   int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
+   union ib_gid *gid, u16 lid);


It would make more sense to store the struct ib_device pointer in the
struct rdma_netdev.



Agree that it shouldn't be a function parameter.
For opa_vnic, I found it convenient to store ib_device pointer in client and 
device private structures as those will be available in most places anyhow.


Niranjana


Re: [RFC v1 for accelerated IPoIB 04/25] IB/verb: Add ipoib_options struct and API

2017-03-13 Thread Vishwanathapura, Niranjana

On Mon, Mar 13, 2017 at 08:31:15PM +0200, Erez Shitrit wrote:

+struct ipoib_rdma_netdev {
+   struct rdma_netdev rn;  /* keep this first */
+   /* followed by device private data */
+   char *dev_priv[0];
+};
+
+static inline void *ipoib_priv(const struct net_device *dev)
+{
+   struct rdma_netdev *rn = netdev_priv(dev);
+
+   return rn->clnt_priv;
+}
+
+static inline void *ipoib_dev_priv(const struct net_device *dev)
+{
+   struct ipoib_rdma_netdev *ipoib_rn = netdev_priv(dev);
+
+   return ipoib_rn->dev_priv;
+}
+


It can be confusing to see the return value of ipoib_priv() getting assigned to
ipoib_dev_priv (legacy name). Maybe we should change ipoib_dev_priv() to
ipoib_hw_priv()?




+#endif /* IB_IPOIB_ACCEL_OPS_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 85b9034c8cfc..9b090efccdba 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1901,6 +1901,41 @@ struct ib_port_immutable {
u32   max_mad_size;
};

+/* rdma netdev type - specifies protocol type */
+enum rdma_netdev_t {
+   RDMA_NETDEV_OPA_VNIC,
+   RDMA_NETDEV_IPOIB
+};
+
+struct ipoib_ah;
+
+/**
+ * struct rdma_netdev - rdma netdev
+ * For cases where netstack interfacing is required.
+ */
+struct rdma_netdev {
+   void *clnt_priv;
+
+   /* control functions */
+   void (*set_id)(struct net_device *netdev, int id);
+   /* IB resource allocation function, returns new UD QP */
+   int (*ib_dev_init)(struct net_device *dev, struct ib_device *hca,
+  int *qp_num);
+   void (*ib_dev_cleanup)(struct net_device *dev, struct ib_device *hca);
+
+   /* send packet */
+   void (*send)(struct net_device *dev, struct sk_buff *skb,
+struct ipoib_ah *address, u32 dqpn, u32 dqkey);
+
+   /* multicast */
+   int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
+   union ib_gid *gid, u16 lid, int set_qkey);
+   int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
+   union ib_gid *gid, u16 lid);
+   int qp_num;


Maybe the ipoib_rdma_netdev structure is the right place for these functions?


+   void *context;


No context should be necessary here.


+};
+
struct ib_device {
struct device*dma_device;

@@ -2149,6 +2184,7 @@ struct ib_device {
struct ib_wq_attr *attr,
u32 wq_attr_mask,
struct ib_udata *udata);
+   struct ib_ipoib_accel_ops * (*get_ipoib_accel_ops)(struct ib_device 
*device);


old code, needs fix.

Niranjana


Re: [RFC v1 for accelerated IPoIB 05/25] IB/ipoib: Support ipoib acceleration options callbacks

2017-03-13 Thread Vishwanathapura, Niranjana

On Mon, Mar 13, 2017 at 08:31:16PM +0200, Erez Shitrit wrote:

+static struct net_device *ipoib_create_netdev_default(struct ib_device *hca,
+ const char *name,
+ void (*setup)(struct 
net_device *))
{
struct net_device *dev;
+   struct rdma_netdev *rn;

-   dev = alloc_netdev((int)sizeof(struct ipoib_dev_priv), name,
-  NET_NAME_UNKNOWN, ipoib_setup);
+   dev = alloc_netdev((int)sizeof(struct ipoib_rdma_netdev),
+  name,
+  NET_NAME_UNKNOWN, setup);
if (!dev)
return NULL;

-   return netdev_priv(dev);
+   rn = netdev_priv(dev);
+
+   rn->ib_dev_init = ipoib_dev_init_default;
+   rn->ib_dev_cleanup = ipoib_dev_uninit_default;
+   rn->send = ipoib_send;
+   rn->attach_mcast = ipoib_mcast_attach;
+   rn->detach_mcast = ipoib_mcast_detach;
+
+   dev->netdev_ops = &ipoib_netdev_default_pf;
+


Probably no need to set netdev_ops here as it gets overwritten.


+   return dev;
+}
+
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+   const char *name)
+{
+   struct net_device *dev;
+   struct ipoib_dev_priv *priv;
+   struct rdma_netdev *rn;
+
+   priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+   if (!priv) {
+   pr_err("%s failed allocting priv\n", __func__);
+   return NULL;
+   }
+
+   if (!hca->alloc_rdma_netdev)
+   dev = ipoib_create_netdev_default(hca, name, 
ipoib_setup_common);
+   else
+   dev = hca->alloc_rdma_netdev(hca, port, RDMA_NETDEV_IPOIB,
+name, NET_NAME_UNKNOWN,
+ipoib_setup_common);
+   if (!dev) {
+   kfree(priv);
+   return NULL;
+   }


This will break ipoib on hfi1, as hfi1 will define alloc_rdma_netdev for the
OPA_VNIC type. We should probably look for a dedicated return value (-ENODEV?)
to determine if the driver supports the specified rdma netdev type. Or use an ib
device attribute to indicate that the driver supports the ipoib rdma netdev.


Niranjana


Re: [RFC v1 for accelerated IPoIB 25/25] mlx5_ib: skeleton for mlx5_ib to support ipoib_ops

2017-03-13 Thread Vishwanathapura, Niranjana

On Mon, Mar 13, 2017 at 08:31:36PM +0200, Erez Shitrit wrote:

+int mlx5_ib_dev_init(struct net_device *dev, struct ib_device *hca,
+int *qp_num)
+{
+   void *next_priv = ipoib_dev_priv(dev);
+   struct rdma_netdev *rn = netdev_priv(dev);
+   struct mlx5_ib_dev *ib_dev = to_mdev(hca);
+   int ret;
+
+   ret = mlx5i_attach(ib_dev->mdev, next_priv);
+   if (ret) {
+   pr_err("Failed resources allocation for device: %s ret: %d\n",
+  dev->name, ret);
+   return ret;
+   }
+
+   *qp_num = rn->qp_num;
+
+   pr_debug("resources allocated for device: %s\n", dev->name);
+
+   return 0;
+}
+
+void mlx5_ib_dev_cleanup(struct net_device *dev, struct ib_device *hca)
+{
+   void *next_priv = ipoib_dev_priv(dev);
+   struct rdma_netdev *rn = netdev_priv(dev);
+   struct mlx5_ib_dev *ib_dev = to_mdev(hca);
+   struct mlx5_qp_context context;
+   int ret;
+
+   /* detach qp from flow-steering by reset it */
+   ret = mlx5_core_qp_modify(ib_dev->mdev,
+ MLX5_CMD_OP_2RST_QP, 0, &context,
+ (struct mlx5_core_qp *)rn->context);
+   if (ret)
+   pr_err("%s failed (ret: %d) to reset QP\n", __func__, ret);
+
+   mlx5i_detach(ib_dev->mdev, next_priv);
+
+   mlx5_ib_clean_qp(ib_dev, (struct mlx5_core_qp *)rn->context);
+}
+


Why can't we use ndo_init() and ndo_uninit() here (just like open and stop below)?
We really don't need to pass in hca here (or in any other interface function),
as it is already made available to the driver during alloc_rdma_netdev.
Also, why is qp_num an output parameter in the init function? Ipoib can access
rn->qp_num, which this init function is returning.



+struct net_device *mlx5_alloc_rdma_netdev(struct ib_device *hca,
+u8 port_num,
+enum rdma_netdev_t type,
+const char *name,
+unsigned char name_assign_type,
+void (*setup)(struct net_device *))
+{


Probably need to check the 'type' here, as any rdma netdev client can call this
function (with a different rdma_netdev type) and cause the driver to misbehave.



+void mlx5_free_rdma_netdev(struct net_device *netdev)
+{
+}


Maybe it is safer and cleaner for this function to undo what alloc does here
(instead of doing it in other places)?



--
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC v3 01/11] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) documentation

2017-02-22 Thread Vishwanathapura, Niranjana

On Wed, Feb 08, 2017 at 05:00:45PM +0000, Bart Van Assche wrote:

On Tue, 2017-02-07 at 12:23 -0800, Vishwanathapura, Niranjana wrote:

Please elaborate this section. What is a virtual Ethernet switch? Is it a
software entity or something that is implemented in hardware? Also, how are
these independent Ethernet networks identified on the wire? The Linux kernel
already supports IB partitions and Ethernet VLANs. How do these independent
Ethernet networks compare to IB partitions and Ethernet VLANs? Which wire-
level header contains the identity of these Ethernet networks? Is it
possible to query from user space which Ethernet network a VNIC belongs to?
If so, with which API and which tools?



I have added the VNIC packet format and some related information to the 
documentation in the PATCH series I just sent out.



Thanks,



[PATCH 00/11] Omni-Path Virtual Network Interface Controller (VNIC)

2017-02-22 Thread Vishwanathapura, Niranjana
self with IB core as an IB client and interfaces with the
IB MAD stack. It exchanges the management information with the Ethernet
Manager (EM) and the VNIC netdev. The VNIC netdev part allocates and frees
the OPA_VNIC RDMA netdev devices. It overrides the net_device_ops functions
set by HW dependent VNIC driver where required to accommodate any control
operation. It also handles the encapsulation of Ethernet packets with an
Omni-Path header in the transmit path. For each VNIC interface, the
information required for encapsulation is configured by the EM via VEMA MAD
interface. It also passes any control information to the HW dependent driver
by invoking the RDMA netdev control operations.

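+-------------------+ +----------------------+
|                   | |       Linux          |
|     IB MAD        | |      Network         |
|                   | |       Stack          |
+-------------------+ +----------------------+
         |               |          |
         |               |          |
+----------------------------+      |
|                            |      |
|      OPA VNIC Module       |      |
|  (OPA VNIC RDMA Netdev     |      |
|     & EMA functions)       |      |
|                            |      |
+----------------------------+      |
            |                       |
            |                       |
   +------------------+             |
   |     IB core      |             |
   +------------------+             |
            |                       |
            |                       |
+--------------------------------------------+
|                                            |
|      HFI1 Driver with VNIC support         |
|                                            |
+--------------------------------------------+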


Vishwanathapura, Niranjana (11):
  IB/opa-vnic: Virtual Network Interface Controller (VNIC) documentation
  IB/opa-vnic: Virtual Network Interface Controller (VNIC) interface
  IB/opa-vnic: Virtual Network Interface Controller (VNIC) netdev
  IB/opa-vnic: VNIC Ethernet Management (EM) structure definitions
  IB/opa-vnic: VNIC statistics support
  IB/opa-vnic: VNIC MAC table support
  IB/opa-vnic: VNIC Ethernet Management Agent (VEMA) interface
  IB/opa-vnic: VNIC Ethernet Management Agent (VEMA) function
  IB/hfi1: OPA_VNIC RDMA netdev support
  IB/hfi1: Virtual Network Interface Controller (VNIC) HW support
  IB/hfi1: VNIC SDMA support

 Documentation/infiniband/opa_vnic.txt  |  153 +++
 MAINTAINERS|7 +
 drivers/infiniband/Kconfig |1 +
 drivers/infiniband/hw/hfi1/Makefile|2 +-
 drivers/infiniband/hw/hfi1/aspm.h  |   15 +-
 drivers/infiniband/hw/hfi1/chip.c  |  293 +-
 drivers/infiniband/hw/hfi1/chip.h  |4 +-
 drivers/infiniband/hw/hfi1/debugfs.c   |8 +-
 drivers/infiniband/hw/hfi1/driver.c|   77 +-
 drivers/infiniband/hw/hfi1/file_ops.c  |   27 +-
 drivers/infiniband/hw/hfi1/hfi.h   |   57 +-
 drivers/infiniband/hw/hfi1/init.c  |   39 +-
 drivers/infiniband/hw/hfi1/mad.c   |   10 +-
 drivers/infiniband/hw/hfi1/pio.c   |   19 +-
 drivers/infiniband/hw/hfi1/pio.h   |8 +-
 drivers/infiniband/hw/hfi1/sysfs.c |4 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c  |8 +-
 drivers/infiniband/hw/hfi1/user_pages.c|5 +-
 drivers/infiniband/hw/hfi1/verbs.c |8 +-
 drivers/infiniband/hw/hfi1/vnic.h  |  184 
 drivers/infiniband/hw/hfi1/vnic_main.c |  909 +
 drivers/infiniband/hw/hfi1/vnic_sdma.c |  323 ++
 drivers/infiniband/ulp/Makefile|1 +
 drivers/infiniband/ulp/opa_vnic/Kconfig|8 +
 drivers/infiniband/ulp/opa_vnic/Makefile   |7 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c   |  475 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h   |  489 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c |  187 
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|  329 ++
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  |  389 +++
 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c| 1071 
 .../infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c  |  390 +++
 include/rdma/ib_verbs.h|   27 +
 include/rdma/opa_port_info.h   |4 +-
 include/rdma/opa_vnic.h|  143 +++
 35 files changed, 5567 insertions(+), 114 deletions(-)
 create mode 100644 Documentation/infiniband/opa_vnic.txt
 cr

[PATCH 05/11] IB/opa-vnic: VNIC statistics support

2017-02-22 Thread Vishwanathapura, Niranjana
OPA VNIC driver statistics support maintains various counters including
standard netdev counters and the Ethernet manager defined counters.
Add the Ethtool hook to read the counters.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c | 110 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|   4 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  |  20 
 3 files changed, 134 insertions(+)

diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
index b74f6ad..a98948c 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -53,9 +53,119 @@
 
 #include "opa_vnic_internal.h"
 
+enum {NETDEV_STATS, VNIC_STATS};
+
+struct vnic_stats {
+   char stat_string[ETH_GSTRING_LEN];
+   struct {
+   int sizeof_stat;
+   int stat_offset;
+   };
+};
+
+#define VNIC_STAT(m){ FIELD_SIZEOF(struct opa_vnic_stats, m),   \
+ offsetof(struct opa_vnic_stats, m) }
+
+static struct vnic_stats vnic_gstrings_stats[] = {
+   /* NETDEV stats */
+   {"rx_packets", VNIC_STAT(netstats.rx_packets)},
+   {"tx_packets", VNIC_STAT(netstats.tx_packets)},
+   {"rx_bytes", VNIC_STAT(netstats.rx_bytes)},
+   {"tx_bytes", VNIC_STAT(netstats.tx_bytes)},
+   {"rx_errors", VNIC_STAT(netstats.rx_errors)},
+   {"tx_errors", VNIC_STAT(netstats.tx_errors)},
+   {"rx_dropped", VNIC_STAT(netstats.rx_dropped)},
+   {"tx_dropped", VNIC_STAT(netstats.tx_dropped)},
+
+   /* SUMMARY counters */
+   {"tx_unicast", VNIC_STAT(tx_grp.unicast)},
+   {"tx_mcastbcast", VNIC_STAT(tx_grp.mcastbcast)},
+   {"tx_untagged", VNIC_STAT(tx_grp.untagged)},
+   {"tx_vlan", VNIC_STAT(tx_grp.vlan)},
+
+   {"tx_64_size", VNIC_STAT(tx_grp.s_64)},
+   {"tx_65_127", VNIC_STAT(tx_grp.s_65_127)},
+   {"tx_128_255", VNIC_STAT(tx_grp.s_128_255)},
+   {"tx_256_511", VNIC_STAT(tx_grp.s_256_511)},
+   {"tx_512_1023", VNIC_STAT(tx_grp.s_512_1023)},
+   {"tx_1024_1518", VNIC_STAT(tx_grp.s_1024_1518)},
+   {"tx_1519_max", VNIC_STAT(tx_grp.s_1519_max)},
+
+   {"rx_unicast", VNIC_STAT(rx_grp.unicast)},
+   {"rx_mcastbcast", VNIC_STAT(rx_grp.mcastbcast)},
+   {"rx_untagged", VNIC_STAT(rx_grp.untagged)},
+   {"rx_vlan", VNIC_STAT(rx_grp.vlan)},
+
+   {"rx_64_size", VNIC_STAT(rx_grp.s_64)},
+   {"rx_65_127", VNIC_STAT(rx_grp.s_65_127)},
+   {"rx_128_255", VNIC_STAT(rx_grp.s_128_255)},
+   {"rx_256_511", VNIC_STAT(rx_grp.s_256_511)},
+   {"rx_512_1023", VNIC_STAT(rx_grp.s_512_1023)},
+   {"rx_1024_1518", VNIC_STAT(rx_grp.s_1024_1518)},
+   {"rx_1519_max", VNIC_STAT(rx_grp.s_1519_max)},
+
+   /* ERROR counters */
+   {"rx_fifo_errors", VNIC_STAT(netstats.rx_fifo_errors)},
+   {"rx_length_errors", VNIC_STAT(netstats.rx_length_errors)},
+
+   {"tx_fifo_errors", VNIC_STAT(netstats.tx_fifo_errors)},
+   {"tx_carrier_errors", VNIC_STAT(netstats.tx_carrier_errors)},
+
+   {"tx_dlid_zero", VNIC_STAT(tx_dlid_zero)},
+   {"tx_drop_state", VNIC_STAT(tx_drop_state)},
+   {"rx_drop_state", VNIC_STAT(rx_drop_state)},
+   {"rx_oversize", VNIC_STAT(rx_oversize)},
+   {"rx_runt", VNIC_STAT(rx_runt)},
+};
+
+#define VNIC_STATS_LEN  ARRAY_SIZE(vnic_gstrings_stats)
+
+/* vnic_get_sset_count - get string set count */
+static int vnic_get_sset_count(struct net_device *netdev, int sset)
+{
+   return (sset == ETH_SS_STATS) ? VNIC_STATS_LEN : -EOPNOTSUPP;
+}
+
+/* vnic_get_ethtool_stats - get statistics */
+static void vnic_get_ethtool_stats(struct net_device *netdev,
+  struct ethtool_stats *stats, u64 *data)
+{
+   struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+   struct opa_vnic_stats vstats;
+   int i;
+
+   memset(&vstats, 0, sizeof(vstats));
+   mutex_lock(&adapter->stats_lock);
+   adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+   for (i = 0; i < VNIC_STATS_LEN; i++) {
+   char *p = (char *)&vstats + vnic_gstrings_stats[i].stat_offset;
+
+   data[i] = (vnic_gstrings_stats[i].sizeof_stat ==
+  sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+   }
+   mutex_unlock(&adapter->stats_lock);
+}
+
+/* vnic_get_strings - get strings */
+static void vnic_get_strings(struct net_device *netdev, u32 stringset, u8 
*data)
+{
+   int i;
+
+   if (stringset != ETH_SS_STATS)
+   return;
+
+   for (i = 0; i < VNIC_STATS_LEN; i++)
+   memcpy(data + i * ETH_GSTRING_LEN,
+  vnic_gstrings_stats[i].stat_string,
+  ETH_GSTRING_LEN);
+}
+
 /* ethtool ops */
 static const struct ethtool_ops opa_vnic_ethtool_ops = {
.get_link

[PATCH 04/11] IB/opa-vnic: VNIC Ethernet Management (EM) structure definitions

2017-02-22 Thread Vishwanathapura, Niranjana
Define VNIC EM MAD structures and the associated macros. These structures
are used for information exchange between VNIC EM agent (EMA) on the host
and the Ethernet manager. These include the virtual ethernet switch (vesw)
port information, vesw port mac table, summary and error counters,
vesw port interface mac lists and the EMA trap.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Tanya K Jajodia 
---
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h   | 423 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|  33 ++
 2 files changed, 456 insertions(+)

diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
index 176fca9..c025cde 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -52,6 +52,28 @@
  * and decapsulation of Ethernet packets
  */
 
+#include 
+#include 
+
+/* EMA class version */
+#define OPA_EMA_CLASS_VERSION   0x80
+
+/*
+ * Define the Intel vendor management class for OPA
+ * ETHERNET MANAGEMENT
+ */
+#define OPA_MGMT_CLASS_INTEL_EMA0x34
+
+/* EM attribute IDs */
+#define OPA_EM_ATTR_CLASS_PORT_INFO 0x0001
+#define OPA_EM_ATTR_VESWPORT_INFO   0x0011
+#define OPA_EM_ATTR_VESWPORT_MAC_ENTRIES0x0012
+#define OPA_EM_ATTR_IFACE_UCAST_MACS0x0013
+#define OPA_EM_ATTR_IFACE_MCAST_MACS0x0014
+#define OPA_EM_ATTR_DELETE_VESW 0x0015
+#define OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS   0x0020
+#define OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS 0x0022
+
 /* VNIC configured and operational state values */
 #define OPA_VNIC_STATE_DROP_ALL0x1
 #define OPA_VNIC_STATE_FORWARDING  0x3
@@ -59,4 +81,405 @@
 #define OPA_VESW_MAX_NUM_DEF_PORT   16
 #define OPA_VNIC_MAX_NUM_PCP8
 
+#define OPA_VNIC_EMA_DATA(OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR)
+
+/* Defines for vendor specific notice(trap) attributes */
+#define OPA_INTEL_EMA_NOTICE_TYPE_INFO 0x04
+
+/* INTEL OUI */
+#define INTEL_OUI_1 0x00
+#define INTEL_OUI_2 0x06
+#define INTEL_OUI_3 0x6a
+
+/* Trap opcodes sent from VNIC */
+#define OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1
+#define OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2
+#define OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3
+
+#define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd)  (!!((dlid_sd) & 0x20))
+#define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd)((dlid_sd) >> 8)
+
+/**
+ * struct opa_vesw_info - OPA vnic switch information
+ * @fabric_id: 10-bit fabric id
+ * @vesw_id: 12-bit virtual ethernet switch id
+ * @def_port_mask: bitmask of default ports
+ * @pkey: partition key
+ * @u_mcast_dlid: unknown multicast dlid
+ * @u_ucast_dlid: array of unknown unicast dlids
+ * @eth_mtu: MTUs for each vlan PCP
+ * @eth_mtu_non_vlan: MTU for non vlan packets
+ */
+struct opa_vesw_info {
+   __be16  fabric_id;
+   __be16  vesw_id;
+
+   u8  rsvd0[6];
+   __be16  def_port_mask;
+
+   u8  rsvd1[2];
+   __be16  pkey;
+
+   u8  rsvd2[4];
+   __be32  u_mcast_dlid;
+   __be32  u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+   u8  rsvd3[44];
+   __be16  eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+   __be16  eth_mtu_non_vlan;
+   u8  rsvd4[2];
+} __packed;
+
+/**
+ * struct opa_per_veswport_info - OPA vnic per port information
+ * @port_num: port number
+ * @eth_link_status: current ethernet link state
+ * @base_mac_addr: base mac address
+ * @config_state: configured port state
+ * @oper_state: operational port state
+ * @max_mac_tbl_ent: max number of mac table entries
+ * @max_smac_ent: max smac entries in mac table
+ * @mac_tbl_digest: mac table digest
+ * @encap_slid: base slid for the port
+ * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets
+ * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets
+ * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets
+ * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets
+ * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets
+ * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets
+ * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets
+ * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets
+ * @uc_macs_gen_count: generation count for unicast macs list
+ * @mc_macs_gen_count: generation count for multicast macs list
+ */
+struct opa_per_veswport_info {
+   __be32  port_num;
+
+   u8  eth_link_status;
+   u8  rsvd0[3];
+
+   u8  base_mac_addr[ETH_ALEN];
+   u8  config_state;
+   u8  oper_state;
+
+   __be16  max_mac_tbl_ent;
+   __be16  max_smac_ent;
+   __be32  mac_tbl_digest;
+   u8  rsvd1[4];
+
+   __be32  encap_slid;
+
+   u8  pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+   u8  pcp_to_vl_uc[OPA_V

[PATCH 03/11] IB/opa-vnic: Virtual Network Interface Controller (VNIC) netdev

2017-02-22 Thread Vishwanathapura, Niranjana
OPA VNIC netdev function supports Ethernet functionality over Omni-Path
fabric by encapsulating Ethernet packets inside Omni-Path packet header.
It allocates a rdma netdev device and interfaces with the network stack to
provide standard Ethernet network interfaces. It overrides HFI1 device's
netdev operations where it is required.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Sudeep Dutt 
Signed-off-by: Tanya K Jajodia 
Signed-off-by: Andrzej Kacprowski 
---
 MAINTAINERS|   7 +
 drivers/infiniband/Kconfig |   1 +
 drivers/infiniband/ulp/Makefile|   1 +
 drivers/infiniband/ulp/opa_vnic/Kconfig|   8 +
 drivers/infiniband/ulp/opa_vnic/Makefile   |   6 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c   | 239 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h   |  62 ++
 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c |  65 ++
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h| 186 
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  | 225 +++
 10 files changed, 800 insertions(+)
 create mode 100644 drivers/infiniband/ulp/opa_vnic/Kconfig
 create mode 100644 drivers/infiniband/ulp/opa_vnic/Makefile
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 468d2e8..7f0a07d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5775,6 +5775,13 @@ F:   drivers/block/cciss*
 F: include/linux/cciss_ioctl.h
 F: include/uapi/linux/cciss_ioctl.h
 
+OPA-VNIC DRIVER
+M: Dennis Dalessandro 
+M: Niranjana Vishwanathapura 
+L: linux-r...@vger.kernel.org
+S: Supported
+F: drivers/infiniband/ulp/opa_vnic
+
 HFI1 DRIVER
 M: Mike Marciniszyn 
 M: Dennis Dalessandro 
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 66f8602..234fe01 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,6 +85,7 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
 source "drivers/infiniband/ulp/iser/Kconfig"
 source "drivers/infiniband/ulp/isert/Kconfig"
 
+source "drivers/infiniband/ulp/opa_vnic/Kconfig"
 source "drivers/infiniband/sw/rdmavt/Kconfig"
 source "drivers/infiniband/sw/rxe/Kconfig"
 
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
index f3c7dcf..c28af18 100644
--- a/drivers/infiniband/ulp/Makefile
+++ b/drivers/infiniband/ulp/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_INFINIBAND_SRP)+= srp/
 obj-$(CONFIG_INFINIBAND_SRPT)  += srpt/
 obj-$(CONFIG_INFINIBAND_ISER)  += iser/
 obj-$(CONFIG_INFINIBAND_ISERT) += isert/
+obj-$(CONFIG_INFINIBAND_OPA_VNIC)  += opa_vnic/
diff --git a/drivers/infiniband/ulp/opa_vnic/Kconfig 
b/drivers/infiniband/ulp/opa_vnic/Kconfig
new file mode 100644
index 000..48132ab
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Kconfig
@@ -0,0 +1,8 @@
+config INFINIBAND_OPA_VNIC
+   tristate "Intel OPA VNIC support"
+   depends on X86_64 && INFINIBAND
+   ---help---
+   This is Omni-Path (OPA) Virtual Network Interface Controller (VNIC)
+   driver for Ethernet over Omni-Path feature. It implements the HW
+   independent VNIC functionality. It interfaces with Linux stack for
+   data path and IB MAD for the control path.
diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile 
b/drivers/infiniband/ulp/opa_vnic/Makefile
new file mode 100644
index 000..975c313
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -0,0 +1,6 @@
+# Makefile - Intel Omni-Path Virtual Network Controller driver
+# Copyright(c) 2017, Intel Corporation.
+#
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
+
+opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
new file mode 100644
index 000..c74d02a
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ 

[PATCH 01/11] IB/opa-vnic: Virtual Network Interface Controller (VNIC) documentation

2017-02-22 Thread Vishwanathapura, Niranjana
Add OPA VNIC design document explaining the VNIC architecture and the
driver design.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 Documentation/infiniband/opa_vnic.txt | 153 ++
 1 file changed, 153 insertions(+)
 create mode 100644 Documentation/infiniband/opa_vnic.txt

diff --git a/Documentation/infiniband/opa_vnic.txt 
b/Documentation/infiniband/opa_vnic.txt
new file mode 100644
index 000..282e17b
--- /dev/null
+++ b/Documentation/infiniband/opa_vnic.txt
@@ -0,0 +1,153 @@
+Intel Omni-Path (OPA) Virtual Network Interface Controller (VNIC) feature
+supports Ethernet functionality over Omni-Path fabric by encapsulating
+the Ethernet packets between HFI nodes.
+
+Architecture
+============
+The patterns of exchanges of Omni-Path encapsulated Ethernet packets
+involve one or more virtual Ethernet switches overlaid on the Omni-Path
+fabric topology. A subset of HFI nodes on the Omni-Path fabric are
+permitted to exchange encapsulated Ethernet packets across a particular
+virtual Ethernet switch. The virtual Ethernet switches are logical
+abstractions achieved by configuring the HFI nodes on the fabric for
+header generation and processing. In the simplest configuration all HFI
+nodes across the fabric exchange encapsulated Ethernet packets over a
+single virtual Ethernet switch. A virtual Ethernet switch is effectively
+an independent Ethernet network. The configuration is performed by an
+Ethernet Manager (EM) which is part of the trusted Fabric Manager (FM)
+application. HFI nodes can have multiple VNICs each connected to a
+different virtual Ethernet switch. The below diagram presents a case
+of two virtual Ethernet switches with two HFI nodes.
+
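+                               +-------------------+
+                               |      Subnet/      |
+                               |     Ethernet      |
+                               |      Manager      |
+                               +-------------------+
+                                  /          /
+                                /           /
+                              /            /
+                            /             /
++-----------------------------+  +------------------------------+
+|  Virtual Ethernet Switch    |  |  Virtual Ethernet Switch     |
+|  +---------+    +---------+ |  | +---------+    +---------+   |
+|  | VPORT   |    |  VPORT  | |  | |  VPORT  |    |  VPORT  |   |
++--+---------+----+---------+-+  +-+---------+----+---------+---+
+         |                 \        /                 |
+         |                   \    /                   |
+         |                     \/                     |
+         |                    /  \                    |
+         |                  /      \                  |
+     +-----------+------------+  +-----------+------------+
+     |   VNIC    |    VNIC    |  |    VNIC   |    VNIC    |
+     +-----------+------------+  +-----------+------------+
+     |          HFI           |  |          HFI           |
+     +------------------------+  +------------------------+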
+
+
+The Omni-Path encapsulated Ethernet packet format is as described below.
+
+Bits  Field
+
+Quad Word 0:
+0-19  SLID (lower 20 bits)
+20-30 Length (in Quad Words)
+31BECN bit
+32-51 DLID (lower 20 bits)
+52-56 SC (Service Class)
+57-59 RC (Routing Control)
+60FECN bit
+61-62 L2 (=10, 16B format)
+63LT (=1, Link Transfer Head Flit)
+
+Quad Word 1:
+0-7   L4 type (=0x78 ETHERNET)
+8-11  SLID[23:20]
+12-15 DLID[23:20]
+16-31 PKEY
+32-47 Entropy
+48-63 Reserved
+
+Quad Word 2:
+0-15  Reserved
+16-31 L4 header
+32-63 Ethernet Packet
+
+Quad Words 3 to N-1:
+0-63  Ethernet packet (pad extended)
+
+Quad Word N (last):
+0-23  Ethernet packet (pad extended)
+24-55 ICRC
+56-61 Tail
+62-63 LT (=01, Link Transfer Tail Flit)
+
+Ethernet packet is padded on the transmit side to ensure that the VNIC OPA
+packet is quad word aligned. The 'Tail' field contains the number of bytes
+padded. On the receive side the 'Tail' field is read and the padding is
+removed (along with ICRC, Tail and OPA header) before passing packet up
+the network stack.
+
+The L4 header field contains the virtual Ethernet switch id the VNIC port
+belongs to. On the receive side, this field is used to de-multiplex the
+received VNIC packets to different VNIC ports.
+
+Driver Design
+=============
+Intel OPA VNIC software design is presented in the below diagram.
+OPA VNIC functionality has a HW dependent component and a HW
+independent component.
+
+The support has been added for IB device to allocate and free the RDMA
+netdev devices. The RDMA netdev supports interfacing with the network
+stack thus creating standard network interfaces. OPA_VNIC is an RDMA
+netdev device type.
+
+The HW dependent VNIC functionality is part of

[PATCH 10/11] IB/hfi1: Virtual Network Interface Controller (VNIC) HW support

2017-02-22 Thread Vishwanathapura, Niranjana
HFI1 HW specific support for VNIC functionality.
Dynamically allocate a set of contexts for VNIC when the first vnic
port is instantiated. Allocate VNIC contexts from user contexts pool
and return them back to the same pool while freeing up. Set aside
enough MSI-X interrupts for VNIC contexts and assign them when the
contexts are allocated. On the receive side, use an RSM rule to
spread TCP/UDP streams among VNIC contexts.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andrzej Kacprowski 
---
 drivers/infiniband/hw/hfi1/aspm.h |  15 +-
 drivers/infiniband/hw/hfi1/chip.c | 293 +-
 drivers/infiniband/hw/hfi1/chip.h |   4 +-
 drivers/infiniband/hw/hfi1/debugfs.c  |   8 +-
 drivers/infiniband/hw/hfi1/driver.c   |  52 --
 drivers/infiniband/hw/hfi1/file_ops.c |  27 ++-
 drivers/infiniband/hw/hfi1/hfi.h  |  29 ++-
 drivers/infiniband/hw/hfi1/init.c |  29 +--
 drivers/infiniband/hw/hfi1/mad.c  |  10 +-
 drivers/infiniband/hw/hfi1/pio.c  |  19 +-
 drivers/infiniband/hw/hfi1/pio.h  |   8 +-
 drivers/infiniband/hw/hfi1/sysfs.c|   4 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c |   8 +-
 drivers/infiniband/hw/hfi1/user_pages.c   |   5 +-
 drivers/infiniband/hw/hfi1/verbs.c|   8 +-
 drivers/infiniband/hw/hfi1/vnic.h |   3 +
 drivers/infiniband/hw/hfi1/vnic_main.c| 245 -
 include/rdma/opa_port_info.h  |   4 +-
 18 files changed, 663 insertions(+), 108 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/aspm.h 
b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3..794e681 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long 
data)
spin_unlock_irqrestore(&rcd->aspm_lock, flags);
 }
 
-/* Disable interrupt processing for verbs contexts when PSM contexts are open 
*/
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
 static inline void aspm_disable_all(struct hfi1_devdata *dd)
 {
struct hfi1_ctxtdata *rcd;
unsigned long flags;
unsigned i;
 
-   for (i = 0; i < dd->first_user_ctxt; i++) {
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
del_timer_sync(&rcd->aspm_timer);
spin_lock_irqsave(&rcd->aspm_lock, flags);
@@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd)
if (aspm_mode != ASPM_MODE_DYNAMIC)
return;
 
-   for (i = 0; i < dd->first_user_ctxt; i++) {
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
spin_lock_irqsave(&rcd->aspm_lock, flags);
rcd->aspm_intr_enable = true;
@@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
(unsigned long)rcd);
rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
aspm_mode == ASPM_MODE_DYNAMIC &&
-   rcd->ctxt < rcd->dd->first_user_ctxt;
+   rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
 }
 
 static inline void aspm_init(struct hfi1_devdata *dd)
@@ -286,7 +289,7 @@ static inline void aspm_init(struct hfi1_devdata *dd)
spin_lock_init(&dd->aspm_lock);
dd->aspm_supported = aspm_hw_l1_supported(dd);
 
-   for (i = 0; i < dd->first_user_ctxt; i++)
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++)
aspm_ctx_init(dd->rcd[i]);
 
/* Start with ASPM disabled */
diff --git a/drivers/infiniband/hw/hfi1/chip.c 
b/drivers/infiniband/hw/hfi1/chip.c
index 121a4c9..f97fccb 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -125,9 +125,16 @@ struct flag_table {
 #define DEFAULT_KRCVQS   2
 #define MIN_KERNEL_KCTXTS 2
 #define FIRST_KERNEL_KCTXT1
-/* sizes for both the QP and RSM map tables */
-#define NUM_MAP_ENTRIES256
-#define NUM_MAP_REGS 32
+
+/*
+ * RSM instance allocation
+ *   0 - Verbs
+ *   1 - User Fecn Handling
+ *   2 - Vnic
+ */
+#define RSM_INS_VERBS 0
+#define RSM_INS_FECN  1
+#define RSM_INS_VNIC  2
 
 /* Bit offset into the GUID which carries HFI id information */
 #define GUID_HFI_INDEX_SHIFT

[PATCH 08/11] IB/opa-vnic: VNIC Ethernet Management Agent (VEMA) function

2017-02-22 Thread Vishwanathapura, Niranjana
OPA VEMA function interfaces with the Infiniband MAD stack to exchange the
management information packets with the Ethernet Manager (EM).
It interfaces with the OPA VNIC netdev function to SET/GET the management
information. The information exchanged with the EM includes class port
details, encapsulation configuration, various counters, unicast and
multicast MAC list and the MAC table. It also supports sending traps
to the EM.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Tanya K Jajodia 
Signed-off-by: Sudeep Dutt 
---
 drivers/infiniband/ulp/opa_vnic/Makefile   |2 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c |   12 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|   17 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c| 1071 
 .../infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c  |2 +-
 5 files changed, 1099 insertions(+), 5 deletions(-)
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c

diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile 
b/drivers/infiniband/ulp/opa_vnic/Makefile
index e8d1ea1..8061b28 100644
--- a/drivers/infiniband/ulp/opa_vnic/Makefile
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -4,4 +4,4 @@
 obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
 
 opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \
-  opa_vnic_vema_iface.o
+  opa_vnic_vema.o opa_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
index a98948c..d66540e 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -120,6 +120,17 @@ struct vnic_stats {
 
 #define VNIC_STATS_LEN  ARRAY_SIZE(vnic_gstrings_stats)
 
+/* vnic_get_drvinfo - get driver info */
+static void vnic_get_drvinfo(struct net_device *netdev,
+struct ethtool_drvinfo *drvinfo)
+{
+   strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver));
+   strlcpy(drvinfo->version, opa_vnic_driver_version,
+   sizeof(drvinfo->version));
+   strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
+   sizeof(drvinfo->bus_info));
+}
+
 /* vnic_get_sset_count - get string set count */
 static int vnic_get_sset_count(struct net_device *netdev, int sset)
 {
@@ -162,6 +173,7 @@ static void vnic_get_strings(struct net_device *netdev, u32 
stringset, u8 *data)
 
 /* ethtool ops */
 static const struct ethtool_ops opa_vnic_ethtool_ops = {
+   .get_drvinfo = vnic_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_strings = vnic_get_strings,
.get_sset_count = vnic_get_sset_count,
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
index b49f5d7..6bba886 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -164,10 +164,12 @@ struct __opa_veswport_trap {
  * struct opa_vnic_ctrl_port - OPA virtual NIC control port
  * @ibdev: pointer to ib device
  * @ops: opa vnic control operations
+ * @num_ports: number of opa ports
  */
 struct opa_vnic_ctrl_port {
struct ib_device   *ibdev;
struct opa_vnic_ctrl_ops   *ops;
+   u8  num_ports;
 };
 
 /**
@@ -187,6 +189,8 @@ struct opa_vnic_ctrl_port {
  * @mactbl_lock: mac table lock
  * @stats_lock: statistics lock
  * @flow_tbl: flow to default port redirection table
+ * @trap_timeout: trap timeout
+ * @trap_count: no. of traps allowed within timeout period
  */
 struct opa_vnic_adapter {
struct net_device *netdev;
@@ -213,6 +217,9 @@ struct opa_vnic_adapter {
struct mutex stats_lock;
 
u8 flow_tbl[OPA_VNIC_FLOW_TBL_SIZE];
+
+   unsigned long trap_timeout;
+   u8trap_count;
 };
 
 /* Same as opa_veswport_mactable_entry, but without bitwise attribute */
@@ -247,6 +254,8 @@ struct opa_vnic_mac_tbl_node {
dev_err(&cport->ibdev->dev, format, ## arg)
 #define c_info(format, arg...) \
dev_info(&cport->ibdev->dev, format, ## arg)
+#define c_dbg(format, arg...) \
+   dev_dbg(&cport->ibdev->dev, format, ## arg)
 
 /* The maximum allowed entries in the mac table */
 #define OPA_VNIC_MAC_TBL_MAX_ENTRIES  2048
@@ -281,6 +290,9 @@ struct opa_vnic_mac_tbl_node {
!obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++)   \
hlist_for_each_entry(obj, &name[bkt], member)
 
+extern char opa_vnic_driver_name[];
+extern const char opa_vnic_driver_version[];
+
 struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
 u8 port_num, u8 vport_num);
 void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter);
@@ -310,9 +322,8 @@ void opa_vnic_get_

[PATCH 02/11] IB/opa-vnic: Virtual Network Interface Controller (VNIC) interface

2017-02-22 Thread Vishwanathapura, Niranjana
Add rdma netdev interface to ib device structure allowing rdma netdev
devices to be allocated by ib clients.
Define OPA VNIC interface between hardware independent VNIC
functionality and the hardware dependent VNIC functionality.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 include/rdma/ib_verbs.h |  27 +
 include/rdma/opa_vnic.h | 143 
 2 files changed, 170 insertions(+)
 create mode 100644 include/rdma/opa_vnic.h

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 8c61532..16ad142 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -55,6 +55,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -221,6 +222,7 @@ enum ib_device_cap_flags {
IB_DEVICE_SG_GAPS_REG   = (1ULL << 32),
IB_DEVICE_VIRTUAL_FUNCTION  = (1ULL << 33),
IB_DEVICE_RAW_SCATTER_FCS   = (1ULL << 34),
+   IB_DEVICE_RDMA_NETDEV_OPA_VNIC  = (1ULL << 35),
 };
 
 enum ib_signature_prot_cap {
@@ -1858,6 +1860,22 @@ struct ib_port_immutable {
u32   max_mad_size;
 };
 
+/* rdma netdev type - specifies protocol type */
+enum rdma_netdev_t {
+   RDMA_NETDEV_OPA_VNIC
+};
+
+/**
+ * struct rdma_netdev - rdma netdev
+ * For cases where netstack interfacing is required.
+ */
+struct rdma_netdev {
+   void *clnt_priv;
+
+   /* control functions */
+   void (*set_id)(struct net_device *netdev, int id);
+};
+
 struct ib_device {
struct device*dma_device;
 
@@ -2110,6 +2128,15 @@ struct ib_device {
   struct 
ib_rwq_ind_table_init_attr *init_attr,
   struct ib_udata 
*udata);
int(*destroy_rwq_ind_table)(struct 
ib_rwq_ind_table *wq_ind_table);
+   /* rdma netdev operations */
+   struct net_device *(*alloc_rdma_netdev)(
+   struct ib_device *device,
+   u8 port_num,
+   enum rdma_netdev_t type,
+   const char *name,
+   unsigned char name_assign_type,
+   void (*setup)(struct net_device *));
+   void (*free_rdma_netdev)(struct net_device *netdev);
struct ib_dma_mapping_ops   *dma_ops;
 
struct module   *owner;
diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h
new file mode 100644
index 000..68315cc
--- /dev/null
+++ b/include/rdma/opa_vnic.h
@@ -0,0 +1,143 @@
+#ifndef _OPA_VNIC_H
+#define _OPA_VNIC_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in
+ *the documentation and/or other materials provided with the
+ *distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *contributors may be used to endorse or promote products derived
+ *from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE

[PATCH 07/11] IB/opa-vnic: VNIC Ethernet Management Agent (VEMA) interface

2017-02-22 Thread Vishwanathapura, Niranjana
OPA VNIC EMA interface functions are the management interfaces to the OPA
VNIC netdev. Add support to add and remove VNIC ports. Implement the
required GET/SET management interface functions and processing of new
management information. Add support to send trap notifications upon various
events like interface status change, unicast/multicast mac list update and
mac address change.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Tanya K Jajodia 
---
 drivers/infiniband/ulp/opa_vnic/Makefile   |   3 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h   |   4 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|  44 +++
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  | 142 +++-
 .../infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c  | 390 +
 5 files changed, 581 insertions(+), 2 deletions(-)
 create mode 100644 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c

diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile 
b/drivers/infiniband/ulp/opa_vnic/Makefile
index 975c313..e8d1ea1 100644
--- a/drivers/infiniband/ulp/opa_vnic/Makefile
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -3,4 +3,5 @@
 #
 obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
 
-opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o
+opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \
+  opa_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
index c025cde..4c434b9 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -99,6 +99,10 @@
 #define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd)  (!!((dlid_sd) & 0x20))
 #define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd)((dlid_sd) >> 8)
 
+/* VNIC Ethernet link status */
+#define OPA_VNIC_ETH_LINK_UP 1
+#define OPA_VNIC_ETH_LINK_DOWN   2
+
 /**
  * struct opa_vesw_info - OPA vnic switch information
  * @fabric_id: 10-bit fabric id
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
index bec4866..b49f5d7 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -161,14 +161,28 @@ struct __opa_veswport_trap {
 } __packed;
 
 /**
+ * struct opa_vnic_ctrl_port - OPA virtual NIC control port
+ * @ibdev: pointer to ib device
+ * @ops: opa vnic control operations
+ */
+struct opa_vnic_ctrl_port {
+   struct ib_device   *ibdev;
+   struct opa_vnic_ctrl_ops   *ops;
+};
+
+/**
  * struct opa_vnic_adapter - OPA VNIC netdev private data structure
  * @netdev: pointer to associated netdev
  * @ibdev: ib device
+ * @cport: pointer to opa vnic control port
  * @rn_ops: rdma netdev's net_device_ops
  * @port_num: OPA port number
  * @vport_num: vesw port number
  * @lock: adapter lock
  * @info: virtual ethernet switch port information
+ * @vema_mac_addr: mac address configured by vema
+ * @umac_hash: unicast maclist hash
+ * @mmac_hash: multicast maclist hash
  * @mactbl: hash table of MAC entries
  * @mactbl_lock: mac table lock
  * @stats_lock: statistics lock
@@ -177,6 +191,7 @@ struct __opa_veswport_trap {
 struct opa_vnic_adapter {
struct net_device *netdev;
struct ib_device  *ibdev;
+   struct opa_vnic_ctrl_port *cport;
const struct net_device_ops   *rn_ops;
 
u8 port_num;
@@ -186,6 +201,9 @@ struct opa_vnic_adapter {
struct mutex lock;
 
struct __opa_veswport_info  info;
+   u8  vema_mac_addr[ETH_ALEN];
+   u32 umac_hash;
+   u32 mmac_hash;
struct hlist_head  __rcu   *mactbl;
 
/* Lock used to protect updates to mac table */
@@ -225,6 +243,11 @@ struct opa_vnic_mac_tbl_node {
 #define v_warn(format, arg...) \
netdev_warn(adapter->netdev, format, ## arg)
 
+#define c_err(format, arg...) \
+   dev_err(&cport->ibdev->dev, format, ## arg)
+#define c_info(format, arg...) \
+   dev_info(&cport->ibdev->dev, format, ## arg)
+
 /* The maximum allowed entries in the mac table */
 #define OPA_VNIC_MAC_TBL_MAX_ENTRIES  2048
 /* Limit of smac entries in mac table */
@@ -264,11 +287,32 @@ struct opa_vnic_adapter *opa_vnic_add_netdev(struct 
ib_device *ibdev,
 void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
 u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
 u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff 
*skb);
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter);
 void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter);
 void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
struct opa_veswport_mactable *tbl);
 int opa_vnic

[PATCH 09/11] IB/hfi1: OPA_VNIC RDMA netdev support

2017-02-22 Thread Vishwanathapura, Niranjana
Add support to create and free OPA_VNIC rdma netdev devices.
Implement netstack interface functionality including xmit_skb,
receive side NAPI etc. Also implement rdma netdev control functions.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andrzej Kacprowski 
---
 drivers/infiniband/hw/hfi1/Makefile|   2 +-
 drivers/infiniband/hw/hfi1/driver.c|  25 +-
 drivers/infiniband/hw/hfi1/hfi.h   |  27 +-
 drivers/infiniband/hw/hfi1/init.c  |   9 +-
 drivers/infiniband/hw/hfi1/vnic.h  | 153 
 drivers/infiniband/hw/hfi1/vnic_main.c | 646 +
 6 files changed, 855 insertions(+), 7 deletions(-)
 create mode 100644 drivers/infiniband/hw/hfi1/vnic.h
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_main.c

diff --git a/drivers/infiniband/hw/hfi1/Makefile 
b/drivers/infiniband/hw/hfi1/Makefile
index 0cf97a0..2280538 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
-   verbs_txreq.o
+   verbs_txreq.o vnic_main.o
 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
 
 CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/driver.c 
b/drivers/infiniband/hw/hfi1/driver.c
index 3881c95..4969b88 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -59,6 +59,7 @@
 #include "trace.h"
 #include "qp.h"
 #include "sdma.h"
+#include "vnic.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -1372,15 +1373,31 @@ int process_receive_ib(struct hfi1_packet *packet)
return RHF_RCV_CONTINUE;
 }
 
+static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
+{
+   /* Packet received in VNIC context via RSM */
+   if (packet->rcd->is_vnic)
+   return true;
+
+   if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) &&
+   (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR))
+   return true;
+
+   return false;
+}
+
 int process_receive_bypass(struct hfi1_packet *packet)
 {
struct hfi1_devdata *dd = packet->rcd->dd;
 
-   if (unlikely(rhf_err_flags(packet->rhf)))
+   if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
+   } else if (hfi1_is_vnic_packet(packet)) {
+   hfi1_vnic_bypass_rcv(packet);
+   return RHF_RCV_CONTINUE;
+   }
 
-   dd_dev_err(dd,
-  "Bypass packets are not supported in normal operation. 
Dropping\n");
+   dd_dev_err(dd, "Unsupported bypass packet. Dropping\n");
incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
u64 *flits = packet->ebuf;
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 0808e3c3..66fb9e4 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,7 @@
 #ifndef _HFI1_KERNEL_H
 #define _HFI1_KERNEL_H
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -337,6 +337,12 @@ struct hfi1_ctxtdata {
 * packets with the wrong interrupt handler.
 */
int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+
+   /* Indicates that this is vnic context */
+   bool is_vnic;
+
+   /* vnic queue index this context is mapped to */
+   u8 vnic_q_idx;
 };
 
 /*
@@ -808,6 +814,19 @@ struct hfi1_asic_data {
struct hfi1_i2c_bus *i2c_bus1;
 };
 
+/*
+ * Number of VNIC contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_NUM_VNIC_CTXT   8
+
+/* Virtual NIC information */
+struct hfi1_vnic_data {
+   struct idr vesw_idr;
+};
+
+struct hfi1_vnic_vport_info;
+
 /* device data struct now contains only "general per-device" info.
  * fields related to a physical IB port are in a hfi1_pportdata struct.
  */
@@ -1115,6 +1134,9 @@ struct hfi1_devdata {
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+   int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx,
+st

[PATCH 11/11] IB/hfi1: VNIC SDMA support

2017-02-22 Thread Vishwanathapura, Niranjana
HFI1 VNIC SDMA support enables transmission of VNIC packets over SDMA.
Map VNIC queues to SDMA engines and support halting and wakeup of the
VNIC queues.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 drivers/infiniband/hw/hfi1/Makefile|   2 +-
 drivers/infiniband/hw/hfi1/hfi.h   |   1 +
 drivers/infiniband/hw/hfi1/init.c  |   1 +
 drivers/infiniband/hw/hfi1/vnic.h  |  28 +++
 drivers/infiniband/hw/hfi1/vnic_main.c |  24 ++-
 drivers/infiniband/hw/hfi1/vnic_sdma.c | 323 +
 6 files changed, 376 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_sdma.c

diff --git a/drivers/infiniband/hw/hfi1/Makefile 
b/drivers/infiniband/hw/hfi1/Makefile
index 2280538..88085f6 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
-   verbs_txreq.o vnic_main.o
+   verbs_txreq.o vnic_main.o vnic_sdma.o
 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
 
 CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index ac31b23..b57b88a 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -834,6 +834,7 @@ struct hfi1_asic_data {
 /* Virtual NIC information */
 struct hfi1_vnic_data {
struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
+   struct kmem_cache *txreq_cache;
u8 num_vports;
struct idr vesw_idr;
u8 rmt_start;
diff --git a/drivers/infiniband/hw/hfi1/init.c 
b/drivers/infiniband/hw/hfi1/init.c
index 1ecccaa..3fc7984 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -681,6 +681,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
dd->process_pio_send = hfi1_verbs_send_pio;
dd->process_dma_send = hfi1_verbs_send_dma;
dd->pio_inline_send = pio_copy;
+   dd->process_vnic_dma_send = hfi1_vnic_send_dma;
 
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
diff --git a/drivers/infiniband/hw/hfi1/vnic.h 
b/drivers/infiniband/hw/hfi1/vnic.h
index d620aec..36996f0 100644
--- a/drivers/infiniband/hw/hfi1/vnic.h
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -49,6 +49,7 @@
 
 #include 
 #include "hfi.h"
+#include "sdma.h"
 
 #define HFI1_VNIC_MAX_TXQ 16
 #define HFI1_VNIC_MAX_PAD 12
@@ -85,6 +86,26 @@
 #define HFI1_VNIC_MAX_QUEUE 16
 
 /**
+ * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
+ * @dd - device data pointer
+ * @sde - sdma engine
+ * @vinfo - vnic info pointer
+ * @wait - iowait structure
+ * @stx - sdma tx request
+ * @state - vnic Tx ring SDMA state
+ * @q_idx - vnic Tx queue index
+ */
+struct hfi1_vnic_sdma {
+   struct hfi1_devdata *dd;
+   struct sdma_engine  *sde;
+   struct hfi1_vnic_vport_info *vinfo;
+   struct iowait wait;
+   struct sdma_txreq stx;
+   unsigned int state;
+   u8 q_idx;
+};
+
+/**
  * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue
  * @idx: queue index
  * @vinfo: pointer to vport information
@@ -111,6 +132,7 @@ struct hfi1_vnic_rx_queue {
  * @vesw_id: virtual switch id
  * @rxq: Array of receive queues
  * @stats: per queue stats
+ * @sdma: VNIC SDMA structure per TXQ
  */
 struct hfi1_vnic_vport_info {
struct hfi1_devdata *dd;
@@ -126,6 +148,7 @@ struct hfi1_vnic_vport_info {
struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT];
 
struct opa_vnic_stats  stats[HFI1_VNIC_MAX_QUEUE];
+   struct hfi1_vnic_sdma  sdma[HFI1_VNIC_MAX_TXQ];
 };
 
 #define v_dbg(format, arg...) \
@@ -138,8 +161,13 @@ struct hfi1_vnic_vport_info {
 /* vnic hfi1 internal functions */
 void hfi1_vnic_setup(struct hfi1_devdata *dd);
 void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
 
 void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet);
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo);
+bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+   u8 q_idx);
 
 /* vnic rdma netdev operations */
 struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c 
b/drivers/infiniband/hw/hfi1/vnic_main.c
index 4a9bb8c..8f354e7 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -408,6 +408,10 @@ static void hfi1_vnic_maybe_stop_tx(struct 
hfi1_vnic_vport_info *vinfo,
u8 q_idx)
 {
netif_stop_subqueue(vinfo->netdev, q_idx);
+   if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
+   return;
+

[PATCH 06/11] IB/opa-vnic: VNIC MAC table support

2017-02-22 Thread Vishwanathapura, Niranjana
OPA VNIC MAC table contains the MAC address to DLID mappings provided by
the Ethernet manager. During transmission, the MAC table provides the MAC
address to DLID translation. Implement MAC table using simple hash list.
Also provide support to update/query the MAC table by Ethernet manager.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
---
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c   | 236 +
 .../infiniband/ulp/opa_vnic/opa_vnic_internal.h|  51 +
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  |   4 +
 3 files changed, 291 insertions(+)

diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c 
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
index c74d02a..2e8fee9 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -96,6 +96,238 @@ static inline void opa_vnic_make_header(u8 *hdr, u32 slid, 
u32 dlid, u16 len,
memcpy(hdr, h, OPA_VNIC_HDR_LEN);
 }
 
+/*
+ * Using a simple hash table for mac table implementation with the last octet
+ * of mac address as a key.
+ */
+static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl)
+{
+   struct opa_vnic_mac_tbl_node *node;
+   struct hlist_node *tmp;
+   int bkt;
+
+   if (!mactbl)
+   return;
+
+   vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) {
+   hash_del(&node->hlist);
+   kfree(node);
+   }
+   kfree(mactbl);
+}
+
+static struct hlist_head *opa_vnic_alloc_mac_tbl(void)
+{
+   u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE;
+   struct hlist_head *mactbl;
+
+   mactbl = kzalloc(size, GFP_KERNEL);
+   if (!mactbl)
+   return ERR_PTR(-ENOMEM);
+
+   vnic_hash_init(mactbl);
+   return mactbl;
+}
+
+/* opa_vnic_release_mac_tbl - empty and free the mac table */
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter)
+{
+   struct hlist_head *mactbl;
+
+   mutex_lock(&adapter->mactbl_lock);
+   mactbl = rcu_access_pointer(adapter->mactbl);
+   rcu_assign_pointer(adapter->mactbl, NULL);
+   synchronize_rcu();
+   opa_vnic_free_mac_tbl(mactbl);
+   mutex_unlock(&adapter->mactbl_lock);
+}
+
+/*
+ * opa_vnic_query_mac_tbl - query the mac table for a section
+ *
+ * This function implements query of specific function of the mac table.
+ * The function also expects the requested range to be valid.
+ */
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+   struct opa_veswport_mactable *tbl)
+{
+   struct opa_vnic_mac_tbl_node *node;
+   struct hlist_head *mactbl;
+   int bkt;
+   u16 loffset, lnum_entries;
+
+   rcu_read_lock();
+   mactbl = rcu_dereference(adapter->mactbl);
+   if (!mactbl)
+   goto get_mac_done;
+
+   loffset = be16_to_cpu(tbl->offset);
+   lnum_entries = be16_to_cpu(tbl->num_entries);
+
+   vnic_hash_for_each(mactbl, bkt, node, hlist) {
+   struct __opa_vnic_mactable_entry *nentry = &node->entry;
+   struct opa_veswport_mactable_entry *entry;
+
+   if ((node->index < loffset) ||
+   (node->index >= (loffset + lnum_entries)))
+   continue;
+
+   /* populate entry in the tbl corresponding to the index */
+   entry = &tbl->tbl_entries[node->index - loffset];
+   memcpy(entry->mac_addr, nentry->mac_addr,
+  ARRAY_SIZE(entry->mac_addr));
+   memcpy(entry->mac_addr_mask, nentry->mac_addr_mask,
+  ARRAY_SIZE(entry->mac_addr_mask));
+   entry->dlid_sd = cpu_to_be32(nentry->dlid_sd);
+   }
+   tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest);
+get_mac_done:
+   rcu_read_unlock();
+}
+
+/*
+ * opa_vnic_update_mac_tbl - update mac table section
+ *
+ * This function updates the specified section of the mac table.
+ * The procedure includes following steps.
+ *  - Allocate a new mac (hash) table.
+ *  - Add the specified entries to the new table.
+ *(except the ones that are requested to be deleted).
+ *  - Add all the other entries from the old mac table.
+ *  - If there is a failure, free the new table and return.
+ *  - Switch to the new table.
+ *  - Free the old table and return.
+ *
+ * The function also expects the requested range to be valid.
+ */
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+   struct opa_veswport_mactable *tbl)
+{
+   struct opa_vnic_mac_tbl_node *node, *new_node;
+   struct hlist_head *new_mactbl, *old_mactbl;
+   int i, bkt, rc = 0;
+   u8 key;
+   u16 loffset, lnum_entries;
+
+   mutex_lock(&adapter->mactbl_lock);
+   /* allocate new mac table */
+   new_mactbl = opa_vnic_alloc_mac_tbl();
+   if (IS_ERR(

Re: [RFC v3 00/11] HFI Virtual Network Interface Controller (VNIC)

2017-02-22 Thread Vishwanathapura, Niranjana

On Mon, Feb 13, 2017 at 10:09:35AM -0700, Jason Gunthorpe wrote:

On Sun, Feb 12, 2017 at 01:26:35PM +, Liran Liss wrote:

> From: linux-rdma-ow...@vger.kernel.org [mailto:linux-rdma-
> ow...@vger.kernel.org] On Behalf Of Vishwanathapura, Niranjana

>
> ChangeLog:
> =
> v2 => v3:
> a) Introduce and adopt generic RDMA netdev interface including,
>  - having bottom hfi1 driver directly interfacing with netstack.
>  - optimizing interface between hfi_vnic and hfi1 driver.
> b) Remove bitfield usage.
> c) Move hfi_vnic driver to drivers/infiniband/ulp folder.

The vnic driver should be placed under drivers/infiniband/hw/hfi1/*
since it is HFI-specific.


I think they should call it opa_vnic and keep it in ulp to avoid this
confusion.



Alright, I am renaming it as opa_vnic to avoid any confusion.

Thanks,
Niranjana


Jason


Re: [RFC v3 09/11] IB/hfi1: HFI_VNIC RDMA netdev support

2017-02-08 Thread Vishwanathapura, Niranjana

On Wed, Feb 08, 2017 at 04:06:40AM +, Parav Pandit wrote:

Hi,


-Original Message-
From: linux-rdma-ow...@vger.kernel.org [mailto:linux-rdma-
ow...@vger.kernel.org] On Behalf Of Vishwanathapura, Niranjana
Sent: Tuesday, February 7, 2017 2:23 PM
To: dledf...@redhat.com
Cc: linux-r...@vger.kernel.org; netdev@vger.kernel.org;
dennis.dalessan...@intel.com; ira.we...@intel.com; Niranjana
Vishwanathapura ; Andrzej
Kacprowski 
Subject: [RFC v3 09/11] IB/hfi1: HFI_VNIC RDMA netdev support

Add support to create and free HFI_VNIC rdma netdev devices.
Implement netstack interface functionality including xmit_skb, receive side
NAPI etc. Also implement rdma netdev control functions.



All code in this particular patch belongs to the netdev VNIC ULP driver.
There is nothing much that appears specific to IB/RDMA that makes 
drivers/infiniband/hw a better place to be.
It has netdev tx, rx, napi, stats in this patch.
If VNIC is a ULP then most of the VNIC specific code should reside in the ULP 
directory or drivers/net/ethernet ?


This patch implements the HW specific portion of the VNIC netdev. As it turns out, HW 
queue specific netstack interfacing (like NAPI, xmit_skb, Tx queue throttling 
etc) is specific to the HW implementation of the queues. Netstack APIs are 
designed for that. Hence they belong in the HW driver here and not in the ULP.


The HFI_VNIC ULP implements control plane operations by tapping into control 
operations (open, close etc and encapsulation as specified by control plane).


Niranjana



Re: [RFC v3 02/11] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) interface

2017-02-07 Thread Vishwanathapura, Niranjana

On Wed, Feb 08, 2017 at 12:43:40AM +, Parav Pandit wrote:

@@ -2096,6 +2114,15 @@ struct ib_device {
   struct
ib_rwq_ind_table_init_attr *init_attr,
   struct ib_udata
*udata);
int(*destroy_rwq_ind_table)(struct 
ib_rwq_ind_table
*wq_ind_table);
+   /* rdma netdev operations */
+   struct net_device *(*alloc_rdma_netdev)(
+   struct ib_device *device,
+   u8 port_num,
+   enum rdma_netdev_t type,
+   const char *name,
+   unsigned char name_assign_type,
+   void (*setup)(struct net_device *));
+   void (*free_rdma_netdev)(struct net_device *netdev);
struct ib_dma_mapping_ops   *dma_ops;

struct module   *owner;


As its clear from the cover letter and from the request to place this in 
drivers/infiniband/ulp,
Instead of increasing the ib_dev structure further,
Can you change the code to make use of ib_register_client() and friend 
functions to register vnic as ULP.
(similar to other ULP such as uverbs, srp, ipoib).
This will also allow you to get notified for removing the vnic device when 
the underlying rdma device gets removed.
Based on the property that gets exposed by the ibdev, vnic driver filters 
whether it needs to load its vnic to specific device or not.
This way modules are isolated between core and ULP little better.
Would it work for you?


The HFI_VNIC driver is using ib_register_client() and friend functions. The patch 
below in this series does that.

[RFC v3 08/11] IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) function

Niranjana





Re: [RFC v3 02/11] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) interface

2017-02-07 Thread Vishwanathapura, Niranjana

On Tue, Feb 07, 2017 at 03:19:25PM -0700, Jason Gunthorpe wrote:

On Tue, Feb 07, 2017 at 02:06:30PM -0800, Vishwanathapura, Niranjana wrote:


>>IB_DEVICE_RAW_SCATTER_FCS   = (1ULL << 34),
>>+   IB_DEVICE_RDMA_NETDEV_HFI_VNIC  = (1ULL << 35),
>
>What is this called HFI_VNIC anyhow? Shouldn't this be OPA_VNIC? There
>is nothing really HFI specific, right?

Agreed, OPA_VNIC is more appropriate here. Will change it.


And probably lots of other places too.. :)



Well, our driver is called HFI1 and HFI_VNIC is in accordance with our naming 
convention. I will only change the above device attribute name to OPA_VNIC in 
the ib interface just to be consistent with other such definitions here.





>And this should be rn->dev_priv ?

Yah, both will result in same behavior. But yah, what you are suggesting
will remove any confusion. Will change in next PATCH series.


Only because the struct has no members, as soon as someone adds
something it would go booom.



Agreed.


Jason


Re: [RFC v3 00/11] HFI Virtual Network Interface Controller (VNIC)

2017-02-07 Thread Vishwanathapura, Niranjana

On Tue, Feb 07, 2017 at 09:58:50PM +, Bart Van Assche wrote:

On Tue, 2017-02-07 at 21:44 +, Hefty, Sean wrote:

This is Ethernet - not IP - encapsulation over a non-InfiniBand device/protocol.


That's more than clear from the cover letter. In my opinion the cover letter
should explain why it is considered useful to have such a driver upstream
and what the use cases are of encapsulating Ethernet frames inside RDMA
packets.



We believe that on our HW, the HFI VNIC design gives better hardware resource 
usage, which is also scalable and hence leaves room for better performance.
Also, as evident in the cover letter, it gives us better manageability by 
defining virtual Ethernet switches overlaid on the fabric and using the
standard Ethernet support provided by Linux.

Niranjana




Re: [RFC v3 02/11] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) interface

2017-02-07 Thread Vishwanathapura, Niranjana

On Tue, Feb 07, 2017 at 02:19:01PM -0700, Jason Gunthorpe wrote:

On Tue, Feb 07, 2017 at 12:23:01PM -0800, Vishwanathapura, Niranjana wrote:

Add rdma netdev interface to ib device structure allowing rdma netdev
devices to be allocated by ib clients.
Define HFI VNIC interface between hardware independent VNIC
functionality and the hardware dependent VNIC functionality.


This commit message could be a bit clearer.

The alloc_rdma_netdev multiplexer is intended as a new general
interface and this adds a protocol definition for ethernet VNIC on
OPA.



Ok, will add the statement to the commit message in PATCH series.


The hope is that ipoib can follow the same example and use the same
alloc_rdma_netdev entry point. Hopefully Mellanox will look at this
patch as I have talked to them in the past about doing this...

It looks like HFI turned out fairly well, the driver code and higher
level code have a reasonably nice split in my quick look.



Yes, HFI_VNIC design is leaner now with standard netdev interface.


IB_DEVICE_RAW_SCATTER_FCS   = (1ULL << 34),
+   IB_DEVICE_RDMA_NETDEV_HFI_VNIC  = (1ULL << 35),


What is this called HFI_VNIC anyhow? Shouldn't this be OPA_VNIC? There
is nothing really HFI specific, right?



Agreed, OPA_VNIC is more appropriate here. Will change it.


+/* hfi vnic rdma netdev's private data structure */
+struct hfi_vnic_rdma_netdev {
+   struct rdma_netdev rn;  /* keep this first */
+   /* followed by device private data */
+   char *dev_priv[0];
+};
+
+static inline void *hfi_vnic_priv(const struct net_device *dev)
+{
+   struct rdma_netdev *rn = netdev_priv(dev);
+
+   return rn->clnt_priv;
+}
+
+static inline void *hfi_vnic_dev_priv(const struct net_device *dev)
+{
+   struct rdma_netdev *rn = netdev_priv(dev);


Shouldn't this be hfi_vnic_rdma_netdev ?


+   return rn + 1;


And this should be rn->dev_priv ?



Yah, both will result in same behavior. But yah, what you are suggesting will 
remove any confusion. Will change in next PATCH series.


Niranjana


Jason


Re: [RFC v3 00/11] HFI Virtual Network Interface Controller (VNIC)

2017-02-07 Thread Vishwanathapura, Niranjana

On Tue, Feb 07, 2017 at 01:00:05PM -0800, Hefty, Sean wrote:

I didn't read patches yet, and prefer to ask it in advance. Does this
new ULP work with all
drivers/infiniband/hw/* devices as it is expected from ULP?


Like the way ipoib or srp work with all hw devices?  What is the real point of 
this question?


Leon,
It was already discussed in below threads.

https://www.spinics.net/lists/linux-rdma/msg44128.html
https://www.spinics.net/lists/linux-rdma/msg44131.html
https://www.spinics.net/lists/linux-rdma/msg44155.html

Niranjana



[RFC v3 04/11] IB/hfi-vnic: VNIC Ethernet Management (EM) structure definitions

2017-02-07 Thread Vishwanathapura, Niranjana
Define VNIC EM MAD structures and the associated macros. These structures
are used for information exchange between VNIC EM agent (EMA) on the HFI
host and the Ethernet manager. These include the virtual ethernet switch
(vesw) port information, vesw port mac table, summary and error counters,
vesw port interface mac lists and the EMA trap.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Tanya K Jajodia 
---
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h   | 423 +
 .../infiniband/ulp/hfi_vnic/hfi_vnic_internal.h|  33 ++
 2 files changed, 456 insertions(+)

diff --git a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h 
b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h
index aa93f62..73c2bdc 100644
--- a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h
+++ b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h
@@ -52,7 +52,430 @@
  * and decapsulation of Ethernet packets
  */
 
+#include 
+#include 
+
+/* EMA class version */
+#define HFI_EMA_CLASS_VERSION   0x80
+
+/*
+ * Define the Intel vendor management class for HFI
+ * ETHERNET MANAGEMENT
+ */
+#define HFI_MGMT_CLASS_INTEL_EMA0x34
+
+/* EM attribute IDs */
+#define HFI_EM_ATTR_CLASS_PORT_INFO 0x0001
+#define HFI_EM_ATTR_VESWPORT_INFO   0x0011
+#define HFI_EM_ATTR_VESWPORT_MAC_ENTRIES0x0012
+#define HFI_EM_ATTR_IFACE_UCAST_MACS0x0013
+#define HFI_EM_ATTR_IFACE_MCAST_MACS0x0014
+#define HFI_EM_ATTR_DELETE_VESW 0x0015
+#define HFI_EM_ATTR_VESWPORT_SUMMARY_COUNTERS   0x0020
+#define HFI_EM_ATTR_VESWPORT_ERROR_COUNTERS 0x0022
+
 #define HFI_VESW_MAX_NUM_DEF_PORT   16
 #define HFI_VNIC_MAX_NUM_PCP8
 
+#define HFI_VNIC_EMA_DATA(OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR)
+
+/* Defines for vendor specific notice(trap) attributes */
+#define HFI_INTEL_EMA_NOTICE_TYPE_INFO 0x04
+
+/* INTEL OUI */
+#define INTEL_OUI_1 0x00
+#define INTEL_OUI_2 0x06
+#define INTEL_OUI_3 0x6a
+
+/* Trap opcodes sent from VNIC */
+#define HFI_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1
+#define HFI_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2
+#define HFI_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3
+
+#define HFI_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd)  (!!((dlid_sd) & 0x20))
+#define HFI_VNIC_DLID_SD_GET_DLID(dlid_sd)((dlid_sd) >> 8)
+
+/**
+ * struct hfi_vesw_info - HFI vnic switch information
+ * @fabric_id: 10-bit fabric id
+ * @vesw_id: 12-bit virtual ethernet switch id
+ * @def_port_mask: bitmask of default ports
+ * @pkey: partition key
+ * @u_mcast_dlid: unknown multicast dlid
+ * @u_ucast_dlid: array of unknown unicast dlids
+ * @eth_mtu: MTUs for each vlan PCP
+ * @eth_mtu_non_vlan: MTU for non vlan packets
+ */
+struct hfi_vesw_info {
+   __be16  fabric_id;
+   __be16  vesw_id;
+
+   u8  rsvd0[6];
+   __be16  def_port_mask;
+
+   u8  rsvd1[2];
+   __be16  pkey;
+
+   u8  rsvd2[4];
+   __be32  u_mcast_dlid;
+   __be32  u_ucast_dlid[HFI_VESW_MAX_NUM_DEF_PORT];
+
+   u8  rsvd3[44];
+   __be16  eth_mtu[HFI_VNIC_MAX_NUM_PCP];
+   __be16  eth_mtu_non_vlan;
+   u8  rsvd4[2];
+} __packed;
+
+/**
+ * struct hfi_per_veswport_info - HFI vnic per port information
+ * @port_num: port number
+ * @eth_link_status: current ethernet link state
+ * @base_mac_addr: base mac address
+ * @config_state: configured port state
+ * @oper_state: operational port state
+ * @max_mac_tbl_ent: max number of mac table entries
+ * @max_smac_ent: max smac entries in mac table
+ * @mac_tbl_digest: mac table digest
+ * @encap_slid: base slid for the port
+ * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets
+ * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets
+ * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets
+ * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets
+ * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets
+ * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets
+ * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets
+ * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets
+ * @uc_macs_gen_count: generation count for unicast macs list
+ * @mc_macs_gen_count: generation count for multicast macs list
+ */
+struct hfi_per_veswport_info {
+   __be32  port_num;
+
+   u8  eth_link_status;
+   u8  rsvd0[3];
+
+   u8  base_mac_addr[ETH_ALEN];
+   u8  config_state;
+   u8  oper_state;
+
+   __be16  max_mac_tbl_ent;
+   __be16  max_smac_ent;
+   __be32  mac_tbl_digest;
+   u8  rsvd1[4];
+
+   __be32  encap_slid;
+
+   u8  pcp_to_sc_uc[HFI_VNIC_MAX_NUM_PCP];
+   u8  pcp_to_vl_uc[HFI_VNIC_MAX_NUM_PCP];
+   u8  pcp_to_sc_mc[HFI_VNIC_MAX_NUM_PCP];
+   u8  pcp_to_vl_mc[HFI_VNIC_MAX_NUM_PCP];
+
+   u8  non_vlan_sc_uc;

[RFC v3 03/11] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) netdev

2017-02-07 Thread Vishwanathapura, Niranjana
HFI VNIC netdev function supports Ethernet functionality over Omni-Path
fabric by encapsulating Ethernet packets inside Omni-Path packet header.
It allocates an rdma netdev device and interfaces with the network stack to
provide standard Ethernet network interfaces. It overrides HFI device's
netdev operations where it is required.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Sudeep Dutt 
Signed-off-by: Tanya K Jajodia 
Signed-off-by: Andrzej Kacprowski 
---
 MAINTAINERS|   7 +
 drivers/infiniband/Kconfig |   1 +
 drivers/infiniband/ulp/Makefile|   1 +
 drivers/infiniband/ulp/hfi_vnic/Kconfig|   8 +
 drivers/infiniband/ulp/hfi_vnic/Makefile   |   6 +
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.c   | 239 +
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h   |  58 +
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c |  65 ++
 .../infiniband/ulp/hfi_vnic/hfi_vnic_internal.h| 186 
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_netdev.c  | 224 +++
 10 files changed, 795 insertions(+)
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/Kconfig
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/Makefile
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.c
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_internal.h
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_netdev.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 5f0420a..83a8cfa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5764,6 +5764,13 @@ F:   drivers/block/cciss*
 F: include/linux/cciss_ioctl.h
 F: include/uapi/linux/cciss_ioctl.h
 
+HFI-VNIC DRIVER
+M: Dennis Dalessandro 
+M: Niranjana Vishwanathapura 
+L: linux-r...@vger.kernel.org
+S: Supported
+F: drivers/infiniband/ulp/hfi_vnic
+
 HFI1 DRIVER
 M: Mike Marciniszyn 
 M: Dennis Dalessandro 
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 6709173..7d97e58 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,6 +85,7 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
 source "drivers/infiniband/ulp/iser/Kconfig"
 source "drivers/infiniband/ulp/isert/Kconfig"
 
+source "drivers/infiniband/ulp/hfi_vnic/Kconfig"
 source "drivers/infiniband/sw/rdmavt/Kconfig"
 source "drivers/infiniband/sw/rxe/Kconfig"
 
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
index f3c7dcf..9d20ac7 100644
--- a/drivers/infiniband/ulp/Makefile
+++ b/drivers/infiniband/ulp/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_INFINIBAND_SRP)+= srp/
 obj-$(CONFIG_INFINIBAND_SRPT)  += srpt/
 obj-$(CONFIG_INFINIBAND_ISER)  += iser/
 obj-$(CONFIG_INFINIBAND_ISERT) += isert/
+obj-$(CONFIG_INFINIBAND_HFI_VNIC)  += hfi_vnic/
diff --git a/drivers/infiniband/ulp/hfi_vnic/Kconfig 
b/drivers/infiniband/ulp/hfi_vnic/Kconfig
new file mode 100644
index 000..cca6810
--- /dev/null
+++ b/drivers/infiniband/ulp/hfi_vnic/Kconfig
@@ -0,0 +1,8 @@
+config INFINIBAND_HFI_VNIC
+   tristate "Intel HFI VNIC support"
+   depends on X86_64 && INFINIBAND
+   ---help---
+   This is HFI Virtual Network Interface Controller (VNIC) driver
+   for Ethernet over HFI feature. It implements the HW independent
+   VNIC functionality. It interfaces with Linux stack for data path
+   and IB MAD for the control path.
diff --git a/drivers/infiniband/ulp/hfi_vnic/Makefile 
b/drivers/infiniband/ulp/hfi_vnic/Makefile
new file mode 100644
index 000..ad7db32
--- /dev/null
+++ b/drivers/infiniband/ulp/hfi_vnic/Makefile
@@ -0,0 +1,6 @@
+# Makefile - Intel HFI Virtual Network Controller driver
+# Copyright(c) 2017, Intel Corporation.
+#
+obj-$(CONFIG_INFINIBAND_HFI_VNIC) += hfi_vnic.o
+
+hfi_vnic-y := hfi_vnic_netdev.o hfi_vnic_encap.o hfi_vnic_ethtool.o
diff --git a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.c 
b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.c
new file mode 100644
index 000..159172c
--- /dev/null
+++ b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNE

[RFC v3 10/11] IB/hfi1: Virtual Network Interface Controller (VNIC) HW support

2017-02-07 Thread Vishwanathapura, Niranjana
HFI1 HW specific support for VNIC functionality.
Dynamically allocate a set of contexts for VNIC when the first vnic
port is instantiated. Allocate VNIC contexts from user contexts pool
and return them back to the same pool while freeing up. Set aside
enough MSI-X interrupts for VNIC contexts and assign them when the
contexts are allocated. On the receive side, use an RSM rule to
spread TCP/UDP streams among VNIC contexts.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andrzej Kacprowski 
---
 drivers/infiniband/hw/hfi1/aspm.h |  15 +-
 drivers/infiniband/hw/hfi1/chip.c | 293 +-
 drivers/infiniband/hw/hfi1/chip.h |   4 +-
 drivers/infiniband/hw/hfi1/debugfs.c  |   8 +-
 drivers/infiniband/hw/hfi1/driver.c   |  52 --
 drivers/infiniband/hw/hfi1/file_ops.c |  27 ++-
 drivers/infiniband/hw/hfi1/hfi.h  |  29 ++-
 drivers/infiniband/hw/hfi1/init.c |  29 +--
 drivers/infiniband/hw/hfi1/mad.c  |  10 +-
 drivers/infiniband/hw/hfi1/pio.c  |  19 +-
 drivers/infiniband/hw/hfi1/pio.h  |   8 +-
 drivers/infiniband/hw/hfi1/sysfs.c|   4 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c |   8 +-
 drivers/infiniband/hw/hfi1/user_pages.c   |   5 +-
 drivers/infiniband/hw/hfi1/verbs.c|   8 +-
 drivers/infiniband/hw/hfi1/vnic.h |   3 +
 drivers/infiniband/hw/hfi1/vnic_main.c| 245 -
 include/rdma/opa_port_info.h  |   4 +-
 18 files changed, 663 insertions(+), 108 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/aspm.h 
b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3..794e681 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long 
data)
spin_unlock_irqrestore(&rcd->aspm_lock, flags);
 }
 
-/* Disable interrupt processing for verbs contexts when PSM contexts are open 
*/
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
 static inline void aspm_disable_all(struct hfi1_devdata *dd)
 {
struct hfi1_ctxtdata *rcd;
unsigned long flags;
unsigned i;
 
-   for (i = 0; i < dd->first_user_ctxt; i++) {
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
del_timer_sync(&rcd->aspm_timer);
spin_lock_irqsave(&rcd->aspm_lock, flags);
@@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd)
if (aspm_mode != ASPM_MODE_DYNAMIC)
return;
 
-   for (i = 0; i < dd->first_user_ctxt; i++) {
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
spin_lock_irqsave(&rcd->aspm_lock, flags);
rcd->aspm_intr_enable = true;
@@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
(unsigned long)rcd);
rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
aspm_mode == ASPM_MODE_DYNAMIC &&
-   rcd->ctxt < rcd->dd->first_user_ctxt;
+   rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
 }
 
 static inline void aspm_init(struct hfi1_devdata *dd)
@@ -286,7 +289,7 @@ static inline void aspm_init(struct hfi1_devdata *dd)
spin_lock_init(&dd->aspm_lock);
dd->aspm_supported = aspm_hw_l1_supported(dd);
 
-   for (i = 0; i < dd->first_user_ctxt; i++)
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++)
aspm_ctx_init(dd->rcd[i]);
 
/* Start with ASPM disabled */
diff --git a/drivers/infiniband/hw/hfi1/chip.c 
b/drivers/infiniband/hw/hfi1/chip.c
index ef72bc2..fe19066 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -125,9 +125,16 @@ struct flag_table {
 #define DEFAULT_KRCVQS   2
 #define MIN_KERNEL_KCTXTS 2
 #define FIRST_KERNEL_KCTXT1
-/* sizes for both the QP and RSM map tables */
-#define NUM_MAP_ENTRIES256
-#define NUM_MAP_REGS 32
+
+/*
+ * RSM instance allocation
+ *   0 - Verbs
+ *   1 - User Fecn Handling
+ *   2 - Vnic
+ */
+#define RSM_INS_VERBS 0
+#define RSM_INS_FECN  1
+#define RSM_INS_VNIC  2
 
 /* Bit offset into the GUID which carries HFI id information */
 #define GUID_HFI_INDEX_SHIFT

[RFC v3 11/11] IB/hfi1: VNIC SDMA support

2017-02-07 Thread Vishwanathapura, Niranjana
HFI1 VNIC SDMA support enables transmission of VNIC packets over SDMA.
Map VNIC queues to SDMA engines and support halting and wakeup of the
VNIC queues.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 drivers/infiniband/hw/hfi1/Makefile|   2 +-
 drivers/infiniband/hw/hfi1/hfi.h   |   1 +
 drivers/infiniband/hw/hfi1/init.c  |   1 +
 drivers/infiniband/hw/hfi1/vnic.h  |  28 +++
 drivers/infiniband/hw/hfi1/vnic_main.c |  24 ++-
 drivers/infiniband/hw/hfi1/vnic_sdma.c | 323 +
 6 files changed, 376 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_sdma.c

diff --git a/drivers/infiniband/hw/hfi1/Makefile 
b/drivers/infiniband/hw/hfi1/Makefile
index 2280538..88085f6 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
-   verbs_txreq.o vnic_main.o
+   verbs_txreq.o vnic_main.o vnic_sdma.o
 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
 
 CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 39bcd46..59109e4 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -835,6 +835,7 @@ struct hfi1_asic_data {
 /* Virtual NIC information */
 struct hfi1_vnic_data {
struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
+   struct kmem_cache *txreq_cache;
u8 num_vports;
struct idr vesw_idr;
u8 rmt_start;
diff --git a/drivers/infiniband/hw/hfi1/init.c 
b/drivers/infiniband/hw/hfi1/init.c
index e3a94d4..24e6e9b 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -680,6 +680,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
dd->process_pio_send = hfi1_verbs_send_pio;
dd->process_dma_send = hfi1_verbs_send_dma;
dd->pio_inline_send = pio_copy;
+   dd->process_vnic_dma_send = hfi1_vnic_send_dma;
 
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
diff --git a/drivers/infiniband/hw/hfi1/vnic.h 
b/drivers/infiniband/hw/hfi1/vnic.h
index 10ae2be..72dce3b 100644
--- a/drivers/infiniband/hw/hfi1/vnic.h
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -49,6 +49,7 @@
 
 #include 
 #include "hfi.h"
+#include "sdma.h"
 
 #define HFI1_VNIC_MAX_TXQ 16
 #define HFI1_VNIC_MAX_PAD 12
@@ -85,6 +86,26 @@
 #define HFI_VNIC_MAX_QUEUE 16
 
 /**
+ * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
+ * @dd - device data pointer
+ * @sde - sdma engine
+ * @vinfo - vnic info pointer
+ * @wait - iowait structure
+ * @stx - sdma tx request
+ * @state - vnic Tx ring SDMA state
+ * @q_idx - vnic Tx queue index
+ */
+struct hfi1_vnic_sdma {
+   struct hfi1_devdata *dd;
+   struct sdma_engine  *sde;
+   struct hfi1_vnic_vport_info *vinfo;
+   struct iowait wait;
+   struct sdma_txreq stx;
+   unsigned int state;
+   u8 q_idx;
+};
+
+/**
  * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue
  * @idx: queue index
  * @vinfo: pointer to vport information
@@ -111,6 +132,7 @@ struct hfi1_vnic_rx_queue {
  * @vesw_id: virtual switch id
  * @rxq: Array of receive queues
  * @stats: per queue stats
+ * @sdma: VNIC SDMA structure per TXQ
  */
 struct hfi1_vnic_vport_info {
struct hfi1_devdata *dd;
@@ -126,6 +148,7 @@ struct hfi1_vnic_vport_info {
struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT];
 
struct hfi_vnic_stats  stats[HFI_VNIC_MAX_QUEUE];
+   struct hfi1_vnic_sdma  sdma[HFI1_VNIC_MAX_TXQ];
 };
 
 #define v_dbg(format, arg...) \
@@ -138,8 +161,13 @@ struct hfi1_vnic_vport_info {
 /* vnic hfi1 internal functions */
 void hfi1_vnic_setup(struct hfi1_devdata *dd);
 void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
 
 void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet);
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo);
+bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+   u8 q_idx);
 
 /* vnic rdma netdev operations */
 struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c 
b/drivers/infiniband/hw/hfi1/vnic_main.c
index 9952a04..e859846 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -408,6 +408,10 @@ static void hfi1_vnic_maybe_stop_tx(struct 
hfi1_vnic_vport_info *vinfo,
u8 q_idx)
 {
netif_stop_subqueue(vinfo->netdev, q_idx);
+   if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
+   return;
+
+ 

[RFC v3 09/11] IB/hfi1: HFI_VNIC RDMA netdev support

2017-02-07 Thread Vishwanathapura, Niranjana
Add support to create and free HFI_VNIC rdma netdev devices.
Implement netstack interface functionality including xmit_skb,
receive side NAPI etc. Also implement rdma netdev control functions.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andrzej Kacprowski 
---
 drivers/infiniband/hw/hfi1/Makefile|   2 +-
 drivers/infiniband/hw/hfi1/driver.c|  25 +-
 drivers/infiniband/hw/hfi1/hfi.h   |  27 +-
 drivers/infiniband/hw/hfi1/init.c  |   9 +-
 drivers/infiniband/hw/hfi1/vnic.h  | 153 
 drivers/infiniband/hw/hfi1/vnic_main.c | 646 +
 6 files changed, 855 insertions(+), 7 deletions(-)
 create mode 100644 drivers/infiniband/hw/hfi1/vnic.h
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_main.c

diff --git a/drivers/infiniband/hw/hfi1/Makefile 
b/drivers/infiniband/hw/hfi1/Makefile
index 0cf97a0..2280538 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
-   verbs_txreq.o
+   verbs_txreq.o vnic_main.o
 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
 
 CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/driver.c 
b/drivers/infiniband/hw/hfi1/driver.c
index 4fbaee6..3c226a8 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -59,6 +59,7 @@
 #include "trace.h"
 #include "qp.h"
 #include "sdma.h"
+#include "vnic.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -1359,15 +1360,31 @@ int process_receive_ib(struct hfi1_packet *packet)
return RHF_RCV_CONTINUE;
 }
 
+static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
+{
+   /* Packet received in VNIC context via RSM */
+   if (packet->rcd->is_vnic)
+   return true;
+
+   if ((HFI1_GET_L2_TYPE(packet->ebuf) == HFI_VNIC_L2_TYPE) &&
+   (HFI1_GET_L4_TYPE(packet->ebuf) == HFI_VNIC_L4_ETHR))
+   return true;
+
+   return false;
+}
+
 int process_receive_bypass(struct hfi1_packet *packet)
 {
struct hfi1_devdata *dd = packet->rcd->dd;
 
-   if (unlikely(rhf_err_flags(packet->rhf)))
+   if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
+   } else if (hfi1_is_vnic_packet(packet)) {
+   hfi1_vnic_bypass_rcv(packet);
+   return RHF_RCV_CONTINUE;
+   }
 
-   dd_dev_err(dd,
-  "Bypass packets are not supported in normal operation. 
Dropping\n");
+   dd_dev_err(dd, "Unsupported bypass packet. Dropping\n");
incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
u64 *flits = packet->ebuf;
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 751a0fb..a3641fb 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,7 @@
 #ifndef _HFI1_KERNEL_H
 #define _HFI1_KERNEL_H
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -337,6 +337,12 @@ struct hfi1_ctxtdata {
 * packets with the wrong interrupt handler.
 */
int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+
+   /* Indicates that this is vnic context */
+   bool is_vnic;
+
+   /* vnic queue index this context is mapped to */
+   u8 vnic_q_idx;
 };
 
 /*
@@ -809,6 +815,19 @@ struct hfi1_asic_data {
struct hfi1_i2c_bus *i2c_bus1;
 };
 
+/*
+ * Number of VNIC contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI_VNIC_MAX_QUEUE).
+ */
+#define HFI1_NUM_VNIC_CTXT   8
+
+/* Virtual NIC information */
+struct hfi1_vnic_data {
+   struct idr vesw_idr;
+};
+
+struct hfi1_vnic_vport_info;
+
 /* device data struct now contains only "general per-device" info.
  * fields related to a physical IB port are in a hfi1_pportdata struct.
  */
@@ -1116,6 +1135,9 @@ struct hfi1_devdata {
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+   int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx,
+stru

[RFC v3 07/11] IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) interface

2017-02-07 Thread Vishwanathapura, Niranjana
HFI VNIC EMA interface functions are the management interfaces to the HFI
VNIC netdev. Add support to add and remove VNIC ports. Implement the
required GET/SET management interface functions and processing of new
management information. Add support to send trap notifications upon various
events like interface status change, unicast/multicast mac list update and
mac address change.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Tanya K Jajodia 
---
 drivers/infiniband/ulp/hfi_vnic/Makefile   |   3 +-
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h   |   4 +
 .../infiniband/ulp/hfi_vnic/hfi_vnic_internal.h|  44 +++
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_netdev.c  | 144 +++-
 .../infiniband/ulp/hfi_vnic/hfi_vnic_vema_iface.c  | 390 +
 5 files changed, 582 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_vema_iface.c

diff --git a/drivers/infiniband/ulp/hfi_vnic/Makefile 
b/drivers/infiniband/ulp/hfi_vnic/Makefile
index ad7db32..32d6821 100644
--- a/drivers/infiniband/ulp/hfi_vnic/Makefile
+++ b/drivers/infiniband/ulp/hfi_vnic/Makefile
@@ -3,4 +3,5 @@
 #
 obj-$(CONFIG_INFINIBAND_HFI_VNIC) += hfi_vnic.o
 
-hfi_vnic-y := hfi_vnic_netdev.o hfi_vnic_encap.o hfi_vnic_ethtool.o
+hfi_vnic-y := hfi_vnic_netdev.o hfi_vnic_encap.o hfi_vnic_ethtool.o \
+  hfi_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h 
b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h
index 73c2bdc..be6d870 100644
--- a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h
+++ b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h
@@ -95,6 +95,10 @@
 #define HFI_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd)  (!!((dlid_sd) & 0x20))
 #define HFI_VNIC_DLID_SD_GET_DLID(dlid_sd)((dlid_sd) >> 8)
 
+/* VNIC Ethernet link status */
+#define HFI_VNIC_ETH_LINK_UP 1
+#define HFI_VNIC_ETH_LINK_DOWN   2
+
 /**
  * struct hfi_vesw_info - HFI vnic switch information
  * @fabric_id: 10-bit fabric id
diff --git a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_internal.h 
b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_internal.h
index a94dd2a..2ee80a7 100644
--- a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_internal.h
+++ b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_internal.h
@@ -161,14 +161,28 @@ struct __hfi_veswport_trap {
 } __packed;
 
 /**
+ * struct hfi_vnic_ctrl_port - HFI virtual NIC control port
+ * @ibdev: pointer to ib device
+ * @ops: hfi vnic control operations
+ */
+struct hfi_vnic_ctrl_port {
+   struct ib_device   *ibdev;
+   struct hfi_vnic_ctrl_ops   *ops;
+};
+
+/**
  * struct hfi_vnic_adapter - HFI VNIC netdev private data structure
  * @netdev: pointer to associated netdev
  * @ibdev: ib device
+ * @cport: pointer to hfi vnic control port
  * @rn_ops: rdma netdev's net_device_ops
  * @port_num: OPA port number
  * @vport_num: vesw port number
  * @lock: adapter lock
  * @info: virtual ethernet switch port information
+ * @vema_mac_addr: mac address configured by vema
+ * @umac_hash: unicast maclist hash
+ * @mmac_hash: multicast maclist hash
  * @mactbl: hash table of MAC entries
  * @mactbl_lock: mac table lock
  * @stats_lock: statistics lock
@@ -177,6 +191,7 @@ struct __hfi_veswport_trap {
 struct hfi_vnic_adapter {
struct net_device *netdev;
struct ib_device  *ibdev;
+   struct hfi_vnic_ctrl_port *cport;
const struct net_device_ops   *rn_ops;
 
u8 port_num;
@@ -186,6 +201,9 @@ struct hfi_vnic_adapter {
struct mutex lock;
 
struct __hfi_veswport_info  info;
+   u8  vema_mac_addr[ETH_ALEN];
+   u32 umac_hash;
+   u32 mmac_hash;
struct hlist_head  __rcu   *mactbl;
 
/* Lock used to protect updates to mac table */
@@ -225,6 +243,11 @@ struct hfi_vnic_mac_tbl_node {
 #define v_warn(format, arg...) \
netdev_warn(adapter->netdev, format, ## arg)
 
+#define c_err(format, arg...) \
+   dev_err(&cport->ibdev->dev, format, ## arg)
+#define c_info(format, arg...) \
+   dev_info(&cport->ibdev->dev, format, ## arg)
+
 /* The maximum allowed entries in the mac table */
 #define HFI_VNIC_MAC_TBL_MAX_ENTRIES  2048
 /* Limit of smac entries in mac table */
@@ -264,11 +287,32 @@ struct hfi_vnic_adapter *hfi_vnic_add_netdev(struct 
ib_device *ibdev,
 void hfi_vnic_encap_skb(struct hfi_vnic_adapter *adapter, struct sk_buff *skb);
 u8 hfi_vnic_get_vl(struct hfi_vnic_adapter *adapter, struct sk_buff *skb);
 u8 hfi_vnic_calc_entropy(struct hfi_vnic_adapter *adapter, struct sk_buff 
*skb);
+void hfi_vnic_process_vema_config(struct hfi_vnic_adapter *adapter);
 void hfi_vnic_release_mac_tbl(struct hfi_vnic_adapter *adapter);
 void hfi_vnic_query_mac_tbl(struct hfi_vnic_adapter *adapter,
struct hfi_veswport_mactable *tbl);
 int hfi_vnic

[RFC v3 06/11] IB/hfi-vnic: VNIC MAC table support

2017-02-07 Thread Vishwanathapura, Niranjana
HFI VNIC MAC table contains the MAC address to DLID mappings provided by
the Ethernet manager. During transmission, the MAC table provides the MAC
address to DLID translation. Implement MAC table using simple hash list.
Also provide support to update/query the MAC table by Ethernet manager.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
---
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.c   | 236 +
 .../infiniband/ulp/hfi_vnic/hfi_vnic_internal.h|  53 -
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_netdev.c  |   4 +
 3 files changed, 292 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.c 
b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.c
index 159172c..dc4f97f 100644
--- a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.c
+++ b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.c
@@ -96,6 +96,238 @@ static inline void hfi_vnic_make_header(u8 *hdr, u32 slid, 
u32 dlid, u16 len,
memcpy(hdr, h, HFI_VNIC_HDR_LEN);
 }
 
+/*
+ * Using a simple hash table for mac table implementation with the last octet
+ * of mac address as a key.
+ */
+static void hfi_vnic_free_mac_tbl(struct hlist_head *mactbl)
+{
+   struct hfi_vnic_mac_tbl_node *node;
+   struct hlist_node *tmp;
+   int bkt;
+
+   if (!mactbl)
+   return;
+
+   vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) {
+   hash_del(&node->hlist);
+   kfree(node);
+   }
+   kfree(mactbl);
+}
+
+static struct hlist_head *hfi_vnic_alloc_mac_tbl(void)
+{
+   u32 size = sizeof(struct hlist_head) * HFI_VNIC_MAC_TBL_SIZE;
+   struct hlist_head *mactbl;
+
+   mactbl = kzalloc(size, GFP_KERNEL);
+   if (!mactbl)
+   return ERR_PTR(-ENOMEM);
+
+   vnic_hash_init(mactbl);
+   return mactbl;
+}
+
+/* hfi_vnic_release_mac_tbl - empty and free the mac table */
+void hfi_vnic_release_mac_tbl(struct hfi_vnic_adapter *adapter)
+{
+   struct hlist_head *mactbl;
+
+   mutex_lock(&adapter->mactbl_lock);
+   mactbl = rcu_access_pointer(adapter->mactbl);
+   rcu_assign_pointer(adapter->mactbl, NULL);
+   synchronize_rcu();
+   hfi_vnic_free_mac_tbl(mactbl);
+   mutex_unlock(&adapter->mactbl_lock);
+}
+
+/*
+ * hfi_vnic_query_mac_tbl - query the mac table for a section
+ *
+ * This function implements a query of a specific section of the mac table.
+ * The function also expects the requested range to be valid.
+ */
+void hfi_vnic_query_mac_tbl(struct hfi_vnic_adapter *adapter,
+   struct hfi_veswport_mactable *tbl)
+{
+   struct hfi_vnic_mac_tbl_node *node;
+   struct hlist_head *mactbl;
+   int bkt;
+   u16 loffset, lnum_entries;
+
+   rcu_read_lock();
+   mactbl = rcu_dereference(adapter->mactbl);
+   if (!mactbl)
+   goto get_mac_done;
+
+   loffset = be16_to_cpu(tbl->offset);
+   lnum_entries = be16_to_cpu(tbl->num_entries);
+
+   vnic_hash_for_each(mactbl, bkt, node, hlist) {
+   struct __hfi_vnic_mactable_entry *nentry = &node->entry;
+   struct hfi_veswport_mactable_entry *entry;
+
+   if ((node->index < loffset) ||
+   (node->index >= (loffset + lnum_entries)))
+   continue;
+
+   /* populate entry in the tbl corresponding to the index */
+   entry = &tbl->tbl_entries[node->index - loffset];
+   memcpy(entry->mac_addr, nentry->mac_addr,
+  ARRAY_SIZE(entry->mac_addr));
+   memcpy(entry->mac_addr_mask, nentry->mac_addr_mask,
+  ARRAY_SIZE(entry->mac_addr_mask));
+   entry->dlid_sd = cpu_to_be32(nentry->dlid_sd);
+   }
+   tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest);
+get_mac_done:
+   rcu_read_unlock();
+}
+
+/*
+ * hfi_vnic_update_mac_tbl - update mac table section
+ *
+ * This function updates the specified section of the mac table.
+ * The procedure includes following steps.
+ *  - Allocate a new mac (hash) table.
+ *  - Add the specified entries to the new table.
+ *(except the ones that are requested to be deleted).
+ *  - Add all the other entries from the old mac table.
+ *  - If there is a failure, free the new table and return.
+ *  - Switch to the new table.
+ *  - Free the old table and return.
+ *
+ * The function also expects the requested range to be valid.
+ */
+int hfi_vnic_update_mac_tbl(struct hfi_vnic_adapter *adapter,
+   struct hfi_veswport_mactable *tbl)
+{
+   struct hfi_vnic_mac_tbl_node *node, *new_node;
+   struct hlist_head *new_mactbl, *old_mactbl;
+   int i, bkt, rc = 0;
+   u8 key;
+   u16 loffset, lnum_entries;
+
+   mutex_lock(&adapter->mactbl_lock);
+   /* allocate new mac table */
+   new_mactbl = hfi_vnic_alloc_mac_tbl();
+   

[RFC v3 05/11] IB/hfi-vnic: VNIC statistics support

2017-02-07 Thread Vishwanathapura, Niranjana
HFI VNIC driver statistics support maintains various counters including
standard netdev counters and the Ethernet manager defined counters.
Add the Ethtool hook to read the counters.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c | 128 +
 .../infiniband/ulp/hfi_vnic/hfi_vnic_internal.h|   4 +
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_netdev.c  |  20 
 3 files changed, 152 insertions(+)

diff --git a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c 
b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c
index ca058a4..a9925df 100644
--- a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c
+++ b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c
@@ -53,9 +53,137 @@
 
 #include "hfi_vnic_internal.h"
 
+enum {NETDEV_STATS, VNIC_STATS};
+
+struct vnic_stats {
+   char stat_string[ETH_GSTRING_LEN];
+   struct {
+   int type;
+   int sizeof_stat;
+   int stat_offset;
+   };
+};
+
+#define VNIC_STAT(m){ VNIC_STATS,   \
+ FIELD_SIZEOF(struct hfi_vnic_stats, m),   \
+ offsetof(struct hfi_vnic_stats, m) }
+#define VNIC_NETDEV_STAT(m) { NETDEV_STATS, \
+ FIELD_SIZEOF(struct net_device, m),   \
+ offsetof(struct net_device, m) }
+
+static struct vnic_stats vnic_gstrings_stats[] = {
+   /* NETDEV stats */
+   {"rx_packets", VNIC_NETDEV_STAT(stats.rx_packets)},
+   {"tx_packets", VNIC_NETDEV_STAT(stats.tx_packets)},
+   {"rx_bytes", VNIC_NETDEV_STAT(stats.rx_bytes)},
+   {"tx_bytes", VNIC_NETDEV_STAT(stats.tx_bytes)},
+   {"rx_errors", VNIC_NETDEV_STAT(stats.rx_errors)},
+   {"tx_errors", VNIC_NETDEV_STAT(stats.tx_errors)},
+   {"rx_dropped", VNIC_NETDEV_STAT(stats.rx_dropped)},
+   {"tx_dropped", VNIC_NETDEV_STAT(stats.tx_dropped)},
+
+   /* SUMMARY counters */
+   {"tx_unicast", VNIC_STAT(tx_grp.unicast)},
+   {"tx_mcastbcast", VNIC_STAT(tx_grp.mcastbcast)},
+   {"tx_untagged", VNIC_STAT(tx_grp.untagged)},
+   {"tx_vlan", VNIC_STAT(tx_grp.vlan)},
+
+   {"tx_64_size", VNIC_STAT(tx_grp.s_64)},
+   {"tx_65_127", VNIC_STAT(tx_grp.s_65_127)},
+   {"tx_128_255", VNIC_STAT(tx_grp.s_128_255)},
+   {"tx_256_511", VNIC_STAT(tx_grp.s_256_511)},
+   {"tx_512_1023", VNIC_STAT(tx_grp.s_512_1023)},
+   {"tx_1024_1518", VNIC_STAT(tx_grp.s_1024_1518)},
+   {"tx_1519_max", VNIC_STAT(tx_grp.s_1519_max)},
+
+   {"rx_unicast", VNIC_STAT(rx_grp.unicast)},
+   {"rx_mcastbcast", VNIC_STAT(rx_grp.mcastbcast)},
+   {"rx_untagged", VNIC_STAT(rx_grp.untagged)},
+   {"rx_vlan", VNIC_STAT(rx_grp.vlan)},
+
+   {"rx_64_size", VNIC_STAT(rx_grp.s_64)},
+   {"rx_65_127", VNIC_STAT(rx_grp.s_65_127)},
+   {"rx_128_255", VNIC_STAT(rx_grp.s_128_255)},
+   {"rx_256_511", VNIC_STAT(rx_grp.s_256_511)},
+   {"rx_512_1023", VNIC_STAT(rx_grp.s_512_1023)},
+   {"rx_1024_1518", VNIC_STAT(rx_grp.s_1024_1518)},
+   {"rx_1519_max", VNIC_STAT(rx_grp.s_1519_max)},
+
+   /* ERROR counters */
+   {"rx_fifo_errors", VNIC_NETDEV_STAT(stats.rx_fifo_errors)},
+   {"rx_length_errors", VNIC_NETDEV_STAT(stats.rx_length_errors)},
+
+   {"tx_fifo_errors", VNIC_NETDEV_STAT(stats.tx_fifo_errors)},
+   {"tx_carrier_errors", VNIC_NETDEV_STAT(stats.tx_carrier_errors)},
+
+   {"tx_dlid_zero", VNIC_STAT(tx_dlid_zero)},
+   {"tx_drop_state", VNIC_STAT(tx_drop_state)},
+   {"rx_drop_state", VNIC_STAT(rx_drop_state)},
+   {"rx_oversize", VNIC_STAT(rx_oversize)},
+   {"rx_runt", VNIC_STAT(rx_runt)},
+};
+
+#define VNIC_STATS_LEN  ARRAY_SIZE(vnic_gstrings_stats)
+
+/* vnic_get_sset_count - get string set count */
+static int vnic_get_sset_count(struct net_device *netdev, int sset)
+{
+   return (sset == ETH_SS_STATS) ? VNIC_STATS_LEN : -EOPNOTSUPP;
+}
+
+/* vnic_get_ethtool_stats - get statistics */
+static void vnic_get_ethtool_stats(struct net_device *netdev,
+  struct ethtool_stats *stats, u64 *data)
+{
+   struct hfi_vnic_adapter *adapter = hfi_vnic_priv(netdev);
+   struct hfi_vnic_stats vstats;
+   char *p = NULL;
+   int i;
+
+   memset(&vstats, 0, sizeof(vstats));
+   mutex_lock(&adapter->stats_lock);
+   adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+   for (i = 0; i < VNIC_STATS_LEN; i++) {
+   switch (vnic_gstrings_stats[i].type) {
+   case NETDEV_STATS:
+   p = (char *)netdev +
+ vnic_gstrings_stats[i].stat_offset;
+   break;
+   case VNIC_STATS:
+   p = (char *)&vstats +
+ vnic_gstrings_stats[i].stat_offset;
+  

[RFC v3 08/11] IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) function

2017-02-07 Thread Vishwanathapura, Niranjana
HFI VEMA function interfaces with the Infiniband MAD stack to exchange the
management information packets with the Ethernet Manager (EM).
It interfaces with the HFI VNIC netdev function to SET/GET the management
information. The information exchanged with the EM includes class port
details, encapsulation configuration, various counters, unicast and
multicast MAC list and the MAC table. It also supports sending traps
to the EM.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Tanya K Jajodia 
Signed-off-by: Sudeep Dutt 
---
 drivers/infiniband/ulp/hfi_vnic/Makefile   |2 +-
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c |   12 +
 .../infiniband/ulp/hfi_vnic/hfi_vnic_internal.h|   17 +-
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_vema.c| 1071 
 .../infiniband/ulp/hfi_vnic/hfi_vnic_vema_iface.c  |2 +-
 5 files changed, 1099 insertions(+), 5 deletions(-)
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_vema.c

diff --git a/drivers/infiniband/ulp/hfi_vnic/Makefile 
b/drivers/infiniband/ulp/hfi_vnic/Makefile
index 32d6821..9262b6c 100644
--- a/drivers/infiniband/ulp/hfi_vnic/Makefile
+++ b/drivers/infiniband/ulp/hfi_vnic/Makefile
@@ -4,4 +4,4 @@
 obj-$(CONFIG_INFINIBAND_HFI_VNIC) += hfi_vnic.o
 
 hfi_vnic-y := hfi_vnic_netdev.o hfi_vnic_encap.o hfi_vnic_ethtool.o \
-  hfi_vnic_vema_iface.o
+  hfi_vnic_vema.o hfi_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c 
b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c
index a9925df..aca54ca 100644
--- a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c
+++ b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c
@@ -125,6 +125,17 @@ struct vnic_stats {
 
 #define VNIC_STATS_LEN  ARRAY_SIZE(vnic_gstrings_stats)
 
+/* vnic_get_drvinfo - get driver info */
+static void vnic_get_drvinfo(struct net_device *netdev,
+struct ethtool_drvinfo *drvinfo)
+{
+   strlcpy(drvinfo->driver, hfi_vnic_driver_name, sizeof(drvinfo->driver));
+   strlcpy(drvinfo->version, hfi_vnic_driver_version,
+   sizeof(drvinfo->version));
+   strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
+   sizeof(drvinfo->bus_info));
+}
+
 /* vnic_get_sset_count - get string set count */
 static int vnic_get_sset_count(struct net_device *netdev, int sset)
 {
@@ -180,6 +191,7 @@ static void vnic_get_strings(struct net_device *netdev, u32 
stringset, u8 *data)
 
 /* ethtool ops */
 static const struct ethtool_ops hfi_vnic_ethtool_ops = {
+   .get_drvinfo = vnic_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_strings = vnic_get_strings,
.get_sset_count = vnic_get_sset_count,
diff --git a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_internal.h 
b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_internal.h
index 2ee80a7..ad61624 100644
--- a/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_internal.h
+++ b/drivers/infiniband/ulp/hfi_vnic/hfi_vnic_internal.h
@@ -164,10 +164,12 @@ struct __hfi_veswport_trap {
  * struct hfi_vnic_ctrl_port - HFI virtual NIC control port
  * @ibdev: pointer to ib device
  * @ops: hfi vnic control operations
+ * @num_ports: number of hfi ports
  */
 struct hfi_vnic_ctrl_port {
struct ib_device   *ibdev;
struct hfi_vnic_ctrl_ops   *ops;
+   u8  num_ports;
 };
 
 /**
@@ -187,6 +189,8 @@ struct hfi_vnic_ctrl_port {
  * @mactbl_lock: mac table lock
  * @stats_lock: statistics lock
  * @flow_tbl: flow to default port redirection table
+ * @trap_timeout: trap timeout
+ * @trap_count: no. of traps allowed within timeout period
  */
 struct hfi_vnic_adapter {
struct net_device *netdev;
@@ -213,6 +217,9 @@ struct hfi_vnic_adapter {
struct mutex stats_lock;
 
u8 flow_tbl[HFI_VNIC_FLOW_TBL_SIZE];
+
+   unsigned long trap_timeout;
+   u8trap_count;
 };
 
 /* Same as hfi_veswport_mactable_entry, but without bitwise attribute */
@@ -247,6 +254,8 @@ struct hfi_vnic_mac_tbl_node {
dev_err(&cport->ibdev->dev, format, ## arg)
 #define c_info(format, arg...) \
dev_info(&cport->ibdev->dev, format, ## arg)
+#define c_dbg(format, arg...) \
+   dev_dbg(&cport->ibdev->dev, format, ## arg)
 
 /* The maximum allowed entries in the mac table */
 #define HFI_VNIC_MAC_TBL_MAX_ENTRIES  2048
@@ -281,6 +290,9 @@ struct hfi_vnic_mac_tbl_node {
!obj && (bkt) < HFI_VNIC_MAC_TBL_SIZE; (bkt)++)   \
hlist_for_each_entry(obj, &name[bkt], member)
 
+extern char hfi_vnic_driver_name[];
+extern const char hfi_vnic_driver_version[];
+
 struct hfi_vnic_adapter *hfi_vnic_add_netdev(struct ib_device *ibdev,
 u8 port_num, u8 vport_num);
 void hfi_vnic_rem_netdev(struct hfi_vnic_adapter *adapter);
@@ -310,9 +322,8 @@ void hfi_vnic_get_

[RFC v3 02/11] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) interface

2017-02-07 Thread Vishwanathapura, Niranjana
Add rdma netdev interface to ib device structure allowing rdma netdev
devices to be allocated by ib clients.
Define HFI VNIC interface between hardware independent VNIC
functionality and the hardware dependent VNIC functionality.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 include/rdma/ib_verbs.h |  27 +
 include/rdma/opa_hfi.h  | 147 
 2 files changed, 174 insertions(+)
 create mode 100644 include/rdma/opa_hfi.h

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index b1ac973..b9897d9d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -55,6 +55,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -221,6 +222,7 @@ enum ib_device_cap_flags {
IB_DEVICE_SG_GAPS_REG   = (1ULL << 32),
IB_DEVICE_VIRTUAL_FUNCTION  = (1ULL << 33),
IB_DEVICE_RAW_SCATTER_FCS   = (1ULL << 34),
+   IB_DEVICE_RDMA_NETDEV_HFI_VNIC  = (1ULL << 35),
 };
 
 enum ib_signature_prot_cap {
@@ -1844,6 +1846,22 @@ struct ib_port_immutable {
u32   max_mad_size;
 };
 
+/* rdma netdev type - specifies protocol type */
+enum rdma_netdev_t {
+   RDMA_NETDEV_HFI_VNIC
+};
+
+/**
+ * struct rdma_netdev - rdma netdev
+ * For cases where netstack interfacing is required.
+ */
+struct rdma_netdev {
+   void *clnt_priv;
+
+   /* control functions */
+   void (*set_id)(struct net_device *netdev, int id);
+};
+
 struct ib_device {
struct device*dma_device;
 
@@ -2096,6 +2114,15 @@ struct ib_device {
   struct 
ib_rwq_ind_table_init_attr *init_attr,
   struct ib_udata 
*udata);
int(*destroy_rwq_ind_table)(struct 
ib_rwq_ind_table *wq_ind_table);
+   /* rdma netdev operations */
+   struct net_device *(*alloc_rdma_netdev)(
+   struct ib_device *device,
+   u8 port_num,
+   enum rdma_netdev_t type,
+   const char *name,
+   unsigned char name_assign_type,
+   void (*setup)(struct net_device *));
+   void (*free_rdma_netdev)(struct net_device *netdev);
struct ib_dma_mapping_ops   *dma_ops;
 
struct module   *owner;
diff --git a/include/rdma/opa_hfi.h b/include/rdma/opa_hfi.h
new file mode 100644
index 000..f357d04
--- /dev/null
+++ b/include/rdma/opa_hfi.h
@@ -0,0 +1,147 @@
+#ifndef _OPA_HFI_H
+#define _OPA_HFI_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in
+ *the documentation and/or other materials provided with the
+ *distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *contributors may be used to endorse or promote products derived
+ *from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVE

[RFC v3 01/11] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) documentation

2017-02-07 Thread Vishwanathapura, Niranjana
Add HFI VNIC design document explaining the VNIC architecture and the
driver design.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 Documentation/infiniband/hfi_vnic.txt | 102 ++
 1 file changed, 102 insertions(+)
 create mode 100644 Documentation/infiniband/hfi_vnic.txt

diff --git a/Documentation/infiniband/hfi_vnic.txt 
b/Documentation/infiniband/hfi_vnic.txt
new file mode 100644
index 000..c6c801e
--- /dev/null
+++ b/Documentation/infiniband/hfi_vnic.txt
@@ -0,0 +1,102 @@
+Intel Omni-Path Host Fabric Interface (HFI) Virtual Network Interface
+Controller (VNIC) feature supports Ethernet functionality over Omni-Path
+fabric by encapsulating the Ethernet packets between HFI nodes.
+
+The patterns of exchanges of Omni-Path encapsulated Ethernet packets
+involve one or more virtual Ethernet switches overlaid on the Omni-Path
+fabric topology. A subset of HFI nodes on the Omni-Path fabric are
+permitted to exchange encapsulated Ethernet packets across a particular
+virtual Ethernet switch. The virtual Ethernet switches are logical
+abstractions achieved by configuring the HFI nodes on the fabric for
+header generation and processing. In the simplest configuration all HFI
+nodes across the fabric exchange encapsulated Ethernet packets over a
+single virtual Ethernet switch. A virtual Ethernet switch is effectively
+an independent Ethernet network. The configuration is performed by an
+Ethernet Manager (EM) which is part of the trusted Fabric Manager (FM)
+application. HFI nodes can have multiple VNICs each connected to a
+different virtual Ethernet switch. The below diagram presents a case
+of two virtual Ethernet switches with two HFI nodes.
+
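+                               +-------------------+
+                               |   Subnet/         |
+                               |   Ethernet        |
+                               |   Manager         |
+                               +-------------------+
+                                  /          /
+                                /           /
+                              /            /
+                            /             /
++-----------------------------+  +------------------------------+
+|  Virtual Ethernet Switch    |  |  Virtual Ethernet Switch     |
+|  +---------+    +---------+ |  | +---------+    +---------+   |
+|  | VPORT   |    |  VPORT  | |  | |  VPORT  |    |  VPORT  |   |
++--+---------+----+---------+-+  +-+---------+----+---------+---+
+         |                 \        /                 |
+         |                   \    /                   |
+         |                     \/                     |
+         |                    /  \                    |
+         |                  /      \                  |
+     +-----------+------------+  +-----------+------------+
+     |   VNIC    |    VNIC    |  |    VNIC   |    VNIC    |
+     +-----------+------------+  +-----------+------------+
+     |          HFI           |  |          HFI           |
+     +------------------------+  +------------------------+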
+
+
+Intel HFI VNIC software design is presented in the below diagram.
+HFI VNIC functionality has a HW dependent component and a HW
+independent component.
+
+The support has been added for IB device to allocate and free the RDMA
+netdev devices. The RDMA netdev supports interfacing with the network
+stack thus creating standard network interfaces. HFI_VNIC is an RDMA
+netdev device type.
+
+The HW dependent VNIC functionality is part of the HFI1 driver. It
+implements the verbs to allocate and free the HFI_VNIC RDMA netdev.
+It involves HW resource allocation/management for VNIC functionality.
+It interfaces with the network stack and implements the required
+net_device_ops functions. It expects Omni-Path encapsulated Ethernet
+packets in the transmit path and provides HW access to them. It strips
+the Omni-Path header from the received packets before passing them up
+the network stack. It also implements the RDMA netdev control operations.
+
+The HFI VNIC module implements the HW independent VNIC functionality.
+It consists of two parts. The VNIC Ethernet Management Agent (VEMA)
+registers itself with IB core as an IB client and interfaces with the
+IB MAD stack. It exchanges the management information with the Ethernet
+Manager (EM) and the VNIC netdev. The VNIC netdev part allocates and frees
+the HFI_VNIC RDMA netdev devices. It overrides the net_device_ops functions
+set by HW dependent VNIC driver where required to accommodate any control
+operation. It also handles the encapsulation of Ethernet packets with an
+Omni-Path header in the transmit path. For each VNIC interface, the
+information required for encapsulation is configured by the EM via VEMA MAD
+interface. It also passes any control information to the HW dependent driver
+by invoking the RDMA netdev control operations.
+
++---+ +--

[RFC v3 00/11] HFI Virtual Network Interface Controller (VNIC)

2017-02-07 Thread Vishwanathapura, Niranjana
ack  |
+---+ +--+
 |   |  |
 |   |  |
++  |
||  |
|  HFI VNIC Module   |  |
|  (HFI VNIC RDMA Netdev |  |
| & EMA functions)   |  |
||  |
++  |
|   |
|   |
   +--+ |
   | IB core  | |
   +--+ |
|   |
|   |
++
||
|  HFI1 Driver with VNIC support |
||
++

Vishwanathapura, Niranjana (11):
  IB/hfi-vnic: Virtual Network Interface Controller (VNIC) documentation
  IB/hfi-vnic: Virtual Network Interface Controller (VNIC) interface
  IB/hfi-vnic: Virtual Network Interface Controller (VNIC) netdev
  IB/hfi-vnic: VNIC Ethernet Management (EM) structure definitions
  IB/hfi-vnic: VNIC statistics support
  IB/hfi-vnic: VNIC MAC table support
  IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) interface
  IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) function
  IB/hfi1: HFI_VNIC RDMA netdev support
  IB/hfi1: Virtual Network Interface Controller (VNIC) HW support
  IB/hfi1: VNIC SDMA support

 Documentation/infiniband/hfi_vnic.txt  |  102 ++
 MAINTAINERS|7 +
 drivers/infiniband/Kconfig |1 +
 drivers/infiniband/hw/hfi1/Makefile|2 +-
 drivers/infiniband/hw/hfi1/aspm.h  |   15 +-
 drivers/infiniband/hw/hfi1/chip.c  |  293 +-
 drivers/infiniband/hw/hfi1/chip.h  |4 +-
 drivers/infiniband/hw/hfi1/debugfs.c   |8 +-
 drivers/infiniband/hw/hfi1/driver.c|   77 +-
 drivers/infiniband/hw/hfi1/file_ops.c  |   27 +-
 drivers/infiniband/hw/hfi1/hfi.h   |   57 +-
 drivers/infiniband/hw/hfi1/init.c  |   39 +-
 drivers/infiniband/hw/hfi1/mad.c   |   10 +-
 drivers/infiniband/hw/hfi1/pio.c   |   19 +-
 drivers/infiniband/hw/hfi1/pio.h   |8 +-
 drivers/infiniband/hw/hfi1/sysfs.c |4 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c  |8 +-
 drivers/infiniband/hw/hfi1/user_pages.c|5 +-
 drivers/infiniband/hw/hfi1/verbs.c |8 +-
 drivers/infiniband/hw/hfi1/vnic.h  |  184 
 drivers/infiniband/hw/hfi1/vnic_main.c |  909 +
 drivers/infiniband/hw/hfi1/vnic_sdma.c |  323 ++
 drivers/infiniband/ulp/Makefile|1 +
 drivers/infiniband/ulp/hfi_vnic/Kconfig|8 +
 drivers/infiniband/ulp/hfi_vnic/Makefile   |7 +
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.c   |  475 +
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h   |  485 +
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c |  205 
 .../infiniband/ulp/hfi_vnic/hfi_vnic_internal.h|  329 ++
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_netdev.c  |  388 +++
 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_vema.c| 1071 
 .../infiniband/ulp/hfi_vnic/hfi_vnic_vema_iface.c  |  390 +++
 include/rdma/ib_verbs.h|   27 +
 include/rdma/opa_hfi.h |  147 +++
 include/rdma/opa_port_info.h   |4 +-
 35 files changed, 5533 insertions(+), 114 deletions(-)
 create mode 100644 Documentation/infiniband/hfi_vnic.txt
 create mode 100644 drivers/infiniband/hw/hfi1/vnic.h
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_main.c
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_sdma.c
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/Kconfig
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/Makefile
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.c
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_encap.h
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_ethtool.c
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_internal.h
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_netdev.c
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_vema.c
 create mode 100644 drivers/infiniband/ulp/hfi_vnic/hfi_vnic_vema_iface.c
 create mode 100644 include/rdma/opa_hfi.h

-- 
1.8.3.1



Re: [RFC v2 00/10] HFI Virtual Network Interface Controller (VNIC)

2017-01-17 Thread Vishwanathapura, Niranjana

Thanks Jason for the valuable inputs.

Here is the new generic interface.

Overview:
Bottom driver defines net_device_ops. The upper driver can override it.
For example, upper driver can implement ndo_open() which calls bottom driver's 
ndo_open() and also do some book keeping.



include/rdma/ib_verbs.h:

/* rdma netdev type - specifies protocol type */
enum rdma_netdev_t {
RDMA_NETDEV_HFI_VNIC,
};

/* rdma netdev
 * For usecases where netstack interfacing is required.
 */
struct rdma_netdev {
struct net_device *netdev;
u8 port_num;

/* client private data structure */
void *clnt_priv;

/* control functions */
void (*set_id)(struct rdma_netdev *rn, int id);
void (*set_state)(struct rdma_netdev *rn, int state);
};

struct ib_device {
...
...
/* rdma netdev operations */
struct net_device *(*alloc_rdma_netdev)(struct ib_device *device,
u8 port_num,
enum rdma_netdev_t type,
const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *));
void (*free_rdma_netdev)(struct net_device *netdev);
};


hfi1 driver:

/* rdma netdev's private data structure */
struct hfi1_rdma_netdev {
struct rdma_netdev  rn; /* keep this first */
/* hfi1's vnic private data follows */
};


include/rdma/opa_hfi.h:

/* Client's ndo operations use below function instead of netdev_priv() */
static inline void *hfi_vnic_priv(const struct net_device *dev)
{
struct rdma_netdev *rn = netdev_priv(dev);

return rn->clnt_priv;
}

/* Overrides rtnl_link_stats64 to include hfi_vnic stats.
 * ndo_get_stats64() can be used to get the stats
 */
struct hfi_vnic_stats {
/* standard netdev statistics */
struct rtnl_link_stats64  netstat;

/* HFI VNIC statistics */
u64  tx_mcastbcast;
u64  tx_untagged;
u64  tx_vlan;
u64  tx_64_size;
u64  tx_65_127;
u64  tx_128_255;
u64  tx_256_511;
u64  tx_512_1023;
u64  tx_1024_1518;
u64  tx_1519_max;

u64  rx_untagged;
u64  rx_vlan;
u64  rx_64_size;
u64  rx_65_127;
u64  rx_128_255;
u64  rx_256_511;
u64  rx_512_1023;
u64  rx_1024_1518;
u64  rx_1519_max;

u64  rx_runt;
u64  rx_oversize;
};

I have started working on porting hfi_vnic as per this new interface.
I will post RFC v3 later.
Posting the interface definition early for comments.

Thanks,
Niranjana



Re: [RFC v2 00/10] HFI Virtual Network Interface Controller (VNIC)

2017-01-11 Thread Vishwanathapura, Niranjana

On Tue, Jan 10, 2017 at 10:14:02AM +0200, Leon Romanovsky wrote:

On Mon, Jan 09, 2017 at 07:39:54PM -0800, Vishwanathapura, Niranjana wrote:

We are also looking into Jason’s suggestion to make hfi_vnic interface to
the bottom driver a generic interface. This will include moving some of the
netstack interfacing to the bottom hfi1 driver.


Great, do you have rough estimation when will it be posted on the ML?

Thanks



Hi Leon,

I am currently doing some prototype on this. I will respond with the interface 
definition first once I have it solidified. RFC v3 with implementation will
follow later.


Niranjana




Re: [RFC v2 00/10] HFI Virtual Network Interface Controller (VNIC)

2017-01-09 Thread Vishwanathapura, Niranjana

On Mon, Jan 09, 2017 at 09:51:04AM +0200, Leon Romanovsky wrote:

On Thu, Dec 15, 2016 at 11:28:06AM -0500, Doug Ledford wrote:

On 12/15/2016 9:52 AM, ira.weiny wrote:

2) With more than 60% of the code being MAD related, and another
significant chunk being hfi related, and only a minor bit (20% maybe?)
being net related,


Hi Doug and Ira,

I may admit that I didn't read the code very deep, but from brief
overview, I didn't find support for the claim the "60% code is MAD related".
It looks like the opposite thing will be more accurate.

Can you help me to understand this claim? How did you come to this
conclusion?

Thanks


Hi Leon,

Here is the breakdown of patches based on functionality.
In this series, patches #3..#8 compose hfi_vnic driver. In that, patches #4, #7 
and #8 are MAD focused (interfacing with MAD agent and handling MAD packets).
Patch #6 and half of #3 (_encap.c/h) are OPA encapsulation related. Patch #5 is 
netdev statistic related (which includes statistics MAD definitions).
So, only part of patch #3 (_netdev.c and _ethtool.c) deals with interfacing 
with netstack.
Those percentage numbers are based on actual lines of code in these patches 
(files).


We are also looking into Jason’s suggestion to make hfi_vnic interface to the 
bottom driver a generic interface. This will include moving some of the 
netstack interfacing to the bottom hfi1 driver.


Niranjana



Re: [RFC v2 03/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) netdev

2016-12-18 Thread Vishwanathapura, Niranjana

On Thu, Dec 15, 2016 at 09:24:20PM -0700, Jason Gunthorpe wrote:

>>+struct __hfi_vesw_info {
>>+   u16  fabric_id;
>>+   u16  vesw_id;
>>+
>>+   u8   rsvd0[6];
>>+   u16  def_port_mask;
>>+
>>+   u8   rsvd1[2];
>>+   u16  pkey;
>>+
>>+   u8   rsvd2[4];
>>+   u32  u_mcast_dlid;
>>+   u32  u_ucast_dlid[HFI_VESW_MAX_NUM_DEF_PORT];
>>+
>>+   u8   rsvd3[44];
>>+   u16  eth_mtu[HFI_VNIC_MAX_NUM_PCP];
>>+   u16  eth_mtu_non_vlan;
>>+   u8   rsvd4[2];
>>+} __packed;
>
>This goes on the network too? Also looks like it has endian problems.
>
>Ditto for all the __packed structures.
>

This is in CPU format. There is a separate big endian version of
this


Why are CPU handled structures packed and full of reserved fields?
Don't pack them if they are not pushed out to the network..

There were lots of __packed structures; any that go on the network
need be/le annotations.



Well, the driver treats the reserved fields as sticky, i.e., the information
block returned (upon GET) to the EM is unchanged (from SET) except for a few
fields which the driver is expected to modify.
Structures that go on wire are big endian __packed structures in 
hfi_vnic_encap.h. Ok, I will remove the __packed attribute from CPU handled 
structures here.


Niranjana


Jason


Re: [RFC v2 00/10] HFI Virtual Network Interface Controller (VNIC)

2016-12-15 Thread Vishwanathapura, Niranjana

On Thu, Dec 15, 2016 at 09:56:11AM -0700, Jason Gunthorpe wrote:

On Wed, Dec 14, 2016 at 11:59:32PM -0800, Vishwanathapura, Niranjana wrote:

 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/Kconfig
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/Makefile


Still NAK on these paths, I already explained why 'sw' is totally
unsuitable. Put it in drivers/net or drivers/infiniband/ulp



I understand. I did not want to change the driver location until we reach
consensus on where it belongs.
In next revision, I will move it under drivers/infiniband/ulp/hfi_vnic.
If anybody thinks it should be in a different folder, let me know.

Niranjana


Jason


Re: [RFC v2 03/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) netdev

2016-12-15 Thread Vishwanathapura, Niranjana

On Thu, Dec 15, 2016 at 10:01:09AM -0700, Jason Gunthorpe wrote:

On Wed, Dec 14, 2016 at 11:59:35PM -0800, Vishwanathapura, Niranjana wrote:

+/**
+ * union hfi_vnic_bypass_hdr - VNIC bypass header
+ * @slid: source lid
+ * @length: length of packet
+ * @becn: backward explicit congestion notification
+ * @dlid: destination lid
+ * @sc: service class
+ * @fecn: forward explicit congestion notification
+ * @l2: L2 type (2=16B)
+ * @lt: link transfer field
+ * @l4: L4 type
+ * @slid_high: upper 4 bits of source lid
+ * @dlid_high: upper 4 bits of destination lid
+ * @pkey: partition key
+ * @entropy: entropy
+ * @age: packet age
+ * @l4_hdr: L4 header
+ */
+union hfi_vnic_bypass_hdr {
+   struct {
+   struct {
+   uint64_t slid   : 20;
+   uint64_t length : 11;
+   uint64_t becn   : 1;
+   uint64_t dlid   : 20;
+   uint64_t sc : 5;
+   uint64_t rsvd   : 3;
+   uint64_t fecn   : 1;
+   uint64_t l2 : 2;
+   uint64_t lt : 1;
+   };
+   struct {
+   uint64_t l4: 8;
+   uint64_t slid_high : 4;
+   uint64_t dlid_high : 4;
+   uint64_t pkey  : 16;
+   uint64_t entropy   : 16;
+   uint64_t age   : 8;
+   uint64_t rsvd1 : 8;
+   };
+   struct {
+   uint32_t rsvd2  : 16;
+   uint32_t l4_hdr : 16;
+   };
+   } __packed;
+   u32 dw[5];
+};


This isn't going to work on BE, please fix it.



We have made the hfi_vnic driver dependent on CONFIG_X86_64.
But I agree with all the feedback here. I will remove bitfields
and instead use bit operations in the next revision.


+/**
+ * struct __hfi_vesw_info - HFI vnic virtual switch info
+ */
+struct __hfi_vesw_info {
+   u16  fabric_id;
+   u16  vesw_id;
+
+   u8   rsvd0[6];
+   u16  def_port_mask;
+
+   u8   rsvd1[2];
+   u16  pkey;
+
+   u8   rsvd2[4];
+   u32  u_mcast_dlid;
+   u32  u_ucast_dlid[HFI_VESW_MAX_NUM_DEF_PORT];
+
+   u8   rsvd3[44];
+   u16  eth_mtu[HFI_VNIC_MAX_NUM_PCP];
+   u16  eth_mtu_non_vlan;
+   u8   rsvd4[2];
+} __packed;


This goes on the network too? Also looks like it has endian problems.

Ditto for all the __packed structures.



This is in CPU format. There is a separate big endian version of this structure 
defined in hfi_vnic_encap.h in below patch (which gets sent on wire).

https://www.spinics.net/lists/linux-rdma/msg44111.html


+#define v_dbg(format, arg...) \
+   netdev_dbg(adapter->netdev, format, ## arg)
+#define v_err(format, arg...) \
+   netdev_err(adapter->netdev, format, ## arg)
+#define v_info(format, arg...) \
+   netdev_info(adapter->netdev, format, ## arg)
+#define v_warn(format, arg...) \
+   netdev_warn(adapter->netdev, format, ## arg)


Relies on an 'adapter' local variable?? Ugly.



I am using the same approach as Intel NIC drivers like e1000e and ixgbe.


Jason


Re: [RFC v2 00/10] HFI Virtual Network Interface Controller (VNIC)

2016-12-15 Thread Vishwanathapura, Niranjana

On Thu, Dec 15, 2016 at 10:07:13AM -0700, Jason Gunthorpe wrote:

On Thu, Dec 15, 2016 at 11:28:06AM -0500, Doug Ledford wrote:


1) Since your intent is to make this work with multiple versions of the
hfi drivers, I disagree with Jason that just because there is only one
driver today that we should keep it simple.  Design it right from the
beginning of multi driver is your intent is, IMO, a better way to go.
You'll work out the bugs in the initial implementation and when it comes
time to add the second driver, things will go much more smoothly.


If that is your position then this should be a straight up IB ULP that
works with any IB hardware.

There is nothing HFI specific about it except for the
micro-optimization of pushing packets via SDMA instead of post_send,
and that same micro optimization probably applies to ipoib.



Responded on the other thread. As mentioned, there are differences between 
ipoib and hfi_vnic interface. For hfi_vnic, we need simple interface as defined 
in the include/rdma/opa_hfi.h that represents HW to put/get already 
encapsulated OPA packets.



In other words, lets see the first version as a straight ULP with no
special HFI hooks, then we can discuss how best to micro optimize it
for HFI SDMA.



As mentioned in other thread, that would be putting hfi_vnic_ctrl_ops in 
ib_device structure.


Niranjana


Jason


Re: [RFC v2 00/10] HFI Virtual Network Interface Controller (VNIC)

2016-12-15 Thread Vishwanathapura, Niranjana

On Thu, Dec 15, 2016 at 08:24:05PM -0500, ira.weiny wrote:

On Thu, Dec 15, 2016 at 11:48:37AM -0700, Jason Gunthorpe wrote:

On Thu, Dec 15, 2016 at 01:19:18PM -0500, Doug Ledford wrote:
> On 12/15/2016 12:07 PM, Jason Gunthorpe wrote:
> > On Thu, Dec 15, 2016 at 11:28:06AM -0500, Doug Ledford wrote:
> >
> >> 1) Since your intent is to make this work with multiple versions of the
> >> hfi drivers, I disagree with Jason that just because there is only one
> >> driver today that we should keep it simple.  Design it right from the
> >> beginning of multi driver is your intent is, IMO, a better way to go.
> >> You'll work out the bugs in the initial implementation and when it comes
> >> time to add the second driver, things will go much more smoothly.
> >
> > If that is your position then this should be a straight up IB ULP that
> > works with any IB hardware.
>
> Yes, see my comments in point #3 of my previous email...

Well, I'm not opposed to the vnic idea - Mellanox had (has?) a similar
IB driver. There are lots of good reasons to strictly maintain the
ethernet presentation.


Agreed.  I'm pretty worried about the idea of putting VNIC into IPoIB.  It
seems like a force fit at best.



Just to add what Jason, Ira already mentioned,
1) There isn't much common code between hfi_vnic and ipoib.
Besides, we expect both ipoib and hfi_vnic to function in parallel.
Registering with the network stack is also different.
hfi_vnic exchanges encapsulation information via IB MAD interface from OPA
EM which is not the case with ipoib.
We needed a minimal set of interfaces (defined in include/rdma/opa_hfi.h in this 
patch series) that represent the HW.


2) The design is very different. There are no path record queries, QPs etc in 
hfi_vnic.


3) hfi_vnic also does the encapsulation with fabric (OPA) header, so bottom 
driver only puts it on the wire.

Whereas in ipoib, bottom ib device driver does the encapsulation for ipoib.

4) hfi_vnic does not need ib work request/completion structures.
hfi_vnic supports multiple TX/RX queues.



There is much more going on here than just changing the LLADDR,
essentially everything MAD focused is different compared to ipoib, and
it looks like the required datastructures are different too. This is
more of a map a mac to a OPA_LRH approach with SA mediated discovery,
by my eye.

The main share is the 'skb send' part, we've talked about hoisting
that out of ipoib in the past anyhow. A generic verb along those lines
would probably allow the sdma optimization for hfi for both this new
ulp and ipoib without creating such an ugly HFI1 specific interface.


I'm not sure what you mean about "skb send" being used by ipoib.  Right now
IPoIB already supplies a "generic skb send" for _Verbs_ in ipoib_send.

I don't know what other devices would do to implement ipoib_send?  To me, it
seems like the abstraction for IPoIB is at the proper layer now.

For OPA, the hfi driver supports both IPoIB and VNIC.  So expecting IPoIB and
VNIC to use a generic "skb send" in ib_device is going to make hfi1 do a lot of
work to determine which ULP is calling it or make the interface kind of ugly.
Either way I don't see how this is better than a separate set of functions.

IMO the cleanest way to "clean up the ugly HFI1 interface" is to just  put the
VNIC operations into ib_device similar to the iWarp specific structure
"iw_cm_verbs" which is there today.

If a device supports the VNIC operations then it can set the pointer and if not
it will be NULL.  VNIC will look for that pointer for the support it needs.  If
in the future other devices need modifications to that interface we can modify
it then.

Ira


Yes, I agree. The interface defined in include/rdma/opa_hfi.h in this patch 
series is a pretty simple and generic interface that represents the HW.
If we include this file and put the hfi_vnic_ctrl_ops directly in the ib_device 
structure, then it will simplify a lot of stuff. We don't need to abstract
out hfi_ibdev and define any ib device capability flag for VNIC support.





Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC v2 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) interface

2016-12-15 Thread Vishwanathapura, Niranjana

On Wed, Dec 14, 2016 at 11:59:34PM -0800, Vishwanathapura, Niranjana wrote:

+
+static inline bool is_hfi_ibdev(struct ib_device *ibdev)
+{
+   return !memcmp(ibdev->name, "hfi", 3);
+}


I am thinking of adding a device capability flag to indicate HFI VNIC capability 
instead of relying on the device name as above to identify an hfi ib device.

Any comments? Probably it can be addressed by a separate patch later.

Niranjana





[RFC v2 08/10] IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) function

2016-12-15 Thread Vishwanathapura, Niranjana
HFI VEMA function interfaces with the Infiniband MAD stack to exchange the
management information packets with the Ethernet Manager (EM).
It interfaces with the HFI VNIC netdev function to SET/GET the management
information. The information exchanged with the EM includes class port
details, encapsulation configuration, various counters, unicast and
multicast MAC list and the MAC table. It also supports sending traps
to the EM.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Tanya K Jajodia 
Signed-off-by: Sudeep Dutt 
---
 drivers/infiniband/sw/intel/hfi_vnic/Makefile  |2 +-
 .../sw/intel/hfi_vnic/hfi_vnic_ethtool.c   |   12 +
 .../sw/intel/hfi_vnic/hfi_vnic_internal.h  |   11 +
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_vema.c   | 1024 
 .../sw/intel/hfi_vnic/hfi_vnic_vema_iface.c|4 +-
 5 files changed, 1050 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_vema.c

diff --git a/drivers/infiniband/sw/intel/hfi_vnic/Makefile 
b/drivers/infiniband/sw/intel/hfi_vnic/Makefile
index a0562af..16c0830 100644
--- a/drivers/infiniband/sw/intel/hfi_vnic/Makefile
+++ b/drivers/infiniband/sw/intel/hfi_vnic/Makefile
@@ -4,4 +4,4 @@
 obj-$(CONFIG_HFI_VNIC) += hfi_vnic.o
 
 hfi_vnic-y := hfi_vnic_netdev.o hfi_vnic_encap.o hfi_vnic_ethtool.o \
-  hfi_vnic_vema_iface.o
+  hfi_vnic_vema.o hfi_vnic_vema_iface.o
diff --git a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_ethtool.c 
b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_ethtool.c
index 9289ab2..9c2ed37 100644
--- a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_ethtool.c
+++ b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_ethtool.c
@@ -130,6 +130,17 @@ struct vnic_stats {
 
 #define VNIC_STATS_LEN  ARRAY_SIZE(vnic_gstrings_stats)
 
+/* vnic_get_drvinfo - get driver info */
+static void vnic_get_drvinfo(struct net_device *netdev,
+struct ethtool_drvinfo *drvinfo)
+{
+   strlcpy(drvinfo->driver, hfi_vnic_driver_name, sizeof(drvinfo->driver));
+   strlcpy(drvinfo->version, hfi_vnic_driver_version,
+   sizeof(drvinfo->version));
+   strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
+   sizeof(drvinfo->bus_info));
+}
+
 /* vnic_get_sset_count - get string set count */
 static int vnic_get_sset_count(struct net_device *netdev, int sset)
 {
@@ -183,6 +194,7 @@ static void vnic_get_strings(struct net_device *netdev, u32 
stringset, u8 *data)
 
 /* ethtool ops */
 static const struct ethtool_ops hfi_vnic_ethtool_ops = {
+   .get_drvinfo = vnic_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_strings = vnic_get_strings,
.get_sset_count = vnic_get_sset_count,
diff --git a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_internal.h 
b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_internal.h
index 7723a4e..b36bb76 100644
--- a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_internal.h
+++ b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_internal.h
@@ -246,10 +246,12 @@ struct __hfi_veswport_trap {
  * struct hfi_vnic_ctrl_port - HFI virtual NIC control port
  * @ibdev: pointer to ib device
  * @ops: hfi vnic control operations
+ * @num_ports: number of hfi ports
  */
 struct hfi_vnic_ctrl_port {
struct ib_device   *ibdev;
struct hfi_vnic_ctrl_ops   *ops;
+   u8  num_ports;
 };
 
 /**
@@ -280,6 +282,8 @@ struct hfi_vnic_rx_queue {
  * @mactbl_lock: mac table lock
  * @stats_lock: statistics lock
  * @flow_tbl: flow to default port redirection table
+ * @trap_timeout: trap timeout
+ * @trap_count: no. of traps allowed within timeout period
  * @q_sum_cntrs: per queue EM summary counters
  * @q_err_cntrs: per queue EM error counters
  * @q_rx_logic_errors: per queue rx logic (default) errors
@@ -314,6 +318,8 @@ struct hfi_vnic_adapter {
struct mutex stats_lock;
 
u8 flow_tbl[HFI_VNIC_FLOW_TBL_SIZE];
+   unsigned long trap_timeout;
+   u8trap_count;
 
struct __hfi_vnic_summary_counters  q_sum_cntrs[HFI_VNIC_MAX_QUEUE];
struct __hfi_vnic_error_countersq_err_cntrs[HFI_VNIC_MAX_QUEUE];
@@ -394,6 +400,9 @@ struct hfi_vnic_mac_tbl_node {
!obj && (bkt) < HFI_VNIC_MAC_TBL_SIZE; (bkt)++)   \
hlist_for_each_entry(obj, &name[bkt], member)
 
+extern char hfi_vnic_driver_name[];
+extern const char hfi_vnic_driver_version[];
+
 struct hfi_vnic_adapter *hfi_vnic_add_netdev(struct hfi_vnic_port *vport,
 struct device *parent);
 void hfi_vnic_rem_netdev(struct hfi_vnic_port *vport);
@@ -428,5 +437,7 @@ struct hfi_vnic_adapter *hfi_vnic_add_vport(struct 
hfi_vnic_ctrl_port *cport,
u8 port_num, u8 vport_num);
 void hfi_vnic_rem_vport(struct hfi_vnic_ada

[RFC v2 05/10] IB/hfi-vnic: VNIC statistics support

2016-12-15 Thread Vishwanathapura, Niranjana
HFI VNIC driver statistics support maintains various counters including
standard netdev counters and the Ethernet manager defined counters.
Add the Ethtool hook to read the counters.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c  |  19 +-
 .../sw/intel/hfi_vnic/hfi_vnic_ethtool.c   | 131 +++
 .../sw/intel/hfi_vnic/hfi_vnic_internal.h  |  84 +++
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_netdev.c | 260 -
 4 files changed, 486 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c 
b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c
index 093df67..3fdfb7b 100644
--- a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c
+++ b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c
@@ -209,8 +209,10 @@ int hfi_vnic_encap_skb(struct hfi_vnic_adapter *adapter, 
struct sk_buff *skb)
hdr->slid_high = info->vport.encap_slid >> 20;
 
dlid = hfi_vnic_get_dlid(adapter, skb, def_port);
-   if (unlikely(!dlid))
+   if (unlikely(!dlid)) {
+   adapter->q_err_cntrs[skb->queue_mapping].tx_dlid_zero++;
return -EFAULT;
+   }
 
hdr->dlid = dlid;
hdr->dlid_high = dlid >> 20;
@@ -233,6 +235,19 @@ int hfi_vnic_encap_skb(struct hfi_vnic_adapter *adapter, 
struct sk_buff *skb)
 /* hfi_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
 int hfi_vnic_decap_skb(struct hfi_vnic_rx_queue *rxq, struct sk_buff *skb)
 {
+   struct hfi_vnic_adapter *adapter = rxq->adapter;
+   int max_len = adapter->netdev->mtu + VLAN_ETH_HLEN;
+   int rc = -EFAULT;
+
skb_pull(skb, HFI_VNIC_HDR_LEN);
-   return 0;
+
+   /* Validate Packet length */
+   if (skb->len > max_len)
+   adapter->q_err_cntrs[rxq->idx].rx_oversize++;
+   else if (skb->len < ETH_ZLEN)
+   adapter->q_err_cntrs[rxq->idx].rx_runt++;
+   else
+   rc = 0;
+
+   return rc;
 }
diff --git a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_ethtool.c 
b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_ethtool.c
index 0b4da5e..9289ab2 100644
--- a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_ethtool.c
+++ b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_ethtool.c
@@ -53,9 +53,140 @@
 
 #include "hfi_vnic_internal.h"
 
+enum {NETDEV_STATS, VNIC_STATS};
+
+struct vnic_stats {
+   char stat_string[ETH_GSTRING_LEN];
+   struct {
+   int type;
+   int sizeof_stat;
+   int stat_offset;
+   };
+};
+
+#define VNIC_STAT(m){ VNIC_STATS,   \
+ FIELD_SIZEOF(struct hfi_vnic_adapter, m), \
+ offsetof(struct hfi_vnic_adapter, m) }
+#define VNIC_NETDEV_STAT(m) { NETDEV_STATS, \
+ FIELD_SIZEOF(struct net_device, m),   \
+ offsetof(struct net_device, m) }
+
+static struct vnic_stats vnic_gstrings_stats[] = {
+   /* NETDEV stats */
+   {"rx_packets", VNIC_NETDEV_STAT(stats.rx_packets)},
+   {"tx_packets", VNIC_NETDEV_STAT(stats.tx_packets)},
+   {"rx_bytes", VNIC_NETDEV_STAT(stats.rx_bytes)},
+   {"tx_bytes", VNIC_NETDEV_STAT(stats.tx_bytes)},
+   {"rx_errors", VNIC_NETDEV_STAT(stats.rx_errors)},
+   {"tx_errors", VNIC_NETDEV_STAT(stats.tx_errors)},
+   {"rx_dropped", VNIC_NETDEV_STAT(stats.rx_dropped)},
+   {"tx_dropped", VNIC_NETDEV_STAT(stats.tx_dropped)},
+
+   {"rx_fifo_errors", VNIC_NETDEV_STAT(stats.rx_fifo_errors)},
+   {"rx_missed_errors", VNIC_NETDEV_STAT(stats.rx_missed_errors)},
+   {"tx_carrier_errors", VNIC_NETDEV_STAT(stats.tx_carrier_errors)},
+   {"tx_fifo_errors", VNIC_NETDEV_STAT(stats.tx_fifo_errors)},
+
+   /* SUMMARY counters */
+   {"tx_unicast", VNIC_STAT(sum_cntrs.tx_grp.unicast)},
+   {"tx_mcastbcast", VNIC_STAT(sum_cntrs.tx_grp.mcastbcast)},
+   {"tx_untagged", VNIC_STAT(sum_cntrs.tx_grp.untagged)},
+   {"tx_vlan", VNIC_STAT(sum_cntrs.tx_grp.vlan)},
+
+   {"tx_64_size", VNIC_STAT(sum_cntrs.tx_grp.xx_64_size)},
+   {"tx_65_127", VNIC_STAT(sum_cntrs.tx_grp.xx_65_127)},
+   {"tx_128_255", VNIC_STAT(sum_cntrs.tx_grp.xx_128_255)},
+   {"tx_256_511", VNIC_STAT(sum_cntrs.tx_grp.xx_256_511)},
+   {"tx_512_1023", VNIC_STAT(sum_cntrs.tx_grp.xx_512_1023)},
+   {"tx_1024_1518", VNIC_STAT(sum_cntrs.tx_grp.xx_1024_1518)},
+   {"tx_1519_max", VNIC_STAT(sum_cntrs.tx_grp.xx_1519_max)},
+
+   {"rx_unicast", VNIC_STAT(sum_cntrs.rx_grp.unicast)},
+   {"rx_mcastbcast", VNIC_STAT(sum_cntrs.rx_grp.mcastbcast)},
+   {"rx_untagged", VNIC_STAT(sum_cntrs.rx_grp.untagged)},
+   {"rx_vlan", VNIC_STAT(sum_cntrs.rx_grp.vlan)},
+
+   {"rx_64_size", VNIC_STAT(sum_cntrs.rx_grp.xx_64_size)},
+   

[RFC v2 09/10] IB/hfi1: Virtual Network Interface Controller (VNIC) support

2016-12-15 Thread Vishwanathapura, Niranjana
HFI1 HW specific support for VNIC functionality. Add support to add
and remove VNIC ports. Also implement the operations to allocate
resources, transmit and receive of Omni-Path encapsulated Ethernet
packets.

Dynamically allocate a set of contexts for VNIC when the first vnic
port is instantiated. Allocate VNIC contexts from user contexts pool
and return them back to the same pool while freeing up. Set aside
enough MSI-X interrupts for VNIC contexts and assign them when the
contexts are allocated. On the receive side, use an RSM rule to
spread TCP/UDP streams among VNIC contexts.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andrzej Kacprowski 
---
 drivers/infiniband/hw/hfi1/Makefile   |   2 +-
 drivers/infiniband/hw/hfi1/aspm.h |  13 +-
 drivers/infiniband/hw/hfi1/chip.c | 270 +++--
 drivers/infiniband/hw/hfi1/chip.h |   2 +
 drivers/infiniband/hw/hfi1/debugfs.c  |   6 +-
 drivers/infiniband/hw/hfi1/driver.c   |  74 +++-
 drivers/infiniband/hw/hfi1/file_ops.c |  25 +-
 drivers/infiniband/hw/hfi1/hfi.h  |  49 ++-
 drivers/infiniband/hw/hfi1/init.c |  37 +-
 drivers/infiniband/hw/hfi1/mad.c  |   8 +-
 drivers/infiniband/hw/hfi1/pio.c  |  17 +
 drivers/infiniband/hw/hfi1/pio.h  |   6 +
 drivers/infiniband/hw/hfi1/sysfs.c|   2 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c |   6 +-
 drivers/infiniband/hw/hfi1/user_pages.c   |   3 +-
 drivers/infiniband/hw/hfi1/verbs.c|   7 +
 drivers/infiniband/hw/hfi1/vnic.h | 145 +++
 drivers/infiniband/hw/hfi1/vnic_main.c| 614 ++
 drivers/infiniband/hw/hfi1/vnic_sdma.c|  60 +++
 include/rdma/opa_port_info.h  |   2 +-
 20 files changed, 1252 insertions(+), 96 deletions(-)
 create mode 100644 drivers/infiniband/hw/hfi1/vnic.h
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_main.c
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_sdma.c

diff --git a/drivers/infiniband/hw/hfi1/Makefile 
b/drivers/infiniband/hw/hfi1/Makefile
index 0cf97a0..88085f6 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
-   verbs_txreq.o
+   verbs_txreq.o vnic_main.o vnic_sdma.o
 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
 
 CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/aspm.h 
b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3..3a01b69 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long 
data)
spin_unlock_irqrestore(&rcd->aspm_lock, flags);
 }
 
-/* Disable interrupt processing for verbs contexts when PSM contexts are open 
*/
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
 static inline void aspm_disable_all(struct hfi1_devdata *dd)
 {
struct hfi1_ctxtdata *rcd;
unsigned long flags;
unsigned i;
 
-   for (i = 0; i < dd->first_user_ctxt; i++) {
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
del_timer_sync(&rcd->aspm_timer);
spin_lock_irqsave(&rcd->aspm_lock, flags);
@@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd)
if (aspm_mode != ASPM_MODE_DYNAMIC)
return;
 
-   for (i = 0; i < dd->first_user_ctxt; i++) {
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
spin_lock_irqsave(&rcd->aspm_lock, flags);
rcd->aspm_intr_enable = true;
@@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
(unsigned long)rcd);
rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
aspm_mode == ASPM_MODE_DYNAMIC &&
-   rcd->ctxt < rcd->dd->first_user_ctxt;
+   rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
 }
 
 static inline void aspm_init(struct hfi1_devdata *dd)
@@ -286,7 +289,7 @@ static inline void aspm_init(struct hfi1_devdata *dd)
spin_lock_init(&dd->aspm_lock);
dd->aspm_supported = aspm_hw_l1_supported(dd);
 
-   for (i = 0; i < dd->first_user_ctxt; i++)
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++)
aspm_ctx_init(dd->rcd[i]);
 
/* Start with ASPM disabled */
diff --git a/drivers/infiniband/hw/hfi1/chip.c 
b/drivers/infiniband/hw/hfi1/chip.c
index 9263984..472ce55 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -125,9 +125,16 @@ struct flag_table {
 #define DEFAULT_KRCVQS

[RFC v2 10/10] IB/hfi1: VNIC SDMA support

2016-12-15 Thread Vishwanathapura, Niranjana
HFI1 VNIC SDMA support enables transmission of VNIC packets over SDMA.
Map VNIC queues to SDMA engines and support halting and wakeup of the
VNIC queues.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 drivers/infiniband/hw/hfi1/hfi.h   |   1 +
 drivers/infiniband/hw/hfi1/vnic.h  |  30 +++-
 drivers/infiniband/hw/hfi1/vnic_main.c |  21 ++-
 drivers/infiniband/hw/hfi1/vnic_sdma.c | 260 +
 4 files changed, 309 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 78d1726..8d5949f 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -855,6 +855,7 @@ struct hfi1_asic_data {
 /* Virtual NIC information */
 struct hfi1_vnic_data {
struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
+   struct kmem_cache *txreq_cache;
u8 num_vports;
struct idr vesw_idr;
u8 rmt_start;
diff --git a/drivers/infiniband/hw/hfi1/vnic.h 
b/drivers/infiniband/hw/hfi1/vnic.h
index 047845e..2d4eb8f 100644
--- a/drivers/infiniband/hw/hfi1/vnic.h
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -49,6 +49,7 @@
 
 #include 
 #include "hfi.h"
+#include "sdma.h"
 
 #define HFI1_VNIC_ICRC_LEN   4
 #define HFI1_VNIC_TAIL_LEN   1
@@ -90,6 +91,26 @@
 #define HFI1_VNIC_SC_SHIFT  4
 
 /**
+ * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
+ * @dd - device data pointer
+ * @sde - sdma engine
+ * @vinfo - vnic info pointer
+ * @wait - iowait structure
+ * @stx - sdma tx request
+ * @state - vnic Tx ring SDMA state
+ * @q_idx - vnic Tx queue index
+ */
+struct hfi1_vnic_sdma {
+   struct hfi1_devdata *dd;
+   struct sdma_engine  *sde;
+   struct hfi1_vnic_vport_info *vinfo;
+   struct iowait wait;
+   struct sdma_txreq stx;
+   unsigned int state;
+   u8 q_idx;
+};
+
+/**
  * struct hfi1_vnic_notifier - VNIC notifer structure
  * @cb - vnic callback function
  */
@@ -104,6 +125,7 @@ struct hfi1_vnic_notifier {
  * @event_flags: event notification flags
  * @vport: vnic port pointer
  * @skbq: Array of queues for received socket buffers
+ * @sdma: VNIC SDMA structure per TXQ
  */
 struct hfi1_vnic_vport_info {
struct hfi1_devdata *dd;
@@ -112,7 +134,8 @@ struct hfi1_vnic_vport_info {
DECLARE_BITMAP(event_flags, HFI_VNIC_NUM_EVTS);
struct hfi_vnic_port *vport;
 
-   struct sk_buff_head skbq[HFI1_NUM_VNIC_CTXT];
+   struct sk_buff_headskbq[HFI1_NUM_VNIC_CTXT];
+   struct hfi1_vnic_sdma  sdma[HFI1_VNIC_MAX_TXQ];
 };
 
 static inline struct hfi1_devdata *vnic_dev2dd(struct hfi_vnic_port *vport)
@@ -131,8 +154,13 @@ static inline void hfi1_vnic_update_pad(unsigned char 
*pad, u8 plen)
 /* vnic hfi1 internal functions */
 void hfi1_vnic_setup(struct hfi1_devdata *dd);
 void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
 
 void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet);
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo);
+bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+   u8 q_idx);
 
 /* vnic port operations */
 struct hfi_vnic_port *hfi1_vnic_add_vport(struct ib_device *device,
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c 
b/drivers/infiniband/hw/hfi1/vnic_main.c
index 1e237f3..19843a4 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -289,15 +289,21 @@ static int hfi1_vnic_put_skb(struct hfi_vnic_port *vport,
 
 static u8 hfi1_vnic_select_queue(struct hfi_vnic_port *vport, u8 vl, u8 
entropy)
 {
-   return 0;
+   struct hfi1_vnic_vport_info *vinfo = vport->hfi_priv;
+   struct sdma_engine *sde;
+
+   sde = sdma_select_engine_vl(vinfo->dd, entropy, vl);
+   return sde->this_idx;
 }
 
 static bool hfi1_vnic_get_write_avail(struct hfi_vnic_port *vport, u8 q_idx)
 {
+   struct hfi1_vnic_vport_info *vinfo = vport->hfi_priv;
+
if (q_idx >= vport->hfi_info.num_tx_q)
return false;
 
-   return true;
+   return hfi1_vnic_sdma_write_avail(vinfo, q_idx);
 }
 
 void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
@@ -499,6 +505,12 @@ static int hfi1_vnic_init(struct hfi_vnic_port *vport)
int i, rc = 0;
 
mutex_lock(&hfi1_mutex);
+   if (!dd->vnic.num_vports) {
+   rc = hfi1_vnic_txreq_init(dd);
+   if (rc)
+   goto txreq_fail;
+   }
+
for (i = dd->vnic.num_ctxt; i < vport->hfi_info.num_rx_q; i++) {
rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
if (rc)
@@ -526,7 +538,11 @@ static int hfi1_vnic_init(struct hfi_vnic_port *vport)
 
dd->vnic.num_vports++;
vinfo->vport = vport;
+   hfi1_vnic_sdma_init(vinfo);
 alloc_fail:
+   if (!dd->vnic.num_vports)
+   

[RFC v2 01/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) documentation

2016-12-15 Thread Vishwanathapura, Niranjana
Add HFI VNIC design document explaining the VNIC architecture and the
driver design.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 Documentation/infiniband/hfi_vnic.txt | 95 +++
 1 file changed, 95 insertions(+)
 create mode 100644 Documentation/infiniband/hfi_vnic.txt

diff --git a/Documentation/infiniband/hfi_vnic.txt 
b/Documentation/infiniband/hfi_vnic.txt
new file mode 100644
index 000..1f39d8b
--- /dev/null
+++ b/Documentation/infiniband/hfi_vnic.txt
@@ -0,0 +1,95 @@
+Intel Omni-Path Host Fabric Interface (HFI) Virtual Network Interface
+Controller (VNIC) feature supports Ethernet functionality over Omni-Path
+fabric by encapsulating the Ethernet packets between HFI nodes.
+
+The patterns of exchanges of Omni-Path encapsulated Ethernet packets
+involves one or more virtual Ethernet switches overlaid on the Omni-Path
+fabric topology. A subset of HFI nodes on the Omni-Path fabric are
+permitted to exchange encapsulated Ethernet packets across a particular
+virtual Ethernet switch. The virtual Ethernet switches are logical
+abstractions achieved by configuring the HFI nodes on the fabric for
+header generation and processing. In the simplest configuration all HFI
+nodes across the fabric exchange encapsulated Ethernet packets over a
+single virtual Ethernet switch. A virtual Ethernet switch, is effectively
+an independent Ethernet network. The configuration is performed by an
+Ethernet Manager (EM) which is part of the trusted Fabric Manager (FM)
+application. HFI nodes can have multiple VNICs each connected to a
+different virtual Ethernet switch. The below diagram presents a case
+of two virtual Ethernet switches with two HFI nodes.
+
+ +---+
+ |  Subnet/  |
+ | Ethernet  |
+ |  Manager  |
+ +---+
+/  /
+  /   /
+//
+  / /
++-+  +--+
+|  Virtual Ethernet Switch|  |  Virtual Ethernet Switch |
+|  +-++-+ |  | +-++-+   |
+|  | VPORT   ||  VPORT  | |  | |  VPORT  ||  VPORT  |   |
++--+-++-+-+  +-+-++-+---+
+ | \/ |
+ |   \/   |
+ | \/ |
+ |/  \|
+ |  /  \  |
+ +---++  +---++
+ |   VNIC|VNIC|  |VNIC   |VNIC|
+ +---++  +---++
+ |  HFI   |  |  HFI   |
+ ++  ++
+
+Intel HFI VNIC software design is presented in the below diagram.
+HFI VNIC functionality has a HW dependent component and a HW
+independent component.
+
+The HW dependent VNIC functionality is part of the HFI1 driver. It
+implements the callback functions to do various tasks which includes
+adding and removing of VNIC ports, HW resource allocation for VNIC
+functionality and actual transmission and reception of encapsulated
+Ethernet packets over the fabric. Each VNIC port is addressed by the
+HFI port number, and the VNIC port number on that HFI port.
+
+The HFI VNIC module implements the HW independent VNIC functionality.
+It consists of two parts. The VNIC Ethernet Management Agent (VEMA)
+registers itself with IB core as an IB client and interfaces with the
+IB MAD stack. It exchanges the management information with the Ethernet
+Manager (EM) and the VNIC netdev. The VNIC netdev part interfaces with
+the Linux network stack, thus providing standard Ethernet network
+interfaces. It invokes HFI device's VNIC callback functions for HW access.
+The VNIC netdev encapsulates the Ethernet packets with an Omni-Path
+header before passing them to the HFI1 driver for transmission.
+Similarly, it de-encapsulates the received Omni-Path packets before
+passing them to the network stack. For each VNIC interface, the
+information required for encapsulation is configured by EM via VEMA MAD
+interface.
+
+
++---+ +--+
+|   | |   Linux  |
+| IB MAD| |  Network |
+|   | |   Stack  |
++---+ +--+
+ |   |
+ |   |
+++
+| 

[RFC v2 04/10] IB/hfi-vnic: VNIC Ethernet Management (EM) structure definitions

2016-12-15 Thread Vishwanathapura, Niranjana
Define VNIC EM MAD structures and the associated macros. These structures
are used for information exchange between VNIC EM agent (EMA) on the HFI
host and the Ethernet manager. These include the virtual ethernet switch
(vesw) port information, vesw port mac table, summary and error counters,
vesw port interface mac lists and the EMA trap.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Tanya K Jajodia 
---
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h  | 444 +
 .../sw/intel/hfi_vnic/hfi_vnic_internal.h  |  33 ++
 2 files changed, 477 insertions(+)

diff --git a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h 
b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h
index 6786cce..a6770ef 100644
--- a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h
+++ b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h
@@ -52,11 +52,455 @@
  * and decapsulation of Ethernet packets
  */
 
+#include 
+#include 
+
+/* Maximum number of vnics supported */
+#define HFI_MAX_VPORTS_SUPPORTED 256
+
+/* EMA class version */
+#define HFI_EMA_CLASS_VERSION   0x80
+
+/*
+ * Define the Intel vendor management class for HFI
+ * ETHERNET MANAGEMENT
+ */
+#define HFI_MGMT_CLASS_INTEL_EMA0x34
+
+/* EM attribute IDs */
+#define HFI_EM_ATTR_CLASS_PORT_INFO 0x0001
+#define HFI_EM_ATTR_VESWPORT_INFO   0x0011
+#define HFI_EM_ATTR_VESWPORT_MAC_ENTRIES0x0012
+#define HFI_EM_ATTR_IFACE_UCAST_MACS0x0013
+#define HFI_EM_ATTR_IFACE_MCAST_MACS0x0014
+#define HFI_EM_ATTR_DELETE_VESW 0x0015
+#define HFI_EM_ATTR_VESWPORT_SUMMARY_COUNTERS   0x0020
+#define HFI_EM_ATTR_VESWPORT_ERROR_COUNTERS 0x0022
+
 #define HFI_VESW_MAX_NUM_DEF_PORT   16
 #define HFI_VNIC_MAX_NUM_PCP8
 
+#define HFI_VNIC_EMA_DATA(OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR)
+
+/* Defines for vendor specific notice(trap) attributes */
+#define HFI_INTEL_EMA_NOTICE_TYPE_INFO 0x04
+
+/* INTEL OUI */
+#define INTEL_OUI_1 0x00
+#define INTEL_OUI_2 0x06
+#define INTEL_OUI_3 0x6a
+
+/* Trap opcodes sent from VNIC */
+#define HFI_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1
+#define HFI_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2
+#define HFI_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3
+
 /* VNIC configured and operational state values */
 #define HFI_VNIC_STATE_DROP_ALL0x1
 #define HFI_VNIC_STATE_FORWARDING  0x3
 
+/**
+ * struct hfi_vesw_info - HFI vnic switch information
+ * @fabric_id: 10-bit fabric id
+ * @vesw_id: 12-bit virtual ethernet switch id
+ * @def_port_mask: bitmask of default ports
+ * @pkey: partition key
+ * @u_mcast_dlid: unknown multicast dlid
+ * @u_ucast_dlid: array of unknown unicast dlids
+ * @eth_mtu: MTUs for each vlan PCP
+ * @eth_mtu_non_vlan: MTU for non vlan packets
+ */
+struct hfi_vesw_info {
+   __be16  fabric_id;
+   __be16  vesw_id;
+
+   u8  rsvd0[6];
+   __be16  def_port_mask;
+
+   u8  rsvd1[2];
+   __be16  pkey;
+
+   u8  rsvd2[4];
+   __be32  u_mcast_dlid;
+   __be32  u_ucast_dlid[HFI_VESW_MAX_NUM_DEF_PORT];
+
+   u8  rsvd3[44];
+   __be16  eth_mtu[HFI_VNIC_MAX_NUM_PCP];
+   __be16  eth_mtu_non_vlan;
+   u8  rsvd4[2];
+} __packed;
+
+/**
+ * struct hfi_per_veswport_info - HFI vnic per port information
+ * @port_num: port number
+ * @eth_link_status: current ethernet link state
+ * @base_mac_addr: base mac address
+ * @config_state: configured port state
+ * @oper_state: operational port state
+ * @max_mac_tbl_ent: max number of mac table entries
+ * @max_smac_ent: max smac entries in mac table
+ * @mac_tbl_digest: mac table digest
+ * @encap_slid: base slid for the port
+ * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets
+ * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets
+ * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets
+ * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets
+ * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets
+ * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets
+ * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets
+ * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets
+ * @uc_macs_gen_count: generation count for unicast macs list
+ * @mc_macs_gen_count: generation count for multicast macs list
+ */
+struct hfi_per_veswport_info {
+   __be32  port_num;
+
+   u8  eth_link_status;
+   u8  rsvd0[3];
+
+   u8  base_mac_addr[ETH_ALEN];
+   u8  config_state;
+   u8  oper_state;
+
+   __be16  max_mac_tbl_ent;
+   __be16  max_smac_ent;
+   __be32  mac_tbl_digest;
+   u8  rsvd1[4];
+
+   __be32  encap_slid;
+
+   u8  pcp_to_sc_uc[HFI_VNIC_MAX_NUM_PCP];
+   u8  pcp_to_vl_uc[HFI_VNIC_MAX_NUM_PCP];
+   u8  pcp_to_sc_mc[

[RFC v2 00/10] HFI Virtual Network Interface Controller (VNIC)

2016-12-15 Thread Vishwanathapura, Niranjana
   | |   Stack  |
+---+ +--+
 |   |
 |   |
++
||
| HFI VNIC Module|
|(HFI VNIC Netdev and EMA drivers)   |
||
++
 |
 |
+--+
|  IB core |
+--+
 |
 |
++
||
|  HFI1 Driver with VNIC support |
||
+----+

Vishwanathapura, Niranjana (10):
  IB/hfi-vnic: Virtual Network Interface Controller (VNIC) documentation
  IB/hfi-vnic: Virtual Network Interface Controller (VNIC) interface
  IB/hfi-vnic: Virtual Network Interface Controller (VNIC) netdev
  IB/hfi-vnic: VNIC Ethernet Management (EM) structure definitions
  IB/hfi-vnic: VNIC statistics support
  IB/hfi-vnic: VNIC MAC table support
  IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) interface
  IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) function
  IB/hfi1: Virtual Network Interface Controller (VNIC) support
  IB/hfi1: VNIC SDMA support

 Documentation/infiniband/hfi_vnic.txt  |   95 ++
 MAINTAINERS|7 +
 drivers/infiniband/Kconfig |1 +
 drivers/infiniband/hw/hfi1/Makefile|2 +-
 drivers/infiniband/hw/hfi1/aspm.h  |   13 +-
 drivers/infiniband/hw/hfi1/chip.c  |  272 +-
 drivers/infiniband/hw/hfi1/chip.h  |2 +
 drivers/infiniband/hw/hfi1/debugfs.c   |6 +-
 drivers/infiniband/hw/hfi1/driver.c|   84 +-
 drivers/infiniband/hw/hfi1/file_ops.c  |   25 +-
 drivers/infiniband/hw/hfi1/hfi.h   |   52 +-
 drivers/infiniband/hw/hfi1/init.c  |   41 +-
 drivers/infiniband/hw/hfi1/intr.c  |2 +-
 drivers/infiniband/hw/hfi1/mad.c   |   10 +-
 drivers/infiniband/hw/hfi1/pio.c   |   17 +
 drivers/infiniband/hw/hfi1/pio.h   |6 +
 drivers/infiniband/hw/hfi1/qp.c|   24 +-
 drivers/infiniband/hw/hfi1/ruc.c   |2 +-
 drivers/infiniband/hw/hfi1/sysfs.c |   24 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c  |6 +-
 drivers/infiniband/hw/hfi1/user_pages.c|3 +-
 drivers/infiniband/hw/hfi1/verbs.c |  120 +--
 drivers/infiniband/hw/hfi1/verbs.h |9 +-
 drivers/infiniband/hw/hfi1/vnic.h  |  173 
 drivers/infiniband/hw/hfi1/vnic_main.c |  631 
 drivers/infiniband/hw/hfi1/vnic_sdma.c |  320 ++
 drivers/infiniband/sw/Makefile |1 +
 drivers/infiniband/sw/intel/hfi_vnic/Kconfig   |8 +
 drivers/infiniband/sw/intel/hfi_vnic/Makefile  |7 +
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c  |  489 ++
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h  |  510 ++
 .../sw/intel/hfi_vnic/hfi_vnic_ethtool.c   |  208 
 .../sw/intel/hfi_vnic/hfi_vnic_internal.h  |  443 +
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_netdev.c |  810 
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_vema.c   | 1024 
 .../sw/intel/hfi_vnic/hfi_vnic_vema_iface.c|  432 +
 include/rdma/opa_hfi.h |  199 
 include/rdma/opa_port_info.h   |2 +-
 38 files changed, 5891 insertions(+), 189 deletions(-)
 create mode 100644 Documentation/infiniband/hfi_vnic.txt
 create mode 100644 drivers/infiniband/hw/hfi1/vnic.h
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_main.c
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_sdma.c
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/Kconfig
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/Makefile
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_ethtool.c
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_internal.h
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_netdev.c
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_vema.c
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_vema_iface.c
 create mode 100644 include/rdma/opa_hfi.h

-- 
1.8.3.1



[RFC v2 06/10] IB/hfi-vnic: VNIC MAC table support

2016-12-15 Thread Vishwanathapura, Niranjana
HFI VNIC MAC table contains the MAC address to DLID mappings provided by
the Ethernet manager. During transmission, the MAC table provides the MAC
address to DLID translation. Implement MAC table using simple hash list.
Also provide support to update/query the MAC table by Ethernet manager.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
---
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c  | 236 +
 .../sw/intel/hfi_vnic/hfi_vnic_internal.h  |  53 -
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_netdev.c |   4 +
 3 files changed, 292 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c 
b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c
index 3fdfb7b..e45cff8 100644
--- a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c
+++ b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c
@@ -104,6 +104,238 @@
 
 #define HFI_VNIC_SC_MASK 0x1f
 
+/*
+ * Using a simple hash table for mac table implementation with the last octet
+ * of mac address as a key.
+ */
+static void hfi_vnic_free_mac_tbl(struct hlist_head *mactbl)
+{
+   struct hfi_vnic_mac_tbl_node *node;
+   struct hlist_node *tmp;
+   int bkt;
+
+   if (!mactbl)
+   return;
+
+   vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) {
+   hash_del(&node->hlist);
+   kfree(node);
+   }
+   kfree(mactbl);
+}
+
+static struct hlist_head *hfi_vnic_alloc_mac_tbl(void)
+{
+   u32 size = sizeof(struct hlist_head) * HFI_VNIC_MAC_TBL_SIZE;
+   struct hlist_head *mactbl;
+
+   mactbl = kzalloc(size, GFP_KERNEL);
+   if (!mactbl)
+   return ERR_PTR(-ENOMEM);
+
+   vnic_hash_init(mactbl);
+   return mactbl;
+}
+
+/* hfi_vnic_release_mac_tbl - empty and free the mac table */
+void hfi_vnic_release_mac_tbl(struct hfi_vnic_adapter *adapter)
+{
+   struct hlist_head *mactbl;
+
+   mutex_lock(&adapter->mactbl_lock);
+   mactbl = rcu_access_pointer(adapter->mactbl);
+   rcu_assign_pointer(adapter->mactbl, NULL);
+   synchronize_rcu();
+   hfi_vnic_free_mac_tbl(mactbl);
+   mutex_unlock(&adapter->mactbl_lock);
+}
+
+/*
+ * hfi_vnic_query_mac_tbl - query the mac table for a section
+ *
+ * This function implements query of specific function of the mac table.
+ * The function also expects the requested range to be valid.
+ */
+void hfi_vnic_query_mac_tbl(struct hfi_vnic_adapter *adapter,
+   struct hfi_veswport_mactable *tbl)
+{
+   struct hfi_vnic_mac_tbl_node *node;
+   struct hlist_head *mactbl;
+   int bkt;
+   u16 loffset, lnum_entries;
+
+   rcu_read_lock();
+   mactbl = rcu_dereference(adapter->mactbl);
+   if (!mactbl)
+   goto get_mac_done;
+
+   loffset = be16_to_cpu(tbl->offset);
+   lnum_entries = be16_to_cpu(tbl->num_entries);
+
+   vnic_hash_for_each(mactbl, bkt, node, hlist) {
+   struct __hfi_vnic_mactable_entry *nentry = &node->entry;
+   struct hfi_veswport_mactable_entry *entry;
+
+   if ((node->index < loffset) ||
+   (node->index >= (loffset + lnum_entries)))
+   continue;
+
+   /* populate entry in the tbl corresponding to the index */
+   entry = &tbl->tbl_entries[node->index - loffset];
+   memcpy(entry->mac_addr, nentry->mac_addr,
+  ARRAY_SIZE(entry->mac_addr));
+   memcpy(entry->mac_addr_mask, nentry->mac_addr_mask,
+  ARRAY_SIZE(entry->mac_addr_mask));
+   entry->dlid_sd.dw = cpu_to_be32(nentry->dlid_sd.dw);
+   }
+   tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest);
+get_mac_done:
+   rcu_read_unlock();
+}
+
+/*
+ * hfi_vnic_update_mac_tbl - update mac table section
+ *
+ * This function updates the specified section of the mac table.
+ * The procedure includes following steps.
+ *  - Allocate a new mac (hash) table.
+ *  - Add the specified entries to the new table.
+ *(except the ones that are requested to be deleted).
+ *  - Add all the other entries from the old mac table.
+ *  - If there is a failure, free the new table and return.
+ *  - Switch to the new table.
+ *  - Free the old table and return.
+ *
+ * The function also expects the requested range to be valid.
+ */
+int hfi_vnic_update_mac_tbl(struct hfi_vnic_adapter *adapter,
+   struct hfi_veswport_mactable *tbl)
+{
+   struct hfi_vnic_mac_tbl_node *node, *new_node;
+   struct hlist_head *new_mactbl, *old_mactbl;
+   int i, bkt, rc = 0;
+   u8 key;
+   u16 loffset, lnum_entries;
+
+   mutex_lock(&adapter->mactbl_lock);
+   /* allocate new mac table */
+   new_mactbl = hfi_vnic_alloc_mac_tbl();
+   if (IS_ERR(new_mactbl)) {
+   mutex_unlock(&ada

[RFC v2 07/10] IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) interface

2016-12-15 Thread Vishwanathapura, Niranjana
HFI VNIC EMA interface functions are the management interfaces to the HFI
VNIC netdev. Add support to add and remove VNIC ports. Implement the
required GET/SET management interface functions and processing of new
management information. Add support to send trap notifications upon various
events like interface status change, unicast/multicast mac list update and
mac address change.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Tanya K Jajodia 
---
 drivers/infiniband/sw/intel/hfi_vnic/Makefile  |   3 +-
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h  |   4 +
 .../sw/intel/hfi_vnic/hfi_vnic_internal.h  |  44 +++
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_netdev.c | 153 +++-
 .../sw/intel/hfi_vnic/hfi_vnic_vema_iface.c| 432 +
 5 files changed, 633 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_vema_iface.c

diff --git a/drivers/infiniband/sw/intel/hfi_vnic/Makefile 
b/drivers/infiniband/sw/intel/hfi_vnic/Makefile
index 8e3dca7..a0562af 100644
--- a/drivers/infiniband/sw/intel/hfi_vnic/Makefile
+++ b/drivers/infiniband/sw/intel/hfi_vnic/Makefile
@@ -3,4 +3,5 @@
 #
 obj-$(CONFIG_HFI_VNIC) += hfi_vnic.o
 
-hfi_vnic-y := hfi_vnic_netdev.o hfi_vnic_encap.o hfi_vnic_ethtool.o
+hfi_vnic-y := hfi_vnic_netdev.o hfi_vnic_encap.o hfi_vnic_ethtool.o \
+  hfi_vnic_vema_iface.o
diff --git a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h 
b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h
index a6770ef..54e9081 100644
--- a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h
+++ b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h
@@ -99,6 +99,10 @@
 #define HFI_VNIC_STATE_DROP_ALL0x1
 #define HFI_VNIC_STATE_FORWARDING  0x3
 
+/* VNIC Ethernet link status */
+#define HFI_VNIC_ETH_LINK_UP 1
+#define HFI_VNIC_ETH_LINK_DOWN   2
+
 /**
  * struct hfi_vesw_info - HFI vnic switch information
  * @fabric_id: 10-bit fabric id
diff --git a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_internal.h 
b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_internal.h
index 6d5c5f8..7723a4e 100644
--- a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_internal.h
+++ b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_internal.h
@@ -243,6 +243,16 @@ struct __hfi_veswport_trap {
 } __packed;
 
 /**
+ * struct hfi_vnic_ctrl_port - HFI virtual NIC control port
+ * @ibdev: pointer to ib device
+ * @ops: hfi vnic control operations
+ */
+struct hfi_vnic_ctrl_port {
+   struct ib_device   *ibdev;
+   struct hfi_vnic_ctrl_ops   *ops;
+};
+
+/**
  * struct hfi_vnic_rx_queue - HFI VNIC receive queue
  * @idx: queue index
  * @adapter: netdev adapter
@@ -257,11 +267,15 @@ struct hfi_vnic_rx_queue {
 /**
  * struct hfi_vnic_adapter - HFI VNIC netdev private data structure
  * @netdev: pointer to associated netdev
+ * @cport: pointer to hfi vnic control port
  * @vport: pointer to hfi vnic port
  * @flags: flags indicating various states
  * @lock: adapter lock
  * @rxq: receive queue array
  * @info: virtual ethernet switch port information
+ * @vema_mac_addr: mac address configured by vema
+ * @umac_hash: unicast maclist hash
+ * @mmac_hash: multicast maclist hash
  * @mactbl: hash table of MAC entries
  * @mactbl_lock: mac table lock
  * @stats_lock: statistics lock
@@ -278,6 +292,7 @@ struct hfi_vnic_rx_queue {
  */
 struct hfi_vnic_adapter {
struct net_device *netdev;
+   struct hfi_vnic_ctrl_port *cport;
struct hfi_vnic_port  *vport;
unsigned long  flags;
 
@@ -287,6 +302,9 @@ struct hfi_vnic_adapter {
struct hfi_vnic_rx_queue  rxq[HFI_VNIC_MAX_QUEUE];
 
struct __hfi_veswport_info  info;
+   u8  vema_mac_addr[ETH_ALEN];
+   u32 umac_hash;
+   u32 mmac_hash;
struct hlist_head  __rcu   *mactbl;
 
/* Lock used to protect updates to mac table */
@@ -338,6 +356,11 @@ struct hfi_vnic_mac_tbl_node {
 #define v_warn(format, arg...) \
netdev_warn(adapter->netdev, format, ## arg)
 
+#define c_err(format, arg...) \
+   dev_err(&cport->ibdev->dev, format, ## arg)
+#define c_info(format, arg...) \
+   dev_info(&cport->ibdev->dev, format, ## arg)
+
 /* The maximum allowed entries in the mac table */
 #define HFI_VNIC_MAC_TBL_MAX_ENTRIES  2048
 /* Limit of smac entries in mac table */
@@ -377,12 +400,33 @@ struct hfi_vnic_adapter *hfi_vnic_add_netdev(struct 
hfi_vnic_port *vport,
 int hfi_vnic_encap_skb(struct hfi_vnic_adapter *adapter, struct sk_buff *skb);
 int hfi_vnic_decap_skb(struct hfi_vnic_rx_queue *rxq, struct sk_buff *skb);
 u8 hfi_vnic_calc_entropy(struct hfi_vnic_adapter *adapter, struct sk_buff 
*skb);
+void hfi_vnic_process_vema_config(struct hfi_vnic_adapter *adapter);
 void hfi_vnic_release_mac_tbl(struct hf

[RFC v2 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) interface

2016-12-15 Thread Vishwanathapura, Niranjana
Create hfi_ibdev abstraction which hfi1_ibdev will extend.
Define HFI VNIC interface between hardware independent VNIC
functionality and the hardware dependent VNIC functionality.
Add VNIC control operations to add and remove VNIC devices,
to the hfi_ibdev structure.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
---
 drivers/infiniband/hw/hfi1/chip.c   |   2 +-
 drivers/infiniband/hw/hfi1/driver.c |  10 +-
 drivers/infiniband/hw/hfi1/hfi.h|   2 +-
 drivers/infiniband/hw/hfi1/init.c   |   4 +-
 drivers/infiniband/hw/hfi1/intr.c   |   2 +-
 drivers/infiniband/hw/hfi1/mad.c|   2 +-
 drivers/infiniband/hw/hfi1/qp.c |  24 +++--
 drivers/infiniband/hw/hfi1/ruc.c|   2 +-
 drivers/infiniband/hw/hfi1/sysfs.c  |  22 ++--
 drivers/infiniband/hw/hfi1/verbs.c  | 113 ++--
 drivers/infiniband/hw/hfi1/verbs.h  |   9 +-
 include/rdma/opa_hfi.h  | 199 
 12 files changed, 298 insertions(+), 93 deletions(-)
 create mode 100644 include/rdma/opa_hfi.h

diff --git a/drivers/infiniband/hw/hfi1/chip.c 
b/drivers/infiniband/hw/hfi1/chip.c
index 37d8af5..9263984 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -10452,7 +10452,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 
state)
sdma_all_running(dd);
 
/* Signal the IB layer that the port has went active */
-   event.device = &dd->verbs_dev.rdi.ibdev;
+   event.device = &dd->verbs_dev.hfidev.rdi.ibdev;
event.element.port_num = ppd->port;
event.event = IB_EVENT_PORT_ACTIVE;
}
diff --git a/drivers/infiniband/hw/hfi1/driver.c 
b/drivers/infiniband/hw/hfi1/driver.c
index d426116..e219c3b 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -163,7 +163,8 @@ const char *get_unit_name(int unit)
 
 const char *get_card_name(struct rvt_dev_info *rdi)
 {
-   struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
+   struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev,
+   hfidev.rdi);
struct hfi1_devdata *dd = container_of(ibdev,
   struct hfi1_devdata, verbs_dev);
return get_unit_name(dd->unit);
@@ -171,7 +172,8 @@ const char *get_card_name(struct rvt_dev_info *rdi)
 
 struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
 {
-   struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
+   struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev,
+   hfidev.rdi);
struct hfi1_devdata *dd = container_of(ibdev,
   struct hfi1_devdata, verbs_dev);
return dd->pcidev;
@@ -281,7 +283,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct 
hfi1_pportdata *ppd,
int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
struct hfi1_ibport *ibp = &ppd->ibport_data;
struct hfi1_devdata *dd = ppd->dd;
-   struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+   struct rvt_dev_info *rdi = &dd->verbs_dev.hfidev.rdi;
 
if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
return;
@@ -600,7 +602,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
struct rvt_qp *qp;
struct ib_header *hdr;
struct ib_other_headers *ohdr;
-   struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+   struct rvt_dev_info *rdi = &dd->verbs_dev.hfidev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
int is_ecn = 0;
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 4163596..1fc5b68 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1601,7 +1601,7 @@ static inline struct hfi1_pportdata *ppd_from_ibp(struct 
hfi1_ibport *ibp)
 
 static inline struct hfi1_ibdev *dev_from_rdi(struct rvt_dev_info *rdi)
 {
-   return container_of(rdi, struct hfi1_ibdev, rdi);
+   return container_of(rdi, struct hfi1_ibdev, hfidev.rdi);
 }
 
 static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port)
diff --git a/drivers/infiniband/hw/hfi1/init.c 
b/drivers/infiniband/hw/hfi1/init.c
index 60db615..13f6862 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1020,7 +1020,7 @@ static void __hfi1_free_devdata(struct kobject *kobj)
free_percpu(dd->int_counter);
free_percpu(dd->rcv_limit);
free_percpu(dd->send_schedule);
-   rvt_dealloc_device(&dd->verbs_dev.rdi);
+   rvt_dealloc_device(&dd->verbs_dev.hfidev.rdi);
 }
 
 static struct kobj_type hfi1_devdata_type = {
@@ -1133,7 +113

[RFC v2 03/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) netdev

2016-12-15 Thread Vishwanathapura, Niranjana
HFI VNIC netdev function supports Ethernet functionality over Omni-Path
fabric by encapsulating Ethernet packets inside Omni-Path packet header.
It interfaces with the network stack to provide standard Ethernet network
interfaces. It invokes HFI device's VNIC callback functions for HW access.

Reviewed-by: Dennis Dalessandro 
Reviewed-by: Ira Weiny 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Sudeep Dutt 
Signed-off-by: Tanya K Jajodia 
Signed-off-by: Andrzej Kacprowski 
---
 MAINTAINERS|   7 +
 drivers/infiniband/Kconfig |   1 +
 drivers/infiniband/sw/Makefile |   1 +
 drivers/infiniband/sw/intel/hfi_vnic/Kconfig   |   8 +
 drivers/infiniband/sw/intel/hfi_vnic/Makefile  |   6 +
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c  | 238 
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h  |  62 
 .../sw/intel/hfi_vnic/hfi_vnic_ethtool.c   |  65 
 .../sw/intel/hfi_vnic/hfi_vnic_internal.h  | 220 +++
 .../infiniband/sw/intel/hfi_vnic/hfi_vnic_netdev.c | 409 +
 10 files changed, 1017 insertions(+)
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/Kconfig
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/Makefile
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.h
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_ethtool.c
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_internal.h
 create mode 100644 drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_netdev.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 2c7a7b6..62db3ea 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5628,6 +5628,13 @@ F:   drivers/block/cciss*
 F: include/linux/cciss_ioctl.h
 F: include/uapi/linux/cciss_ioctl.h
 
+HFI-VNIC DRIVER
+M: Dennis Dalessandro 
+M: Niranjana Vishwanathapura 
+L: linux-r...@vger.kernel.org
+S: Supported
+F: drivers/infiniband/sw/intel/hfi_vnic
+
 HFI1 DRIVER
 M: Mike Marciniszyn 
 M: Dennis Dalessandro 
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 6709173..900daf3 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,6 +85,7 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
 source "drivers/infiniband/ulp/iser/Kconfig"
 source "drivers/infiniband/ulp/isert/Kconfig"
 
+source "drivers/infiniband/sw/intel/hfi_vnic/Kconfig"
 source "drivers/infiniband/sw/rdmavt/Kconfig"
 source "drivers/infiniband/sw/rxe/Kconfig"
 
diff --git a/drivers/infiniband/sw/Makefile b/drivers/infiniband/sw/Makefile
index 8b095b2..2792559 100644
--- a/drivers/infiniband/sw/Makefile
+++ b/drivers/infiniband/sw/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_INFINIBAND_RDMAVT)+= rdmavt/
 obj-$(CONFIG_RDMA_RXE) += rxe/
+obj-$(CONFIG_HFI_VNIC) += intel/hfi_vnic/
diff --git a/drivers/infiniband/sw/intel/hfi_vnic/Kconfig 
b/drivers/infiniband/sw/intel/hfi_vnic/Kconfig
new file mode 100644
index 000..84d13e7
--- /dev/null
+++ b/drivers/infiniband/sw/intel/hfi_vnic/Kconfig
@@ -0,0 +1,8 @@
+config HFI_VNIC
+   tristate "Intel HFI VNIC support"
+   depends on X86_64 && INFINIBAND
+   ---help---
+   This is HFI Virtual Network Interface Controller (VNIC) driver
+   for Ethernet over HFI feature. It implements the HW independent
+   VNIC functionality. It interfaces with Linux stack for data path
+   and IB MAD for the control path.
diff --git a/drivers/infiniband/sw/intel/hfi_vnic/Makefile 
b/drivers/infiniband/sw/intel/hfi_vnic/Makefile
new file mode 100644
index 000..8e3dca7
--- /dev/null
+++ b/drivers/infiniband/sw/intel/hfi_vnic/Makefile
@@ -0,0 +1,6 @@
+# Makefile - Intel HFI Virtual Network Controller driver
+# Copyright(c) 2016, Intel Corporation.
+#
+obj-$(CONFIG_HFI_VNIC) += hfi_vnic.o
+
+hfi_vnic-y := hfi_vnic_netdev.o hfi_vnic_encap.o hfi_vnic_ethtool.o
diff --git a/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c 
b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c
new file mode 100644
index 000..093df67
--- /dev/null
+++ b/drivers/infiniband/sw/intel/hfi_vnic/hfi_vnic_encap.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License fo

Re: [RFC 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver

2016-11-30 Thread Vishwanathapura, Niranjana

On Tue, Nov 29, 2016 at 09:21:13AM -0700, Jason Gunthorpe wrote:

On Mon, Nov 28, 2016 at 10:29:38PM -0800, Vishwanathapura, Niranjana wrote:

On Thu, Nov 24, 2016 at 09:15:45AM -0700, Jason Gunthorpe wrote:
>>And will move the hfi_vnic module under
>>‘drivers/infiniband/ulp/hfi_vnic’.
>
>I would prefer drivers/net/ethernet
>
>This is clearly not a ULP since it doesn't use verbs.
>

I understand it is not using verbs, but the control path (ib_device client)
is using verbs (IB MAD).
Our preference is to keep it somewhere under drivers/infiniband. Summarizing
reasons again here,

- VNIC control driver (ib_device client) is an IB MAD agent.
- It is purely a software construct, encapsulates ethernet packets in
Omni-path packet and depends on hfi1 driver here for HW access.


Is the majority of the code MAD focused or net stack focused?

I'm not sure it matters, it isn't like we can review Intel's
proprietary mad stuff anyhow. :\

Jason


That is an interesting measure. In hfi_vnic driver, I would say, >60% of the 
code is MAD focused, mainly interfacing with the IB MAD agent.
It also includes populating/parsing those MAD packets. At the least it is not 
supporting the driver to be put under net folder.


Even in the remaining <40%, half of it is involved with encapsulating ethernet 
frames with Omni-path header (does this make it belong under 
drivers/infiniband/hw?).

The net stack interface part is pretty standard, hence is not much of code.

I do see the reason to put it under net folder, but I am seeing more reason for 
it to be somewhere under drivers/infiniband.


Niranjana



Re: [RFC 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver

2016-11-30 Thread Vishwanathapura, Niranjana

On Tue, Nov 29, 2016 at 09:50:09AM -0700, Jason Gunthorpe wrote:

On Tue, Nov 29, 2016 at 04:44:37PM +, Hefty, Sean wrote:

> You are not making a subsystem. Don't overcomplicate things. A
> multi-part device device can just directly link.

The VNIC may be usable over multiple generations of HFIs, but I
don't know if that is the intent.


If Intel wants to build a HFI subystem within RDMA with multiple
drivers then sure, but they are not there yet, and we don't even know
what that could look like. So it is better to leave it simple for now.

Jason


Sorry for the delay, I was weighing a couple of options.
We envisioned vnic as a pure software construct and hence should be independent 
(like ipoib). ie., both hfi_vnic and hfi1 should be independently loadable 
(like ipoib) despite hfi_vnic being dependent on hfi1 here for HW access.


There doesn't seem to be much value in hfi_vnic being an 'ib client', if it 
still has compilation and module dependency on hfi1 module.


The more I think of it, having vnic ops added to ib device structure (option 
(b)) makes it cleaner (no dependency). We can probably consider extending 'ib 
device' in hfi1 in order for hfi_vnic to get to the vnic ops. But (b) makes it 
simpler.


Though Jason's suggestion could be a temporary measure for this patch series, 
the above approach is what I would like to target here.


Niranjana



Re: [RFC 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver

2016-11-28 Thread Vishwanathapura, Niranjana

On Fri, Nov 25, 2016 at 12:05:09PM -0700, Jason Gunthorpe wrote:

On Thu, Nov 24, 2016 at 06:13:50PM -0800, Vishwanathapura, Niranjana wrote:


In order to be truly device independent the hfi_vnic ULP should not depend
on a device exported symbol. Instead device should register its functions
with the ULP. Hence the approaches a) and b).


It is not device independent, it is hard linked to hfi1, just like our
other multi-component drivers.. So don't worry about that.



We would like to keep the design clean and avoid any tight coupling here (our 
original design in this series tackled these).

Any strong reason not to go with a) or b) ?

Niranjana


Jason


Re: [RFC 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver

2016-11-28 Thread Vishwanathapura, Niranjana

On Thu, Nov 24, 2016 at 09:15:45AM -0700, Jason Gunthorpe wrote:

And will move the hfi_vnic module under
‘drivers/infiniband/ulp/hfi_vnic’.


I would prefer drivers/net/ethernet

This is clearly not a ULP since it doesn't use verbs.



I understand it is not using verbs, but the control path (ib_device client) is 
using verbs (IB MAD).
Our preference is to keep it somewhere under drivers/infiniband. Summarizing 
reasons again here,


- VNIC control driver (ib_device client) is an IB MAD agent.
- It is purely a software construct, encapsulates ethernet packets in Omni-path 
packet and depends on hfi1 driver here for HW access.


Doug,
Any comments?


Jason


Re: [RFC 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver

2016-11-24 Thread Vishwanathapura, Niranjana

On Thu, Nov 24, 2016 at 09:15:45AM -0700, Jason Gunthorpe wrote:

On Wed, Nov 23, 2016 at 04:08:25PM -0800, Vishwanathapura, Niranjana wrote:


In order to pass the hfi function pointers to the hfi_vnic ULP, I can,
a) Have hfi_vnic ULP define an interface API for hfi1 driver to call to
register its callback (as you pointed). Unfortunately there will be a module
dependency here.
Or,


That is probably backwards


b) Add a new member ‘struct vnic_ops’ either to the ib_device structure or
ib_port_immutable structure. As it is hfi1 specific, only hfi1 driver will
set it. No module dependency here.


You can add a hfi1_get_vnic_ops(struct ib_device *) and implement it
in your module..



In order to be truly device independent the hfi_vnic ULP should not depend on 
a device exported symbol. Instead device should register its functions with the 
ULP. Hence the approaches a) and b).


Niranjana


Jason


Re: [RFC 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver

2016-11-23 Thread Vishwanathapura, Niranjana

On Tue, Nov 22, 2016 at 05:49:32PM -0700, Jason Gunthorpe wrote:

> > We could add a custom Interface between HFI1 driver and hfi_vnic drivers
> > without involving a bus.
>
> hfi is already registering on the infiniband class, just use that.

I don't understand what you mean here?


Get the struct ib_device for the hfi and then do something to get hfi
specific function calls.

Or work it backwards with a _register function..



OK, thanks for your feedback.
We can make the hfi_vnic module as an ib client (which it is) like other ULPs, 
and do not have an in-built or custom bus for binding.
Then the hfi_vnic ULP by some mechanism will identify the device as hfi1 device 
and will only serve that device.


In order to pass the hfi function pointers to the hfi_vnic ULP, I can,
a) Have hfi_vnic ULP define an interface API for hfi1 driver to call to 
register its callback (as you pointed). Unfortunately there will be a module 
dependency here.

Or,
b) Add a new member ‘struct vnic_ops’ either to the ib_device structure or 
ib_port_immutable structure. As it is hfi1 specific, only hfi1 driver will set 
it. No module dependency here.


And will move the hfi_vnic module under ‘drivers/infiniband/ulp/hfi_vnic’.
All these will remove undue complexity and fit the driver in current design 
framework as per your suggestion.

Let me know your comments.

Niranjana



Jason


Re: [RFC 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver

2016-11-22 Thread Vishwanathapura, Niranjana

On Tue, Nov 22, 2016 at 05:04:37PM -0600, Christoph Lameter wrote:

On Tue, 22 Nov 2016, Vishwanathapura, Niranjana wrote:


Ok, I do understand Jason's point that we should probably not put this driver
under drivers/infiniband/sw/.., as this driver is not a HCA.
It is an ULP similar to ipoib, built on top of Omni-path irrespective of
whether we register a hfi_vnic_bus or a direct custom interface with HFI1.
This ULP will transmit and receive Omni-path packets over the fabric, and is
dependent on IB MAD interface and the HFI1 driver.


This is something that encapsulates IP (v4 right?) in something else.
Would belong into

linux/net/ipv4

You already have similar implementations there

See f.e. ipip.c, ip_tunnel.c and lots more (try
ls linux/net/ipv4/*tunnel*

)

If this is more like a device then it would belong into

linux/drivers/net/hfi or so (see also linux/drivers/net/ppp, plip,
loopback, etc etc)



It is Ethernet packet encapsulated in Omni-path header by hfi_vnic driver.
The packets are sent and received over the wire by the HFI1 device driven by 
HFI1 driver. The encapsulation information is obtained via IB MAD control 
interface.


Niranjana






Re: [RFC 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver

2016-11-22 Thread Vishwanathapura, Niranjana
Ok, I do understand Jason's point that we should probably not put this driver 
under drivers/infiniband/sw/.., as this driver is not a HCA.
It is an ULP similar to ipoib, built on top of Omni-path irrespective of 
whether we register a hfi_vnic_bus or a direct custom interface with HFI1.
This ULP will transmit and receive Omni-path packets over the fabric, and is 
dependent on IB MAD interface and the HFI1 driver.


Doug,
Will it be acceptable if we put it under 'drivers/infiniband/ulp/hfi_vnic'?

Niranjana



Re: [RFC 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver

2016-11-21 Thread Vishwanathapura, Niranjana

On Mon, Nov 21, 2016 at 04:31:18PM -0700, Jason Gunthorpe wrote:

+   ida_init(&hfi_vnic_ctrl_ida);
+   idr_init(&hfi_vnic_idr);
+
+   rc = bus_register(&hfi_vnic_bus);
>>>
>>>Why on earth do we need this? Didn't I give you enough grief for the
>>>psm stuff and now you want to create an entire subystem hidden away!?
>>>
>>>Use some netlink scheme to control your vnic like the rest of the net
>>>stack..
>>>
>>
>>The hfi_vnic_bus is only abstracting the HW independent functionality (like
>>Ethernet interface, encapsulation, IB MAD interface etc) with the HW
>>dependent functionality (sending/receiving packets on the wire).
>>Thus providing a cleaner interface between HW independent hfi_vnic Ethernet
>>and Control drivers and the HW dependent HFI1 driver.
>
>That doesn't explain anything, sound like you don't need it so get rid
>of it.



>>There is no other User interface here other than the standard Ethernet
>>interface through network stack.
>
>Good, then this isn't needed, because it doesn't provide a user interface.
>

Can you explain what exactly you are asking to get rid of here and why?


Get rid of the bus_register/etc as drivers do not get to call this.



There are many example drivers in kernel which are using bus_register() in
an initcall.
We could add a custom Interface between HFI1 driver and hfi_vnic drivers 
without involving a bus.
But using the existing bus model gave a lot of in-built flexibility in 
decoupling devices from the drivers.


Niranjana


Jason


Re: [RFC 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver

2016-11-21 Thread Vishwanathapura, Niranjana

On Mon, Nov 21, 2016 at 02:39:30PM -0700, Jason Gunthorpe wrote:

On Mon, Nov 21, 2016 at 01:30:17PM -0800, Vishwanathapura, Niranjana wrote:

On Sat, Nov 19, 2016 at 12:04:45PM -0700, Jason Gunthorpe wrote:
>On Fri, Nov 18, 2016 at 02:42:10PM -0800, Vishwanathapura, Niranjana wrote:
>>+HFI-VNIC DRIVER
>>+M: Dennis Dalessandro 
>>+M: Niranjana Vishwanathapura 
>>+L: linux-r...@vger.kernel.org
>>+S: Supported
>>+F: drivers/infiniband/sw/intel/vnic
>
>This is either a net driver or a ULP, no idea why it should go in this
>directory!?
>
>It sounds like an ethernet driver, so you should probably put it
>there...
>

The hfi_vnic is an Ethernet driver. It is similar to ULP like ipoib, but
instead it is Ethernet over Omni-path here.
The VNIC Ethernet (hfi_vnic) driver encapsulates Ethernet packets in an
Omni-path header.
The hfi_vnic Ethernet driver does not access the HW. It interfaces with HFI1
driver which sends/receives Omni-Path encapsulated Ethernet frames from HW.
Also, the VNIC control path driver (VEMA) is an IB MAD agent which should be
under drivers/infiniband/.. .
Putting the VNIC Ethernet driver and the VNIC control driver together under
a single module (hfi_vnic.ko) provided a simpler interface between them.

So, we have put the driver under drivers/infiniband/sw/intel for two reasons:
a) We have VNIC control driver (VEMA) which is an IB mad agent.
b) hfi_vnic Ethernet driver is dependent on HFI1 driver for sending/receiving
Omni-path encapsulated Ethernet packets from HW.


Sounds like this driver belongs under net/ someplace to me.

NAK on drivers/infiniband/sw/ at least - that dir is only for HCA
drivers.



I did not see any example IB mad agent outside drivers/infiniband folder.
I did see some netdev drivers outside the net/ folder (like ipoib and 
drivers/infiniband/hw/nes/).

The hfi_vnic Ethernet driver is entirely a soft driver without any HW access.
Also, any interface changes between hfi_vnic and the HFI driver makes it 
difficult to manage if they are under two subsystems/maintainers.

So drivers/infiniband is probably more appropriate for this driver.

We have 'rdmavt' and 'soft-roce' drivers under drivers/infiniband/sw/ folder 
which are soft drivers similar to hfi_vnic. So we decided to put 'hfi_vnic' 
under there.

Other places under drivers/infiniband where we can put this driver are,
drivers/infiniband/ulp/hfi_vnic
drivers/infiniband/hw/hfi_vnic


>>+/* hfi_vnic_bus_init - initialize the hfi vnic bus drvier */
>>+static int hfi_vnic_bus_init(void)
>>+{
>>+   int rc;
>>+
>>+   ida_init(&hfi_vnic_ctrl_ida);
>>+   idr_init(&hfi_vnic_idr);
>>+
>>+   rc = bus_register(&hfi_vnic_bus);
>
>Why on earth do we need this? Didn't I give you enough grief for the
>psm stuff and now you want to create an entire subsystem hidden away!?
>
>Use some netlink scheme to control your vnic like the rest of the net
>stack..
>

The hfi_vnic_bus is only abstracting the HW independent functionality (like
Ethernet interface, encapsulation, IB MAD interface etc) with the HW
dependent functionality (sending/receiving packets on the wire).
Thus providing a cleaner interface between HW independent hfi_vnic Ethernet
and Control drivers and the HW dependent HFI1 driver.


That doesn't explain anything, sound like you don't need it so get rid
of it.


There is no other User interface here other than the standard Ethernet
interface through network stack.


Good, then this isn't needed, because it doesn't provide a user interface.



Can you explain what exactly you are asking to get rid of here and why?


#ls /sys/bus/hfi_vnic_bus/devices/
   hfi_vnic_ctrl_00 /* control device for HFI instance 0 */
   hfi_vnic_00.01.00/* first VNIC port on HFI instance 0, port 1 */


Jason


Re: [RFC 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver

2016-11-21 Thread Vishwanathapura, Niranjana

On Sat, Nov 19, 2016 at 12:04:45PM -0700, Jason Gunthorpe wrote:

On Fri, Nov 18, 2016 at 02:42:10PM -0800, Vishwanathapura, Niranjana wrote:

+HFI-VNIC DRIVER
+M: Dennis Dalessandro 
+M: Niranjana Vishwanathapura 
+L: linux-r...@vger.kernel.org
+S: Supported
+F: drivers/infiniband/sw/intel/vnic


This is either a net driver or a ULP, no idea why it should go in this
directory!?

It sounds like an ethernet driver, so you should probably put it
there...



The hfi_vnic is an Ethernet driver. It is similar to ULP like ipoib, but 
instead it is Ethernet over Omni-path here.
The VNIC Ethernet (hfi_vnic) driver encapsulates Ethernet packets in an 
Omni-path header.
The hfi_vnic Ethernet driver does not access the HW. It interfaces with HFI1 
driver which sends/receives Omni-Path encapsulated Ethernet frames from HW.
Also, the VNIC control path driver (VEMA) is an IB MAD agent which should be 
under drivers/infiniband/.. .
Putting the VNIC Ethernet driver and the VNIC control driver together under a 
single module (hfi_vnic.ko) provided a simpler interface between them.


So, we have put the driver under drivers/infiniband/sw/intel for two reasons:
a) We have VNIC control driver (VEMA) which is an IB mad agent.
b) hfi_vnic Ethernet driver is dependent on HFI1 driver for sending/receiving 
Omni-path encapsulated Ethernet packets from HW.



+/* hfi_vnic_bus_init - initialize the hfi vnic bus drvier */
+static int hfi_vnic_bus_init(void)
+{
+   int rc;
+
+   ida_init(&hfi_vnic_ctrl_ida);
+   idr_init(&hfi_vnic_idr);
+
+   rc = bus_register(&hfi_vnic_bus);


Why on earth do we need this? Didn't I give you enough grief for the
psm stuff and now you want to create an entire subsystem hidden away!?

Use some netlink scheme to control your vnic like the rest of the net
stack..



The hfi_vnic_bus is only abstracting the HW independent functionality (like 
Ethernet interface, encapsulation, IB MAD interface etc) with the HW dependent 
functionality (sending/receiving packets on the wire).
Thus providing a cleaner interface between HW independent hfi_vnic Ethernet and 
Control drivers and the HW dependent HFI1 driver.


There is no other User interface here other than the standard Ethernet 
interface through network stack.


HFI1 driver creates VNIC devices on the hfi_vnic_bus as below and the hfi_vnic 
Ethernet and Control drivers drive them.


#ls /sys/bus/hfi_vnic_bus/devices/
   hfi_vnic_ctrl_00 /* control device for HFI instance 0 */
   hfi_vnic_00.01.00/* first VNIC port on HFI instance 0, port 1 */
   hfi_vnic_00.01.01/* second VNIC port on HFI instance 0, port 1 */

The design is as shown in the below diagram.

+---+ +--+
|   | |   Linux  |
| IB MAD| |  Network |
|   | |   Stack  |
+---+ +--+
 |   |
 |   |
++
||
| HFI VNIC Module|
|(HFI VNIC Netdev and EMA drivers)   |
|  (HW independent)  |
++
 |
 |
++
|  HFI VNIC Bus  |
++
 |
 |
++
||
|  HFI1 Driver with VNIC support |
|   (HW dependent)   |
++

Niranjana


Jason


[RFC 10/10] IB/hfi1: VNIC SDMA support

2016-11-18 Thread Vishwanathapura, Niranjana
HFI1 VNIC SDMA support enables transmission of VNIC packets over SDMA.
Map VNIC queues to SDMA engines and support halting and wakeup of the
VNIC queues.

Change-Id: I2d2d23bda9fb8a7194d9722e23bc69b110cdcf86
Reviewed-by: Dennis Dalessandro 
Signed-off-by: Niranjana Vishwanathapura 
---
 drivers/infiniband/hw/hfi1/hfi.h |   1 +
 drivers/infiniband/hw/hfi1/vnic.h|  30 +++-
 drivers/infiniband/hw/hfi1/vnic_device.c |   2 +-
 drivers/infiniband/hw/hfi1/vnic_main.c   |  22 ++-
 drivers/infiniband/hw/hfi1/vnic_sdma.c   | 260 +++
 5 files changed, 311 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 2ff3453..f476188 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -855,6 +855,7 @@ struct hfi1_asic_data {
 /* Virtual NIC information */
 struct hfi1_vnic_data {
struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
+   struct kmem_cache *txreq_cache;
u8 num_vports;
struct hfi_vnic_ctrl_device *ctrl_dev;
struct idr vesw_idr;
diff --git a/drivers/infiniband/hw/hfi1/vnic.h 
b/drivers/infiniband/hw/hfi1/vnic.h
index d91c35b..4bdfe2b 100644
--- a/drivers/infiniband/hw/hfi1/vnic.h
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -49,6 +49,7 @@
 
 #include "hfi_vnic.h"
 #include "hfi.h"
+#include "sdma.h"
 
 #define HFI1_VNIC_ICRC_LEN   4
 #define HFI1_VNIC_TAIL_LEN   1
@@ -90,6 +91,26 @@
 #define HFI1_VNIC_SC_SHIFT  4
 
 /**
+ * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
+ * @dd - device data pointer
+ * @sde - sdma engine
+ * @vinfo - vnic info pointer
+ * @wait - iowait structure
+ * @stx - sdma tx request
+ * @state - vnic Tx ring SDMA state
+ * @q_idx - vnic Tx queue index
+ */
+struct hfi1_vnic_sdma {
+   struct hfi1_devdata *dd;
+   struct sdma_engine  *sde;
+   struct hfi1_vnic_vport_info *vinfo;
+   struct iowait wait;
+   struct sdma_txreq stx;
+   unsigned int state;
+   u8 q_idx;
+};
+
+/**
  * struct hfi1_vnic_notifier - VNIC notifer structure
  * @cb - vnic callback function
  */
@@ -104,6 +125,7 @@ struct hfi1_vnic_notifier {
  * @event_flags: event notification flags
  * @notifier: vnic notifier
  * @skbq: Array of queues for received socket buffers
+ * @sdma: VNIC SDMA structure per TXQ
  */
 struct hfi1_vnic_vport_info {
struct hfi1_devdata *dd;
@@ -112,7 +134,8 @@ struct hfi1_vnic_vport_info {
DECLARE_BITMAP(event_flags, HFI_VNIC_NUM_EVTS);
struct hfi_vnic_device *vdev;
 
-   struct sk_buff_head skbq[HFI1_NUM_VNIC_CTXT];
+   struct sk_buff_headskbq[HFI1_NUM_VNIC_CTXT];
+   struct hfi1_vnic_sdma  sdma[HFI1_VNIC_MAX_TXQ];
 };
 
 static inline struct hfi1_devdata *vnic_dev2dd(struct hfi_vnic_device *vdev)
@@ -131,10 +154,15 @@ static inline void hfi1_vnic_update_pad(unsigned char 
*pad, u8 plen)
 /* vnic hfi1 internal functions */
 int hfi1_vnic_setup(struct hfi1_devdata *dd);
 void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
 int hfi1_vnic_add_ctrl_port(struct hfi1_devdata *dd, struct device *parent);
 void hfi1_vnic_rem_ctrl_port(struct hfi1_devdata *dd);
 
 void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet);
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo);
+bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+   u8 q_idx);
 
 /* vnic device bus ops */
 int hfi1_vnic_init(struct hfi_vnic_device *vdev);
diff --git a/drivers/infiniband/hw/hfi1/vnic_device.c 
b/drivers/infiniband/hw/hfi1/vnic_device.c
index 468e197..5fb1a49 100644
--- a/drivers/infiniband/hw/hfi1/vnic_device.c
+++ b/drivers/infiniband/hw/hfi1/vnic_device.c
@@ -85,7 +85,7 @@ static int hfi1_vdev_create(struct hfi_vnic_ctrl_device *cdev,
return -ENOMEM;
 
vinfo->dd = dd;
-   hfi_info.num_tx_q = 1;
+   hfi_info.num_tx_q = dd->chip_sdma_engines;
hfi_info.num_rx_q = HFI1_NUM_VNIC_CTXT;
hfi_info.cap = HFI_VNIC_CAP_SG;
vdev = hfi_vnic_device_register(cdev, port_num, vport_num, vinfo,
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c 
b/drivers/infiniband/hw/hfi1/vnic_main.c
index 82e30bd..a21e4cd 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -294,15 +294,21 @@ int hfi1_vnic_put_skb(struct hfi_vnic_device *vdev,
 
 u8 hfi1_vnic_select_queue(struct hfi_vnic_device *vdev, u8 vl, u8 entropy)
 {
-   return 0;
+   struct hfi1_devdata *dd = (struct hfi1_devdata *)vdev->cdev->hfi_priv;
+   struct sdma_engine *sde;
+
+   sde = sdma_select_engine_vl(dd, entropy, vl);
+   return sde->this_idx;
 }
 
 bool hfi1_vnic_get_write_avail(struct hfi_vnic_device *vdev, u8 q_idx)
 {
+   struct hfi1_vnic_vport_info *vinfo = vdev->hfi_priv;
+
if (q_idx >= vdev->hfi_info.num_tx_q)
return false

[RFC 08/10] IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) driver

2016-11-18 Thread Vishwanathapura, Niranjana
HFI VEMA driver interfaces with the Infiniband MAD stack to exchange the
management information packets with the Ethernet Manager (EM).
It interfaces with the HFI VNIC netdev driver to SET/GET the management
information. The information exchanged with the EM includes class port
details, encapsulation configuration, various counters, unicast and
multicast MAC list and the MAC table. It also supports sending traps
to the EM.

Change-Id: I7439f96858c9019455da1e924a0201eb27177b85
Reviewed-by: Dennis Dalessandro 
Signed-off-by: Sadanand Warrier 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Tanya K Jajodia 
Signed-off-by: Sudeep Dutt 
---
 drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile |2 +-
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h |9 +
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c   |9 +-
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_vema.c | 1024 
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_vema_iface.c   |2 +-
 5 files changed, 1043 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema.c

diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile 
b/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile
index 375cd09..e05b72b 100644
--- a/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile
@@ -5,4 +5,4 @@ ccflags-y += -I$(src)/../include
 obj-$(CONFIG_HFI_VNIC) += hfi_vnic.o
 
 hfi_vnic-y := hfi_vnic_netdev.o hfi_vnic_encap.o hfi_vnic_ethtool.o \
-  hfi_vnic_vema_iface.o
+  hfi_vnic_vema.o hfi_vnic_vema_iface.o
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h 
b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
index 8ebed89..fbebf68 100644
--- a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
@@ -268,6 +268,8 @@ struct hfi_vnic_rx_queue {
  * @mactbl_lock: mac table lock
  * @stats_lock: statistics lock
  * @flow_tbl: flow to default port redirection table
+ * @trap_timeout: trap timeout
+ * @trap_count: no. of traps allowed within timeout period
  * @q_sum_cntrs: per queue EM summary counters
  * @q_err_cntrs: per queue EM error counters
  * @q_rx_logic_errors: per queue rx logic (default) errors
@@ -301,6 +303,8 @@ struct hfi_vnic_adapter {
struct mutex stats_lock;
 
u8 flow_tbl[HFI_VNIC_FLOW_TBL_SIZE];
+   unsigned long trap_timeout;
+   u8trap_count;
 
struct __hfi_vnic_summary_counters  q_sum_cntrs[HFI_VNIC_MAX_QUEUE];
struct __hfi_vnic_error_countersq_err_cntrs[HFI_VNIC_MAX_QUEUE];
@@ -410,4 +414,9 @@ void hfi_vnic_set_per_veswport_info(struct hfi_vnic_adapter 
*adapter,
 void hfi_vnic_vema_report_event(struct hfi_vnic_adapter *adapter, u8 event);
 void hfi_vnic_set_ethtool_ops(struct net_device *ndev);
 
+int hfi_vnic_vema_init(void);
+void hfi_vnic_vema_deinit(void);
+void hfi_vnic_vema_send_trap(struct hfi_vnic_adapter *adapter,
+struct __hfi_veswport_trap *data, u32 lid);
+
 #endif /* _HFI_VNIC_INTERNAL_H */
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c 
b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c
index 75a3fd2..4ee5bb6 100644
--- a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c
@@ -855,9 +855,15 @@ static int __init hfi_vnic_init_module(void)
pr_info("HFI Virtual Network Driver - %s\n",
hfi_vnic_driver_version);
 
-   rc = hfi_vnic_driver_register(&hfi_vnic_drv);
+   rc = hfi_vnic_vema_init();
if (rc)
+   return rc;
+
+   rc = hfi_vnic_driver_register(&hfi_vnic_drv);
+   if (rc) {
pr_err("VNIC driver register failed %d\n", rc);
+   hfi_vnic_vema_deinit();
+   }
 
return rc;
 }
@@ -867,6 +873,7 @@ static int __init hfi_vnic_init_module(void)
 static void __exit hfi_vnic_exit_module(void)
 {
hfi_vnic_driver_unregister(&hfi_vnic_drv);
+   hfi_vnic_vema_deinit();
 }
 module_exit(hfi_vnic_exit_module);
 
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema.c 
b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema.c
new file mode 100644
index 000..b947cdf
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema.c
@@ -0,0 +1,1024 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; witho

[RFC 09/10] IB/hfi1: Virtual Network Interface Controller (VNIC) support

2016-11-18 Thread Vishwanathapura, Niranjana
HFI1 HW specific support for VNIC functionality. Add support to create
VNIC devices on HFI VNIC Bus. Also implement the bus operations to
allocate resources, transmit and receive of Omni-Path encapsulated
Ethernet packets.

Dynamically allocate a set of contexts for VNIC when the first vnic
port is instantiated. Allocate VNIC contexts from user contexts pool
and return them back to the same pool while freeing up. Set aside
enough MSI-X interrupts for VNIC contexts and assign them when the
contexts are allocated. On the receive side, use an RSM rule to
spread TCP/UDP streams among VNIC contexts.

Change-Id: I1b275a7585d6c2e3573039a9137014031f1f5c7e
Reviewed-by: Dennis Dalessandro 
Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andrzej Kacprowski 
---
 drivers/infiniband/hw/hfi1/Kconfig|   2 +-
 drivers/infiniband/hw/hfi1/Makefile   |   3 +-
 drivers/infiniband/hw/hfi1/aspm.h |  13 +-
 drivers/infiniband/hw/hfi1/chip.c | 270 ---
 drivers/infiniband/hw/hfi1/chip.h |   2 +
 drivers/infiniband/hw/hfi1/debugfs.c  |   6 +-
 drivers/infiniband/hw/hfi1/driver.c   |  78 -
 drivers/infiniband/hw/hfi1/file_ops.c |  25 +-
 drivers/infiniband/hw/hfi1/hfi.h  |  50 ++-
 drivers/infiniband/hw/hfi1/init.c |  44 ++-
 drivers/infiniband/hw/hfi1/mad.c  |   8 +-
 drivers/infiniband/hw/hfi1/pio.c  |  17 +
 drivers/infiniband/hw/hfi1/pio.h  |   6 +
 drivers/infiniband/hw/hfi1/sysfs.c|   2 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c |   6 +-
 drivers/infiniband/hw/hfi1/user_pages.c   |   3 +-
 drivers/infiniband/hw/hfi1/vnic.h | 155 +
 drivers/infiniband/hw/hfi1/vnic_device.c  | 168 +
 drivers/infiniband/hw/hfi1/vnic_main.c| 555 ++
 drivers/infiniband/hw/hfi1/vnic_sdma.c|  60 
 include/rdma/opa_port_info.h  |   2 +-
 21 files changed, 1376 insertions(+), 99 deletions(-)
 create mode 100644 drivers/infiniband/hw/hfi1/vnic.h
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_device.c
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_main.c
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_sdma.c

diff --git a/drivers/infiniband/hw/hfi1/Kconfig 
b/drivers/infiniband/hw/hfi1/Kconfig
index f6ea088..6c07117 100644
--- a/drivers/infiniband/hw/hfi1/Kconfig
+++ b/drivers/infiniband/hw/hfi1/Kconfig
@@ -1,6 +1,6 @@
 config INFINIBAND_HFI1
tristate "Intel OPA Gen1 support"
-   depends on X86_64 && INFINIBAND_RDMAVT && I2C
+   depends on X86_64 && INFINIBAND_RDMAVT && I2C && HFI_VNIC_BUS
select MMU_NOTIFIER
select CRC32
select I2C_ALGOBIT
diff --git a/drivers/infiniband/hw/hfi1/Makefile 
b/drivers/infiniband/hw/hfi1/Makefile
index 0cf97a0..c579f98 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -6,13 +6,14 @@
 # Called from the kernel module build system.
 #
 obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
+ccflags-y += -I$(src)/../../sw/intel/vnic/include
 
 hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
eprom.o file_ops.o firmware.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
-   verbs_txreq.o
+   verbs_txreq.o vnic_main.o vnic_device.o vnic_sdma.o
 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
 
 CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/aspm.h 
b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3..3a01b69 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long 
data)
spin_unlock_irqrestore(&rcd->aspm_lock, flags);
 }
 
-/* Disable interrupt processing for verbs contexts when PSM contexts are open 
*/
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
 static inline void aspm_disable_all(struct hfi1_devdata *dd)
 {
struct hfi1_ctxtdata *rcd;
unsigned long flags;
unsigned i;
 
-   for (i = 0; i < dd->first_user_ctxt; i++) {
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
del_timer_sync(&rcd->aspm_timer);
spin_lock_irqsave(&rcd->aspm_lock, flags);
@@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd)
if (aspm_mode != ASPM_MODE_DYNAMIC)
return;
 
-   for (i = 0; i < dd->first_user_ctxt; i++) {
+   for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
spin_lock_irqsave(&rcd->aspm_lock, flags);
rcd->aspm_intr_enable = true;
@@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
(unsigned long)rcd);
rcd->aspm_intr_supported = rcd->dd->aspm_supp

  1   2   >