Re: [PATCH V1 08/16] i40iw: add files for iwarp interface

2015-12-24 Thread Faisal Latif
On Thu, Dec 24, 2015 at 10:25:08AM +0200, Or Gerlitz wrote:
> On 12/24/2015 9:31 AM, Faisal Latif wrote:
> >On Wed, Dec 23, 2015 at 08:42:01AM -0800, Or Gerlitz wrote:
> >>On 12/22/2015 1:13 AM, Faisal Latif wrote:
> >>>+
> >>>+enum i40iw_memreg_type {
> >>>+  IW_MEMREG_TYPE_MEM = 0x,
> >>>+  IW_MEMREG_TYPE_QP = 0x0001,
> >>>+  IW_MEMREG_TYPE_CQ = 0x0002,
> >>>+  IW_MEMREG_TYPE_MW = 0x0003,
> >>>+  IW_MEMREG_TYPE_FMR = 0x0004,
> >>>+  IW_MEMREG_TYPE_FMEM = 0x0005,
> >>>+};
> >>Can't you re-use IB core values or derive that from the actual uverbs
> >>command?
> >I did not see anything which will have types that I needed.
> 
> what do you need? what is the role of this enum?

We register memory from the user library for QP and CQ rings as well as for
user memory registration, and need to distinguish between them in the driver.

> 
> >It will be confusing otherwise.
> 
> 
> >I will be reducing number of types from here though.
> 
> so why can some of it go? Are those dead values which aren't used by the code?

Yes, I had added all the different types for user requests. But for the time being,
we will just keep MEM, CQ and QP, and add others (like Memory Windows) as needed.
> 
> Or.
> 
> >Thanks
> >Faisal
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] Display extended counter set if available

2015-12-24 Thread Hal Rosenstock
On 12/24/2015 11:22 AM, eran ben elisha wrote:
> On Mon, Dec 21, 2015 at 4:20 PM, Christoph Lameter  wrote:
>> V2->V3: Add check for NOIETF mode and create special table
>>   for that case.
>>
>> Check if the extended counters are available and if so
>> create the proper extended and additional counters.
>>
>> Reviewed-by: Hal Rosenstock 
>> Signed-off-by: Christoph Lameter 
>> ---
>>  drivers/infiniband/core/sysfs.c | 104 
>> +++-
>>  include/rdma/ib_pma.h   |   1 +
>>  2 files changed, 104 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/infiniband/core/sysfs.c 
>> b/drivers/infiniband/core/sysfs.c
>> index 34dcc23..b179fca 100644
>> --- a/drivers/infiniband/core/sysfs.c
>> +++ b/drivers/infiniband/core/sysfs.c
>> @@ -320,6 +320,13 @@ struct port_table_attribute port_pma_attr_##_name = {   
>>\
>> .attr_id = IB_PMA_PORT_COUNTERS ,   \
>>  }
>>
>> +#define PORT_PMA_ATTR_EXT(_name, _width, _offset)  \
>> +struct port_table_attribute port_pma_attr_ext_##_name = {  \
>> +   .attr  = __ATTR(_name, S_IRUGO, show_pma_counter, NULL),\
>> +   .index = (_offset) | ((_width) << 16),  \
>> +   .attr_id = IB_PMA_PORT_COUNTERS_EXT ,   \
>> +}
>> +
>>  /*
>>   * Get a Perfmgmt MAD block of data.
>>   * Returns error code or the number of bytes retrieved.
>> @@ -400,6 +407,11 @@ static ssize_t show_pma_counter(struct ib_port *p, 
>> struct port_attribute *attr,
>> ret = sprintf(buf, "%u\n",
>>   be32_to_cpup((__be32 *)data));
>> break;
>> +   case 64:
>> +   ret = sprintf(buf, "%llu\n",
>> +   be64_to_cpup((__be64 *)data));
>> +   break;
>> +
>> default:
>> ret = 0;
>> }
>> @@ -424,6 +436,18 @@ static PORT_PMA_ATTR(port_rcv_data , 13, 
>> 32, 224);
>>  static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
>>  static PORT_PMA_ATTR(port_rcv_packets  , 15, 32, 288);
>>
>> +/*
>> + * Counters added by extended set
>> + */
>> +static PORT_PMA_ATTR_EXT(port_xmit_data, 64,  64);
>> +static PORT_PMA_ATTR_EXT(port_rcv_data , 64, 128);
>> +static PORT_PMA_ATTR_EXT(port_xmit_packets , 64, 192);
>> +static PORT_PMA_ATTR_EXT(port_rcv_packets  , 64, 256);
>> +static PORT_PMA_ATTR_EXT(unicast_xmit_packets  , 64, 320);
>> +static PORT_PMA_ATTR_EXT(unicast_rcv_packets   , 64, 384);
>> +static PORT_PMA_ATTR_EXT(multicast_xmit_packets, 64, 448);
>> +static PORT_PMA_ATTR_EXT(multicast_rcv_packets , 64, 512);
>> +
>>  static struct attribute *pma_attrs[] = {
>> _pma_attr_symbol_error.attr.attr,
>> _pma_attr_link_error_recovery.attr.attr,
>> @@ -444,11 +468,65 @@ static struct attribute *pma_attrs[] = {
>> NULL
>>  };
>>
>> +static struct attribute *pma_attrs_ext[] = {
>> +   _pma_attr_symbol_error.attr.attr,
>> +   _pma_attr_link_error_recovery.attr.attr,
>> +   _pma_attr_link_downed.attr.attr,
>> +   _pma_attr_port_rcv_errors.attr.attr,
>> +   _pma_attr_port_rcv_remote_physical_errors.attr.attr,
>> +   _pma_attr_port_rcv_switch_relay_errors.attr.attr,
>> +   _pma_attr_port_xmit_discards.attr.attr,
>> +   _pma_attr_port_xmit_constraint_errors.attr.attr,
>> +   _pma_attr_port_rcv_constraint_errors.attr.attr,
>> +   _pma_attr_local_link_integrity_errors.attr.attr,
>> +   _pma_attr_excessive_buffer_overrun_errors.attr.attr,
>> +   _pma_attr_VL15_dropped.attr.attr,
>> +   _pma_attr_ext_port_xmit_data.attr.attr,
>> +   _pma_attr_ext_port_rcv_data.attr.attr,
>> +   _pma_attr_ext_port_xmit_packets.attr.attr,
>> +   _pma_attr_ext_port_rcv_packets.attr.attr,
>> +   _pma_attr_ext_unicast_rcv_packets.attr.attr,
>> +   _pma_attr_ext_unicast_xmit_packets.attr.attr,
>> +   _pma_attr_ext_multicast_rcv_packets.attr.attr,
>> +   _pma_attr_ext_multicast_xmit_packets.attr.attr,
>> +   NULL
>> +};
>> +
>> +static struct attribute *pma_attrs_noietf[] = {
>> +   _pma_attr_symbol_error.attr.attr,
>> +   _pma_attr_link_error_recovery.attr.attr,
>> +   _pma_attr_link_downed.attr.attr,
>> +   _pma_attr_port_rcv_errors.attr.attr,
>> +   _pma_attr_port_rcv_remote_physical_errors.attr.attr,
>> +   _pma_attr_port_rcv_switch_relay_errors.attr.attr,
>> +   _pma_attr_port_xmit_discards.attr.attr,
>> +   _pma_attr_port_xmit_constraint_errors.attr.attr,
>> +   _pma_attr_port_rcv_constraint_errors.attr.attr,
>> +   _pma_attr_local_link_integrity_errors.attr.attr,
>> +   _pma_attr_excessive_buffer_overrun_errors.attr.attr,
>> +   _pma_attr_VL15_dropped.attr.attr,
>> +   _pma_attr_ext_port_xmit_data.attr.attr,
>> + 

Re: [RFC PATCH 00/15] staging/rdma/hfi1: Initial patches to add rdmavt support in HFI1

2015-12-24 Thread ira.weiny
On Tue, Dec 22, 2015 at 06:27:57PM -0800, gre...@linuxfoundation.org wrote:
> On Tue, Dec 22, 2015 at 02:15:08PM -0500, ira.weiny wrote:
> > On Mon, Dec 21, 2015 at 05:01:48PM -0800, gre...@linuxfoundation.org wrote:

[snip]

> > > 
> > > No, git is good :)
> > > 
> > > > How do we handle changes which affect both qib and hfi1?
> > > 
> > > I don't know, now this gets messy...
> > > 
> > 
> > Agreed and this is what we are worried about.
> > 
> > Can we do what Dan and Doug have proposed in the past and have Doug take 
> > over
> > the staging/rdma sub-tree?
> > 
> > http://driverdev.linuxdriverproject.org/pipermail/driverdev-devel/2015-November/081922.html
> > 
> > I think the upcoming merge window is a reasonable time for him to do that.
> 
> Ok, but keeping on top of all of the generic staging patches that come
> in is a tough thing to do, that's up to Doug, if he is ready for it...
> 

To help this process, once the change over happens, we will help to monitor
driverdev-devel for anything submitted to staging/rdma.  If something is
submitted which was not to Doug and linux-rdma we can handle alerting the
submitter to make sure it gets submitted to Doug as per the current MAINTAINERS
file.

Hope this helps,
Ira

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] RDMA/ocrdma: Dispatch only port event when port state changes

2015-12-24 Thread Devesh Sharma
Dispatch only port event to IB stack when port state changes.
Don't explicitly modify qps to error. Let application listen to
port events on async event queue or let QP fail with retry-exceeded
completion error.

Signed-off-by: Padmanabh Ratnakar 
Signed-off-by: Devesh Sharma 
---
 drivers/infiniband/hw/ocrdma/ocrdma_main.c | 23 ---
 1 file changed, 23 deletions(-)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c 
b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 62b7009..ebe40b4 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -386,30 +386,7 @@ static int ocrdma_open(struct ocrdma_dev *dev)
 
 static int ocrdma_close(struct ocrdma_dev *dev)
 {
-   int i;
-   struct ocrdma_qp *qp, **cur_qp;
struct ib_event err_event;
-   struct ib_qp_attr attrs;
-   int attr_mask = IB_QP_STATE;
-
-   attrs.qp_state = IB_QPS_ERR;
-   mutex_lock(>dev_lock);
-   if (dev->qp_tbl) {
-   cur_qp = dev->qp_tbl;
-   for (i = 0; i < OCRDMA_MAX_QP; i++) {
-   qp = cur_qp[i];
-   if (qp && qp->ibqp.qp_type != IB_QPT_GSI) {
-   /* change the QP state to ERROR */
-   _ocrdma_modify_qp(>ibqp, , attr_mask);
-
-   err_event.event = IB_EVENT_QP_FATAL;
-   err_event.element.qp = >ibqp;
-   err_event.device = >ibdev;
-   ib_dispatch_event(_event);
-   }
-   }
-   }
-   mutex_unlock(>dev_lock);
 
err_event.event = IB_EVENT_PORT_ERR;
err_event.element.port_num = 1;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] RDMA/ocrdma: Depend on async link events from CNA

2015-12-24 Thread Devesh Sharma
Recently Doug Ledford reported a deadlock happening
between the ocrdma load sequence and the NetworkManager service
issuing "open" on the be2net interface.

The deadlock happens when any be2net hook (e.g. open/close) is called
in parallel to insmod ocrdma.ko.

A. be2net is sending administrative open/close event to ocrdma holding
   device_list_mutex. It does this from ndo_open/ndo_stop hooks of be2net.
   So sequence of locks is rtnl_lock---> device_list lock

B.  When new ocrdma roce device gets registered, infiniband stack now
takes rtnl_lock in ib_register_device() in GID initialization routines.
So sequence of locks in this path is device_list lock ---> rtnl_lock.

This improper locking sequence causes deadlock.

With this patch we stop using administrative open and close events
injected by be2net driver. These events were used to dispatch PORT_ACTIVE
and PORT_ERROR events to the IB-stack. This patch implements logic
to receive async-link-events generated by the CNA whenever a link-state-change
is detected. From now on, these async-events will be used to dispatch
PORT_ACTIVE and PORT_ERROR events to the IB-stack.

Depending on async-events from CNA removes the need to hold device-list-mutex
and thus breaks the busy-wait scenario.

Reported-by: Doug Ledford 
CC: Sathya Perla 
Signed-off-by: Padmanabh Ratnakar 
Signed-off-by: Selvin Xavier 
Signed-off-by: Devesh Sharma 
---
 drivers/infiniband/hw/ocrdma/ocrdma.h   | 10 ++
 drivers/infiniband/hw/ocrdma/ocrdma_hw.c| 42 -
 drivers/infiniband/hw/ocrdma/ocrdma_hw.h|  4 ++-
 drivers/infiniband/hw/ocrdma/ocrdma_main.c  | 34 ++--
 drivers/infiniband/hw/ocrdma/ocrdma_sli.h   | 49 ++---
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c |  2 +-
 6 files changed, 119 insertions(+), 22 deletions(-)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h 
b/drivers/infiniband/hw/ocrdma/ocrdma.h
index ae80590..040bb8b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -232,6 +232,10 @@ struct phy_info {
u16 interface_type;
 };
 
+enum ocrdma_flags {
+   OCRDMA_FLAGS_LINK_STATUS_INIT = 0x01
+};
+
 struct ocrdma_dev {
struct ib_device ibdev;
struct ocrdma_dev_attr attr;
@@ -287,6 +291,7 @@ struct ocrdma_dev {
atomic_t update_sl;
u16 pvid;
u32 asic_id;
+   u32 flags;
 
ulong last_stats_time;
struct mutex stats_lock; /* provide synch for debugfs operations */
@@ -591,4 +596,9 @@ static inline u8 ocrdma_is_enabled_and_synced(u32 state)
(state & OCRDMA_STATE_FLAG_SYNC);
 }
 
+static inline u8 ocrdma_get_ae_link_state(u32 ae_state)
+{
+   return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT);
+}
+
 #endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c 
b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 4fc2bb4..283ca84 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -579,6 +579,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
 
cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE);
cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE);
+   /* Request link events on this  MQ. */
+   cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_LINK_EVE_CODE);
 
cmd->async_cqid_ringsize = cq->id;
cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
@@ -819,20 +821,42 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev 
*dev,
}
 }
 
+static void ocrdma_process_link_state(struct ocrdma_dev *dev,
+ struct ocrdma_ae_mcqe *cqe)
+{
+   struct ocrdma_ae_lnkst_mcqe *evt;
+   u8 lstate;
+
+   evt = (struct ocrdma_ae_lnkst_mcqe *)cqe;
+   lstate = ocrdma_get_ae_link_state(evt->speed_state_ptn);
+
+   if (!(lstate & OCRDMA_AE_LSC_LLINK_MASK))
+   return;
+
+   if (dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT)
+   ocrdma_update_link_state(dev, (lstate & OCRDMA_LINK_ST_MASK));
+}
+
 static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
 {
/* async CQE processing */
struct ocrdma_ae_mcqe *cqe = ae_cqe;
u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >>
OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;
-
-   if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE)
+   switch (evt_code) {
+   case OCRDMA_ASYNC_LINK_EVE_CODE:
+   ocrdma_process_link_state(dev, cqe);
+   break;
+   case OCRDMA_ASYNC_RDMA_EVE_CODE:
ocrdma_dispatch_ibevent(dev, cqe);
-   else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE)
+   break;
+   case OCRDMA_ASYNC_GRP5_EVE_CODE:
ocrdma_process_grp5_aync(dev, cqe);
-   else
+ 

[PATCH 0/4] ocrdma bug fix for linux-4.4-rc

2015-12-24 Thread Devesh Sharma
This patch series fixes critical bugs introduced by recent
changes in the Linux InfiniBand stack.

Patch 0001 fixes a vlan-id assignment issue in the presence of PFC.
This bug was introduced recently; the patch fixes
dbf727de7440 ('IB/core: Use GID table in AH creation and dmac resolution')

Patches 0002 to 0004 fix a deadlock condition reported by Doug Ledford
while testing linux-4.4-rc5. The deadlock happens when any be2net hook
(e.g. open/close) is called in parallel to insmod ocrdma.ko.

A. be2net is sending administrative open/close event to ocrdma holding
   device_list_mutex. It does this from ndo_open/ndo_stop hooks of be2net.
   So sequence of locks is rtnl_lock---> device_list lock

B.  When new ocrdma roce device gets registered, infiniband stack now
takes rtnl_lock in ib_register_device() in GID initialization routines.
So sequence of locks in this path is device_list lock ---> rtnl_lock.

This improper locking sequence causes deadlock.

Patch 0002 removes unwanted code that forces all active QPs to the error state
using a management command. It is simpler to let active QPs move
to error on their own due to a completion error after the port error is
reported.

Patch 0003 makes IB-dispatch-event mechanism independent of administrative
open/close events injected from be2net driver. Instead of those events, now
ocrdma relies on async-events generated from CNA. Thus, solving the problem
of keeping device_list_mutex busy under rtnl-lock. Moving from administrative
events to CNA async-events does not change the functionality of ocrdma driver
in any way.

Patch 0004 Removes the dead code from be2net driver.

Devesh Sharma (4):
  RDMA/ocrdma: Fix vlan-id assignment in qp parameters
  RDMA/ocrdma: Dispatch only port event when port state changes
  RDMA/ocrdma: Depend on async link events from CNA
  RDMA/be2net: Remove open and close entry points

 drivers/infiniband/hw/ocrdma/ocrdma.h   | 10 +
 drivers/infiniband/hw/ocrdma/ocrdma_hw.c| 49 -
 drivers/infiniband/hw/ocrdma/ocrdma_hw.h|  4 +-
 drivers/infiniband/hw/ocrdma/ocrdma_main.c  | 57 +
 drivers/infiniband/hw/ocrdma/ocrdma_sli.h   | 49 +++--
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c |  2 +-
 drivers/net/ethernet/emulex/benet/be.h  |  2 -
 drivers/net/ethernet/emulex/benet/be_main.c |  4 --
 drivers/net/ethernet/emulex/benet/be_roce.c | 36 --
 drivers/net/ethernet/emulex/benet/be_roce.h |  4 +-
 10 files changed, 124 insertions(+), 93 deletions(-)

-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] RDMA/be2net: Remove open and close entry points

2015-12-24 Thread Devesh Sharma
Recently Doug Ledford reported a deadlock happening
between the ocrdma load sequence and the NetworkManager service
issuing "open" on the be2net interface.

The deadlock happens when any be2net hook (e.g. open/close) is called
in parallel to insmod ocrdma.ko.

A. be2net is sending administrative open/close event to ocrdma holding
   device_list_mutex. It does this from ndo_open/ndo_stop hooks of be2net.
   So sequence of locks is rtnl_lock---> device_list lock

B.  When new ocrdma roce device gets registered, infiniband stack now
takes rtnl_lock in ib_register_device() in GID initialization routines.
So sequence of locks in this path is device_list lock ---> rtnl_lock.

This improper locking sequence causes deadlock.

In order to resolve the above deadlock condition, ocrdma introduced a
patch to stop listening to administrative open/close events generated by the
be2net driver. It now depends on the link-state-change async-event generated by
the CNA. This change leaves behind dead code which used to generate administrative
open/close events. This patch cleans up all that dead code from be2net.

Reported-by: Doug Ledford 
CC: Sathya Perla 
Signed-off-by: Padmanabh Ratnakar 
Signed-off-by: Selvin Xavier 
Signed-off-by: Devesh Sharma 
---
 drivers/net/ethernet/emulex/benet/be.h  |  2 --
 drivers/net/ethernet/emulex/benet/be_main.c |  4 
 drivers/net/ethernet/emulex/benet/be_roce.c | 36 -
 drivers/net/ethernet/emulex/benet/be_roce.h |  4 +---
 4 files changed, 1 insertion(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be.h 
b/drivers/net/ethernet/emulex/benet/be.h
index d463563..6ee78c2 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -848,8 +848,6 @@ void be_roce_dev_remove(struct be_adapter *);
 /*
  * internal function to open-close roce device during ifup-ifdown.
  */
-void be_roce_dev_open(struct be_adapter *);
-void be_roce_dev_close(struct be_adapter *);
 void be_roce_dev_shutdown(struct be_adapter *);
 
 #endif /* BE_H */
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c 
b/drivers/net/ethernet/emulex/benet/be_main.c
index b6ad029..ff2ff89 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3432,8 +3432,6 @@ static int be_close(struct net_device *netdev)
 
be_disable_if_filters(adapter);
 
-   be_roce_dev_close(adapter);
-
if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
for_all_evt_queues(adapter, eqo, i) {
napi_disable(>napi);
@@ -3601,8 +3599,6 @@ static int be_open(struct net_device *netdev)
be_link_status_update(adapter, link_status);
 
netif_tx_start_all_queues(netdev);
-   be_roce_dev_open(adapter);
-
 #ifdef CONFIG_BE2NET_VXLAN
if (skyhawk_chip(adapter))
vxlan_get_rx_port(netdev);
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c 
b/drivers/net/ethernet/emulex/benet/be_roce.c
index 6036820..4089156 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.c
+++ b/drivers/net/ethernet/emulex/benet/be_roce.c
@@ -116,40 +116,6 @@ void be_roce_dev_remove(struct be_adapter *adapter)
}
 }
 
-static void _be_roce_dev_open(struct be_adapter *adapter)
-{
-   if (ocrdma_drv && adapter->ocrdma_dev &&
-   ocrdma_drv->state_change_handler)
-   ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
-BE_DEV_UP);
-}
-
-void be_roce_dev_open(struct be_adapter *adapter)
-{
-   if (be_roce_supported(adapter)) {
-   mutex_lock(_adapter_list_lock);
-   _be_roce_dev_open(adapter);
-   mutex_unlock(_adapter_list_lock);
-   }
-}
-
-static void _be_roce_dev_close(struct be_adapter *adapter)
-{
-   if (ocrdma_drv && adapter->ocrdma_dev &&
-   ocrdma_drv->state_change_handler)
-   ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
-BE_DEV_DOWN);
-}
-
-void be_roce_dev_close(struct be_adapter *adapter)
-{
-   if (be_roce_supported(adapter)) {
-   mutex_lock(_adapter_list_lock);
-   _be_roce_dev_close(adapter);
-   mutex_unlock(_adapter_list_lock);
-   }
-}
-
 void be_roce_dev_shutdown(struct be_adapter *adapter)
 {
if (be_roce_supported(adapter)) {
@@ -177,8 +143,6 @@ int be_roce_register_driver(struct ocrdma_driver *drv)
 
_be_roce_dev_add(dev);
netdev = dev->netdev;
-   if (netif_running(netdev) && netif_oper_up(netdev))
-   _be_roce_dev_open(dev);
}
mutex_unlock(_adapter_list_lock);
return 0;
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h 

[PATCH 1/4] RDMA/ocrdma: Fix vlan-id assignment in qp parameters

2015-12-24 Thread Devesh Sharma
vlan-id is wrongly set to 0 when PFC is enabled.
Set the vlan-id configured by the user in the QP parameters.
In case a vlan interface is not used, warn the user to
configure a vlan, and assign vlan-id 0 in the QP parameters.

Fixes: dbf727de7440 ('IB/core: Use GID table in AH creation and dmac 
resolution')
Cc: Matan Barak 
Signed-off-by: Devesh Sharma 
---
 drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c 
b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 30f67be..4fc2bb4 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -2515,9 +2515,10 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
ocrdma_cpu_to_le32(>params.sgid[0], sizeof(cmd->params.sgid));
cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
 
-   if (vlan_id < 0x1000) {
-   if (dev->pfc_state) {
-   vlan_id = 0;
+   if (vlan_id == 0x)
+   vlan_id = 0;
+   if (vlan_id || dev->pfc_state) {
+   if (!vlan_id) {
pr_err("ocrdma%d:Using VLAN with PFC is recommended\n",
   dev->id);
pr_err("ocrdma%d:Using VLAN 0 for this connection\n",
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V1 0/3] Add cross-channel support

2015-12-24 Thread Doug Ledford
On 12/24/2015 05:41 AM, Or Gerlitz wrote:
> On 12/24/2015 12:00 PM, Christoph Hellwig wrote:
>> On Thu, Dec 24, 2015 at 10:02:29AM +0200, Or Gerlitz wrote:
>>> We had consensus among the reviewers that the 1st patch ("IB/core: Align
>>> coding style of ib_device_cap_flags structure") is wrong cleanup which
>>> basically is (1) unneeded (2) creates more damage (git blame and such,
>>> non-applicable to uapi, more) than benefit, etc -- finally Leon was
>>> convinced too [1].
>> It's not really an issue vs uapi.  Using the weird BIT() macro
>> would have been, but without it I think this cleanup is ok, even if I
>> personally wouldn't have done it.  git-blame isn't really a major
>> issue either, as you can blame past revisions.
> 
> I personally wouldn't have done the cleanup either, and I managed to
> convince Leon to drop it, so we had consensus among the developers. The
> maintainer didn't express a different opinion and he took the wrong step -- so
> we're asking to fix it, that's all.

That's not true.  I didn't bother to speak up in the thread, and I read
all of the comments.  I didn't move to BIT macros for the reason that
Christoph thinks they are crap and I didn't bother to prove him wrong
and took his word for it.  However, just aligning the macros in the area
that the patch touched is reasonable (versus aligning the entire file
just for the fun of it), and git blame will continue working fine.  My
taking of this patch was intentional (in fact, the patch didn't apply, I
had to redo it entirely by hand because the comment changes caused by
Christoph's MR cleanup patches kept this patch from applying at all).
In any case, it wasn't a mistake, and there is nothing to fix up.

>>
>>> Leon will re-spin in the coming 1-2 hours V2, could please pick it
>>> instead
>>> of V1, when people agree on direction X and you are not against it,
>>> lets do
>>> X and not Y.
>> It would be great if we could stop rebasing whats already in the tree
>> for the benefit of everyone building on top of this.  For example I just
>> finished rebasing my series to move many constants including this one
>> to the uapi headers, and I'd hate to rebase it once again now that
>> the dust has settled.
> 
> The root issue here is that nothing was picked before 4.4-rc6, so we're
> in a situation where rebases are needed in the own-maintainer tree
> (github) to make things right. No way to avoid that.
> 
> We should aim that for 4.6 and onward, code for -next will start getting
> in around rc1-2 and then things will be more robust, etc
> 
> Or.
> 
> Or.
> 


-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [PATCH 3/3] Display extended counter set if available

2015-12-24 Thread ira.weiny
On Thu, Dec 24, 2015 at 06:22:14PM +0200, eran ben elisha wrote:
> On Mon, Dec 21, 2015 at 4:20 PM, Christoph Lameter  wrote:

[snip]

> >
> > +/*
> > + * Figure out which counter table to use depending on
> > + * the device capabilities.
> > + */
> > +static struct attribute_group *get_counter_table(struct ib_device *dev)
> > +{
> > +   struct ib_class_port_info cpi;
> > +
> > +   if (get_perf_mad(dev, 0, IB_PMA_CLASS_PORT_INFO,
> 
> Why 0, need to pass port num.
> See proposal Matan and myself sent.
> 

Passing a port num to a ClassPortInfo query makes no sense?

Your proposal sets a field which is wrong (I think it sets part of TrapGID if
my math is correct) in the ClassPortInfo query as per the spec.

I think the fix needs to be somewhere else.

Ira

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V1 15/16] i40iw: add entry in rdma_netlink

2015-12-24 Thread Or Gerlitz

On 12/24/2015 9:05 AM, Faisal Latif wrote:


>Why does the iwarp port mapper implementation have to be repeated in each
>driver? Can you put your code in a common place and avoid the duplication?
>
>root@r-dcs58 hw]# git grep RDMA_NL_ nes
>nes/nes.c:  [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
>nes/nes.c:  [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
>nes/nes.c:  [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump =
>iwpm_add_and_query_mapping_cb},
>nes/nes.c:  [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
>nes/nes.c:  [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
>nes/nes.c:  [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
>nes/nes.c:  [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump =
>iwpm_ack_mapping_info_cb}
>nes/nes.c:  if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS,
>nes_nl_cb_table))
>nes/nes.c:  ret = iwpm_init(RDMA_NL_NES);
>nes/nes.c:  ibnl_remove_client(RDMA_NL_NES);
>nes/nes.c:  ibnl_remove_client(RDMA_NL_NES);
>nes/nes.c:  iwpm_exit(RDMA_NL_NES);
>nes/nes_cm.c: _sockaddr, RDMA_NL_NES);
>nes/nes_cm.c:   return iwpm_remove_mapping(_sockaddr, RDMA_NL_NES);
>nes/nes_cm.c:   _addr, RDMA_NL_NES);
>nes/nes_cm.c:   iwpm_err = iwpm_register_pid(_reg_msg,
>RDMA_NL_NES);
>nes/nes_cm.c:   iwpm_err = iwpm_add_mapping(_msg,
>RDMA_NL_NES);
>nes/nes_cm.c:   iwpm_err = iwpm_register_pid(_reg_msg, RDMA_NL_NES);
>nes/nes_cm.c:   iwpm_err = iwpm_add_and_query_mapping(_msg,
>RDMA_NL_NES);
>



The i40iw iwarp driver registers with the port mapper and uses its services. Besides,
that is not in the scope of this patch series.



You are asked a question by reviewer and your reply is

(1) YES, I did C & P from driver X to my driver

(2) explaining why C is right goes beyond the scope of reviewing my driver

This isn't how things work in upstream.

If needed, talk to the upstream Intel networking folks, they can assist 
you catching up on upstream practices.


Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V1 08/16] i40iw: add files for iwarp interface

2015-12-24 Thread Or Gerlitz

On 12/24/2015 9:31 AM, Faisal Latif wrote:

On Wed, Dec 23, 2015 at 08:42:01AM -0800, Or Gerlitz wrote:

On 12/22/2015 1:13 AM, Faisal Latif wrote:

+
+enum i40iw_memreg_type {
+   IW_MEMREG_TYPE_MEM = 0x,
+   IW_MEMREG_TYPE_QP = 0x0001,
+   IW_MEMREG_TYPE_CQ = 0x0002,
+   IW_MEMREG_TYPE_MW = 0x0003,
+   IW_MEMREG_TYPE_FMR = 0x0004,
+   IW_MEMREG_TYPE_FMEM = 0x0005,
+};

Can't you re-use IB core values or derive that from the actual uverbs
command?

I did not see anything which will have types that I needed.


what do you need? what is the role of this enum?


It will be confusing otherwise.




I will be reducing number of types from here though.


so why can some of it go? Are those dead values which aren't used by the code?

Or.


Thanks
Faisal


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/cma: cma_match_net_dev needs to take into account port_num

2015-12-24 Thread Or Gerlitz

On 12/24/2015 9:57 AM, Matan Barak wrote:

I totally agree that it's better to use the local IP address and not
just get a random device by using 127.0.0.1. You could get a specific
device by binding it, but then - use its local IP instead of
127.0.0.1.


Yes guys, it might be better but the user can do that. However, loopback
a la 127.0.0.1 has been working in the rdma-cm since day one (2.6.18) and was
broken recently when Eth ports are around.


Lets fix.

Indeed, this is 4.5 and not 4.4 material.

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V1 0/3] Add cross-channel support

2015-12-24 Thread Or Gerlitz

On 12/24/2015 5:31 AM, Doug Ledford wrote:

On 12/20/2015 12:16 PM, Leon Romanovsky wrote:

Leon Romanovsky (3):
   IB/core: Align coding style of ib_device_cap_flags structure
   IB/core: Add cross-channel support
   IB/mlx5: Add driver cross-channel support

  drivers/infiniband/core/uverbs_cmd.c |  5 ++-
  drivers/infiniband/hw/mlx5/cq.c  |  7 +++-
  drivers/infiniband/hw/mlx5/main.c|  3 ++
  drivers/infiniband/hw/mlx5/mlx5_ib.h | 16 
  drivers/infiniband/hw/mlx5/qp.c  | 54 ++-
  include/linux/mlx5/qp.h  |  3 ++
  include/rdma/ib_verbs.h  | 71 +---
  7 files changed, 117 insertions(+), 42 deletions(-)



I took the series as is.  Please make sure to resubmit the libibverbs portion 
of these changes with the requested man page updates.


Doug,

Wait.

We had consensus among the reviewers that the 1st patch ("IB/core: Align 
coding style of ib_device_cap_flags structure") is wrong cleanup which 
basically is (1) unneeded (2) creates more damage (git blame and such,  
non-applicable to uapi, more) than benefit, etc -- finally Leon was 
convinced too [1].


Leon will re-spin V2 in the coming 1-2 hours, could you please pick it
instead of V1? When people agree on direction X and you are not against
it, let's do X and not Y.


thanks,

Or.

[1] http://marc.info/?t=14506106803=1=2


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 0/2] Add cross-channel support

2015-12-24 Thread Leon Romanovsky
From: Leon Romanovsky 

The following set of patches implements cross-channel (CC) support
in the RDMA core and in the mlx5 infiniband driver.

The cross-channel feature allows executing WQEs that involve
cross-channel synchronization of IO operations on different QPs.

Complex applications usually require synchronization of IO
operations from multiple sources before continuing their
execution. In order to implement this the host software
needs to handle completions from each one of the receive
queues (arriving in arbitrary order), process the data after
last message arrival and only then post work request on the
send queue to send the combined data to its destination.

Execution of such an operation generates multiple interrupts at
an unpredictable time with huge overhead for interrupt handling
and context switch.

The current submission adds synchronization primitives which give
the ability to perform conditional flows, and a following submission
will introduce arithmetic calculation offload.

Synchronization abilities combined with arithmetic calculations
will allow programming complex flows with a single function call,
thereby significantly reducing the overhead associated with IO processing.

Patch #1 adds CQ and QP initialization flags to RDMA core structures
in order to support cross-channel feature.
Patch #2 implements mlx5 infiniband driver configuration logic.

Changes from v1:
  * Enrich cover message and commit messages.
  * Drop patch with checkpatch warning fix.

Changes from v0:
  * Set UAR to be the same for QP and CQ.

Leon Romanovsky (2):
  IB/core: Add cross-channel support
  IB/mlx5: Add driver cross-channel support

 drivers/infiniband/core/uverbs_cmd.c |  5 +++-
 drivers/infiniband/hw/mlx5/cq.c  |  7 -
 drivers/infiniband/hw/mlx5/main.c|  3 ++
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 16 +++
 drivers/infiniband/hw/mlx5/qp.c  | 54 +---
 include/linux/mlx5/qp.h  |  3 ++
 include/rdma/ib_verbs.h  | 11 
 7 files changed, 87 insertions(+), 12 deletions(-)

-- 
1.7.12.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 2/2] IB/mlx5: Add driver cross-channel support

2015-12-24 Thread Leon Romanovsky
From: Leon Romanovsky 

Add support for cross-channel functionality to the mlx5
driver. This includes the ability to ignore overrun for a CQ
which is intended for cross-channel use, to export the device capability and
to configure the QP to be sync master/slave queues.

The cross-channel enabled QP supports combination of
three possible properties:
  * WQE processing on the receive queue of this QP
  * WQE processing on the send queue of this QP
  * WQE are supported on the send queue

Signed-off-by: Leon Romanovsky 
Reviewed-by: Sagi Grimberg 
---
 drivers/infiniband/hw/mlx5/cq.c  |  7 -
 drivers/infiniband/hw/mlx5/main.c|  3 ++
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 16 +++
 drivers/infiniband/hw/mlx5/qp.c  | 54 +---
 include/linux/mlx5/qp.h  |  3 ++
 5 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 3dfd287256d6..c363b71c4b0b 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -760,7 +760,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
int eqn;
int err;
 
-   if (attr->flags)
+   if (check_cq_create_flags(attr->flags))
return ERR_PTR(-EINVAL);
 
if (entries < 0)
@@ -779,6 +779,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
spin_lock_init(&cq->lock);
cq->resize_buf = NULL;
cq->resize_umem = NULL;
+   cq->create_flags = attr->flags;
 
if (context) {
err = create_cq_user(dev, udata, context, cq, entries,
@@ -796,6 +797,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 
cq->cqe_size = cqe_size;
cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+
+   if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+   cqb->ctx.cqe_sz_flags |= (1 << 1);
+
cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
if (err)
diff --git a/drivers/infiniband/hw/mlx5/main.c 
b/drivers/infiniband/hw/mlx5/main.c
index a51b5943019e..41ebecd24422 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -294,6 +294,9 @@ static int mlx5_ib_init_device_flags(struct ib_device 
*ibdev)
ibdev->odp_caps = dev->odp_caps;
 #endif
 
+   if (MLX5_CAP_GEN(mdev, cd))
+   props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
+
return 0;
 }
 
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 633347260b79..fdbd761fd7fd 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -85,6 +85,10 @@ enum mlx5_ib_mad_ifc_flags {
MLX5_MAD_IFC_NET_VIEW   = 4,
 };
 
+enum {
+   MLX5_CROSS_CHANNEL_UUAR = 0,
+};
+
 struct mlx5_ib_ucontext {
struct ib_ucontext  ibucontext;
struct list_headdb_page_list;
@@ -242,6 +246,9 @@ struct mlx5_ib_cq_buf {
 enum mlx5_ib_qp_flags {
MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0,
MLX5_IB_QP_SIGNATURE_HANDLING   = 1 << 1,
+   MLX5_IB_QP_CROSS_CHANNEL= 1 << 2,
+   MLX5_IB_QP_MANAGED_SEND = 1 << 3,
+   MLX5_IB_QP_MANAGED_RECV = 1 << 4,
 };
 
 struct mlx5_umr_wr {
@@ -284,6 +291,7 @@ struct mlx5_ib_cq {
struct mlx5_ib_cq_buf  *resize_buf;
struct ib_umem *resize_umem;
int cqe_size;
+   u32 create_flags;
 };
 
 struct mlx5_ib_srq {
@@ -662,4 +670,12 @@ static inline int is_qp1(enum ib_qp_type qp_type)
 #define MLX5_MAX_UMR_SHIFT 16
 #define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
 
+static inline u32 check_cq_create_flags(u32 flags)
+{
+   /*
+* It returns non-zero value for unsupported CQ
+* create flags, otherwise it returns zero.
+*/
+   return (flags & ~IB_CQ_FLAGS_IGNORE_OVERRUN);
+}
 #endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 307bdbca8938..c18bf42c51ac 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -615,18 +615,23 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct 
ib_pd *pd,
/*
 * TBD: should come from the verbs when we have the API
 */
-   uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
-   if (uuarn < 0) {
-   mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
-   mlx5_ib_dbg(dev, "reverting to medium latency\n");
-   uuarn = alloc_uuar(&context->uuari, 
MLX5_IB_LATENCY_CLASS_MEDIUM);
+   if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+   /* In CROSS_CHANNEL CQ and QP must use the same UAR */
+   uuarn = MLX5_CROSS_CHANNEL_UUAR;
+   else {
+ 

[PATCH V2 1/2] IB/core: Add cross-channel support

2015-12-24 Thread Leon Romanovsky
From: Leon Romanovsky 

The cross-channel feature allows executing WQEs that involve
synchronization of I/O operations on different QPs.

This capability makes it possible to program complex flows with a single
function call, thereby significantly reducing the overhead associated
with I/O processing.

The queue pairs can be configured to work as a “sync master queue”
or “sync slave queues”.

The added flags are:

1. Device capability flag IB_DEVICE_CROSS_CHANNEL for the
   devices that can perform cross-channel operations.

2. CQ property flag IB_CQ_FLAGS_IGNORE_OVERRUN to disable CQ overrun
   check. In cross-channel mode, the send/receive queues will forward
   their completions to managing QP.

3. QP property flags to indicate if queues are slave or master:
   * IB_QP_CREATE_MANAGED_SEND indicates that posted send work requests
 will not be executed immediately and requires enabling.
   * IB_QP_CREATE_MANAGED_RECV indicates that posted receive work
 requests will not be executed immediately and requires enabling.
   * IB_QP_CREATE_CROSS_CHANNEL declares the QP to work in cross-channel
 mode. If IB_QP_CREATE_MANAGED_SEND and IB_QP_CREATE_MANAGED_RECV are
 not provided, this QP will be sync master queue, else it will be sync
 slave.

Signed-off-by: Leon Romanovsky 
Reviewed-by: Sagi Grimberg 
---
 drivers/infiniband/core/uverbs_cmd.c |  5 -
 include/rdma/ib_verbs.h  | 11 +++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c 
b/drivers/infiniband/core/uverbs_cmd.c
index 1add53653825..63ddd6a122b0 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1829,7 +1829,10 @@ static int create_qp(struct ib_uverbs_file *file,
  sizeof(cmd->create_flags))
attr.create_flags = cmd->create_flags;
 
-   if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+   if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+   IB_QP_CREATE_CROSS_CHANNEL |
+   IB_QP_CREATE_MANAGED_SEND |
+   IB_QP_CREATE_MANAGED_RECV)) {
ret = -EINVAL;
goto err_put;
}
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index a7dbbfc9d202..d00f3e2e39ac 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -141,6 +141,13 @@ enum ib_device_cap_flags {
IB_DEVICE_MEM_WINDOW_TYPE_2B= (1<<24),
IB_DEVICE_RC_IP_CSUM= (1<<25),
IB_DEVICE_RAW_IP_CSUM   = (1<<26),
+   /*
+* Devices should set IB_DEVICE_CROSS_CHANNEL if they
+* support execution of WQEs that involve synchronization
+* of I/O operations with single completion queue managed
+* by hardware.
+*/
+   IB_DEVICE_CROSS_CHANNEL = (1<<27),
IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
IB_DEVICE_SIGNATURE_HANDOVER= (1<<30),
IB_DEVICE_ON_DEMAND_PAGING  = (1<<31),
@@ -186,6 +193,7 @@ struct ib_odp_caps {
 
 enum ib_cq_creation_flags {
IB_CQ_FLAGS_TIMESTAMP_COMPLETION   = 1 << 0,
+   IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1,
 };
 
 struct ib_cq_init_attr {
@@ -823,6 +831,9 @@ enum ib_qp_type {
 enum ib_qp_create_flags {
IB_QP_CREATE_IPOIB_UD_LSO   = 1 << 0,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK   = 1 << 1,
+   IB_QP_CREATE_CROSS_CHANNEL  = 1 << 2,
+   IB_QP_CREATE_MANAGED_SEND   = 1 << 3,
+   IB_QP_CREATE_MANAGED_RECV   = 1 << 4,
IB_QP_CREATE_NETIF_QP   = 1 << 5,
IB_QP_CREATE_SIGNATURE_EN   = 1 << 6,
IB_QP_CREATE_USE_GFP_NOIO   = 1 << 7,
-- 
1.7.12.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH rdma-next V2 10/32] IB/rxe: User/kernel shared queues infrastructure

2015-12-24 Thread Kamal Heib
mmap routines

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_mmap.c | 173 
 1 file changed, 173 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_mmap.c

diff --git a/drivers/staging/rdma/rxe/rxe_mmap.c 
b/drivers/staging/rdma/rxe/rxe_mmap.c
new file mode 100644
index 0000000..fbe3e1d
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_mmap.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+void rxe_mmap_release(struct kref *ref)
+{
+   struct rxe_mmap_info *ip = container_of(ref,
+   struct rxe_mmap_info, ref);
+   struct rxe_dev *rxe = to_rdev(ip->context->device);
+
+   spin_lock_bh(&rxe->pending_lock);
+
+   if (!list_empty(&ip->pending_mmaps))
+   list_del(&ip->pending_mmaps);
+
+   spin_unlock_bh(&rxe->pending_lock);
+
+   vfree(ip->obj); /* buf */
+   kfree(ip);
+}
+
+/*
+ * open and close keep track of how many times the memory region is mapped,
+ * to avoid releasing it.
+ */
+static void rxe_vma_open(struct vm_area_struct *vma)
+{
+   struct rxe_mmap_info *ip = vma->vm_private_data;
+
+   kref_get(&ip->ref);
+}
+
+static void rxe_vma_close(struct vm_area_struct *vma)
+{
+   struct rxe_mmap_info *ip = vma->vm_private_data;
+
+   kref_put(&ip->ref, rxe_mmap_release);
+}
+
+static struct vm_operations_struct rxe_vm_ops = {
+   .open = rxe_vma_open,
+   .close = rxe_vma_close,
+};
+
+/**
+ * rxe_mmap - create a new mmap region
+ * @context: the IB user context of the process making the mmap() call
+ * @vma: the VMA to be initialized
+ * Return zero if the mmap is OK. Otherwise, return an errno.
+ */
+int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+   struct rxe_dev *rxe = to_rdev(context->device);
+   unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+   unsigned long size = vma->vm_end - vma->vm_start;
+   struct rxe_mmap_info *ip, *pp;
+   int ret;
+
+   /*
+* Search the device's list of objects waiting for a mmap call.
+* Normally, this list is very short since a call to create a
+* CQ, QP, or SRQ is soon followed by a call to mmap().
+*/
+   spin_lock_bh(&rxe->pending_lock);
+   list_for_each_entry_safe(ip, pp, &rxe->pending_mmaps, pending_mmaps) {
+   if (context != ip->context || (__u64)offset != ip->info.offset)
+   continue;
+
+   /* Don't allow a mmap larger than the object. */
+   if (size > ip->info.size) {
+   pr_err("mmap region is larger than the object!\n");
+   spin_unlock_bh(&rxe->pending_lock);
+   ret = -EINVAL;
+   goto done;
+   }
+
+   goto found_it;
+   }
+   pr_warn("unable to find pending mmap info\n");
+   spin_unlock_bh(&rxe->pending_lock);
+   ret = -EINVAL;
+   goto done;
+
+found_it:
+   list_del_init(&ip->pending_mmaps);
+   spin_unlock_bh(&rxe->pending_lock);
+
+   ret = remap_vmalloc_range(vma, ip->obj, 0);
+   if (ret) {
+   pr_err("rxe: err %d from remap_vmalloc_range\n", ret);
+   goto done;
+   }
+
+   vma->vm_ops = &rxe_vm_ops;
+

[PATCH rdma-next V2 05/32] IB/rxe: Default rxe device and port parameters

2015-12-24 Thread Kamal Heib
Default/initial rxe device parameter settings.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_param.h | 177 +++
 1 file changed, 177 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_param.h

diff --git a/drivers/staging/rdma/rxe/rxe_param.h 
b/drivers/staging/rdma/rxe/rxe_param.h
new file mode 100644
index 0000000..320b8e5
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_param.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_PARAM_H
+#define RXE_PARAM_H
+
+static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu)
+{
+   if (mtu < 256)
+   return 0;
+   else if (mtu < 512)
+   return IB_MTU_256;
+   else if (mtu < 1024)
+   return IB_MTU_512;
+   else if (mtu < 2048)
+   return IB_MTU_1024;
+   else if (mtu < 4096)
+   return IB_MTU_2048;
+   else
+   return IB_MTU_4096;
+}
+
+/* Find the IB mtu for a given network MTU. */
+static inline enum ib_mtu eth_mtu_int_to_enum(int mtu)
+{
+   mtu -= RXE_MAX_HDR_LENGTH;
+
+   return rxe_mtu_int_to_enum(mtu);
+}
+
+/* default/initial rxe device parameter settings */
+enum rxe_device_param {
+   RXE_FW_VER  = 0,
+   RXE_MAX_MR_SIZE = -1ull,
+   RXE_PAGE_SIZE_CAP   = 0xfffff000,
+   RXE_VENDOR_ID   = 0,
+   RXE_VENDOR_PART_ID  = 0,
+   RXE_HW_VER  = 0,
+   RXE_MAX_QP  = 0x10000,
+   RXE_MAX_QP_WR   = 0x4000,
+   RXE_MAX_INLINE_DATA = 400,
+   RXE_DEVICE_CAP_FLAGS= IB_DEVICE_BAD_PKEY_CNTR
+   | IB_DEVICE_BAD_QKEY_CNTR
+   | IB_DEVICE_AUTO_PATH_MIG
+   | IB_DEVICE_CHANGE_PHY_PORT
+   | IB_DEVICE_UD_AV_PORT_ENFORCE
+   | IB_DEVICE_PORT_ACTIVE_EVENT
+   | IB_DEVICE_SYS_IMAGE_GUID
+   | IB_DEVICE_RC_RNR_NAK_GEN
+   | IB_DEVICE_SRQ_RESIZE,
+   RXE_MAX_SGE = 27,
+   RXE_MAX_SGE_RD  = 0,
+   RXE_MAX_CQ  = 16384,
+   RXE_MAX_LOG_CQE = 13,
+   RXE_MAX_MR  = 2 * 1024,
+   RXE_MAX_PD  = 0x7ffc,
+   RXE_MAX_QP_RD_ATOM  = 128,
+   RXE_MAX_EE_RD_ATOM  = 0,
+   RXE_MAX_RES_RD_ATOM = 0x3f000,
+   RXE_MAX_QP_INIT_RD_ATOM = 128,
+   RXE_MAX_EE_INIT_RD_ATOM = 0,
+   RXE_ATOMIC_CAP  = 1,
+   RXE_MAX_EE  = 0,
+   RXE_MAX_RDD = 0,
+   RXE_MAX_MW  = 0,
+   RXE_MAX_RAW_IPV6_QP = 0,
+   RXE_MAX_RAW_ETHY_QP = 0,
+   RXE_MAX_MCAST_GRP   = 8192,
+   RXE_MAX_MCAST_QP_ATTACH = 56,
+   RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000,
+   RXE_MAX_AH  = 100,
+   RXE_MAX_FMR = 2 * 1024,
+   RXE_MAX_MAP_PER_FMR = 100,
+   RXE_MAX_SRQ   

[PATCH rdma-next V2 03/32] IB/rxe: IBA header types and methods

2015-12-24 Thread Kamal Heib
Add declarations for data structures used to hold per opcode
and per work request opcode tables.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_hdr.h | 950 +
 1 file changed, 950 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_hdr.h

diff --git a/drivers/staging/rdma/rxe/rxe_hdr.h 
b/drivers/staging/rdma/rxe/rxe_hdr.h
new file mode 100644
index 0000000..d8bc4a3
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_hdr.h
@@ -0,0 +1,950 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_HDR_H
+#define RXE_HDR_H
+
+/* extracted information about a packet carried in an sk_buff struct fits in
+ * the skbuff cb array. Must be at most 48 bytes.
+ */
+struct rxe_pkt_info {
+   struct rxe_dev  *rxe;   /* device that owns packet */
+   struct rxe_qp   *qp;/* qp that owns packet */
+   struct rxe_send_wqe *wqe;   /* send wqe */
+   u8  *hdr;   /* points to bth */
+   u32 mask;   /* useful info about pkt */
+   u32 psn;/* bth psn of packet */
+   u16 pkey_index; /* partition of pkt */
+   u16 paylen; /* length of bth - icrc */
+   u8  port_num;   /* port pkt received on */
+   u8  opcode; /* bth opcode of packet */
+   u8  offset; /* bth offset from pkt->hdr */
+};
+
+#define SKB_TO_PKT(skb) ((struct rxe_pkt_info *)(skb)->cb)
+#define PKT_TO_SKB(pkt) container_of((void *)(pkt), struct sk_buff, cb)
+
+/*
+ * IBA header types and methods
+ *
+ * Some of these are for reference and completeness only since
+ * rxe does not currently support RD transport
+ * most of this could be moved into IB core. ib_pack.h has
+ * part of this but is incomplete
+ *
+ * Header specific routines to insert/extract values to/from headers
+ * the routines that are named __hhh_(set_)fff() take a pointer to a
+ * hhh header and get(set) the fff field. The routines named
+ * hhh_(set_)fff take a packet info struct and find the
+ * header and field based on the opcode in the packet.
+ * Conversion to/from network byte order from cpu order is also done.
+ */
+
+#define RXE_ICRC_SIZE  (4)
+#define RXE_MAX_HDR_LENGTH (80)
+
+/**
+ * Base Transport Header
+ 
**/
+struct rxe_bth {
+   u8  opcode;
+   u8  flags;
+   __be16  pkey;
+   __be32  qpn;
+   __be32  apsn;
+};
+
+#define BTH_TVER   (0)
+#define BTH_DEF_PKEY   (0xffff)
+
+#define BTH_SE_MASK    (0x80)
+#define BTH_MIG_MASK   (0x40)
+#define BTH_PAD_MASK   (0x30)
+#define BTH_TVER_MASK  (0x0f)
+#define BTH_FECN_MASK  (0x80000000)
+#define BTH_BECN_MASK  (0x40000000)
+#define BTH_RESV6A_MASK (0x3f000000)
+#define BTH_QPN_MASK   (0x00ffffff)
+#define BTH_ACK_MASK   (0x80000000)
+#define BTH_RESV7_MASK (0x7f000000)
+#define BTH_PSN_MASK   (0x00ffffff)
+
+static inline 

[PATCH rdma-next V2 32/32] IB/rxe: TODO file while in staging

2015-12-24 Thread Kamal Heib
From: Amir Vadai 

Things to do in order to get out of the staging subtree.

Signed-off-by: Amir Vadai 
Signed-off-by: Kamal Heib 
---
 drivers/staging/rdma/rxe/TODO | 18 ++
 1 file changed, 18 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/TODO

diff --git a/drivers/staging/rdma/rxe/TODO b/drivers/staging/rdma/rxe/TODO
new file mode 100644
index 0000000..b3cb8a4
--- /dev/null
+++ b/drivers/staging/rdma/rxe/TODO
@@ -0,0 +1,18 @@
+Aug, 2015
+
+- Remove software processing of IB protocol and place in library for use
+  by qib, ipath (if still present), hfi1, and soft-roce
+- Do not use tasklet in completion flow
+- Need to free resources if user space didn't.
+- Share structures from ib_user_verbs.h instead of copying in ib_rxe.h
+- Move IBA header types and methods from rxe_hdr.h into IB core
+- Cleanup members of rxe_pkt_info that already exists in packet header
+- Refactor post_send_one function to get better performance.
+- Refactor rxe_mem struct to be clear what is type of memory that it's holding.
+- Use single reference count from the pool to the device, instead of having a 
single
+  reference on the device kept by each element in the pool.
+- Calculate ICRC for incoming packets.
+- Use hash table to hold net_info instead of fixed size array used now.
+
+Sep, 2015
+- Support work request interface memory registration.
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH rdma-next V2 18/32] IB/rxe: Queue Pair (QP) handling

2015-12-24 Thread Kamal Heib
Functions to manipulate QP objects.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
---
 drivers/staging/rdma/rxe/rxe_qp.c | 835 ++
 1 file changed, 835 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_qp.c

diff --git a/drivers/staging/rdma/rxe/rxe_qp.c 
b/drivers/staging/rdma/rxe/rxe_qp.c
new file mode 100644
index 0000000..dcc3e2d
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_qp.c
@@ -0,0 +1,835 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *Redistribution and use in source and binary forms, with or
+ *without modification, are permitted provided that the following
+ *conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+#include "rxe_task.h"
+
+char *rxe_qp_state_name[] = {
+   [QP_STATE_RESET]= "RESET",
+   [QP_STATE_INIT] = "INIT",
+   [QP_STATE_READY]= "READY",
+   [QP_STATE_DRAIN]= "DRAIN",
+   [QP_STATE_DRAINED]  = "DRAINED",
+   [QP_STATE_ERROR]= "ERROR",
+};
+
+static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
+ int has_srq)
+{
+   if (cap->max_send_wr > rxe->attr.max_qp_wr) {
+   pr_warn("invalid send wr = %d > %d\n",
+   cap->max_send_wr, rxe->attr.max_qp_wr);
+   goto err1;
+   }
+
+   if (cap->max_send_sge > rxe->attr.max_sge) {
+   pr_warn("invalid send sge = %d > %d\n",
+   cap->max_send_sge, rxe->attr.max_sge);
+   goto err1;
+   }
+
+   if (!has_srq) {
+   if (cap->max_recv_wr > rxe->attr.max_qp_wr) {
+   pr_warn("invalid recv wr = %d > %d\n",
+   cap->max_recv_wr, rxe->attr.max_qp_wr);
+   goto err1;
+   }
+
+   if (cap->max_recv_sge > rxe->attr.max_sge) {
+   pr_warn("invalid recv sge = %d > %d\n",
+   cap->max_recv_sge, rxe->attr.max_sge);
+   goto err1;
+   }
+   }
+
+   if (cap->max_inline_data > rxe->max_inline_data) {
+   pr_warn("invalid max inline data = %d > %d\n",
+   cap->max_inline_data, rxe->max_inline_data);
+   goto err1;
+   }
+
+   return 0;
+
+err1:
+   return -EINVAL;
+}
+
+int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
+{
+   struct ib_qp_cap *cap = &init->cap;
+   struct rxe_port *port;
+   int port_num = init->port_num;
+
+   if (!init->recv_cq || !init->send_cq) {
+   pr_warn("missing cq\n");
+   goto err1;
+   }
+
+   if (rxe_qp_chk_cap(rxe, cap, !!init->srq))
+   goto err1;
+
+   if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) {
+   if (port_num < 1 || port_num > rxe->num_ports) {
+   pr_warn("invalid port = %d\n", port_num);
+   goto err1;
+   }
+
+   port = &rxe->port[port_num - 1];
+
+   if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) {
+   pr_warn("SMI QP exists for port %d\n", port_num);
+   goto err1;
+   }
+
+   if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {
+   pr_warn("GSI QP exists for port %d\n", port_num);
+   goto err1;
+   }
+   }
+
+  

[PATCH rdma-next V2 01/32] IB/core: Macro for RoCEv2 UDP port

2015-12-24 Thread Kamal Heib
From: Amir Vadai 

Adding a macro for RoCEv2 UDP destination port.

Signed-off-by: Amir Vadai 
Signed-off-by: Kamal Heib 
---
 include/rdma/ib_verbs.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 368fc22..9904394 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -79,6 +79,8 @@ enum ib_gid_type {
IB_GID_TYPE_SIZE
 };
 
+#define ROCE_V2_UDP_DPORT  4791
+
 struct ib_gid_attr {
enum ib_gid_typegid_type;
struct net_device   *ndev;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH rdma-next V2 13/32] IB/rxe: Allocation pool for RDMA objects

2015-12-24 Thread Kamal Heib
Manage and allocate pool of objects with given limit on number of
elements.  Gets parameters from rxe_type_info. Pool elements are
allocated out of a slab cache.  Objects that are using this facility
are: PD, QP, SRQ, CQ, MR, FMR, MW, etc.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_pool.c | 511 
 drivers/staging/rdma/rxe/rxe_pool.h | 161 
 2 files changed, 672 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_pool.c
 create mode 100644 drivers/staging/rdma/rxe/rxe_pool.h

diff --git a/drivers/staging/rdma/rxe/rxe_pool.c 
b/drivers/staging/rdma/rxe/rxe_pool.c
new file mode 100644
index 0000000..1e0787a
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_pool.c
@@ -0,0 +1,511 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *Redistribution and use in source and binary forms, with or
+ *without modification, are permitted provided that the following
+ *conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+/* info about object pools
+   note that mr, fmr and mw share a single index space
+   so that one can map an lkey to the correct type of object */
+struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
+   [RXE_TYPE_UC] = {
+   .name   = "uc",
+   .size   = sizeof(struct rxe_ucontext),
+   },
+   [RXE_TYPE_PD] = {
+   .name   = "pd",
+   .size   = sizeof(struct rxe_pd),
+   },
+   [RXE_TYPE_AH] = {
+   .name   = "ah",
+   .size   = sizeof(struct rxe_ah),
+   .flags  = RXE_POOL_ATOMIC,
+   },
+   [RXE_TYPE_SRQ] = {
+   .name   = "srq",
+   .size   = sizeof(struct rxe_srq),
+   .flags  = RXE_POOL_INDEX,
+   .min_index  = RXE_MIN_SRQ_INDEX,
+   .max_index  = RXE_MAX_SRQ_INDEX,
+   },
+   [RXE_TYPE_QP] = {
+   .name   = "qp",
+   .size   = sizeof(struct rxe_qp),
+   .cleanup= rxe_qp_cleanup,
+   .flags  = RXE_POOL_INDEX,
+   .min_index  = RXE_MIN_QP_INDEX,
+   .max_index  = RXE_MAX_QP_INDEX,
+   },
+   [RXE_TYPE_CQ] = {
+   .name   = "cq",
+   .size   = sizeof(struct rxe_cq),
+   .cleanup= rxe_cq_cleanup,
+   },
+   [RXE_TYPE_MR] = {
+   .name   = "mr",
+   .size   = sizeof(struct rxe_mem),
+   .cleanup= rxe_mem_cleanup,
+   .flags  = RXE_POOL_INDEX,
+   .max_index  = RXE_MAX_MR_INDEX,
+   .min_index  = RXE_MIN_MR_INDEX,
+   },
+   [RXE_TYPE_FMR] = {
+   .name   = "fmr",
+   .size   = sizeof(struct rxe_mem),
+   .cleanup= rxe_mem_cleanup,
+   .flags  = RXE_POOL_INDEX,
+   .max_index  = RXE_MAX_FMR_INDEX,
+   .min_index  = RXE_MIN_FMR_INDEX,
+   },
+   [RXE_TYPE_MW] = {
+   .name   = "mw",
+   .size   = sizeof(struct rxe_mem),
+   .flags  = RXE_POOL_INDEX,
+   .max_index  = RXE_MAX_MW_INDEX,
+   .min_index  = RXE_MIN_MW_INDEX,
+   },
+   [RXE_TYPE_MC_GRP] = 

[PATCH rdma-next V2 23/32] IB/rxe: QP request handling

2015-12-24 Thread Kamal Heib
QP request logic.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
---
 drivers/staging/rdma/rxe/rxe_req.c | 679 +
 1 file changed, 679 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_req.c

diff --git a/drivers/staging/rdma/rxe/rxe_req.c 
b/drivers/staging/rdma/rxe/rxe_req.c
new file mode 100644
index 0000000..41d13a5
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_req.c
@@ -0,0 +1,679 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include 
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+  unsigned opcode);
+
+/*
+ * Advance the send state of @wqe by @npsn packets.
+ *
+ * @qp:   queue pair; qp->mtu is used as the per-packet size and
+ *        qp->req.opcode is updated for every skipped packet
+ * @wqe:  work request whose DMA cursor (and iova, for writes) is advanced
+ * @mask: work-request opcode mask of @wqe
+ * @npsn: number of packets to step over; req_retry() passes the PSN
+ *        distance between qp->comp.psn and wqe->first_psn
+ */
+static inline void retry_first_write_send(struct rxe_qp *qp,
+					  struct rxe_send_wqe *wqe,
+					  unsigned mask, int npsn)
+{
+	int i;
+
+	for (i = 0; i < npsn; i++) {
+		/* each packet carries at most one MTU of the remaining data */
+		int to_send = (wqe->dma.resid > qp->mtu) ?
+				qp->mtu : wqe->dma.resid;
+
+		qp->req.opcode = next_opcode(qp, wqe,
+					     wqe->wr.opcode);
+
+		if (wqe->wr.send_flags & IB_SEND_INLINE) {
+			/* inline data: only the residual/offset pair moves */
+			wqe->dma.resid -= to_send;
+			wqe->dma.sge_offset += to_send;
+		} else {
+			advance_dma_data(&wqe->dma, to_send);
+		}
+		if (mask & WR_WRITE_MASK)
+			wqe->iova += qp->mtu;
+	}
+}
+
+/*
+ * Reset the requester state of @qp for a retry.
+ *
+ * qp->req (wqe_index, psn, opcode) is reset to the completer's position,
+ * then the send queue is walked from the consumer index towards the
+ * producer index. Each WQE that is neither posted nor done gets its iova
+ * and DMA cursor restored and is put back into wqe_state_posted. The walk
+ * stops at the first WQE that is already in wqe_state_posted.
+ */
+static void req_retry(struct rxe_qp *qp)
+{
+   struct rxe_send_wqe *wqe;
+   unsigned int wqe_index;
+   unsigned int mask;
+   int npsn;
+   int first = 1;
+
+   /* NOTE(review): assumes queue_head() yields a valid WQE here (send
+    * queue not empty) -- confirm against the callers.
+    */
+   wqe = queue_head(qp->sq.queue);
+   /* PSN distance from the head WQE's first PSN to the completer's PSN */
+   npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK;
+
+   qp->req.wqe_index   = consumer_index(qp->sq.queue);
+   qp->req.psn = qp->comp.psn;
+   qp->req.opcode  = -1;
+
+   for (wqe_index = consumer_index(qp->sq.queue);
+   wqe_index != producer_index(qp->sq.queue);
+   wqe_index = next_index(qp->sq.queue, wqe_index)) {
+   wqe = addr_from_index(qp->sq.queue, wqe_index);
+   mask = wr_opcode_mask(wqe->wr.opcode, qp);
+
+   /* stop at the first WQE that is still in the posted state */
+   if (wqe->state == wqe_state_posted)
+   break;
+
+   /* WQEs already marked done are left untouched */
+   if (wqe->state == wqe_state_done)
+   continue;
+
+   /* restore the remote address from the original work request */
+   wqe->iova = (mask & WR_ATOMIC_MASK) ?
+   wqe->wr.wr.atomic.remote_addr :
+   wqe->wr.wr.rdma.remote_addr;
+
+   /* rewind the DMA cursor, except for a READ that is the first
+    * WQE handled by this walk
+    */
+   if (!first || (mask & WR_READ_MASK) == 0) {
+   wqe->dma.resid = wqe->dma.length;
+   wqe->dma.cur_sge = 0;
+   wqe->dma.sge_offset = 0;
+   }
+
+   /* only the first WQE handled is advanced by npsn packets */
+   if (first) {
+   first = 0;
+
+   if (mask & WR_WRITE_OR_SEND_MASK)
+   retry_first_write_send(qp, wqe, mask, npsn);
+
+   if (mask & WR_READ_MASK)
+   wqe->iova += npsn * qp->mtu;
+   }
+
+   wqe->state = wqe_state_posted;
+   }
+}
+
+/*
+ * RNR NAK timer handler.
+ *
+ * @data: pointer to the owning struct rxe_qp, carried as an unsigned long
+ *
+ * Logs the expiry and runs the QP's requester task (qp->req.task).
+ */
+void rnr_nak_timer(unsigned long data)
+{
+	struct rxe_qp *qp = (struct rxe_qp *)data;
+
+	pr_debug("rnr nak timer fired\n");
+	rxe_run_task(&qp->req.task, 1);
+}
+
+static struct rxe_send_wqe 

[PATCH rdma-next V2 22/32] IB/rxe: Completion handling

2015-12-24 Thread Kamal Heib
Handling of Work Completions.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
---
 drivers/staging/rdma/rxe/rxe_comp.c | 728 
 1 file changed, 728 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_comp.c

diff --git a/drivers/staging/rdma/rxe/rxe_comp.c 
b/drivers/staging/rdma/rxe/rxe_comp.c
new file mode 100644
index 000..7abbdaa
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_comp.c
@@ -0,0 +1,728 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include 
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+#include "rxe_task.h"
+
+/* Completer states; printable names are kept in comp_state_name[]. */
+enum comp_state {
+   COMPST_GET_ACK,
+   COMPST_GET_WQE,
+   COMPST_COMP_WQE,
+   COMPST_COMP_ACK,
+   COMPST_CHECK_PSN,
+   COMPST_CHECK_ACK,
+   COMPST_READ,
+   COMPST_ATOMIC,
+   COMPST_WRITE_SEND,
+   COMPST_UPDATE_COMP,
+   COMPST_ERROR_RETRY,
+   COMPST_RNR_RETRY,
+   COMPST_ERROR,
+   COMPST_EXIT, /* We have an issue, and we want to rerun the completer */
+   COMPST_DONE, /* The completer finished successfully */
+};
+
+/* Printable name of each enum comp_state value, indexed by the state. */
+static char *comp_state_name[] =  {
+   [COMPST_GET_ACK]= "GET ACK",
+   [COMPST_GET_WQE]= "GET WQE",
+   [COMPST_COMP_WQE]   = "COMP WQE",
+   [COMPST_COMP_ACK]   = "COMP ACK",
+   [COMPST_CHECK_PSN]  = "CHECK PSN",
+   [COMPST_CHECK_ACK]  = "CHECK ACK",
+   [COMPST_READ]   = "READ",
+   [COMPST_ATOMIC] = "ATOMIC",
+   [COMPST_WRITE_SEND] = "WRITE/SEND",
+   [COMPST_UPDATE_COMP]= "UPDATE COMP",
+   [COMPST_ERROR_RETRY]= "ERROR RETRY",
+   [COMPST_RNR_RETRY]  = "RNR RETRY",
+   [COMPST_ERROR]  = "ERROR",
+   [COMPST_EXIT]   = "EXIT",
+   [COMPST_DONE]   = "DONE",
+};
+
+/*
+ * RNR NAK delay in microseconds, indexed by the IB_RNR_TIMER_* encoding.
+ * The constant name spells the delay in milliseconds
+ * (IB_RNR_TIMER_mmm_nn == mmm.nn ms), so every value is that number
+ * times 1000.
+ */
+static unsigned long rnrnak_usec[32] = {
+	[IB_RNR_TIMER_655_36] = 655360,
+	[IB_RNR_TIMER_000_01] = 10,
+	[IB_RNR_TIMER_000_02] = 20,
+	[IB_RNR_TIMER_000_03] = 30,
+	[IB_RNR_TIMER_000_04] = 40,
+	[IB_RNR_TIMER_000_06] = 60,
+	[IB_RNR_TIMER_000_08] = 80,
+	[IB_RNR_TIMER_000_12] = 120,
+	[IB_RNR_TIMER_000_16] = 160,
+	[IB_RNR_TIMER_000_24] = 240,
+	[IB_RNR_TIMER_000_32] = 320,
+	[IB_RNR_TIMER_000_48] = 480,
+	[IB_RNR_TIMER_000_64] = 640,
+	[IB_RNR_TIMER_000_96] = 960,
+	[IB_RNR_TIMER_001_28] = 1280,
+	[IB_RNR_TIMER_001_92] = 1920,
+	[IB_RNR_TIMER_002_56] = 2560,
+	[IB_RNR_TIMER_003_84] = 3840,
+	[IB_RNR_TIMER_005_12] = 5120,
+	[IB_RNR_TIMER_007_68] = 7680,
+	[IB_RNR_TIMER_010_24] = 10240,
+	[IB_RNR_TIMER_015_36] = 15360,
+	[IB_RNR_TIMER_020_48] = 20480,
+	[IB_RNR_TIMER_030_72] = 30720,
+	[IB_RNR_TIMER_040_96] = 40960,
+	[IB_RNR_TIMER_061_44] = 61440,	/* was 61410: 61.44 ms is 61440 us */
+	[IB_RNR_TIMER_081_92] = 81920,
+	[IB_RNR_TIMER_122_88] = 122880,
+	[IB_RNR_TIMER_163_84] = 163840,
+	[IB_RNR_TIMER_245_76] = 245760,
+	[IB_RNR_TIMER_327_68] = 327680,
+	[IB_RNR_TIMER_491_52] = 491520,
+};
+
+/*
+ * Translate an RNR NAK timer encoding into jiffies via rnrnak_usec[],
+ * never returning less than one jiffy.
+ */
+static inline unsigned long rnrnak_jiffies(u8 timeout)
+{
+	unsigned long delay = usecs_to_jiffies(rnrnak_usec[timeout]);
+
+	return max_t(unsigned long, delay, 1);
+}
+
+static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
+{
+   switch (opcode) 

[PATCH rdma-next V2 15/32] IB/rxe: Address vector manipulation functions

2015-12-24 Thread Kamal Heib
Functions to manipulate Address Vector.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_av.c | 87 +++
 1 file changed, 87 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_av.c

diff --git a/drivers/staging/rdma/rxe/rxe_av.c 
b/drivers/staging/rdma/rxe/rxe_av.c
new file mode 100644
index 000..cc4b179
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_av.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *Redistribution and use in source and binary forms, with or
+ *without modification, are permitted provided that the following
+ *conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+/*
+ * Validate the address-handle attributes in @attr against device @rxe.
+ *
+ * Checks that attr->port_num lies in [1, rxe->num_ports] and, when
+ * IB_AH_GRH is set in attr->ah_flags, that attr->grh.sgid_index is a
+ * valid index into that port's GID table.
+ *
+ * Returns 0 when the attributes are acceptable, -EINVAL otherwise.
+ */
+int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr)
+{
+	struct rxe_port *port;
+
+	if (attr->port_num < 1 || attr->port_num > rxe->num_ports) {
+		pr_info("rxe: invalid port_num = %d\n", attr->port_num);
+		return -EINVAL;
+	}
+
+	/* port numbers are 1-based, the port array is 0-based */
+	port = &rxe->port[attr->port_num - 1];
+
+	if (attr->ah_flags & IB_AH_GRH) {
+		/* valid indices are 0 .. gid_tbl_len - 1, so the table
+		 * length itself must be rejected as well
+		 */
+		if (attr->grh.sgid_index >= port->attr.gid_tbl_len) {
+			pr_info("rxe: invalid sgid index = %d\n",
+				attr->grh.sgid_index);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Initialise @av from the address-handle attributes in @attr.
+ *
+ * @av is zeroed first, then attr->grh is copied into av->grh and
+ * av->port_num is set to @port_num. @rxe is not used. Always returns 0.
+ */
+int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,
+		     struct rxe_av *av, struct ib_ah_attr *attr)
+{
+	memset(av, 0, sizeof(*av));
+	memcpy(&av->grh, &attr->grh, sizeof(attr->grh));
+	av->port_num = port_num;
+	return 0;
+}
+
+/*
+ * Export @av into the address-handle attributes @attr.
+ *
+ * Copies av->grh into attr->grh and av->port_num into attr->port_num;
+ * no other field of @attr is written. @rxe is not used. Always returns 0.
+ */
+int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,
+		   struct ib_ah_attr *attr)
+{
+	memcpy(&attr->grh, &av->grh, sizeof(av->grh));
+	attr->port_num = av->port_num;
+	return 0;
+}
+
+/*
+ * Fill in the IP addressing information of @av.
+ *
+ * The source socket address is derived from @sgid and the destination
+ * socket address from the destination GID in attr->grh, both through
+ * rdma_gid2ip(). av->network_type is derived from @sgid_attr's gid_type
+ * and @sgid. @rxe is not used. Always returns 0.
+ */
+int rxe_av_fill_ip_info(struct rxe_dev *rxe,
+			struct rxe_av *av,
+			struct ib_ah_attr *attr,
+			struct ib_gid_attr *sgid_attr,
+			union ib_gid *sgid)
+{
+	rdma_gid2ip(&av->sgid_addr._sockaddr, sgid);
+	rdma_gid2ip(&av->dgid_addr._sockaddr, &attr->grh.dgid);
+	av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid);
+
+	return 0;
+}
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH rdma-next V2 09/32] IB/rxe: Work request's opcode information table

2015-12-24 Thread Kamal Heib
Useful information about work request opcodes and pkt opcodes in table
form.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_opcode.c | 961 ++
 1 file changed, 961 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_opcode.c

diff --git a/drivers/staging/rdma/rxe/rxe_opcode.c 
b/drivers/staging/rdma/rxe/rxe_opcode.c
new file mode 100644
index 000..359b75f
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_opcode.c
@@ -0,0 +1,961 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include 
+#include "rxe_opcode.h"
+#include "rxe_hdr.h"
+
+/* useful information about work request opcodes and pkt opcodes in
+ * table form
+ */
+struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
+   [IB_WR_RDMA_WRITE]  = {
+   .name   = "IB_WR_RDMA_WRITE",
+   .mask   = {
+   [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK,
+   [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK,
+   },
+   },
+   [IB_WR_RDMA_WRITE_WITH_IMM] = {
+   .name   = "IB_WR_RDMA_WRITE_WITH_IMM",
+   .mask   = {
+   [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK,
+   [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK,
+   },
+   },
+   [IB_WR_SEND]= {
+   .name   = "IB_WR_SEND",
+   .mask   = {
+   [IB_QPT_SMI]= WR_INLINE_MASK | WR_SEND_MASK,
+   [IB_QPT_GSI]= WR_INLINE_MASK | WR_SEND_MASK,
+   [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
+   [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
+   [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK,
+   },
+   },
+   [IB_WR_SEND_WITH_IMM]   = {
+   .name   = "IB_WR_SEND_WITH_IMM",
+   .mask   = {
+   [IB_QPT_SMI]= WR_INLINE_MASK | WR_SEND_MASK,
+   [IB_QPT_GSI]= WR_INLINE_MASK | WR_SEND_MASK,
+   [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
+   [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
+   [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK,
+   },
+   },
+   [IB_WR_RDMA_READ]   = {
+   .name   = "IB_WR_RDMA_READ",
+   .mask   = {
+   [IB_QPT_RC] = WR_READ_MASK,
+   },
+   },
+   [IB_WR_ATOMIC_CMP_AND_SWP]  = {
+   .name   = "IB_WR_ATOMIC_CMP_AND_SWP",
+   .mask   = {
+   [IB_QPT_RC] = WR_ATOMIC_MASK,
+   },
+   },
+   [IB_WR_ATOMIC_FETCH_AND_ADD]= {
+   .name   = "IB_WR_ATOMIC_FETCH_AND_ADD",
+   .mask   = {
+   [IB_QPT_RC] = WR_ATOMIC_MASK,
+   },
+   },
+   [IB_WR_LSO] = {
+   .name   = "IB_WR_LSO",
+   .mask   = {
+   /* not supported */
+   },
+   },
+   [IB_WR_SEND_WITH_INV]   = {
+   .name   

[PATCH rdma-next V2 00/32] Soft-RoCE driver

2015-12-24 Thread Kamal Heib
Doug and list Hi,

This patchset introduces Soft RoCE driver.

Some background on the driver: The original Soft-RoCE driver was implemented by
Bob Pearson from SFW. Bob started the submission process [1], but his work was
abandoned after v2.
Mellanox decided to pick it up and continue the submission. As part of the
process we detected some problems with the original implementation. Mainly, we
wanted to support RoCEv2; also, there were too many locks and
context switches in the data path. Most of them have already been removed.

We've located the driver in the staging subtree. This follows a requirement
to implement an IB transport library - Soft RoCE is in the same boat as the
hfi1 driver. We need to define and implement a library to prevent that code
duplication.

We did address the feedback provided on the original submission.

Soft-RoCE sits on top of Matan's RoCEv2 series [2], which was taken
for 4.5 and is present in Doug's k.o/for-4.5 branch.

RXE user space (librxe) is located at github [4] with instructions how to use
it [5]

Some notes on the architecture and design:

ib_rxe, implements the RDMA transport and registers with the RDMA core as a
kernel verbs provider. It also implements the packet IO layer. ib_rxe attaches
to the Linux netdev stack as a udp encapsulating protocol and can send and
receive packets over any Ethernet device. It uses the RoCEv2 protocol to handle
RDMA transport.

The modules are configured by entries in /sys. There is a configuration script
(rxe_cfg) that simplifies the use of this interface. rxe_cfg is part of the
rxe user space code, librxe.

The use of rxe verbs in user space requires the inclusion of librxe as a device
specific plug-in to libibverbs. librxe is packaged separately [4].

Copies of the user space library and tools for 'upstream' and a clone of Doug's 
tree with
these patches applied are available at github [3] under rxe_submission-v2 branch

Architecture:

~

 +---+
 |  Application  |
 +---+
 +---+
 | libibverbs|
User +---+
 ++ ++
 | librxe | | HW RoCE lib|
 ++ ++
~
 +--+   ++
 | Sockets  |   | RDMA ULP   |
 +--+   ++
 +--+  +-+
 | TCP/IP   |  | ib_core |
 +--+  +-+
 ++ ++
Kernel   | ib_rxe | | HW RoCE driver |
 ++ ++
 ++
 | NIC driver |
 ++
~

The driver components and a non-ASCII chart of the module can be found in a
PDF [6] presented by Bob before the original submission.
The design is very similar, one thing that was changed, is the arbiter task
that was removed. This reduced the number of context switches and locks during
the data path.

A TODO file is placed under the driver folder.

Thanks,
Kamal, Liran and Amir

[1] - http://www.spinics.net/lists/linux-rdma/msg08936.html
[2] - http://marc.info/?l=linux-rdma&m=145087562709661&w=2
[3] - https://github.com/SoftRoCE/rxe-dev
[4] - https://github.com/SoftRoCE/librxe-dev
[5] - https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
[6] - 
http://downloads.openfabrics.org/Media/Sonoma2010/Sonoma_2010_Wednesday_rxe.pdf

Changes from V0:
- Rebased to 4.3-rc1
- IPv4 based sessions work
- Fixed the link speed and width we report to the query port verb
- Update the TODO file with Sagi's request

Changes from V1:
- Rebased to 4.4.0-rc6 and to Doug's k.o/for-4.5 github branch 
- Move driver to be under "drivers/staging/rdma/"

Amir Vadai (3):
  IB/core: Macro for RoCEv2 UDP port
  IB/rxe: Shared objects between user and kernel
  IB/rxe: TODO file while in staging

Kamal Heib (29):
  IB/core: Add SEND_LAST_INV and SEND_ONLY_INV opcodes
  IB/rxe: IBA header types and methods
  IB/rxe: Bit mask and lengths declaration for different opcodes
  IB/rxe: Default rxe device and port parameters
  IB/rxe: External interface to lower level modules
  IB/rxe: Misc local interfaces between files in ib_rxe
  IB/rxe: Add maintainer for rxe driver
  IB/rxe: Work 

[PATCH rdma-next V2 04/32] IB/rxe: Bit mask and lengths declaration for different opcodes

2015-12-24 Thread Kamal Heib
header bit mask definitions and header lengths declaration of the
rxe_opcode_info struct and rxe_wr_opcode_info struct.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_opcode.h | 128 ++
 1 file changed, 128 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_opcode.h

diff --git a/drivers/staging/rdma/rxe/rxe_opcode.h 
b/drivers/staging/rdma/rxe/rxe_opcode.h
new file mode 100644
index 000..3682c16
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_opcode.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_OPCODE_H
+#define RXE_OPCODE_H
+
+/*
+ * contains header bit mask definitions and header lengths
+ * declaration of the rxe_opcode_info struct and
+ * rxe_wr_opcode_info struct
+ */
+
+/*
+ * Property bits of a work-request opcode; stored per QP type in
+ * rxe_wr_opcode_info.mask[]. The trailing entries are convenience
+ * combinations of the single-bit values.
+ */
+enum rxe_wr_mask {
+   WR_INLINE_MASK  = BIT(0),
+   WR_ATOMIC_MASK  = BIT(1),
+   WR_SEND_MASK= BIT(2),
+   WR_READ_MASK= BIT(3),
+   WR_WRITE_MASK   = BIT(4),
+   WR_LOCAL_MASK   = BIT(5),
+
+   WR_READ_OR_WRITE_MASK   = WR_READ_MASK | WR_WRITE_MASK,
+   WR_READ_WRITE_OR_SEND_MASK  = WR_READ_OR_WRITE_MASK | WR_SEND_MASK,
+   WR_WRITE_OR_SEND_MASK   = WR_WRITE_MASK | WR_SEND_MASK,
+   WR_ATOMIC_OR_READ_MASK  = WR_ATOMIC_MASK | WR_READ_MASK,
+};
+
+#define WR_MAX_QPT (8)
+
+/*
+ * Description of one work-request opcode.
+ *
+ * @name: printable opcode name
+ * @mask: enum rxe_wr_mask bits for this opcode, indexed by QP type
+ *        (IB_QPT_*); a QP type left at zero has no bits set
+ */
+struct rxe_wr_opcode_info {
+	char			*name;
+	enum rxe_wr_mask	mask[WR_MAX_QPT];
+};
+
+extern struct rxe_wr_opcode_info rxe_wr_opcode_info[];
+
+/*
+ * Index of each header/payload kind. The value is also the bit position
+ * of the matching RXE_*_MASK in enum rxe_hdr_mask; NUM_HDR_TYPES counts
+ * the entries and is the base for the extra flag bits there.
+ */
+enum rxe_hdr_type {
+   RXE_LRH,
+   RXE_GRH,
+   RXE_BTH,
+   RXE_RETH,
+   RXE_AETH,
+   RXE_ATMETH,
+   RXE_ATMACK,
+   RXE_IETH,
+   RXE_RDETH,
+   RXE_DETH,
+   RXE_IMMDT,
+   RXE_PAYLOAD,
+   NUM_HDR_TYPES
+};
+
+/*
+ * Bit mask describing a packet opcode. The low NUM_HDR_TYPES bits mirror
+ * enum rxe_hdr_type (one bit per header/payload kind); the bits from
+ * NUM_HDR_TYPES upwards are additional flags.
+ */
+enum rxe_hdr_mask {
+   RXE_LRH_MASK= BIT(RXE_LRH),
+   RXE_GRH_MASK= BIT(RXE_GRH),
+   RXE_BTH_MASK= BIT(RXE_BTH),
+   RXE_IMMDT_MASK  = BIT(RXE_IMMDT),
+   RXE_RETH_MASK   = BIT(RXE_RETH),
+   RXE_AETH_MASK   = BIT(RXE_AETH),
+   RXE_ATMETH_MASK = BIT(RXE_ATMETH),
+   RXE_ATMACK_MASK = BIT(RXE_ATMACK),
+   RXE_IETH_MASK   = BIT(RXE_IETH),
+   RXE_RDETH_MASK  = BIT(RXE_RDETH),
+   RXE_DETH_MASK   = BIT(RXE_DETH),
+   RXE_PAYLOAD_MASK= BIT(RXE_PAYLOAD),
+
+   RXE_REQ_MASK= BIT(NUM_HDR_TYPES + 0),
+   RXE_ACK_MASK= BIT(NUM_HDR_TYPES + 1),
+   RXE_SEND_MASK   = BIT(NUM_HDR_TYPES + 2),
+   RXE_WRITE_MASK  = BIT(NUM_HDR_TYPES + 3),
+   RXE_READ_MASK   = BIT(NUM_HDR_TYPES + 4),
+   RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5),
+
+   RXE_RWR_MASK= BIT(NUM_HDR_TYPES + 6),
+   RXE_COMP_MASK   = BIT(NUM_HDR_TYPES + 7),
+
+   RXE_START_MASK  = BIT(NUM_HDR_TYPES + 8),
+   RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9),
+   RXE_END_MASK= BIT(NUM_HDR_TYPES + 10),
+
+   /* NOTE(review): bit NUM_HDR_TYPES + 11 is not assigned here */
+   RXE_LOOPBACK_MASK   = BIT(NUM_HDR_TYPES + 12),
+
+   RXE_READ_OR_ATOMIC  = (RXE_READ_MASK | RXE_ATOMIC_MASK),
+   RXE_WRITE_OR_SEND   = (RXE_WRITE_MASK | RXE_SEND_MASK),
+};
+
+#define OPCODE_NONE

[PATCH rdma-next V2 02/32] IB/core: Add SEND_LAST_INV and SEND_ONLY_INV opcodes

2015-12-24 Thread Kamal Heib
Introduce the SEND_LAST_INV and SEND_ONLY_INV opcodes in ib_pack.h, to be
used by RXE for RC.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 include/rdma/ib_pack.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index a193081..1d957fe 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -103,6 +103,8 @@ enum {
IB_OPCODE_ATOMIC_ACKNOWLEDGE= 0x12,
IB_OPCODE_COMPARE_SWAP  = 0x13,
IB_OPCODE_FETCH_ADD = 0x14,
+   IB_OPCODE_SEND_LAST_INV = 0x16,
+   IB_OPCODE_SEND_ONLY_INV = 0x17,
 
/* real constants follow -- see comment about above IB_OPCODE()
   macro for more details */
@@ -129,6 +131,8 @@ enum {
IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE),
IB_OPCODE(RC, COMPARE_SWAP),
IB_OPCODE(RC, FETCH_ADD),
+   IB_OPCODE(RC, SEND_LAST_INV),
+   IB_OPCODE(RC, SEND_ONLY_INV),
 
/* UC */
IB_OPCODE(UC, SEND_FIRST),
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH rdma-next V2 26/32] IB/rxe: ICRC calculations

2015-12-24 Thread Kamal Heib
Compute ICRC for UDP/IP/BTH headers

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
---
 drivers/staging/rdma/rxe/rxe_icrc.c | 96 +
 1 file changed, 96 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_icrc.c

diff --git a/drivers/staging/rdma/rxe/rxe_icrc.c 
b/drivers/staging/rdma/rxe/rxe_icrc.c
new file mode 100644
index 000..02b73d6
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_icrc.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+/*
+ * Compute a partial ICRC for all the IB transport headers.
+ *
+ * @pkt: packet info; pkt->hdr points at the BTH and pkt->opcode selects
+ *       the total header length via rxe_opcode[]
+ * @skb: buffer holding the packet; skb->protocol selects IPv4 handling,
+ *       anything else is treated as IPv6
+ *
+ * A pseudo header (IP header + UDP header + BTH) is built on the stack
+ * with the variant fields overwritten by ones, CRC'd, and then the CRC is
+ * continued over the headers that follow the BTH.
+ *
+ * Returns the running CRC over the headers; the payload is not covered
+ * by this function.
+ */
+u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb)
+{
+	unsigned int bth_offset = 0;
+	struct iphdr *ip4h = NULL;
+	struct ipv6hdr *ip6h = NULL;
+	struct udphdr *udph;
+	struct rxe_bth *bth;
+	u32 crc;	/* unsigned, matching the return type and the seed */
+	int length;
+	int hdr_size = sizeof(struct udphdr) +
+		(skb->protocol == htons(ETH_P_IP) ?
+		sizeof(struct iphdr) : sizeof(struct ipv6hdr));
+	/* pseudo header buffer size is calculated using the ipv6 header
+	 * size since it is bigger than ipv4
+	 */
+	u8 pshdr[sizeof(struct udphdr) +
+		sizeof(struct ipv6hdr) +
+		RXE_BTH_BYTES];
+
+	/* This seed is the result of computing a CRC with a seed of
+	 * 0xffffffff and 8 bytes of 0xff representing a masked LRH.
+	 */
+	crc = 0xdebb20e3;
+
+	if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */
+		memcpy(pshdr, ip_hdr(skb), hdr_size);
+		ip4h = (struct iphdr *)pshdr;
+		udph = (struct udphdr *)(ip4h + 1);
+
+		/* overwrite the fields excluded from the CRC with ones */
+		ip4h->ttl = 0xff;
+		ip4h->check = CSUM_MANGLED_0;
+		ip4h->tos = 0xff;
+	} else {				/* IPv6 */
+		memcpy(pshdr, ipv6_hdr(skb), hdr_size);
+		ip6h = (struct ipv6hdr *)pshdr;
+		udph = (struct udphdr *)(ip6h + 1);
+
+		/* overwrite the fields excluded from the CRC with ones */
+		memset(ip6h->flow_lbl, 0xff, sizeof(ip6h->flow_lbl));
+		ip6h->priority = 0xf;
+		ip6h->hop_limit = 0xff;
+	}
+	udph->check = CSUM_MANGLED_0;
+
+	bth_offset += hdr_size;
+
+	/* append a copy of the BTH right after the IP/UDP headers */
+	memcpy(&pshdr[bth_offset], pkt->hdr, RXE_BTH_BYTES);
+	bth = (struct rxe_bth *)&pshdr[bth_offset];
+
+	/* exclude bth.resv8a */
+	bth->qpn |= cpu_to_be32(~BTH_QPN_MASK);
+
+	length = hdr_size + RXE_BTH_BYTES;
+	crc = crc32_le(crc, pshdr, length);
+
+	/* And finish to compute the CRC on the remainder of the headers. */
+	crc = crc32_le(crc, pkt->hdr + RXE_BTH_BYTES,
+		       rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES);
+	return crc;
+}
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH rdma-next V2 06/32] IB/rxe: External interface to lower level modules

2015-12-24 Thread Kamal Heib
Functions to be called by the networking layer.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe.h | 70 ++
 1 file changed, 70 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe.h

diff --git a/drivers/staging/rdma/rxe/rxe.h b/drivers/staging/rdma/rxe/rxe.h
new file mode 100644
index 000..f781619
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_H
+#define RXE_H
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "rxe_net.h"
+#include "rxe_opcode.h"
+#include "rxe_hdr.h"
+#include "rxe_param.h"
+#include "rxe_verbs.h"
+
+#define RXE_UVERBS_ABI_VERSION (1)
+
+#define IB_PHYS_STATE_LINK_UP  (5)
+
+#define RXE_ROCE_V2_SPORT  (0xc000)
+
+/*
+ * Functions to be called by the networking layer.
+ * NOTE(review): the per-function notes below are inferred from names and
+ * signatures only -- confirm against the definitions.
+ */
+
+/* presumably applies the netdev MTU @dev_mtu to port @port_num of @rxe */
+int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu,
+   unsigned int port_num);
+
+/* presumably registers @rxe, using @mtu as its initial MTU */
+int rxe_add(struct rxe_dev *rxe, unsigned int mtu);
+
+/* presumably the counterpart of rxe_add() */
+void rxe_remove(struct rxe_dev *rxe);
+
+/* presumably the receive entry point for an incoming packet in @skb */
+int rxe_rcv(struct sk_buff *skb);
+
+#endif /* RXE_H */
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH rdma-next V2 07/32] IB/rxe: Misc local interfaces between files in ib_rxe

2015-12-24 Thread Kamal Heib
Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_loc.h | 291 +
 1 file changed, 291 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_loc.h

diff --git a/drivers/staging/rdma/rxe/rxe_loc.h 
b/drivers/staging/rdma/rxe/rxe_loc.h
new file mode 100644
index 000..814b51d
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_loc.h
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_LOC_H
+#define RXE_LOC_H
+
+/* rxe_av.c */
+int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr);
+
+int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,
+struct rxe_av *av, struct ib_ah_attr *attr);
+
+int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,
+  struct ib_ah_attr *attr);
+
+int rxe_av_fill_ip_info(struct rxe_dev *rxe,
+   struct rxe_av *av,
+   struct ib_ah_attr *attr,
+   struct ib_gid_attr *sgid_attr,
+   union ib_gid *sgid);
+
+/* rxe_cq.c */
+int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
+   int cqe, int comp_vector, struct ib_udata *udata);
+
+int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
+int comp_vector, struct ib_ucontext *context,
+struct ib_udata *udata);
+
+int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata);
+
+int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
+
+void rxe_cq_cleanup(void *arg);
+
+/* rxe_mcast.c */
+int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
+ struct rxe_mc_grp **grp_p);
+
+int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
+  struct rxe_mc_grp *grp);
+
+int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
+   union ib_gid *mgid);
+
+void rxe_drop_all_mcast_groups(struct rxe_qp *qp);
+
+void rxe_mc_cleanup(void *arg);
+
+/* rxe_mmap.c */
+struct rxe_mmap_info {
+   struct list_headpending_mmaps;
+   struct ib_ucontext  *context;
+   struct kref ref;
+   void*obj;
+
+   struct mminfo info;
+};
+
+void rxe_mmap_release(struct kref *ref);
+
+struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev,
+  u32 size,
+  struct ib_ucontext *context,
+  void *obj);
+
+int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+/* rxe_mr.c */
+enum copy_direction {
+   to_mem_obj,
+   from_mem_obj,
+};
+
+int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
+int access, struct rxe_mem *mem);
+
+int rxe_mem_init_phys(struct rxe_dev *rxe, struct rxe_pd *pd,
+ int access, u64 iova, struct ib_phys_buf *buf,
+ int num_buf, struct rxe_mem *mem);
+
+int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
+ u64 length, u64 iova, int access, struct ib_udata *udata,
+ struct rxe_mem *mr);
+
+int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
+ int max_pages, struct rxe_mem *mem);
+
+int 

[PATCH rdma-next V2 14/32] IB/rxe: RXE tasks handling

2015-12-24 Thread Kamal Heib
A 'task' is a short function that returns 0 as long as it needs to be
called again. rxe tasks are based on the kernel's tasklet infrastructure.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_task.c | 154 
 drivers/staging/rdma/rxe/rxe_task.h |  95 ++
 2 files changed, 249 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_task.c
 create mode 100644 drivers/staging/rdma/rxe/rxe_task.h

diff --git a/drivers/staging/rdma/rxe/rxe_task.c 
b/drivers/staging/rdma/rxe/rxe_task.c
new file mode 100644
index 000..162fa1a
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_task.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *Redistribution and use in source and binary forms, with or
+ *without modification, are permitted provided that the following
+ *conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+
+#include "rxe_task.h"
+
+int __rxe_do_task(struct rxe_task *task)
+
+{
+   int ret;
+
+   while ((ret = task->func(task->arg)) == 0)
+   ;
+
+   task->ret = ret;
+
+   return ret;
+}
+
+/*
+ * this locking is due to a potential race where
+ * a second caller finds the task already running
+ * but looks just after the last call to func
+ */
+void rxe_do_task(unsigned long data)
+{
+   int cont;
+   int ret;
+   unsigned long flags;
+   struct rxe_task *task = (struct rxe_task *)data;
+
+   spin_lock_irqsave(&task->state_lock, flags);
+   switch (task->state) {
+   case TASK_STATE_START:
+   task->state = TASK_STATE_BUSY;
+   spin_unlock_irqrestore(&task->state_lock, flags);
+   break;
+
+   case TASK_STATE_BUSY:
+   task->state = TASK_STATE_ARMED;
+   /* fall through to */
+   case TASK_STATE_ARMED:
+   spin_unlock_irqrestore(&task->state_lock, flags);
+   return;
+
+   default:
+   spin_unlock_irqrestore(&task->state_lock, flags);
+   pr_warn("bad state = %d in rxe_do_task\n", task->state);
+   return;
+   }
+
+   do {
+   cont = 0;
+   ret = task->func(task->arg);
+
+   spin_lock_irqsave(&task->state_lock, flags);
+   switch (task->state) {
+   case TASK_STATE_BUSY:
+   if (ret)
+   task->state = TASK_STATE_START;
+   else
+   cont = 1;
+   break;
+
+   /* someone tried to run the task since the last time we called
+* func, so we will call one more time regardless of the
+* return value
+*/
+   case TASK_STATE_ARMED:
+   task->state = TASK_STATE_BUSY;
+   cont = 1;
+   break;
+
+   default:
+   pr_warn("bad state = %d in rxe_do_task\n",
+   task->state);
+   }
+   spin_unlock_irqrestore(&task->state_lock, flags);
+   } while (cont);
+
+   task->ret = ret;
+}
+
+int rxe_init_task(void *obj, struct rxe_task *task,
+ void *arg, int (*func)(void *), char *name)
+{
+   task->obj   = obj;
+   task->arg   = arg;
+   task->func  = func;
+   snprintf(task->name, sizeof(task->name), "%s", name);
+
+   tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task);
+
+   task->state = 

[PATCH rdma-next V2 31/32] IB/rxe: Add Soft-RoCE to kbuild and makefiles

2015-12-24 Thread Kamal Heib
Kconfig and Makefiles for RXE driver

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/Kconfig  |  2 ++
 drivers/staging/rdma/Makefile |  1 +
 drivers/staging/rdma/rxe/Kconfig  | 23 +++
 drivers/staging/rdma/rxe/Makefile | 24 
 4 files changed, 50 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/Kconfig
 create mode 100644 drivers/staging/rdma/rxe/Makefile

diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig
index ba87650..9f92e0d 100644
--- a/drivers/staging/rdma/Kconfig
+++ b/drivers/staging/rdma/Kconfig
@@ -30,4 +30,6 @@ source "drivers/staging/rdma/hfi1/Kconfig"
 
 source "drivers/staging/rdma/ipath/Kconfig"
 
+source "drivers/staging/rdma/rxe/Kconfig"
+
 endif
diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile
index 139d78e..3648a42 100644
--- a/drivers/staging/rdma/Makefile
+++ b/drivers/staging/rdma/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_INFINIBAND_AMSO1100)   += amso1100/
 obj-$(CONFIG_INFINIBAND_EHCA)  += ehca/
 obj-$(CONFIG_INFINIBAND_HFI1)  += hfi1/
 obj-$(CONFIG_INFINIBAND_IPATH) += ipath/
+obj-$(CONFIG_INFINIBAND_RXE)+= rxe/
diff --git a/drivers/staging/rdma/rxe/Kconfig b/drivers/staging/rdma/rxe/Kconfig
new file mode 100644
index 000..649b7be
--- /dev/null
+++ b/drivers/staging/rdma/rxe/Kconfig
@@ -0,0 +1,23 @@
+config INFINIBAND_RXE
+   tristate "Software RDMA over Ethernet (RoCE) driver"
+   depends on INET && PCI && INFINIBAND
+   ---help---
+   This driver implements the InfiniBand RDMA transport over
+   the Linux network stack. It enables a system with a
+   standard Ethernet adapter to interoperate with a RoCE
+   adapter or with another system running the RXE driver.
+   Documentation on InfiniBand and RoCE can be downloaded at
+   www.infinibandta.org and www.openfabrics.org. (See also
+   siw which is a similar software driver for iWARP.)
+
+   The driver is split into two layers, one interfaces with the
+   Linux RDMA stack and implements a kernel or user space
+   verbs API. The user space verbs API requires a support
+   library named librxe which is loaded by the generic user
+   space verbs API, libibverbs. The other layer interfaces
+   with the Linux network stack at layer 3.
+
+   To configure and work with soft-RoCE driver please use the
+   following wiki page under "configure Soft-RoCE (RXE)" section:
+
+   https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
diff --git a/drivers/staging/rdma/rxe/Makefile 
b/drivers/staging/rdma/rxe/Makefile
new file mode 100644
index 000..7cf7774
--- /dev/null
+++ b/drivers/staging/rdma/rxe/Makefile
@@ -0,0 +1,24 @@
+obj-$(CONFIG_INFINIBAND_RXE) += ib_rxe.o
+
+ib_rxe-y := \
+   rxe.o \
+   rxe_comp.o \
+   rxe_req.o \
+   rxe_resp.o \
+   rxe_recv.o \
+   rxe_pool.o \
+   rxe_queue.o \
+   rxe_verbs.o \
+   rxe_av.o \
+   rxe_srq.o \
+   rxe_qp.o \
+   rxe_cq.o \
+   rxe_mr.o \
+   rxe_dma.o \
+   rxe_opcode.o \
+   rxe_mmap.o \
+   rxe_icrc.o \
+   rxe_mcast.o \
+   rxe_task.o \
+   rxe_net.o \
+   rxe_sysfs.o
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH rdma-next V2 24/32] IB/rxe: QP response handling

2015-12-24 Thread Kamal Heib
QP response logic.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
---
 drivers/staging/rdma/rxe/rxe_resp.c | 1368 +++
 1 file changed, 1368 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_resp.c

diff --git a/drivers/staging/rdma/rxe/rxe_resp.c 
b/drivers/staging/rdma/rxe/rxe_resp.c
new file mode 100644
index 000..78304c6
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_resp.c
@@ -0,0 +1,1368 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include 
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+enum resp_states {
+   RESPST_NONE,
+   RESPST_GET_REQ,
+   RESPST_CHK_PSN,
+   RESPST_CHK_OP_SEQ,
+   RESPST_CHK_OP_VALID,
+   RESPST_CHK_RESOURCE,
+   RESPST_CHK_LENGTH,
+   RESPST_CHK_RKEY,
+   RESPST_EXECUTE,
+   RESPST_READ_REPLY,
+   RESPST_COMPLETE,
+   RESPST_ACKNOWLEDGE,
+   RESPST_CLEANUP,
+   RESPST_DUPLICATE_REQUEST,
+   RESPST_ERR_MALFORMED_WQE,
+   RESPST_ERR_UNSUPPORTED_OPCODE,
+   RESPST_ERR_MISALIGNED_ATOMIC,
+   RESPST_ERR_PSN_OUT_OF_SEQ,
+   RESPST_ERR_MISSING_OPCODE_FIRST,
+   RESPST_ERR_MISSING_OPCODE_LAST_C,
+   RESPST_ERR_MISSING_OPCODE_LAST_D1E,
+   RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
+   RESPST_ERR_RNR,
+   RESPST_ERR_RKEY_VIOLATION,
+   RESPST_ERR_LENGTH,
+   RESPST_ERR_CQ_OVERFLOW,
+   RESPST_ERROR,
+   RESPST_RESET,
+   RESPST_DONE,
+   RESPST_EXIT,
+};
+
+static char *resp_state_name[] = {
+   [RESPST_NONE]   = "NONE",
+   [RESPST_GET_REQ]= "GET_REQ",
+   [RESPST_CHK_PSN]= "CHK_PSN",
+   [RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ",
+   [RESPST_CHK_OP_VALID]   = "CHK_OP_VALID",
+   [RESPST_CHK_RESOURCE]   = "CHK_RESOURCE",
+   [RESPST_CHK_LENGTH] = "CHK_LENGTH",
+   [RESPST_CHK_RKEY]   = "CHK_RKEY",
+   [RESPST_EXECUTE]= "EXECUTE",
+   [RESPST_READ_REPLY] = "READ_REPLY",
+   [RESPST_COMPLETE]   = "COMPLETE",
+   [RESPST_ACKNOWLEDGE]= "ACKNOWLEDGE",
+   [RESPST_CLEANUP]= "CLEANUP",
+   [RESPST_DUPLICATE_REQUEST]  = "DUPLICATE_REQUEST",
+   [RESPST_ERR_MALFORMED_WQE]  = "ERR_MALFORMED_WQE",
+   [RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE",
+   [RESPST_ERR_MISALIGNED_ATOMIC]  = "ERR_MISALIGNED_ATOMIC",
+   [RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ",
+   [RESPST_ERR_MISSING_OPCODE_FIRST]   = "ERR_MISSING_OPCODE_FIRST",
+   [RESPST_ERR_MISSING_OPCODE_LAST_C]  = "ERR_MISSING_OPCODE_LAST_C",
+   [RESPST_ERR_MISSING_OPCODE_LAST_D1E]= "ERR_MISSING_OPCODE_LAST_D1E",
+   [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ]  = "ERR_TOO_MANY_RDMA_ATM_REQ",
+   [RESPST_ERR_RNR]= "ERR_RNR",
+   [RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION",
+   [RESPST_ERR_LENGTH] = "ERR_LENGTH",
+   [RESPST_ERR_CQ_OVERFLOW]= "ERR_CQ_OVERFLOW",
+   [RESPST_ERROR]  = "ERROR",
+   [RESPST_RESET]  = "RESET",
+   [RESPST_DONE]   = "DONE",

[PATCH rdma-next V2 25/32] IB/rxe: Dummy DMA callbacks for RXE device

2015-12-24 Thread Kamal Heib
Dummy DMA processing for RXE device.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_dma.c | 166 +
 1 file changed, 166 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_dma.c

diff --git a/drivers/staging/rdma/rxe/rxe_dma.c 
b/drivers/staging/rdma/rxe/rxe_dma.c
new file mode 100644
index 000..265b03d
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_dma.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+#define DMA_BAD_ADDER ((u64)0)
+
+static int rxe_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+   return dma_addr == DMA_BAD_ADDER;
+}
+
+static u64 rxe_dma_map_single(struct ib_device *dev,
+ void *cpu_addr, size_t size,
+ enum dma_data_direction direction)
+{
+   WARN_ON(!valid_dma_direction(direction));
+   return (u64)cpu_addr;
+}
+
+static void rxe_dma_unmap_single(struct ib_device *dev,
+u64 addr, size_t size,
+enum dma_data_direction direction)
+{
+   WARN_ON(!valid_dma_direction(direction));
+}
+
+static u64 rxe_dma_map_page(struct ib_device *dev,
+   struct page *page,
+   unsigned long offset,
+   size_t size, enum dma_data_direction direction)
+{
+   u64 addr;
+
+   WARN_ON(!valid_dma_direction(direction));
+
+   if (offset + size > PAGE_SIZE) {
+   addr = DMA_BAD_ADDER;
+   goto done;
+   }
+
+   addr = (u64)page_address(page);
+   if (addr)
+   addr += offset;
+
+done:
+   return addr;
+}
+
+static void rxe_dma_unmap_page(struct ib_device *dev,
+  u64 addr, size_t size,
+  enum dma_data_direction direction)
+{
+   WARN_ON(!valid_dma_direction(direction));
+}
+
+static int rxe_map_sg(struct ib_device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction)
+{
+   struct scatterlist *sg;
+   u64 addr;
+   int i;
+   int ret = nents;
+
+   WARN_ON(!valid_dma_direction(direction));
+
+   for_each_sg(sgl, sg, nents, i) {
+   addr = (u64)page_address(sg_page(sg));
+   if (!addr) {
+   ret = 0;
+   break;
+   }
+   sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+   sg->dma_length = sg->length;
+#endif
+   }
+
+   return ret;
+}
+
+static void rxe_unmap_sg(struct ib_device *dev,
+struct scatterlist *sg, int nents,
+enum dma_data_direction direction)
+{
+   WARN_ON(!valid_dma_direction(direction));
+}
+
+static void rxe_sync_single_for_cpu(struct ib_device *dev,
+   u64 addr,
+   size_t size, enum dma_data_direction dir)
+{
+}
+
+static void rxe_sync_single_for_device(struct ib_device *dev,
+  u64 addr,
+  size_t size, enum dma_data_direction dir)
+{
+}
+
+static void *rxe_dma_alloc_coherent(struct ib_device *dev, size_t size,
+   u64 *dma_handle, gfp_t flag)
+{
+ 

[PATCH rdma-next V2 17/32] IB/rxe: Completion Queue (CQ) manipulation functions

2015-12-24 Thread Kamal Heib
Functions to manipulate CQ.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_cq.c | 165 ++
 1 file changed, 165 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_cq.c

diff --git a/drivers/staging/rdma/rxe/rxe_cq.c 
b/drivers/staging/rdma/rxe/rxe_cq.c
new file mode 100644
index 000..a572e4d
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_cq.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *Redistribution and use in source and binary forms, with or
+ *without modification, are permitted provided that the following
+ *conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
+   int cqe, int comp_vector, struct ib_udata *udata)
+{
+   int count;
+
+   if (cqe <= 0) {
+   pr_warn("cqe(%d) <= 0\n", cqe);
+   goto err1;
+   }
+
+   if (cqe > rxe->attr.max_cqe) {
+   pr_warn("cqe(%d) > max_cqe(%d)\n",
+   cqe, rxe->attr.max_cqe);
+   goto err1;
+   }
+
+   if (cq) {
+   count = queue_count(cq->queue);
+   if (cqe < count) {
+   pr_warn("cqe(%d) < current # elements in queue (%d)",
+   cqe, count);
+   goto err1;
+   }
+   }
+
+   return 0;
+
+err1:
+   return -EINVAL;
+}
+
+static void rxe_send_complete(unsigned long data)
+{
+   struct rxe_cq *cq = (struct rxe_cq *)data;
+
+   cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
+int comp_vector, struct ib_ucontext *context,
+struct ib_udata *udata)
+{
+   int err;
+
+   cq->queue = rxe_queue_init(rxe, &cqe,
+  sizeof(struct rxe_cqe));
+   if (!cq->queue) {
+   pr_warn("unable to create cq\n");
+   return -ENOMEM;
+   }
+
+   err = do_mmap_info(rxe, udata, false, context, cq->queue->buf,
+  cq->queue->buf_size, &cq->queue->ip);
+   if (err) {
+   kvfree(cq->queue->buf);
+   kfree(cq->queue);
+   return err;
+   }
+
+   if (udata)
+   cq->is_user = 1;
+
+   tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq);
+
+   spin_lock_init(&cq->cq_lock);
+   cq->ibcq.cqe = cqe;
+   return 0;
+}
+
+int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, struct ib_udata *udata)
+{
+   int err;
+
+   err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe,
+  sizeof(struct rxe_cqe),
+  cq->queue->ip ? cq->queue->ip->context : NULL,
+  udata, NULL, &cq->cq_lock);
+   if (!err)
+   cq->ibcq.cqe = cqe;
+
+   return err;
+}
+
+int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
+{
+   struct ib_event ev;
+   unsigned long flags;
+
+   spin_lock_irqsave(&cq->cq_lock, flags);
+
+   if (unlikely(queue_full(cq->queue))) {
+   spin_unlock_irqrestore(&cq->cq_lock, flags);
+   if (cq->ibcq.event_handler) {
+   ev.device = cq->ibcq.device;
+   ev.element.cq = &cq->ibcq;
+   ev.event = IB_EVENT_CQ_ERR;
+   cq->ibcq.event_handler(, 

[PATCH rdma-next V2 29/32] IB/rxe: sysfs interface to RXE

2015-12-24 Thread Kamal Heib
sysfs interface for ib_rxe

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_sysfs.c | 168 +++
 1 file changed, 168 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_sysfs.c

diff --git a/drivers/staging/rdma/rxe/rxe_sysfs.c 
b/drivers/staging/rdma/rxe/rxe_sysfs.c
new file mode 100644
index 000..35bc299
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_sysfs.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_net.h"
+
+/* Copy argument and remove trailing CR. Return the new length. */
+static int sanitize_arg(const char *val, char *intf, int intf_len)
+{
+   int len;
+
+   if (!val)
+   return 0;
+
+   /* Remove newline. */
+   for (len = 0; len < intf_len - 1 && val[len] && val[len] != '\n'; len++)
+   intf[len] = val[len];
+   intf[len] = 0;
+
+   if (len == 0 || (val[len] != 0 && val[len] != '\n'))
+   return 0;
+
+   return len;
+}
+
+/* Caller must hold net_info_lock */
+static void rxe_set_port_state(struct net_device *ndev)
+{
+   struct rxe_dev *rxe;
+
+   rxe = net_to_rxe(ndev);
+   if (!rxe)
+   goto out;
+
+   if (net_info[ndev->ifindex].status == IB_PORT_ACTIVE)
+   rxe_net_up(ndev);
+   else
+   rxe_net_down(ndev); /* down for unknown state */
+out:
+   return;
+}
+
+static int rxe_param_set_add(const char *val, struct kernel_param *kp)
+{
+   int i, len, err;
+   char intf[32];
+
+   len = sanitize_arg(val, intf, sizeof(intf));
+   if (!len) {
+   pr_err("rxe: add: invalid interface name\n");
+   return -EINVAL;
+   }
+
+   spin_lock_bh(&net_info_lock);
+   for (i = 0; i < RXE_MAX_IF_INDEX; i++) {
+   struct net_device *ndev = net_info[i].ndev;
+
+   if (ndev && (0 == strncmp(intf, ndev->name, len))) {
+   spin_unlock_bh(&net_info_lock);
+   if (net_info[i].rxe)
+   pr_info("rxe: already configured on %s\n",
+   intf);
+   else {
+   err = rxe_net_add(ndev);
+   if (!err && net_info[i].rxe) {
+   rxe_set_port_state(ndev);
+   } else {
+   pr_err("rxe: add appears to have failed for %s (index %d)\n",
+  intf, i);
+   }
+   }
+   return 0;
+   }
+   }
+   spin_unlock_bh(&net_info_lock);
+
+   pr_warn("interface %s not found\n", intf);
+
+   return 0;
+}
+
+static void rxe_remove_all(void)
+{
+   int i;
+   struct rxe_dev *rxe;
+
+   for (i = 0; i < RXE_MAX_IF_INDEX; i++) {
+   if (net_info[i].rxe) {
+   spin_lock_bh(&net_info_lock);
+   rxe = net_info[i].rxe;
+   net_info[i].rxe = NULL;
+   spin_unlock_bh(&net_info_lock);
+
+   rxe_remove(rxe);
+   }
+   }
+}
+
+static int rxe_param_set_remove(const char *val, struct kernel_param *kp)
+{
+   int i, len;
+   char intf[32];
+   

[PATCH rdma-next V2 28/32] IB/rxe: Interface to netdev stack

2015-12-24 Thread Kamal Heib
Linux netdev related code

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
---
 drivers/staging/rdma/rxe/rxe_net.c | 729 +
 drivers/staging/rdma/rxe/rxe_net.h |  78 
 2 files changed, 807 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_net.c
 create mode 100644 drivers/staging/rdma/rxe/rxe_net.h

diff --git a/drivers/staging/rdma/rxe/rxe_net.c 
b/drivers/staging/rdma/rxe/rxe_net.c
new file mode 100644
index 000..14789a9c
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_net.c
@@ -0,0 +1,729 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "rxe.h"
+#include "rxe_net.h"
+#include "rxe_loc.h"
+
+/*
+ * note: this table is a replacement for a protocol specific pointer
+ * in struct net_device which exists for other ethertypes
+ * this allows us to not have to patch that data structure
+ * eventually we want to get our own when we're famous
+ */
+struct rxe_net_info net_info[RXE_MAX_IF_INDEX];
+spinlock_t net_info_lock; /* spinlock for net_info array */
+struct rxe_recv_sockets recv_sockets;
+
+static __be64 rxe_mac_to_eui64(struct net_device *ndev)
+{
+   unsigned char *mac_addr = ndev->dev_addr;
+   __be64 eui64;
+   unsigned char *dst = (unsigned char *)&eui64;
+
+   dst[0] = mac_addr[0] ^ 2;
+   dst[1] = mac_addr[1];
+   dst[2] = mac_addr[2];
+   dst[3] = 0xff;
+   dst[4] = 0xfe;
+   dst[5] = mac_addr[3];
+   dst[6] = mac_addr[4];
+   dst[7] = mac_addr[5];
+
+   return eui64;
+}
+
+static __be64 node_guid(struct rxe_dev *rxe)
+{
+   return rxe_mac_to_eui64(rxe->ndev);
+}
+
+static __be64 port_guid(struct rxe_dev *rxe, unsigned int port_num)
+{
+   return rxe_mac_to_eui64(rxe->ndev);
+}
+
+static struct device *dma_device(struct rxe_dev *rxe)
+{
+   struct net_device *ndev;
+
+   ndev = rxe->ndev;
+
+   if (ndev->priv_flags & IFF_802_1Q_VLAN)
+   ndev = vlan_dev_real_dev(ndev);
+
+   return ndev->dev.parent;
+}
+
+static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
+{
+   int err;
+   unsigned char ll_addr[ETH_ALEN];
+
+   ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
+   err = dev_mc_add(rxe->ndev, ll_addr);
+
+   return err;
+}
+
+static int mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
+{
+   int err;
+   unsigned char ll_addr[ETH_ALEN];
+
+   ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
+   err = dev_mc_del(rxe->ndev, ll_addr);
+
+   return err;
+}
+
+static struct rtable *rxe_find_route4(struct in_addr *saddr,
+ struct in_addr *daddr)
+{
+   struct rtable *rt;
+   struct flowi4 fl = { { 0 } };
+
+   memset(&fl, 0, sizeof(fl));
+   memcpy(&fl.saddr, saddr, sizeof(*saddr));
+   memcpy(&fl.daddr, daddr, sizeof(*daddr));
+   fl.flowi4_proto = IPPROTO_UDP;
+
+   rt = ip_route_output_key(&init_net, &fl);
+   if (IS_ERR(rt)) {
+   pr_err("no route to %pI4\n", &daddr->s_addr);
+   return NULL;
+   }
+
+   return rt;
+}
+
+static struct dst_entry *rxe_find_route6(struct net_device *ndev,
+struct in6_addr *saddr,
+struct in6_addr *daddr)
+{
+   struct dst_entry *ndst;
+   struct flowi6 fl6 = { { 0 } };
+
+   memset(&fl6, 0, sizeof(fl6));
+  

[PATCH rdma-next V2 30/32] IB/rxe: Shared objects between user and kernel

2015-12-24 Thread Kamal Heib
From: Amir Vadai 

Objects used by the userspace to post work requests.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 include/uapi/rdma/Kbuild   |   1 +
 include/uapi/rdma/ib_rxe.h | 139 +
 2 files changed, 140 insertions(+)
 create mode 100644 include/uapi/rdma/ib_rxe.h

diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index 231901b..94f9401 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -6,3 +6,4 @@ header-y += ib_user_verbs.h
 header-y += rdma_netlink.h
 header-y += rdma_user_cm.h
 header-y += hfi/
+header-y += ib_rxe.h
diff --git a/include/uapi/rdma/ib_rxe.h b/include/uapi/rdma/ib_rxe.h
new file mode 100644
index 000..fc1d9ca
--- /dev/null
+++ b/include/uapi/rdma/ib_rxe.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_RXE_H
+#define IB_RXE_H
+
+#include <linux/types.h>
+
+union rxe_gid {
+   __u8    raw[16];
+   struct {
+   __be64  subnet_prefix;
+   __be64  interface_id;
+   } global;
+};
+
+struct rxe_global_route {
+   union rxe_gid   dgid;
+   __u32   flow_label;
+   __u8    sgid_index;
+   __u8    hop_limit;
+   __u8    traffic_class;
+};
+
+struct rxe_av {
+   __u8    port_num;
+   __u8    network_type;
+   struct rxe_global_route grh;
+   union {
+   struct sockaddr _sockaddr;
+   struct sockaddr_in  _sockaddr_in;
+   struct sockaddr_in6 _sockaddr_in6;
+   } sgid_addr, dgid_addr;
+};
+
+struct rxe_send_wr {
+   __u64   wr_id;
+   __u32   num_sge;
+   __u32   opcode;
+   __u32   send_flags;
+   union {
+   __u32   imm_data;
+   __u32   invalidate_rkey;
+   } ex;
+   union {
+   struct {
+   __u64   remote_addr;
+   __u32   rkey;
+   } rdma;
+   struct {
+   __u64   remote_addr;
+   __u64   compare_add;
+   __u64   swap;
+   __u32   rkey;
+   } atomic;
+   struct {
+   __u32   remote_qpn;
+   __u32   remote_qkey;
+   __u16   pkey_index;
+   } ud;
+   } wr;
+};
+
+struct rxe_sge {
+   __u64   addr;
+   __u32   length;
+   __u32   lkey;
+};
+
+struct mminfo {
+   __u64   offset;
+   __u32   size;
+   __u32   pad;
+};
+
+struct rxe_dma_info {
+   __u32   length;
+   __u32   resid;
+   __u32   cur_sge;
+   __u32   num_sge;
+   __u32   sge_offset;
+   union {
+   __u8    inline_data[0];
+   struct rxe_sge  sge[0];
+   };
+};
+
+struct rxe_send_wqe {
+   struct rxe_send_wr  wr;
+   struct rxe_av   av;
+   __u32   status;
+   __u32   state;
+   __u64   iova;
+   __u32   mask;
+   __u32   first_psn;
+   __u32   last_psn;
+   __u32   ack_length;
+   __u32  

[PATCH rdma-next V2 20/32] IB/rxe: Multicast implementation

2015-12-24 Thread Kamal Heib
Multicast groups handling.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_mcast.c | 190 +++
 1 file changed, 190 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_mcast.c

diff --git a/drivers/staging/rdma/rxe/rxe_mcast.c 
b/drivers/staging/rdma/rxe/rxe_mcast.c
new file mode 100644
index 000..bcf37be
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_mcast.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *Redistribution and use in source and binary forms, with or
+ *without modification, are permitted provided that the following
+ *conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
+ struct rxe_mc_grp **grp_p)
+{
+   int err;
+   struct rxe_mc_grp *grp;
+
+   if (rxe->attr.max_mcast_qp_attach == 0) {
+   err = -EINVAL;
+   goto err1;
+   }
+
+   grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
+   if (grp)
+   goto done;
+
+   grp = rxe_alloc(&rxe->mc_grp_pool);
+   if (!grp) {
+   err = -ENOMEM;
+   goto err1;
+   }
+
+   INIT_LIST_HEAD(&grp->qp_list);
+   spin_lock_init(&grp->mcg_lock);
+   grp->rxe = rxe;
+
+   rxe_add_key(grp, mgid);
+
+   err = rxe->ifc_ops->mcast_add(rxe, mgid);
+   if (err)
+   goto err2;
+
+done:
+   *grp_p = grp;
+   return 0;
+
+err2:
+   rxe_drop_ref(grp);
+err1:
+   return err;
+}
+
+int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
+  struct rxe_mc_grp *grp)
+{
+   int err;
+   struct rxe_mc_elem *elem;
+
+   /* check to see if the qp is already a member of the group */
+   spin_lock_bh(&qp->grp_lock);
+   spin_lock_bh(&grp->mcg_lock);
+   list_for_each_entry(elem, &grp->qp_list, qp_list) {
+   if (elem->qp == qp) {
+   err = 0;
+   goto out;
+   }
+   }
+
+   if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) {
+   err = -ENOMEM;
+   goto out;
+   }
+
+   elem = rxe_alloc(&rxe->mc_elem_pool);
+   if (!elem) {
+   err = -ENOMEM;
+   goto out;
+   }
+
+   /* each qp holds a ref on the grp */
+   rxe_add_ref(grp);
+
+   grp->num_qp++;
+   elem->qp = qp;
+   elem->grp = grp;
+
+   list_add(&elem->qp_list, &grp->qp_list);
+   list_add(&elem->grp_list, &qp->grp_list);
+
+   err = 0;
+out:
+   spin_unlock_bh(&grp->mcg_lock);
+   spin_unlock_bh(&qp->grp_lock);
+   return err;
+}
+
+int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
+   union ib_gid *mgid)
+{
+   struct rxe_mc_grp *grp;
+   struct rxe_mc_elem *elem, *tmp;
+
+   grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
+   if (!grp)
+   goto err1;
+
+   spin_lock_bh(&qp->grp_lock);
+   spin_lock_bh(&grp->mcg_lock);
+
+   list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) {
+   if (elem->qp == qp) {
+   list_del(&elem->qp_list);
+   list_del(&elem->grp_list);
+   grp->num_qp--;
+
+   spin_unlock_bh(&grp->mcg_lock);
+   spin_unlock_bh(&qp->grp_lock);
+   rxe_drop_ref(elem);
+   rxe_drop_ref(grp);  /* ref held by QP */
+   

[PATCH rdma-next V2 08/32] IB/rxe: Add maintainer for rxe driver

2015-12-24 Thread Kamal Heib
Add maintainer for rxe driver

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 MAINTAINERS | 9 +
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 233f834..ae1aff2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6945,6 +6945,15 @@ W:   http://www.mellanox.com
 Q: http://patchwork.ozlabs.org/project/netdev/list/
 F: drivers/net/ethernet/mellanox/mlxsw/
 
+SOFT-ROCE DRIVER (rxe)
+M: Kamal Heib 
+L: linux-rdma@vger.kernel.org
+S: Supported
+W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
+Q: http://patchwork.kernel.org/project/linux-rdma/list/
+F: drivers/staging/rdma/rxe/
+F: include/uapi/rdma/ib_rxe.h
+
 MEMBARRIER SUPPORT
 M: Mathieu Desnoyers 
 M: "Paul E. McKenney" 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH rdma-next V2 21/32] IB/rxe: Received packets handling

2015-12-24 Thread Kamal Heib
Handles receiving new packets which are sent to either request or
response processing.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_recv.c | 371 
 1 file changed, 371 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_recv.c

diff --git a/drivers/staging/rdma/rxe/rxe_recv.c 
b/drivers/staging/rdma/rxe/rxe_recv.c
new file mode 100644
index 000..092
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_recv.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/skbuff.h>
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+   struct rxe_qp *qp)
+{
+   if (unlikely(!qp->valid))
+   goto err1;
+
+   switch (qp_type(qp)) {
+   case IB_QPT_RC:
+   if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) {
+   pr_warn_ratelimited("bad qp type\n");
+   goto err1;
+   }
+   break;
+   case IB_QPT_UC:
+   if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) {
+   pr_warn_ratelimited("bad qp type\n");
+   goto err1;
+   }
+   break;
+   case IB_QPT_UD:
+   case IB_QPT_SMI:
+   case IB_QPT_GSI:
+   if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) {
+   pr_warn_ratelimited("bad qp type\n");
+   goto err1;
+   }
+   break;
+   default:
+   pr_warn_ratelimited("unsupported qp type\n");
+   goto err1;
+   }
+
+   if (pkt->mask & RXE_REQ_MASK) {
+   if (unlikely(qp->resp.state != QP_STATE_READY))
+   goto err1;
+   } else if (unlikely(qp->req.state < QP_STATE_READY ||
+   qp->req.state > QP_STATE_DRAINED))
+   goto err1;
+
+   return 0;
+
+err1:
+   return -EINVAL;
+}
+
+static void set_bad_pkey_cntr(struct rxe_port *port)
+{
+   spin_lock_bh(&port->port_lock);
+   port->attr.bad_pkey_cntr = min((u32)0xffff,
+  port->attr.bad_pkey_cntr + 1);
+   spin_unlock_bh(&port->port_lock);
+}
+
+static void set_qkey_viol_cntr(struct rxe_port *port)
+{
+   spin_lock_bh(&port->port_lock);
+   port->attr.qkey_viol_cntr = min((u32)0xffff,
+   port->attr.qkey_viol_cntr + 1);
+   spin_unlock_bh(&port->port_lock);
+}
+
+static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ u32 qpn, struct rxe_qp *qp)
+{
+   int i;
+   int found_pkey = 0;
+   struct rxe_port *port = &rxe->port[pkt->port_num - 1];
+   u16 pkey = bth_pkey(pkt);
+
+   pkt->pkey_index = 0;
+
+   if (qpn == 1) {
+   for (i = 0; i < port->attr.pkey_tbl_len; i++) {
+   if (pkey_match(pkey, port->pkey_tbl[i])) {
+   pkt->pkey_index = i;
+   found_pkey = 1;
+   break;
+   }
+   }
+
+   if (!found_pkey) {
+   pr_warn_ratelimited("bad pkey = 0x%x\n", pkey);
+   set_bad_pkey_cntr(port);
+   goto err1;
+   }
+   } else if (qpn 

[PATCH rdma-next V2 11/32] IB/rxe: Common user/kernel queue implementation

2015-12-24 Thread Kamal Heib
A simple circular buffer that can optionally be shared between user
space and the kernel and can be resized.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_queue.c | 217 +++
 drivers/staging/rdma/rxe/rxe_queue.h | 178 
 2 files changed, 395 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_queue.c
 create mode 100644 drivers/staging/rdma/rxe/rxe_queue.h

diff --git a/drivers/staging/rdma/rxe/rxe_queue.c 
b/drivers/staging/rdma/rxe/rxe_queue.c
new file mode 100644
index 000..aabe04b
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_queue.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/vmalloc.h>
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+int do_mmap_info(struct rxe_dev *rxe,
+struct ib_udata *udata,
+bool is_req,
+struct ib_ucontext *context,
+struct rxe_queue_buf *buf,
+size_t buf_size,
+struct rxe_mmap_info **ip_p)
+{
+   int err;
+   u32 len, offset;
+   struct rxe_mmap_info *ip = NULL;
+
+   if (udata) {
+   if (is_req) {
+   len = udata->outlen - sizeof(struct mminfo);
+   offset = sizeof(struct mminfo);
+   } else {
+   len = udata->outlen;
+   offset = 0;
+   }
+
+   if (len < sizeof(ip->info))
+   goto err1;
+
+   ip = rxe_create_mmap_info(rxe, buf_size, context, buf);
+   if (!ip)
+   goto err1;
+
+   err = copy_to_user(udata->outbuf + offset, &ip->info,
+  sizeof(ip->info));
+   if (err)
+   goto err2;
+
+   spin_lock_bh(&rxe->pending_lock);
+   list_add(&ip->pending_mmaps, &rxe->pending_mmaps);
+   spin_unlock_bh(&rxe->pending_lock);
+   }
+
+   *ip_p = ip;
+
+   return 0;
+
+err2:
+   kfree(ip);
+err1:
+   return -EINVAL;
+}
+
+struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
+int *num_elem,
+unsigned int elem_size)
+{
+   struct rxe_queue *q;
+   size_t buf_size;
+   unsigned int num_slots;
+
+   /* num_elem == 0 is allowed, but uninteresting */
+   if (*num_elem < 0)
+   goto err1;
+
+   q = kmalloc(sizeof(*q), GFP_KERNEL);
+   if (!q)
+   goto err1;
+
+   q->rxe = rxe;
+
+   /* used in resize, only need to copy used part of queue */
+   q->elem_size = elem_size;
+
+   /* pad element up to at least a cacheline and always a power of 2 */
+   if (elem_size < cache_line_size())
+   elem_size = cache_line_size();
+   elem_size = roundup_pow_of_two(elem_size);
+
+   q->log2_elem_size = order_base_2(elem_size);
+
+   num_slots = *num_elem + 1;
+   num_slots = roundup_pow_of_two(num_slots);
+   q->index_mask = num_slots - 1;
+
+   buf_size = sizeof(struct rxe_queue_buf) + num_slots * elem_size;
+
+   q->buf = vmalloc_user(buf_size);
+   if (!q->buf)
+   goto err2;
+
+   q->buf->log2_elem_size = q->log2_elem_size;
+   q->buf->index_mask = q->index_mask;
+
+   q->buf_size = buf_size;
+
+   

[PATCH rdma-next V2 16/32] IB/rxe: Shared Receive Queue (SRQ) manipulation functions

2015-12-24 Thread Kamal Heib
Functions to manipulate SRQ.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe_srq.c | 195 +
 1 file changed, 195 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_srq.c

diff --git a/drivers/staging/rdma/rxe/rxe_srq.c 
b/drivers/staging/rdma/rxe/rxe_srq.c
new file mode 100644
index 000..1411fd2
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_srq.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
+struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
+{
+   if (srq && srq->error) {
+   pr_warn("srq in error state\n");
+   goto err1;
+   }
+
+   if (mask & IB_SRQ_MAX_WR) {
+   if (attr->max_wr > rxe->attr.max_srq_wr) {
+   pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
+   attr->max_wr, rxe->attr.max_srq_wr);
+   goto err1;
+   }
+
+   if (attr->max_wr <= 0) {
+   pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
+   goto err1;
+   }
+
+   if (srq && srq->limit && (attr->max_wr < srq->limit)) {
+   pr_warn("max_wr (%d) < srq->limit (%d)\n",
+   attr->max_wr, srq->limit);
+   goto err1;
+   }
+
+   if (attr->max_wr < RXE_MIN_SRQ_WR)
+   attr->max_wr = RXE_MIN_SRQ_WR;
+   }
+
+   if (mask & IB_SRQ_LIMIT) {
+   if (attr->srq_limit > rxe->attr.max_srq_wr) {
+   pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
+   attr->srq_limit, rxe->attr.max_srq_wr);
+   goto err1;
+   }
+
+   if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) {
+   pr_warn("srq_limit (%d) > cur limit(%d)\n",
+   attr->srq_limit,
+srq->rq.queue->buf->index_mask);
+   goto err1;
+   }
+   }
+
+   if (mask == IB_SRQ_INIT_MASK) {
+   if (attr->max_sge > rxe->attr.max_srq_sge) {
+   pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
+   attr->max_sge, rxe->attr.max_srq_sge);
+   goto err1;
+   }
+
+   if (attr->max_sge < RXE_MIN_SRQ_SGE)
+   attr->max_sge = RXE_MIN_SRQ_SGE;
+   }
+
+   return 0;
+
+err1:
+   return -EINVAL;
+}
+
+int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
+ struct ib_srq_init_attr *init,
+ struct ib_ucontext *context, struct ib_udata *udata)
+{
+   int err;
+   int srq_wqe_size;
+   struct rxe_queue *q;
+
+   srq->event_handler  = init->event_handler;
+   srq->context= init->srq_context;
+   srq->limit  = init->attr.srq_limit;
+   srq->srq_num= srq->pelem.index;
+   srq->rq.max_wr  = init->attr.max_wr;
+   srq->rq.max_sge = init->attr.max_sge;
+
+   srq_wqe_size= rcv_wqe_size(srq->rq.max_sge);
+
+   

[PATCH rdma-next V2 27/32] IB/rxe: Module init hooks

2015-12-24 Thread Kamal Heib
Module main for ib_rxe

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
---
 drivers/staging/rdma/rxe/rxe.c | 436 +
 drivers/staging/rdma/rxe/rxe.h |   2 +
 2 files changed, 438 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe.c

diff --git a/drivers/staging/rdma/rxe/rxe.c b/drivers/staging/rdma/rxe/rxe.c
new file mode 100644
index 000..616b07b
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");
+MODULE_DESCRIPTION("Soft RDMA transport");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("0.2");
+
+/* free resources for all ports on a device */
+static void rxe_cleanup_ports(struct rxe_dev *rxe)
+{
+   unsigned int port_num;
+   struct rxe_port *port;
+
+   for (port_num = 1; port_num <= rxe->num_ports; port_num++) {
+   port = &rxe->port[port_num - 1];
+
+   kfree(port->pkey_tbl);
+   port->pkey_tbl = NULL;
+   }
+
+   kfree(rxe->port);
+   rxe->port = NULL;
+}
+
+/* free resources for a rxe device; all objects created for this device
+ * must have been destroyed
+ */
+static void rxe_cleanup(struct rxe_dev *rxe)
+{
+   rxe_pool_cleanup(&rxe->uc_pool);
+   rxe_pool_cleanup(&rxe->pd_pool);
+   rxe_pool_cleanup(&rxe->ah_pool);
+   rxe_pool_cleanup(&rxe->srq_pool);
+   rxe_pool_cleanup(&rxe->qp_pool);
+   rxe_pool_cleanup(&rxe->cq_pool);
+   rxe_pool_cleanup(&rxe->mr_pool);
+   rxe_pool_cleanup(&rxe->fmr_pool);
+   rxe_pool_cleanup(&rxe->mw_pool);
+   rxe_pool_cleanup(&rxe->mc_grp_pool);
+   rxe_pool_cleanup(&rxe->mc_elem_pool);
+
+   rxe_cleanup_ports(rxe);
+}
+
+/* called when all references have been dropped */
+void rxe_release(struct kref *kref)
+{
+   struct rxe_dev *rxe = container_of(kref, struct rxe_dev, ref_cnt);
+
+   rxe_cleanup(rxe);
+   ib_dealloc_device(&rxe->ib_dev);
+}
+
+void rxe_dev_put(struct rxe_dev *rxe)
+{
+   kref_put(&rxe->ref_cnt, rxe_release);
+}
+EXPORT_SYMBOL_GPL(rxe_dev_put);
+
+/* initialize rxe device parameters */
+static int rxe_init_device_param(struct rxe_dev *rxe)
+{
+   rxe->num_ports  = RXE_NUM_PORT;
+   rxe->max_inline_data= RXE_MAX_INLINE_DATA;
+
+   rxe->attr.fw_ver= RXE_FW_VER;
+   rxe->attr.max_mr_size   = RXE_MAX_MR_SIZE;
+   rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP;
+   rxe->attr.vendor_id = RXE_VENDOR_ID;
+   rxe->attr.vendor_part_id= RXE_VENDOR_PART_ID;
+   rxe->attr.hw_ver= RXE_HW_VER;
+   rxe->attr.max_qp= RXE_MAX_QP;
+   rxe->attr.max_qp_wr = RXE_MAX_QP_WR;
+   rxe->attr.device_cap_flags  = RXE_DEVICE_CAP_FLAGS;
+   rxe->attr.max_sge   = RXE_MAX_SGE;
+   rxe->attr.max_sge_rd= RXE_MAX_SGE_RD;
+   rxe->attr.max_cq= RXE_MAX_CQ;
+   rxe->attr.max_cqe   = (1 << RXE_MAX_LOG_CQE) - 1;
+   rxe->attr.max_mr= RXE_MAX_MR;
+   rxe->attr.max_pd= RXE_MAX_PD;
+   rxe->attr.max_qp_rd_atom= RXE_MAX_QP_RD_ATOM;
+

[PATCH rdma-next V2 12/32] IB/rxe: Interface to ib_core

2015-12-24 Thread Kamal Heib
rxe interface to rdma/core

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
Reviewed-by: Haggai Eran 
Signed-off-by: Kamal Heib 
---
 drivers/staging/rdma/rxe/rxe_verbs.c | 1423 ++
 drivers/staging/rdma/rxe/rxe_verbs.h |  486 
 2 files changed, 1909 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_verbs.c
 create mode 100644 drivers/staging/rdma/rxe/rxe_verbs.h

diff --git a/drivers/staging/rdma/rxe/rxe_verbs.c 
b/drivers/staging/rdma/rxe/rxe_verbs.c
new file mode 100644
index 000..5cbbb35
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_verbs.c
@@ -0,0 +1,1423 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+static int rxe_query_device(struct ib_device *dev,
+   struct ib_device_attr *attr,
+   struct ib_udata *uhw)
+{
+   struct rxe_dev *rxe = to_rdev(dev);
+
+   if (uhw->inlen || uhw->outlen)
+   return -EINVAL;
+
+   *attr = rxe->attr;
+   return 0;
+}
+
+static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed,
+   u8 *active_width)
+{
+   if (speed <= 1000) {
+   *active_width = IB_WIDTH_1X;
+   *active_speed = IB_SPEED_SDR;
+   } else if (speed <= 10000) {
+   *active_width = IB_WIDTH_1X;
+   *active_speed = IB_SPEED_FDR10;
+   } else if (speed <= 20000) {
+   *active_width = IB_WIDTH_4X;
+   *active_speed = IB_SPEED_DDR;
+   } else if (speed <= 30000) {
+   *active_width = IB_WIDTH_4X;
+   *active_speed = IB_SPEED_QDR;
+   } else if (speed <= 40000) {
+   *active_width = IB_WIDTH_4X;
+   *active_speed = IB_SPEED_FDR10;
+   } else {
+   *active_width = IB_WIDTH_4X;
+   *active_speed = IB_SPEED_EDR;
+   }
+}
+
+static int rxe_query_port(struct ib_device *dev,
+ u8 port_num, struct ib_port_attr *attr)
+{
+   struct rxe_dev *rxe = to_rdev(dev);
+   struct rxe_port *port;
+   struct ethtool_cmd cmd;
+
+   if (unlikely(port_num < 1 || port_num > rxe->num_ports)) {
+   pr_warn("invalid port_number %d\n", port_num);
+   goto err1;
+   }
+
+   port = &rxe->port[port_num - 1];
+
+   *attr = port->attr;
+
+   mutex_lock(&rxe->usdev_lock);
+   rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd);
+   rxe_eth_speed_to_ib_speed(cmd.speed, &attr->active_speed,
+   &attr->active_width);
+   mutex_unlock(&rxe->usdev_lock);
+
+   return 0;
+
+err1:
+   return -EINVAL;
+}
+
+static int rxe_query_gid(struct ib_device *device,
+u8 port_num, int index, union ib_gid *gid)
+{
+   int ret;
+
+   if (index > RXE_PORT_GID_TBL_LEN)
+   return -EINVAL;
+
+   ret = ib_get_cached_gid(device, port_num, index, gid, NULL);
+   if (ret == -EAGAIN) {
+   memcpy(gid, &zgid, sizeof(*gid));
+   return 0;
+   }
+
+   return ret;
+}
+
+static int rxe_add_gid(struct ib_device *device, u8 port_num, unsigned int
+  index, const union ib_gid *gid,
+  const struct ib_gid_attr *attr, void **context)
+{
+   return 0;
+}
+
+static int rxe_del_gid(struct ib_device *device, u8 port_num, unsigned int
+  

[PATCH rdma-next V2 19/32] IB/rxe: Memory Region (MR) handling

2015-12-24 Thread Kamal Heib
MR objects handling.

Signed-off-by: Kamal Heib 
Signed-off-by: Amir Vadai 
---
 drivers/staging/rdma/rxe/rxe_mr.c | 764 ++
 1 file changed, 764 insertions(+)
 create mode 100644 drivers/staging/rdma/rxe/rxe_mr.c

diff --git a/drivers/staging/rdma/rxe/rxe_mr.c 
b/drivers/staging/rdma/rxe/rxe_mr.c
new file mode 100644
index 000..89a5c2b
--- /dev/null
+++ b/drivers/staging/rdma/rxe/rxe_mr.c
@@ -0,0 +1,764 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+/*
+ * lfsr (linear feedback shift register) with period 255
+ */
+static u8 rxe_get_key(void)
+{
+   static unsigned key = 1;
+
+   key = key << 1;
+
+   key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
+   ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));
+
+   key &= 0xff;
+
+   return key;
+}
+
+int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
+{
+   switch (mem->type) {
+   case RXE_MEM_TYPE_DMA:
+   return 0;
+
+   case RXE_MEM_TYPE_MR:
+   case RXE_MEM_TYPE_FMR:
+   return ((iova < mem->iova) ||
+   ((iova + length) > (mem->iova + mem->length))) ?
+   -EFAULT : 0;
+
+   default:
+   return -EFAULT;
+   }
+}
+
+#define IB_ACCESS_REMOTE   (IB_ACCESS_REMOTE_READ  \
+   | IB_ACCESS_REMOTE_WRITE\
+   | IB_ACCESS_REMOTE_ATOMIC)
+
+static void rxe_mem_init(int access, struct rxe_mem *mem)
+{
+   u32 lkey = mem->pelem.index << 8 | rxe_get_key();
+   u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
+
+   if (mem->pelem.pool->type == RXE_TYPE_MR) {
+   mem->ibmr.lkey  = lkey;
+   mem->ibmr.rkey  = rkey;
+   } else {
+   mem->ibfmr.lkey = lkey;
+   mem->ibfmr.rkey = rkey;
+   }
+
+   mem->pd = NULL;
+   mem->umem   = NULL;
+   mem->lkey   = lkey;
+   mem->rkey   = rkey;
+   mem->state  = RXE_MEM_STATE_INVALID;
+   mem->type   = RXE_MEM_TYPE_NONE;
+   mem->va = 0;
+   mem->iova   = 0;
+   mem->length = 0;
+   mem->offset = 0;
+   mem->access = 0;
+   mem->page_shift = 0;
+   mem->page_mask  = 0;
+   mem->map_shift  = ilog2(RXE_BUF_PER_MAP);
+   mem->map_mask   = 0;
+   mem->num_buf= 0;
+   mem->max_buf= 0;
+   mem->num_map= 0;
+   mem->map= NULL;
+}
+
+void rxe_mem_cleanup(void *arg)
+{
+   struct rxe_mem *mem = arg;
+   int i;
+
+   if (mem->umem)
+   ib_umem_release(mem->umem);
+
+   if (mem->map) {
+   for (i = 0; i < mem->num_map; i++)
+   kfree(mem->map[i]);
+
+   kfree(mem->map);
+   }
+}
+
+static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf)
+{
+   int i;
+   int num_map;
+   struct rxe_map **map = mem->map;
+
+   num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
+
+   mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
+   if (!mem->map)
+   goto err1;
+
+   for (i = 0; i < num_map; i++) {
+  

Re: [RFC] Generic InfiniBand transport done in software

2015-12-24 Thread Moni Shoua
>
>
> There were discussions, and Mellanox even contributed code to the effort.
> See Kamal's patches in the patch set I provided.
>
As far as I see it discussions were shallow and never produced an
agreement. Kamal's patches should not be considered as such.

>> http://marc.info/?l=linux-rdma&m=144952098726776&w=2 presents a work
>> that besides keeping the name RVT is far from the immature concept I
>> mentioned earlier and its scope was changed from general purpose
>> solution to Intel and HFI/QIB only.
>
>
> The scope has never changed. Our goal is, and has always been to remove the
> code duplication between qib and hfi1. We are doing that by way of rdmavt.
> It is limited in scope to Intel's drivers currently for what I hope are
> obvious reasons.
>
So you actually agree that rdmavt was intended to be a solution to
Intel's specific drivers.
Fair, but IMO this is not what we aimed for.
In fact, if this is an Intel specific solution then why put it in
drivers/infiniband/sw and why publish it when it is not ready?

> I think it makes sense that soft-roce be added as well and hope that
> Mellanox decides to contribute rather than reinventing the wheel.
>
> Is there something in rdmavt that would not work for soft-roce, or is
> something fundamental missing? I have asked this a number of times and
> nothing has been raised so I assume there are no issues. If there are lets
> discuss them.
>
Interfaces between rdmavt and its backends are missing. I consider
this as fundamental.
Concerns were raised but answers were not provided, at least not
satisfying answers.

> Reading through your RFC here, perhaps something like the multicast add
> and delete is concerning?  This is something that is not really needed by
> qib and hfi1 but may be for soft-roce. All that means is soft-roce needs to
> provide it and it would be optional for qib and hfi1. The rdmavt
> architecture is flexible and allows exactly this.
>
>> I therefore conclude that the
>> concept of RVT, as it was supposed to be, was abandoned.
>
>
> This is absolutely incorrect. As mentioned above, nothing has changed.
>
>
> -Denny
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Generic InfiniBand transport done in software

2015-12-24 Thread Moni Shoua
> I'm not sure I understand what you mean.  Denny has posted several high level
> emails similar to this one and has asked for public feedback.  We have been
> addressing all the feedback as it has come in and we continue to work toward
> this common layer.
>
> To that end we have been asking for you to identify any place you see an
> incompatibility with the rdmavt layer for SoftRoCE.  Patches and comments to
> rdmavt are encouraged and we have already incorporated patches from outside
> Intel.
>
I really don't know how to answer to that because I don't expect that
the stub interface to last and if it does, how the problem of code
duplication is going to be solved. When I know how interfaces look
like plus some other documentation I will be able to answer this.

> We are very sensitive to the fact that other drivers will want to use this
> layer in the future.  But I don't think this layer is set in stone.  To
> that end, any functionality which is specific to SoftRoCE should be added when
> SoftRoCE is submitted to the kernel.  To date I have not seen any show stopper
> objections except Sagi's comment on the lack of IB_WR_LOCAL_INV which we will
> drop in the next submission.
>
SoftRoCE was submitted today to staging. Now, if I want to take it out
of staging by using rdmavt I actually can't unless I'll use the stubs
and watch for rdmavt changes. This is unacceptable. Missing final
interfaces and documentation of how to use them is a show stopper IMO

> Also, please be aware that we are being very careful with this work to not
> sacrifice the performance of either qib or hfi1.  There are a couple of items
> you mention below which seem to indicate you would like a more "pure"
> separation of the driver.  I hope you understand that any work in this area
> which affects our performance must be accounted for and may not result in as
> "pure" a separation as you may like.  If that is a show stopper for SoftRoCE
> lets work through the specific examples.
Eventually you'll have to break WR to packets of MTU size, wouldn't you?
Anyway, this is what we had to finalize in the early discussion and
not after code posting

Ira,
I've read your comments to the RFC and I find them worth a thought
before I answer.
In general, finding a good abstraction of the back-end is the hardest
thing in this project and affects how the interface looks like.
I think that most if not all your comments fall in the category of abstraction.


>
> More inline.
>
>>
>> The following is a RFC that presents a solution made of a single
>> generic InfiniBand
>> driver and many hardware specific back-end drivers. The RFC defines
>> the requirements
>> and the interfaces that have to be part of any generic InfiniBand driver.
>> A generic InfiniBand driver that is not compliant with this RFC wouldn't be 
>> able
>> to serve different back-ends and therefore would miss its target.
>>
>> 
>>
>> A. Introduction
>> 
>> In Linux kernel, the responsibility to implement the InfiniBand protocol is
>> roughly divided between 2 elements. The first are the core drivers which 
>> expose
>> an abstract verbs interface to the upper layer as well as interfaces to some
>> common IB services like MAD, SA and CM. The second are vendor drivers and
>> hardware which implement the abstract verbs interface.
>>
>> A high level view of the model is in Figure A1
>>
>>  +-+
>>  | |
>>  |IB core  |
>>  |drivers  |
>>  | |
>>  +++
>>   | Common
>> 
>>   | Vendor
>>  +++
>>  | |
>>  |  Hardware   |
>>  |  drivers|
>>  | |
>>  +++
>>  | |
>>  |  Hardware   |
>>  | |
>>  +-+
>>
>> A1 - IB implementation model in Linux kernel
>>
>> In the vendor part of the model, the division of work between software and
>> hardware is undefined and is usually one of the two below
>>
>> - Context and logic are  managed in software. Hardware role is limited to
>>   lower layer protocols (depending on the link layer) and maybe some offloads
>> - Context and logic are managed in hardware while software role is to create
>>   or destroy a context in the hardware and gets notified when hardware 
>> reports
>>   about a completions tasks.
>>
>> The following examples demonstrate the difference between the approaches 
>> above.
>>
>> - Send flow: application calls post_send() with QP and a WR. In the software
>>   based approach the QP context is retrieved, the WR is parsed and a proper 
>> IB
>>   packet 

Re: [PATCH v2 for-next 5/7] IB/mlx4: Add IB counters table

2015-12-24 Thread Matan Barak
On Thu, Dec 24, 2015 at 4:07 PM, Matan Barak  wrote:
> On Thu, Dec 24, 2015 at 2:38 PM, Or Gerlitz  wrote:
>> On 12/24/2015 12:42 PM, Sagi Grimberg wrote:
>>>
>>>
> This patch seems to generate a list corruption [1] when I test
> with Doug's for-4.5 tree. Eran, care to take a look at this?


 This patch is part from a series that was introduced in 4.3-rc1 [1],
>>>
>>>
>>> Then something else broke it. Can people check their patches on doug's
>>> tree? At the moment it's unusable...
>>
>
> Leon and I have checked Doug's tree with mlx4_ib disabled and we
> didn't encounter any error.
> We ran ucmatose over IB connection (in mlx5) and it worked flawlessly.
>
>>
>> Yes, I checked the branch up to commit 882f3b3 "Merge branches
>> '4.5/Or-cleanup' and '4.5/rdma-cq' into k.o/for-4.5" and it works (rping,
>> ibv_rc_pingpong over top of mlx4 VPI)
>>

Regarding mlx4, Eran and I analyzed it. We didn't test that, but it
seems like the bug is introduced in the 64bit counters test. Here's a
proposal:

diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 539040f..8da3c83 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -714,11 +714,12 @@ err:
  * Figure out which counter table to use depending on
  * the device capabilities.
  */
-static struct attribute_group *get_counter_table(struct ib_device *dev)
+static struct attribute_group *get_counter_table(struct ib_device *dev,
+  int port_num)
 {
struct ib_class_port_info cpi;

-   if (get_perf_mad(dev, 0, IB_PMA_CLASS_PORT_INFO,
+ if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO,
&cpi, 40, sizeof(cpi)) >= 0) {

if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH)
@@ -776,7 +777,7 @@ static int add_port(struct ib_device *device, int port_num,
goto err_put;
}

-   p->pma_table = get_counter_table(device);
+ p->pma_table = get_counter_table(device, port_num);
ret = sysfs_create_group(&p->kobj, p->pma_table);
if (ret)
goto err_put_gid_attrs;


>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Generic InfiniBand transport done in software

2015-12-24 Thread Dennis Dalessandro

On Thu, Dec 24, 2015 at 05:43:11PM +0200, Moni Shoua wrote:



There were discussions, and Mellanox even contributed code to the effort.
See Kamal's patches in the patch set I provided.


As far as I see it discussions were shallow and never produced an
agreement. Kamal's patches should not be considered as such.


Point is others have looked at the code. No issues have been called out to 
date as to why what is there won't work for everyone.



http://marc.info/?l=linux-rdma&m=144952098726776&w=2 presents a work
that besides keeping the name RVT is far from the immature concept I
mentioned earlier and its scope was changed from general purpose
solution to Intel and HFI/QIB only.



The scope has never changed. Our goal is, and has always been to remove the
code duplication between qib and hfi1. We are doing that by way of rdmavt.
It is limited in scope to Intel's drivers currently for what I hope are
obvious reasons.


So you actually agree that rdmavt was intended to be a solution to
Intel's specific drivers.
Fair, but IMO this is not what we aimed for.
In fact, if this is an Intel specific solution then why put it in
drivers/infiniband/sw and why publish it when it is not ready?


Yes it is specific to Intel *now*, that doesn't mean it should stay that 
way. Rdmavt could, and in my opinion should, be extended to support 
soft-roce. I don't think replicating the same thing is a great idea.


As to the location, where do you think it should go. drivers/infiniband/sw 
makes the most sense to me, but open to suggestions.


And for the question of why publish when it's not ready, the better question 
is why not?  Is it not good to see the work in progress as it evolves so the 
community can provide feedback?



I think it makes sense that soft-roce be added as well and hope that
Mellanox decides to contribute rather than reinventing the wheel.

Is there something in rdmavt that would not work for soft-roce, or is
something fundamental missing? I have asked this a number of times and
nothing has been raised so I assume there are no issues. If there are lets
discuss them.


Interfaces between rdmavt and its backends are missing. I consider
this as fundamental.
Concerns were raised but answers were not provided, at least not
satisfying answers.


No one is arguing that. It is a work in progress and will get there. More 
details are in Ira's response.


http://marc.info/?l=linux-rdma&m=145091253118395&w=2

-Denny
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] IB/qib: Improve ipoib UD performance

2015-12-24 Thread Mike Marciniszyn
Based on profiling, UD performance drops in case of processes
in a single client due to excess context switches when
the progress workqueue is scheduled.

This is solved by modifying the heuristic to select the
direct progress instead of the scheduling progress via
the workqueue when UD-like situations are detected in
the heuristic.

Reviewed-by: Vinit Agnihotri 
Signed-off-by: Mike Marciniszyn 
---
 drivers/infiniband/hw/qib/qib_verbs.c |   11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/qib/qib_verbs.c 
b/drivers/infiniband/hw/qib/qib_verbs.c
index de6cb6f..f29f097 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -346,6 +346,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct 
ib_send_wr *wr,
unsigned long flags;
struct qib_lkey_table *rkt;
struct qib_pd *pd;
+   int avoid_schedule = 0;
 
spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -438,11 +439,15 @@ static int qib_post_one_send(struct qib_qp *qp, struct 
ib_send_wr *wr,
qp->ibqp.qp_type == IB_QPT_RC) {
if (wqe->length > 0x8000U)
goto bail_inval_free;
+   if (wqe->length <= qp->pmtu)
+   avoid_schedule = 1;
} else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
- qp->port_num - 1)->ibmtu)
+ qp->port_num - 1)->ibmtu) {
goto bail_inval_free;
-   else
+   } else {
atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
+   avoid_schedule = 1;
+   }
wqe->ssn = qp->s_ssn++;
qp->s_head = next;
 
@@ -458,7 +463,7 @@ bail_inval_free:
 bail_inval:
ret = -EINVAL;
 bail:
-   if (!ret && !wr->next &&
+   if (!ret && !wr->next && !avoid_schedule &&
 !qib_sdma_empty(
   dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
qib_schedule_send(qp);

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V1 0/3] Add cross-channel support

2015-12-24 Thread Christoph Hellwig
On Thu, Dec 24, 2015 at 10:02:29AM +0200, Or Gerlitz wrote:
> We had consensus among the reviewers that the 1st patch ("IB/core: Align
> coding style of ib_device_cap_flags structure") is wrong cleanup which
> basically is (1) unneeded (2) creates more damage (git blame and such,
> non-applicable to uapi, more) than benefit, etc -- finally Leon was
> convinced too [1].

It's not really an issue vs uapi.  Using the weird BIT() macro
would have been, but without it I think this cleanup is ok, even if I
personally wouldn't have done it.  git-blame isn't really a major
issue either, as you can blame past revisions.

> Leon will re-spin in the coming 1-2 hours V2, could please pick it instead
> of V1, when people agree on direction X and you are not against it, lets do
> X and not Y.

It would be great if we could stop rebasing whats already in the tree
for the benefit of everyone building on top of this.  For example just
finished rebasing my series to move many constants including this one
to the uapi headers, and I'd hate to rebase it once again now that
the dust has settled.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 00/10] iSER support for remote invalidate

2015-12-24 Thread Christoph Hellwig
> Applied to target-pending/for-next as v4.5-rc1 material, along with
> Reviewed-by tags from HCH.

So this is both in your and Dougs now it seems.  Given the non-trivial
merge with the other RDMA updates I'd suggest to drop it from the
target tree as Doug already sorted out the merge.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 00/10] iSER support for remote invalidate

2015-12-24 Thread Sagi Grimberg



Applied to target-pending/for-next as v4.5-rc1 material, along with
Reviewed-by tags from HCH.


So this is both in your and Dougs now it seems.  Given the non-trivial
merge with the other RDMA updates I'd suggest to drop it from the
target tree as Doug already sorted out the merge.


Yea, this conflicts with the CQ API stuff. Doug and I sorting it out.

Thanks,
Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 5/7] IB/mlx4: Add IB counters table

2015-12-24 Thread Or Gerlitz

On 12/24/2015 12:12 PM, Sagi Grimberg wrote:

This patch seems to generate a list corruption [1] when I test
with Doug's for-4.5 tree. Eran, care to take a look at this? 


This patch is part of a series that was introduced in 4.3-rc1 [1]; did 
4.4-rc5/6 work for you before you applied further patches on top?


Or.

[1]
fbfb662 IB/mlx4: Add support for blocking multicast loopback QP creation 
user flag
7b59f0f IB/mlx4: Add counter based implementation for QP multicast 
loopback block

3ba8e31 IB/mlx4: Add IB counters table
74194fb net/mlx4_en: Implement mcast loopback prevention for ETH qps
9a89283 net/mlx4_core: Add support for filtering multicast loopback
ddf9529 IB/core: Allow setting create flags in QP init attribute
6d8a749 IB/core: Extend ib_uverbs_create_qp


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


libmlx5 : Maintainer/Git change

2015-12-24 Thread Yishai Hadas
I’m taking the responsibility from Mellanox side on libmlx5, the formal 
tree to be used: git://openfabrics.org/~yishaih/libmlx5.git


Thanks Eli for creating and maintaining it from day one.

Yishai


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 5/7] IB/mlx4: Add IB counters table

2015-12-24 Thread Sagi Grimberg

This patch seems to generate a list corruption [1] when I test
with Doug's for-4.5 tree.

Eran, care to take a look at this?

[1]:
mlx4_core: Mellanox ConnectX core driver v2.2-1 (Feb, 2014) 

mlx4_core: Initializing :04:00.0 

mlx4_core :04:00.0: PCIe link speed is 8.0GT/s, device supports 
8.0GT/s
mlx4_core :04:00.0: PCIe link width is x8, device supports x8 

 mlx4_ib_add: mlx4_ib: Mellanox ConnectX InfiniBand driver 
v2.2-1 (Feb 2014)
 mlx4_ib_add: counter index 0 for port 1 allocated 0 

 mlx4_ib_add: counter index 1 for port 2 allocated 0 

BUG: unable to handle kernel NULL pointer dereference at 
(null)
IP: [] __list_add+0x26/0xd0 

PGD 46da14067 PUD 46daa0067 PMD 0 

Oops:  [#1] SMP 

Modules linked in: mlx4_ib(+) ib_sa ib_mad mlx4_core mlx5_ib mlx5_core 
ib_core ib_addr netconsole configfs nfsv3 nfs fscache cfg80211 rfkill 
x86_pkg_temp_thermal coretemp kvm_intel kvm irqbypass crc32c_intel 
aesni_intel aes_x86_64 glue_helper lrw dm_mod gf128mul ablk_helper 
cryptd iTCO_wdt iTCO_vendor_support sb_edac shpchp ipmi_si ioatdma 
lpc_ich mfd_core edac_core pcspkr wmi ipmi_msghandler i2c_i801 
acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables ext4 
mbcache jbd2 sd_mod isci libsas igb serio_raw ahci ptp pps_core libahci 
i2c_algo_bit scsi_transport_sas i2c_core dca ipv6 autofs4 [last 
unloaded: mlx5_core] 

CPU: 0 PID: 1737 Comm: modprobe Not tainted 4.4.0-rc6+ #107 



Hardware name: Supermicro SYS-1027R-WRF/X9DRW, BIOS 3.0a 08/08/2013 



task: 8804673da800 ti: 880466694000 task.ti: 880466694000 



RIP: 0010:[]  [] 
__list_add+0x26/0xd0 

RSP: 0018:880466697898  EFLAGS: 00010246 



RAX:  RBX: 8804666978c8 RCX: 8804673da800 



RDX: 88086b8539b8 RSI:  RDI: 8804666978c8 



RBP: 8804666978b8 R08:  R09: 0001 



R10:  R11: fffe R12: 88086b8539b8 



R13:  R14: 88086b8539b8 R15: 880466697908
FS:  7f37a02cf700() GS:88047fc0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2:  CR3: 00046b6ee000 CR4: 000406f0
Stack:
 8804673da800 88086b8539b0 8804673da800 88086b8539b4
 880466697958 8154f7be 880466697904 0292
 880466697938 81259bc1 7f49 824000c0
Call Trace:
 [] __mutex_lock_slowpath+0x6e/0x110
 [] ? ida_simple_get+0x91/0x100
 [] ? kernfs_next_descendant_post+0x1e/0x90
 [] ? kernfs_activate+0x86/0xf0
 [] mutex_lock+0x1e/0x40
 [] iboe_process_mad+0x73/0x180 [mlx4_ib]
 [] mlx4_ib_process_mad+0xd6/0x110 [mlx4_ib]
 [] get_perf_mad+0x103/0x140 [ib_core]
 [] get_counter_table+0x24/0x40 [ib_core]
 [] ? __kmalloc+0xde/0xe0
 [] add_port+0x115/0x3f0 [ib_core]
 [] ib_device_register_sysfs+0xee/0x160 [ib_core]
 [] ib_register_device+0x1d5/0x300 [ib_core]
 [] mlx4_ib_add+0x78b/0xd00 [mlx4_ib]
 [] mlx4_add_device+0x3e/0xb0 [mlx4_core]
 [] mlx4_register_interface+0x87/0xe0 [mlx4_core]
 [] mlx4_ib_init+0x55/0x72 [mlx4_ib]
 [] ? 0xa0096000
 [] do_one_initcall+0xa8/0x1c0
 [] do_init_module+0x5f/0x210
 [] load_module+0x5d7/0x700
 [] ? mod_sysfs_teardown+0x140/0x140
 [] ? module_sect_show+0x20/0x20
 [] SyS_finit_module+0xbb/0xf0
 [] entry_SYSCALL_64_fastpath+0x12/0x6a
Code: 90 90 90 90 90 55 48 89 e5 48 83 ec 20 48 89 5d e8 4c 89 65 f0 48 
89 fb 4c 89 6d f8 4c 8b 42 08 49 89 f5 49 89 d4 49 39 f0 75 31 <4d> 8b 
45 00 4d 39 c4 75 6f 4c 39 e3 74 45 4c 39 eb 74 40 49 89

RIP  [] __list_add+0x26/0xd0
 RSP 
CR2: 
---[ end trace 5f4fe0ca857661e6 ]---
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 00/11] NFS/RDMA server patches for v4.5

2015-12-24 Thread Chuck Lever
My functional test suite includes Cthon, iozone, dbench, fio,
multi-threaded builds of git and the Linux kernel, and xfstests.

This patch series passes with NFSv3, NFSv4.0, and now NFSv4.1.

--
Chuck Lever

> On Dec 23, 2015, at 21:00, J. Bruce Fields  wrote:
> 
>> On Wed, Dec 16, 2015 at 05:40:09PM +0530, Devesh Sharma wrote:
>> iozone passed on ocrdma device.
> 
> What other testing has there been of this patchset?
> 
> Connectathon, xfstests, and pynfs make more of an effort to test corner
> cases, iozone isn't much of a test of correctness.
> 
> --b.
> 
>> Link bounce fails to recover iozone
>> traffic, however failure is not related to this patch series. I am in
>> processes of finding out the patch which broke it.
>> 
>> Tested-By: Devesh Sharma 
>> 
>>> On Tue, Dec 15, 2015 at 3:00 AM, Chuck Lever  wrote:
>>> Here are patches to support server-side bi-directional RPC/RDMA
>>> operation (to enable NFSv4.1 on RPC/RDMA transports). Thanks to
>>> all who reviewed v1, v2, and v3. This version has some significant
>>> changes since the previous one.
>>> 
>>> In preparation for Doug's final topic branch, Bruce, I've rebased
>>> these on Christoph's ib_device_attr branch. There were some merge
>>> conflicts which I've fixed and tested. These are ready for your
>>> review.
>>> 
>>> Also available in the "nfsd-rdma-for-4.5" topic branch of this git repo:
>>> 
>>> git://git.linux-nfs.org/projects/cel/cel-2.6.git
>>> 
>>> Or for browsing:
>>> 
>>> http://git.linux-nfs.org/?p=cel/cel-2.6.git;a=log;h=refs/heads/nfsd-rdma-for-4.5
>>> 
>>> 
>>> Changes since v3:
>>> - Rebased on Christoph's ib_device_attr branch
>>> - Backchannel patches have been squashed together
>>> - Memory allocation overhaul to prevent blocking allocation
>>>  when sending backchannel calls
>>> 
>>> 
>>> Changes since v2:
>>> - Rebased on v4.4-rc4
>>> - Backchannel code in new source file to address dprintk issues
>>> - svc_rdma_get_context() now uses a pre-allocated cache
>>> - Dropped svc_rdma_send clean up
>>> 
>>> 
>>> Changes since v1:
>>> 
>>> - Rebased on v4.4-rc3
>>> - Removed the use of CONFIG_SUNRPC_BACKCHANNEL
>>> - Fixed computation of forward and backward max_requests
>>> - Updated some comments and patch descriptions
>>> - pr_err and pr_info converted to dprintk
>>> - Simplified svc_rdma_get_context()
>>> - Dropped patch removing access_flags field
>>> - NFSv4.1 callbacks tested with for-4.5 client
>>> 
>>> ---
>>> 
>>> Chuck Lever (11):
>>>  svcrdma: Do not send XDR roundup bytes for a write chunk
>>>  svcrdma: Clean up rdma_create_xprt()
>>>  svcrdma: Clean up process_context()
>>>  svcrdma: Improve allocation of struct svc_rdma_op_ctxt
>>>  svcrdma: Improve allocation of struct svc_rdma_req_map
>>>  svcrdma: Remove unused req_map and ctxt kmem_caches
>>>  svcrdma: Add gfp flags to svc_rdma_post_recv()
>>>  svcrdma: Remove last two __GFP_NOFAIL call sites
>>>  svcrdma: Make map_xdr non-static
>>>  svcrdma: Define maximum number of backchannel requests
>>>  svcrdma: Add class for RDMA backwards direction transport
>>> 
>>> 
>>> include/linux/sunrpc/svc_rdma.h|   37 ++-
>>> net/sunrpc/xprt.c  |1
>>> net/sunrpc/xprtrdma/Makefile   |2
>>> net/sunrpc/xprtrdma/svc_rdma.c |   41 ---
>>> net/sunrpc/xprtrdma/svc_rdma_backchannel.c |  371 
>>> 
>>> net/sunrpc/xprtrdma/svc_rdma_recvfrom.c|   52 
>>> net/sunrpc/xprtrdma/svc_rdma_sendto.c  |   34 ++-
>>> net/sunrpc/xprtrdma/svc_rdma_transport.c   |  284 -
>>> net/sunrpc/xprtrdma/transport.c|   30 +-
>>> net/sunrpc/xprtrdma/xprt_rdma.h|   20 +-
>>> 10 files changed, 730 insertions(+), 142 deletions(-)
>>> create mode 100644 net/sunrpc/xprtrdma/svc_rdma_backchannel.c
>>> 
>>> --
>>> Signature
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>>> the body of a message to majord...@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH rdma-next V2 00/32] Soft-RoCE driver

2015-12-24 Thread Christoph Hellwig
On Thu, Dec 24, 2015 at 11:17:46AM +0200, Kamal Heib wrote:
> We've located the driver in the staging subtree. This follows a requirement
> to implement an IB transport library - Soft RoCE is in the same boat like the 
> hfi1
> driver. We need to define and implement a lib to prevent those code
> duplications.

Given the trainwreck that the staging process is, it might seem more
sensible to get it into a stage and then merge it directly.  You'll
probably save yourself a lot of work that way.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 5/7] IB/mlx4: Add IB counters table

2015-12-24 Thread Sagi Grimberg

Doug,

I also can't load the mlx5 drivers in your tree [1], but
I don't know where it's from, it can come from pretty much everything...

Now I'm left with no useable HW to test with :(


[1]:
mlx5_core :06:00.0: firmware version: 12.14.74
mlx5_core :06:00.1: firmware version: 12.14.74
mlx5_ib: Mellanox Connect-IB Infiniband driver v2.2-1 (Feb 2014)
command failed, status bad parameter(0x3), syndrome 0x7424da
command failed, status bad parameter(0x3), syndrome 0x7424da
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V1 0/3] Add cross-channel support

2015-12-24 Thread Or Gerlitz

On 12/24/2015 12:00 PM, Christoph Hellwig wrote:

On Thu, Dec 24, 2015 at 10:02:29AM +0200, Or Gerlitz wrote:

We had consensus among the reviewers that the 1st patch ("IB/core: Align
coding style of ib_device_cap_flags structure") is wrong cleanup which
basically is (1) unneeded (2) creates more damage (git blame and such,
non-applicable to uapi, more) than benefit, etc -- finally Leon was
convinced too [1].

It's not really an issue vs uapi.  Using the weird BIT() macro
would have been, but without it I think this cleanup is ok, even if I
personally wouldn't have done it.  git-blame isn't really a major
issue either, as you can blame past revisions.


I personally wouldn't have done the cleanup either, and I managed to
convince Leon to drop it, so we had consensus among the developers; the
maintainer didn't have another opinion, and he took the wrong step -- so
we're asking to fix it, that's all.



Leon will re-spin V2 in the coming 1-2 hours; could you please pick it instead
of V1? When people agree on direction X and you are not against it, let's do
X and not Y.

It would be great if we could stop rebasing what's already in the tree
for the benefit of everyone building on top of this.  For example, I just
finished rebasing my series to move many constants, including this one,
to the uapi headers, and I'd hate to rebase it once again now that
the dust has settled.


The root issue here is that nothing was picked before 4.4-rc6, so we're 
in a situation where rebases are needed in the own-maintainer tree 
(github) to make things right. No way to avoid that.


We should aim, for 4.6 and onward, to have code for -next start getting
in around rc1-2; then things will be more robust, etc.


Or.

Or.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 5/7] IB/mlx4: Add IB counters table

2015-12-24 Thread Sagi Grimberg



This patch seems to generate a list corruption [1] when I test
with Doug's for-4.5 tree. Eran, care to take a look at this?


This patch is part of a series that was introduced in 4.3-rc1 [1],


Then something else broke it. Can people check their patches on doug's
tree? At the moment it's unusable...
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH rdma-next V2 00/32] Soft-RoCE driver

2015-12-24 Thread Christoph Hellwig
On Thu, Dec 24, 2015 at 02:58:10PM +0200, Or Gerlitz wrote:
> On Thu, Dec 24, 2015 at 12:02 PM, Christoph Hellwig  
> wrote:
> > On Thu, Dec 24, 2015 at 11:17:46AM +0200, Kamal Heib wrote:
> >> We've located the driver in the staging subtree. This follows a requirement
> >> to implement an IB transport library - Soft RoCE is in the same boat like 
> >> the hfi1
> >> driver. We need to define and implement a lib to prevent those code
> >> duplications.
> >
> > Given the trainwreck that the staging process is it might seems more
> > sensible to get it into a stage and then merge it directly.  You'll
> > probably save yourself a lot of work that way.
> 
> I am not sure what you mean by "get it into a stage and then merge it
> directly" --i
> is that not go through staging at all?

Sorry, I should have not finished that email in a hurry before leaving
the house.  Let me rephrase:

Given the trainwreck that the staging process is, it might be more
sensible to get it into shape and then merge it directly.  You'll
probably save yourself a lot of work that way.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 5/7] IB/mlx4: Add IB counters table

2015-12-24 Thread Or Gerlitz

On 12/24/2015 12:42 PM, Sagi Grimberg wrote:



This patch seems to generate a list corruption [1] when I test
with Doug's for-4.5 tree. Eran, care to take a look at this?


This patch is part from a series that was introduced in 4.3-rc1 [1],


Then something else broke it. Can people check their patches on doug's
tree? At the moment it's unusable...


Yes, I checked the branch up to commit 882f3b3 "Merge branches 
'4.5/Or-cleanup' and '4.5/rdma-cq' into k.o/for-4.5" and it works 
(rping, ibv_rc_pingpong over top of mlx4 VPI)

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH rdma-next V2 00/32] Soft-RoCE driver

2015-12-24 Thread Or Gerlitz
On Thu, Dec 24, 2015 at 12:02 PM, Christoph Hellwig  wrote:
> On Thu, Dec 24, 2015 at 11:17:46AM +0200, Kamal Heib wrote:
>> We've located the driver in the staging subtree. This follows a requirement
>> to implement an IB transport library - Soft RoCE is in the same boat like 
>> the hfi1
>> driver. We need to define and implement a lib to prevent those code
>> duplications.
>
> Given the trainwreck that the staging process is it might seems more
> sensible to get it into a stage and then merge it directly.  You'll
> probably save yourself a lot of work that way.

I am not sure what you mean by "get it into a stage and then merge it
directly" -- does that mean not going through staging at all?

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 5/7] IB/mlx4: Add IB counters table

2015-12-24 Thread Matan Barak
On Thu, Dec 24, 2015 at 2:38 PM, Or Gerlitz  wrote:
> On 12/24/2015 12:42 PM, Sagi Grimberg wrote:
>>
>>
>>>> This patch seems to generate a list corruption [1] when I test
>>>> with Doug's for-4.5 tree. Eran, care to take a look at this?
>>>
>>>
>>> This patch is part from a series that was introduced in 4.3-rc1 [1],
>>
>>
>> Then something else broke it. Can people check their patches on doug's
>> tree? At the moment it's unusable...
>

Leon and I have checked Doug's tree with mlx4_ib disabled and we
didn't encounter any error.
We ran ucmatose over IB connection (in mlx5) and it worked flawlessly.

>
> Yes, I checked the branch up to commit 882f3b3 "Merge branches
> '4.5/Or-cleanup' and '4.5/rdma-cq' into k.o/for-4.5" and it works (rping,
> ibv_rc_pingpong over top of mlx4 VPI)
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/6] IB/uapi: expose device capability flags

2015-12-24 Thread Christoph Hellwig
Expose the device capability flags which can be queried through uverbs in
the uapi headers.

Signed-off-by: Christoph Hellwig 
---
 include/rdma/ib_verbs.h  | 94 +++-
 include/uapi/rdma/ib_verbs.h | 66 +++
 2 files changed, 98 insertions(+), 62 deletions(-)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 48bfcf5..b8d4113 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -151,68 +151,38 @@ enum rdma_link_layer {
 };
 
 enum ib_device_cap_flags {
-   IB_DEVICE_RESIZE_MAX_WR = (1 << 0),
-   IB_DEVICE_BAD_PKEY_CNTR = (1 << 1),
-   IB_DEVICE_BAD_QKEY_CNTR = (1 << 2),
-   IB_DEVICE_RAW_MULTI = (1 << 3),
-   IB_DEVICE_AUTO_PATH_MIG = (1 << 4),
-   IB_DEVICE_CHANGE_PHY_PORT   = (1 << 5),
-   IB_DEVICE_UD_AV_PORT_ENFORCE= (1 << 6),
-   IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7),
-   IB_DEVICE_SHUTDOWN_PORT = (1 << 8),
-   IB_DEVICE_INIT_TYPE = (1 << 9),
-   IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10),
-   IB_DEVICE_SYS_IMAGE_GUID= (1 << 11),
-   IB_DEVICE_RC_RNR_NAK_GEN= (1 << 12),
-   IB_DEVICE_SRQ_RESIZE= (1 << 13),
-   IB_DEVICE_N_NOTIFY_CQ   = (1 << 14),
-
-   /*
-* This device supports a per-device lkey or stag that can be
-* used without performing a memory registration for the local
-* memory.  Note that ULPs should never check this flag, but
-* instead of use the local_dma_lkey flag in the ib_pd structure,
-* which will always contain a usable lkey.
-*/
-   IB_DEVICE_LOCAL_DMA_LKEY= (1 << 15),
-   IB_DEVICE_RESERVED /* old SEND_W_INV */ = (1 << 16),
-   IB_DEVICE_MEM_WINDOW= (1 << 17),
-   /*
-* Devices should set IB_DEVICE_UD_IP_SUM if they support
-* insertion of UDP and TCP checksum on outgoing UD IPoIB
-* messages and can verify the validity of checksum for
-* incoming messages.  Setting this flag implies that the
-* IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
-*/
-   IB_DEVICE_UD_IP_CSUM= (1 << 18),
-   IB_DEVICE_UD_TSO= (1 << 19),
-   IB_DEVICE_XRC   = (1 << 20),
-
-   /*
-* This device supports the IB "base memory management extension",
-* which includes support for fast registrations (IB_WR_REG_MR,
-* IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs).  This flag should
-* also be set by any iWarp device which must support FRs to comply
-* to the iWarp verbs spec.  iWarp devices also support the
-* IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the
-* stag.
-*/
-   IB_DEVICE_MEM_MGT_EXTENSIONS= (1 << 21),
-   IB_DEVICE_BLOCK_MULTICAST_LOOPBACK  = (1 << 22),
-   IB_DEVICE_MEM_WINDOW_TYPE_2A= (1 << 23),
-   IB_DEVICE_MEM_WINDOW_TYPE_2B= (1 << 24),
-   IB_DEVICE_RC_IP_CSUM= (1 << 25),
-   IB_DEVICE_RAW_IP_CSUM   = (1 << 26),
-   /*
-* Devices should set IB_DEVICE_CROSS_CHANNEL if they
-* support execution of WQEs that involve synchronization
-* of I/O operations with single completion queue managed
-* by hardware.
-*/
-   IB_DEVICE_CROSS_CHANNEL = (1 << 27),
-   IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
-   IB_DEVICE_SIGNATURE_HANDOVER= (1 << 30),
-   IB_DEVICE_ON_DEMAND_PAGING  = (1 << 31),
+   IB_DEVICE_RESIZE_MAX_WR = IB_UVERBS_DEVICE_RESIZE_MAX_WR,
+   IB_DEVICE_BAD_PKEY_CNTR = IB_UVERBS_DEVICE_BAD_PKEY_CNTR,
+   IB_DEVICE_BAD_QKEY_CNTR = IB_UVERBS_DEVICE_BAD_QKEY_CNTR,
+   IB_DEVICE_RAW_MULTI = IB_UVERBS_DEVICE_RAW_MULTI,
+   IB_DEVICE_AUTO_PATH_MIG = IB_UVERBS_DEVICE_AUTO_PATH_MIG,
+   IB_DEVICE_CHANGE_PHY_PORT   = IB_UVERBS_DEVICE_CHANGE_PHY_PORT,
+   IB_DEVICE_UD_AV_PORT_ENFORCE= IB_UVERBS_DEVICE_UD_AV_PORT_ENFORCE,
+   IB_DEVICE_CURR_QP_STATE_MOD = IB_UVERBS_DEVICE_UD_AV_PORT_ENFORCE,
+   IB_DEVICE_SHUTDOWN_PORT = IB_UVERBS_DEVICE_SHUTDOWN_PORT,
+   IB_DEVICE_INIT_TYPE = IB_UVERBS_DEVICE_INIT_TYPE,
+   IB_DEVICE_PORT_ACTIVE_EVENT = IB_UVERBS_DEVICE_PORT_ACTIVE_EVENT,
+   IB_DEVICE_SYS_IMAGE_GUID= IB_UVERBS_DEVICE_SYS_IMAGE_GUID,
+   IB_DEVICE_RC_RNR_NAK_GEN= IB_UVERBS_DEVICE_RC_RNR_NAK_GEN,
+   IB_DEVICE_SRQ_RESIZE= IB_UVERBS_DEVICE_SRQ_RESIZE,
+   IB_DEVICE_N_NOTIFY_CQ   = 

[PATCH 3/6] IB/uapi: expose uverbs WC opcodes

2015-12-24 Thread Christoph Hellwig
This exposes the WC opcodes supported by uverbs as part of the uapi
headers.  It follows the same scheme as the WR opcodes.

Signed-off-by: Christoph Hellwig 
---
 include/rdma/ib_verbs.h  | 29 +
 include/uapi/rdma/ib_verbs.h | 16 
 2 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5dccc6a..7dce204 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -819,22 +819,19 @@ enum ib_wc_status {
 const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
 
 enum ib_wc_opcode {
-   IB_WC_SEND,
-   IB_WC_RDMA_WRITE,
-   IB_WC_RDMA_READ,
-   IB_WC_COMP_SWAP,
-   IB_WC_FETCH_ADD,
-   IB_WC_LSO,
-   IB_WC_LOCAL_INV,
-   IB_WC_REG_MR,
-   IB_WC_MASKED_COMP_SWAP,
-   IB_WC_MASKED_FETCH_ADD,
-/*
- * Set value of IB_WC_RECV so consumers can test if a completion is a
- * receive by testing (opcode & IB_WC_RECV).
- */
-   IB_WC_RECV  = 1 << 7,
-   IB_WC_RECV_RDMA_WITH_IMM
+   IB_WC_SEND  = IB_UVERBS_WC_SEND,
+   IB_WC_RDMA_WRITE= IB_UVERBS_WC_RDMA_WRITE,
+   IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ,
+   IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP,
+   IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD,
+   IB_WC_LSO   = IB_UVERBS_WC_SEND_END,
+   IB_WC_LOCAL_INV = IB_UVERBS_WC_SEND_END + 1,
+   IB_WC_REG_MR= IB_UVERBS_WC_SEND_END + 2,
+   IB_WC_MASKED_COMP_SWAP  = IB_UVERBS_WC_SEND_END + 3,
+   IB_WC_MASKED_FETCH_ADD  = IB_UVERBS_WC_SEND_END + 4,
+
+   IB_WC_RECV  = IB_UVERBS_WC_RECV,
+   IB_WC_RECV_RDMA_WITH_IMM = IB_UVERBS_WC_RECV_END,
 };
 
 enum ib_wc_flags {
diff --git a/include/uapi/rdma/ib_verbs.h b/include/uapi/rdma/ib_verbs.h
index 3be3152..fd7a393 100644
--- a/include/uapi/rdma/ib_verbs.h
+++ b/include/uapi/rdma/ib_verbs.h
@@ -29,4 +29,20 @@ enum ib_uverbs_send_flags {
IB_UVERBS_SEND_END  = (1 << 5),
 };
 
+enum ib_uverbs_wc_opcode {
+   IB_UVERBS_WC_SEND   = 0,
+   IB_UVERBS_WC_RDMA_WRITE = 1,
+   IB_UVERBS_WC_RDMA_READ  = 2,
+   IB_UVERBS_WC_COMP_SWAP  = 3,
+   IB_UVERBS_WC_FETCH_ADD  = 4,
+   IB_UVERBS_WC_SEND_END   = 5,
+
+   /*
+* Set value of IB_WC_RECV so consumers can test if a completion is a
+* receive by testing (opcode & IB_WC_RECV).
+*/
+   IB_UVERBS_WC_RECV   = 1 << 7,
+   IB_UVERBS_WC_RECV_END   = (1 << 7) + 1,
+};
+
 #endif /* _UAPI_RDMA_IB_VERBS_H */
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/6] IB/uapi: expose uverbs WC flags

2015-12-24 Thread Christoph Hellwig
This exposes the WC flags supported by uverbs as part of the uapi
headers.  It follows the same scheme as the WR opcodes.

Signed-off-by: Christoph Hellwig 
---
 include/rdma/ib_verbs.h  | 14 +++---
 include/uapi/rdma/ib_verbs.h | 10 ++
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 7dce204..337db70 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -835,13 +835,13 @@ enum ib_wc_opcode {
 };
 
 enum ib_wc_flags {
-   IB_WC_GRH   = 1,
-   IB_WC_WITH_IMM  = (1<<1),
-   IB_WC_WITH_INVALIDATE   = (1<<2),
-   IB_WC_IP_CSUM_OK= (1<<3),
-   IB_WC_WITH_SMAC = (1<<4),
-   IB_WC_WITH_VLAN = (1<<5),
-   IB_WC_WITH_NETWORK_HDR_TYPE = (1<<6),
+   IB_WC_GRH   = IB_UVERBS_WC_GRH,
+   IB_WC_WITH_IMM  = IB_UVERBS_WC_WITH_IMM,
+   IB_WC_WITH_INVALIDATE   = IB_UVERBS_WC_WITH_INVALIDATE,
+   IB_WC_IP_CSUM_OK= IB_UVERBS_WC_IP_CSUM_OK,
+   IB_WC_WITH_SMAC = IB_UVERBS_WC_WITH_SMAC,
+   IB_WC_WITH_VLAN = IB_UVERBS_WC_WITH_VLAN,
+   IB_WC_WITH_NETWORK_HDR_TYPE = IB_UVERBS_WC_WITH_NETWORK_HDR_TYPE,
 };
 
 struct ib_wc {
diff --git a/include/uapi/rdma/ib_verbs.h b/include/uapi/rdma/ib_verbs.h
index fd7a393..c40c00b 100644
--- a/include/uapi/rdma/ib_verbs.h
+++ b/include/uapi/rdma/ib_verbs.h
@@ -45,4 +45,14 @@ enum ib_uverbs_wc_opcode {
IB_UVERBS_WC_RECV_END   = (1 << 7) + 1,
 };
 
+enum ib_uverbs_wc_flags {
+   IB_UVERBS_WC_GRH= (1 << 0),
+   IB_UVERBS_WC_WITH_IMM   = (1 << 1),
+   IB_UVERBS_WC_WITH_INVALIDATE= (1 << 2),
+   IB_UVERBS_WC_IP_CSUM_OK = (1 << 3),
+   IB_UVERBS_WC_WITH_SMAC  = (1 << 4),
+   IB_UVERBS_WC_WITH_VLAN  = (1 << 5),
+   IB_UVERBS_WC_WITH_NETWORK_HDR_TYPE  = (1 << 6),
+};
+
 #endif /* _UAPI_RDMA_IB_VERBS_H */
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/6] IB/uapi: expose uverbs send WR flags

2015-12-24 Thread Christoph Hellwig
This exposes the send WR flags supported by uverbs as part of the uapi
headers.  It follows the same scheme as the WR opcodes.

Signed-off-by: Christoph Hellwig 
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  6 +++---
 include/rdma/ib_verbs.h  | 14 ++
 include/uapi/rdma/ib_verbs.h |  9 +
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 2f82a08..6c264f0 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -118,9 +118,9 @@ struct mlx5_ib_pd {
  * enum ib_send_flags and enum ib_qp_type for low-level driver
  */
 
-#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
-#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1)
-#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2)
+#define MLX5_IB_SEND_UMR_UNREG IB_SEND_END
+#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_END << 1)
+#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_END << 2)
 #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
 #define MLX5_IB_WR_UMR (IB_WR_END + 0)
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 94509e0..5dccc6a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1134,15 +1134,13 @@ enum ib_wr_opcode {
 };
 
 enum ib_send_flags {
-   IB_SEND_FENCE   = 1,
-   IB_SEND_SIGNALED= (1<<1),
-   IB_SEND_SOLICITED   = (1<<2),
-   IB_SEND_INLINE  = (1<<3),
-   IB_SEND_IP_CSUM = (1<<4),
+   IB_SEND_FENCE   = IB_UVERBS_SEND_FENCE,
+   IB_SEND_SIGNALED= IB_UVERBS_SEND_SIGNALED,
+   IB_SEND_SOLICITED   = IB_UVERBS_SEND_SOLICITED,
+   IB_SEND_INLINE  = IB_UVERBS_SEND_INLINE,
+   IB_SEND_IP_CSUM = IB_UVERBS_SEND_IP_CSUM,
 
-   /* reserve bits 26-31 for low level drivers' internal use */
-   IB_SEND_RESERVED_START  = (1 << 26),
-   IB_SEND_RESERVED_END= (1 << 31),
+   IB_SEND_END = IB_UVERBS_SEND_END,
 };
 
 struct ib_sge {
diff --git a/include/uapi/rdma/ib_verbs.h b/include/uapi/rdma/ib_verbs.h
index 330175e..3be3152 100644
--- a/include/uapi/rdma/ib_verbs.h
+++ b/include/uapi/rdma/ib_verbs.h
@@ -20,4 +20,13 @@ enum ib_uverbs_wr_opcode {
IB_UVERBS_WR_END= 9,
 };
 
+enum ib_uverbs_send_flags {
+   IB_UVERBS_SEND_FENCE= (1 << 0),
+   IB_UVERBS_SEND_SIGNALED = (1 << 1),
+   IB_UVERBS_SEND_SOLICITED= (1 << 2),
+   IB_UVERBS_SEND_INLINE   = (1 << 3),
+   IB_UVERBS_SEND_IP_CSUM  = (1 << 4),
+   IB_UVERBS_SEND_END  = (1 << 5),
+};
+
 #endif /* _UAPI_RDMA_IB_VERBS_H */
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


start moving user space visible constants to uapi headers

2015-12-24 Thread Christoph Hellwig
Currently very little of the uverbs user interface is actually exposed in
uapi headers, and it's a constant struggle to figure out what's kernel
internal and what is actually exposed in public.  This series starts
sorting this out by creating the infrastructure for a uapi header shared
between uverbs and the core IB stack, and starts moving all WR and WC
constants as well as the device capability flags there.

A lot more work will have to follow, and I hope others will help out as
well.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html