Re: [PATCH] ibmvscsis: Initial commit of IBM VSCSI Tgt Driver

2016-05-24 Thread Bryant G Ly


Quoting Bart Van Assche :


On 05/24/2016 06:52 AM, Bryant G. Ly wrote:

+config SCSI_IBMVSCSIS
+   tristate "IBM Virtual SCSI Server support"
+   depends on PPC_PSERIES && SCSI_SRP && TARGET_CORE
+   help
+ This is the IBM POWER Virtual SCSI Target Server
+
+  The userspace component needed to initialize the driver and
+ documentation can be found:
+
+  https://github.com/powervm/ibmvscsis
+
+  To compile this driver as a module, choose M here: the
+ module will be called ibmvstgt.
+


This driver has confused Linux users before. Most people know SRP as  
a SCSI transport layer that is used for communication between  
servers. This driver however uses the SRP protocol for communication  
between guests and/or the host that run on the same server. Please  
add this information to the above help text, together with a  
reference to the VIO protocol documentation in the POWER  
architecture manual.




Done


+#include 


Every Linux user can query the kernel version by running the uname  
command. Is it really useful to print the kernel version  
(UTS_RELEASE) when this driver is loaded?




Done


+#include "ibmvscsi.h"


The above include directive indirectly includes a whole bunch of  
SCSI initiator driver header files. We do not want that initiator  
header files are included in the source code of a target driver. If  
it is necessary to share definitions of symbolic constants between  
the initiator and the target driver please move these into a new  
header file.




Done


+static inline long h_send_crq(struct ibmvscsis_adapter *adapter,
+   u64 word1, u64 word2)
+{
+   long rc;
+   struct vio_dev *vdev = adapter->dma_dev;
+
+   pr_debug("ibmvscsis: ibmvscsis_send_crq(0x%x, 0x%016llx, 0x%016llx)\n",
+   vdev->unit_address, word1, word2);
+


As Joe Perches already asked, please define pr_fmt() instead of  
including the kernel module name in every pr_debug() statement.




Done


+static char system_id[64] = "";
+static char partition_name[97] = "UNKNOWN";


Where do these magic constants come from and what is their meaning?  
Please consider to introduce symbolic names for these constants.




Done


+static ssize_t ibmvscsis_tpg_enable_show(struct config_item *item,
+   char *page)
+{
+   struct se_portal_group *se_tpg = to_tpg(item);
+   struct ibmvscsis_tport *tport = container_of(se_tpg,
+   struct ibmvscsis_tport, se_tpg);
+
+   return snprintf(page, PAGE_SIZE, "%d\n", (tport->enabled) ? 1 : 0);
+}


Have you considered to use MODULE_VERSION() instead of introducing a  
sysfs attribute to export the module version? An advantage of  
MODULE_VERSION() is that it allows to query the module version  
without having to load a kernel module.




Done


+static void ibmvscsis_modify_std_inquiry(struct se_cmd *se_cmd)
+{
+   struct se_device *dev = se_cmd->se_dev;
+   unsigned char *buf = NULL;
+   u32 cmd_len = se_cmd->data_length;
+
+   if (cmd_len <= INQ_DATA_OFFSET)
+   return;
+
+   buf = transport_kmap_data_sg(se_cmd);
+   if (buf) {
+   memcpy([8], "IBM", 8);
+   if (dev->transport->get_device_type(dev) == TYPE_ROM)
+   memcpy([16], "VOPTA   ", 16);
+   else
+   memcpy([16], "3303  NVDISK", 16);
+   memcpy([32], "0001", 4);
+   transport_kunmap_data_sg(se_cmd);
+   }
+}


Shouldn't a sense code be returned to the initiator if this function fails?



This function never fails? Also this is a temporary function that will go away
soon. It's to address AIX not recognizing target emulation of inquiry, but
we have already discussed internally to add these ODM entries into AIX.


+   default:
+   pr_err("ibmvscsis: unknown task mgmt func %d\n",
+   srp_tsk->tsk_mgmt_func);
+   cmd->se_cmd.se_tmr_req->response =
+   TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED;
+   rc = -1;
+   break;
+   }
+
+   if (!rc) {


Please consider changing the above "break" into "goto fail" such  
that the if (!rc) can be left out and such that the indentation of  
the code below if (!rc) can be reduced.




Done


+static int ibmvscsis_queuecommand(struct ibmvscsis_adapter *adapter,
+ struct iu_entry *iue)
+{
+   struct srp_cmd *cmd = iue->sbuf->buf;
+   struct scsi_cmnd *sc;
+   struct ibmvscsis_cmnd *vsc;
+   int ret;
+
+   pr_debug("ibmvscsis: ibmvscsis_queuecommand\n");
+
+   vsc = kzalloc(sizeof(*vsc), GFP_KERNEL);


Allocating memory in the command processing path in a SCSI driver  
usually causes suboptimal performance. Other LIO target drivers  

Re: [PATCH] ibmvscsis: Initial commit of IBM VSCSI Tgt Driver

2016-05-24 Thread Greg KH
On Tue, May 24, 2016 at 09:44:43AM -0700, Bart Van Assche wrote:
> On 05/24/2016 09:34 AM, Greg KH wrote:
> > On Tue, May 24, 2016 at 09:25:05AM -0700, Bart Van Assche wrote:
> > > > +static inline long h_send_crq(struct ibmvscsis_adapter *adapter,
> > > > +   u64 word1, u64 word2)
> > > > +{
> > > > +   long rc;
> > > > +   struct vio_dev *vdev = adapter->dma_dev;
> > > > +
> > > > +   pr_debug("ibmvscsis: ibmvscsis_send_crq(0x%x, 0x%016llx, 
> > > > 0x%016llx)\n",
> > > > +   vdev->unit_address, word1, word2);
> > > > +
> > > 
> > > As Joe Perches already asked, please define pr_fmt() instead of including
> > > the kernel module name in every pr_debug() statement.
> > 
> > Even better, as this is a driver, it should be using dev_*() calls
> > instead of pr_*() calls to properly identify the device and driver that
> > is making the message.  No driver should be using pr_*() except in
> > _very_ limited usages.
> 
> Hi Greg,
> 
> The reason I recommended pr_debug() is because ibmvscsis is a LIO target
> driver and I don't think a struct device is associated with a LIO target
> driver. See e.g. target_register_template() in
> drivers/target/target_core_configfs.c.

That seems messed up, there should be a struct device somewhere to
use...
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] ibmvscsis: Initial commit of IBM VSCSI Tgt Driver

2016-05-24 Thread Bart Van Assche

On 05/24/2016 09:34 AM, Greg KH wrote:

On Tue, May 24, 2016 at 09:25:05AM -0700, Bart Van Assche wrote:

+static inline long h_send_crq(struct ibmvscsis_adapter *adapter,
+   u64 word1, u64 word2)
+{
+   long rc;
+   struct vio_dev *vdev = adapter->dma_dev;
+
+   pr_debug("ibmvscsis: ibmvscsis_send_crq(0x%x, 0x%016llx, 0x%016llx)\n",
+   vdev->unit_address, word1, word2);
+


As Joe Perches already asked, please define pr_fmt() instead of including
the kernel module name in every pr_debug() statement.


Even better, as this is a driver, it should be using dev_*() calls
instead of pr_*() calls to properly identify the device and driver that
is making the message.  No driver should be using pr_*() except in
_very_ limited usages.


Hi Greg,

The reason I recommended pr_debug() is because ibmvscsis is a LIO target 
driver and I don't think a struct device is associated with a LIO target 
driver. See e.g. target_register_template() in 
drivers/target/target_core_configfs.c.


Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv2] libfc: Update rport reference counting

2016-05-24 Thread Vasu Dev
On Tue, 2016-05-24 at 08:11 +0200, Hannes Reinecke wrote:
> Originally libfc would just be initializing the refcount to '1',
> and using the disc_mutex to synchronize if and when the final put
> should be happening.
> This has a race condition as the mutex might be delayed, causing
> other threads to access an invalid structure.
> This patch updates the rport reference counting to increase the
> reference every time 'rport_lookup' is called, and decreases
> the reference correspondingly.
> This removes the need to hold 'disc_mutex' when removing the
> structure, and avoids the above race condition.
> 
> Signed-off-by: Hannes Reinecke 
> ---
>  drivers/scsi/fcoe/fcoe_ctlr.c |  7 +--
>  drivers/scsi/libfc/fc_lport.c | 19 ++---
>  drivers/scsi/libfc/fc_rport.c | 49 ++---
> --
>  3 files changed, 38 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c
> b/drivers/scsi/fcoe/fcoe_ctlr.c
> index 3e83d48..ada4bde 100644
> --- a/drivers/scsi/fcoe/fcoe_ctlr.c
> +++ b/drivers/scsi/fcoe/fcoe_ctlr.c
> @@ -2496,14 +2496,13 @@ static int fcoe_ctlr_vn_lookup(struct
> fcoe_ctlr *fip, u32 port_id, u8 *mac)
>   struct fcoe_rport *frport;
>   int ret = -1;
>  
> - rcu_read_lock();
>   rdata = lport->tt.rport_lookup(lport, port_id);
>   if (rdata) {
>   frport = fcoe_ctlr_rport(rdata);
>   memcpy(mac, frport->enode_mac, ETH_ALEN);
>   ret = 0;
> + kref_put(>kref, lport->tt.rport_destroy);
>   }
> - rcu_read_unlock();
>   return ret;
>  }
>  
> @@ -2585,11 +2584,7 @@ static void fcoe_ctlr_vn_beacon(struct
> fcoe_ctlr *fip,
>   fcoe_ctlr_vn_send(fip, FIP_SC_VN_PROBE_REQ,
> fcoe_all_vn2vn, 0);
>   return;
>   }
> - mutex_lock(>disc.disc_mutex);
>   rdata = lport->tt.rport_lookup(lport, new->ids.port_id);
> - if (rdata)
> - kref_get(>kref);
> - mutex_unlock(>disc.disc_mutex);
>   if (rdata) {
>   if (rdata->ids.node_name == new->ids.node_name &&
>   rdata->ids.port_name == new->ids.port_name) {
> diff --git a/drivers/scsi/libfc/fc_lport.c
> b/drivers/scsi/libfc/fc_lport.c
> index e01a298..b9b44da 100644
> --- a/drivers/scsi/libfc/fc_lport.c
> +++ b/drivers/scsi/libfc/fc_lport.c
> @@ -2090,7 +2090,7 @@ int fc_lport_bsg_request(struct fc_bsg_job
> *job)
>   struct fc_rport *rport;
>   struct fc_rport_priv *rdata;
>   int rc = -EINVAL;
> - u32 did;
> + u32 did, tov;
>  
>   job->reply->reply_payload_rcv_len = 0;
>   if (rsp)
> @@ -2121,15 +2121,20 @@ int fc_lport_bsg_request(struct fc_bsg_job
> *job)
>  
>   case FC_BSG_HST_CT:
>   did = ntoh24(job->request->rqst_data.h_ct.port_id);
> - if (did == FC_FID_DIR_SERV)
> + if (did == FC_FID_DIR_SERV) {
>   rdata = lport->dns_rdata;
> - else
> + if (!rdata)
> + break;
> + tov = rdata->e_d_tov;
> + } else {
>   rdata = lport->tt.rport_lookup(lport, did);
> + if (!rdata)
> + break;
> + tov = rdata->e_d_tov;
> + kref_put(>kref, lport-
> >tt.rport_destroy);
> + }
>  
> - if (!rdata)
> - break;
> -
> - rc = fc_lport_ct_request(job, lport, did, rdata-
> >e_d_tov);
> + rc = fc_lport_ct_request(job, lport, did, tov);
>   break;
>  
>   case FC_BSG_HST_ELS_NOLOGIN:
> diff --git a/drivers/scsi/libfc/fc_rport.c
> b/drivers/scsi/libfc/fc_rport.c
> index 589ff9a..93f5961 100644
> --- a/drivers/scsi/libfc/fc_rport.c
> +++ b/drivers/scsi/libfc/fc_rport.c
> @@ -95,17 +95,23 @@ static const char *fc_rport_state_names[] = {
>   * @lport:   The local port to lookup the remote port on
>   * @port_id: The remote port ID to look up
>   *
> - * The caller must hold either disc_mutex or rcu_read_lock().
> + * The reference count of the fc_rport_priv structure is
> + * increased by one.
>   */
>  static struct fc_rport_priv *fc_rport_lookup(const struct fc_lport
> *lport,
>    u32 port_id)
>  {
> - struct fc_rport_priv *rdata;
> + struct fc_rport_priv *rdata = NULL, *tmp_rdata;
>  
> - list_for_each_entry_rcu(rdata, >disc.rports, peers)
> - if (rdata->ids.port_id == port_id)
> - return rdata;
> - return NULL;
> + rcu_read_lock();
> + list_for_each_entry_rcu(tmp_rdata, >disc.rports,
> peers)
> + if (tmp_rdata->ids.port_id == port_id &&
> + kref_get_unless_zero(_rdata->kref)) {
> + rdata = tmp_rdata;
> + break;
> + }
> + rcu_read_unlock();
> + return rdata;
>  }
>  
>  /**
> @@ -340,7 +346,6 @@ static void 

Re: [PATCH] ibmvscsis: Initial commit of IBM VSCSI Tgt Driver

2016-05-24 Thread Greg KH
On Tue, May 24, 2016 at 09:25:05AM -0700, Bart Van Assche wrote:
> > +static inline long h_send_crq(struct ibmvscsis_adapter *adapter,
> > +   u64 word1, u64 word2)
> > +{
> > +   long rc;
> > +   struct vio_dev *vdev = adapter->dma_dev;
> > +
> > +   pr_debug("ibmvscsis: ibmvscsis_send_crq(0x%x, 0x%016llx, 0x%016llx)\n",
> > +   vdev->unit_address, word1, word2);
> > +
> 
> As Joe Perches already asked, please define pr_fmt() instead of including
> the kernel module name in every pr_debug() statement.

Even better, as this is a driver, it should be using dev_*() calls
instead of pr_*() calls to properly identify the device and driver that
is making the message.  No driver should be using pr_*() except in
_very_ limited usages.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] ibmvscsis: Initial commit of IBM VSCSI Tgt Driver

2016-05-24 Thread Bart Van Assche

On 05/24/2016 06:52 AM, Bryant G. Ly wrote:

+config SCSI_IBMVSCSIS
+   tristate "IBM Virtual SCSI Server support"
+   depends on PPC_PSERIES && SCSI_SRP && TARGET_CORE
+   help
+ This is the IBM POWER Virtual SCSI Target Server
+
+  The userspace component needed to initialize the driver and
+ documentation can be found:
+
+  https://github.com/powervm/ibmvscsis
+
+  To compile this driver as a module, choose M here: the
+ module will be called ibmvstgt.
+


This driver has confused Linux users before. Most people know SRP as a 
SCSI transport layer that is used for communication between servers. 
This driver however uses the SRP protocol for communication between 
guests and/or the host that run on the same server. Please add this 
information to the above help text, together with a reference to the VIO 
protocol documentation in the POWER architecture manual.



+#include 


Every Linux user can query the kernel version by running the uname 
command. Is it really useful to print the kernel version (UTS_RELEASE) 
when this driver is loaded?



+#include "ibmvscsi.h"


The above include directive indirectly includes a whole bunch of SCSI 
initiator driver header files. We do not want that initiator header 
files are included in the source code of a target driver. If it is 
necessary to share definitions of symbolic constants between the 
initiator and the target driver please move these into a new header file.



+static inline long h_send_crq(struct ibmvscsis_adapter *adapter,
+   u64 word1, u64 word2)
+{
+   long rc;
+   struct vio_dev *vdev = adapter->dma_dev;
+
+   pr_debug("ibmvscsis: ibmvscsis_send_crq(0x%x, 0x%016llx, 0x%016llx)\n",
+   vdev->unit_address, word1, word2);
+


As Joe Perches already asked, please define pr_fmt() instead of 
including the kernel module name in every pr_debug() statement.



+static char system_id[64] = "";
+static char partition_name[97] = "UNKNOWN";


Where do these magic constants come from and what is their meaning? 
Please consider to introduce symbolic names for these constants.



+static ssize_t ibmvscsis_tpg_enable_show(struct config_item *item,
+   char *page)
+{
+   struct se_portal_group *se_tpg = to_tpg(item);
+   struct ibmvscsis_tport *tport = container_of(se_tpg,
+   struct ibmvscsis_tport, se_tpg);
+
+   return snprintf(page, PAGE_SIZE, "%d\n", (tport->enabled) ? 1 : 0);
+}


Have you considered to use MODULE_VERSION() instead of introducing a 
sysfs attribute to export the module version? An advantage of 
MODULE_VERSION() is that it allows to query the module version without 
having to load a kernel module.



+static void ibmvscsis_modify_std_inquiry(struct se_cmd *se_cmd)
+{
+   struct se_device *dev = se_cmd->se_dev;
+   unsigned char *buf = NULL;
+   u32 cmd_len = se_cmd->data_length;
+
+   if (cmd_len <= INQ_DATA_OFFSET)
+   return;
+
+   buf = transport_kmap_data_sg(se_cmd);
+   if (buf) {
+   memcpy([8], "IBM", 8);
+   if (dev->transport->get_device_type(dev) == TYPE_ROM)
+   memcpy([16], "VOPTA   ", 16);
+   else
+   memcpy([16], "3303  NVDISK", 16);
+   memcpy([32], "0001", 4);
+   transport_kunmap_data_sg(se_cmd);
+   }
+}


Shouldn't a sense code be returned to the initiator if this function fails?


+   default:
+   pr_err("ibmvscsis: unknown task mgmt func %d\n",
+   srp_tsk->tsk_mgmt_func);
+   cmd->se_cmd.se_tmr_req->response =
+   TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED;
+   rc = -1;
+   break;
+   }
+
+   if (!rc) {


Please consider changing the above "break" into "goto fail" such that 
the if (!rc) can be left out and such that the indentation of the code 
below if (!rc) can be reduced.



+static int ibmvscsis_queuecommand(struct ibmvscsis_adapter *adapter,
+ struct iu_entry *iue)
+{
+   struct srp_cmd *cmd = iue->sbuf->buf;
+   struct scsi_cmnd *sc;
+   struct ibmvscsis_cmnd *vsc;
+   int ret;
+
+   pr_debug("ibmvscsis: ibmvscsis_queuecommand\n");
+
+   vsc = kzalloc(sizeof(*vsc), GFP_KERNEL);


Allocating memory in the command processing path in a SCSI driver 
usually causes suboptimal performance. Other LIO target drivers 
preallocate such buffers before I/O starts.



+static uint64_t ibmvscsis_unpack_lun(const uint8_t *lun, int len)
+{
+   uint64_t res = NO_SUCH_LUN;
+   int addressing_method;
+
+   if (unlikely(len < 2)) {
+   pr_err("Illegal LUN length %d, expected 2 bytes or more\n",
+   len);
+   goto out;
+   }
+
+   

Re: [PATCH] ibmvscsis: Initial commit of IBM VSCSI Tgt Driver

2016-05-24 Thread Greg KH
On Tue, May 24, 2016 at 08:52:58AM -0500, Bryant G. Ly wrote:
> From: bgly 

Really?  Please go get a proper review from the other internal IBM
developers to fix this, and the other obvious problems with your patch,
before you send it publically and force us to tell you these things...

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] ibmvscsis: Initial commit of IBM VSCSI Tgt Driver

2016-05-24 Thread Joe Perches
On Tue, 2016-05-24 at 08:52 -0500, Bryant G. Ly wrote:
> From: bgly 
> 
> This initial commit contains WIP of the IBM VSCSI Target Fabric
> Module. It currently supports read/writes, and I have tested
> the ability to create a file backstore with the driver and install
> RHEL VIA NIM and then boot up the partition via filio backstore
> through the driver.

Only trivial notes:

Maybe try checkpatch with the --strict option and see
if any of the additional messages are important to you.

> diff --git a/MAINTAINERS b/MAINTAINERS
[]
> @@ -5381,6 +5381,16 @@ S: Supported
>  F:   drivers/scsi/ibmvscsi/ibmvscsi*
>  F:   drivers/scsi/ibmvscsi/viosrp.h
>  
> +IBM Power Virtual SCSI Device Target Driver
> +M:   Bryant G. Ly 
> +L:   linux-scsi@vger.kernel.org
> +L:   target-de...@vger.kernel.org
> +S:   Supported
> +F:   drivers/scsi/ibmvscsi/ibmvscsis.c
> +F:  drivers/scsi/ibmvscsi/ibmvscsis.h
> +F:   drivers/scsi/libsrp.h
> +F:  drivers/scsi/libsrp.c

Please use a tab character consistently after the :

> diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
[]
> @@ -847,6 +847,20 @@ config SCSI_IBMVSCSI
>     To compile this driver as a module, choose M here: the
>     module will be called ibmvscsi.
>  
> +config SCSI_IBMVSCSIS
> +     tristate "IBM Virtual SCSI Server support"
> +     depends on PPC_PSERIES && SCSI_SRP && TARGET_CORE
> +     help
> +       This is the IBM POWER Virtual SCSI Target Server
> +
> +  The userspace component needed to initialize the driver and
> +       documentation can be found:

here too.

> +
> +  https://github.com/powervm/ibmvscsis
> +
> +  To compile this driver as a module, choose M here: the
> +       module will be called ibmvstgt.
> +

[]

> diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
[]
> @@ -127,7 +127,9 @@ obj-$(CONFIG_SCSI_LASI700)+= 53c700.o lasi700.o
>  obj-$(CONFIG_SCSI_SNI_53C710)+= 53c700.o sni_53c710.o
>  obj-$(CONFIG_SCSI_NSP32) += nsp32.o
>  obj-$(CONFIG_SCSI_IPR)   += ipr.o
> +obj-$(CONFIG_SCSI_SRP)  += libsrp.o
>  obj-$(CONFIG_SCSI_IBMVSCSI)  += ibmvscsi/
> +obj-$(CONFIG_SCSI_IBMVSCSIS)+= ibmvscsi/

and here

> diff --git a/drivers/scsi/ibmvscsi/ibmvscsis.c 
> b/drivers/scsi/ibmvscsi/ibmvscsis.c
[]

> +static int ibmvscsis_probe(struct vio_dev *vdev,
> +    const struct vio_device_id *id);
[...]

It might be nice to rearrange the code to avoid these forward
function declarations.

> +static ssize_t ibmvscsis_tpg_enable_store(struct config_item *item,
> + const char *page, size_t count)
> +{
[]
> > +   ret = kstrtoul(page, 0, );
> + if (ret < 0) {
> + pr_err("Unable to extract ibmvscsis_tpg_store_enable\n");
> + return -EINVAL;
> + }

It might be nicer to add:

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

before any #include to output all the logging messages with
a standardized prefix.  Then all the other logging with an
embedded prefix can have the embedded prefix removed too.

> +
> + if ((tmp != 0) && (tmp != 1)) {
> + pr_err("Illegal value for ibmvscsis_tpg_store_enable: %lu\n",
> + tmp);
> + return -EINVAL;
> + }
> +
> + if (tmp == 1)
> + tport->enabled = true;
> + else
> + tport->enabled = false;

tport->enabled = tmp;

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] ibmvscsis: Initial commit of IBM VSCSI Tgt Driver

2016-05-24 Thread Bryant G. Ly
From: bgly 

This initial commit contains WIP of the IBM VSCSI Target Fabric
Module. It currently supports read/writes, and I have tested
the ability to create a file backstore with the driver and install
RHEL VIA NIM and then boot up the partition via filio backstore
through the driver.

Signed-off-by: bryantly 
---
 MAINTAINERS   |   10 +
 drivers/scsi/Kconfig  |   24 +
 drivers/scsi/Makefile |2 +
 drivers/scsi/ibmvscsi/Makefile|1 +
 drivers/scsi/ibmvscsi/ibmvscsis.c | 2033 +
 drivers/scsi/ibmvscsi/ibmvscsis.h |  160 +++
 drivers/scsi/libsrp.c |  387 +++
 include/scsi/libsrp.h |   95 ++
 8 files changed, 2712 insertions(+)
 create mode 100644 drivers/scsi/ibmvscsi/ibmvscsis.c
 create mode 100644 drivers/scsi/ibmvscsi/ibmvscsis.h
 create mode 100644 drivers/scsi/libsrp.c
 create mode 100644 include/scsi/libsrp.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 6ee06ea..b520e6c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5381,6 +5381,16 @@ S:   Supported
 F: drivers/scsi/ibmvscsi/ibmvscsi*
 F: drivers/scsi/ibmvscsi/viosrp.h
 
+IBM Power Virtual SCSI Device Target Driver
+M: Bryant G. Ly 
+L: linux-scsi@vger.kernel.org
+L: target-de...@vger.kernel.org
+S: Supported
+F: drivers/scsi/ibmvscsi/ibmvscsis.c
+F:  drivers/scsi/ibmvscsi/ibmvscsis.h
+F: drivers/scsi/libsrp.h
+F:  drivers/scsi/libsrp.c
+
 IBM Power Virtual FC Device Drivers
 M: Tyrel Datwyler 
 L: linux-scsi@vger.kernel.org
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index e2f31c9..6adf8f1 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -847,6 +847,20 @@ config SCSI_IBMVSCSI
  To compile this driver as a module, choose M here: the
  module will be called ibmvscsi.
 
+config SCSI_IBMVSCSIS
+   tristate "IBM Virtual SCSI Server support"
+   depends on PPC_PSERIES && SCSI_SRP && TARGET_CORE
+   help
+ This is the IBM POWER Virtual SCSI Target Server
+
+  The userspace component needed to initialize the driver and
+ documentation can be found:
+
+  https://github.com/powervm/ibmvscsis
+
+  To compile this driver as a module, choose M here: the
+ module will be called ibmvstgt.
+
 config SCSI_IBMVFC
tristate "IBM Virtual FC support"
depends on PPC_PSERIES && SCSI
@@ -1728,6 +1742,16 @@ config SCSI_PM8001
  This driver supports PMC-Sierra PCIE SAS/SATA 8x6G SPC 8001 chip
  based host adapters.
 
+config SCSI_SRP
+   tristate "SCSI RDMA Protocol helper library"
+   depends on SCSI && PCI
+   help
+ This scsi srp module is a library for ibmvscsi target driver.
+ If you wish to use SRP target drivers, say Y.
+
+ To compile this driver as a module, choose M here. The module will
+ be called libsrp.
+
 config SCSI_BFA_FC
tristate "Brocade BFA Fibre Channel Support"
depends on PCI && SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 862ab4e..8692dd4 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -127,7 +127,9 @@ obj-$(CONFIG_SCSI_LASI700)  += 53c700.o lasi700.o
 obj-$(CONFIG_SCSI_SNI_53C710)  += 53c700.o sni_53c710.o
 obj-$(CONFIG_SCSI_NSP32)   += nsp32.o
 obj-$(CONFIG_SCSI_IPR) += ipr.o
+obj-$(CONFIG_SCSI_SRP)  += libsrp.o
 obj-$(CONFIG_SCSI_IBMVSCSI)+= ibmvscsi/
+obj-$(CONFIG_SCSI_IBMVSCSIS)+= ibmvscsi/
 obj-$(CONFIG_SCSI_IBMVFC)  += ibmvscsi/
 obj-$(CONFIG_SCSI_HPTIOP)  += hptiop.o
 obj-$(CONFIG_SCSI_STEX)+= stex.o
diff --git a/drivers/scsi/ibmvscsi/Makefile b/drivers/scsi/ibmvscsi/Makefile
index 3840c64..b241a567 100644
--- a/drivers/scsi/ibmvscsi/Makefile
+++ b/drivers/scsi/ibmvscsi/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_SCSI_IBMVSCSI)+= ibmvscsi.o
+obj-$(CONFIG_SCSI_IBMVSCSIS)+= ibmvscsis.o
 obj-$(CONFIG_SCSI_IBMVFC)  += ibmvfc.o
diff --git a/drivers/scsi/ibmvscsi/ibmvscsis.c 
b/drivers/scsi/ibmvscsi/ibmvscsis.c
new file mode 100644
index 000..c7eb347
--- /dev/null
+++ b/drivers/scsi/ibmvscsi/ibmvscsis.c
@@ -0,0 +1,2033 @@
+/***
+ * IBM Virtual SCSI Target Driver
+ * Copyright (C) 2003-2005 Dave Boutcher (boutc...@us.ibm.com) IBM Corp.
+ *Santiago Leon (san...@us.ibm.com) IBM Corp.
+ *Linda Xie (l...@us.ibm.com) IBM Corp.
+ *
+ * Copyright (C) 2005-2011 FUJITA Tomonori 
+ * Copyright (C) 2010 Nicholas A. Bellinger 
+ * Copyright (C) 2016 Bryant G. Ly  IBM Corp.
+ *
+ * Authors: Bryant G. Ly 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms 

Re: [PATCH] USB: uas: Fix slave queue_depth not being set

2016-05-24 Thread James Bottomley
On Tue, 2016-05-24 at 08:53 +0200, Hans de Goede wrote:
> Hi,
> 
> On 23-05-16 19:36, James Bottomley wrote:
> > On Mon, 2016-05-23 at 13:49 +0200, Hans de Goede wrote:
> > > Commit 198de51dbc34 ("USB: uas: Limit qdepth at the scsi-host
> > > level")
> > > removed the scsi_change_queue_depth() call from 
> > > uas_slave_configure() assuming that the slave would inherit the 
> > > host's queue_depth, which that commit sets to the same value.
> > > 
> > > This is incorrect, without the scsi_change_queue_depth() call the
> > > slave's queue_depth defaults to 1, introducing a performance
> > > regression.
> > > 
> > > This commit restores the call, fixing the performance regression.
> > > 
> > > Cc: sta...@vger.kernel.org
> > > Fixes: 198de51dbc34 ("USB: uas: Limit qdepth at the scsi-host
> > > level")
> > > Reported-by: Tom Yan 
> > > Signed-off-by: Hans de Goede 
> > > ---
> > >  drivers/usb/storage/uas.c | 1 +
> > >  1 file changed, 1 insertion(+)
> > > 
> > > diff --git a/drivers/usb/storage/uas.c
> > > b/drivers/usb/storage/uas.c
> > > index 16bc679..ecc7d4b 100644
> > > --- a/drivers/usb/storage/uas.c
> > > +++ b/drivers/usb/storage/uas.c
> > > @@ -835,6 +835,7 @@ static int uas_slave_configure(struct
> > > scsi_device
> > > *sdev)
> > >   if (devinfo->flags & US_FL_BROKEN_FUA)
> > >   sdev->broken_fua = 1;
> > > 
> > > + scsi_change_queue_depth(sdev, devinfo->qdepth - 2);
> > 
> > Are you sure about this?  For spinning rust, experiments imply that 
> > the optimal queue depth per device is somewhere between 2 and 4. 
> >  Obviously that's not true for SSDs, so it depends on your use 
> > case.  Plus, for ATA NCQ devices (which I believe most UAS is 
> > bridged to) you have a maximum NCQ depth of 31.
> 
> So this value is the same as host.can_queue, and is what uas has 
> always used, basically this says it is ok to queue as much as the 
> bridge can handle. We've seen a few rare multi-lun devices, but 
> typically almost all uas devices have one lun, what I really want to 
> do here is give a maximum and let say the sd driver lower that if it
> is sub-optimal.

If that's what you actually want, you should be setting sdev
->max_queue_depth and .track_queue_depth = 1 in the template.

You might also need to add calls to scsi_track_queue_full() but only if
the devices aren't responding QUEUE_FULL correctly.

James

> Also notice that uas is used a lot with ssd-s, that is mostly what
> I want to optimize for, but it is definitely also used with spinning
> rust.
> 
> And yes almost all uas devices are bridged sata devices (this may
> change in the near future though, with ssd-s specifically designed
> for usb-3 attachment, although sofar these all seem to use an
> embbeded sata bridge), so from this pov an upper limit of 31 makes 
> sense, I guess, but I've not seen any bridges which actually do more 
> then 32 streams anyways.
> 
> Still this is a bug-fix patch, essentially a partial revert, to 
> address performance regressions, so lets get this out as is and take 
> our time to come up with some tweaks (if necessary) for the say 4.8.
> 
> > There's a good reason why you don't want a queue deeper than you 
> > can handle: it tends to interfere with writeback because you build 
> > up a lot of pending I/O in the queue which can't be issued (it's 
> > very similar to why bufferbloat is a problem in networks).  In 
> > theory, as long as your devices return the correct indicator 
> > (QUEUE_FULL status), we'll handle most of this in the mid-layer by 
> > plugging the block queue, but given what I've seen from UAS
> > devices, that's less than probable.
> 
> So any smart ideas how to be nicer to spinning rust, without 
> negatively impacting ssd-s? As said if I've to choice I think we 
> should chose optimizing ssd-s, as that is where uas is used a lot 
> (although usb  attached harddisks are switching over to it too).
> 
> Note I just checked the 1TB sata/ahci harddisk in my workstation and 
> it is using a queue_depth of 31 too, so this really does seem like a 
> mid-layer problem to me.
> 
> Regards,
> 
> Hans
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi"
> in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] scsi/qla2xxx: Remove erroneous macro qla82xx_get_temp_val1()

2016-05-24 Thread Thomas Huth
That macros uses logical "&&" instead of bit-wise "&" which is
apparently wrong. Since the macro is completely unused, simply
remove it, so that nobody can accidentially use it anymore.

Signed-off-by: Thomas Huth 
---
 drivers/scsi/qla2xxx/qla_nx.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_nx.h b/drivers/scsi/qla2xxx/qla_nx.h
index 59c4778..6201dce 100644
--- a/drivers/scsi/qla2xxx/qla_nx.h
+++ b/drivers/scsi/qla2xxx/qla_nx.h
@@ -1183,7 +1183,6 @@ static const int MD_MIU_TEST_AGT_RDDATA[] = { 0x41A8, 
0x41AC,
 #define CRB_NIU_XG_PAUSE_CTL_P10x8
 
 #define qla82xx_get_temp_val(x)  ((x) >> 16)
-#define qla82xx_get_temp_val1(x)  ((x) && 0x)
 #define qla82xx_get_temp_state(x)((x) & 0x)
 #define qla82xx_encode_temp(val, state)  (((val) << 16) | (state))
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [uas/scsi] untagged commands?

2016-05-24 Thread Hans de Goede

Hi,

On 24-05-16 10:18, Tom Yan wrote:

In this commit:
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/drivers/usb/storage/uas.c?id=198de51dbc3454d95b015ca0a055b673f85f01bb

There is the following comment:

1 tag is reserved for untagged commands


So my question is, what exactly are "untagged commands"?


https://en.wikipedia.org/wiki/Tagged_Command_Queuing

Regards,

Hans
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[uas/scsi] untagged commands?

2016-05-24 Thread Tom Yan
In this commit:
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/drivers/usb/storage/uas.c?id=198de51dbc3454d95b015ca0a055b673f85f01bb

There is the following comment:
> 1 tag is reserved for untagged commands

So my question is, what exactly are "untagged commands"?

If we need to reserve (only) 1 tag for them, does that mean "untagged
commands" will never be queued? (I mean, not counting the "tagged
commands"; so in the queue there will always be at most 1 "untagged
command" and a certain number, or none, of tagged commands?)

Also, what determines whether a command is tagged or not? Is it merely
depending on how a program (or maybe the kernel) issue the commands?
Or can a device (or maybe the driver) require specific SCSI command(s)
to always be issued untagged?

In any case, is there a way (e.g. a debug option/level of the sd or
the uas driver, etc.) to show/log whether the issued commands are
tagged or untagged?
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 118081] open-iscsi Ping timeout erro

2016-05-24 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=118081

--- Comment #4 from Liu zhengyuan  ---
Hi, nab:
Did you mean doing iozone test on a local disk LUN not a iscsi LUN from the
word "are you seeing something different wrt iozone on a Linux host..?"?  If
so, I didn`t found any different between this two LUN from application
perspective. As for iscsi LUN, the iozone would abort due to write system call
failure.
(In reply to nab from comment #3)
> Hey Mike & Co,
> 
> Apologies for missing this bug-report.  Comments below.
> 
> (Adding target-devel CC')
> 
> On Fri, 2016-05-13 at 11:19 -0500, Mike Christie wrote:
> > On 05/11/2016 10:34 PM, bugzilla-dae...@bugzilla.kernel.org wrote:
> > > https://bugzilla.kernel.org/show_bug.cgi?id=118081
> > > 
> > > Bug ID: 118081
> > >Summary: open-iscsi Ping timeout erro
> > >Product: SCSI Drivers
> > >Version: 2.5
> > > Kernel Version: 4.4.7
> > >   Hardware: All
> > > OS: Linux
> > >   Tree: Mainline
> > > Status: NEW
> > >   Severity: normal
> > >   Priority: P1
> > >  Component: Other
> > >   Assignee: scsi_drivers-ot...@kernel-bugs.osdl.org
> > >   Reporter: liuzhengyuang...@gmail.com
> > > Regression: No
> > > 
> > > Hi everyone:
> > > I create a target using fileio as the backend storage on ARM64 server. The
> > > initiator reported some errors showed bellow  while perform iozone test.
> > > 
> > > [178444.145679]  connection14:0: ping timeout of 5 secs expired, recv 
> > > timeout
> > > 5, last rx 4339462894, last ping 4339464146, now 4339465400
> > > [178444.145706]  connection14:0: detected conn error (1011)
> > > [178469.674313]  connection14:0: detected conn error (1020)
> > > [178504.420979]  connection14:0: ping timeout of 5 secs expired, recv 
> > > timeout
> > > 5, last rx 4339477953, last ping 4339479204, now 4339480456
> > > [178504.421001]  connection14:0: detected conn error (1011)
> > > [178532.064262]  connection14:0: detected conn error (1020)
> > > [178564.584087]  connection14:0: ping timeout of 5 secs expired, recv 
> > > timeout
> > > 5, last rx 4339492980, last ping 4339494232, now 4339495484
> > > ..
> > > 
> > > I try to trace the function call of target iscsi. Then, I found the  
> > > receiving 
> > > thread of target iscsi blocked at fd_execute_sync_cache -> 
> > > vfs_fsync_range.
> > > Further, vfs_fsync_range may takes more than 10 seconds to return,while
> > > initiator Ping timeout would happened after 5 seconds.   vfs_fsync_range 
> > > was
> > > call with the form vfs_fsync_range(fd_dev->fd_file, 0, LLONG_MAX, 1) every
> > > times  which means sync all device cache. 
> > > So, is this a bug?
> > > How  does Initiator send sync_cache scsi command? 
> > > Does it need to sync all device cache at once?
> > > Any reply would be thankful.
> > > 
> > 
> > The upper layers like the FS or application determine when to send a
> > sync cache. They send down a request and the iscsi layer just sends it
> > to the target.
> > 
> > You are using LIO right? It looks like we end up syncing the entire
> > device sometimes. I think for iscsi pings/Nops that have the immediate
> > bit set, the target would want to reply to them right away. They should
> > not be getting stuck behind these type of commands.
> > 
> > Nick, what do you think?
> > 
> 
> In modern iscsi-target code, the backend sbc_ops->execute_sync_cache()
> call is invoked directly from iscsi_trx kthread process context.
> 
> For FILEIO backends, this can block immediate iscsi commands like NOPs
> on the same connection (eg: socket) processing a SYNCHRONIZE_CACHE CDB
> that is taking a long time to complete.
> 
> Wrt to Liu's follow-up question, an initiator should attempt to retry
> all outstanding commands that did not receive a response once the
> initiator side NopOut timeout has fired.
> 
> So from an application perspective, the initiator NopOut timeout and
> subsequent iscsi session reinstatement should not be propagating up a
> SYNCHRONIZE_CACHE failure unless the default 120 second Linux/iSCSI
> initiator I/O timeout has elapsed.
> 
> Liu, are you seeing something different wrt iozone on a Linux host..?

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 118081] open-iscsi Ping timeout erro

2016-05-24 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=118081

--- Comment #3 from nab  ---
Hey Mike & Co,

Apologies for missing this bug-report.  Comments below.

(Adding target-devel CC')

On Fri, 2016-05-13 at 11:19 -0500, Mike Christie wrote:
> On 05/11/2016 10:34 PM, bugzilla-dae...@bugzilla.kernel.org wrote:
> > https://bugzilla.kernel.org/show_bug.cgi?id=118081
> > 
> > Bug ID: 118081
> >Summary: open-iscsi Ping timeout erro
> >Product: SCSI Drivers
> >Version: 2.5
> > Kernel Version: 4.4.7
> >   Hardware: All
> > OS: Linux
> >   Tree: Mainline
> > Status: NEW
> >   Severity: normal
> >   Priority: P1
> >  Component: Other
> >   Assignee: scsi_drivers-ot...@kernel-bugs.osdl.org
> >   Reporter: liuzhengyuang...@gmail.com
> > Regression: No
> > 
> > Hi everyone:
> > I create a target using fileio as the backend storage on ARM64 server. The
> > initiator reported some errors showed bellow  while perform iozone test.
> > 
> > [178444.145679]  connection14:0: ping timeout of 5 secs expired, recv 
> > timeout
> > 5, last rx 4339462894, last ping 4339464146, now 4339465400
> > [178444.145706]  connection14:0: detected conn error (1011)
> > [178469.674313]  connection14:0: detected conn error (1020)
> > [178504.420979]  connection14:0: ping timeout of 5 secs expired, recv 
> > timeout
> > 5, last rx 4339477953, last ping 4339479204, now 4339480456
> > [178504.421001]  connection14:0: detected conn error (1011)
> > [178532.064262]  connection14:0: detected conn error (1020)
> > [178564.584087]  connection14:0: ping timeout of 5 secs expired, recv 
> > timeout
> > 5, last rx 4339492980, last ping 4339494232, now 4339495484
> > ..
> > 
> > I try to trace the function call of target iscsi. Then, I found the  
> > receiving 
> > thread of target iscsi blocked at fd_execute_sync_cache -> vfs_fsync_range.
> > Further, vfs_fsync_range may takes more than 10 seconds to return,while
> > initiator Ping timeout would happened after 5 seconds.   vfs_fsync_range was
> > call with the form vfs_fsync_range(fd_dev->fd_file, 0, LLONG_MAX, 1) every
> > times  which means sync all device cache. 
> > So, is this a bug?
> > How  does Initiator send sync_cache scsi command? 
> > Does it need to sync all device cache at once?
> > Any reply would be thankful.
> > 
> 
> The upper layers like the FS or application determine when to send a
> sync cache. They send down a request and the iscsi layer just sends it
> to the target.
> 
> You are using LIO right? It looks like we end up syncing the entire
> device sometimes. I think for iscsi pings/Nops that have the immediate
> bit set, the target would want to reply to them right away. They should
> not be getting stuck behind these type of commands.
> 
> Nick, what do you think?
> 

In modern iscsi-target code, the backend sbc_ops->execute_sync_cache()
call is invoked directly from iscsi_trx kthread process context.

For FILEIO backends, this can block immediate iscsi commands like NOPs
on the same connection (eg: socket) processing a SYNCHRONIZE_CACHE CDB
that is taking a long time to complete.

Wrt to Liu's follow-up question, an initiator should attempt to retry
all outstanding commands that did not receive a response once the
initiator side NopOut timeout has fired.

So from an application perspective, the initiator NopOut timeout and
subsequent iscsi session reinstatement should not be propagating up a
SYNCHRONIZE_CACHE failure unless the default 120 second Linux/iSCSI
initiator I/O timeout has elapsed.

Liu, are you seeing something different wrt iozone on a Linux host..?

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Bug 118081] New: open-iscsi Ping timeout erro

2016-05-24 Thread Nicholas A. Bellinger
Hey Mike & Co,

Apologies for missing this bug-report.  Comments below.

(Adding target-devel CC')

On Fri, 2016-05-13 at 11:19 -0500, Mike Christie wrote:
> On 05/11/2016 10:34 PM, bugzilla-dae...@bugzilla.kernel.org wrote:
> > https://bugzilla.kernel.org/show_bug.cgi?id=118081
> > 
> > Bug ID: 118081
> >Summary: open-iscsi Ping timeout erro
> >Product: SCSI Drivers
> >Version: 2.5
> > Kernel Version: 4.4.7
> >   Hardware: All
> > OS: Linux
> >   Tree: Mainline
> > Status: NEW
> >   Severity: normal
> >   Priority: P1
> >  Component: Other
> >   Assignee: scsi_drivers-ot...@kernel-bugs.osdl.org
> >   Reporter: liuzhengyuang...@gmail.com
> > Regression: No
> > 
> > Hi everyone:
> > I create a target using fileio as the backend storage on ARM64 server. The
> > initiator reported some errors showed bellow  while perform iozone test.
> > 
> > [178444.145679]  connection14:0: ping timeout of 5 secs expired, recv 
> > timeout
> > 5, last rx 4339462894, last ping 4339464146, now 4339465400
> > [178444.145706]  connection14:0: detected conn error (1011)
> > [178469.674313]  connection14:0: detected conn error (1020)
> > [178504.420979]  connection14:0: ping timeout of 5 secs expired, recv 
> > timeout
> > 5, last rx 4339477953, last ping 4339479204, now 4339480456
> > [178504.421001]  connection14:0: detected conn error (1011)
> > [178532.064262]  connection14:0: detected conn error (1020)
> > [178564.584087]  connection14:0: ping timeout of 5 secs expired, recv 
> > timeout
> > 5, last rx 4339492980, last ping 4339494232, now 4339495484
> > ..
> > 
> > I try to trace the function call of target iscsi. Then, I found the  
> > receiving 
> > thread of target iscsi blocked at fd_execute_sync_cache -> vfs_fsync_range.
> > Further, vfs_fsync_range may takes more than 10 seconds to return,while
> > initiator Ping timeout would happened after 5 seconds.   vfs_fsync_range was
> > call with the form vfs_fsync_range(fd_dev->fd_file, 0, LLONG_MAX, 1) every
> > times  which means sync all device cache. 
> > So, is this a bug?
> > How  does Initiator send sync_cache scsi command? 
> > Does it need to sync all device cache at once?
> > Any reply would be thankful.
> > 
> 
> The upper layers like the FS or application determine when to send a
> sync cache. They send down a request and the iscsi layer just sends it
> to the target.
> 
> You are using LIO right? It looks like we end up syncing the entire
> device sometimes. I think for iscsi pings/Nops that have the immediate
> bit set, the target would want to reply to them right away. They should
> not be getting stuck behind these type of commands.
> 
> Nick, what do you think?
> 

In modern iscsi-target code, the backend sbc_ops->execute_sync_cache()
call is invoked directly from iscsi_trx kthread process context.

For FILEIO backends, this can block immediate iscsi commands like NOPs
on the same connection (eg: socket) processing a SYNCHRONIZE_CACHE CDB
that is taking a long time to complete.

Wrt to Liu's follow-up question, an initiator should attempt to retry
all outstanding commands that did not receive a response once the
initiator side NopOut timeout has fired.

So from an application perspective, the initiator NopOut timeout and
subsequent iscsi session reinstatement should not be propagating up a
SYNCHRONIZE_CACHE failure unless the default 120 second Linux/iSCSI
initiator I/O timeout has elapsed.

Liu, are you seeing something different wrt iozone on a Linux host..?

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 1/1] uas: remove can_queue set in host template

2016-05-24 Thread Hans de Goede

Hi,

On 23-05-16 21:28, tom.t...@gmail.com wrote:

From: Tom Yan 

Commit 198de51dbc34 ("USB: uas: Limit qdepth at the scsi-host level") made
qdepth limit set in host template (`.can_queue = MAX_CMNDS`) redundant.
Removing it to avoid confusion.

Signed-off-by: Tom Yan 


I agree removing this is good:

Acked-by: Hans de Goede 

Regards,

Hans




diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 4d49fce..e03c490 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -848,7 +848,6 @@ static struct scsi_host_template uas_host_template = {
.slave_configure = uas_slave_configure,
.eh_abort_handler = uas_eh_abort_handler,
.eh_bus_reset_handler = uas_eh_bus_reset_handler,
-   .can_queue = MAX_CMNDS,
.this_id = -1,
.sg_tablesize = SG_NONE,
.skip_settle_delay = 1,


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] USB: uas: Fix slave queue_depth not being set

2016-05-24 Thread Hans de Goede

Hi,

On 23-05-16 19:36, James Bottomley wrote:

On Mon, 2016-05-23 at 13:49 +0200, Hans de Goede wrote:

Commit 198de51dbc34 ("USB: uas: Limit qdepth at the scsi-host level")
removed the scsi_change_queue_depth() call from uas_slave_configure()
assuming that the slave would inherit the host's queue_depth, which
that commit sets to the same value.

This is incorrect, without the scsi_change_queue_depth() call the
slave's queue_depth defaults to 1, introducing a performance
regression.

This commit restores the call, fixing the performance regression.

Cc: sta...@vger.kernel.org
Fixes: 198de51dbc34 ("USB: uas: Limit qdepth at the scsi-host level")
Reported-by: Tom Yan 
Signed-off-by: Hans de Goede 
---
 drivers/usb/storage/uas.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 16bc679..ecc7d4b 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -835,6 +835,7 @@ static int uas_slave_configure(struct scsi_device
*sdev)
if (devinfo->flags & US_FL_BROKEN_FUA)
sdev->broken_fua = 1;

+   scsi_change_queue_depth(sdev, devinfo->qdepth - 2);


Are you sure about this?  For spinning rust, experiments imply that the
optimal queue depth per device is somewhere between 2 and 4.  Obviously
that's not true for SSDs, so it depends on your use case.  Plus, for
ATA NCQ devices (which I believe most UAS is bridged to) you have a
maximum NCQ depth of 31.


So this value is the same as host.can_queue, and is what uas has always
used, basically this says it is ok to queue as much as the bridge can
handle. We've seen a few rare multi-lun devices, but typically almost
all uas devices have one lun, what I really want to do here is give a
maximum and let say the sd driver lower that if it is sub-optimal.

Also notice that uas is used a lot with ssd-s, that is mostly what
I want to optimize for, but it is definitely also used with
spinning rust.

And yes almost all uas devices are bridged sata devices (this may
change in the near future though, with ssd-s specifically designed
for usb-3 attachment, although sofar these all seem to use an
embbeded sata bridge), so from this pov an upper limit of 31 makes sense,
I guess, but I've not seen any bridges which actually do more then 32
streams anyways.

Still this is a bug-fix patch, essentially a partial revert, to address
performance regressions, so lets get this out as is and take our time to
come up with some tweaks (if necessary) for the say 4.8.


There's a good reason why you don't want a queue deeper than you can
handle: it tends to interfere with writeback because you build up a lot
of pending I/O in the queue which can't be issued (it's very similar to
why bufferbloat is a problem in networks).  In theory, as long as your
devices return the correct indicator (QUEUE_FULL status), we'll handle
most of this in the mid-layer by plugging the block queue, but given
what I've seen from UAS devices, that's less than probable.


So any smart ideas how to be nicer to spinning rust, without negatively
impacting ssd-s? As said if I've to choice I think we should chose
optimizing ssd-s, as that is where uas is used a lot (although usb-attached
harddisks are switching over to it too).

Note I just checked the 1TB sata/ahci harddisk in my workstation and it is using
a queue_depth of 31 too, so this really does seem like a mid-layer problem
to me.

Regards,

Hans
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 13/13] cxgbit: add files for cxgbit.ko

2016-05-24 Thread Nicholas A. Bellinger
Hi Or & Co,

On Wed, 2016-05-18 at 14:45 +0300, Or Gerlitz wrote:
> On Sat, Apr 30, 2016 at 6:54 PM, Or Gerlitz  wrote:
> > On Tue, Apr 19, 2016 at 9:30 PM, Varun Prakash  wrote:
> >> cxgbit.h - This file contains data structure
> >> definitions for cxgbit.ko.
> >>
> >> cxgbit_lro.h - This file contains data structure
> >> definitions for LRO support.
> >>
> >> cxgbit_main.c - This file contains code for
> >> registering with iscsi target transport and
> >> cxgb4 driver.
> >>
> >> cxgbit_cm.c - This file contains code for
> >> connection management.
> >>
> >> cxgbit_target.c - This file contains code
> >> for processing iSCSI PDU.
> >>
> >> cxgbit_ddp.c - This file contains code for
> >> Direct Data Placement.
> >
> > Wait,
> >
> > You are adding many K's LOCs to handle things like CM (connection
> > management), DDP and LRO. But your upstream solution must be using CM
> > and DDP (and LRO as well) for the HW offloaded initiator side as well,
> > not to mention the iWARP side of things.
> >
> > There must be some way to refactor things instead of repeating the
> > same bits over and over, thoughts?
> 
> Nic,
> 
> The author haven't responded... where that this stands from your point of 
> view?
> 

For an initial merge, I don't have an objection to this series wrt
drivers/target/iscsi/* improvements + prerequisites, and new standalone
cxgbit iscsit_transport driver.

That said, there are areas between cxgbi + cxgbit code that can be made
common as you've pointed out.  The Cheliso folks have mentioned off-list
that cxgbi as-is in mainline does not support LRO, and that the majority
of DDP logic is shared between initiator + target.

Are there specific pieces of logic in DDP or iWARP for cxgb* that you'd
like to see Varun + Co pursue as common code in v4.8+..?

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2] libfc: Update rport reference counting

2016-05-24 Thread Hannes Reinecke
Originally libfc would just be initializing the refcount to '1',
and using the disc_mutex to synchronize if and when the final put
should be happening.
This has a race condition as the mutex might be delayed, causing
other threads to access an invalid structure.
This patch updates the rport reference counting to increase the
reference every time 'rport_lookup' is called, and decreases
the reference correspondingly.
This removes the need to hold 'disc_mutex' when removing the
structure, and avoids the above race condition.

Signed-off-by: Hannes Reinecke 
---
 drivers/scsi/fcoe/fcoe_ctlr.c |  7 +--
 drivers/scsi/libfc/fc_lport.c | 19 ++---
 drivers/scsi/libfc/fc_rport.c | 49 ++-
 3 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 3e83d48..ada4bde 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -2496,14 +2496,13 @@ static int fcoe_ctlr_vn_lookup(struct fcoe_ctlr *fip, 
u32 port_id, u8 *mac)
struct fcoe_rport *frport;
int ret = -1;
 
-   rcu_read_lock();
rdata = lport->tt.rport_lookup(lport, port_id);
if (rdata) {
frport = fcoe_ctlr_rport(rdata);
memcpy(mac, frport->enode_mac, ETH_ALEN);
ret = 0;
+   kref_put(>kref, lport->tt.rport_destroy);
}
-   rcu_read_unlock();
return ret;
 }
 
@@ -2585,11 +2584,7 @@ static void fcoe_ctlr_vn_beacon(struct fcoe_ctlr *fip,
fcoe_ctlr_vn_send(fip, FIP_SC_VN_PROBE_REQ, fcoe_all_vn2vn, 0);
return;
}
-   mutex_lock(>disc.disc_mutex);
rdata = lport->tt.rport_lookup(lport, new->ids.port_id);
-   if (rdata)
-   kref_get(>kref);
-   mutex_unlock(>disc.disc_mutex);
if (rdata) {
if (rdata->ids.node_name == new->ids.node_name &&
rdata->ids.port_name == new->ids.port_name) {
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index e01a298..b9b44da 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -2090,7 +2090,7 @@ int fc_lport_bsg_request(struct fc_bsg_job *job)
struct fc_rport *rport;
struct fc_rport_priv *rdata;
int rc = -EINVAL;
-   u32 did;
+   u32 did, tov;
 
job->reply->reply_payload_rcv_len = 0;
if (rsp)
@@ -2121,15 +2121,20 @@ int fc_lport_bsg_request(struct fc_bsg_job *job)
 
case FC_BSG_HST_CT:
did = ntoh24(job->request->rqst_data.h_ct.port_id);
-   if (did == FC_FID_DIR_SERV)
+   if (did == FC_FID_DIR_SERV) {
rdata = lport->dns_rdata;
-   else
+   if (!rdata)
+   break;
+   tov = rdata->e_d_tov;
+   } else {
rdata = lport->tt.rport_lookup(lport, did);
+   if (!rdata)
+   break;
+   tov = rdata->e_d_tov;
+   kref_put(>kref, lport->tt.rport_destroy);
+   }
 
-   if (!rdata)
-   break;
-
-   rc = fc_lport_ct_request(job, lport, did, rdata->e_d_tov);
+   rc = fc_lport_ct_request(job, lport, did, tov);
break;
 
case FC_BSG_HST_ELS_NOLOGIN:
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 589ff9a..93f5961 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -95,17 +95,23 @@ static const char *fc_rport_state_names[] = {
  * @lport:   The local port to lookup the remote port on
  * @port_id: The remote port ID to look up
  *
- * The caller must hold either disc_mutex or rcu_read_lock().
+ * The reference count of the fc_rport_priv structure is
+ * increased by one.
  */
 static struct fc_rport_priv *fc_rport_lookup(const struct fc_lport *lport,
 u32 port_id)
 {
-   struct fc_rport_priv *rdata;
+   struct fc_rport_priv *rdata = NULL, *tmp_rdata;
 
-   list_for_each_entry_rcu(rdata, >disc.rports, peers)
-   if (rdata->ids.port_id == port_id)
-   return rdata;
-   return NULL;
+   rcu_read_lock();
+   list_for_each_entry_rcu(tmp_rdata, >disc.rports, peers)
+   if (tmp_rdata->ids.port_id == port_id &&
+   kref_get_unless_zero(_rdata->kref)) {
+   rdata = tmp_rdata;
+   break;
+   }
+   rcu_read_unlock();
+   return rdata;
 }
 
 /**
@@ -340,7 +346,6 @@ static void fc_rport_work(struct work_struct *work)
fc_remote_port_delete(rport);
}
 
-   mutex_lock(>disc.disc_mutex);
mutex_lock(>rp_mutex);