[Patch v2] Storvsc: Select channel based on available percentage of ring buffer to write

2018-04-19 Thread Long Li
From: Long Li <lon...@microsoft.com>

This is a best-effort estimate of how busy a channel's ring buffer is,
based on the percentage of the buffer that is available to write. It is
still possible that, by the time of the actual ring buffer write, the space
is no longer available because other processes are writing at the same time.

Selecting a channel based on how full it is reduces the possibility that
a ring buffer write will fail, and avoids the situation where a channel is
overly busy.

Now it's possible that storvsc can use a smaller ring buffer size
(e.g. 40k bytes) to take advantage of cache locality.

Changes.
v2: Pre-allocate struct cpumask on the heap.
struct cpumask is a big structure (1k bytes) when CONFIG_NR_CPUS=8192 (the
default value when CONFIG_MAXSMP=y). Keep it off the kernel stack by
pre-allocating the masks with kcalloc when channels are first initialized.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 90 --
 1 file changed, 72 insertions(+), 18 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index a2ec0bc9e9fa..2a9fff94dd1a 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -395,6 +395,12 @@ MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer 
size (bytes)");
 
 module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
 MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to 
subchannels");
+
+static int ring_avail_percent_lowater = 10;
+module_param(ring_avail_percent_lowater, int, S_IRUGO);
+MODULE_PARM_DESC(ring_avail_percent_lowater,
+   "Select a channel if available ring size > this in percent");
+
 /*
  * Timeout in seconds for all devices managed by this driver.
  */
@@ -468,6 +474,13 @@ struct storvsc_device {
 * Mask of CPUs bound to subchannels.
 */
struct cpumask alloced_cpus;
+   /*
+* Pre-allocated struct cpumask for each hardware queue.
+* struct cpumask is used by selecting out-going channels. It is a
+* big structure, default to 1024 bytes when CONFIG_MAXSMP=y.
+* Pre-allocate it to avoid allocation on the kernel stack.
+*/
+   struct cpumask *cpumask_chns;
/* Used for vsc/vsp channel reset process */
struct storvsc_cmd_request init_request;
struct storvsc_cmd_request reset_request;
@@ -872,6 +885,13 @@ static int storvsc_channel_init(struct hv_device *device, 
bool is_fc)
if (stor_device->stor_chns == NULL)
return -ENOMEM;
 
+   stor_device->cpumask_chns = kcalloc(num_possible_cpus(),
+   sizeof(struct cpumask), GFP_KERNEL);
+   if (stor_device->cpumask_chns == NULL) {
+   kfree(stor_device->stor_chns);
+   return -ENOMEM;
+   }
+
stor_device->stor_chns[device->channel->target_cpu] = device->channel;
cpumask_set_cpu(device->channel->target_cpu,
_device->alloced_cpus);
@@ -1232,6 +1252,7 @@ static int storvsc_dev_remove(struct hv_device *device)
vmbus_close(device->channel);
 
kfree(stor_device->stor_chns);
+   kfree(stor_device->cpumask_chns);
kfree(stor_device);
return 0;
 }
@@ -1241,7 +1262,7 @@ static struct vmbus_channel *get_og_chn(struct 
storvsc_device *stor_device,
 {
u16 slot = 0;
u16 hash_qnum;
-   struct cpumask alloced_mask;
+   struct cpumask *alloced_mask = _device->cpumask_chns[q_num];
int num_channels, tgt_cpu;
 
if (stor_device->num_sc == 0)
@@ -1257,10 +1278,10 @@ static struct vmbus_channel *get_og_chn(struct 
storvsc_device *stor_device,
 * III. Mapping is persistent.
 */
 
-   cpumask_and(_mask, _device->alloced_cpus,
+   cpumask_and(alloced_mask, _device->alloced_cpus,
cpumask_of_node(cpu_to_node(q_num)));
 
-   num_channels = cpumask_weight(_mask);
+   num_channels = cpumask_weight(alloced_mask);
if (num_channels == 0)
return stor_device->device->channel;
 
@@ -1268,7 +1289,7 @@ static struct vmbus_channel *get_og_chn(struct 
storvsc_device *stor_device,
while (hash_qnum >= num_channels)
hash_qnum -= num_channels;
 
-   for_each_cpu(tgt_cpu, _mask) {
+   for_each_cpu(tgt_cpu, alloced_mask) {
if (slot == hash_qnum)
break;
slot++;
@@ -1285,9 +1306,9 @@ static int storvsc_do_io(struct hv_device *device,
 {
struct storvsc_device *stor_device;
struct vstor_packet *vstor_packet;
-   struct vmbus_channel *outgoing_channel;
+   struct vmbus_channel *outgoing_channel, *channel;
int ret = 0;
-   struct cpumask alloced_mask;
+   struct cpumask *alloced_mask;
int tgt_cpu;
 
vstor_packet = >vstor_

RE: [PATCH v2] storvsc: Set up correct queue depth values for IDE devices

2018-04-18 Thread Long Li
> Subject: Re: [PATCH v2] storvsc: Set up correct queue depth values for IDE
> devices
> 
> 
> Long,
> 
> > Can you take a look at the following patch?
> 
> >> > + max_sub_channels =
> >> > +(num_cpus - 1) / storvsc_vcpus_per_sub_channel;
> 
> What happens if num_cpus = 1?

If num_cpus=1, we don't have any sub channels.

The host offers one sub channel for VM with 5 CPUs, after that it offers an 
additional sub channel every 4 CPUs.

The primary channel is always offered.

> 
> --
> Martin K. Petersen    Oracle Linux Engineering
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH v2] storvsc: Set up correct queue depth values for IDE devices

2018-04-18 Thread Long Li
Hi Martin

Can you take a look at the following patch?

Long

 
> > -Original Message-
> > From: linux-kernel-ow...@vger.kernel.org
> > <linux-kernel-ow...@vger.kernel.org> On Behalf Of Long Li
> > Sent: Thursday, March 22, 2018 2:47 PM
> > To: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> > <haiya...@microsoft.com>; Stephen Hemminger
> <sthem...@microsoft.com>;
> > James E . J . Bottomley <jbottom...@odin.com>; Martin K . Petersen
> > <martin.peter...@oracle.com>; de...@linuxdriverproject.org; linux-
> > s...@vger.kernel.org; linux-ker...@vger.kernel.org
> > Cc: Long Li <lon...@microsoft.com>
> > Subject: [PATCH v2] storvsc: Set up correct queue depth values for IDE
> > devices
> >
> > From: Long Li <lon...@microsoft.com>
> >
> > Unlike SCSI and FC, we don't use multiple channels for IDE.
> > Also fix the calculation for sub-channels.
> >
> > Change log:
> > v2: Addressed comment on incorrect number of sub-channels.
> > (Michael Kelley <michael.h.kel...@microsoft.com>)
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> 
> Reviewed-by: Michael Kelley <mikel...@microsoft.com>
> 
> > ---
> >  drivers/scsi/storvsc_drv.c | 7 +--
> >  1 file changed, 5 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> > index 8c51d628b52e..a2ec0bc9e9fa 100644
> > --- a/drivers/scsi/storvsc_drv.c
> > +++ b/drivers/scsi/storvsc_drv.c
> > @@ -1722,11 +1722,14 @@ static int storvsc_probe(struct hv_device
> *device,
> > max_targets = STORVSC_MAX_TARGETS;
> > max_channels = STORVSC_MAX_CHANNELS;
> > /*
> > -* On Windows8 and above, we support sub-channels for
> storage.
> > +* On Windows8 and above, we support sub-channels for
> storage
> > +* on SCSI and FC controllers.
> >  * The number of sub-channels offerred is based on the
> number of
> >  * VCPUs in the guest.
> >  */
> > -   max_sub_channels = (num_cpus /
> storvsc_vcpus_per_sub_channel);
> > +   if (!dev_is_ide)
> > +   max_sub_channels =
> > +   (num_cpus - 1) /
> storvsc_vcpus_per_sub_channel;
> > }
> >
> > scsi_driver.can_queue = (max_outstanding_req_per_channel *
> > --
> > 2.14.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [Resend Patch 3/3] Storvsc: Select channel based on available percentage of ring buffer to write

2018-04-13 Thread Long Li
> Subject: RE: [Resend Patch 3/3] Storvsc: Select channel based on available
> percentage of ring buffer to write
> 
> > -Original Message-
> > From: linux-kernel-ow...@vger.kernel.org
> > <linux-kernel-ow...@vger.kernel.org> On Behalf Of Long Li
> > Sent: Tuesday, March 27, 2018 5:49 PM
> > To: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> > <haiya...@microsoft.com>; Stephen Hemminger
> <sthem...@microsoft.com>;
> > James E . J . Bottomley <jbottom...@odin.com>; Martin K . Petersen
> > <martin.peter...@oracle.com>; de...@linuxdriverproject.org; linux-
> > s...@vger.kernel.org; linux-ker...@vger.kernel.org;
> > net...@vger.kernel.org
> > Cc: Long Li <lon...@microsoft.com>
> > Subject: [Resend Patch 3/3] Storvsc: Select channel based on available
> > percentage of ring buffer to write
> >
> > From: Long Li <lon...@microsoft.com>
> >
> > This is a best effort for estimating on how busy the ring buffer is
> > for that channel, based on available buffer to write in percentage. It
> > is still possible that at the time of actual ring buffer write, the
> > space may not be available due to other processes may be writing at the
> time.
> >
> > Selecting a channel based on how full it is can reduce the possibility
> > that a ring buffer write will fail, and avoid the situation a channel
> > is over busy.
> >
> > Now it's possible that storvsc can use a smaller ring buffer size
> > (e.g. 40k bytes) to take advantage of cache locality.
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > ---
> >  drivers/scsi/storvsc_drv.c | 62
> > +-
> >  1 file changed, 50 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> > index a2ec0bc9e9fa..b1a87072b3ab 100644
> > --- a/drivers/scsi/storvsc_drv.c
> > +++ b/drivers/scsi/storvsc_drv.c
> > @@ -395,6 +395,12 @@ MODULE_PARM_DESC(storvsc_ringbuffer_size,
> "Ring
> > buffer size (bytes)");
> >
> >  module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
> > MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs
> to
> > subchannels");
> > +
> > +static int ring_avail_percent_lowater = 10;
> 
> Reserving 10% of each ring buffer by default seems like more than is needed
> in the storvsc driver.  That would be about 4Kbytes for the 40K ring buffer
> you suggest, and even more for a ring buffer of 128K.  Each outgoing record is
> only about 344 bytes (I'd have to check exactly).  With the new channel
> selection algorithm below, the only time we use a channel that is already
> below the low water mark is when no channel could be found that is above
> the low water mark.   There could be a case of two or more threads deciding
> that a channel is above the low water mark at the same time and both
> choosing it, but that's likely to be rare.  So it seems like we could set the

It's not rare for two processes to check the same channel at the same time
when running a multi-process I/O workload. The CPU-to-channel mapping is not
1:1.

> default low water mark to 5 percent or even 3 percent, which will let more of
> the ring buffer be used, and let a channel be assigned according to the
> algorithm, rather than falling through to the default because all channels
> appear to be "full".

It seems it's not about how big the ring buffer is; e.g. even if you have a
ring buffer of infinite size, it won't help performance if it is getting
queued all the time while other ring buffers are nearly empty. It's more about
whether the multiple ring buffers are utilized in a reasonable and balanced way.
Testing shows 10 is a good choice, while 3 is prone to returning BUSY and
triggering a block layer retry.

> 
> > +module_param(ring_avail_percent_lowater, int, S_IRUGO);
> > +MODULE_PARM_DESC(ring_avail_percent_lowater,
> > +   "Select a channel if available ring size > this in percent");
> > +
> >  /*
> >   * Timeout in seconds for all devices managed by this driver.
> >   */
> > @@ -1285,9 +1291,9 @@ static int storvsc_do_io(struct hv_device
> > *device,  {
> > struct storvsc_device *stor_device;
> > struct vstor_packet *vstor_packet;
> > -   struct vmbus_channel *outgoing_channel;
> > +   struct vmbus_channel *outgoing_channel, *channel;
> > int ret = 0;
> > -   struct cpumask alloced_mask;
> > +   struct cpumask alloced_mask, other_numa_mask;
> > int tgt_cpu;
> >
> > vstor_

RE: [Resend Patch 1/3] Vmbus: Add function to report available ring buffer to write in total ring size percentage

2018-03-28 Thread Long Li
> Subject: Re: [Resend Patch 1/3] Vmbus: Add function to report available ring
> buffer to write in total ring size percentage
> 
> 
> Long,
> 
> > Netvsc has a function to calculate how much ring buffer in percentage
> > is available to write. This function is also useful for storvsc and
> > other vmbus devices.
> 
> What is the submission strategy for this series? Do you expect it to go
> through net or scsi? If the latter, I'll need an ack from davem.

Martin,

I hope this patch set goes through SCSI, because its purpose is to improve
storvsc.

If this strategy is not possible, I can resubmit the first two patches to net,
and the third patch to scsi after the first two are merged.

Long

> 
> --
> Martin K. Petersen    Oracle Linux Engineering
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[Resend Patch 2/3] Netvsc: Use the vmbus function to calculate ring buffer percentage

2018-03-27 Thread Long Li
From: Long Li <lon...@microsoft.com>

In Vmbus, we have defined a function to calculate available ring buffer
percentage to write.

Use that function and remove netvsc's private version.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h |  1 -
 drivers/net/hyperv/netvsc.c | 17 +++--
 drivers/net/hyperv/netvsc_drv.c |  3 ---
 3 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index cd538d5a7986..a0199ab13d67 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -189,7 +189,6 @@ struct netvsc_device;
 struct net_device_context;
 
 extern u32 netvsc_ring_bytes;
-extern struct reciprocal_value netvsc_ring_reciprocal;
 
 struct netvsc_device *netvsc_device_add(struct hv_device *device,
const struct netvsc_device_info *info);
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 0265d703eb03..8af0069e4d8c 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -31,7 +31,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 
@@ -590,17 +589,6 @@ void netvsc_device_remove(struct hv_device *device)
 #define RING_AVAIL_PERCENT_HIWATER 20
 #define RING_AVAIL_PERCENT_LOWATER 10
 
-/*
- * Get the percentage of available bytes to write in the ring.
- * The return value is in range from 0 to 100.
- */
-static u32 hv_ringbuf_avail_percent(const struct hv_ring_buffer_info 
*ring_info)
-{
-   u32 avail_write = hv_get_bytes_to_write(ring_info);
-
-   return reciprocal_divide(avail_write  * 100, netvsc_ring_reciprocal);
-}
-
 static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
 u32 index)
 {
@@ -649,7 +637,8 @@ static void netvsc_send_tx_complete(struct netvsc_device 
*net_device,
wake_up(_device->wait_drain);
 
if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
-   (hv_ringbuf_avail_percent(>outbound) > 
RING_AVAIL_PERCENT_HIWATER ||
+   (hv_get_avail_to_write_percent(>outbound) >
+RING_AVAIL_PERCENT_HIWATER ||
 queue_sends < 1)) {
netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
ndev_ctx->eth_stats.wake_queue++;
@@ -757,7 +746,7 @@ static inline int netvsc_send_pkt(
struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
u64 req_id;
int ret;
-   u32 ring_avail = hv_ringbuf_avail_percent(_channel->outbound);
+   u32 ring_avail = hv_get_avail_to_write_percent(_channel->outbound);
 
nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
if (skb)
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index faea0be18924..b0b1c2fd2b7b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -35,7 +35,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 #include 
@@ -55,7 +54,6 @@ static unsigned int ring_size __ro_after_init = 128;
 module_param(ring_size, uint, S_IRUGO);
 MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
 unsigned int netvsc_ring_bytes __ro_after_init;
-struct reciprocal_value netvsc_ring_reciprocal __ro_after_init;
 
 static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
NETIF_MSG_LINK | NETIF_MSG_IFUP |
@@ -2186,7 +2184,6 @@ static int __init netvsc_drv_init(void)
ring_size);
}
netvsc_ring_bytes = ring_size * PAGE_SIZE;
-   netvsc_ring_reciprocal = reciprocal_value(netvsc_ring_bytes);
 
ret = vmbus_driver_register(_drv);
if (ret)
-- 
2.14.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[Resend Patch 3/3] Storvsc: Select channel based on available percentage of ring buffer to write

2018-03-27 Thread Long Li
From: Long Li <lon...@microsoft.com>

This is a best-effort estimate of how busy a channel's ring buffer is,
based on the percentage of the buffer that is available to write. It is
still possible that, by the time of the actual ring buffer write, the space
is no longer available because other processes are writing at the same time.

Selecting a channel based on how full it is reduces the possibility that
a ring buffer write will fail, and avoids the situation where a channel is
overly busy.

Now it's possible that storvsc can use a smaller ring buffer size
(e.g. 40k bytes) to take advantage of cache locality.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 62 +-
 1 file changed, 50 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index a2ec0bc9e9fa..b1a87072b3ab 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -395,6 +395,12 @@ MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer 
size (bytes)");
 
 module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
 MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to 
subchannels");
+
+static int ring_avail_percent_lowater = 10;
+module_param(ring_avail_percent_lowater, int, S_IRUGO);
+MODULE_PARM_DESC(ring_avail_percent_lowater,
+   "Select a channel if available ring size > this in percent");
+
 /*
  * Timeout in seconds for all devices managed by this driver.
  */
@@ -1285,9 +1291,9 @@ static int storvsc_do_io(struct hv_device *device,
 {
struct storvsc_device *stor_device;
struct vstor_packet *vstor_packet;
-   struct vmbus_channel *outgoing_channel;
+   struct vmbus_channel *outgoing_channel, *channel;
int ret = 0;
-   struct cpumask alloced_mask;
+   struct cpumask alloced_mask, other_numa_mask;
int tgt_cpu;
 
vstor_packet = >vstor_packet;
@@ -1301,22 +1307,53 @@ static int storvsc_do_io(struct hv_device *device,
/*
 * Select an an appropriate channel to send the request out.
 */
-
if (stor_device->stor_chns[q_num] != NULL) {
outgoing_channel = stor_device->stor_chns[q_num];
-   if (outgoing_channel->target_cpu == smp_processor_id()) {
+   if (outgoing_channel->target_cpu == q_num) {
/*
 * Ideally, we want to pick a different channel if
 * available on the same NUMA node.
 */
cpumask_and(_mask, _device->alloced_cpus,
cpumask_of_node(cpu_to_node(q_num)));
-   for_each_cpu_wrap(tgt_cpu, _mask,
-   outgoing_channel->target_cpu + 1) {
-   if (tgt_cpu != outgoing_channel->target_cpu) {
-   outgoing_channel =
-   stor_device->stor_chns[tgt_cpu];
-   break;
+
+   for_each_cpu_wrap(tgt_cpu, _mask, q_num + 1) {
+   if (tgt_cpu == q_num)
+   continue;
+   channel = stor_device->stor_chns[tgt_cpu];
+   if (hv_get_avail_to_write_percent(
+   >outbound)
+   > ring_avail_percent_lowater) {
+   outgoing_channel = channel;
+   goto found_channel;
+   }
+   }
+
+   /*
+* All the other channels on the same NUMA node are
+* busy. Try to use the channel on the current CPU
+*/
+   if (hv_get_avail_to_write_percent(
+   _channel->outbound)
+   > ring_avail_percent_lowater)
+   goto found_channel;
+
+   /*
+* If we reach here, all the channels on the current
+* NUMA node are busy. Try to find a channel in
+* other NUMA nodes
+*/
+   cpumask_andnot(_numa_mask,
+   _device->alloced_cpus,
+   cpumask_of_node(cpu_to_node(q_num)));
+
+   for_each_cpu(tgt_cpu, _numa_mask) {
+   channel = stor_device->stor_chns[tgt_cpu];
+   if (hv_get_avail_to_write_percent(
+ 

[Resend Patch 1/3] Vmbus: Add function to report available ring buffer to write in total ring size percentage

2018-03-27 Thread Long Li
From: Long Li <lon...@microsoft.com>

Netvsc has a function to calculate how much ring buffer in percentage is
available to write. This function is also useful for storvsc and other
vmbus devices.

Define a similar function in vmbus to be used by other vmbus devices.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/hv/ring_buffer.c |  2 ++
 include/linux/hyperv.h   | 12 
 2 files changed, 14 insertions(+)

diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 8699bb969e7e..3c836c099a8f 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -227,6 +227,8 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info 
*ring_info,
ring_info->ring_buffer->feature_bits.value = 1;
 
ring_info->ring_size = page_cnt << PAGE_SHIFT;
+   ring_info->ring_size_div10_reciprocal =
+   reciprocal_value(ring_info->ring_size / 10);
ring_info->ring_datasize = ring_info->ring_size -
sizeof(struct hv_ring_buffer);
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2048f3c3b68a..eb7204851089 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define MAX_PAGE_BUFFER_COUNT  32
 #define MAX_MULTIPAGE_BUFFER_COUNT 32 /* 128K */
@@ -121,6 +122,7 @@ struct hv_ring_buffer {
 struct hv_ring_buffer_info {
struct hv_ring_buffer *ring_buffer;
u32 ring_size;  /* Include the shared header */
+   struct reciprocal_value ring_size_div10_reciprocal;
spinlock_t ring_lock;
 
u32 ring_datasize;  /* < ring_size */
@@ -155,6 +157,16 @@ static inline u32 hv_get_bytes_to_write(const struct 
hv_ring_buffer_info *rbi)
return write;
 }
 
+static inline u32 hv_get_avail_to_write_percent(
+   const struct hv_ring_buffer_info *rbi)
+{
+   u32 avail_write = hv_get_bytes_to_write(rbi);
+
+   return reciprocal_divide(
+   (avail_write  << 3) + (avail_write << 1),
+   rbi->ring_size_div10_reciprocal);
+}
+
 /*
  * VMBUS version is 32 bit entity broken up into
  * two 16 bit quantities: major_number. minor_number.
-- 
2.14.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 2/3] Netvsc: Use the vmbus function to calculate ring buffer percentage

2018-03-23 Thread Long Li
> Subject: RE: [PATCH 2/3] Netvsc: Use the vmbus function to calculate ring
> buffer percentage
> 
> 
> 
> > -Original Message-
> > From: Haiyang Zhang
> > Sent: Friday, March 23, 2018 8:01 AM
> > To: Long Li <lon...@linuxonhyperv.com>; KY Srinivasan
> > <k...@microsoft.com>; Stephen Hemminger <sthem...@microsoft.com>;
> James
> > E . J . Bottomley <jbottom...@odin.com>; Martin K . Petersen
> > <martin.peter...@oracle.com>; de...@linuxdriverproject.org; linux-
> > s...@vger.kernel.org; linux-ker...@vger.kernel.org
> > Cc: Long Li <lon...@microsoft.com>
> > Subject: RE: [PATCH 2/3] Netvsc: Use the vmbus function to calculate
> > ring buffer percentage
> >
> >
> >
> > > -Original Message-
> > > From: Long Li <lon...@linuxonhyperv.com>
> > > Sent: Thursday, March 22, 2018 8:16 PM
> > > To: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> > > <haiya...@microsoft.com>; Stephen Hemminger
> > <sthem...@microsoft.com>;
> > > James E . J . Bottomley <jbottom...@odin.com>; Martin K . Petersen
> > > <martin.peter...@oracle.com>; de...@linuxdriverproject.org; linux-
> > > s...@vger.kernel.org; linux-ker...@vger.kernel.org
> > > Cc: Long Li <lon...@microsoft.com>
> > > Subject: [PATCH 2/3] Netvsc: Use the vmbus function to calculate
> > > ring
> > buffer
> > > percentage
> > >
> > > From: Long Li <lon...@microsoft.com>
> > >
> > > In Vmbus, we have defined a function to calculate available ring
> > > buffer percentage to write.
> > >
> > > Use that function and remove duplicate netvsc code.
> > >
> > > Signed-off-by: Long Li <lon...@microsoft.com>
> > > ---
> > >  drivers/net/hyperv/netvsc.c | 17 +++--
> > >  drivers/net/hyperv/netvsc_drv.c |  3 ---
> > >  2 files changed, 3 insertions(+), 17 deletions(-)
> 
> Why is the patch being sent to the scsi list and not to the network mailing
> list and Dave Miller?

I will re-send the patch.

> 
> K. Y
> > >
> > > diff --git a/drivers/net/hyperv/netvsc.c
> > > b/drivers/net/hyperv/netvsc.c
> > index
> > > 0265d703eb03..8af0069e4d8c 100644
> > > --- a/drivers/net/hyperv/netvsc.c
> > > +++ b/drivers/net/hyperv/netvsc.c
> > > @@ -31,7 +31,6 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > -#include 
> > >
> > >  #include 
> > >
> > > @@ -590,17 +589,6 @@ void netvsc_device_remove(struct hv_device
> > *device)
> > > #define RING_AVAIL_PERCENT_HIWATER 20  #define
> > > RING_AVAIL_PERCENT_LOWATER 10
> > >
> > > -/*
> > > - * Get the percentage of available bytes to write in the ring.
> > > - * The return value is in range from 0 to 100.
> > > - */
> > > -static u32 hv_ringbuf_avail_percent(const struct
> > > hv_ring_buffer_info
> > > *ring_info) -{
> > > - u32 avail_write = hv_get_bytes_to_write(ring_info);
> > > -
> > > - return reciprocal_divide(avail_write  * 100, netvsc_ring_reciprocal);
> > > -}
> > > -
> > >  static inline void netvsc_free_send_slot(struct netvsc_device
> > *net_device,
> > >u32 index)
> > >  {
> > > @@ -649,7 +637,8 @@ static void netvsc_send_tx_complete(struct
> > > netvsc_device *net_device,
> > >   wake_up(_device->wait_drain);
> > >
> > >   if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx))
> > &&
> > > - (hv_ringbuf_avail_percent(>outbound) >
> > > RING_AVAIL_PERCENT_HIWATER ||
> > > + (hv_get_avail_to_write_percent(>outbound) >
> > > +  RING_AVAIL_PERCENT_HIWATER ||
> > >queue_sends < 1)) {
> > >   netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
> > >   ndev_ctx->eth_stats.wake_queue++;  @@ -757,7 +746,7 @@
> static
> > >inline int netvsc_send_pkt(
> > >   struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet-
> > >q_idx);
> > >   u64 req_id;
> > >   int ret;
> > > - u32 ring_avail = hv_ringbuf_avail_percent(_channel-
> > >outbound);
> > > + u32 ring_avail =
> > > +hv_get_avail_to_write_percent(_channel->outbound);
> > >
> > >   nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
> > >   if (sk

[PATCH 3/3] Storvsc: Select channel based on available percentage of ring buffer to write

2018-03-22 Thread Long Li
From: Long Li <lon...@microsoft.com>

This is a best-effort estimate of how busy a channel's ring buffer is,
based on the percentage of the buffer that is available to write. It is
still possible that, by the time of the actual ring buffer write, the space
is no longer available because other processes are writing at the same time.

Selecting a channel based on how full it is reduces the possibility that
a ring buffer write will fail, and avoids the situation where a channel is
overly busy.

Now it's possible that storvsc can use a smaller ring buffer size
(e.g. 40k bytes) to take advantage of cache locality.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 62 +-
 1 file changed, 50 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index a2ec0bc9e9fa..96681c4f75cb 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -395,6 +395,12 @@ MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer 
size (bytes)");
 
 module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
 MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to 
subchannels");
+
+static int ring_avail_percent_lowater = 10;
+module_param(ring_avail_percent_lowater, int, S_IRUGO);
+MODULE_PARM_DESC(ring_avail_percent_lowater,
+   "Select a channel if available ring size > this in percent");
+
 /*
  * Timeout in seconds for all devices managed by this driver.
  */
@@ -1285,9 +1291,9 @@ static int storvsc_do_io(struct hv_device *device,
 {
struct storvsc_device *stor_device;
struct vstor_packet *vstor_packet;
-   struct vmbus_channel *outgoing_channel;
+   struct vmbus_channel *outgoing_channel, *channel;
int ret = 0;
-   struct cpumask alloced_mask;
+   struct cpumask alloced_mask, other_numa_mask;
int tgt_cpu;
 
vstor_packet = >vstor_packet;
@@ -1301,22 +1307,53 @@ static int storvsc_do_io(struct hv_device *device,
/*
 * Select an an appropriate channel to send the request out.
 */
-
if (stor_device->stor_chns[q_num] != NULL) {
outgoing_channel = stor_device->stor_chns[q_num];
-   if (outgoing_channel->target_cpu == smp_processor_id()) {
+   if (outgoing_channel->target_cpu == q_num) {
/*
 * Ideally, we want to pick a different channel if
 * available on the same NUMA node.
 */
cpumask_and(_mask, _device->alloced_cpus,
cpumask_of_node(cpu_to_node(q_num)));
-   for_each_cpu_wrap(tgt_cpu, _mask,
-   outgoing_channel->target_cpu + 1) {
-   if (tgt_cpu != outgoing_channel->target_cpu) {
-   outgoing_channel =
-   stor_device->stor_chns[tgt_cpu];
-   break;
+
+   for_each_cpu_wrap(tgt_cpu, _mask, q_num + 1) {
+   if (tgt_cpu == q_num)
+   continue;
+   channel = stor_device->stor_chns[tgt_cpu];
+   if (hv_get_avail_to_write_percent(
+   >outbound)
+   > ring_avail_percent_lowater) {
+   outgoing_channel = channel;
+   goto found_channel;
+   }
+   }
+
+   /*
+* All the other channels on the same NUMA node are
+* busy. Try to use the channel with the current CPU
+*/
+   if (hv_get_avail_to_write_percent(
+   _channel->outbound)
+   > ring_avail_percent_lowater)
+   goto found_channel;
+
+   /*
+* If we reach here, all the channels on the current
+* NUMA node are busy. Try to find a channel in
+* other NUMA nodes
+*/
+   cpumask_andnot(_numa_mask,
+   _device->alloced_cpus,
+   cpumask_of_node(cpu_to_node(q_num)));
+
+   for_each_cpu(tgt_cpu, _numa_mask) {
+   channel = stor_device->stor_chns[tgt_cpu];
+   if (hv_get_avail_to_write_percent(
+ 

[PATCH 2/3] Netvsc: Use the vmbus function to calculate ring buffer percentage

2018-03-22 Thread Long Li
From: Long Li <lon...@microsoft.com>

In Vmbus, we have defined a function to calculate available ring buffer
percentage to write.

Use that function and remove duplicate netvsc code.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/net/hyperv/netvsc.c | 17 +++--
 drivers/net/hyperv/netvsc_drv.c |  3 ---
 2 files changed, 3 insertions(+), 17 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 0265d703eb03..8af0069e4d8c 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -31,7 +31,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 
@@ -590,17 +589,6 @@ void netvsc_device_remove(struct hv_device *device)
 #define RING_AVAIL_PERCENT_HIWATER 20
 #define RING_AVAIL_PERCENT_LOWATER 10
 
-/*
- * Get the percentage of available bytes to write in the ring.
- * The return value is in range from 0 to 100.
- */
-static u32 hv_ringbuf_avail_percent(const struct hv_ring_buffer_info 
*ring_info)
-{
-   u32 avail_write = hv_get_bytes_to_write(ring_info);
-
-   return reciprocal_divide(avail_write  * 100, netvsc_ring_reciprocal);
-}
-
 static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
 u32 index)
 {
@@ -649,7 +637,8 @@ static void netvsc_send_tx_complete(struct netvsc_device 
*net_device,
wake_up(_device->wait_drain);
 
if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
-   (hv_ringbuf_avail_percent(>outbound) > 
RING_AVAIL_PERCENT_HIWATER ||
+   (hv_get_avail_to_write_percent(>outbound) >
+RING_AVAIL_PERCENT_HIWATER ||
 queue_sends < 1)) {
netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
ndev_ctx->eth_stats.wake_queue++;
@@ -757,7 +746,7 @@ static inline int netvsc_send_pkt(
struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
u64 req_id;
int ret;
-   u32 ring_avail = hv_ringbuf_avail_percent(_channel->outbound);
+   u32 ring_avail = hv_get_avail_to_write_percent(_channel->outbound);
 
nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
if (skb)
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index faea0be18924..b0b1c2fd2b7b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -35,7 +35,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 #include 
@@ -55,7 +54,6 @@ static unsigned int ring_size __ro_after_init = 128;
 module_param(ring_size, uint, S_IRUGO);
 MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
 unsigned int netvsc_ring_bytes __ro_after_init;
-struct reciprocal_value netvsc_ring_reciprocal __ro_after_init;
 
 static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
NETIF_MSG_LINK | NETIF_MSG_IFUP |
@@ -2186,7 +2184,6 @@ static int __init netvsc_drv_init(void)
ring_size);
}
netvsc_ring_bytes = ring_size * PAGE_SIZE;
-   netvsc_ring_reciprocal = reciprocal_value(netvsc_ring_bytes);
 
ret = vmbus_driver_register(_drv);
if (ret)
-- 
2.14.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 1/3] Vmbus: Add function to report available ring buffer to write in total ring size percentage

2018-03-22 Thread Long Li
From: Long Li <lon...@microsoft.com>

Netvsc has a similar function to calculate how much ring buffer in
percentage is available to write. This function is useful for storvsc and
other vmbus devices.

Define a similar function in vmbus to be used by storvsc.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/hv/ring_buffer.c |  2 ++
 include/linux/hyperv.h   | 12 
 2 files changed, 14 insertions(+)

diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 8699bb969e7e..3c836c099a8f 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -227,6 +227,8 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info 
*ring_info,
ring_info->ring_buffer->feature_bits.value = 1;
 
ring_info->ring_size = page_cnt << PAGE_SHIFT;
+   ring_info->ring_size_div10_reciprocal =
+   reciprocal_value(ring_info->ring_size / 10);
ring_info->ring_datasize = ring_info->ring_size -
sizeof(struct hv_ring_buffer);
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2048f3c3b68a..eb7204851089 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define MAX_PAGE_BUFFER_COUNT  32
 #define MAX_MULTIPAGE_BUFFER_COUNT 32 /* 128K */
@@ -121,6 +122,7 @@ struct hv_ring_buffer {
 struct hv_ring_buffer_info {
struct hv_ring_buffer *ring_buffer;
u32 ring_size;  /* Include the shared header */
+   struct reciprocal_value ring_size_div10_reciprocal;
spinlock_t ring_lock;
 
u32 ring_datasize;  /* < ring_size */
@@ -155,6 +157,16 @@ static inline u32 hv_get_bytes_to_write(const struct 
hv_ring_buffer_info *rbi)
return write;
 }
 
+static inline u32 hv_get_avail_to_write_percent(
+   const struct hv_ring_buffer_info *rbi)
+{
+   u32 avail_write = hv_get_bytes_to_write(rbi);
+
+   return reciprocal_divide(
+   (avail_write  << 3) + (avail_write << 1),
+   rbi->ring_size_div10_reciprocal);
+}
+
 /*
  * VMBUS version is 32 bit entity broken up into
  * two 16 bit quantities: major_number. minor_number.
-- 
2.14.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2] storvsc: Set up correct queue depth values for IDE devices

2018-03-22 Thread Long Li
From: Long Li <lon...@microsoft.com>

Unlike SCSI and FC, we don't use multiple channels for IDE.
Also fix the calculation for sub-channels.

Change log:
v2: Addressed comment on incorrect number of sub-channels.
(Michael Kelley <michael.h.kel...@microsoft.com>)

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 8c51d628b52e..a2ec0bc9e9fa 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1722,11 +1722,14 @@ static int storvsc_probe(struct hv_device *device,
max_targets = STORVSC_MAX_TARGETS;
max_channels = STORVSC_MAX_CHANNELS;
/*
-* On Windows8 and above, we support sub-channels for storage.
+* On Windows8 and above, we support sub-channels for storage
+* on SCSI and FC controllers.
 * The number of sub-channels offerred is based on the number of
 * VCPUs in the guest.
 */
-   max_sub_channels = (num_cpus / storvsc_vcpus_per_sub_channel);
+   if (!dev_is_ide)
+   max_sub_channels =
+   (num_cpus - 1) / storvsc_vcpus_per_sub_channel;
}
 
scsi_driver.can_queue = (max_outstanding_req_per_channel *
-- 
2.14.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] storvsc: Set up correct queue depth values for IDE devices

2018-03-16 Thread Long Li
> > Subject: [PATCH] storvsc: Set up correct queue depth values for IDE
> > devices
> >
> > From: Long Li <lon...@microsoft.com>
> >
> > Unlike SCSI and FC, we don't use multiple channels for IDE. So set
> > queue depth correctly for IDE.
> >
> > Also set the correct cmd_per_lun for all devices.
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > ---
> >  drivers/scsi/storvsc_drv.c | 8 ++--
> >  1 file changed, 6 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> > index 8c51d628b52e..fba170640e9c 100644
> > --- a/drivers/scsi/storvsc_drv.c
> > +++ b/drivers/scsi/storvsc_drv.c
> > @@ -1722,15 +1722,19 @@ static int storvsc_probe(struct hv_device
> *device,
> > max_targets = STORVSC_MAX_TARGETS;
> > max_channels = STORVSC_MAX_CHANNELS;
> > /*
> > -* On Windows8 and above, we support sub-channels for
> storage.
> > +* On Windows8 and above, we support sub-channels for
> storage
> > +* on SCSI and FC controllers.
> >  * The number of sub-channels offerred is based on the
> number of
> >  * VCPUs in the guest.
> >  */
> > -   max_sub_channels = (num_cpus /
> storvsc_vcpus_per_sub_channel);
> > +   if (!dev_is_ide)
> > +   max_sub_channels =
> > +   num_cpus / storvsc_vcpus_per_sub_channel;
> 
> This calculation of the # of sub-channels doesn't get the right answer (and it
> didn't before this patch either).  storvsc_vcpus_per_sub_channel defaults to
> 4.
> If num_cpus is 8, max_sub_channels will be 2, but it should be 1.  The sub-
> channel count should not include the main channel since we add 1 to the
> sub-channel count below when calculating can_queue.

This is a good point. I will fix the code calculating can_queue.

> 
> Furthermore, this calculation is just a guess, in the sense that we're
> replicating the algorithm we think Hyper-V is using to determine the number
> of sub-channels to offer.   It turns out Hyper-V is changing that algorithm 
> for
> particular devices in an upcoming new Azure VM size.  But the only use of
> max_sub_channels is in the calculation of can_queue below, so the impact is
> minimal.
> 
> > }
> >
> > scsi_driver.can_queue = (max_outstanding_req_per_channel *
> >  (max_sub_channels + 1));
> > +   scsi_driver.cmd_per_lun = scsi_driver.can_queue;
> 
> can_queue is defined as "int", while cmd_per_lun is defined as "short".
> The calculated value of can_queue could easily be over 32,767 with
> 15 sub-channels and max_outstanding_req_per_channel being 3036 for the
> default 1 Mbyte ring buffer.  So the assignment to cmd_per_lun could
> produce truncation and even a negative number.

This is a good catch. I think I should try calling blk_set_queue_depth() and 
pass the correct value. 

> 
> More broadly, since max_outstanding_req_per_channel is based on the ring
> buffer size, these calculations imply that Hyper-V storvsp's queuing capacity
> is based on the ring buffer size.  I don't think that's the case.  From
> conversations with the storvsp folks, I think Hyper-V always removes entries
> from the guest->host ring buffer and then
> lets storvsp queue them separately.   So we don't want to be linking
> cmd_per_lun (or even can_queue, for that matter) to the ring buffer size.
> The current default ring buffer size of 1 Mbyte is probably 10x bigger than
> needed, and we want to be able to adjust that without ending up with
> can_queue and cmd_per_lun values that are too small.

cmd_per_lun needs to reflect the device capacity. What value do you propose? 
It's not a good idea to leave them constant. Setting those values is also 
important because we don't want to return BUSY when writing to a full ring 
buffer; that will slow down the SCSI stack.

Historically we use ring buffer size to calculate device properties (e.g. 
can_queue for SCSI host).

I agree this doesn't need to be based on the exact queuing capacity of ring 
buffer, maybe we can do 2X of that value (e.g. look at how block uses 
nr_request in MQ). Setting those values smaller is more conservative and I 
don't see an ill effect.

> 
> We would probably do better to set can_queue to a constant, and
> leave cmd_per_lun at its current value of 2048.   The can_queue
> value is already capped at 10240 in the blk-mq layer, so maybe that's a
> reasonable constant to use.

Actually this is not a good idea for smaller ring buffers. You'll see the 
problem when setting both ring buffer sizes to 10 pages.

> 
> Thoughts?
> 
> >
> > host = scsi_host_alloc(_driver,
> >sizeof(struct hv_host_device));
> > --
> > 2.14.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] storvsc: Set up correct queue depth values for IDE devices

2018-03-15 Thread Long Li
From: Long Li <lon...@microsoft.com>

Unlike SCSI and FC, we don't use multiple channels for IDE. So set queue depth
correctly for IDE.

Also set the correct cmd_per_lun for all devices.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 8c51d628b52e..fba170640e9c 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1722,15 +1722,19 @@ static int storvsc_probe(struct hv_device *device,
max_targets = STORVSC_MAX_TARGETS;
max_channels = STORVSC_MAX_CHANNELS;
/*
-* On Windows8 and above, we support sub-channels for storage.
+* On Windows8 and above, we support sub-channels for storage
+* on SCSI and FC controllers.
 * The number of sub-channels offerred is based on the number of
 * VCPUs in the guest.
 */
-   max_sub_channels = (num_cpus / storvsc_vcpus_per_sub_channel);
+   if (!dev_is_ide)
+   max_sub_channels =
+   num_cpus / storvsc_vcpus_per_sub_channel;
}
 
scsi_driver.can_queue = (max_outstanding_req_per_channel *
 (max_sub_channels + 1));
+   scsi_driver.cmd_per_lun = scsi_driver.can_queue;
 
host = scsi_host_alloc(_driver,
   sizeof(struct hv_host_device));
-- 
2.14.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] scsi: storvsc: missing error code in storvsc_probe()

2018-02-08 Thread Long Li
From: Long Li <lon...@microsoft.com>

This patch backports upstream commit ca8dc694045e9aa248e9916e0f614deb0494cb3d
for 4.14-stable.

commit ca8dc694045e9aa248e9916e0f614deb0494cb3d:

We should set the error code if fc_remote_port_add() fails.

Cc: <sta...@vger.kernel.org> #v4.12+
Fixes: daf0cd445a21 ("scsi: storvsc: Add support for FC rport.")
Signed-off-by: Dan Carpenter <dan.carpen...@oracle.com>
Reviewed-by: Cathy Avery <cav...@redhat.com>
Acked-by: K. Y. Srinivasan <k...@microsoft.com>
Signed-off-by: Martin K. Petersen <martin.peter...@oracle.com>

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 5e7200f..c17ccb9 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1826,8 +1826,10 @@ static int storvsc_probe(struct hv_device *device,
fc_host_node_name(host) = stor_device->node_name;
fc_host_port_name(host) = stor_device->port_name;
stor_device->rport = fc_remote_port_add(host, 0, );
-   if (!stor_device->rport)
+   if (!stor_device->rport) {
+   ret = -ENOMEM;
goto err_out3;
+   }
}
 #endif
return 0;
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 1/1] scsi: storvsc: Spread interrupts when picking a channel for I/O requests

2018-01-31 Thread Long Li
> Subject: RE: [PATCH 1/1] scsi: storvsc: Spread interrupts when picking a
> channel for I/O requests
> 
> > From: Long Li
> > Sent: Wednesday, January 31, 2018 12:23 PM
> > To: Michael Kelley (EOSG) <michael.h.kel...@microsoft.com>; KY
> > Srinivasan <k...@microsoft.com>; Stephen Hemminger
> > <sthem...@microsoft.com>; martin.peter...@oracle.com;
> > de...@linuxdriverproject.org; linux-ker...@vger.kernel.org;
> > linux-s...@vger.kernel.org; James E . J . Bottomley
> > <j...@linux.vnet.ibm.com>
> > Subject: RE: [PATCH 1/1] scsi: storvsc: Spread interrupts when picking
> > a channel for I/O requests
> >
> > > Subject: RE: [PATCH 1/1] scsi: storvsc: Spread interrupts when
> > > picking a channel for I/O requests
> > >
> > > Updated/corrected two email addresses ...
> > >
> > > > -Original Message-
> > > > From: Michael Kelley (EOSG)
> > > > Sent: Wednesday, January 24, 2018 2:14 PM
> > > > To: KY Srinivasan <k...@microsoft.com>; Stephen Hemminger
> > > > <sthem...@microsoft.com>; martin.peter...@oracle.com;
> > > > lo...@microsoft.com; jbottom...@odin.com;
> > > > de...@linuxdriverproject.org; linux-ker...@vger.kernel.org;
> > > > linux-s...@vger.kernel.org
> > > > Cc: Michael Kelley (EOSG) <michael.h.kel...@microsoft.com>
> > > > Subject: [PATCH 1/1] scsi: storvsc: Spread interrupts when picking
> > > > a channel for I/O requests
> > > >
> > > > Update the algorithm in storvsc_do_io to look for a channel
> > > > starting with the current CPU + 1 and wrap around (within the
> > > > current NUMA node). This spreads VMbus interrupts more evenly
> > > > across CPUs. Previous code always started with first CPU in the
> > > > current NUMA node, skewing the interrupt load to that CPU.
> > > >
> > > > Signed-off-by: Michael Kelley <mikel...@microsoft.com>

Reviewed-by: Long Li <lon...@microsoft.com>

> > > > ---
> > > >  drivers/scsi/storvsc_drv.c | 3 ++-
> > > >  1 file changed, 2 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/scsi/storvsc_drv.c
> > > > b/drivers/scsi/storvsc_drv.c index e07907d..f3264c4 100644
> > > > --- a/drivers/scsi/storvsc_drv.c
> > > > +++ b/drivers/scsi/storvsc_drv.c
> > > > @@ -1310,7 +1310,8 @@ static int storvsc_do_io(struct hv_device
> *device,
> > > >  */
> > > > cpumask_and(_mask, _device-
> alloced_cpus,
> > > >
> > > cpumask_of_node(cpu_to_node(q_num)));
> > > > -   for_each_cpu(tgt_cpu, _mask) {
> > > > +   for_each_cpu_wrap(tgt_cpu, _mask,
> > > > +   outgoing_channel->target_cpu + 
> > > > 1) {
> >
> > Does it work when target_cpu is the last CPU on the system?
> >
> > Otherwise, looking good.
> 
> Yes, it works.  for_each_cpu_wrap() correctly wraps in the case where the
> 3rd parameter ('start') is one past the end of the mask.  Arguably, we
> shouldn't rely on that, and should do the wrap to 0 before calling
> for_each_cpu_wrap().
> 
> >
> > > > if (tgt_cpu != 
> > > > outgoing_channel->target_cpu)
> > > {
> > > > outgoing_channel =
> > > > stor_device->stor_chns[tgt_cpu];
> > > > --
> > > > 1.8.3.1
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 1/1] scsi: storvsc: Spread interrupts when picking a channel for I/O requests

2018-01-31 Thread Long Li
> Subject: RE: [PATCH 1/1] scsi: storvsc: Spread interrupts when picking a
> channel for I/O requests
> 
> Updated/corrected two email addresses ...
> 
> > -Original Message-
> > From: Michael Kelley (EOSG)
> > Sent: Wednesday, January 24, 2018 2:14 PM
> > To: KY Srinivasan ; Stephen Hemminger
> > ; martin.peter...@oracle.com;
> > lo...@microsoft.com; jbottom...@odin.com;
> > de...@linuxdriverproject.org; linux-ker...@vger.kernel.org;
> > linux-s...@vger.kernel.org
> > Cc: Michael Kelley (EOSG) 
> > Subject: [PATCH 1/1] scsi: storvsc: Spread interrupts when picking a
> > channel for I/O requests
> >
> > Update the algorithm in storvsc_do_io to look for a channel starting
> > with the current CPU + 1 and wrap around (within the current NUMA
> > node). This spreads VMbus interrupts more evenly across CPUs. Previous
> > code always started with first CPU in the current NUMA node, skewing
> > the interrupt load to that CPU.
> >
> > Signed-off-by: Michael Kelley 
> > ---
> >  drivers/scsi/storvsc_drv.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> > index e07907d..f3264c4 100644
> > --- a/drivers/scsi/storvsc_drv.c
> > +++ b/drivers/scsi/storvsc_drv.c
> > @@ -1310,7 +1310,8 @@ static int storvsc_do_io(struct hv_device *device,
> >  */
> > cpumask_and(_mask, _device-
> >alloced_cpus,
> >
> cpumask_of_node(cpu_to_node(q_num)));
> > -   for_each_cpu(tgt_cpu, _mask) {
> > +   for_each_cpu_wrap(tgt_cpu, _mask,
> > +   outgoing_channel->target_cpu + 1) {

Does it work when target_cpu is the last CPU on the system?

Otherwise, looking good.

> > if (tgt_cpu != outgoing_channel->target_cpu)
> {
> > outgoing_channel =
> > stor_device->stor_chns[tgt_cpu];
> > --
> > 1.8.3.1
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2] storvsc: do not assume SG list is continuous when doing bounce buffers (for 4.1 and prior stable kernels)

2018-01-10 Thread Long Li
From: Long Li <lon...@microsoft.com>

The original patch was made for stable 4.1 and was Acked on 08/22/2017, but for
some reason it never made it to the stable tree.

Change from v1:
Changed comment that this patch is for linux-stable 4.1 and all prior stable
kernels.

storvsc checks the SG list for gaps before passing them to Hyper-v device.
If there are gaps, data is copied to a bounce buffer and a continuous data
buffer is passed to Hyper-V.

The check on gaps assumes SG list is continuous, and not chained. This is
 not always true. Failing the check may result in incorrect I/O data
passed to the Hyper-v device.

This code path is not used post Linux 4.1.

Signed-off-by: Long Li <lon...@microsoft.com>
Acked-by: Martin K. Petersen <martin.peter...@oracle.com>

---
 drivers/scsi/storvsc_drv.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 6c52d14..14dc5c6 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -584,17 +584,18 @@ static int do_bounce_buffer(struct scatterlist *sgl, 
unsigned int sg_count)
for (i = 0; i < sg_count; i++) {
if (i == 0) {
/* make sure 1st one does not have hole */
-   if (sgl[i].offset + sgl[i].length != PAGE_SIZE)
+   if (sgl->offset + sgl->length != PAGE_SIZE)
return i;
} else if (i == sg_count - 1) {
/* make sure last one does not have hole */
-   if (sgl[i].offset != 0)
+   if (sgl->offset != 0)
return i;
} else {
/* make sure no hole in the middle */
-   if (sgl[i].length != PAGE_SIZE || sgl[i].offset != 0)
+   if (sgl->length != PAGE_SIZE || sgl->offset != 0)
return i;
}
+   sgl = sg_next(sgl);
}
return -1;
 }
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] storvsc: do not assume SG list is continuous when doing bounce buffers (for 4.1 stable only)

2018-01-09 Thread Long Li
> Christoph,
> 
> > Ok.  If the stable maintainers are ok with your small fix I'm not
> > going to complain too loudly.  But I'm always worried about stable
> > trees diverging too much from mainline.
> 
> The seemingly innocuous transition from SG_GAPS to virt boundary has
> caused several data corruption regressions in the distro kernels. So has the
> corresponding conversion of storvsc.
> 
> As a result, getting the current upstream code into 4.1 would mean
> backporting and testing a significant amount of both block layer and driver
> code. I don't think it's worth the risk. This patch is simple and the path of 
> least
> resistance.
> 
> Acked-by: Martin K. Petersen 

Sorry to bring up this patch again. It seems it hasn't made it to stable 
branches.

Please take a look.

> 
> --
> Martin K. PetersenOracle Linux Engineering
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] storvsc: Avoid excessive host scan on controller change

2017-11-06 Thread Long Li
> From: Martin K. Petersen [mailto:martin.peter...@oracle.com]
> Sent: Monday, November 6, 2017 7:40 PM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Stephen Hemminger
> <sthem...@microsoft.com>; James E . J . Bottomley
> <jbottom...@odin.com>; Martin K . Petersen
> <martin.peter...@oracle.com>; de...@linuxdriverproject.org; linux-
> s...@vger.kernel.org; linux-ker...@vger.kernel.org; Long Li
> <lon...@microsoft.com>
> Subject: Re: [PATCH] storvsc: Avoid excessive host scan on controller change
> 
> 
> Long,
> 
> > When there are multiple disks attached to the same SCSI controller,
> > the host may send several VSTOR_OPERATION_REMOVE_DEVICE or
> > VSTOR_OPERATION_ENUMERATE_BUS messages in a row, to indicate
> there is
> > a change on the SCSI controller. In response, storvsc rescans the SCSI
> > host.
> 
> Applied to 4.15/scsi-queue with some fuzz. Please verify, thanks!

Martin, thank you! All looking good.

Long

> 
> --
> Martin K. PetersenOracle Linux Engineering
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH v2] hv: kvp: Avoid reading past allocated blocks from KVP file

2017-11-01 Thread Long Li
> -Original Message-
> From: Greg KH [mailto:g...@kroah.com]
> Sent: Wednesday, November 1, 2017 11:54 AM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Stephen Hemminger
> <sthem...@microsoft.com>; de...@linuxdriverproject.org; linux-
> ker...@vger.kernel.org; sta...@vger.kernel.org; Paul Meyer
> <paul.me...@microsoft.com>
> Subject: Re: [PATCH v2] hv: kvp: Avoid reading past allocated blocks from
> KVP file
> 
> On Wed, Nov 01, 2017 at 06:39:00PM +, Long Li wrote:
> > > From: Greg KH [mailto:g...@kroah.com]
> > > Sent: Tuesday, October 31, 2017 11:50 PM
> > > To: Long Li <lon...@microsoft.com>
> > > Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> > > <haiya...@microsoft.com>; Stephen Hemminger
> > > <sthem...@microsoft.com>; de...@linuxdriverproject.org; linux-
> > > ker...@vger.kernel.org; sta...@vger.kernel.org; Paul Meyer
> > > <paul.me...@microsoft.com>; Long Li <lon...@microsoft.com>
> > > Subject: Re: [PATCH v2] hv: kvp: Avoid reading past allocated blocks
> > > from KVP file
> > >
> > > On Tue, Oct 31, 2017 at 01:02:35PM -0700, Long Li wrote:
> > > > From: Paul Meyer <paul.me...@microsoft.com>
> > > >
> > > > While reading in more than one block (50) of KVP records, the
> > > > allocation goes per block, but the reads used the total number of
> > > > allocated records (without resetting the pointer/stream). This
> > > > causes the records buffer to overrun when the refresh reads more
> > > > than one block over the previous capacity (e.g. reading more than
> > > > 100 KVP records whereas the in-memory database was empty before).
> > > >
> > > > Fix this by reading the correct number of KVP records from file each
> time.
> > > >
> > > > Signed-off-by: Paul Meyer <paul.me...@microsoft.com>
> > > > Signed-off-by: Long Li <lon...@microsoft.com>
> > > > ---
> > > >  tools/hv/hv_kvp_daemon.c | 66
> > > > 
> > > >  1 file changed, 10 insertions(+), 56 deletions(-)
> > >
> > > When you version a patch, you always have to say what changed below
> > > the
> > > --- line, as the documentation states to do...
> >
> > Sorry it was my bad. Can I resend v2 and indicate what has changed?
> 
> Why wouldn't you?
> 
> But it would be v3 then :)

I have sent a "revised v2". Please let me know if it is acceptable. If not I'll 
send a "v3".

> 
> greg k-h
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[Revised PATCH v2] hv: kvp: Avoid reading past allocated blocks from KVP file

2017-11-01 Thread Long Li
From: Paul Meyer <paul.me...@microsoft.com>

While reading in more than one block (50) of KVP records, the allocation
goes per block, but the reads used the total number of allocated records
(without resetting the pointer/stream). This causes the records buffer to
overrun when the refresh reads more than one block over the previous
capacity (e.g. reading more than 100 KVP records whereas the in-memory
database was empty before).

Fix this by reading the correct number of KVP records from file each time.

Changes since v1:
1. Properly wrapped comment texts.
2. Added the 2nd Signed-off-by.

Signed-off-by: Paul Meyer <paul.me...@microsoft.com>
Signed-off-by: Long Li <lon...@microsoft.com>
---
 tools/hv/hv_kvp_daemon.c | 66 
 1 file changed, 10 insertions(+), 56 deletions(-)

diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index eaa3bec..2094036 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -193,11 +193,13 @@ static void kvp_update_mem_state(int pool)
for (;;) {
readp = [records_read];
records_read += fread(readp, sizeof(struct kvp_record),
-   ENTRIES_PER_BLOCK * num_blocks,
-   filep);
+   ENTRIES_PER_BLOCK * num_blocks - records_read,
+   filep);
 
if (ferror(filep)) {
-   syslog(LOG_ERR, "Failed to read file, pool: %d", pool);
+   syslog(LOG_ERR,
+   "Failed to read file, pool: %d; error: %d %s",
+pool, errno, strerror(errno));
exit(EXIT_FAILURE);
}
 
@@ -224,15 +226,11 @@ static void kvp_update_mem_state(int pool)
fclose(filep);
kvp_release_lock(pool);
 }
+
 static int kvp_file_init(void)
 {
int  fd;
-   FILE *filep;
-   size_t records_read;
char *fname;
-   struct kvp_record *record;
-   struct kvp_record *readp;
-   int num_blocks;
int i;
int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
 
@@ -246,61 +244,17 @@ static int kvp_file_init(void)
 
for (i = 0; i < KVP_POOL_COUNT; i++) {
fname = kvp_file_info[i].fname;
-   records_read = 0;
-   num_blocks = 1;
sprintf(fname, "%s/.kvp_pool_%d", KVP_CONFIG_LOC, i);
fd = open(fname, O_RDWR | O_CREAT | O_CLOEXEC, 0644 /* 
rw-r--r-- */);
 
if (fd == -1)
return 1;
 
-
-   filep = fopen(fname, "re");
-   if (!filep) {
-   close(fd);
-   return 1;
-   }
-
-   record = malloc(alloc_unit * num_blocks);
-   if (record == NULL) {
-   fclose(filep);
-   close(fd);
-   return 1;
-   }
-   for (;;) {
-   readp = [records_read];
-   records_read += fread(readp, sizeof(struct kvp_record),
-   ENTRIES_PER_BLOCK,
-   filep);
-
-   if (ferror(filep)) {
-   syslog(LOG_ERR, "Failed to read file, pool: %d",
-  i);
-   exit(EXIT_FAILURE);
-   }
-
-   if (!feof(filep)) {
-   /*
-* We have more data to read.
-*/
-   num_blocks++;
-   record = realloc(record, alloc_unit *
-   num_blocks);
-   if (record == NULL) {
-   fclose(filep);
-   close(fd);
-   return 1;
-   }
-   continue;
-   }
-   break;
-   }
kvp_file_info[i].fd = fd;
-   kvp_file_info[i].num_blocks = num_blocks;
-   kvp_file_info[i].records = record;
-   kvp_file_info[i].num_records = records_read;
-   fclose(filep);
-
+   kvp_file_info[i].num_blocks = 1;
+   kvp_file_info[i].records = malloc(alloc_unit);
+   kvp_file_info[i].num_records = 0;
+   kvp_update_mem_state(i);
}
 
return 0;
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH v2] hv: kvp: Avoid reading past allocated blocks from KVP file

2017-11-01 Thread Long Li
> From: Greg KH [mailto:g...@kroah.com]
> Sent: Tuesday, October 31, 2017 11:50 PM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Stephen Hemminger
> <sthem...@microsoft.com>; de...@linuxdriverproject.org; linux-
> ker...@vger.kernel.org; sta...@vger.kernel.org; Paul Meyer
> <paul.me...@microsoft.com>; Long Li <lon...@microsoft.com>
> Subject: Re: [PATCH v2] hv: kvp: Avoid reading past allocated blocks from
> KVP file
> 
> On Tue, Oct 31, 2017 at 01:02:35PM -0700, Long Li wrote:
> > From: Paul Meyer <paul.me...@microsoft.com>
> >
> > While reading in more than one block (50) of KVP records, the
> > allocation goes per block, but the reads used the total number of
> > allocated records (without resetting the pointer/stream). This causes
> > the records buffer to overrun when the refresh reads more than one
> > block over the previous capacity (e.g. reading more than 100 KVP
> > records whereas the in-memory database was empty before).
> >
> > Fix this by reading the correct number of KVP records from file each time.
> >
> > Signed-off-by: Paul Meyer <paul.me...@microsoft.com>
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > ---
> >  tools/hv/hv_kvp_daemon.c | 66
> > 
> >  1 file changed, 10 insertions(+), 56 deletions(-)
> 
> When you version a patch, you always have to say what changed below the
> --- line, as the documentation states to do...

Sorry it was my bad. Can I resend v2 and indicate what has changed?

Long

> 
> v3? :)
> 
> thanks,
> 
> greg k-h
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] storvsc: Avoid excessive host scan on controller change

2017-10-31 Thread Long Li
From: Long Li <lon...@microsoft.com>

When there are multiple disks attached to the same SCSI controller,
the host may send several VSTOR_OPERATION_REMOVE_DEVICE or
VSTOR_OPERATION_ENUMERATE_BUS messages in a row, to indicate there is a
change on the SCSI controller. In response, storvsc rescans the SCSI host.

There is no need to do multiple scans on the same host. Fix the code to do
only one scan.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 26 +++---
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 6febcdb..b602f52 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -488,6 +488,8 @@ struct hv_host_device {
unsigned char target;
struct workqueue_struct *handle_error_wq;
char work_q_name[20];
+   struct work_struct host_scan_work;
+   struct Scsi_Host *host;
 };
 
 struct storvsc_scan_work {
@@ -516,13 +518,12 @@ static void storvsc_device_scan(struct work_struct *work)
 
 static void storvsc_host_scan(struct work_struct *work)
 {
-   struct storvsc_scan_work *wrk;
struct Scsi_Host *host;
struct scsi_device *sdev;
+   struct hv_host_device *host_device =
+   container_of(work, struct hv_host_device, host_scan_work);
 
-   wrk = container_of(work, struct storvsc_scan_work, work);
-   host = wrk->host;
-
+   host = host_device->host;
/*
 * Before scanning the host, first check to see if any of the
 * currrently known devices have been hot removed. We issue a
@@ -542,8 +543,6 @@ static void storvsc_host_scan(struct work_struct *work)
 * Now scan the host to discover LUNs that may have been added.
 */
scsi_scan_host(host);
-
-   kfree(wrk);
 }
 
 static void storvsc_remove_lun(struct work_struct *work)
@@ -1119,8 +1118,7 @@ static void storvsc_on_receive(struct storvsc_device 
*stor_device,
 struct vstor_packet *vstor_packet,
 struct storvsc_cmd_request *request)
 {
-   struct storvsc_scan_work *work;
-
+   struct hv_host_device *host_dev;
switch (vstor_packet->operation) {
case VSTOR_OPERATION_COMPLETE_IO:
storvsc_on_io_completion(stor_device, vstor_packet, request);
@@ -1128,13 +1126,9 @@ static void storvsc_on_receive(struct storvsc_device 
*stor_device,
 
case VSTOR_OPERATION_REMOVE_DEVICE:
case VSTOR_OPERATION_ENUMERATE_BUS:
-   work = kmalloc(sizeof(struct storvsc_scan_work), GFP_ATOMIC);
-   if (!work)
-   return;
-
-   INIT_WORK(&work->work, storvsc_host_scan);
-   work->host = stor_device->host;
-   schedule_work(&work->work);
+   host_dev = shost_priv(stor_device->host);
+   queue_work(
+   host_dev->handle_error_wq, &host_dev->host_scan_work);
break;
 
case VSTOR_OPERATION_FCHBA_DATA:
@@ -1747,6 +1741,7 @@ static int storvsc_probe(struct hv_device *device,
 
host_dev->port = host->host_no;
host_dev->dev = device;
+   host_dev->host = host;
 
 
stor_device = kzalloc(sizeof(struct storvsc_device), GFP_KERNEL);
@@ -1815,6 +1810,7 @@ static int storvsc_probe(struct hv_device *device,
create_singlethread_workqueue(host_dev->work_q_name);
if (!host_dev->handle_error_wq)
goto err_out2;
+   INIT_WORK(&host_dev->host_scan_work, storvsc_host_scan);
/* Register the HBA and start the scsi bus scan */
ret = scsi_add_host(host, &device->device);
if (ret != 0)
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] hv: kvp: Avoid reading past allocated blocks from KVP file

2017-10-31 Thread Long Li
> From: Paul Meyer <paul.me...@microsoft.com>
> 
> While reading in more than one block (50) of KVP records, the allocation goes
> per block, but the reads used the total number of allocated records (without
> resetting the pointer/stream). This causes the records buffer to overrun when
> the refresh reads more than one block over the previous capacity (e.g. reading
> more than 100 KVP records whereas the in-memory database was empty before).
> 
> Fix this by reading the correct number of KVP records from file each time.

Please drop this patch. I have sent a v2.

> 
> Signed-off-by: Paul Meyer <paul.me...@microsoft.com>
> Reviewed-by: Long Li <lon...@microsoft.com>
> ---
>  tools/hv/hv_kvp_daemon.c | 66 
> 
>  1 file changed, 10 insertions(+), 56 deletions(-)
> 
> diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index
> eaa3bec..2094036 100644
> --- a/tools/hv/hv_kvp_daemon.c
> +++ b/tools/hv/hv_kvp_daemon.c
> @@ -193,11 +193,13 @@ static void kvp_update_mem_state(int pool)
> for (;;) {
> readp = &record[records_read];
> records_read += fread(readp, sizeof(struct kvp_record),
> -   ENTRIES_PER_BLOCK * num_blocks,
> -   filep);
> +   ENTRIES_PER_BLOCK * num_blocks - records_read,
> +   filep);
> 
> if (ferror(filep)) {
> -   syslog(LOG_ERR, "Failed to read file, pool: %d", 
> pool);
> +   syslog(LOG_ERR,
> +   "Failed to read file, pool: %d; error: %d %s",
> +pool, errno, strerror(errno));
> exit(EXIT_FAILURE);
> }
> 
> @@ -224,15 +226,11 @@ static void kvp_update_mem_state(int pool)
> fclose(filep);
> kvp_release_lock(pool);
>  }
> +
>  static int kvp_file_init(void)
>  {
> int  fd;
> -   FILE *filep;
> -   size_t records_read;
> char *fname;
> -   struct kvp_record *record;
> -   struct kvp_record *readp;
> -   int num_blocks;
> int i;
> int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
> 
> @@ -246,61 +244,17 @@ static int kvp_file_init(void)
> 
> for (i = 0; i < KVP_POOL_COUNT; i++) {
> fname = kvp_file_info[i].fname;
> -   records_read = 0;
> -   num_blocks = 1;
> sprintf(fname, "%s/.kvp_pool_%d", KVP_CONFIG_LOC, i);
> fd = open(fname, O_RDWR | O_CREAT | O_CLOEXEC, 0644 /* 
> rw-r--r--
> */);
> 
> if (fd == -1)
> return 1;
> 
> -
> -   filep = fopen(fname, "re");
> -   if (!filep) {
> -   close(fd);
> -   return 1;
> -   }
> -
> -   record = malloc(alloc_unit * num_blocks);
> -   if (record == NULL) {
> -   fclose(filep);
> -   close(fd);
> -   return 1;
> -   }
> -   for (;;) {
> -   readp = &record[records_read];
> -   records_read += fread(readp, sizeof(struct 
> kvp_record),
> -   ENTRIES_PER_BLOCK,
> -   filep);
> -
> -   if (ferror(filep)) {
> -   syslog(LOG_ERR, "Failed to read file, pool: 
> %d",
> -  i);
> -   exit(EXIT_FAILURE);
> -   }
> -
> -   if (!feof(filep)) {
> -   /*
> -* We have more data to read.
> -*/
> -   num_blocks++;
> -   record = realloc(record, alloc_unit *
> -   num_blocks);
> -   if (record == NULL) {
> -   fclose(filep);
> -   close(fd);
> -   return 1;
> -   }
> -   continue;
> -   }
> -   break;
> -   }
> kvp_file_info[i].fd = fd;
> -   kvp_file_info[i].num_blocks = num_blo

[PATCH v2] hv: kvp: Avoid reading past allocated blocks from KVP file

2017-10-31 Thread Long Li
From: Paul Meyer <paul.me...@microsoft.com>

While reading in more than one block (50) of KVP records, the allocation
goes per block, but the reads used the total number of allocated records
(without resetting the pointer/stream). This causes the records buffer to
overrun when the refresh reads more than one block over the previous
capacity (e.g. reading more than 100 KVP records whereas the in-memory
database was empty before).

Fix this by reading the correct number of KVP records from file each time.

Signed-off-by: Paul Meyer <paul.me...@microsoft.com>
Signed-off-by: Long Li <lon...@microsoft.com>
---
 tools/hv/hv_kvp_daemon.c | 66 
 1 file changed, 10 insertions(+), 56 deletions(-)

diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index eaa3bec..2094036 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -193,11 +193,13 @@ static void kvp_update_mem_state(int pool)
for (;;) {
readp = &record[records_read];
records_read += fread(readp, sizeof(struct kvp_record),
-   ENTRIES_PER_BLOCK * num_blocks,
-   filep);
+   ENTRIES_PER_BLOCK * num_blocks - records_read,
+   filep);
 
if (ferror(filep)) {
-   syslog(LOG_ERR, "Failed to read file, pool: %d", pool);
+   syslog(LOG_ERR,
+   "Failed to read file, pool: %d; error: %d %s",
+pool, errno, strerror(errno));
exit(EXIT_FAILURE);
}
 
@@ -224,15 +226,11 @@ static void kvp_update_mem_state(int pool)
fclose(filep);
kvp_release_lock(pool);
 }
+
 static int kvp_file_init(void)
 {
int  fd;
-   FILE *filep;
-   size_t records_read;
char *fname;
-   struct kvp_record *record;
-   struct kvp_record *readp;
-   int num_blocks;
int i;
int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
 
@@ -246,61 +244,17 @@ static int kvp_file_init(void)
 
for (i = 0; i < KVP_POOL_COUNT; i++) {
fname = kvp_file_info[i].fname;
-   records_read = 0;
-   num_blocks = 1;
sprintf(fname, "%s/.kvp_pool_%d", KVP_CONFIG_LOC, i);
fd = open(fname, O_RDWR | O_CREAT | O_CLOEXEC, 0644 /* 
rw-r--r-- */);
 
if (fd == -1)
return 1;
 
-
-   filep = fopen(fname, "re");
-   if (!filep) {
-   close(fd);
-   return 1;
-   }
-
-   record = malloc(alloc_unit * num_blocks);
-   if (record == NULL) {
-   fclose(filep);
-   close(fd);
-   return 1;
-   }
-   for (;;) {
-   readp = &record[records_read];
-   records_read += fread(readp, sizeof(struct kvp_record),
-   ENTRIES_PER_BLOCK,
-   filep);
-
-   if (ferror(filep)) {
-   syslog(LOG_ERR, "Failed to read file, pool: %d",
-  i);
-   exit(EXIT_FAILURE);
-   }
-
-   if (!feof(filep)) {
-   /*
-* We have more data to read.
-*/
-   num_blocks++;
-   record = realloc(record, alloc_unit *
-   num_blocks);
-   if (record == NULL) {
-   fclose(filep);
-   close(fd);
-   return 1;
-   }
-   continue;
-   }
-   break;
-   }
kvp_file_info[i].fd = fd;
-   kvp_file_info[i].num_blocks = num_blocks;
-   kvp_file_info[i].records = record;
-   kvp_file_info[i].num_records = records_read;
-   fclose(filep);
-
+   kvp_file_info[i].num_blocks = 1;
+   kvp_file_info[i].records = malloc(alloc_unit);
+   kvp_file_info[i].num_records = 0;
+   kvp_update_mem_state(i);
}
 
return 0;
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] hv: kvp: Avoid reading past allocated blocks from KVP file

2017-10-31 Thread Long Li
> On Tue, Oct 31, 2017 at 06:10:00PM +0000, Long Li wrote:
> > > From: Greg KH [mailto:gre...@linuxfoundation.org]
> > > Sent: Tuesday, October 31, 2017 1:43 AM
> > > To: Long Li <lon...@microsoft.com>
> > > Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> > > <haiya...@microsoft.com>; Stephen Hemminger
> > > <sthem...@microsoft.com>; de...@linuxdriverproject.org;
> > > linux-ker...@vger.kernel.org; Paul Meyer <paul.me...@microsoft.com>
> > > Subject: Re: [PATCH] hv: kvp: Avoid reading past allocated blocks
> > > from KVP file
> > >
> > > On Mon, Oct 30, 2017 at 05:08:03PM -0700, Long Li wrote:
> > > > From: Paul Meyer <paul.me...@microsoft.com>
> > > >
> > > > While reading in more than one block (50) of KVP records, the
> > > > allocation goes per block, but the reads used the total number of
> > > > allocated records (without resetting the pointer/stream). This
> > > > causes the records buffer to overrun when the refresh reads more
> > > > than one block over the previous capacity (e.g. reading more than
> > > > 100 KVP records
> > > whereas the in-memory database was empty before).
> > >
> > > Please wrap changelogs at 72 columns like your editor asked you to...
> >
> > I will fix it.
> >
> > >
> > > >
> > > > Fix this by reading the correct number of KVP records from file each 
> > > > time.
> > > >
> > > > Signed-off-by: Paul Meyer <paul.me...@microsoft.com>
> > > > ---
> > >
> > > Why is your name not also on the signed-off-by chain if you are
> > > forwarding on a patch from someone else?
> > >
> > > Is this patch also needed on stable kernels?
> >
> > I'm sending on behalf of Paul Meyer. I will add a "Reviewed-by:" tag.
> 
> Sending on behalf means you should add your signed-off-by, as it is going
> through you.

Thanks. I will re-send the patch.

> 
> thanks,
> 
> greg k-h
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] hv: kvp: Avoid reading past allocated blocks from KVP file

2017-10-31 Thread Long Li
From: Paul Meyer <paul.me...@microsoft.com>

While reading in more than one block (50) of KVP records, the allocation
goes per block, but the reads used the total number of allocated records
(without resetting the pointer/stream). This causes the records buffer to
overrun when the refresh reads more than one block over the previous
capacity (e.g. reading more than 100 KVP records whereas the in-memory
database was empty before).

Fix this by reading the correct number of KVP records from file each time.

Signed-off-by: Paul Meyer <paul.me...@microsoft.com>
Reviewed-by: Long Li <lon...@microsoft.com>
---
 tools/hv/hv_kvp_daemon.c | 66 
 1 file changed, 10 insertions(+), 56 deletions(-)

diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index eaa3bec..2094036 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -193,11 +193,13 @@ static void kvp_update_mem_state(int pool)
for (;;) {
readp = &record[records_read];
records_read += fread(readp, sizeof(struct kvp_record),
-   ENTRIES_PER_BLOCK * num_blocks,
-   filep);
+   ENTRIES_PER_BLOCK * num_blocks - records_read,
+   filep);
 
if (ferror(filep)) {
-   syslog(LOG_ERR, "Failed to read file, pool: %d", pool);
+   syslog(LOG_ERR,
+   "Failed to read file, pool: %d; error: %d %s",
+pool, errno, strerror(errno));
exit(EXIT_FAILURE);
}
 
@@ -224,15 +226,11 @@ static void kvp_update_mem_state(int pool)
fclose(filep);
kvp_release_lock(pool);
 }
+
 static int kvp_file_init(void)
 {
int  fd;
-   FILE *filep;
-   size_t records_read;
char *fname;
-   struct kvp_record *record;
-   struct kvp_record *readp;
-   int num_blocks;
int i;
int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
 
@@ -246,61 +244,17 @@ static int kvp_file_init(void)
 
for (i = 0; i < KVP_POOL_COUNT; i++) {
fname = kvp_file_info[i].fname;
-   records_read = 0;
-   num_blocks = 1;
sprintf(fname, "%s/.kvp_pool_%d", KVP_CONFIG_LOC, i);
fd = open(fname, O_RDWR | O_CREAT | O_CLOEXEC, 0644 /* 
rw-r--r-- */);
 
if (fd == -1)
return 1;
 
-
-   filep = fopen(fname, "re");
-   if (!filep) {
-   close(fd);
-   return 1;
-   }
-
-   record = malloc(alloc_unit * num_blocks);
-   if (record == NULL) {
-   fclose(filep);
-   close(fd);
-   return 1;
-   }
-   for (;;) {
-   readp = &record[records_read];
-   records_read += fread(readp, sizeof(struct kvp_record),
-   ENTRIES_PER_BLOCK,
-   filep);
-
-   if (ferror(filep)) {
-   syslog(LOG_ERR, "Failed to read file, pool: %d",
-  i);
-   exit(EXIT_FAILURE);
-   }
-
-   if (!feof(filep)) {
-   /*
-* We have more data to read.
-*/
-   num_blocks++;
-   record = realloc(record, alloc_unit *
-   num_blocks);
-   if (record == NULL) {
-   fclose(filep);
-   close(fd);
-   return 1;
-   }
-   continue;
-   }
-   break;
-   }
kvp_file_info[i].fd = fd;
-   kvp_file_info[i].num_blocks = num_blocks;
-   kvp_file_info[i].records = record;
-   kvp_file_info[i].num_records = records_read;
-   fclose(filep);
-
+   kvp_file_info[i].num_blocks = 1;
+   kvp_file_info[i].records = malloc(alloc_unit);
+   kvp_file_info[i].num_records = 0;
+   kvp_update_mem_state(i);
}
 
return 0;
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] hv: kvp: Avoid reading past allocated blocks from KVP file

2017-10-31 Thread Long Li
> From: Greg KH [mailto:gre...@linuxfoundation.org]
> Sent: Tuesday, October 31, 2017 1:43 AM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Stephen Hemminger <sthem...@microsoft.com>;
> de...@linuxdriverproject.org; linux-ker...@vger.kernel.org; Paul Meyer
> <paul.me...@microsoft.com>
> Subject: Re: [PATCH] hv: kvp: Avoid reading past allocated blocks from KVP 
> file
> 
> On Mon, Oct 30, 2017 at 05:08:03PM -0700, Long Li wrote:
> > From: Paul Meyer <paul.me...@microsoft.com>
> >
> > While reading in more than one block (50) of KVP records, the
> > allocation goes per block, but the reads used the total number of
> > allocated records (without resetting the pointer/stream). This causes
> > the records buffer to overrun when the refresh reads more than one
> > block over the previous capacity (e.g. reading more than 100 KVP records
> whereas the in-memory database was empty before).
> 
> Please wrap changelogs at 72 columns like your editor asked you to...

I will fix it.

> 
> >
> > Fix this by reading the correct number of KVP records from file each time.
> >
> > Signed-off-by: Paul Meyer <paul.me...@microsoft.com>
> > ---
> 
> Why is your name not also on the signed-off-by chain if you are forwarding on 
> a
> patch from someone else?
> 
> Is this patch also needed on stable kernels?

I'm sending on behalf of Paul Meyer. I will add a "Reviewed-by:" tag.

Yes it should also go stable. Will send v2 to include that.

> 
> thanks,
> 
> greg k-h
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] hv: kvp: Avoid reading past allocated blocks from KVP file

2017-10-30 Thread Long Li
From: Paul Meyer <paul.me...@microsoft.com>

While reading in more than one block (50) of KVP records, the allocation goes
per block, but the reads used the total number of allocated records (without
resetting the pointer/stream). This causes the records buffer to overrun when
the refresh reads more than one block over the previous capacity (e.g. reading
more than 100 KVP records whereas the in-memory database was empty before).

Fix this by reading the correct number of KVP records from file each time.

Signed-off-by: Paul Meyer <paul.me...@microsoft.com>
---
 tools/hv/hv_kvp_daemon.c | 66 
 1 file changed, 10 insertions(+), 56 deletions(-)

diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index eaa3bec..2094036 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -193,11 +193,13 @@ static void kvp_update_mem_state(int pool)
for (;;) {
readp = &record[records_read];
records_read += fread(readp, sizeof(struct kvp_record),
-   ENTRIES_PER_BLOCK * num_blocks,
-   filep);
+   ENTRIES_PER_BLOCK * num_blocks - records_read,
+   filep);
 
if (ferror(filep)) {
-   syslog(LOG_ERR, "Failed to read file, pool: %d", pool);
+   syslog(LOG_ERR,
+   "Failed to read file, pool: %d; error: %d %s",
+pool, errno, strerror(errno));
exit(EXIT_FAILURE);
}
 
@@ -224,15 +226,11 @@ static void kvp_update_mem_state(int pool)
fclose(filep);
kvp_release_lock(pool);
 }
+
 static int kvp_file_init(void)
 {
int  fd;
-   FILE *filep;
-   size_t records_read;
char *fname;
-   struct kvp_record *record;
-   struct kvp_record *readp;
-   int num_blocks;
int i;
int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
 
@@ -246,61 +244,17 @@ static int kvp_file_init(void)
 
for (i = 0; i < KVP_POOL_COUNT; i++) {
fname = kvp_file_info[i].fname;
-   records_read = 0;
-   num_blocks = 1;
sprintf(fname, "%s/.kvp_pool_%d", KVP_CONFIG_LOC, i);
fd = open(fname, O_RDWR | O_CREAT | O_CLOEXEC, 0644 /* 
rw-r--r-- */);
 
if (fd == -1)
return 1;
 
-
-   filep = fopen(fname, "re");
-   if (!filep) {
-   close(fd);
-   return 1;
-   }
-
-   record = malloc(alloc_unit * num_blocks);
-   if (record == NULL) {
-   fclose(filep);
-   close(fd);
-   return 1;
-   }
-   for (;;) {
-   readp = &record[records_read];
-   records_read += fread(readp, sizeof(struct kvp_record),
-   ENTRIES_PER_BLOCK,
-   filep);
-
-   if (ferror(filep)) {
-   syslog(LOG_ERR, "Failed to read file, pool: %d",
-  i);
-   exit(EXIT_FAILURE);
-   }
-
-   if (!feof(filep)) {
-   /*
-* We have more data to read.
-*/
-   num_blocks++;
-   record = realloc(record, alloc_unit *
-   num_blocks);
-   if (record == NULL) {
-   fclose(filep);
-   close(fd);
-   return 1;
-   }
-   continue;
-   }
-   break;
-   }
kvp_file_info[i].fd = fd;
-   kvp_file_info[i].num_blocks = num_blocks;
-   kvp_file_info[i].records = record;
-   kvp_file_info[i].num_records = records_read;
-   fclose(filep);
-
+   kvp_file_info[i].num_blocks = 1;
+   kvp_file_info[i].records = malloc(alloc_unit);
+   kvp_file_info[i].num_records = 0;
+   kvp_update_mem_state(i);
}
 
return 0;
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH V2] scsi: storvsc: Allow only one remove lun work item to be issued per lun

2017-10-19 Thread Long Li
> On Tue, Oct 17, 2017 at 01:35:21PM -0400, Cathy Avery wrote:
> > +   /*
> > +* Set the error handler work queue.
> > +*/
> > +   snprintf(host_dev->work_q_name, sizeof(host_dev-
> >work_q_name),
> > +"storvsc_error_wq_%d", host->host_no);
> > +   host_dev->handle_error_wq =
> > +   create_singlethread_workqueue(host_dev-
> >work_q_name);
> 
> If you use alloc_ordered_workqueue directly instead of
> create_singlethread_workqueue you can pass a format string and don't need
> the separate allocation.
> 
> But I'm not sure if Tejun is fine with using __WQ_LEGACY directly..
> 
> Except for this nit this looks fine to me:
> 
> Reviewed-by: Christoph Hellwig <h...@lst.de>

The work storvsc_host_scan (scheduled from function storvsc_on_receive) should 
also use this workqueue. We can do it in another patch.

Reviewed-by: Long Li <lon...@microsoft.com>

> ___
> devel mailing list
> de...@linuxdriverproject.org
> https://na01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fdriverd
> ev.linuxdriverproject.org%2Fmailman%2Flistinfo%2Fdriverdev-
> devel=02%7C01%7Clongli%40microsoft.com%7C9c303c3630ef490cecc3
> 08d5170702a2%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C636440
> 241242573253=tbCBOnKxtRR38rAdsBDa7zA0Jc2XwrySTsH3uyRxHxA%
> 3D=0
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] storvsc: fix memory leak on ring buffer busy

2017-08-30 Thread Long Li
> Long,
> 
> >> Which kernel version is this patch aimed at?
> >
> > Martin, thanks for pointing this out. This should also go to stable
> > trees.
> 
> The reason I asked is that it didn't apply to neither fixes, nor for-next.
> 
> I applied it to 4.13/scsi-fixes by hand and added a stable tag.

Thank you. I'm sorry I misunderstood your question. I just realized I was 
working on an experimental branch. Sorry about that.

> 
> --
> Martin K. Petersen    Oracle Linux Engineering
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] storvsc: fix memory leak on ring buffer busy

2017-08-29 Thread Long Li
> -Original Message-
> From: Martin K. Petersen [mailto:martin.peter...@oracle.com]
> Sent: Tuesday, August 29, 2017 6:31 PM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; James E . J . Bottomley
> <jbottom...@odin.com>; de...@linuxdriverproject.org; linux-
> s...@vger.kernel.org; linux-ker...@vger.kernel.org; Long Li
> <lon...@microsoft.com>
> Subject: Re: [PATCH] storvsc: fix memory leak on ring buffer busy
> 
> 
> Long,
> 
> > When storvsc is sending I/O to Hyper-v, it may allocate a bigger
> > buffer descriptor for large data payload that can't fit into a
> > pre-allocated buffer descriptor. This bigger buffer is freed on return
> > path.
> >
> > If I/O request to Hyper-v fails due to ring buffer busy, the storvsc
> > allocated buffer descriptor should also be freed.
> 
> Which kernel version is this patch aimed at?

Martin, thanks for pointing this out. This should also go to stable trees.

Cc: sta...@vger.kernel.org
> 
> --
> Martin K. Petersen    Oracle Linux Engineering
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] storvsc: fix memory leak on ring buffer busy

2017-08-28 Thread Long Li
From: Long Li <lon...@microsoft.com>

When storvsc is sending I/O to Hyper-v, it may allocate a bigger buffer
descriptor for large data payload that can't fit into a pre-allocated
buffer descriptor. This bigger buffer is freed on return path.

If I/O request to Hyper-v fails due to ring buffer busy, the storvsc allocated
buffer descriptor should also be freed.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 009adb0..db52882 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1657,6 +1657,8 @@ static int storvsc_queuecommand(struct Scsi_Host *host, 
struct scsi_cmnd *scmnd)
ret = storvsc_do_io(dev, cmd_request, smp_processor_id());
 
if (ret == -EAGAIN) {
+   if (payload_sz > sizeof(cmd_request->mpb))
+   kfree(payload);
/* no more space */
return SCSI_MLQUEUE_DEVICE_BUSY;
}
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] storvsc: do not assume SG list is continuous when doing bounce buffers (for 4.1 stable only)

2017-08-22 Thread Long Li
 
> Wouldn't it make sense to backport the changes to set the virt_boundary
> (which probably still is the SG_GAPS flag in such an old kernel)?

We can make storvsc use SG_GAPS. But the following patch is missing from the 4.1 
stable block layer, and it is needed to make this work in some I/O situations. 
Backporting it is more difficult and affects other code.

commit 5e7c4274a70aa2d6f485996d0ca1dad52d0039ca
Author: Jens Axboe 
Date:   Thu Sep 3 19:28:20 2015 +0300

block: Check for gaps on front and back merges

We are checking for gaps to previous bio_vec, which can
only detect back merges gaps. Moreover, at the point where
we check for a gap, we don't know if we will attempt a back
or a front merge. Thus, check for gap to prev in a back merge
attempt and check for a gap to next in a front merge attempt.

Signed-off-by: Jens Axboe 
[sagig: Minor rename change]
Signed-off-by: Sagi Grimberg 
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] storvsc: do not assume SG list is continuous when doing bounce buffers (for 4.1 stable only)

2017-08-21 Thread Long Li
From: Long Li <lon...@microsoft.com>

This patch is for linux-stable 4.1 branch only.

storvsc checks the SG list for gaps before passing them to Hyper-v device.
If there are gaps, data is copied to a bounce buffer and a continuous data
buffer is passed to Hyper-V.

The check on gaps assumes SG list is continuous, and not chained. This is
not always true. Failing the check may result in incorrect I/O data
passed to the Hyper-v device.

This code path is not used post Linux 4.1.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 6c52d14..14dc5c6 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -584,17 +584,18 @@ static int do_bounce_buffer(struct scatterlist *sgl, 
unsigned int sg_count)
for (i = 0; i < sg_count; i++) {
if (i == 0) {
/* make sure 1st one does not have hole */
-   if (sgl[i].offset + sgl[i].length != PAGE_SIZE)
+   if (sgl->offset + sgl->length != PAGE_SIZE)
return i;
} else if (i == sg_count - 1) {
/* make sure last one does not have hole */
-   if (sgl[i].offset != 0)
+   if (sgl->offset != 0)
return i;
} else {
/* make sure no hole in the middle */
-   if (sgl[i].length != PAGE_SIZE || sgl[i].offset != 0)
+   if (sgl->length != PAGE_SIZE || sgl->offset != 0)
return i;
}
+   sgl = sg_next(sgl);
}
return -1;
 }
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [REGRESSION][Stable][v3.12.y][v4.4.y][v4.9.y][v4.10.y][v4.11-rc1] scsi: storvsc: properly set residual data length on errors

2017-04-03 Thread Long Li
I think we need both going forward. They addressed different problems.

> -Original Message-
> From: devel [mailto:driverdev-devel-boun...@linuxdriverproject.org] On
> Behalf Of Cathy Avery
> Sent: Thursday, March 30, 2017 6:52 AM
> To: driverdev-devel@linuxdriverproject.org; Stephen Hemminger
> <sthem...@microsoft.com>; gre...@linuxfoundation.org
> Subject: Re: [REGRESSION][Stable][v3.12.y][v4.4.y][v4.9.y][v4.10.y][v4.11-rc1]
> scsi: storvsc: properly set residual data length on errors
> 
> Hi,
> 
> So which commit is moving forward and which one is not?
> 
> f1c635b439a5c01776fe3a25b1e2dc546ea82e6f or
> 40630f462824ee24bc00d692865c86c3828094e0?
> 
> We have backported 40630f462824ee24bc00d692865c86c3828094e0 and I am
> unclear if this is a regression and must be removed or it is a regression but 
> is
> fixed by f1c635b439a5c01776fe3a25b1e2dc546ea82e6f and can remain.
> 
> Thanks,
> 
> Cathy
> 
> On 03/28/2017 12:14 PM, Stephen Hemminger wrote:
> > I decided not to send it to stable since problem was only observed on
> > 4.11 but it is probably endemic to all GEN2 VM's
> >
> > -Original Message-
> > From: Joseph Salisbury [mailto:joseph.salisb...@canonical.com]
> > Sent: Tuesday, March 28, 2017 7:29 AM
> > To: Stephen Hemminger <sthem...@microsoft.com>; Long Li
> > <lon...@microsoft.com>
> > Cc: KY Srinivasan <k...@microsoft.com>; Martin K. Petersen
> > <martin.peter...@oracle.com>; Haiyang Zhang
> <haiya...@microsoft.com>;
> > j...@linux.vnet.ibm.com; de...@linuxdriverproject.org; linux-scsi
> > <linux-s...@vger.kernel.org>; LKML <linux-ker...@vger.kernel.org>;
> > sta...@vger.kernel.org; Greg KH <gre...@linuxfoundation.org>
> > Subject: Re:
> > [REGRESSION][Stable][v3.12.y][v4.4.y][v4.9.y][v4.10.y][v4.11-rc1]
> > scsi: storvsc: properly set residual data length on errors
> >
> > On 03/27/2017 06:14 PM, Stephen Hemminger wrote:
> >> Are you sure the real problem is not the one fixed by this commit?
> >>
> >> commit f1c635b439a5c01776fe3a25b1e2dc546ea82e6f
> >> Author: Stephen Hemminger <step...@networkplumber.org>
> >> Date:   Tue Mar 7 09:15:53 2017 -0800
> >>
> >>  scsi: storvsc: Workaround for virtual DVD SCSI version
> >>
> >>  Hyper-V host emulation of SCSI for virtual DVD device reports SCSI
> >>  version 0 (UNKNOWN) but is still capable of supporting REPORTLUN.
> >>
> >>  Without this patch, a GEN2 Linux guest on Hyper-V will not boot 4.11
> >>  successfully with virtual DVD ROM device. What happens is that the
> SCSI
> >>  scan process falls back to doing sequential probing by INQUIRY.  But 
> >> the
> >>  storvsc driver has a previous workaround that masks/blocks all errors
> >>  reports from INQUIRY (or MODE_SENSE) commands.  This workaround
> causes
> >>  the scan to then populate a full set of bogus LUN's on the target and
> >>  then sends kernel spinning off into a death spiral doing block reads 
> >> on
> >>  the non-existent LUNs.
> >>
> >>  By setting the correct blacklist flags, the target with the DVD device
> >>  is scanned with REPORTLUN and that works correctly.
> >>
> >>  Patch needs to go in current 4.11, it is safe but not necessary in 
> >> older
> >>  kernels.
> >>
> >>  Signed-off-by: Stephen Hemminger <sthem...@microsoft.com>
> >>  Reviewed-by: K. Y. Srinivasan <k...@microsoft.com>
> >>  Reviewed-by: Christoph Hellwig <h...@lst.de>
> >>  Signed-off-by: Martin K. Petersen <martin.peter...@oracle.com>
> >>
> >> -Original Message-
> >> From: Joseph Salisbury [mailto:joseph.salisb...@canonical.com]
> >> Sent: Monday, March 27, 2017 1:22 PM
> >> To: Long Li <lon...@microsoft.com>
> >> Cc: KY Srinivasan <k...@microsoft.com>; Martin K. Petersen
> >> <martin.peter...@oracle.com>; Haiyang Zhang
> <haiya...@microsoft.com>;
> >> Stephen Hemminger <sthem...@microsoft.com>;
> j...@linux.vnet.ibm.com;
> >> de...@linuxdriverproject.org; linux-scsi
> >> <linux-s...@vger.kernel.org>; LKML <linux-ker...@vger.kernel.org>;
> >> sta...@vger.kernel.org; Greg KH <gre...@linuxfoundation.org>
> >> Subject:
> >> [REGRESSION][Stable][v3.12.y][v4.4.y][v4.9.y][v4.10.y][v4.11-rc1]
> >> scsi: storvsc: properly set residual data length on errors
> >>
> &g

RE: [PATCH 2/2] pci-hyperv: Fix an atomic bug

2017-03-27 Thread Long Li
> -Original Message-
> From: devel [mailto:driverdev-devel-boun...@linuxdriverproject.org] On
> Behalf Of k...@exchange.microsoft.com
> Sent: Friday, March 24, 2017 11:07 AM
> To: helg...@kernel.org; linux-...@vger.kernel.org; linux-
> ker...@vger.kernel.org; de...@linuxdriverproject.org; o...@aepfle.de;
> a...@canonical.com; vkuzn...@redhat.com; jasow...@redhat.com;
> leann.ogasaw...@canonical.com; marcelo.ce...@canonical.com; Stephen
> Hemminger <sthem...@microsoft.com>
> Cc: sta...@vger.kernel.org
> Subject: [PATCH 2/2] pci-hyperv: Fix an atomic bug
> 
> From: K. Y. Srinivasan <k...@microsoft.com>
> 
> The memory allocation here needs to be non-blocking.
> Fix the issue.
> 
> Signed-off-by: K. Y. Srinivasan <k...@microsoft.com>
> Cc: <sta...@vger.kernel.org>

Reviewed-by: Long Li <lon...@microsoft.com>

> ---
>  drivers/pci/host/pci-hyperv.c |2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
> index 32a16fb..85088a1 100644
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -877,7 +877,7 @@ static void hv_compose_msi_msg(struct irq_data
> *data, struct msi_msg *msg)
> hv_int_desc_free(hpdev, int_desc);
> }
> 
> -   int_desc = kzalloc(sizeof(*int_desc), GFP_KERNEL);
> +   int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC);
> if (!int_desc)
> goto drop_reference;
> 
> --
> 1.7.1
> 
> ___
> devel mailing list
> de...@linuxdriverproject.org
> http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 1/2] pci-hyperv: Fix a bug in specifying CPU affinity

2017-03-27 Thread Long Li
> -Original Message-
> From: devel [mailto:driverdev-devel-boun...@linuxdriverproject.org] On
> Behalf Of k...@exchange.microsoft.com
> Sent: Friday, March 24, 2017 11:07 AM
> To: helg...@kernel.org; linux-...@vger.kernel.org; linux-
> ker...@vger.kernel.org; de...@linuxdriverproject.org; o...@aepfle.de;
> a...@canonical.com; vkuzn...@redhat.com; jasow...@redhat.com;
> leann.ogasaw...@canonical.com; marcelo.ce...@canonical.com; Stephen
> Hemminger <sthem...@microsoft.com>
> Cc: sta...@vger.kernel.org
> Subject: [PATCH 1/2] pci-hyperv: Fix a bug in specifying CPU affinity
> 
> From: K. Y. Srinivasan <k...@microsoft.com>
> 
> When we have 32 or more CPUs in the affinity mask, we should use a special
> constant to specify that to the host. Fix this issue.
> 
> Signed-off-by: K. Y. Srinivasan <k...@microsoft.com>
> Cc: <sta...@vger.kernel.org>

Reviewed-by: Long Li <lon...@microsoft.com>

> ---
>  drivers/pci/host/pci-hyperv.c |   11 ---
>  1 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
> index ada9856..32a16fb 100644
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -72,6 +72,7 @@ enum {
> PCI_PROTOCOL_VERSION_CURRENT = PCI_PROTOCOL_VERSION_1_1  };
> 
> +#define CPU_AFFINITY_ALL   -1ULL
>  #define PCI_CONFIG_MMIO_LENGTH 0x2000
>  #define CFG_PAGE_OFFSET 0x1000
>  #define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH -
> CFG_PAGE_OFFSET) @@ -897,9 +898,13 @@ static void
> hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
>  * processors because Hyper-V only supports 64 in a guest.
>  */
> affinity = irq_data_get_affinity_mask(data);
> -   for_each_cpu_and(cpu, affinity, cpu_online_mask) {
> -   int_pkt->int_desc.cpu_mask |=
> -   (1ULL << vmbus_cpu_number_to_vp_number(cpu));
> +   if (cpumask_weight(affinity) >= 32) {
> +   int_pkt->int_desc.cpu_mask = CPU_AFFINITY_ALL;
> +   } else {
> +   for_each_cpu_and(cpu, affinity, cpu_online_mask) {
> +   int_pkt->int_desc.cpu_mask |=
> +   (1ULL << vmbus_cpu_number_to_vp_number(cpu));
> +   }
> }
> 
> ret = vmbus_sendpacket(hpdev->hbus->hdev->channel, int_pkt,
> --
> 1.7.1
> 
> ___
> devel mailing list
> de...@linuxdriverproject.org
> http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v3] HV: properly delay KVP packets when negotiation is in progress

2017-03-24 Thread Long Li
From: Long Li <lon...@microsoft.com>

The host may send multiple negotiation packets (due to timeout) before the 
KVP user-mode daemon is connected. We need to defer processing those packets
until the daemon is negotiated and connected. It's okay for guest to respond
to all negotiation packets.

In addition, the host may send multiple staged KVP requests as soon as 
negotiation is done. We need to properly process those packets using one 
tasklet for exclusive access to ring buffer.

This patch is based on the work of Nick Meier <nick.me...@microsoft.com>.

The patch v3 has incorporated suggestions from 
Vitaly Kuznetsov <vkuzn...@redhat.com>.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/hv/hv_kvp.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index de26371..be7222e 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -113,7 +113,7 @@ static void kvp_poll_wrapper(void *channel)
 {
/* Transaction is finished, reset the state here to avoid races. */
kvp_transaction.state = HVUTIL_READY;
-   hv_kvp_onchannelcallback(channel);
+   tasklet_schedule(&((struct vmbus_channel*)channel)->callback_event);
 }
 
 static void kvp_register_done(void)
@@ -160,7 +160,7 @@ static void kvp_timeout_func(struct work_struct *dummy)
 
 static void kvp_host_handshake_func(struct work_struct *dummy)
 {
-   hv_poll_channel(kvp_transaction.recv_channel, hv_kvp_onchannelcallback);
+   tasklet_schedule(&kvp_transaction.recv_channel->callback_event);
 }
 
 static int kvp_handle_handshake(struct hv_kvp_msg *msg)
@@ -628,16 +628,17 @@ void hv_kvp_onchannelcallback(void *context)
 NEGO_IN_PROGRESS,
 NEGO_FINISHED} host_negotiatied = NEGO_NOT_STARTED;
 
-   if (host_negotiatied == NEGO_NOT_STARTED &&
-   kvp_transaction.state < HVUTIL_READY) {
+   if (kvp_transaction.state < HVUTIL_READY) {
/*
 * If userspace daemon is not connected and host is asking
 * us to negotiate we need to delay to not lose messages.
 * This is important for Failover IP setting.
 */
-   host_negotiatied = NEGO_IN_PROGRESS;
-   schedule_delayed_work(&kvp_host_handshake_work,
+   if (host_negotiatied == NEGO_NOT_STARTED) {
+   host_negotiatied = NEGO_IN_PROGRESS;
+   schedule_delayed_work(&kvp_host_handshake_work,
  HV_UTIL_NEGO_TIMEOUT * HZ);
+   }
return;
}
if (kvp_transaction.state > HVUTIL_READY)
@@ -705,6 +706,7 @@ void hv_kvp_onchannelcallback(void *context)
   VM_PKT_DATA_INBAND, 0);
 
host_negotiatied = NEGO_FINISHED;
+   hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
}
 
 }
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v3] HV: properly delay KVP packets when negotiation is in progress

2017-03-24 Thread Long Li
From: Long Li <lon...@microsoft.com>

The host may send multiple negotiation packets (due to timeout) before the 
KVP user-mode daemon is connected. We need to defer processing those packets
until the daemon is negotiated and connected. It's okay for guest to respond
to all negotiation packets.

In addition, the host may send multiple staged KVP requests as soon as 
negotiation is done. We need to properly process those packets using one 
tasklet for exclusive access to ring buffer.

This patch is based on the work of Nick Meier <nick.me...@microsoft.com>.

The patch v3 has incorporated suggestions from 
Vitaly Kuznetsov <vkuzn...@redhat.com>.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/hv/hv_kvp.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index de26371..be7222e 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -113,7 +113,7 @@ static void kvp_poll_wrapper(void *channel)
 {
/* Transaction is finished, reset the state here to avoid races. */
kvp_transaction.state = HVUTIL_READY;
-   hv_kvp_onchannelcallback(channel);
+   tasklet_schedule(&((struct vmbus_channel*)channel)->callback_event);
 }
 
 static void kvp_register_done(void)
@@ -160,7 +160,7 @@ static void kvp_timeout_func(struct work_struct *dummy)
 
 static void kvp_host_handshake_func(struct work_struct *dummy)
 {
-   hv_poll_channel(kvp_transaction.recv_channel, hv_kvp_onchannelcallback);
+   tasklet_schedule(&kvp_transaction.recv_channel->callback_event);
 }
 
 static int kvp_handle_handshake(struct hv_kvp_msg *msg)
@@ -628,16 +628,17 @@ void hv_kvp_onchannelcallback(void *context)
 NEGO_IN_PROGRESS,
 NEGO_FINISHED} host_negotiatied = NEGO_NOT_STARTED;
 
-   if (host_negotiatied == NEGO_NOT_STARTED &&
-   kvp_transaction.state < HVUTIL_READY) {
+   if (kvp_transaction.state < HVUTIL_READY) {
/*
 * If userspace daemon is not connected and host is asking
 * us to negotiate we need to delay to not lose messages.
 * This is important for Failover IP setting.
 */
-   host_negotiatied = NEGO_IN_PROGRESS;
-   schedule_delayed_work(&kvp_host_handshake_work,
+   if (host_negotiatied == NEGO_NOT_STARTED) {
+   host_negotiatied = NEGO_IN_PROGRESS;
+   schedule_delayed_work(&kvp_host_handshake_work,
  HV_UTIL_NEGO_TIMEOUT * HZ);
+   }
return;
}
if (kvp_transaction.state > HVUTIL_READY)
@@ -705,6 +706,7 @@ void hv_kvp_onchannelcallback(void *context)
   VM_PKT_DATA_INBAND, 0);
 
host_negotiatied = NEGO_FINISHED;
+   hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
}
 
 }
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 2/2 v5] pci-hyperv: lock pci bus on device eject

2017-03-23 Thread Long Li
From: Long Li <lon...@microsoft.com>

A PCI_EJECT message can arrive at the same time we are calling
pci_scan_child_bus in the workqueue for the previous PCI_BUS_RELATIONS
message or in create_root_hv_pci_bus(), in this case we could potentially
modify the bus from multiple places.

Properly lock the bus access.

Thanks Dexuan Cui <de...@microsoft.com> for pointing out the race condition
in create_root_hv_pci_bus().

Signed-off-by: Long Li <lon...@microsoft.com>
Reported-by: Xiaofeng Wang <xiaof...@redhat.com>
Acked-by: K. Y. Srinivasan <k...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 39fafda..a1b3c19 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -1209,9 +1209,11 @@ static int create_root_hv_pci_bus(struct 
hv_pcibus_device *hbus)
hbus->pci_bus->msi = &hbus->msi_chip;
hbus->pci_bus->msi->dev = &hbus->hdev->device;
 
+   pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_bus_assign_resources(hbus->pci_bus);
pci_bus_add_devices(hbus->pci_bus);
+   pci_unlock_rescan_remove();
hbus->state = hv_pcibus_installed;
return 0;
 }
@@ -1612,8 +1614,10 @@ static void hv_eject_device_work(struct work_struct 
*work)
pdev = pci_get_domain_bus_and_slot(hpdev->hbus->sysdata.domain, 0,
   wslot);
if (pdev) {
+   pci_lock_rescan_remove();
pci_stop_and_remove_bus_device(pdev);
pci_dev_put(pdev);
+   pci_unlock_rescan_remove();
}
 
spin_lock_irqsave(&hpdev->hbus->device_list_lock, flags);
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 1/2 v5] pci-hyperv: properly handle pci bus remove

2017-03-23 Thread Long Li
From: Long Li <lon...@microsoft.com>

hv_pci_devices_present is called in hv_pci_remove when we remove a PCI
device from host (e.g. by disabling SRIOV on a device). In hv_pci_remove,
the bus is already removed before the call, so we don't need to rescan the
bus in the workqueue scheduled from hv_pci_devices_present.

By introducing status hv_pcibus_removed, we can avoid this situation.

Signed-off-by: Long Li <lon...@microsoft.com>
Reported-by: Xiaofeng Wang <xiaof...@redhat.com>
Acked-by: K. Y. Srinivasan <k...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index ada9856..39fafda 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -350,6 +350,7 @@ enum hv_pcibus_state {
hv_pcibus_init = 0,
hv_pcibus_probed,
hv_pcibus_installed,
+   hv_pcibus_removed,
hv_pcibus_maximum
 };
 
@@ -1504,13 +1505,24 @@ static void pci_devices_present_work(struct work_struct 
*work)
put_pcichild(hpdev, hv_pcidev_ref_initial);
}
 
-   /* Tell the core to rescan bus because there may have been changes. */
-   if (hbus->state == hv_pcibus_installed) {
+   switch(hbus->state) {
+   case hv_pcibus_installed:
+   /*
+   * Tell the core to rescan bus
+   * because there may have been changes.
+   */
pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
-   } else {
+   break;
+
+   case hv_pcibus_init:
+   case hv_pcibus_probed:
survey_child_resources(hbus);
+   break;
+
+   default:
+   break;
}
 
up(&hbus->enum_sem);
@@ -2185,6 +2197,7 @@ static int hv_pci_probe(struct hv_device *hdev,
hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
if (!hbus)
return -ENOMEM;
+   hbus->state = hv_pcibus_init;
 
/*
 * The PCI bus "domain" is what is called "segment" in ACPI and
@@ -2348,6 +2361,7 @@ static int hv_pci_remove(struct hv_device *hdev)
pci_stop_root_bus(hbus->pci_bus);
pci_remove_root_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
+   hbus->state = hv_pcibus_removed;
}
 
hv_pci_bus_exit(hdev);
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[Please ignore this is a test] pci-hyperv: properly handle pci bus remove

2017-03-23 Thread Long Li
From: Long Li <lon...@microsoft.com>

hv_pci_devices_present is called in hv_pci_remove when we remove a PCI 
device from host (e.g. by disabling SRIOV on a device). In hv_pci_remove,
the bus is already removed before the call, so we don't need to rescan the 
bus in the workqueue scheduled from hv_pci_devices_present. 

By introducing status hv_pcibus_removed, we can avoid this situation.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index ada9856..8a92244 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -350,6 +350,7 @@ enum hv_pcibus_state {
hv_pcibus_init = 0,
hv_pcibus_probed,
hv_pcibus_installed,
+   hv_pcibus_removed,
hv_pcibus_maximum
 };
 
@@ -1504,12 +1505,19 @@ static void pci_devices_present_work(struct work_struct 
*work)
put_pcichild(hpdev, hv_pcidev_ref_initial);
}
 
-   /* Tell the core to rescan bus because there may have been changes. */
-   if (hbus->state == hv_pcibus_installed) {
+   switch(hbus->state) {
+   case hv_pcibus_installed:
+   /*
+   * Tell the core to rescan bus
+   * because there may have been changes.
+   */
pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
-   } else {
+   break;
+
+   case hv_pcibus_init:
+   case hv_pcibus_probed:
survey_child_resources(hbus);
}
 
@@ -2185,6 +2193,7 @@ static int hv_pci_probe(struct hv_device *hdev,
hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
if (!hbus)
return -ENOMEM;
+   hbus->state = hv_pcibus_init;
 
/*
 * The PCI bus "domain" is what is called "segment" in ACPI and
@@ -2348,6 +2357,7 @@ static int hv_pci_remove(struct hv_device *hdev)
pci_stop_root_bus(hbus->pci_bus);
pci_remove_root_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
+   hbus->state = hv_pcibus_removed;
}
 
hv_pci_bus_exit(hdev);
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH v2] HV: properly delay KVP packets when negotiation is in progress

2017-03-23 Thread Long Li


> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
> Sent: Thursday, March 23, 2017 9:04 AM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Stephen Hemminger
> <sthem...@microsoft.com>; de...@linuxdriverproject.org; linux-
> ker...@vger.kernel.org; sta...@vger.kernel.org
> Subject: Re: [PATCH v2] HV: properly delay KVP packets when negotiation is
> in progress
> 
> Long Li <lon...@microsoft.com> writes:
> 
> > The host may send multiple negotiation packets (due to timeout) before
> > the KVP user-mode daemon is connected. We need to defer processing
> > those packets until the daemon is negotiated and connected. It's okay
> > for guest to respond to all negotiation packets.
> >
> > In addition, the host may send multiple staged KVP requests as soon as
> > negotiation is done. We need to properly process those packets using
> > one tasklet for exclusive access to ring buffer.
> >
> > This patch is based on the work of Nick Meier
> > <nick.me...@microsoft.com>
> >
> > The patch v2 has incorporated suggestion from Vitaly Kuznetsov
> > <vkuzn...@redhat.com>.
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > ---
> >  drivers/hv/hv_kvp.c | 12 +++-
> >  1 file changed, 7 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index
> > de26371..845b70b 100644
> > --- a/drivers/hv/hv_kvp.c
> > +++ b/drivers/hv/hv_kvp.c
> > @@ -113,7 +113,7 @@ static void kvp_poll_wrapper(void *channel)  {
> > /* Transaction is finished, reset the state here to avoid races. */
> > kvp_transaction.state = HVUTIL_READY;
> > -   hv_kvp_onchannelcallback(channel);
> > +   tasklet_schedule(&((struct vmbus_channel*)channel)-
> >callback_event);
> >  }
> 
> There is one more function in the code which calls
> hv_kvp_onchannelcallback():
> 
> static void kvp_host_handshake_func(struct work_struct *dummy) {
>   hv_poll_channel(kvp_transaction.recv_channel,
> hv_kvp_onchannelcallback); }
> 
> we can't replace hv_kvp_onchannelcallback with kvp_poll_wrapper here as
> we don't want to reset kvp_transaction.state but it seems this should also
> get updated, e.g. hv_poll_channel() here can be replaced with the direct
> 
>  tasklet_schedule(&((struct vmbus_channel*)channel)->callback_event);
> 
> call. This will ensure hv_kvp_onchannelcallback() calls are always serialized.

Thank you. I will send v3.

> 
> >
> >  static void kvp_register_done(void)
> > @@ -628,16 +628,17 @@ void hv_kvp_onchannelcallback(void *context)
> >  NEGO_IN_PROGRESS,
> >  NEGO_FINISHED} host_negotiatied =
> NEGO_NOT_STARTED;
> >
> > -   if (host_negotiatied == NEGO_NOT_STARTED &&
> > -   kvp_transaction.state < HVUTIL_READY) {
> > +   if (kvp_transaction.state < HVUTIL_READY) {
> > /*
> >  * If userspace daemon is not connected and host is asking
> >  * us to negotiate we need to delay to not lose messages.
> >  * This is important for Failover IP setting.
> >  */
> > -   host_negotiatied = NEGO_IN_PROGRESS;
> > -   schedule_delayed_work(&kvp_host_handshake_work,
> > +   if (host_negotiatied == NEGO_NOT_STARTED) {
> > +   host_negotiatied = NEGO_IN_PROGRESS;
> > +
>   schedule_delayed_work(&kvp_host_handshake_work,
> >   HV_UTIL_NEGO_TIMEOUT * HZ);
> > +   }
> > return;
> > }
> > if (kvp_transaction.state > HVUTIL_READY) @@ -705,6 +706,7 @@
> void
> > hv_kvp_onchannelcallback(void *context)
> >VM_PKT_DATA_INBAND, 0);
> >
> > host_negotiatied = NEGO_FINISHED;
> > +   hv_poll_channel(kvp_transaction.recv_channel,
> kvp_poll_wrapper);
> > }
> >
> >  }
> 
> --
>   Vitaly
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 1/2 v4] pci-hyperv: properly handle pci bus remove

2017-03-22 Thread Long Li


> -----Original Message-----
> From: Bjorn Helgaas [mailto:helg...@kernel.org]
> Sent: Thursday, March 16, 2017 1:07 PM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>;
> de...@linuxdriverproject.org; linux-...@vger.kernel.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH 1/2 v4] pci-hyperv: properly handle pci bus remove
> 
> On Tue, Feb 28, 2017 at 02:19:45AM +, Long Li wrote:
> > hv_pci_devices_present is called in hv_pci_remove when we remove a PCI
> > device from host (e.g. by disabling SRIOV on a device). In
> > hv_pci_remove, the bus is already removed before the call, so we don't
> > need to rescan the bus in the workqueue scheduled from
> > hv_pci_devices_present. By introducing status hv_pcibus_removed, we
> can avoid this situation.
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > Reported-by: Xiaofeng Wang <xiaof...@redhat.com>
> > Acked-by: K. Y. Srinivasan <k...@microsoft.com>
> 
> This didn't apply for me because saving it to a file resulted in some encoded
> file with "=3D" instead of "=".  I see that mutt is smart enough to deal with
> that in this reply, so there's probably a way for it to decode it when saving 
> to
> a file, but I don't know it.
> 
> I tried to apply it by hand, but the hunk in hv_pci_remove() doesn't match
> the context.  I think your patch is based on something previous to
> 17978524a636 ("PCI: hv: Fix hv_pci_remove() for hot-remove").  Please
> refresh the patch so it applies to my "master" branch (currently v4.11-rc1).
> 
> Also, the "default: break;" case below is redundant and can be removed.
> 
> So I'll wait for your updated versions of both these patches.
> 
> > ---
> >  drivers/pci/host/pci-hyperv.c | 20 +---
> >  1 file changed, 17 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/pci/host/pci-hyperv.c
> > b/drivers/pci/host/pci-hyperv.c index a8deeca..4a37598 100644
> > --- a/drivers/pci/host/pci-hyperv.c
> > +++ b/drivers/pci/host/pci-hyperv.c
> > @@ -348,6 +348,7 @@ enum hv_pcibus_state {
> > hv_pcibus_init = 0,
> > hv_pcibus_probed,
> > hv_pcibus_installed,
> > +   hv_pcibus_removed,
> > hv_pcibus_maximum
> >  };
> >
> > @@ -1481,13 +1482,24 @@ static void pci_devices_present_work(struct
> work_struct *work)
> > put_pcichild(hpdev, hv_pcidev_ref_initial);
> > }
> >
> > -   /* Tell the core to rescan bus because there may have been changes.
> */
> > -   if (hbus->state == hv_pcibus_installed) {
> > +   switch (hbus->state) {
> > +   case hv_pcibus_installed:
> > +   /*
> > +* Tell the core to rescan bus
> > +* because there may have been changes.
> > +*/
> > pci_lock_rescan_remove();
> > pci_scan_child_bus(hbus->pci_bus);
> > pci_unlock_rescan_remove();
> > -   } else {
> > +   break;
> > +
> > +   case hv_pcibus_init:
> > +   case hv_pcibus_probed:
> > survey_child_resources(hbus);
> > +   break;
> > +
> > +   default:
> > +   break;
> 
> ^ This is redundant.

I found it still needs "default:break", or it will give a compiler warning:

drivers/pci/host/pci-hyperv.c: In function 'pci_devices_present_work':
drivers/pci/host/pci-hyperv.c:1510:2: warning: enumeration value 
'hv_pcibus_removed' not handled in switch [-Wswitch]
  switch(hbus->state) {
  ^
drivers/pci/host/pci-hyperv.c:1510:2: warning: enumeration value 
'hv_pcibus_maximum' not handled in switch [-Wswitch]

> 
> > }
> >
> > up(&hbus->enum_sem);
> > @@ -2163,6 +2175,7 @@ static int hv_pci_probe(struct hv_device *hdev,
> > hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
> > if (!hbus)
> > return -ENOMEM;
> > +   hbus->state = hv_pcibus_init;
> >
> > /*
> >  * The PCI bus "domain" is what is called "segment" in ACPI and @@
> > -2305,6 +2318,7 @@ static int hv_pci_remove(struct hv_device *hdev)
> > pci_stop_root_bus(hbus->pci_bus);
> > pci_remove_root_bus(hbus->pci_bus);
> > pci_unlock_rescan_remove();
> > +   hbus->state = hv_pcibus_removed;
> > }
> >
> > ret = hv_send_resources_released(hdev);
> > --
> > 1.8.5.6
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2] HV: properly delay KVP packets when negotiation is in progress

2017-03-22 Thread Long Li
The host may send multiple negotiation packets (due to timeout) before 
the KVP user-mode daemon is connected. We need to defer processing  
those packets until the daemon is negotiated and connected. It's okay
for guest to respond to all negotiation packets.

In addition, the host may send multiple staged KVP requests as soon as
negotiation is done. We need to properly process those packets using 
one tasklet for exclusive access to ring buffer.

This patch is based on the work of Nick Meier 
<nick.me...@microsoft.com>

The patch v2 has incorporated suggestion from Vitaly Kuznetsov 
<vkuzn...@redhat.com>.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/hv/hv_kvp.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index de26371..845b70b 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -113,7 +113,7 @@ static void kvp_poll_wrapper(void *channel)
 {
/* Transaction is finished, reset the state here to avoid races. */
kvp_transaction.state = HVUTIL_READY;
-   hv_kvp_onchannelcallback(channel);
+   tasklet_schedule(&((struct vmbus_channel*)channel)->callback_event);
 }
 
 static void kvp_register_done(void)
@@ -628,16 +628,17 @@ void hv_kvp_onchannelcallback(void *context)
 NEGO_IN_PROGRESS,
 NEGO_FINISHED} host_negotiatied = NEGO_NOT_STARTED;
 
-   if (host_negotiatied == NEGO_NOT_STARTED &&
-   kvp_transaction.state < HVUTIL_READY) {
+   if (kvp_transaction.state < HVUTIL_READY) {
/*
 * If userspace daemon is not connected and host is asking
 * us to negotiate we need to delay to not lose messages.
 * This is important for Failover IP setting.
 */
-   host_negotiatied = NEGO_IN_PROGRESS;
-   schedule_delayed_work(&kvp_host_handshake_work,
+   if (host_negotiatied == NEGO_NOT_STARTED) {
+   host_negotiatied = NEGO_IN_PROGRESS;
+   schedule_delayed_work(&kvp_host_handshake_work,
  HV_UTIL_NEGO_TIMEOUT * HZ);
+   }
return;
}
if (kvp_transaction.state > HVUTIL_READY)
@@ -705,6 +706,7 @@ void hv_kvp_onchannelcallback(void *context)
   VM_PKT_DATA_INBAND, 0);
 
host_negotiatied = NEGO_FINISHED;
+   hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
}
 
 }
-- 
2.7.4
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] HV: properly delay KVP packets when negotiation is in progress

2017-03-20 Thread Long Li


> -----Original Message-----
> From: Long Li
> Sent: Sunday, March 19, 2017 7:49 PM
> To: 'Vitaly Kuznetsov' <vkuzn...@redhat.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Stephen Hemminger
> <sthem...@microsoft.com>; de...@linuxdriverproject.org; linux-
> ker...@vger.kernel.org
> Subject: RE: [PATCH] HV: properly delay KVP packets when negotiation is in
> progress
> 
> 
> 
> > -----Original Message-----
> > From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
> > Sent: Friday, March 17, 2017 9:16 AM
> > To: Long Li <lon...@microsoft.com>
> > Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> > <haiya...@microsoft.com>; Stephen Hemminger
> <sthem...@microsoft.com>;
> > de...@linuxdriverproject.org; linux- ker...@vger.kernel.org
> > Subject: Re: [PATCH] HV: properly delay KVP packets when negotiation
> > is in progress
> >
> > Long Li <lon...@microsoft.com> writes:
> >
> > > The host may send multiple KVP packets before the negotiation with
> > > daemon is finished. We need to keep those packets in ring buffer
> > > until the daemon is negotiated and connected.
> >
> > The patch looks OK but previously we always presumed that this can't
> > happen for util drivers and host will never send a new request before
> > we answer to the previous one. If this is not true we may have more
> > issues which need fixing as all three drivers we have are written in a
> 'transaction'
> > fashion.
> >
> > So my question would be: can the host send multiple (KVP) packets
> > _after_ the negotiation with daemon is finished?
> 
> Thanks Vitaly. I'm checking with Windows guys and will update soon.

It's possible that hosts may send a number of staged KVP requests as soon as 
negotiation is done. The current KVP code can deal with a number of pending KVP 
requests, and respond to them one by one.

To summarize the issue this patch tries to fix:
1. When host sends a negotiation request, and it times out, the host will send 
another negotiation request, and so on.
2. The guest can respond to all negotiation requests from the host. All 
subsequent responses (except for the 1st response) are ignored by the host.
3. Before negotiation is done, the host may have staged a number of pending KVP 
requests.
4. As soon as negotiation is done, the host sends all KVP requests to guest.

There is a corner case that if there is only one pending KVP request after the 
2nd (or 3rd etc) negotiation, it may get lost. I'm testing the following code 
to address this condition:

@@ -705,6 +706,7 @@ void hv_kvp_onchannelcallback(void *context)
   VM_PKT_DATA_INBAND, 0);

host_negotiatied = NEGO_FINISHED;
+       hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
}

 }

Please drop this patch. I'll send V2.

> 
> >
> >
> > >
> > > This patch is based on the work of Nick Meier
> > > <nick.me...@microsoft.com>
> > >
> > > Signed-off-by: Long Li <lon...@microsoft.com>
> > > ---
> > >  drivers/hv/hv_kvp.c | 9 +
> > >  1 file changed, 5 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index
> > > de26371..b9f928d 100644
> > > --- a/drivers/hv/hv_kvp.c
> > > +++ b/drivers/hv/hv_kvp.c
> > > @@ -628,16 +628,17 @@ void hv_kvp_onchannelcallback(void *context)
> > >NEGO_IN_PROGRESS,
> > >NEGO_FINISHED} host_negotiatied =
> > NEGO_NOT_STARTED;
> > >
> > > - if (host_negotiatied == NEGO_NOT_STARTED &&
> > > - kvp_transaction.state < HVUTIL_READY) {
> > > + if (kvp_transaction.state < HVUTIL_READY) {
> > >   /*
> > >* If userspace daemon is not connected and host is asking
> > >* us to negotiate we need to delay to not lose messages.
> > >* This is important for Failover IP setting.
> > >*/
> > > - host_negotiatied = NEGO_IN_PROGRESS;
> > > - schedule_delayed_work(&kvp_host_handshake_work,
> > > + if (host_negotiatied == NEGO_NOT_STARTED) {
> > > + host_negotiatied = NEGO_IN_PROGRESS;
> > > +
> > schedule_delayed_work(&kvp_host_handshake_work,
> > > HV_UTIL_NEGO_TIMEOUT * HZ);
> > > + }
> > >   return;
> > >   }
> > >   if (kvp_transaction.state > HVUTIL_READY)
> >
> > --
> >   Vitaly
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] HV: properly delay KVP packets when negotiation is in progress

2017-03-19 Thread Long Li


> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
> Sent: Friday, March 17, 2017 9:16 AM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Stephen Hemminger
> <sthem...@microsoft.com>; de...@linuxdriverproject.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH] HV: properly delay KVP packets when negotiation is in
> progress
> 
> Long Li <lon...@microsoft.com> writes:
> 
> > The host may send multiple KVP packets before the negotiation with
> > daemon is finished. We need to keep those packets in ring buffer until
> > the daemon is negotiated and connected.
> 
> The patch looks OK but previously we always presumed that this can't
> happen for util drivers and host will never send a new request before we
> answer to the previous one. If this is not true we may have more issues
> which need fixing as all three drivers we have are written in a 'transaction'
> fashion.
> 
> So my question would be: can the host send multiple (KVP) packets _after_
> the negotiation with daemon is finished?

Thanks Vitaly. I'm checking with Windows guys and will update soon.

> 
> 
> >
> > This patch is based on the work of Nick Meier
> > <nick.me...@microsoft.com>
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > ---
> >  drivers/hv/hv_kvp.c | 9 +
> >  1 file changed, 5 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index
> > de26371..b9f928d 100644
> > --- a/drivers/hv/hv_kvp.c
> > +++ b/drivers/hv/hv_kvp.c
> > @@ -628,16 +628,17 @@ void hv_kvp_onchannelcallback(void *context)
> >  NEGO_IN_PROGRESS,
> >  NEGO_FINISHED} host_negotiatied =
> NEGO_NOT_STARTED;
> >
> > -   if (host_negotiatied == NEGO_NOT_STARTED &&
> > -   kvp_transaction.state < HVUTIL_READY) {
> > +   if (kvp_transaction.state < HVUTIL_READY) {
> > /*
> >  * If userspace daemon is not connected and host is asking
> >  * us to negotiate we need to delay to not lose messages.
> >  * This is important for Failover IP setting.
> >  */
> > -   host_negotiatied = NEGO_IN_PROGRESS;
> > -   schedule_delayed_work(&kvp_host_handshake_work,
> > +   if (host_negotiatied == NEGO_NOT_STARTED) {
> > +   host_negotiatied = NEGO_IN_PROGRESS;
> > +
>   schedule_delayed_work(&kvp_host_handshake_work,
> >   HV_UTIL_NEGO_TIMEOUT * HZ);
> > +   }
> > return;
> > }
> > if (kvp_transaction.state > HVUTIL_READY)
> 
> --
>   Vitaly
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 1/2 v4] pci-hyperv: properly handle pci bus remove

2017-03-16 Thread Long Li
> -----Original Message-----
> From: Bjorn Helgaas [mailto:helg...@kernel.org]
> Sent: Thursday, March 16, 2017 1:07 PM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>;
> de...@linuxdriverproject.org; linux-...@vger.kernel.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH 1/2 v4] pci-hyperv: properly handle pci bus remove
> 
> On Tue, Feb 28, 2017 at 02:19:45AM +, Long Li wrote:
> > hv_pci_devices_present is called in hv_pci_remove when we remove a PCI
> > device from host (e.g. by disabling SRIOV on a device). In
> > hv_pci_remove, the bus is already removed before the call, so we don't
> > need to rescan the bus in the workqueue scheduled from
> > hv_pci_devices_present. By introducing status hv_pcibus_removed, we
> can avoid this situation.
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > Reported-by: Xiaofeng Wang <xiaof...@redhat.com>
> > Acked-by: K. Y. Srinivasan <k...@microsoft.com>
> 
> This didn't apply for me because saving it to a file resulted in some encoded
> file with "=3D" instead of "=".  I see that mutt is smart enough to deal with
> that in this reply, so there's probably a way for it to decode it when saving 
> to
> a file, but I don't know it.
> 
> I tried to apply it by hand, but the hunk in hv_pci_remove() doesn't match
> the context.  I think your patch is based on something previous to
> 17978524a636 ("PCI: hv: Fix hv_pci_remove() for hot-remove").  Please
> refresh the patch so it applies to my "master" branch (currently v4.11-rc1).
> 
> Also, the "default: break;" case below is redundant and can be removed.
> 
> So I'll wait for your updated versions of both these patches.

Thanks, I'll address those issues and resend the patch.

> 
> > ---
> >  drivers/pci/host/pci-hyperv.c | 20 +---
> >  1 file changed, 17 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/pci/host/pci-hyperv.c
> > b/drivers/pci/host/pci-hyperv.c index a8deeca..4a37598 100644
> > --- a/drivers/pci/host/pci-hyperv.c
> > +++ b/drivers/pci/host/pci-hyperv.c
> > @@ -348,6 +348,7 @@ enum hv_pcibus_state {
> > hv_pcibus_init = 0,
> > hv_pcibus_probed,
> > hv_pcibus_installed,
> > +   hv_pcibus_removed,
> > hv_pcibus_maximum
> >  };
> >
> > @@ -1481,13 +1482,24 @@ static void pci_devices_present_work(struct
> work_struct *work)
> > put_pcichild(hpdev, hv_pcidev_ref_initial);
> > }
> >
> > -   /* Tell the core to rescan bus because there may have been changes.
> */
> > -   if (hbus->state == hv_pcibus_installed) {
> > +   switch (hbus->state) {
> > +   case hv_pcibus_installed:
> > +   /*
> > +* Tell the core to rescan bus
> > +* because there may have been changes.
> > +*/
> > pci_lock_rescan_remove();
> > pci_scan_child_bus(hbus->pci_bus);
> > pci_unlock_rescan_remove();
> > -   } else {
> > +   break;
> > +
> > +   case hv_pcibus_init:
> > +   case hv_pcibus_probed:
> > survey_child_resources(hbus);
> > +   break;
> > +
> > +   default:
> > +   break;
> 
> ^ This is redundant.
> 
> > }
> >
> > up(&hbus->enum_sem);
> > @@ -2163,6 +2175,7 @@ static int hv_pci_probe(struct hv_device *hdev,
> > hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
> > if (!hbus)
> > return -ENOMEM;
> > +   hbus->state = hv_pcibus_init;
> >
> > /*
> >  * The PCI bus "domain" is what is called "segment" in ACPI and @@
> > -2305,6 +2318,7 @@ static int hv_pci_remove(struct hv_device *hdev)
> > pci_stop_root_bus(hbus->pci_bus);
> > pci_remove_root_bus(hbus->pci_bus);
> > pci_unlock_rescan_remove();
> > +   hbus->state = hv_pcibus_removed;
> > }
> >
> > ret = hv_send_resources_released(hdev);
> > --
> > 1.8.5.6
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] HV: properly delay KVP packets when negotiation is in progress

2017-03-16 Thread Long Li
The host may send multiple KVP packets before the negotiation with daemon
is finished. We need to keep those packets in ring buffer until the daemon
is negotiated and connected.

This patch is based on the work of Nick Meier <nick.me...@microsoft.com>

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/hv/hv_kvp.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index de26371..b9f928d 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -628,16 +628,17 @@ void hv_kvp_onchannelcallback(void *context)
 NEGO_IN_PROGRESS,
 NEGO_FINISHED} host_negotiatied = NEGO_NOT_STARTED;
 
-   if (host_negotiatied == NEGO_NOT_STARTED &&
-   kvp_transaction.state < HVUTIL_READY) {
+   if (kvp_transaction.state < HVUTIL_READY) {
/*
 * If userspace daemon is not connected and host is asking
 * us to negotiate we need to delay to not lose messages.
 * This is important for Failover IP setting.
 */
-   host_negotiatied = NEGO_IN_PROGRESS;
-   schedule_delayed_work(_host_handshake_work,
+   if (host_negotiatied == NEGO_NOT_STARTED) {
+   host_negotiatied = NEGO_IN_PROGRESS;
+   schedule_delayed_work(_host_handshake_work,
  HV_UTIL_NEGO_TIMEOUT * HZ);
+   }
return;
}
if (kvp_transaction.state > HVUTIL_READY)
-- 
2.7.4
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 2/2 v4] pci-hyperv: lock pci bus on device eject

2017-02-27 Thread Long Li
A PCI_EJECT message can arrive at the same time we are calling 
pci_scan_child_bus in the workqueue for the previous PCI_BUS_RELATIONS 
message or in create_root_hv_pci_bus(), in this case we could 
potentially modify the bus from multiple places. 

Properly lock the bus access.

Thanks Dexuan Cui <de...@microsoft.com> for pointing out the race condition in 
create_root_hv_pci_bus().

Signed-off-by: Long Li <lon...@microsoft.com>
Reported-by: Xiaofeng Wang <xiaof...@redhat.com>
Acked-by: K. Y. Srinivasan <k...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 4a37598..33c75c9 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -1198,9 +1198,11 @@ static int create_root_hv_pci_bus(struct 
hv_pcibus_device *hbus)
hbus->pci_bus->msi = >msi_chip;
hbus->pci_bus->msi->dev = >hdev->device;
 
+   pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_bus_assign_resources(hbus->pci_bus);
pci_bus_add_devices(hbus->pci_bus);
+   pci_unlock_rescan_remove();
hbus->state = hv_pcibus_installed;
return 0;
 }
@@ -1590,8 +1592,10 @@ static void hv_eject_device_work(struct work_struct 
*work)
pdev = pci_get_domain_bus_and_slot(hpdev->hbus->sysdata.domain, 0,
   wslot);
if (pdev) {
+   pci_lock_rescan_remove();
pci_stop_and_remove_bus_device(pdev);
pci_dev_put(pdev);
+   pci_unlock_rescan_remove();
}
 
memset(, 0, sizeof(ctxt));
-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 1/2 v4] pci-hyperv: properly handle pci bus remove

2017-02-27 Thread Long Li
hv_pci_devices_present is called in hv_pci_remove when we remove a PCI
device from host (e.g. by disabling SRIOV on a device). In hv_pci_remove,
the bus is already removed before the call, so we don't need to rescan 
the bus in the workqueue scheduled from hv_pci_devices_present. By 
introducing status hv_pcibus_removed, we can avoid this situation.

Signed-off-by: Long Li <lon...@microsoft.com>
Reported-by: Xiaofeng Wang <xiaof...@redhat.com>
Acked-by: K. Y. Srinivasan <k...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index a8deeca..4a37598 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -348,6 +348,7 @@ enum hv_pcibus_state {
hv_pcibus_init = 0,
hv_pcibus_probed,
hv_pcibus_installed,
+   hv_pcibus_removed,
hv_pcibus_maximum
 };
 
@@ -1481,13 +1482,24 @@ static void pci_devices_present_work(struct work_struct 
*work)
put_pcichild(hpdev, hv_pcidev_ref_initial);
}
 
-   /* Tell the core to rescan bus because there may have been changes. */
-   if (hbus->state == hv_pcibus_installed) {
+   switch (hbus->state) {
+   case hv_pcibus_installed:
+   /*
+* Tell the core to rescan bus
+* because there may have been changes.
+*/
pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
-   } else {
+   break;
+
+   case hv_pcibus_init:
+   case hv_pcibus_probed:
survey_child_resources(hbus);
+   break;
+
+   default:
+   break;
}
 
up(>enum_sem);
@@ -2163,6 +2175,7 @@ static int hv_pci_probe(struct hv_device *hdev,
hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
if (!hbus)
return -ENOMEM;
+   hbus->state = hv_pcibus_init;
 
/*
 * The PCI bus "domain" is what is called "segment" in ACPI and
@@ -2305,6 +2318,7 @@ static int hv_pci_remove(struct hv_device *hdev)
pci_stop_root_bus(hbus->pci_bus);
pci_remove_root_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
+   hbus->state = hv_pcibus_removed;
}
 
ret = hv_send_resources_released(hdev);
-- 
1.8.5.6
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [Resend PATCH 2/2 v3] pci-hyperv: lock pci bus on device eject

2017-02-27 Thread Long Li
Ok, I will resend.

> -Original Message-
> From: Greg KH [mailto:g...@kroah.com]
> Sent: Saturday, February 25, 2017 12:02 AM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>;
> de...@linuxdriverproject.org; linux-ker...@vger.kernel.org; linux-
> p...@vger.kernel.org
> Subject: Re: [Resend PATCH 2/2 v3] pci-hyperv: lock pci bus on device eject
> 
> On Fri, Feb 24, 2017 at 09:49:17PM +, Long Li wrote:
> > A PCI_EJECT message can arrive at the same time we are calling
> pci_scan_child_bus in the workqueue for the previous PCI_BUS_RELATIONS
> message or in create_root_hv_pci_bus(), in this case we could potentially
> modify the bus from multiple places. Properly lock the bus access.
> 
> Properly wrap your changelog comments at 72 columns like your editor is
> telling you to do...

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[Resend PATCH 2/2 v3] pci-hyperv: lock pci bus on device eject

2017-02-24 Thread Long Li
A PCI_EJECT message can arrive at the same time we are calling 
pci_scan_child_bus in the workqueue for the previous PCI_BUS_RELATIONS message 
or in create_root_hv_pci_bus(), in this case we could potentially modify the 
bus from multiple places. Properly lock the bus access.

Thanks Dexuan Cui <de...@microsoft.com> for pointing out the race condition in 
create_root_hv_pci_bus().

Signed-off-by: Long Li <lon...@microsoft.com>
Reported-by: Xiaofeng Wang <xiaof...@redhat.com>
Acked-by: K. Y. Srinivasan <k...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 4a37598..33c75c9 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -1198,9 +1198,11 @@ static int create_root_hv_pci_bus(struct 
hv_pcibus_device *hbus)
hbus->pci_bus->msi = >msi_chip;
hbus->pci_bus->msi->dev = >hdev->device;
 
+   pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_bus_assign_resources(hbus->pci_bus);
pci_bus_add_devices(hbus->pci_bus);
+   pci_unlock_rescan_remove();
hbus->state = hv_pcibus_installed;
return 0;
 }
@@ -1590,8 +1592,10 @@ static void hv_eject_device_work(struct work_struct 
*work)
pdev = pci_get_domain_bus_and_slot(hpdev->hbus->sysdata.domain, 0,
   wslot);
if (pdev) {
+   pci_lock_rescan_remove();
pci_stop_and_remove_bus_device(pdev);
pci_dev_put(pdev);
+   pci_unlock_rescan_remove();
}
 
memset(, 0, sizeof(ctxt));
-- 
1.8.5.6
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[Resend PATCH 1/2 v3] pci-hyperv: properly handle pci bus remove

2017-02-24 Thread Long Li
hv_pci_devices_present is called in hv_pci_remove when we remove a PCI device 
from host (e.g. by disabling SRIOV on a device). In hv_pci_remove, the bus is 
already removed before the call, so we don't need to rescan the bus in the 
workqueue scheduled from hv_pci_devices_present. By introducing status 
hv_pcibus_removed, we can avoid this situation.

Signed-off-by: Long Li <lon...@microsoft.com>
Reported-by: Xiaofeng Wang <xiaof...@redhat.com>
Acked-by: K. Y. Srinivasan <k...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index a8deeca..4a37598 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -348,6 +348,7 @@ enum hv_pcibus_state {
hv_pcibus_init = 0,
hv_pcibus_probed,
hv_pcibus_installed,
+   hv_pcibus_removed,
hv_pcibus_maximum
 };
 
@@ -1481,13 +1482,24 @@ static void pci_devices_present_work(struct work_struct 
*work)
put_pcichild(hpdev, hv_pcidev_ref_initial);
}
 
-   /* Tell the core to rescan bus because there may have been changes. */
-   if (hbus->state == hv_pcibus_installed) {
+   switch (hbus->state) {
+   case hv_pcibus_installed:
+   /*
+* Tell the core to rescan bus
+* because there may have been changes.
+*/
pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
-   } else {
+   break;
+
+   case hv_pcibus_init:
+   case hv_pcibus_probed:
survey_child_resources(hbus);
+   break;
+
+   default:
+   break;
}
 
up(>enum_sem);
@@ -2163,6 +2175,7 @@ static int hv_pci_probe(struct hv_device *hdev,
hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
if (!hbus)
return -ENOMEM;
+   hbus->state = hv_pcibus_init;
 
/*
 * The PCI bus "domain" is what is called "segment" in ACPI and
@@ -2305,6 +2318,7 @@ static int hv_pci_remove(struct hv_device *hdev)
pci_stop_root_bus(hbus->pci_bus);
pci_remove_root_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
+   hbus->state = hv_pcibus_removed;
}
 
ret = hv_send_resources_released(hdev);
-- 
1.8.5.6
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [Resend PATCH 1/2 v3] pci-hyperv: properly handle pci bus remove

2017-02-13 Thread Long Li
> -Original Message-
> From: Bjorn Helgaas [mailto:bhelg...@google.com]
> Sent: Saturday, February 11, 2017 9:35 AM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; de...@linuxdriverproject.org; linux-
> p...@vger.kernel.org; linux-ker...@vger.kernel.org
> Subject: Re: [Resend PATCH 1/2 v3] pci-hyperv: properly handle pci bus
> remove
> 
> On Fri, Feb 10, 2017 at 7:18 PM, Long Li <lon...@microsoft.com> wrote:
> > Hi Bjorn,
> >
> > This patch and the other one in the series ([Resend PATCH 2/2 v3] pci-
> hyperv: lock pci bus on device eject) have been Acked.
> >
> > Is there anything else that should be done before it can be merged? Please let
> me know.
> 
> Sorry, I don't know what happened here.  I see your Jan 23 posting in my
> work email (bhelg...@google.com), but I don't see it on the linux-pci or
> linux-kernel lists, and patchwork [1] doesn't have a copy
> either.   I suspect there was something about your email that made
> vger drop it (maybe an HTML or other "fancy" stuff per
> https://na01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fvger.ke
> rnel.org%2Fmajordomo-
> info.html=02%7C01%7Clongli%40microsoft.com%7Cd3d9fb666bdd4244
> 901b08d452a4692b%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C63
> 6224313474452403=UcQu75mTXO3xh5ot%2FZTRDgL5GXayaXjs%2Fugt
> wWe91Ko%3D=0).
> 
> Patchwork works by subscribing to linux-pci and collecting things that look
> like patches.  Then I work from patchwork as a to-do list.
> That's a convenient way to ensure that patches appear on the mailing list
> before I apply them.  It also means that if a patch doesn't appear on 
> linux-pci
> and subsequently in patchwork, I don't know about it.
> 
> Patchwork does have copies of previous versions, but I marked them
> "changes requested".  When I do that, the patch drops off the to-do list
> because I'm expecting a new version, which *will* appear on the list.  I don't
> mark things "changes requested" if I'm only waiting for an ack, so it looks 
> like
> the only change I was looking for was a changelog revision.  Normally I just
> tweak changelogs myself, so I apologize for not doing that in this case.
> 
> Anyway, can you just post the current version, including the acks, and make
> sure it shows up on the mailing list?
> 
> I'm sorry this has languished so long.  Thanks for reminding me about it so we
> can sort this out.

Thank you. I will fix the email and resend the patch.

> 
> [1]
> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpatch
> work.ozlabs.org%2Fproject%2Flinux-
> pci%2Flist%2F%3Fsubmitter%3D69886%26state%3D*%26q%3D%26archive%3
> Dboth%26delegate=02%7C01%7Clongli%40microsoft.com%7Cd3d9fb66
> 6bdd4244901b08d452a4692b%7C72f988bf86f141af91ab2d7cd011db47%7C1%
> 7C0%7C636224313474452403=ELx04yDnSbe1fxXLy7z2iFoKwazKEMlDLrl
> p4CWhXbk%3D=0=
> 
> >> -Original Message-
> >> From: KY Srinivasan
> >> Sent: Friday, January 27, 2017 10:42 AM
> >> To: Long Li <lon...@microsoft.com>; Haiyang Zhang
> >> <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>
> >> Cc: de...@linuxdriverproject.org; linux-...@vger.kernel.org; linux-
> >> ker...@vger.kernel.org; Long Li <lon...@microsoft.com>
> >> Subject: RE: [Resend PATCH 1/2 v3] pci-hyperv: properly handle pci
> >> bus remove
> >>
> >>
> >>
> >> > -Original Message-
> >> > From: Long Li [mailto:lon...@exchange.microsoft.com]
> >> > Sent: Monday, January 23, 2017 9:45 PM
> >> > To: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> >> > <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>
> >> > Cc: de...@linuxdriverproject.org; linux-...@vger.kernel.org; linux-
> >> > ker...@vger.kernel.org; Long Li <lon...@microsoft.com>
> >> > Subject: [Resend PATCH 1/2 v3] pci-hyperv: properly handle pci bus
> >> > remove
> >> >
> >> > [This sender failed our fraud detection checks and may not be who
> >> > they appear to be. Learn about spoofing at
> >> >
> https://na01.safelinks.protection.outlook.com/?url=http%3A%2F%2Faka
> >> >
> .ms%2FLearnAboutSpoofing=02%7C01%7Clongli%40microsoft.com%7C
> d3
> >> >
> d9fb666bdd4244901b08d452a4692b%7C72f988bf86f141af91ab2d7cd011db47
> %7
> >> >
> C1%7C0%7C636224313474452403=jlfhIYsJJT4HbcPGSPTk43AApcip%2F
> 9m
> >> > w7snnFn%2FvI74%3D=0]
> >> >
> >> >

RE: [Resend PATCH 1/2 v3] pci-hyperv: properly handle pci bus remove

2017-02-10 Thread Long Li
Hi Bjorn,

This patch and the other one in the series ([Resend PATCH 2/2 v3] pci-hyperv: 
lock pci bus on device eject) have been Acked.

Is there anything else that should be done before it can be merged? Please let me 
know.

Thanks

Long

> -Original Message-
> From: KY Srinivasan
> Sent: Friday, January 27, 2017 10:42 AM
> To: Long Li <lon...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>
> Cc: de...@linuxdriverproject.org; linux-...@vger.kernel.org; linux-
> ker...@vger.kernel.org; Long Li <lon...@microsoft.com>
> Subject: RE: [Resend PATCH 1/2 v3] pci-hyperv: properly handle pci bus
> remove
> 
> 
> 
> > -Original Message-
> > From: Long Li [mailto:lon...@exchange.microsoft.com]
> > Sent: Monday, January 23, 2017 9:45 PM
> > To: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> > <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>
> > Cc: de...@linuxdriverproject.org; linux-...@vger.kernel.org; linux-
> > ker...@vger.kernel.org; Long Li <lon...@microsoft.com>
> > Subject: [Resend PATCH 1/2 v3] pci-hyperv: properly handle pci bus
> > remove
> >
> > [This sender failed our fraud detection checks and may not be who they
> > appear to be. Learn about spoofing at
> > http://aka.ms/LearnAboutSpoofing]
> >
> > From: Long Li <lon...@microsoft.com>
> >
> > hv_pci_devices_present is called in hv_pci_remove when we remove a PCI
> > device from host (e.g. by disabling SRIOV on a device). In
> > hv_pci_remove, the bus is already removed before the call, so we don't
> > need to rescan the bus in the workqueue scheduled from
> > hv_pci_devices_present. By introducing status hv_pcibus_removed, we
> can avoid this situation.
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > Reported-by: Xiaofeng Wang <xiaof...@redhat.com>
> Acked-by: K. Y. Srinivasan <k...@microsoft.com>
> > ---
> >  drivers/pci/host/pci-hyperv.c | 20 +---
> >  1 file changed, 17 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/pci/host/pci-hyperv.c
> > b/drivers/pci/host/pci-hyperv.c index a8deeca..4a37598 100644
> > --- a/drivers/pci/host/pci-hyperv.c
> > +++ b/drivers/pci/host/pci-hyperv.c
> > @@ -348,6 +348,7 @@ enum hv_pcibus_state {
> > hv_pcibus_init = 0,
> > hv_pcibus_probed,
> > hv_pcibus_installed,
> > +   hv_pcibus_removed,
> > hv_pcibus_maximum
> >  };
> >
> > @@ -1481,13 +1482,24 @@ static void pci_devices_present_work(struct
> > work_struct *work)
> > put_pcichild(hpdev, hv_pcidev_ref_initial);
> > }
> >
> > -   /* Tell the core to rescan bus because there may have been changes.
> */
> > -   if (hbus->state == hv_pcibus_installed) {
> > +   switch (hbus->state) {
> > +   case hv_pcibus_installed:
> > +   /*
> > +* Tell the core to rescan bus
> > +* because there may have been changes.
> > +*/
> > pci_lock_rescan_remove();
> > pci_scan_child_bus(hbus->pci_bus);
> > pci_unlock_rescan_remove();
> > -   } else {
> > +   break;
> > +
> > +   case hv_pcibus_init:
> > +   case hv_pcibus_probed:
> > survey_child_resources(hbus);
> > +   break;
> > +
> > +   default:
> > +   break;
> > }
> >
> > up(>enum_sem);
> > @@ -2163,6 +2175,7 @@ static int hv_pci_probe(struct hv_device *hdev,
> > hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
> > if (!hbus)
> > return -ENOMEM;
> > +   hbus->state = hv_pcibus_init;
> >
> > /*
> >  * The PCI bus "domain" is what is called "segment" in ACPI
> > and @@ -2305,6 +2318,7 @@ static int hv_pci_remove(struct hv_device
> *hdev)
> > pci_stop_root_bus(hbus->pci_bus);
> > pci_remove_root_bus(hbus->pci_bus);
> > pci_unlock_rescan_remove();
> > +   hbus->state = hv_pcibus_removed;
> > }
> >
> > ret = hv_send_resources_released(hdev);
> > --
> > 1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] hv: use substraction to update ring buffer index

2017-01-20 Thread Long Li


> -Original Message-
> From: Dexuan Cui
> Sent: Sunday, January 15, 2017 7:12 PM
> To: Long Li <lon...@microsoft.com>; KY Srinivasan <k...@microsoft.com>;
> Haiyang Zhang <haiya...@microsoft.com>
> Cc: de...@linuxdriverproject.org; linux-ker...@vger.kernel.org
> Subject: RE: [PATCH] hv: use substraction to update ring buffer index
> 
> > From: devel [mailto:driverdev-devel-boun...@linuxdriverproject.org] On
> > Behalf Of Long Li
> > Sent: Thursday, January 5, 2017 12:08
> > To: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> > <haiya...@microsoft.com>
> > Cc: de...@linuxdriverproject.org; linux-ker...@vger.kernel.org
> > Subject: [PATCH] hv: use substraction to update ring buffer index
> >
> > From: Long Li <lon...@microsoft.com>
> >
> > The ring buffer code uses %= to calculate index. For x86/64, %=
> > compiles to div, more than 10 times slower than sub.
> >
> > Replace div with sub for this data heavy code path.
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > ---
> >  drivers/hv/ring_buffer.c | 9 ++---
> >  1 file changed, 6 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index
> > cd49cb1..f8eee6e 100644
> > --- a/drivers/hv/ring_buffer.c
> > +++ b/drivers/hv/ring_buffer.c
> > @@ -135,7 +135,8 @@ hv_get_next_readlocation_withoffset(struct
> > hv_ring_buffer_info *ring_info,
> > u32 next = ring_info->ring_buffer->read_index;
> >
> > next += offset;
> > -   next %= ring_info->ring_datasize;
> > +   if (next >= ring_info->ring_datasize)
> > +   next -= ring_info->ring_datasize;
> >
> > return next;
> >  }
> > @@ -179,7 +180,8 @@ static u32 hv_copyfrom_ringbuffer(
> > memcpy(dest, ring_buffer + start_read_offset, destlen);
> >
> > start_read_offset += destlen;
> > -   start_read_offset %= ring_buffer_size;
> > +   if (start_read_offset >= ring_buffer_size)
> > +   start_read_offset -= ring_buffer_size;
> >
> > return start_read_offset;
> >  }
> > @@ -201,7 +203,8 @@ static u32 hv_copyto_ringbuffer(
> > memcpy(ring_buffer + start_write_offset, src, srclen);
> >
> > start_write_offset += srclen;
> > -   start_write_offset %= ring_buffer_size;
> > +   if (start_write_offset >= ring_buffer_size)
> > +   start_write_offset -= ring_buffer_size;
> >
> > return start_write_offset;
> >  }
> 
> Hi Long,
> I guess you want to fix put_pkt_raw() too. :-)

Good point. I will send an updated patch.

> 
> Thanks,
> -- Dexuan

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 2/2 v3] pci-hyperv: lock pci bus on device eject

2017-01-09 Thread Long Li
Hi Bjorn,

The patch is still pending (along with 1/2 v3). Please let me know if you want 
me to resend the two patches.

Thanks

Long

> -Original Message-
> From: KY Srinivasan
> Sent: Tuesday, October 4, 2016 1:49 PM
> To: Long Li <lon...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>
> Cc: de...@linuxdriverproject.org; linux-...@vger.kernel.org; linux-
> ker...@vger.kernel.org
> Subject: RE: [PATCH 2/2 v3] pci-hyperv: lock pci bus on device eject
> 
> 
> 
> > -Original Message-
> > From: Long Li
> > Sent: Monday, October 3, 2016 11:43 PM
> > To: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> > <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>
> > Cc: de...@linuxdriverproject.org; linux-...@vger.kernel.org; linux-
> > ker...@vger.kernel.org; Long Li <lon...@microsoft.com>
> > Subject: [PATCH 2/2 v3] pci-hyperv: lock pci bus on device eject
> >
> > This sender failed our fraud detection checks and may not be who they
> > appear to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing
> >
> > From: Long Li <lon...@microsoft.com>
> >
> > A PCI_EJECT message can arrive at the same time we are calling
> > pci_scan_child_bus in the workqueue for the previous
> PCI_BUS_RELATIONS
> > message or in create_root_hv_pci_bus(), in this case we could
> > potentially modify the bus from multiple places. Properly lock the bus
> access.
> >
> > Thanks Dexuan Cui <de...@microsoft.com> for pointing out the race
> > condition in create_root_hv_pci_bus().
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > Tested-by: Cathy Avery <cav...@redhat.com>
> > Reported-by: Xiaofeng Wang <xiaof...@redhat.com>
> 
> Acked-by: KY Srinivasan <k...@microsoft.com>
> 
> > ---
> >  drivers/pci/host/pci-hyperv.c | 4 
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/drivers/pci/host/pci-hyperv.c
> > b/drivers/pci/host/pci-hyperv.c index 4a37598..33c75c9 100644
> > --- a/drivers/pci/host/pci-hyperv.c
> > +++ b/drivers/pci/host/pci-hyperv.c
> > @@ -1198,9 +1198,11 @@ static int create_root_hv_pci_bus(struct
> > hv_pcibus_device *hbus)
> > hbus->pci_bus->msi = >msi_chip;
> > hbus->pci_bus->msi->dev = >hdev->device;
> >
> > +   pci_lock_rescan_remove();
> > pci_scan_child_bus(hbus->pci_bus);
> > pci_bus_assign_resources(hbus->pci_bus);
> > pci_bus_add_devices(hbus->pci_bus);
> > +   pci_unlock_rescan_remove();
> > hbus->state = hv_pcibus_installed;
> > return 0;
> >  }
> > @@ -1590,8 +1592,10 @@ static void hv_eject_device_work(struct
> > work_struct *work)
> > pdev =
> > pci_get_domain_bus_and_slot(hpdev->hbus->sysdata.domain,
> > 0,
> >wslot);
> > if (pdev) {
> > +   pci_lock_rescan_remove();
> > pci_stop_and_remove_bus_device(pdev);
> > pci_dev_put(pdev);
> > +   pci_unlock_rescan_remove();
> > }
> >
> > memset(, 0, sizeof(ctxt));
> > --
> > 1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 1/2 v3] pci-hyperv: properly handle pci bus remove

2017-01-09 Thread Long Li
Hi Bjorn

This patch is still pending. The patch has been ack'ed.

Do you want me to resend this patch?

Thanks

Long

> -Original Message-
> From: KY Srinivasan
> Sent: Friday, November 11, 2016 2:21 PM
> To: Bjorn Helgaas <helg...@kernel.org>; Long Li <lon...@microsoft.com>
> Cc: Haiyang Zhang <haiya...@microsoft.com>; Bjorn Helgaas
> <bhelg...@google.com>; de...@linuxdriverproject.org; linux-
> p...@vger.kernel.org; linux-ker...@vger.kernel.org; Long Li
> <lon...@microsoft.com>
> Subject: RE: [PATCH 1/2 v3] pci-hyperv: properly handle pci bus remove
> 
> 
> 
> > -Original Message-
> > From: Bjorn Helgaas [mailto:helg...@kernel.org]
> > Sent: Friday, November 11, 2016 1:04 PM
> > To: Long Li <lon...@microsoft.com>
> > Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> > <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>;
> > de...@linuxdriverproject.org; linux-...@vger.kernel.org; linux-
> > ker...@vger.kernel.org; Long Li <lon...@microsoft.com>
> > Subject: Re: [PATCH 1/2 v3] pci-hyperv: properly handle pci bus remove
> >
> > On Mon, Oct 03, 2016 at 11:42:47PM -0700, Long Li wrote:
> > > From: Long Li <lon...@microsoft.com>
> > >
> > > hv_pci_devices_present is called in hv_pci_remove when we remove a
> > > PCI
> > device from host (e.g. by disabling SRIOV on a device). In
> > hv_pci_remove, the bus is already removed before the call, so we don't
> > need to rescan the bus in the workqueue scheduled from
> > hv_pci_devices_present. By introducing status hv_pcibus_removed, we
> can avoid this situation.
> > >
> > > Signed-off-by: Long Li <lon...@microsoft.com>
> > > Tested-by: Cathy Avery <cav...@redhat.com>
> > > Reported-by: Xiaofeng Wang <xiaof...@redhat.com>
> 
> Acked-by: K. Y. Srinivasan <k...@microsoft.com>
> 
> 
> >
> > I need an ack from the Hyper-V maintainers.  I see acks for previous
> > versions, but I don't know whether you've changed things that would
> > invalidate those acks.  If the acks still apply, please include them
> > and repost these patches.
> >
> > Also, please run "git log --oneline drivers/pci/host/pci-hyperv.c" and
> > make your subject line match the previous ones.
> >
> > > ---
> > >  drivers/pci/host/pci-hyperv.c | 20 +---
> > >  1 file changed, 17 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/pci/host/pci-hyperv.c
> > > b/drivers/pci/host/pci-hyperv.c index a8deeca..4a37598 100644
> > > --- a/drivers/pci/host/pci-hyperv.c
> > > +++ b/drivers/pci/host/pci-hyperv.c
> > > @@ -348,6 +348,7 @@ enum hv_pcibus_state {
> > >   hv_pcibus_init = 0,
> > >   hv_pcibus_probed,
> > >   hv_pcibus_installed,
> > > + hv_pcibus_removed,
> > >   hv_pcibus_maximum
> > >  };
> > >
> > > @@ -1481,13 +1482,24 @@ static void pci_devices_present_work(struct
> > work_struct *work)
> > >   put_pcichild(hpdev, hv_pcidev_ref_initial);
> > >   }
> > >
> > > - /* Tell the core to rescan bus because there may have been changes.
> > */
> > > - if (hbus->state == hv_pcibus_installed) {
> > > + switch (hbus->state) {
> > > + case hv_pcibus_installed:
> > > + /*
> > > +  * Tell the core to rescan bus
> > > +  * because there may have been changes.
> > > +  */
> > >   pci_lock_rescan_remove();
> > >   pci_scan_child_bus(hbus->pci_bus);
> > >   pci_unlock_rescan_remove();
> > > - } else {
> > > + break;
> > > +
> > > + case hv_pcibus_init:
> > > + case hv_pcibus_probed:
> > >   survey_child_resources(hbus);
> > > + break;
> > > +
> > > + default:
> > > + break;
> > >   }
> > >
> > >   up(>enum_sem);
> > > @@ -2163,6 +2175,7 @@ static int hv_pci_probe(struct hv_device *hdev,
> > >   hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
> > >   if (!hbus)
> > >   return -ENOMEM;
> > > + hbus->state = hv_pcibus_init;
> > >
> > >   /*
> > >* The PCI bus "domain" is what is called "segment" in ACPI and @@
> > > -2305,6 +2318,7 @@ static int hv_pci_remove(struct hv_device *hdev)
> > >   pci_stop_root_bus(hbus->pci_bus);
> > >   pci_remove_root_bus(hbus->pci_bus);
> > >   pci_unlock_rescan_remove();
> > > + hbus->state = hv_pcibus_removed;
> > >   }
> > >
> > >   ret = hv_send_resources_released(hdev);
> > > --
> > > 1.8.5.6
> > >
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-pci"
> > > in the body of a message to majord...@vger.kernel.org More
> majordomo
> > > info at
> >
> https://na01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fvger.k
> > e
> > rnel.org%2Fmajordomo-
> >
> info.html=02%7C01%7Ckys%40microsoft.com%7C982492a275ed4126c4
> >
> d308d40a7644da%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C6361
> >
> 44950466092469=9cXs6P1zoQ7qB%2BxYD9bsd%2BLMN%2BjwSPQkxnj
> > iqBdv9go%3D=0
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH v2] hv: retry infinitely on hypercall transient failures

2017-01-07 Thread Long Li


> -Original Message-
> From: Greg KH [mailto:g...@kroah.com]
> Sent: Friday, January 06, 2017 11:43 PM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; de...@linuxdriverproject.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH v2] hv: retry infinitely on hypercall transient failures
> 
> On Sat, Jan 07, 2017 at 07:23:14AM +, Long Li wrote:
> > > -Original Message-
> > > From: Greg KH [mailto:g...@kroah.com]
> > > Sent: Wednesday, January 04, 2017 11:48 PM
> > > To: Long Li <lon...@microsoft.com>
> > > Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> > > <haiya...@microsoft.com>; de...@linuxdriverproject.org; linux-
> > > ker...@vger.kernel.org
> > > Subject: Re: [PATCH v2] hv: retry infinitely on hypercall transient
> > > failures
> > >
> > > On Wed, Jan 04, 2017 at 06:12:20PM -0800, Long Li wrote:
> > > > From: Long Li <lon...@microsoft.com>
> > > >
> > > > Hyper-v host guarantees that a hypercall will finish in reasonable time.
> > > > Retry infinitely on transient failures to avoid returning error to upper
> layer.
> > >
> > > Again, never retry "forever", always have a way out, otherwise you will
> crash.
> > >
> > > And again, why are you making this change?  What problem does it solve?
> >
> > The problem it tries to solve is that in this code we are returning an
> > error prematurely on transient failures. The hypercall is used mostly
> > in channel establishment. If we return a transient failure, the VM may
> > not boot, or may not be useful after boot because some devices are missing.
> >
> > Another approach is to increase the number of retries. But we don't
> > know how many retries are safe, and the Windows host side expects the guest
> > to retry infinitely and not return an error on transient failures.
> 
> That implies a lot of trust in the host side, don't you think?
> 
> Worst case, make the delay a minute or so, but give the system a way out
> in case there's a bug in the host. As there will be bugs in the host, just 
> like
> there are bugs in the client :)

This makes sense. 1 minute is a long time for a hypercall. I will send V3.

> 
> thanks,
> 
> greg k-h
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH v2] hv: retry infinitely on hypercall transient failures

2017-01-06 Thread Long Li
> -Original Message-
> From: Greg KH [mailto:g...@kroah.com]
> Sent: Wednesday, January 04, 2017 11:48 PM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; de...@linuxdriverproject.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH v2] hv: retry infinitely on hypercall transient failures
> 
> On Wed, Jan 04, 2017 at 06:12:20PM -0800, Long Li wrote:
> > From: Long Li <lon...@microsoft.com>
> >
> > Hyper-v host guarantees that a hypercall will finish in reasonable time.
> > Retry infinitely on transient failures to avoid returning error to upper 
> > layer.
> 
> Again, never retry "forever", always have a way out, otherwise you will crash.
> 
> And again, why are you making this change?  What problem does it solve?

The problem it tries to solve is that in this code we are returning an error
prematurely on transient failures. The hypercall is used mostly in channel
establishment. If we return a transient failure, the VM may not boot, or may
not be useful after boot because some devices are missing.

Another approach is to increase the number of retries. But we don't know how
many retries are safe, and the Windows host side expects the guest to retry
infinitely and not return an error on transient failures.

> 
> greg k-h
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] hv: use substraction to update ring buffer index

2017-01-06 Thread Long Li


> -Original Message-
> From: Dan Carpenter [mailto:dan.carpen...@oracle.com]
> Sent: Thursday, January 05, 2017 3:40 AM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; de...@linuxdriverproject.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH] hv: use substraction to update ring buffer index
> 
> On Wed, Jan 04, 2017 at 08:08:22PM -0800, Long Li wrote:
> > From: Long Li <lon...@microsoft.com>
> >
> > The ring buffer code uses %= to calculate index. For x86/64, %=
> > compiles to div, more than 10 times slower than sub.
> >
> > Replace div with sub for this data heavy code path.
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > ---
> >  drivers/hv/ring_buffer.c | 9 ++---
> >  1 file changed, 6 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index
> > cd49cb1..f8eee6e 100644
> > --- a/drivers/hv/ring_buffer.c
> > +++ b/drivers/hv/ring_buffer.c
> > @@ -135,7 +135,8 @@ hv_get_next_readlocation_withoffset(struct
> hv_ring_buffer_info *ring_info,
> > u32 next = ring_info->ring_buffer->read_index;
> >
> > next += offset;
> > -   next %= ring_info->ring_datasize;
> > +   if (next >= ring_info->ring_datasize)
> > +   next -= ring_info->ring_datasize;
> 
> I take it that we trust that offset is roughly correct and not more than 2x
> ring_info->ring_datasize?  I guess there is only one caller so it's probably
> true...

Yes, you are right. It's not possible for the offset to reach 2x ring_datasize,
because it's not possible to transfer more data than ring_datasize over the
ring buffer.

> 
> >
> > return next;
> >  }
> > @@ -179,7 +180,8 @@ static u32 hv_copyfrom_ringbuffer(
> > memcpy(dest, ring_buffer + start_read_offset, destlen);
> >
> > start_read_offset += destlen;
> > -   start_read_offset %= ring_buffer_size;
> > +   if (start_read_offset >= ring_buffer_size)
> > +   start_read_offset -= ring_buffer_size;
> 
> I totally don't understand the original code here.  We do the memset and
> then we verify that we are not copying beyond the end of the ring buffer?  If
> feels like we should verify that offset + destlen aren't more than
> ring_buffer_size before we do the memcpy().

The ring buffer pages are mapped to a wraparound 2x virtual address space.
Please see hv_ringbuffer_init(). The call to vmap() sets up this virtual
address space. So we can use memcpy across the last page.

> 
> regards,
> dan carpenter
> 

Thanks for reviewing!

Long

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] hv: use substraction to update ring buffer index

2017-01-04 Thread Long Li
From: Long Li <lon...@microsoft.com>

The ring buffer code uses %= to calculate index. For x86/64, %= compiles to
div, more than 10 times slower than sub.

Replace div with sub for this data heavy code path.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/hv/ring_buffer.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index cd49cb1..f8eee6e 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -135,7 +135,8 @@ hv_get_next_readlocation_withoffset(struct 
hv_ring_buffer_info *ring_info,
u32 next = ring_info->ring_buffer->read_index;
 
next += offset;
-   next %= ring_info->ring_datasize;
+   if (next >= ring_info->ring_datasize)
+   next -= ring_info->ring_datasize;
 
return next;
 }
@@ -179,7 +180,8 @@ static u32 hv_copyfrom_ringbuffer(
memcpy(dest, ring_buffer + start_read_offset, destlen);
 
start_read_offset += destlen;
-   start_read_offset %= ring_buffer_size;
+   if (start_read_offset >= ring_buffer_size)
+   start_read_offset -= ring_buffer_size;
 
return start_read_offset;
 }
@@ -201,7 +203,8 @@ static u32 hv_copyto_ringbuffer(
memcpy(ring_buffer + start_write_offset, src, srclen);
 
start_write_offset += srclen;
-   start_write_offset %= ring_buffer_size;
+   if (start_write_offset >= ring_buffer_size)
+   start_write_offset -= ring_buffer_size;
 
return start_write_offset;
 }
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2] hv: retry infinitely on hypercall transient failures

2017-01-04 Thread Long Li
From: Long Li <lon...@microsoft.com>

Hyper-v host guarantees that a hypercall will finish in reasonable time.
Retry infinitely on transient failures to avoid returning error to upper layer.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/hv/connection.c | 30 ++
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 6ce8b87..4b3cfde 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -438,46 +438,44 @@ void vmbus_on_event(unsigned long data)
 int vmbus_post_msg(void *buffer, size_t buflen)
 {
union hv_connection_id conn_id;
-   int ret = 0;
-   int retries = 0;
+   int ret;
u32 usec = 1;
 
conn_id.asu32 = 0;
conn_id.u.id = VMBUS_MESSAGE_CONNECTION_ID;
 
/*
-* hv_post_message() can have transient failures because of
-* insufficient resources. Retry the operation a couple of
-* times before giving up.
+* hv_post_message() can have transient failures. We retry infinitely
+* on these failures because host guarantees hypercall will finish.
 */
-   while (retries < 20) {
+   while (1) {
ret = hv_post_message(conn_id, 1, buffer, buflen);
 
switch (ret) {
+   /*
+* Retry on transient failures:
+* 1. HV_STATUS_INVALID_CONNECTION_ID:
+*We send messages too frequently.
+*
+* 2. HV_STATUS_INSUFFICIENT_MEMORY and
+*HV_STATUS_INSUFFICIENT_BUFFERS:
+*The host is temporarily running out of resources.
+*/
case HV_STATUS_INVALID_CONNECTION_ID:
-   /*
-* We could get this if we send messages too
-* frequently.
-*/
-   ret = -EAGAIN;
-   break;
case HV_STATUS_INSUFFICIENT_MEMORY:
case HV_STATUS_INSUFFICIENT_BUFFERS:
-   ret = -ENOMEM;
break;
case HV_STATUS_SUCCESS:
-   return ret;
+   return 0;
default:
pr_err("hv_post_msg() failed; error code:%d\n", ret);
return -EINVAL;
}
 
-   retries++;
udelay(usec);
if (usec < 2048)
usec *= 2;
}
-   return ret;
 }
 
 /*
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] Retry infinitely for hypercall

2017-01-04 Thread Long Li


> -Original Message-
> From: Dan Carpenter [mailto:dan.carpen...@oracle.com]
> Sent: Wednesday, January 4, 2017 1:48 PM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; de...@linuxdriverproject.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH] Retry infinitely for hypercall
> 
> Fix the subsystem prefix in the subject.
> 
> On Wed, Jan 04, 2017 at 02:39:31PM -0800, Long Li wrote:
> > From: Long Li <lon...@microsoft.com>
> >
> > Hyper-v host guarantees that a hypercall will succeed. Retry infinitely to
> avoid returning transient failures to upper layer.
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > ---
> >  drivers/hv/connection.c | 17 -
> >  1 file changed, 8 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index
> > 6ce8b87..4bcb099 100644
> > --- a/drivers/hv/connection.c
> > +++ b/drivers/hv/connection.c
> > @@ -439,7 +439,6 @@ int vmbus_post_msg(void *buffer, size_t buflen)  {
> > union hv_connection_id conn_id;
> > int ret = 0;
> 
> Btw, when you disable GCC's uninitialized variable checking by storing bogus
> values in "ret", it's eventually going to bite you in the bum.
> Eventually you're going to get a bug that should have been detected through
> static analysis if only you hadn't disabled it.
> 
> > -   int retries = 0;
> > u32 usec = 1;
> >
> > conn_id.asu32 = 0;
> > @@ -447,10 +446,10 @@ int vmbus_post_msg(void *buffer, size_t buflen)
> >
> > /*
> >  * hv_post_message() can have transient failures because of
> > -* insufficient resources. Retry the operation a couple of
> > -* times before giving up.
> > +* insufficient resources. We retry infinitely on these failures
> > +* because host guarantees hypercall will eventually succeed.
> >  */
> > -   while (retries < 20) {
> > +   while (1) {
> > ret = hv_post_message(conn_id, 1, buffer, buflen);
> >
> > switch (ret) {
> > @@ -459,11 +458,11 @@ int vmbus_post_msg(void *buffer, size_t buflen)
> >  * We could get this if we send messages too
> >  * frequently.
> >  */
> 
> Move the comment above the code it's commenting about.
> 
>   /*
>* We could get INVALID_CONNECTION_ID if we flood the
>* host with too many messages.
>*/
>   case HV_STATUS_INVALID_CONNECTION_ID:
>   case HV_STATUS_INSUFFICIENT_MEMORY:
>   case HV_STATUS_INSUFFICIENT_BUFFERS:
>   break;
> 
> 
> 
> > -   ret = -EAGAIN;
> > -   break;
> > case HV_STATUS_INSUFFICIENT_MEMORY:
> > case HV_STATUS_INSUFFICIENT_BUFFERS:
> > -   ret = -ENOMEM;
> > +   /*
> > +* Temporary failure out of resources
> > +*/
> > break;
> > case HV_STATUS_SUCCESS:
> > return ret;
> 
>   return 0;
> 
> Better to be more explicit.  When I looked at this I got briefly confused if 
> this
> function was supposed to return HV_ statuses or standard kernel error
> codes.  It turns out that HV_STATUS_SUCCESS is zero the success returns
> map directly to linux kernel code for success but it's clearer to be explicit.
> 
> > @@ -472,12 +471,12 @@ int vmbus_post_msg(void *buffer, size_t buflen)
> > return -EINVAL;
> > }
> 
> > -   retries++;
> > udelay(usec);
> > if (usec < 2048)
> > usec *= 2;
> > }
> > -   return ret;
> > +   /* Impossible to get here */
> > +   BUG_ON(1);
> 
> Remove the comment and the BUG_ON().
> 
> regards,
> dan carpenter

Thanks, I will fix those in V2.

Long
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] Retry infinitely for hypercall

2017-01-04 Thread Long Li


> -Original Message-
> From: Greg KH [mailto:g...@kroah.com]
> Sent: Wednesday, January 4, 2017 12:51 PM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; de...@linuxdriverproject.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH] Retry infinitely for hypercall
> 
> On Wed, Jan 04, 2017 at 02:39:31PM -0800, Long Li wrote:
> > From: Long Li <lon...@microsoft.com>
> >
> > Hyper-v host guarantees that a hypercall will succeed. Retry infinitely to
> avoid returning transient failures to upper layer.
> 
> Please wrap your changelog at the proper column.

Will do in V2.
> 
> And what happens when the hypercall does not succeed?  How is the kernel
> going to recover from that?

Sorry, I should have used better wording in the patch. It should be "Retry
infinitely on transient failures for hypercall". The host guarantees that it
will return something other than a transient failure within a reasonably small
time frame. I will fix the comment in V2.

> 
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > ---
> >  drivers/hv/connection.c | 17 -
> >  1 file changed, 8 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index
> > 6ce8b87..4bcb099 100644
> > --- a/drivers/hv/connection.c
> > +++ b/drivers/hv/connection.c
> > @@ -439,7 +439,6 @@ int vmbus_post_msg(void *buffer, size_t buflen)  {
> > union hv_connection_id conn_id;
> > int ret = 0;
> > -   int retries = 0;
> > u32 usec = 1;
> >
> > conn_id.asu32 = 0;
> > @@ -447,10 +446,10 @@ int vmbus_post_msg(void *buffer, size_t buflen)
> >
> > /*
> >  * hv_post_message() can have transient failures because of
> > -* insufficient resources. Retry the operation a couple of
> > -* times before giving up.
> > +* insufficient resources. We retry infinitely on these failures
> > +* because host guarantees hypercall will eventually succeed.
> >  */
> > -   while (retries < 20) {
> > +   while (1) {
> > ret = hv_post_message(conn_id, 1, buffer, buflen);
> >
> > switch (ret) {
> > @@ -459,11 +458,11 @@ int vmbus_post_msg(void *buffer, size_t buflen)
> >  * We could get this if we send messages too
> >  * frequently.
> >  */
> > -   ret = -EAGAIN;
> > -   break;
> 
> Document you are falling through please, otherwise someone will "fix"
> this later.
Will add comment in V2.

> 
> > case HV_STATUS_INSUFFICIENT_MEMORY:
> > case HV_STATUS_INSUFFICIENT_BUFFERS:
> > -   ret = -ENOMEM;
> > +   /*
> > +* Temporary failure out of resources
> > +*/
> > break;
> > case HV_STATUS_SUCCESS:
> > return ret;
> > @@ -472,12 +471,12 @@ int vmbus_post_msg(void *buffer, size_t buflen)
> > return -EINVAL;
> > }
> >
> > -   retries++;
> > udelay(usec);
> > if (usec < 2048)
> > usec *= 2;
> > }
> > -   return ret;
> > +   /* Impossible to get here */
> > +   BUG_ON(1);
> 
> If it is impossible, why do you have this line at all?

I will remove this line. There is no way for the code to get here.

> 
> What is this trying to solve?  Do you need to increase the time spent waiting?
> We all know things break, please allow the kernel to stay alive if at all
> possible.

The purpose is to wait until the host returns a non-transient status code for a 
hypercall. However, we don't know how many transient failures we are getting 
before the host returns a final status code. So use the infinite loop to wait 
until the host returns the final status code.

Thanks for reviewing. I will send V2 to address the comment.

Long

> 
> thanks,
> 
> greg k-h
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] Retry infinitely for hypercall

2017-01-04 Thread Long Li
From: Long Li <lon...@microsoft.com>

Hyper-v host guarantees that a hypercall will succeed. Retry infinitely to 
avoid returning transient failures to upper layer.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/hv/connection.c | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 6ce8b87..4bcb099 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -439,7 +439,6 @@ int vmbus_post_msg(void *buffer, size_t buflen)
 {
union hv_connection_id conn_id;
int ret = 0;
-   int retries = 0;
u32 usec = 1;
 
conn_id.asu32 = 0;
@@ -447,10 +446,10 @@ int vmbus_post_msg(void *buffer, size_t buflen)
 
/*
 * hv_post_message() can have transient failures because of
-* insufficient resources. Retry the operation a couple of
-* times before giving up.
+* insufficient resources. We retry infinitely on these failures
+* because host guarantees hypercall will eventually succeed.
 */
-   while (retries < 20) {
+   while (1) {
ret = hv_post_message(conn_id, 1, buffer, buflen);
 
switch (ret) {
@@ -459,11 +458,11 @@ int vmbus_post_msg(void *buffer, size_t buflen)
 * We could get this if we send messages too
 * frequently.
 */
-   ret = -EAGAIN;
-   break;
case HV_STATUS_INSUFFICIENT_MEMORY:
case HV_STATUS_INSUFFICIENT_BUFFERS:
-   ret = -ENOMEM;
+   /*
+* Temporary failure out of resources
+*/
break;
case HV_STATUS_SUCCESS:
return ret;
@@ -472,12 +471,12 @@ int vmbus_post_msg(void *buffer, size_t buflen)
return -EINVAL;
}
 
-   retries++;
udelay(usec);
if (usec < 2048)
usec *= 2;
}
-   return ret;
+   /* Impossible to get here */
+   BUG_ON(1);
 }
 
 /*
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] pci-hyperv: use kmalloc to allocate hypercall params buffer

2016-12-05 Thread Long Li


> -Original Message-
> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> Sent: Monday, December 5, 2016 8:53 AM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>;
> de...@linuxdriverproject.org; linux-ker...@vger.kernel.org; linux-
> p...@vger.kernel.org
> Subject: Re: [PATCH] pci-hyperv: use kmalloc to allocate hypercall params
> buffer
> 
> On Tue,  8 Nov 2016 14:04:38 -0800
> Long Li <lon...@exchange.microsoft.com> wrote:
> 
> > +   spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags);
> > +
> > +   params = &hbus->retarget_msi_interrupt_params;
> > +   memset(params, 0, sizeof(*params));
> > +   params->partition_id = HV_PARTITION_ID_SELF;
> > +   params->source = 1; /* MSI(-X) */
> > +   params->address = msi_desc->msg.address_lo;
> > +   params->data = msi_desc->msg.data;
> > +   params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
> >(hbus->hdev->dev_instance.b[4] << 16) |
> >(hbus->hdev->dev_instance.b[7] << 8) |
> >(hbus->hdev->dev_instance.b[6] & 0xf8) |
> >PCI_FUNC(pdev->devfn);
> > -   params.vector = cfg->vector;
> > +   params->vector = cfg->vector;
> >
> > for_each_cpu_and(cpu, dest, cpu_online_mask)
> > -   params.vp_mask |= (1ULL <<
> vmbus_cpu_number_to_vp_number(cpu));
> > +   params->vp_mask |= (1ULL <<
> vmbus_cpu_number_to_vp_number(cpu));
> > +
> > +   hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, params, NULL);
> >
> > -   hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, &params, NULL);
> > +   spin_unlock_irqrestore(&hbus->retarget_msi_interrupt_lock, flags);
> 
> It looks like the additional locking here is being overly paranoid.
> The caller is already holding the irq descriptor lock. Look at fixup_irqs.

You are right. On my test machine, there are two possible places calling
hv_irq_unmask(): request_irq() and handle_edge_irq(). They both have
desc->lock held when calling .irq_unmask on the chip. A review of the IRQ code
shows that desc->lock is always held while calling chip->irq_unmask().

Since the lock doesn't do any harm and it is not on performance code path, we 
can remove the lock in the upcoming patches.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] pci-hyperv: use kmalloc to allocate hypercall params buffer

2016-12-05 Thread Long Li
> -Original Message-
> From: KY Srinivasan
> Sent: Monday, December 5, 2016 1:23 PM
> To: Cathy Avery <cav...@redhat.com>; Bjorn Helgaas
> <helg...@kernel.org>; Long Li <lon...@microsoft.com>
> Cc: de...@linuxdriverproject.org
> Subject: RE: [PATCH] pci-hyperv: use kmalloc to allocate hypercall params
> buffer
> 
> 
> 
> > -Original Message-
> > From: devel [mailto:driverdev-devel-boun...@linuxdriverproject.org] On
> > Behalf Of Cathy Avery
> > Sent: Monday, December 5, 2016 4:54 AM
> > To: Bjorn Helgaas <helg...@kernel.org>; Long Li <lon...@microsoft.com>
> > Cc: de...@linuxdriverproject.org
> > Subject: Re: [PATCH] pci-hyperv: use kmalloc to allocate hypercall
> > params buffer
> >
> > Hi,
> >
> > Is the double semicolon a typo?
> 
> Yes; it is a typo.

I'll fix this.

> 
> K. Y
> >
> > Thanks,
> >
> > Cathy
> >
> > diff --git a/drivers/pci/host/pci-hyperv.c
> > b/drivers/pci/host/pci-hyperv.c index 763ff87..ca553df 100644
> > --- a/drivers/pci/host/pci-hyperv.c
> > +++ b/drivers/pci/host/pci-hyperv.c
> > @@ -378,6 +378,8 @@ struct hv_pcibus_device {
> > struct msi_domain_info msi_info;
> > struct msi_controller msi_chip;
> > struct irq_domain *irq_domain;
> > +   struct retarget_msi_interrupt retarget_msi_interrupt_params;
> > +   spinlock_t retarget_msi_interrupt_lock;;
> >   };
> >
> >
> >
> > On 11/29/2016 06:25 PM, Bjorn Helgaas wrote:
> > > On Tue, Nov 08, 2016 at 02:04:38PM -0800, Long Li wrote:
> > >> From: Long Li <lon...@microsoft.com>
> > >>
> > >> hv_do_hypercall assumes that we pass a segment from a physically
> > >> continuous buffer. Buffer allocated on the stack may not work if
> > >> CONFIG_VMAP_STACK=y is set.
> > >>
> > >> Change to use kmalloc to allocate this buffer.
> > >>
> > >> The v2 patch adds locking to access the pre-allocated buffer.
> > >>
> > >> Signed-off-by: Long Li <lon...@microsoft.com>
> > >> Reported-by: Haiyang Zhang <haiya...@microsoft.com>
> > > Applied with KY's ack to pci/host-hv, thanks!
> > >
> > >> ---
> > >>   drivers/pci/host/pci-hyperv.c | 29 +++--
> > >>   1 file changed, 19 insertions(+), 10 deletions(-)
> > >>
> > >> diff --git a/drivers/pci/host/pci-hyperv.c
> > >> b/drivers/pci/host/pci-hyperv.c index 763ff87..ca553df 100644
> > >> --- a/drivers/pci/host/pci-hyperv.c
> > >> +++ b/drivers/pci/host/pci-hyperv.c
> > >> @@ -378,6 +378,8 @@ struct hv_pcibus_device {
> > >>  struct msi_domain_info msi_info;
> > >>  struct msi_controller msi_chip;
> > >>  struct irq_domain *irq_domain;
> > >> +struct retarget_msi_interrupt retarget_msi_interrupt_params;
> > >> +spinlock_t retarget_msi_interrupt_lock;;
> > >>   };
> > >>
> > >>   /*
> > >> @@ -774,34 +776,40 @@ void hv_irq_unmask(struct irq_data *data)
> > >>   {
> > >>  struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
> > >>  struct irq_cfg *cfg = irqd_cfg(data);
> > >> -struct retarget_msi_interrupt params;
> > >> +struct retarget_msi_interrupt *params;
> > >>  struct hv_pcibus_device *hbus;
> > >>  struct cpumask *dest;
> > >>  struct pci_bus *pbus;
> > >>  struct pci_dev *pdev;
> > >>  int cpu;
> > >> +unsigned long flags;
> > >>
> > >>  dest = irq_data_get_affinity_mask(data);
> > >>  pdev = msi_desc_to_pci_dev(msi_desc);
> > >>  pbus = pdev->bus;
> > >>  hbus = container_of(pbus->sysdata, struct hv_pcibus_device,
> > sysdata);
> > >>
> > >> -memset(&params, 0, sizeof(params));
> > >> -params.partition_id = HV_PARTITION_ID_SELF;
> > >> -params.source = 1; /* MSI(-X) */
> > >> -params.address = msi_desc->msg.address_lo;
> > >> -params.data = msi_desc->msg.data;
> > >> -params.device_id = (hbus->hdev->dev_instance.b[5] << 24) |
> > >> +spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags);
> > >> +
> > >> + 

RE: [Resend] [PATCH] pci-hyperv: use kmalloc to allocate hypercall params buffer

2016-11-08 Thread Long Li


> -Original Message-
> From: devel [mailto:driverdev-devel-boun...@linuxdriverproject.org] On
> Behalf Of Long Li
> Sent: Tuesday, November 8, 2016 8:57 AM
> To: Greg KH <gre...@linuxfoundation.org>
> Cc: linux-...@vger.kernel.org; Haiyang Zhang <haiya...@microsoft.com>;
> linux-ker...@vger.kernel.org; Bjorn Helgaas <bhelg...@google.com>;
> de...@linuxdriverproject.org
> Subject: RE: [Resend] [PATCH] pci-hyperv: use kmalloc to allocate hypercall
> params buffer
> 
> This sender failed our fraud detection checks and may not be who they
> appear to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing
> 
> > -Original Message-
> > From: Greg KH [mailto:gre...@linuxfoundation.org]
> > Sent: Monday, November 7, 2016 11:00 PM
> > To: Long Li <lon...@microsoft.com>
> > Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> > <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>;
> > de...@linuxdriverproject.org; linux-ker...@vger.kernel.org; linux-
> > p...@vger.kernel.org
> > Subject: Re: [Resend] [PATCH] pci-hyperv: use kmalloc to allocate
> > hypercall params buffer
> >
> > On Tue, Nov 08, 2016 at 12:14:14AM -0800, Long Li wrote:
> > > From: Long Li <lon...@microsoft.com>
> > >
> > > hv_do_hypercall assumes that we pass a segment from a physically
> > continuous buffer. Buffer allocated on the stack may not work if
> > CONFIG_VMAP_STACK=y is set. Use kmalloc to allocate this buffer.
> >
> > Please wrap your changelog at 72 columns.
> >
> > >
> > > Signed-off-by: Long Li <lon...@microsoft.com>
> > > Reported-by: Haiyang Zhang <haiya...@microsoft.com>
> > > ---
> > >  drivers/pci/host/pci-hyperv.c | 24 +---
> > >  1 file changed, 13 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/drivers/pci/host/pci-hyperv.c
> > > b/drivers/pci/host/pci-hyperv.c index 763ff87..97e6daf 100644
> > > --- a/drivers/pci/host/pci-hyperv.c
> > > +++ b/drivers/pci/host/pci-hyperv.c
> > > @@ -378,6 +378,7 @@ struct hv_pcibus_device {
> > > struct msi_domain_info msi_info;
> > > struct msi_controller msi_chip;
> > > struct irq_domain *irq_domain;
> > > +   struct retarget_msi_interrupt retarget_msi_interrupt_params;
> >
> > Can you handle potentially unaligned accesses like this?  Is there
> > some lock preventing you from using this structure more than once at the
> same time?
> >
> > >  };
> > >
> > >  /*
> > > @@ -774,7 +775,7 @@ void hv_irq_unmask(struct irq_data *data)  {
> > > struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
> > > struct irq_cfg *cfg = irqd_cfg(data);
> > > -   struct retarget_msi_interrupt params;
> > > +   struct retarget_msi_interrupt *params;
> > > struct hv_pcibus_device *hbus;
> > > struct cpumask *dest;
> > > struct pci_bus *pbus;
> > > @@ -785,23 +786,24 @@ void hv_irq_unmask(struct irq_data *data)
> > > pdev = msi_desc_to_pci_dev(msi_desc);
> > > pbus = pdev->bus;
> > > hbus = container_of(pbus->sysdata, struct hv_pcibus_device,
> > > sysdata);
> > > -
> > > -   memset(&params, 0, sizeof(params));
> > > -   params.partition_id = HV_PARTITION_ID_SELF;
> > > -   params.source = 1; /* MSI(-X) */
> > > -   params.address = msi_desc->msg.address_lo;
> > > -   params.data = msi_desc->msg.data;
> > > -   params.device_id = (hbus->hdev->dev_instance.b[5] << 24) |
> > > +   params = &hbus->retarget_msi_interrupt_params;
> > > +
> > > +   memset(params, 0, sizeof(*params));
> > > +   params->partition_id = HV_PARTITION_ID_SELF;
> > > +   params->source = 1; /* MSI(-X) */
> > > +   params->address = msi_desc->msg.address_lo;
> > > +   params->data = msi_desc->msg.data;
> > > +   params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
> > >(hbus->hdev->dev_instance.b[4] << 16) |
> > >(hbus->hdev->dev_instance.b[7] << 8) |
> > >(hbus->hdev->dev_instance.b[6] & 0xf8) |
> > >PCI_FUNC(pdev->devfn);
> > > -   params.vector = cfg->vector;
> > > +   params->vector = cfg->vector;
> > >
> > > for_each_cpu_and(cpu, dest, cpu_online_mask)
> > > -   params.vp

[PATCH] pci-hyperv: use kmalloc to allocate hypercall params buffer

2016-11-08 Thread Long Li
From: Long Li <lon...@microsoft.com>

hv_do_hypercall assumes that we pass a segment from a physically
contiguous buffer. Buffer allocated on the stack may not work if
CONFIG_VMAP_STACK=y is set.

Change to use kmalloc to allocate this buffer.

The v2 patch adds locking to access the pre-allocated buffer.

Signed-off-by: Long Li <lon...@microsoft.com>
Reported-by: Haiyang Zhang <haiya...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 29 +++--
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 763ff87..ca553df 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -378,6 +378,8 @@ struct hv_pcibus_device {
struct msi_domain_info msi_info;
struct msi_controller msi_chip;
struct irq_domain *irq_domain;
+   struct retarget_msi_interrupt retarget_msi_interrupt_params;
+   spinlock_t retarget_msi_interrupt_lock;;
 };
 
 /*
@@ -774,34 +776,40 @@ void hv_irq_unmask(struct irq_data *data)
 {
struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
struct irq_cfg *cfg = irqd_cfg(data);
-   struct retarget_msi_interrupt params;
+   struct retarget_msi_interrupt *params;
struct hv_pcibus_device *hbus;
struct cpumask *dest;
struct pci_bus *pbus;
struct pci_dev *pdev;
int cpu;
+   unsigned long flags;
 
dest = irq_data_get_affinity_mask(data);
pdev = msi_desc_to_pci_dev(msi_desc);
pbus = pdev->bus;
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
 
-   memset(&params, 0, sizeof(params));
-   params.partition_id = HV_PARTITION_ID_SELF;
-   params.source = 1; /* MSI(-X) */
-   params.address = msi_desc->msg.address_lo;
-   params.data = msi_desc->msg.data;
-   params.device_id = (hbus->hdev->dev_instance.b[5] << 24) |
+   spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags);
+
+   params = &hbus->retarget_msi_interrupt_params;
+   memset(params, 0, sizeof(*params));
+   params->partition_id = HV_PARTITION_ID_SELF;
+   params->source = 1; /* MSI(-X) */
+   params->address = msi_desc->msg.address_lo;
+   params->data = msi_desc->msg.data;
+   params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
   (hbus->hdev->dev_instance.b[4] << 16) |
   (hbus->hdev->dev_instance.b[7] << 8) |
   (hbus->hdev->dev_instance.b[6] & 0xf8) |
   PCI_FUNC(pdev->devfn);
-   params.vector = cfg->vector;
+   params->vector = cfg->vector;
 
for_each_cpu_and(cpu, dest, cpu_online_mask)
-   params.vp_mask |= (1ULL << vmbus_cpu_number_to_vp_number(cpu));
+   params->vp_mask |= (1ULL << vmbus_cpu_number_to_vp_number(cpu));
+
+   hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, params, NULL);
 
-   hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, &params, NULL);
+   spin_unlock_irqrestore(&hbus->retarget_msi_interrupt_lock, flags);
 
pci_msi_unmask_irq(data);
 }
@@ -2186,6 +2194,7 @@ static int hv_pci_probe(struct hv_device *hdev,
INIT_LIST_HEAD(&hbus->resources_for_children);
spin_lock_init(&hbus->config_lock);
spin_lock_init(&hbus->device_list_lock);
+   spin_lock_init(&hbus->retarget_msi_interrupt_lock);
sema_init(&hbus->enum_sem, 1);
init_completion(&hbus->remove_event);
 
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [Resend] [PATCH] pci-hyperv: use kmalloc to allocate hypercall params buffer

2016-11-08 Thread Long Li


> -Original Message-
> From: Greg KH [mailto:gre...@linuxfoundation.org]
> Sent: Monday, November 7, 2016 11:00 PM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>;
> de...@linuxdriverproject.org; linux-ker...@vger.kernel.org; linux-
> p...@vger.kernel.org
> Subject: Re: [Resend] [PATCH] pci-hyperv: use kmalloc to allocate hypercall
> params buffer
> 
> On Tue, Nov 08, 2016 at 12:14:14AM -0800, Long Li wrote:
> > From: Long Li <lon...@microsoft.com>
> >
> > hv_do_hypercall assumes that we pass a segment from a physically
> continuous buffer. Buffer allocated on the stack may not work if
> CONFIG_VMAP_STACK=y is set. Use kmalloc to allocate this buffer.
> 
> Please wrap your changelog at 72 columns.
> 
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > Reported-by: Haiyang Zhang <haiya...@microsoft.com>
> > ---
> >  drivers/pci/host/pci-hyperv.c | 24 +---
> >  1 file changed, 13 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/pci/host/pci-hyperv.c
> > b/drivers/pci/host/pci-hyperv.c index 763ff87..97e6daf 100644
> > --- a/drivers/pci/host/pci-hyperv.c
> > +++ b/drivers/pci/host/pci-hyperv.c
> > @@ -378,6 +378,7 @@ struct hv_pcibus_device {
> > struct msi_domain_info msi_info;
> > struct msi_controller msi_chip;
> > struct irq_domain *irq_domain;
> > +   struct retarget_msi_interrupt retarget_msi_interrupt_params;
> 
> Can you handle potentially unaligned accesses like this?  Is there some lock
> preventing you from using this structure more than once at the same time?
> 
> >  };
> >
> >  /*
> > @@ -774,7 +775,7 @@ void hv_irq_unmask(struct irq_data *data)  {
> > struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
> > struct irq_cfg *cfg = irqd_cfg(data);
> > -   struct retarget_msi_interrupt params;
> > +   struct retarget_msi_interrupt *params;
> > struct hv_pcibus_device *hbus;
> > struct cpumask *dest;
> > struct pci_bus *pbus;
> > @@ -785,23 +786,24 @@ void hv_irq_unmask(struct irq_data *data)
> > pdev = msi_desc_to_pci_dev(msi_desc);
> > pbus = pdev->bus;
> > hbus = container_of(pbus->sysdata, struct hv_pcibus_device,
> > sysdata);
> > -
> > -   memset(&params, 0, sizeof(params));
> > -   params.partition_id = HV_PARTITION_ID_SELF;
> > -   params.source = 1; /* MSI(-X) */
> > -   params.address = msi_desc->msg.address_lo;
> > -   params.data = msi_desc->msg.data;
> > -   params.device_id = (hbus->hdev->dev_instance.b[5] << 24) |
> > +   params = &hbus->retarget_msi_interrupt_params;
> > +
> > +   memset(params, 0, sizeof(*params));
> > +   params->partition_id = HV_PARTITION_ID_SELF;
> > +   params->source = 1; /* MSI(-X) */
> > +   params->address = msi_desc->msg.address_lo;
> > +   params->data = msi_desc->msg.data;
> > +   params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
> >(hbus->hdev->dev_instance.b[4] << 16) |
> >(hbus->hdev->dev_instance.b[7] << 8) |
> >(hbus->hdev->dev_instance.b[6] & 0xf8) |
> >PCI_FUNC(pdev->devfn);
> > -   params.vector = cfg->vector;
> > +   params->vector = cfg->vector;
> >
> > for_each_cpu_and(cpu, dest, cpu_online_mask)
> > -   params.vp_mask |= (1ULL <<
> vmbus_cpu_number_to_vp_number(cpu));
> > +   params->vp_mask |= (1ULL <<
> vmbus_cpu_number_to_vp_number(cpu));
> >
> > -   hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, &params, NULL);
> > +   hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, params, NULL);
> 
> As you only use this in one spot, why not just allocate it here and then free
> it?  Why add it to the pcibus device structure?

Thanks Greg. I will send a V2.

> 
> thanks,
> 
> greg k-h
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[Resend] [PATCH] pci-hyperv: use kmalloc to allocate hypercall params buffer

2016-11-07 Thread Long Li
From: Long Li <lon...@microsoft.com>

hv_do_hypercall assumes that we pass a segment from a physically contiguous
buffer. Buffer allocated on the stack may not work if CONFIG_VMAP_STACK=y is
set. Use kmalloc to allocate this buffer.

Signed-off-by: Long Li <lon...@microsoft.com>
Reported-by: Haiyang Zhang <haiya...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 763ff87..97e6daf 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -378,6 +378,7 @@ struct hv_pcibus_device {
struct msi_domain_info msi_info;
struct msi_controller msi_chip;
struct irq_domain *irq_domain;
+   struct retarget_msi_interrupt retarget_msi_interrupt_params;
 };
 
 /*
@@ -774,7 +775,7 @@ void hv_irq_unmask(struct irq_data *data)
 {
struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
struct irq_cfg *cfg = irqd_cfg(data);
-   struct retarget_msi_interrupt params;
+   struct retarget_msi_interrupt *params;
struct hv_pcibus_device *hbus;
struct cpumask *dest;
struct pci_bus *pbus;
@@ -785,23 +786,24 @@ void hv_irq_unmask(struct irq_data *data)
pdev = msi_desc_to_pci_dev(msi_desc);
pbus = pdev->bus;
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
-
-   memset(&params, 0, sizeof(params));
-   params.partition_id = HV_PARTITION_ID_SELF;
-   params.source = 1; /* MSI(-X) */
-   params.address = msi_desc->msg.address_lo;
-   params.data = msi_desc->msg.data;
-   params.device_id = (hbus->hdev->dev_instance.b[5] << 24) |
+   params = &hbus->retarget_msi_interrupt_params;
+
+   memset(params, 0, sizeof(*params));
+   params->partition_id = HV_PARTITION_ID_SELF;
+   params->source = 1; /* MSI(-X) */
+   params->address = msi_desc->msg.address_lo;
+   params->data = msi_desc->msg.data;
+   params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
   (hbus->hdev->dev_instance.b[4] << 16) |
   (hbus->hdev->dev_instance.b[7] << 8) |
   (hbus->hdev->dev_instance.b[6] & 0xf8) |
   PCI_FUNC(pdev->devfn);
-   params.vector = cfg->vector;
+   params->vector = cfg->vector;
 
for_each_cpu_and(cpu, dest, cpu_online_mask)
-   params.vp_mask |= (1ULL << vmbus_cpu_number_to_vp_number(cpu));
+   params->vp_mask |= (1ULL << vmbus_cpu_number_to_vp_number(cpu));
 
-   hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, &params, NULL);
+   hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, params, NULL);
 
pci_msi_unmask_irq(data);
 }
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] pci-hyperv: move hypercall buffer from stack to heap

2016-11-01 Thread Long Li
From: Long Li <lon...@microsoft.com>

We need to pass a segment from a physically contiguous buffer to
hv_do_hypercall. A buffer allocated on the stack may not work if
CONFIG_VMAP_STACK=y is set. Move the params buffer from the stack to a buffer
returned by kmalloc.

Signed-off-by: Long Li <lon...@microsoft.com>
Reported-by: Haiyang Zhang <haiya...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 763ff87..97e6daf 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -378,6 +378,7 @@ struct hv_pcibus_device {
struct msi_domain_info msi_info;
struct msi_controller msi_chip;
struct irq_domain *irq_domain;
+   struct retarget_msi_interrupt retarget_msi_interrupt_params;
 };
 
 /*
@@ -774,7 +775,7 @@ void hv_irq_unmask(struct irq_data *data)
 {
struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
struct irq_cfg *cfg = irqd_cfg(data);
-   struct retarget_msi_interrupt params;
+   struct retarget_msi_interrupt *params;
struct hv_pcibus_device *hbus;
struct cpumask *dest;
struct pci_bus *pbus;
@@ -785,23 +786,24 @@ void hv_irq_unmask(struct irq_data *data)
pdev = msi_desc_to_pci_dev(msi_desc);
pbus = pdev->bus;
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
-
-   memset(&params, 0, sizeof(params));
-   params.partition_id = HV_PARTITION_ID_SELF;
-   params.source = 1; /* MSI(-X) */
-   params.address = msi_desc->msg.address_lo;
-   params.data = msi_desc->msg.data;
-   params.device_id = (hbus->hdev->dev_instance.b[5] << 24) |
+   params = &hbus->retarget_msi_interrupt_params;
+
+   memset(params, 0, sizeof(*params));
+   params->partition_id = HV_PARTITION_ID_SELF;
+   params->source = 1; /* MSI(-X) */
+   params->address = msi_desc->msg.address_lo;
+   params->data = msi_desc->msg.data;
+   params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
   (hbus->hdev->dev_instance.b[4] << 16) |
   (hbus->hdev->dev_instance.b[7] << 8) |
   (hbus->hdev->dev_instance.b[6] & 0xf8) |
   PCI_FUNC(pdev->devfn);
-   params.vector = cfg->vector;
+   params->vector = cfg->vector;
 
for_each_cpu_and(cpu, dest, cpu_online_mask)
-   params.vp_mask |= (1ULL << vmbus_cpu_number_to_vp_number(cpu));
+   params->vp_mask |= (1ULL << vmbus_cpu_number_to_vp_number(cpu));
 
-   hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, &params, NULL);
+   hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, params, NULL);
 
pci_msi_unmask_irq(data);
 }
-- 
2.7.4

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 2/2 v3] pci-hyperv: lock pci bus on device eject

2016-10-03 Thread Long Li
From: Long Li <lon...@microsoft.com>

A PCI_EJECT message can arrive at the same time we are calling
pci_scan_child_bus in the workqueue for the previous PCI_BUS_RELATIONS message
or in create_root_hv_pci_bus(). In this case we could potentially modify the
bus from multiple places. Properly lock the bus access.

Thanks Dexuan Cui <de...@microsoft.com> for pointing out the race condition in 
create_root_hv_pci_bus().

Signed-off-by: Long Li <lon...@microsoft.com>
Tested-by: Cathy Avery <cav...@redhat.com>
Reported-by: Xiaofeng Wang <xiaof...@redhat.com>
---
 drivers/pci/host/pci-hyperv.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 4a37598..33c75c9 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -1198,9 +1198,11 @@ static int create_root_hv_pci_bus(struct 
hv_pcibus_device *hbus)
hbus->pci_bus->msi = &hbus->msi_chip;
hbus->pci_bus->msi->dev = &hbus->hdev->device;
 
+   pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_bus_assign_resources(hbus->pci_bus);
pci_bus_add_devices(hbus->pci_bus);
+   pci_unlock_rescan_remove();
hbus->state = hv_pcibus_installed;
return 0;
 }
@@ -1590,8 +1592,10 @@ static void hv_eject_device_work(struct work_struct 
*work)
pdev = pci_get_domain_bus_and_slot(hpdev->hbus->sysdata.domain, 0,
   wslot);
if (pdev) {
+   pci_lock_rescan_remove();
pci_stop_and_remove_bus_device(pdev);
pci_dev_put(pdev);
+   pci_unlock_rescan_remove();
}
 
memset(&ctxt, 0, sizeof(ctxt));
-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 1/2 v3] pci-hyperv: properly handle pci bus remove

2016-10-03 Thread Long Li
From: Long Li <lon...@microsoft.com>

hv_pci_devices_present is called in hv_pci_remove when we remove a PCI device 
from host (e.g. by disabling SRIOV on a device). In hv_pci_remove, the bus is 
already removed before the call, so we don't need to rescan the bus in the 
workqueue scheduled from hv_pci_devices_present. By introducing status 
hv_pcibus_removed, we can avoid this situation.

Signed-off-by: Long Li <lon...@microsoft.com>
Tested-by: Cathy Avery <cav...@redhat.com>
Reported-by: Xiaofeng Wang <xiaof...@redhat.com>
---
 drivers/pci/host/pci-hyperv.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index a8deeca..4a37598 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -348,6 +348,7 @@ enum hv_pcibus_state {
hv_pcibus_init = 0,
hv_pcibus_probed,
hv_pcibus_installed,
+   hv_pcibus_removed,
hv_pcibus_maximum
 };
 
@@ -1481,13 +1482,24 @@ static void pci_devices_present_work(struct work_struct 
*work)
put_pcichild(hpdev, hv_pcidev_ref_initial);
}
 
-   /* Tell the core to rescan bus because there may have been changes. */
-   if (hbus->state == hv_pcibus_installed) {
+   switch (hbus->state) {
+   case hv_pcibus_installed:
+   /*
+* Tell the core to rescan bus
+* because there may have been changes.
+*/
pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
-   } else {
+   break;
+
+   case hv_pcibus_init:
+   case hv_pcibus_probed:
survey_child_resources(hbus);
+   break;
+
+   default:
+   break;
}
 
up(&hbus->enum_sem);
@@ -2163,6 +2175,7 @@ static int hv_pci_probe(struct hv_device *hdev,
hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
if (!hbus)
return -ENOMEM;
+   hbus->state = hv_pcibus_init;
 
/*
 * The PCI bus "domain" is what is called "segment" in ACPI and
@@ -2305,6 +2318,7 @@ static int hv_pci_remove(struct hv_device *hdev)
pci_stop_root_bus(hbus->pci_bus);
pci_remove_root_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
+   hbus->state = hv_pcibus_removed;
}
 
ret = hv_send_resources_released(hdev);
-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] hv: do not lose pending heartbeat vmbus packets

2016-09-30 Thread Long Li


> -Original Message-
> From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
> Sent: Thursday, September 29, 2016 2:22 AM
> To: KY Srinivasan <k...@microsoft.com>; Long Li <lon...@microsoft.com>
> Cc: Haiyang Zhang <haiya...@microsoft.com>;
> de...@linuxdriverproject.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH] hv: do not lose pending heartbeat vmbus packets
> 
> Long Li <lon...@exchange.microsoft.com> writes:
> 
> > From: Long Li <lon...@microsoft.com>
> >
> > The host keeps sending heartbeat packets independent of guest
> responding to them. In some situations, there might be multiple heartbeat
> packets pending in the ring buffer. Don't lose them, read them all.
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> 
> Long, K. Y.,
> 
> it seems this patch didn't make it to char-misc tree and it looks like an
> important fix. A couple of nitpicks below,
> 
> > ---
> >  drivers/hv/hv_util.c | 10 +++---
> >  1 file changed, 7 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index
> > d5acaa2..9dc6372 100644
> > --- a/drivers/hv/hv_util.c
> > +++ b/drivers/hv/hv_util.c
> > @@ -283,10 +283,14 @@ static void heartbeat_onchannelcallback(void
> *context)
> > u8 *hbeat_txf_buf = util_heartbeat.recv_buffer;
> > struct icmsg_negotiate *negop = NULL;
> >
> > -   vmbus_recvpacket(channel, hbeat_txf_buf,
> > -PAGE_SIZE, , );
> > +   while (1) {
> > +
> > +   vmbus_recvpacket(channel, hbeat_txf_buf,
> > +PAGE_SIZE, , );
> 
> We should check vmbus_recvpacket() return value as well. E.g.
> hv_ringbuffer_read() may return -EAGAIN in case we didn't receive the
> whole packet (and we do this check in other drivers, see
> storvsc_on_channel_callback() for example).

I agree with you, we should check for -EAGAIN. This should also be done in
storvsc_on_channel_callback.

I think the chance of hv_ringbuffer_read() returning -EAGAIN is almost zero,
because read_index and write_index are updated only after the whole packet is
written to the ring buffer, and the updates are protected by memory barriers.
So getting a partial read is impossible unless the host is doing something
wrong.

Checking for recvlen is safe, because it's always set to 0 at the beginning of 
hv_ringbuffer_read().

Anyway, we should check for -EAGAIN in all Hyper-V drivers on read. I think
this is a separate issue about how we deal with a buggy host. I will send
another set of patches.

> 
> > +
> > +   if (!recvlen)
> 
> so this should be 'if (ret || !recvlen)'
> 
> > +   break;
> >
> > -   if (recvlen > 0) {
> > icmsghdrp = (struct icmsg_hdr *)_txf_buf[
> > sizeof(struct vmbuspipe_hdr)];
> 
> --
>   Vitaly
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 1/2 v2] pci-hyperv: properly handle pci bus remove

2016-09-27 Thread Long Li


> -Original Message-
> From: Bjorn Helgaas [mailto:helg...@kernel.org]
> Sent: Tuesday, September 27, 2016 12:30 PM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Bjorn Helgaas <bhelg...@google.com>;
> de...@linuxdriverproject.org; linux-...@vger.kernel.org; linux-
> ker...@vger.kernel.org; Long Li <lon...@microsoft.com>
> Subject: Re: [PATCH 1/2 v2] pci-hyperv: properly handle pci bus remove
> 
> On Wed, Sep 14, 2016 at 07:10:01PM -0700, Long Li wrote:
> > From: Long Li <lon...@microsoft.com>
> >
> > hv_pci_devices_present is called in hv_pci_remove when we remove a PCI
> device from host (e.g. by disabling SRIOV on a device). In hv_pci_remove,
> the bus is already removed before the call, so we don't need to rescan the
> bus in the workqueue scheduled from hv_pci_devices_present. By
> introducing status hv_pcibus_removed, we can avoid this situation.
> >
> > The patch fixes the following kernel panic.
> >
> > [  383.853124] Workqueue: events pci_devices_present_work [pci_hyperv]
> > [  383.853124] task: 88007f5f8000 ti: 88007f60 task.ti:
> > 88007f60
> > [  383.853124] RIP: 0010:[]  []
> > pci_is_pcie+0x6/0x20
> > [  383.853124] RSP: 0018:88007f603d38  EFLAGS: 00010206 [
> > 383.853124] RAX: 88007f5f8000 RBX: 642f3d4854415056 RCX:
> > 88007f603fd8
> > [  383.853124] RDX:  RSI:  RDI:
> > 642f3d4854415056
> > [  383.853124] RBP: 88007f603d68 R08: 0246 R09:
> > a045eb9e
> > [  383.853124] R10: 88007b419a80 R11: ea0001c0ef40 R12:
> > 880003ee1c00
> > [  383.853124] R13: 63702f30303a3137 R14:  R15:
> > 0246
> > [  383.853124] FS:  () GS:88007b40()
> > knlGS:
> > [  383.853124] CS:  0010 DS:  ES:  CR0: 80050033 [
> > 383.853124] CR2: 7f68b3f52350 CR3: 03546000 CR4:
> > 000406f0
> > [  383.853124] DR0:  DR1:  DR2:
> > 
> > [  383.853124] DR3:  DR6: 0ff0 DR7:
> > 0400
> > [  383.853124] Stack:
> > [  383.853124]  88007f603d68 8134db17 0008
> > 880003ee1c00
> > [  383.853124]  63702f30303a3137 880003d8edb8 88007f603da0
> > 8134ee2d [  383.853124]  880003d8ed00 88007f603dd8
> > 880075fec320
> > 880003d8edb8
> > [  383.853124] Call Trace:
> > [  383.853124]  [] ? pci_scan_slot+0x27/0x140 [
> > 383.853124]  [] pci_scan_child_bus+0x3d/0x150 [
> > 383.853124]  []
> > pci_devices_present_work+0x3ea/0x400 [pci_hyperv] [  383.853124]
> > [] process_one_work+0x17b/0x470 [  383.853124]
> > [] worker_thread+0x126/0x410 [  383.853124]
> > [] ? rescuer_thread+0x460/0x460 [  383.853124]
> > [] kthread+0xcf/0xe0 [  383.853124]
> > [] ?
> > kthread_create_on_node+0x140/0x140
> > [  383.853124]  [] ret_from_fork+0x58/0x90 [
> > 383.853124]  [] ?
> > kthread_create_on_node+0x140/0x140
> > [  383.853124] Code: 89 e5 5d 25 f0 00 00 00 c1 f8 04 c3 66 0f 1f 84
> > 00
> > 00 00 00 00 66 66 66 66 90 55 0f b6 47 4a 48 89 e5 5d c3 90 66 66 66
> > 66
> > 90 55 <80> 7f 4a 00 48 89 e5 5d 0f 95 c0 c3 0f 1f 40 00 66 2e 0f 1f 84
> > [  383.853124] RIP  [] pci_is_pcie+0x6/0x20 [
> > 383.853124]  RSP 
> 
> Personally, I would remove the timestamps and addresses from this trace
> because I don't think they contribute to diagnosing the problem.

Thanks Bjorn. I will remove those kernel traces and send a v3 patch.

> 
> > Signed-off-by: Long Li <lon...@microsoft.com>
> 
> I'm ready to apply these but am waiting for an ack from the maintainers listed
> in MAINTAINERS (feel free to update that if it's out of date).
> 
> > ---
> >  drivers/pci/host/pci-hyperv.c | 20 +---
> >  1 file changed, 17 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/pci/host/pci-hyperv.c
> > b/drivers/pci/host/pci-hyperv.c index a8deeca..4a37598 100644
> > --- a/drivers/pci/host/pci-hyperv.c
> > +++ b/drivers/pci/host/pci-hyperv.c
> > @@ -348,6 +348,7 @@ enum hv_pcibus_state {
> > hv_pcibus_init = 0,
> > hv_pcibus_probed,
> > hv_pcibus_installed,
> > +   hv_pcibus_removed,
> > hv_pcibus_maximum
> >  };
> >
> > @@ -1481,13 +1482,24 @@ static void pci_devices_present_work(struct
> work_struct *work)
> > put

RE: [PATCH 1/2 v2] pci-hyperv: properly handle pci bus remove

2016-09-27 Thread Long Li
Thanks for pointing that out.

If you don't mind, I will also add "Tested-by: Cathy Avery <cav...@redhat.com>".

> -Original Message-
> From: devel [mailto:driverdev-devel-boun...@linuxdriverproject.org] On
> Behalf Of Cathy Avery
> Sent: Friday, September 23, 2016 4:59 AM
> To: driverdev-devel@linuxdriverproject.org
> Subject: Re: [PATCH 1/2 v2] pci-hyperv: properly handle pci bus remove
> 
> Hi,
> 
> You seem to be missing the Reported-by tag.
> 
> That's xiaof...@redhat.com.
> 
> Cathy
> 
> On 09/14/2016 10:10 PM, Long Li wrote:
> > From: Long Li <lon...@microsoft.com>
> >
> > hv_pci_devices_present is called in hv_pci_remove when we remove a PCI
> device from host (e.g. by disabling SRIOV on a device). In hv_pci_remove,
> the bus is already removed before the call, so we don't need to rescan the
> bus in the workqueue scheduled from hv_pci_devices_present. By
> introducing status hv_pcibus_removed, we can avoid this situation.
> >
> > The patch fixes the following kernel panic.
> >
> > [  383.853124] Workqueue: events pci_devices_present_work [pci_hyperv]
> > [  383.853124] task: 88007f5f8000 ti: 88007f60 task.ti:
> > 88007f60
> > [  383.853124] RIP: 0010:[]  []
> > pci_is_pcie+0x6/0x20
> > [  383.853124] RSP: 0018:88007f603d38  EFLAGS: 00010206 [
> > 383.853124] RAX: 88007f5f8000 RBX: 642f3d4854415056 RCX:
> > 88007f603fd8
> > [  383.853124] RDX:  RSI:  RDI:
> > 642f3d4854415056
> > [  383.853124] RBP: 88007f603d68 R08: 0246 R09:
> > a045eb9e
> > [  383.853124] R10: 88007b419a80 R11: ea0001c0ef40 R12:
> > 880003ee1c00
> > [  383.853124] R13: 63702f30303a3137 R14:  R15:
> > 0246
> > [  383.853124] FS:  () GS:88007b40()
> > knlGS:
> > [  383.853124] CS:  0010 DS:  ES:  CR0: 80050033 [
> > 383.853124] CR2: 7f68b3f52350 CR3: 03546000 CR4:
> > 000406f0
> > [  383.853124] DR0:  DR1:  DR2:
> > 
> > [  383.853124] DR3:  DR6: 0ff0 DR7:
> > 0400
> > [  383.853124] Stack:
> > [  383.853124]  88007f603d68 8134db17 0008
> > 880003ee1c00
> > [  383.853124]  63702f30303a3137 880003d8edb8 88007f603da0
> > 8134ee2d [  383.853124]  880003d8ed00 88007f603dd8
> > 880075fec320
> > 880003d8edb8
> > [  383.853124] Call Trace:
> > [  383.853124]  [] ? pci_scan_slot+0x27/0x140 [
> > 383.853124]  [] pci_scan_child_bus+0x3d/0x150 [
> > 383.853124]  []
> > pci_devices_present_work+0x3ea/0x400 [pci_hyperv] [  383.853124]
> > [] process_one_work+0x17b/0x470 [  383.853124]
> > [] worker_thread+0x126/0x410 [  383.853124]
> > [] ? rescuer_thread+0x460/0x460 [  383.853124]
> > [] kthread+0xcf/0xe0 [  383.853124]
> > [] ?
> > kthread_create_on_node+0x140/0x140
> > [  383.853124]  [] ret_from_fork+0x58/0x90 [
> > 383.853124]  [] ?
> > kthread_create_on_node+0x140/0x140
> > [  383.853124] Code: 89 e5 5d 25 f0 00 00 00 c1 f8 04 c3 66 0f 1f 84
> > 00
> > 00 00 00 00 66 66 66 66 90 55 0f b6 47 4a 48 89 e5 5d c3 90 66 66 66
> > 66
> > 90 55 <80> 7f 4a 00 48 89 e5 5d 0f 95 c0 c3 0f 1f 40 00 66 2e 0f 1f 84
> > [  383.853124] RIP  [] pci_is_pcie+0x6/0x20 [
> > 383.853124]  RSP 
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > ---
> >   drivers/pci/host/pci-hyperv.c | 20 +---
> >   1 file changed, 17 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/pci/host/pci-hyperv.c
> > b/drivers/pci/host/pci-hyperv.c index a8deeca..4a37598 100644
> > --- a/drivers/pci/host/pci-hyperv.c
> > +++ b/drivers/pci/host/pci-hyperv.c
> > @@ -348,6 +348,7 @@ enum hv_pcibus_state {
> > hv_pcibus_init = 0,
> > hv_pcibus_probed,
> > hv_pcibus_installed,
> > +   hv_pcibus_removed,
> > hv_pcibus_maximum
> >   };
> >
> > @@ -1481,13 +1482,24 @@ static void pci_devices_present_work(struct
> work_struct *work)
> > put_pcichild(hpdev, hv_pcidev_ref_initial);
> > }
> >
> > -   /* Tell the core to rescan bus because there may have been changes.
> */
> > -   if (hbus->state == hv_pcibus_installed) {
> > +   switch (hbus->state) {
> > +   case hv_pcibus_installed:
> > +   /*
> > +* Tell the core

[PATCH 2/2 v2] pci-hyperv: lock pci bus on device eject

2016-09-14 Thread Long Li
From: Long Li <lon...@microsoft.com>

A PCI_EJECT message can arrive at the same time we are calling
pci_scan_child_bus in the workqueue for the previous PCI_BUS_RELATIONS message
or in create_root_hv_pci_bus(). In this case we could potentially modify the
bus from multiple places. Properly lock the bus access.

Thanks Dexuan Cui <de...@microsoft.com> for pointing out the race condition in 
create_root_hv_pci_bus().

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 4a37598..33c75c9 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -1198,9 +1198,11 @@ static int create_root_hv_pci_bus(struct 
hv_pcibus_device *hbus)
hbus->pci_bus->msi = &hbus->msi_chip;
hbus->pci_bus->msi->dev = &hbus->hdev->device;
 
+   pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_bus_assign_resources(hbus->pci_bus);
pci_bus_add_devices(hbus->pci_bus);
+   pci_unlock_rescan_remove();
hbus->state = hv_pcibus_installed;
return 0;
 }
@@ -1590,8 +1592,10 @@ static void hv_eject_device_work(struct work_struct 
*work)
pdev = pci_get_domain_bus_and_slot(hpdev->hbus->sysdata.domain, 0,
   wslot);
if (pdev) {
+   pci_lock_rescan_remove();
pci_stop_and_remove_bus_device(pdev);
pci_dev_put(pdev);
+   pci_unlock_rescan_remove();
}
 
memset(&ctxt, 0, sizeof(ctxt));
-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 1/2 v2] pci-hyperv: properly handle pci bus remove

2016-09-14 Thread Long Li
From: Long Li <lon...@microsoft.com>

hv_pci_devices_present is called in hv_pci_remove when we remove a PCI device 
from host (e.g. by disabling SRIOV on a device). In hv_pci_remove, the bus is 
already removed before the call, so we don't need to rescan the bus in the 
workqueue scheduled from hv_pci_devices_present. By introducing status 
hv_pcibus_removed, we can avoid this situation.

The patch fixes the following kernel panic.

[  383.853124] Workqueue: events pci_devices_present_work [pci_hyperv]
[  383.853124] task: 88007f5f8000 ti: 88007f60 task.ti:
88007f60
[  383.853124] RIP: 0010:[]  []
pci_is_pcie+0x6/0x20
[  383.853124] RSP: 0018:88007f603d38  EFLAGS: 00010206
[  383.853124] RAX: 88007f5f8000 RBX: 642f3d4854415056 RCX:
88007f603fd8
[  383.853124] RDX:  RSI:  RDI:
642f3d4854415056
[  383.853124] RBP: 88007f603d68 R08: 0246 R09:
a045eb9e
[  383.853124] R10: 88007b419a80 R11: ea0001c0ef40 R12:
880003ee1c00
[  383.853124] R13: 63702f30303a3137 R14:  R15:
0246
[  383.853124] FS:  () GS:88007b40()
knlGS:
[  383.853124] CS:  0010 DS:  ES:  CR0: 80050033
[  383.853124] CR2: 7f68b3f52350 CR3: 03546000 CR4:
000406f0
[  383.853124] DR0:  DR1:  DR2:

[  383.853124] DR3:  DR6: 0ff0 DR7:
0400
[  383.853124] Stack:
[  383.853124]  88007f603d68 8134db17 0008
880003ee1c00
[  383.853124]  63702f30303a3137 880003d8edb8 88007f603da0
8134ee2d
[  383.853124]  880003d8ed00 88007f603dd8 880075fec320
880003d8edb8
[  383.853124] Call Trace:
[  383.853124]  [] ? pci_scan_slot+0x27/0x140
[  383.853124]  [] pci_scan_child_bus+0x3d/0x150
[  383.853124]  []
pci_devices_present_work+0x3ea/0x400 [pci_hyperv]
[  383.853124]  [] process_one_work+0x17b/0x470
[  383.853124]  [] worker_thread+0x126/0x410
[  383.853124]  [] ? rescuer_thread+0x460/0x460
[  383.853124]  [] kthread+0xcf/0xe0
[  383.853124]  [] ?
kthread_create_on_node+0x140/0x140
[  383.853124]  [] ret_from_fork+0x58/0x90
[  383.853124]  [] ?
kthread_create_on_node+0x140/0x140
[  383.853124] Code: 89 e5 5d 25 f0 00 00 00 c1 f8 04 c3 66 0f 1f 84 00
00 00 00 00 66 66 66 66 90 55 0f b6 47 4a 48 89 e5 5d c3 90 66 66 66 66
90 55 <80> 7f 4a 00 48 89 e5 5d 0f 95 c0 c3 0f 1f 40 00 66 2e 0f 1f 84
[  383.853124] RIP  [] pci_is_pcie+0x6/0x20
[  383.853124]  RSP 

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index a8deeca..4a37598 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -348,6 +348,7 @@ enum hv_pcibus_state {
hv_pcibus_init = 0,
hv_pcibus_probed,
hv_pcibus_installed,
+   hv_pcibus_removed,
hv_pcibus_maximum
 };
 
@@ -1481,13 +1482,24 @@ static void pci_devices_present_work(struct work_struct 
*work)
put_pcichild(hpdev, hv_pcidev_ref_initial);
}
 
-   /* Tell the core to rescan bus because there may have been changes. */
-   if (hbus->state == hv_pcibus_installed) {
+   switch (hbus->state) {
+   case hv_pcibus_installed:
+   /*
+* Tell the core to rescan bus
+* because there may have been changes.
+*/
pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
-   } else {
+   break;
+
+   case hv_pcibus_init:
+   case hv_pcibus_probed:
survey_child_resources(hbus);
+   break;
+
+   default:
+   break;
}
 
up(&hbus->enum_sem);
@@ -2163,6 +2175,7 @@ static int hv_pci_probe(struct hv_device *hdev,
hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
if (!hbus)
return -ENOMEM;
+   hbus->state = hv_pcibus_init;
 
/*
 * The PCI bus "domain" is what is called "segment" in ACPI and
@@ -2305,6 +2318,7 @@ static int hv_pci_remove(struct hv_device *hdev)
pci_stop_root_bus(hbus->pci_bus);
pci_remove_root_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
+   hbus->state = hv_pcibus_removed;
}
 
ret = hv_send_resources_released(hdev);
-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 2/2] pci-hyperv: properly handle device eject

2016-09-14 Thread Long Li
> -Original Message-
> From: Dexuan Cui
> Sent: Tuesday, September 13, 2016 10:45 PM
> To: Long Li <lon...@microsoft.com>; KY Srinivasan <k...@microsoft.com>;
> Haiyang Zhang <haiya...@microsoft.com>; Bjorn Helgaas
> <bhelg...@google.com>
> Cc: de...@linuxdriverproject.org; linux-ker...@vger.kernel.org; linux-
> p...@vger.kernel.org
> Subject: RE: [PATCH 2/2] pci-hyperv: properly handle device eject
> 
> > From: Long Li
> > Sent: Wednesday, September 14, 2016 1:41
> >
> > I think this code is safe here. If we reach the code
> > pci_stop_and_remove_bus_device_locked, create_root_hv_pci_bus() is
> > already called.
> 
> When hv_pci_probe() -> create_root_hv_pci_bus() -> pci_scan_child_bus()
> is running on one cpu, I think nothing in the current code can prevent
> hv_eject_device_work() -> pci_stop_and_remove_bus_device_locked()
> from running on another cpu?
> 
> The race window is pretty small however.

This is a valid race condition. I'll work on a V2 patch. Thanks!

> 
> Thanks,
> -- Dexuan
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 2/2] pci-hyperv: properly handle device eject

2016-09-13 Thread Long Li


> -Original Message-
> From: devel [mailto:driverdev-devel-boun...@linuxdriverproject.org] On
> Behalf Of Long Li
> Sent: Tuesday, September 13, 2016 10:33 AM
> To: Dexuan Cui <de...@microsoft.com>; KY Srinivasan
> <k...@microsoft.com>; Haiyang Zhang <haiya...@microsoft.com>; Bjorn
> Helgaas <bhelg...@google.com>
> Cc: de...@linuxdriverproject.org; linux-ker...@vger.kernel.org; linux-
> p...@vger.kernel.org
> Subject: RE: [PATCH 2/2] pci-hyperv: properly handle device eject
> 
> This sender failed our fraud detection checks and may not be who they
> appear to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing
> 
> > -Original Message-
> > From: Dexuan Cui
> > Sent: Tuesday, September 13, 2016 2:51 AM
> > To: Long Li <lon...@microsoft.com>; KY Srinivasan <k...@microsoft.com>;
> > Haiyang Zhang <haiya...@microsoft.com>; Bjorn Helgaas
> > <bhelg...@google.com>
> > Cc: de...@linuxdriverproject.org; linux-ker...@vger.kernel.org; linux-
> > p...@vger.kernel.org
> > Subject: RE: [PATCH 2/2] pci-hyperv: properly handle device eject
> >
> > > From: devel [mailto:driverdev-devel-boun...@linuxdriverproject.org]
> > > On Behalf Of Long Li
> > > Sent: Tuesday, September 13, 2016 7:54 ...
> > > A PCI_EJECT message can arrive at the same time we are calling
> > > pci_scan_child_bus in the workqueue for the previous
> > PCI_BUS_RELATIONS
> > > message, in this case we could potentailly modify the bus from two
> places.
> > > Properly lock the bus access.
> > >
> > > --- a/drivers/pci/host/pci-hyperv.c
> > > +++ b/drivers/pci/host/pci-hyperv.c
> > > @@ -1587,7 +1587,7 @@ static void hv_eject_device_work(struct
> > > work_struct
> > > *work)
> > > pdev =
> > > pci_get_domain_bus_and_slot(hpdev->hbus->sysdata.domain,
> > 0,
> > >wslot);
> > > if (pdev) {
> > > -   pci_stop_and_remove_bus_device(pdev);
> > > +   pci_stop_and_remove_bus_device_locked(pdev);
> > > pci_dev_put(pdev);
> > > }
> >
> > The _locked version tries to get the mutex pci_rescan_remove_lock.
> >
> > But it looks pci_scan_child_bus() doesn't try to get the mutex(?), so
> > how can this patch make sure the 2 code paths are not running
> simultaneously?
> 
> Thanks for the review.
> 
> The lock is to protect the following call to pci_scan_child_bus() in
> pci_devices_present_work():
> 
> /*
>  * Tell the core to rescan bus
>  * because there may have been changes.
>  */
> pci_lock_rescan_remove();
> pci_scan_child_bus(hbus->pci_bus);
> pci_unlock_rescan_remove();
> 
> This race condition has shown up in the tests.
> 
> You raised a valid concern in create_root_hv_pci_bus(). There might be
> another race condition there. I'll look into this.

I think this code is safe here. By the time we reach the call to
pci_stop_and_remove_bus_device_locked(), create_root_hv_pci_bus() has already
been called.

> 
> >
> > Thanks,
> > -- Dexuan
> ___
> devel mailing list
> de...@linuxdriverproject.org
> https://na01.safelinks.protection.outlook.com/?url=http%3a%2f%2fdriverde
> v.linuxdriverproject.org%2fmailman%2flistinfo%2fdriverdev-
> devel=02%7c01%7clongli%40microsoft.com%7c3d12ee6d87c140eb5114
> 08d3dbfc1713%7c72f988bf86f141af91ab2d7cd011db47%7c1%7c0%7c6360938
> 48185348266=a2GYqIBsQAFxszkKg3fl1nqqPgvZHh%2bAY2255RgrvUU
> %3d
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 2/2] pci-hyperv: properly handle device eject

2016-09-13 Thread Long Li


> -Original Message-
> From: Dexuan Cui
> Sent: Tuesday, September 13, 2016 2:51 AM
> To: Long Li <lon...@microsoft.com>; KY Srinivasan <k...@microsoft.com>;
> Haiyang Zhang <haiya...@microsoft.com>; Bjorn Helgaas
> <bhelg...@google.com>
> Cc: de...@linuxdriverproject.org; linux-ker...@vger.kernel.org; linux-
> p...@vger.kernel.org
> Subject: RE: [PATCH 2/2] pci-hyperv: properly handle device eject
> 
> > From: devel [mailto:driverdev-devel-boun...@linuxdriverproject.org] On
> > Behalf Of Long Li
> > Sent: Tuesday, September 13, 2016 7:54 ...
> > A PCI_EJECT message can arrive at the same time we are calling
> > pci_scan_child_bus in the workqueue for the previous
> PCI_BUS_RELATIONS
> > message, in this case we could potentailly modify the bus from two places.
> > Properly lock the bus access.
> >
> > --- a/drivers/pci/host/pci-hyperv.c
> > +++ b/drivers/pci/host/pci-hyperv.c
> > @@ -1587,7 +1587,7 @@ static void hv_eject_device_work(struct
> > work_struct
> > *work)
> > pdev = pci_get_domain_bus_and_slot(hpdev->hbus->sysdata.domain,
> 0,
> >wslot);
> > if (pdev) {
> > -   pci_stop_and_remove_bus_device(pdev);
> > +   pci_stop_and_remove_bus_device_locked(pdev);
> > pci_dev_put(pdev);
> > }
> 
> The _locked version tries to get the mutex pci_rescan_remove_lock.
> 
> But it looks pci_scan_child_bus() doesn't try to get the mutex(?), so how can
> this patch make sure the 2 code paths are not running simultaneously?

Thanks for the review.

The lock is to protect the following call to pci_scan_child_bus() in 
pci_devices_present_work():

/*
 * Tell the core to rescan bus
 * because there may have been changes.
 */
pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_unlock_rescan_remove();

This race condition has shown up in the tests.

You raised a valid concern in create_root_hv_pci_bus(). There might be another 
race condition there. I'll look into this.

> 
> Thanks,
> -- Dexuan
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] hv: do not lose pending heartbeat vmbus packets

2016-09-12 Thread Long Li
From: Long Li <lon...@microsoft.com>

The host keeps sending heartbeat packets regardless of whether the guest
responds to them. In some situations, there might be multiple heartbeat packets
pending in the ring buffer. Don't lose them; read them all.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/hv/hv_util.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index d5acaa2..9dc6372 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -283,10 +283,14 @@ static void heartbeat_onchannelcallback(void *context)
u8 *hbeat_txf_buf = util_heartbeat.recv_buffer;
struct icmsg_negotiate *negop = NULL;
 
-   vmbus_recvpacket(channel, hbeat_txf_buf,
-PAGE_SIZE, , );
+   while (1) {
+
+   vmbus_recvpacket(channel, hbeat_txf_buf,
+PAGE_SIZE, , );
+
+   if (!recvlen)
+   break;
 
-   if (recvlen > 0) {
icmsghdrp = (struct icmsg_hdr *)_txf_buf[
sizeof(struct vmbuspipe_hdr)];
 
-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 2/2] pci-hyperv: properly handle device eject

2016-09-12 Thread Long Li
From: Long Li <lon...@microsoft.com>

A PCI_EJECT message can arrive at the same time we are calling
pci_scan_child_bus in the workqueue for the previous PCI_BUS_RELATIONS message.
In this case we could potentially modify the bus from two places. Properly lock
the bus access.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 3c2b330..ca77009 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -1587,7 +1587,7 @@ static void hv_eject_device_work(struct work_struct *work)
pdev = pci_get_domain_bus_and_slot(hpdev->hbus->sysdata.domain, 0,
   wslot);
if (pdev) {
-   pci_stop_and_remove_bus_device(pdev);
+   pci_stop_and_remove_bus_device_locked(pdev);
pci_dev_put(pdev);
}
 
-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 1/2] pci-hyperv: properly handle pci bus remove

2016-09-12 Thread Long Li
From: Long Li <lon...@microsoft.com>

hv_pci_devices_present is called in hv_pci_remove when we remove a PCI device 
from host (e.g. by disabling SRIOV on a device). In hv_pci_remove, the bus is 
already removed before the call, so we don't need to rescan the bus in the 
workqueue scheduled from hv_pci_devices_present. By introducing status 
hv_pcibus_removed, we can avoid this situation.

The patch fixes the following kernel panic.

[  383.853124] Workqueue: events pci_devices_present_work [pci_hyperv]
[  383.853124] task: 88007f5f8000 ti: 88007f60 task.ti:
88007f60
[  383.853124] RIP: 0010:[]  []
pci_is_pcie+0x6/0x20
[  383.853124] RSP: 0018:88007f603d38  EFLAGS: 00010206
[  383.853124] RAX: 88007f5f8000 RBX: 642f3d4854415056 RCX:
88007f603fd8
[  383.853124] RDX:  RSI:  RDI:
642f3d4854415056
[  383.853124] RBP: 88007f603d68 R08: 0246 R09:
a045eb9e
[  383.853124] R10: 88007b419a80 R11: ea0001c0ef40 R12:
880003ee1c00
[  383.853124] R13: 63702f30303a3137 R14:  R15:
0246
[  383.853124] FS:  () GS:88007b40()
knlGS:
[  383.853124] CS:  0010 DS:  ES:  CR0: 80050033
[  383.853124] CR2: 7f68b3f52350 CR3: 03546000 CR4:
000406f0
[  383.853124] DR0:  DR1:  DR2:

[  383.853124] DR3:  DR6: 0ff0 DR7:
0400
[  383.853124] Stack:
[  383.853124]  88007f603d68 8134db17 0008
880003ee1c00
[  383.853124]  63702f30303a3137 880003d8edb8 88007f603da0
8134ee2d
[  383.853124]  880003d8ed00 88007f603dd8 880075fec320
880003d8edb8
[  383.853124] Call Trace:
[  383.853124]  [] ? pci_scan_slot+0x27/0x140
[  383.853124]  [] pci_scan_child_bus+0x3d/0x150
[  383.853124]  []
pci_devices_present_work+0x3ea/0x400 [pci_hyperv]
[  383.853124]  [] process_one_work+0x17b/0x470
[  383.853124]  [] worker_thread+0x126/0x410
[  383.853124]  [] ? rescuer_thread+0x460/0x460
[  383.853124]  [] kthread+0xcf/0xe0
[  383.853124]  [] ?
kthread_create_on_node+0x140/0x140
[  383.853124]  [] ret_from_fork+0x58/0x90
[  383.853124]  [] ?
kthread_create_on_node+0x140/0x140
[  383.853124] Code: 89 e5 5d 25 f0 00 00 00 c1 f8 04 c3 66 0f 1f 84 00
00 00 00 00 66 66 66 66 90 55 0f b6 47 4a 48 89 e5 5d c3 90 66 66 66 66
90 55 <80> 7f 4a 00 48 89 e5 5d 0f 95 c0 c3 0f 1f 40 00 66 2e 0f 1f 84
[  383.853124] RIP  [] pci_is_pcie+0x6/0x20
[  383.853124]  RSP 

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index daa5fc3..26f049b 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -348,6 +348,7 @@ enum hv_pcibus_state {
hv_pcibus_init = 0,
hv_pcibus_probed,
hv_pcibus_installed,
+   hv_pcibus_removed,
hv_pcibus_maximum
 };
 
@@ -1481,13 +1482,24 @@ static void pci_devices_present_work(struct work_struct 
*work)
put_pcichild(hpdev, hv_pcidev_ref_initial);
}
 
-   /* Tell the core to rescan bus because there may have been changes. */
-   if (hbus->state == hv_pcibus_installed) {
+   switch (hbus->state) {
+   case hv_pcibus_installed:
+   /*
+* Tell the core to rescan bus
+* because there may have been changes.
+*/
pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
-   } else {
+   break;
+
+   case hv_pcibus_init:
+   case hv_pcibus_probed:
survey_child_resources(hbus);
+   break;
+
+   default:
+   break;
}
 
up(>enum_sem);
@@ -2163,6 +2175,7 @@ static int hv_pci_probe(struct hv_device *hdev,
hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
if (!hbus)
return -ENOMEM;
+   hbus->state = hv_pcibus_init;
 
/*
 * The PCI bus "domain" is what is called "segment" in ACPI and
@@ -2305,6 +2318,7 @@ static int hv_pci_remove(struct hv_device *hdev)
pci_stop_root_bus(hbus->pci_bus);
pci_remove_root_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
+   hbus->state = hv_pcibus_removed;
}
 
ret = hv_send_resources_released(hdev);
-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 1/3] storvsc: use tagged SRB requests if supported by the device

2016-09-07 Thread Long Li
> -Original Message-
> From: Johannes Thumshirn [mailto:jthumsh...@suse.de]
> Sent: Wednesday, September 7, 2016 12:47 AM
> To: Long Li <lon...@exchange.microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; James E.J. Bottomley
> <j...@linux.vnet.ibm.com>; Martin K. Petersen
> <martin.peter...@oracle.com>; de...@linuxdriverproject.org; linux-
> s...@vger.kernel.org; linux-ker...@vger.kernel.org; Long Li
> <lon...@microsoft.com>
> Subject: Re: [PATCH 1/3] storvsc: use tagged SRB requests if supported by
> the device
> 
> On Tue, Sep 06, 2016 at 02:25:41PM -0700, Long Li wrote:
> > From: Long Li <lon...@microsoft.com>
> >
> > Properly set SRB flags when hosting device supports tagged queuing. This
> patch improves the performance on Fiber Channel disks.
> 
> ENOSIGNEDOFF and please use checkpatch.pl on the patch.

Thanks for pointing that out. I'll re-send the patches.
> 
> >
> > ---
> >  drivers/scsi/storvsc_drv.c | 8 
> >  1 file changed, 8 insertions(+)
> >
> > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> > index 8ccfc9e..a8f3e4c 100644
> > --- a/drivers/scsi/storvsc_drv.c
> > +++ b/drivers/scsi/storvsc_drv.c
> > @@ -136,6 +136,8 @@ struct hv_fc_wwn_packet {
> >  #define SRB_FLAGS_PORT_DRIVER_RESERVED 0x0F00
> >  #define SRB_FLAGS_CLASS_DRIVER_RESERVED0xF000
> >
> > +#define SP_UNTAGGED((unsigned char) ~0)
> > +#define SRB_SIMPLE_TAG_REQUEST 0x20
> >
> >  /*
> >   * Platform neutral description of a scsi request - @@ -1451,6
> > +1453,12 @@ static int storvsc_queuecommand(struct Scsi_Host *host,
> struct scsi_cmnd *scmnd)
> > vm_srb->win8_extension.srb_flags |=
> > SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
> >
> > +   if(scmnd->device->tagged_supported) {
> > +   vm_srb->win8_extension.srb_flags |=
> (SRB_FLAGS_QUEUE_ACTION_ENABLE | SRB_FLAGS_NO_QUEUE_FREEZE);
> > +   vm_srb->win8_extension.queue_tag = SP_UNTAGGED;
> > +   vm_srb->win8_extension.queue_action =
> SRB_SIMPLE_TAG_REQUEST;
> > +   }
> > +
> > /* Build the SRB */
> > switch (scmnd->sc_data_direction) {
> > case DMA_TO_DEVICE:
> > --
> > 1.8.5.6
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-scsi"
> > in the body of a message to majord...@vger.kernel.org More
> majordomo
> > info at
> > https://na01.safelinks.protection.outlook.com/?url=http%3a%2f%2fvger.k
> > ernel.org%2fmajordomo-
> info.html=02%7c01%7clongli%40microsoft.com%
> >
> 7cdedd4c7ad4cf4955224d08d3d6f31f3d%7c72f988bf86f141af91ab2d7cd011db
> 47%
> >
> 7c1%7c0%7c636088312112339554=QvrOLvFjisQ4Nfz%2bkz1uyt7G7wh
> R7Uz7D
> > DlYMuc5VUM%3d
> 
> --
> Johannes Thumshirn  Storage
> jthumsh...@suse.de+49 911 74053 689
> SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
> GF: Felix Imendörffer, Jane Smithard, Graham Norton HRB 21284 (AG
> Nürnberg) Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76
> 0850
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 0/3] storvsc: fixes issues on Fiber Channel

2016-09-06 Thread Long Li
From: Long Li <lon...@microsoft.com>

This patch set fixes connectivity issues and improves performance for Fiber 
Channel disks.

Long Li (3):
  Use tagged SRB requests if supported by the device
  Properly handle SRB_ERROR when sense message is present
  Use block layer default segment size

 drivers/scsi/storvsc_drv.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 3/3] storvsc: use block layer default segment size

2016-09-06 Thread Long Li
From: Long Li <lon...@microsoft.com>

We no longer have the restriction of page size limit in the SG list. Remove it.
The driver can properly handle the default block segment size.

---
 drivers/scsi/storvsc_drv.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 8328c87..ac57f9c 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1271,9 +1271,6 @@ static int storvsc_do_io(struct hv_device *device,
 
 static int storvsc_device_configure(struct scsi_device *sdevice)
 {
-
-   blk_queue_max_segment_size(sdevice->request_queue, PAGE_SIZE);
-
blk_queue_bounce_limit(sdevice->request_queue, BLK_BOUNCE_ANY);
 
blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ));
-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 2/3] storvsc: properly handle SRB_ERROR when sense message is present

2016-09-06 Thread Long Li
From: Long Li <lon...@microsoft.com>

When a sense message is present on error, we should pass it along to the upper
layer to decide how to deal with the error. This patch fixes connectivity
issues with Fiber Channel devices.

---
 drivers/scsi/storvsc_drv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index a8f3e4c..8328c87 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -890,6 +890,9 @@ static void storvsc_handle_error(struct vmscsi_request 
*vm_srb,
 
switch (SRB_STATUS(vm_srb->srb_status)) {
case SRB_STATUS_ERROR:
+   /* Let upper layer deal with error when sense message is 
present */
+   if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)
+   break;
/*
 * If there is an error; offline the device since all
 * error recovery strategies would have already been
-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 1/3] storvsc: use tagged SRB requests if supported by the device

2016-09-06 Thread Long Li
From: Long Li <lon...@microsoft.com>

Properly set SRB flags when hosting device supports tagged queuing. This patch 
improves the performance on Fiber Channel disks.

---
 drivers/scsi/storvsc_drv.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 8ccfc9e..a8f3e4c 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -136,6 +136,8 @@ struct hv_fc_wwn_packet {
 #define SRB_FLAGS_PORT_DRIVER_RESERVED 0x0F00
 #define SRB_FLAGS_CLASS_DRIVER_RESERVED0xF000
 
+#define SP_UNTAGGED((unsigned char) ~0)
+#define SRB_SIMPLE_TAG_REQUEST 0x20
 
 /*
  * Platform neutral description of a scsi request -
@@ -1451,6 +1453,12 @@ static int storvsc_queuecommand(struct Scsi_Host *host, 
struct scsi_cmnd *scmnd)
vm_srb->win8_extension.srb_flags |=
SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
 
+   if(scmnd->device->tagged_supported) {
+   vm_srb->win8_extension.srb_flags |= 
(SRB_FLAGS_QUEUE_ACTION_ENABLE | SRB_FLAGS_NO_QUEUE_FREEZE);
+   vm_srb->win8_extension.queue_tag = SP_UNTAGGED;
+   vm_srb->win8_extension.queue_action = SRB_SIMPLE_TAG_REQUEST;
+   }
+
/* Build the SRB */
switch (scmnd->sc_data_direction) {
case DMA_TO_DEVICE:
-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] storvsc: add more logging for error and warning messages

2015-12-04 Thread Long Li
> -Original Message-
> From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
> Sent: Friday, December 4, 2015 1:53 AM
> To: Long Li <lon...@microsoft.com>
> Cc: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; James E.J. Bottomley <jbottom...@odin.com>;
> de...@linuxdriverproject.org; linux-ker...@vger.kernel.org; linux-
> s...@vger.kernel.org
> Subject: Re: [PATCH] storvsc: add more logging for error and warning
> messages
> 
> Long Li <lon...@microsoft.com> writes:
> 
> > Introduce a logging level for storvsc to log certain error/warning
> > messages. Those messages are helpful in some environments, e.g.
> > Microsoft Azure, for customer support and troubleshooting purposes.
> 
> I have an alternative suggestion: let's use dynamic debug! Basically, we need
> to convert all non-error logging to using dev_dbg() and this can be enabled
> dynamically when needed, even reboot won't be required.

This is a great idea for debugging!

I think the messages (srb errors) we want to log in this patch are real errors.
They are not for debugging, but for customer support in a production environment.

Those errors can be ignored in certain specific storage configurations due to 
some quirks. They are real errors on Azure, so we want to always log them.
 
> 
> >
> > Signed-off-by: Long Li <lon...@microsoft.com>
> > ---
> >  drivers/scsi/storvsc_drv.c | 30 +-
> >  1 file changed, 29 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> > index 40c43ae..afa1647 100644
> > --- a/drivers/scsi/storvsc_drv.c
> > +++ b/drivers/scsi/storvsc_drv.c
> > @@ -164,6 +164,21 @@ static int sense_buffer_size =
> > PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
> >  */
> >  static int vmstor_proto_version;
> >
> > +#define STORVSC_LOGGING_NONE   0
> > +#define STORVSC_LOGGING_ERROR  1
> > +#define STORVSC_LOGGING_WARN   2
> > +
> > +static int logging_level = STORVSC_LOGGING_ERROR;
> > +module_param(logging_level, int, S_IRUGO|S_IWUSR);
> > +MODULE_PARM_DESC(logging_level,
> > +   "Logging level, 0 - None, 1 - Error (default), 2 - Warning.");
> > +
> > +static inline bool do_logging(int level) {
> > +   return (logging_level >= level) ? true : false; }
> > +
> > +
> >  struct vmscsi_win8_extension {
> > /*
> >  * The following were added in Windows 8 @@ -1183,7 +1198,7 @@
> > static void storvsc_command_completion(struct storvsc_cmd_request
> > *cmd_request)
> >
> > scmnd->result = vm_srb->scsi_status;
> >
> > -   if (scmnd->result) {
> > +   if (scmnd->result && do_logging(STORVSC_LOGGING_ERROR)) {
> > if (scsi_normalize_sense(scmnd->sense_buffer,
> > SCSI_SENSE_BUFFERSIZE, _hdr))
> > scsi_print_sense_hdr(scmnd->device, "storvsc", @@
> -1239,12
> > +1254,25 @@ static void storvsc_on_io_completion(struct hv_device
> *device,
> > stor_pkt->vm_srb.sense_info_length =
> > vstor_packet->vm_srb.sense_info_length;
> >
> > +   if (vstor_packet->vm_srb.scsi_status != 0 ||
> > +   vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS)
> > +   if (do_logging(STORVSC_LOGGING_WARN))
> > +   dev_warn(>device,
> > +   "cmd 0x%x scsi status 0x%x srb status
> 0x%x\n",
> > +   stor_pkt->vm_srb.cdb[0],
> > +   vstor_packet->vm_srb.scsi_status,
> > +   vstor_packet->vm_srb.srb_status);
> >
> > if ((vstor_packet->vm_srb.scsi_status & 0xFF) == 0x02) {
> > /* CHECK_CONDITION */
> > if (vstor_packet->vm_srb.srb_status &
> > SRB_STATUS_AUTOSENSE_VALID) {
> > /* autosense data available */
> > +   if (do_logging(STORVSC_LOGGING_WARN))
> > +   dev_warn(>device,
> > +   "stor pkt %p autosense data valid -
> len %d\n",
> > +   request,
> > +   vstor_packet-
> >vm_srb.sense_info_length);
> >
> > memcpy(request->cmd->sense_buffer,
> >vstor_packet->vm_srb.sense_data,
> 
> --
>   Vitaly
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2] storvsc: add logging for error/warning messages

2015-12-03 Thread Long Li
Introduce a logging level for storvsc to log certain error/warning messages. 
Those messages are helpful in some environments, e.g. Microsoft Azure, for 
customer support and troubleshooting purposes.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 34 +-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 40c43ae..f46ed2c 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -164,6 +164,26 @@ static int sense_buffer_size = 
PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
 */
 static int vmstor_proto_version;
 
+#define STORVSC_LOGGING_NONE   0
+#define STORVSC_LOGGING_ERROR  1
+#define STORVSC_LOGGING_WARN   2
+
+static int logging_level = STORVSC_LOGGING_ERROR;
+module_param(logging_level, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(logging_level,
+   "Logging level, 0 - None, 1 - Error (default), 2 - Warning.");
+
+static inline bool do_logging(int level)
+{
+   return logging_level >= level;
+}
+
+#define storvsc_log(dev, level, fmt, ...)  \
+do {   \
+   if (do_logging(level))  \
+   dev_warn(&(dev)->device, fmt, ##__VA_ARGS__);   \
+} while (0)
+
 struct vmscsi_win8_extension {
/*
 * The following were added in Windows 8
@@ -1185,7 +1205,8 @@ static void storvsc_command_completion(struct 
storvsc_cmd_request *cmd_request)
 
if (scmnd->result) {
if (scsi_normalize_sense(scmnd->sense_buffer,
-   SCSI_SENSE_BUFFERSIZE, _hdr))
+   SCSI_SENSE_BUFFERSIZE, _hdr) &&
+   do_logging(STORVSC_LOGGING_ERROR))
scsi_print_sense_hdr(scmnd->device, "storvsc",
 _hdr);
}
@@ -1239,6 +1260,13 @@ static void storvsc_on_io_completion(struct hv_device 
*device,
stor_pkt->vm_srb.sense_info_length =
vstor_packet->vm_srb.sense_info_length;
 
+   if (vstor_packet->vm_srb.scsi_status != 0 ||
+   vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS)
+   storvsc_log(device, STORVSC_LOGGING_WARN,
+   "cmd 0x%x scsi status 0x%x srb status 0x%x\n",
+   stor_pkt->vm_srb.cdb[0],
+   vstor_packet->vm_srb.scsi_status,
+   vstor_packet->vm_srb.srb_status);
 
if ((vstor_packet->vm_srb.scsi_status & 0xFF) == 0x02) {
/* CHECK_CONDITION */
@@ -1246,6 +1274,10 @@ static void storvsc_on_io_completion(struct hv_device 
*device,
SRB_STATUS_AUTOSENSE_VALID) {
/* autosense data available */
 
+   storvsc_log(device, STORVSC_LOGGING_WARN,
+   "stor pkt %p autosense data valid - len %d\n",
+   request, 
vstor_packet->vm_srb.sense_info_length);
+
memcpy(request->cmd->sense_buffer,
   vstor_packet->vm_srb.sense_data,
   vstor_packet->vm_srb.sense_info_length);
-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] storvsc: add more logging for error and warning messages

2015-12-03 Thread Long Li
Introduce a logging level for storvsc to log certain error/warning messages. 
Those messages are helpful in some environments, e.g. Microsoft Azure, for 
customer support and troubleshooting purposes.

Signed-off-by: Long Li <lon...@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 30 +-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 40c43ae..afa1647 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -164,6 +164,21 @@ static int sense_buffer_size = 
PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
 */
 static int vmstor_proto_version;
 
+#define STORVSC_LOGGING_NONE   0
+#define STORVSC_LOGGING_ERROR  1
+#define STORVSC_LOGGING_WARN   2
+
+static int logging_level = STORVSC_LOGGING_ERROR;
+module_param(logging_level, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(logging_level,
+   "Logging level, 0 - None, 1 - Error (default), 2 - Warning.");
+
+static inline bool do_logging(int level)
+{
+   return (logging_level >= level) ? true : false;
+}
+
+
 struct vmscsi_win8_extension {
/*
 * The following were added in Windows 8
@@ -1183,7 +1198,7 @@ static void storvsc_command_completion(struct 
storvsc_cmd_request *cmd_request)
 
scmnd->result = vm_srb->scsi_status;
 
-   if (scmnd->result) {
+   if (scmnd->result && do_logging(STORVSC_LOGGING_ERROR)) {
if (scsi_normalize_sense(scmnd->sense_buffer,
SCSI_SENSE_BUFFERSIZE, _hdr))
scsi_print_sense_hdr(scmnd->device, "storvsc",
@@ -1239,12 +1254,25 @@ static void storvsc_on_io_completion(struct hv_device 
*device,
stor_pkt->vm_srb.sense_info_length =
vstor_packet->vm_srb.sense_info_length;
 
+   if (vstor_packet->vm_srb.scsi_status != 0 ||
+   vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS)
+   if (do_logging(STORVSC_LOGGING_WARN))
+   dev_warn(>device,
+   "cmd 0x%x scsi status 0x%x srb status 0x%x\n",
+   stor_pkt->vm_srb.cdb[0],
+   vstor_packet->vm_srb.scsi_status,
+   vstor_packet->vm_srb.srb_status);
 
if ((vstor_packet->vm_srb.scsi_status & 0xFF) == 0x02) {
/* CHECK_CONDITION */
if (vstor_packet->vm_srb.srb_status &
SRB_STATUS_AUTOSENSE_VALID) {
/* autosense data available */
+   if (do_logging(STORVSC_LOGGING_WARN))
+   dev_warn(>device,
+   "stor pkt %p autosense data valid - len 
%d\n",
+   request,
+   vstor_packet->vm_srb.sense_info_length);
 
memcpy(request->cmd->sense_buffer,
   vstor_packet->vm_srb.sense_data,
-- 
1.8.5.6

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] storvsc: add more logging for error and warning messages

2015-12-03 Thread Long Li
Thanks Joe.

I'll send out another patch.

> -Original Message-
> From: Joe Perches [mailto:j...@perches.com]
> Sent: Thursday, December 3, 2015 6:28 PM
> To: Long Li <lon...@microsoft.com>; KY Srinivasan <k...@microsoft.com>;
> Haiyang Zhang <haiya...@microsoft.com>; James E.J. Bottomley
> <jbottom...@odin.com>
> Cc: de...@linuxdriverproject.org; linux-s...@vger.kernel.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH] storvsc: add more logging for error and warning
> messages
> 
> On Thu, 2015-12-03 at 19:47 -0800, Long Li wrote:
> > Introduce a logging level for storvsc to log certain error/warning
> > messages. Those messages are helpful in some environments, e.g.
> > Microsoft Azure, for customer support and troubleshooting purposes.
> []
> > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> []
> > +static inline bool do_logging(int level) {
> > +   return (logging_level >= level) ? true : false;
> 
> The ternary is not necessary
> 
>   return logging_level >= level;
> 
> is enough
> 
> > +}
> > +
> > +
> >  struct vmscsi_win8_extension {
> >     /*
> >      * The following were added in Windows 8 @@ -1183,7 +1198,7 @@
> > static void storvsc_command_completion(struct storvsc_cmd_request
> > *cmd_request)
> >
> >     scmnd->result = vm_srb->scsi_status;
> >
> > -   if (scmnd->result) {
> > +   if (scmnd->result && do_logging(STORVSC_LOGGING_ERROR)) {
> >     if (scsi_normalize_sense(scmnd->sense_buffer,
> >     SCSI_SENSE_BUFFERSIZE, _hdr))
> >     scsi_print_sense_hdr(scmnd->device, "storvsc",
> 
> Is it appropriate to make this scsi_normalize_sense call conditional on
> do_logging here?
> 
> > @@ -1239,12 +1254,25 @@ static void storvsc_on_io_completion(struct
> hv_device *device,
> >     stor_pkt->vm_srb.sense_info_length =
> >     vstor_packet->vm_srb.sense_info_length;
> >
> > +   if (vstor_packet->vm_srb.scsi_status != 0 ||
> > +   vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS)
> > +   if (do_logging(STORVSC_LOGGING_WARN))
> > +   dev_warn(>device,
> > +   "cmd 0x%x scsi status 0x%x srb status
> 0x%x\n",
> > +   stor_pkt->vm_srb.cdb[0],
> > +   vstor_packet->vm_srb.scsi_status,
> > +   vstor_packet->vm_srb.srb_status);
> 
> It might make some sense to use another macro indirection like
> 
> #define svc_log_warn(dev, level, fmt, ...)\
> do {  \
>   if (do_logging(STORSVC_LOGGING_##level) \
>   dev_warn(&(dev)->device, fmt, ##__VA_ARGS__);   \
> } while (0)
> 
> So a use could be:
> 
>   if (vstore_packet...)
>   svc_log_warn(device, WARN, ...);
> 
> >
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH] scsi: storvsc: use shost_for_each_device() instead of open coding

2015-07-03 Thread Long Li


 -Original Message-
 From: KY Srinivasan
 Sent: Friday, July 03, 2015 11:35 AM
 To: Vitaly Kuznetsov; linux-s...@vger.kernel.org
 Cc: Long Li; Haiyang Zhang; James E.J. Bottomley; 
 de...@linuxdriverproject.org;
 linux-ker...@vger.kernel.org
 Subject: RE: [PATCH] scsi: storvsc: use shost_for_each_device() instead of 
 open
 coding
 
 
 
  -Original Message-
  From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
  Sent: Wednesday, July 1, 2015 2:31 AM
  To: linux-s...@vger.kernel.org
  Cc: Long Li; KY Srinivasan; Haiyang Zhang; James E.J. Bottomley;
  de...@linuxdriverproject.org; linux-ker...@vger.kernel.org
  Subject: [PATCH] scsi: storvsc: use shost_for_each_device() instead of
  open coding
 
  Comment in struct Scsi_Host says that drivers are not supposed to
  access __devices directly. storvsc_host_scan() doesn't happen in irq
  context so we can just use shost_for_each_device().
 
  Signed-off-by: Vitaly Kuznetsov vkuzn...@redhat.com
 
 Signed-off-by: K. Y. Srinivasan k...@microsoft.com
Reviewed-by: Long Li lon...@microsoft.com
  ---
   drivers/scsi/storvsc_drv.c | 9 +
   1 file changed, 1 insertion(+), 8 deletions(-)
 
  diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
  index 3c6584f..9ea912b 100644
  --- a/drivers/scsi/storvsc_drv.c
  +++ b/drivers/scsi/storvsc_drv.c
  @@ -426,7 +426,6 @@ static void storvsc_host_scan(struct work_struct
  *work)
  struct storvsc_scan_work *wrk;
  struct Scsi_Host *host;
  struct scsi_device *sdev;
  -   unsigned long flags;
 
  wrk = container_of(work, struct storvsc_scan_work, work);
  host = wrk-host;
  @@ -443,14 +442,8 @@ static void storvsc_host_scan(struct work_struct
  *work)
   * may have been removed this way.
   */
  mutex_lock(host-scan_mutex);
  -   spin_lock_irqsave(host-host_lock, flags);
  -   list_for_each_entry(sdev, host-__devices, siblings) {
  -   spin_unlock_irqrestore(host-host_lock, flags);
  +   shost_for_each_device(sdev, host)
  scsi_test_unit_ready(sdev, 1, 1, NULL);
  -   spin_lock_irqsave(host-host_lock, flags);
  -   continue;
  -   }
  -   spin_unlock_irqrestore(host-host_lock, flags);
  mutex_unlock(host-scan_mutex);
  /*
   * Now scan the host to discover LUNs that may have been added.
  --
  2.4.3

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


  1   2   >