RE: [PATCH V3 2/2 net-next] hyperv: Support batched notification

2015-03-19 Thread Jason Wang



On Fri, Mar 20, 2015 at 12:53 AM, KY Srinivasan k...@microsoft.com 
wrote:




 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Tuesday, March 17, 2015 8:09 PM
 To: KY Srinivasan
 Cc: da...@davemloft.net; net...@vger.kernel.org; linux-
 ker...@vger.kernel.org; de...@linuxdriverproject.org; 
o...@aepfle.de;

 a...@canonical.com; KY Srinivasan
 Subject: Re: [PATCH V3 2/2 net-next] hyperv: Support batched 
notification
 
 
 
 On Wed, Mar 18, 2015 at 12:02 AM, K. Y. Srinivasan 
k...@microsoft.com

 wrote:
  Optimize notifying the host by deferring notification until there
  are no more packets to be sent. This will help in batching the
  requests
  on the host.
 
  Signed-off-by: K. Y. Srinivasan k...@microsoft.com
  ---
   drivers/net/hyperv/hyperv_net.h   |2 +-
   drivers/net/hyperv/netvsc.c   |   14 +-
   drivers/net/hyperv/netvsc_drv.c   |2 +-
   drivers/net/hyperv/rndis_filter.c |2 +-
   4 files changed, 12 insertions(+), 8 deletions(-)
 
  diff --git a/drivers/net/hyperv/hyperv_net.h
  b/drivers/net/hyperv/hyperv_net.h
  index 4815843..3fd9896 100644
  --- a/drivers/net/hyperv/hyperv_net.h
  +++ b/drivers/net/hyperv/hyperv_net.h
  @@ -184,7 +184,7 @@ struct rndis_device {
   int netvsc_device_add(struct hv_device *device, void
  *additional_info);
   int netvsc_device_remove(struct hv_device *device);
   int netvsc_send(struct hv_device *device,
  - struct hv_netvsc_packet *packet);
  + struct hv_netvsc_packet *packet, bool kick_q);
   void netvsc_linkstatus_callback(struct hv_device *device_obj,
struct rndis_message *resp);
   int netvsc_recv_callback(struct hv_device *device_obj,
  diff --git a/drivers/net/hyperv/netvsc.c 
b/drivers/net/hyperv/netvsc.c

  index 208eb05..9003b94 100644
  --- a/drivers/net/hyperv/netvsc.c
  +++ b/drivers/net/hyperv/netvsc.c
  @@ -707,7 +707,7 @@ static u32 netvsc_copy_to_send_buf(struct
  netvsc_device *net_device,
   }
 
   int netvsc_send(struct hv_device *device,
  - struct hv_netvsc_packet *packet)
  + struct hv_netvsc_packet *packet, bool kick_q)
   {
struct netvsc_device *net_device;
int ret = 0;
  @@ -719,6 +719,7 @@ int netvsc_send(struct hv_device *device,
u32 msg_size = 0;
struct sk_buff *skb = NULL;
u16 q_idx = packet-q_idx;
  + u32 vmbus_flags =
 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
 
 
net_device = get_outbound_net_device(device);
  @@ -768,18 +769,21 @@ int netvsc_send(struct hv_device *device,
return -ENODEV;
 
if (packet-page_buf_cnt) {
  - ret = vmbus_sendpacket_pagebuffer(out_channel,
  + ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
  packet-page_buf,
  packet-page_buf_cnt,
  sendMessage,
  sizeof(struct
 nvsp_message),
  -   req_id);
  +   req_id,
  +   vmbus_flags,
  +   kick_q);
} else {
  - ret = vmbus_sendpacket(out_channel, sendMessage,
  + ret = vmbus_sendpacket_ctl(out_channel, sendMessage,
sizeof(struct nvsp_message),
req_id,
VM_PKT_DATA_INBAND,
  -
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
  + vmbus_flags,
  + kick_q);
}
 
if (ret == 0) {
  diff --git a/drivers/net/hyperv/netvsc_drv.c
  b/drivers/net/hyperv/netvsc_drv.c
  index a06bd66..eae58db 100644
  --- a/drivers/net/hyperv/netvsc_drv.c
  +++ b/drivers/net/hyperv/netvsc_drv.c
  @@ -556,7 +556,7 @@ do_send:
packet-page_buf_cnt = init_page_array(rndis_msg,
 rndis_msg_size,
skb, packet-page_buf[0]);
 
  - ret = netvsc_send(net_device_ctx-device_ctx, packet);
  + ret = netvsc_send(net_device_ctx-device_ctx, packet,
  !skb-xmit_more);
 
 
 Looks like the issue of V2 still there (E.g we need to kick when

 hv_ringbuffer_write() returns -EAGAIN?


Jason, this issue will be handled in the lower layer. I have already 
submitted a patch to the vmbus

Driver that correctly signals the host when EAGAIN is encountered.

Regards,

K. Y


Okay, find the mail but looks like I was not cced.

Please keep me in the cc list for hyperv patches.

Thanks

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH V3 2/2 net-next] hyperv: Support batched notification

2015-03-17 Thread Jason Wang



On Wed, Mar 18, 2015 at 12:02 AM, K. Y. Srinivasan k...@microsoft.com 
wrote:

Optimize notifying the host by deferring notification until there
are no more packets to be sent. This will help in batching the 
requests

on the host.

Signed-off-by: K. Y. Srinivasan k...@microsoft.com
---
 drivers/net/hyperv/hyperv_net.h   |2 +-
 drivers/net/hyperv/netvsc.c   |   14 +-
 drivers/net/hyperv/netvsc_drv.c   |2 +-
 drivers/net/hyperv/rndis_filter.c |2 +-
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h 
b/drivers/net/hyperv/hyperv_net.h

index 4815843..3fd9896 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -184,7 +184,7 @@ struct rndis_device {
 int netvsc_device_add(struct hv_device *device, void 
*additional_info);

 int netvsc_device_remove(struct hv_device *device);
 int netvsc_send(struct hv_device *device,
-   struct hv_netvsc_packet *packet);
+   struct hv_netvsc_packet *packet, bool kick_q);
 void netvsc_linkstatus_callback(struct hv_device *device_obj,
struct rndis_message *resp);
 int netvsc_recv_callback(struct hv_device *device_obj,
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 208eb05..9003b94 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -707,7 +707,7 @@ static u32 netvsc_copy_to_send_buf(struct 
netvsc_device *net_device,

 }
 
 int netvsc_send(struct hv_device *device,

-   struct hv_netvsc_packet *packet)
+   struct hv_netvsc_packet *packet, bool kick_q)
 {
struct netvsc_device *net_device;
int ret = 0;
@@ -719,6 +719,7 @@ int netvsc_send(struct hv_device *device,
u32 msg_size = 0;
struct sk_buff *skb = NULL;
u16 q_idx = packet-q_idx;
+   u32 vmbus_flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
 
 
 	net_device = get_outbound_net_device(device);

@@ -768,18 +769,21 @@ int netvsc_send(struct hv_device *device,
return -ENODEV;
 
 	if (packet-page_buf_cnt) {

-   ret = vmbus_sendpacket_pagebuffer(out_channel,
+   ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
  packet-page_buf,
  packet-page_buf_cnt,
  sendMessage,
  sizeof(struct nvsp_message),
- req_id);
+ req_id,
+ vmbus_flags,
+ kick_q);
} else {
-   ret = vmbus_sendpacket(out_channel, sendMessage,
+   ret = vmbus_sendpacket_ctl(out_channel, sendMessage,
sizeof(struct nvsp_message),
req_id,
VM_PKT_DATA_INBAND,
-   VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+   vmbus_flags,
+   kick_q);
}
 
 	if (ret == 0) {
diff --git a/drivers/net/hyperv/netvsc_drv.c 
b/drivers/net/hyperv/netvsc_drv.c

index a06bd66..eae58db 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -556,7 +556,7 @@ do_send:
packet-page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
skb, packet-page_buf[0]);
 
-	ret = netvsc_send(net_device_ctx-device_ctx, packet);
+	ret = netvsc_send(net_device_ctx-device_ctx, packet, 
!skb-xmit_more);
 


Looks like the issue of V2 still there (E.g we need to kick when 
hv_ringbuffer_write() returns -EAGAIN?


 drop:
if (ret == 0) {
diff --git a/drivers/net/hyperv/rndis_filter.c 
b/drivers/net/hyperv/rndis_filter.c

index ca81de0..05f3792 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -238,7 +238,7 @@ static int rndis_filter_send_request(struct 
rndis_device *dev,
 
 	packet-send_completion = NULL;
 
-	ret = netvsc_send(dev-net_dev-dev, packet);

+   ret = netvsc_send(dev-net_dev-dev, packet, true);
return ret;
 }
 
--

1.7.4.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH V2 3/3 net-next] hyperv: Support batched notification

2015-03-11 Thread Jason Wang



On Thu, Mar 12, 2015 at 3:04 AM, K. Y. Srinivasan k...@microsoft.com 
wrote:

Optimize notifying the host by deferring notification until there
are no more packets to be sent. This will help in batching the 
requests

on the host.

Signed-off-by: K. Y. Srinivasan k...@microsoft.com
---
 drivers/net/hyperv/hyperv_net.h   |2 +-
 drivers/net/hyperv/netvsc.c   |   14 +-
 drivers/net/hyperv/netvsc_drv.c   |3 ++-
 drivers/net/hyperv/rndis_filter.c |2 +-
 4 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h 
b/drivers/net/hyperv/hyperv_net.h

index 4815843..3fd9896 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -184,7 +184,7 @@ struct rndis_device {
 int netvsc_device_add(struct hv_device *device, void 
*additional_info);

 int netvsc_device_remove(struct hv_device *device);
 int netvsc_send(struct hv_device *device,
-   struct hv_netvsc_packet *packet);
+   struct hv_netvsc_packet *packet, bool kick_q);
 void netvsc_linkstatus_callback(struct hv_device *device_obj,
struct rndis_message *resp);
 int netvsc_recv_callback(struct hv_device *device_obj,
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 208eb05..9003b94 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -707,7 +707,7 @@ static u32 netvsc_copy_to_send_buf(struct 
netvsc_device *net_device,

 }
 
 int netvsc_send(struct hv_device *device,

-   struct hv_netvsc_packet *packet)
+   struct hv_netvsc_packet *packet, bool kick_q)
 {
struct netvsc_device *net_device;
int ret = 0;
@@ -719,6 +719,7 @@ int netvsc_send(struct hv_device *device,
u32 msg_size = 0;
struct sk_buff *skb = NULL;
u16 q_idx = packet-q_idx;
+   u32 vmbus_flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
 
 
 	net_device = get_outbound_net_device(device);

@@ -768,18 +769,21 @@ int netvsc_send(struct hv_device *device,
return -ENODEV;
 
 	if (packet-page_buf_cnt) {

-   ret = vmbus_sendpacket_pagebuffer(out_channel,
+   ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
  packet-page_buf,
  packet-page_buf_cnt,
  sendMessage,
  sizeof(struct nvsp_message),
- req_id);
+ req_id,
+ vmbus_flags,
+ kick_q);
} else {
-   ret = vmbus_sendpacket(out_channel, sendMessage,
+   ret = vmbus_sendpacket_ctl(out_channel, sendMessage,
sizeof(struct nvsp_message),
req_id,
VM_PKT_DATA_INBAND,
-   VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+   vmbus_flags,
+   kick_q);
}
 
 	if (ret == 0) {
diff --git a/drivers/net/hyperv/netvsc_drv.c 
b/drivers/net/hyperv/netvsc_drv.c

index a06bd66..80b4b29 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -384,6 +384,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, 
struct net_device *net)

u32 net_trans_info;
u32 hash;
u32 skb_length = skb-len;
+   bool kick_q = !skb-xmit_more;
 
 
 	/* We will atmost need two pages to describe the rndis

@@ -556,7 +557,7 @@ do_send:
packet-page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
skb, packet-page_buf[0]);
 
-	ret = netvsc_send(net_device_ctx-device_ctx, packet);

+   ret = netvsc_send(net_device_ctx-device_ctx, packet, kick_q);


Maybe just a !skb-xmit_more here to save a local variable.


 
 drop:

if (ret == 0) {
diff --git a/drivers/net/hyperv/rndis_filter.c 
b/drivers/net/hyperv/rndis_filter.c

index ca81de0..05f3792 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -238,7 +238,7 @@ static int rndis_filter_send_request(struct 
rndis_device *dev,
 
 	packet-send_completion = NULL;
 
-	ret = netvsc_send(dev-net_dev-dev, packet);

+   ret = netvsc_send(dev-net_dev-dev, packet, true);
return ret;
 }
 
--

1.7.4.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH V2 2/3 net-next] Drivers: hv: vmbus: Fix a bug in the signalling logic with kick_q

2015-03-11 Thread Jason Wang



On Thu, Mar 12, 2015 at 3:04 AM, K. Y. Srinivasan k...@microsoft.com 
wrote:
When the caller specifies that signalling should be deferred, we need 
to
address the case where we are not able to place the current packet 
because
the buffer is full. In this case, we will signal the host as some 
packets

may have been placed on the ring buffer.
I would like to thank Jason Wang jasow...@redhat.com for pointing
out this issue.

Signed-off-by: K. Y. Srinivasan k...@microsoft.com
---
 drivers/hv/channel.c |   32 
 1 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index e58cdb7..ae06ba9 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -614,8 +614,24 @@ int vmbus_sendpacket_ctl(struct vmbus_channel 
*channel, void *buffer,
 
 	ret = hv_ringbuffer_write(channel-outbound, bufferlist, 3, 
signal);
 
+	/*

+* Here is the logic for signalling the host:
+* 1. If the host is already draining the ringbuffer,
+*don't signal. This is indicated by the parameter
+*signal.
+*
+* 2. If we are not able to write, signal if kick_q is false.
+*kick_q being false indicates that we may have placed zero or
+*more packets with more packets to come. We will signal in
+*this case even if potentially we may have not placed any
+*packet. This is a rare enough condition that it should not
+*matter.
+*/
+
if ((ret == 0)  kick_q  signal)
vmbus_setevent(channel);
+   else if ((ret != 0)  !kick_q)
+   vmbus_setevent(channel);
 
 	return ret;

 }
@@ -705,8 +721,24 @@ int vmbus_sendpacket_pagebuffer_ctl(struct 
vmbus_channel *channel,
 
 	ret = hv_ringbuffer_write(channel-outbound, bufferlist, 3, 
signal);
 
+	/*

+* Here is the logic for signalling the host:
+* 1. If the host is already draining the ringbuffer,
+*don't signal. This is indicated by the parameter
+*signal.
+*
+* 2. If we are not able to write, signal if kick_q is false.
+*kick_q being false indicates that we may have placed zero or
+*more packets with more packets to come. We will signal in
+*this case even if potentially we may have not placed any
+*packet. This is a rare enough condition that it should not
+*matter.
+*/
+
if ((ret == 0)  kick_q  signal)
vmbus_setevent(channel);
+   else if ((ret != 0)  !kick_q)
+   vmbus_setevent(channel);
 
 	return ret;

 }
--


Looks like we need to kick unconditionally here. Consider we may get 
-EAGAIN when we want to send the last skb (kick_q is true) from the 
list. We need kick host in this case.


Btw, another method is let the driver to decide e.g exporting the 
vmbus_setevent() and call it in netvsc_start_xmit().





___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH V2 1/3 net-next] Drivers: hv: vmbus: Export the vmbus_sendpacket_pagebuffer_ctl()

2015-03-11 Thread Jason Wang



On Thu, Mar 12, 2015 at 3:04 AM, K. Y. Srinivasan k...@microsoft.com 
wrote:

Export the vmbus_sendpacket_pagebuffer_ctl() interface. This interface
will be used in the netvsc driver to optimize signalling the host.

Signed-off-by: K. Y. Srinivasan k...@microsoft.com
---
 drivers/hv/channel.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index da53180..e58cdb7 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -710,6 +710,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct 
vmbus_channel *channel,
 
 	return ret;

 }
+EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer_ctl);
 
 /*

  * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer
--
1.7.4.1


Acked-by: Jason Wang jasow...@redhat.com

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 2/2 net-next] hyperv: Support batched notification

2015-03-10 Thread Jason Wang



On Wed, Mar 11, 2015 at 2:50 AM, K. Y. Srinivasan k...@microsoft.com 
wrote:

Optimize notifying the host by deferring notification until there
are no more packets to be sent. This will help in batching the 
requests

on the host.

Signed-off-by: K. Y. Srinivasan k...@microsoft.com
---
 drivers/net/hyperv/hyperv_net.h   |2 +-
 drivers/net/hyperv/netvsc.c   |   14 +-
 drivers/net/hyperv/netvsc_drv.c   |3 ++-
 drivers/net/hyperv/rndis_filter.c |2 +-
 4 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h 
b/drivers/net/hyperv/hyperv_net.h

index 4815843..3fd9896 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -184,7 +184,7 @@ struct rndis_device {
 int netvsc_device_add(struct hv_device *device, void 
*additional_info);

 int netvsc_device_remove(struct hv_device *device);
 int netvsc_send(struct hv_device *device,
-   struct hv_netvsc_packet *packet);
+   struct hv_netvsc_packet *packet, bool kick_q);
 void netvsc_linkstatus_callback(struct hv_device *device_obj,
struct rndis_message *resp);
 int netvsc_recv_callback(struct hv_device *device_obj,
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 208eb05..9003b94 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -707,7 +707,7 @@ static u32 netvsc_copy_to_send_buf(struct 
netvsc_device *net_device,

 }
 
 int netvsc_send(struct hv_device *device,

-   struct hv_netvsc_packet *packet)
+   struct hv_netvsc_packet *packet, bool kick_q)
 {
struct netvsc_device *net_device;
int ret = 0;
@@ -719,6 +719,7 @@ int netvsc_send(struct hv_device *device,
u32 msg_size = 0;
struct sk_buff *skb = NULL;
u16 q_idx = packet-q_idx;
+   u32 vmbus_flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
 
 
 	net_device = get_outbound_net_device(device);

@@ -768,18 +769,21 @@ int netvsc_send(struct hv_device *device,
return -ENODEV;
 
 	if (packet-page_buf_cnt) {

-   ret = vmbus_sendpacket_pagebuffer(out_channel,
+   ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
  packet-page_buf,
  packet-page_buf_cnt,
  sendMessage,
  sizeof(struct nvsp_message),
- req_id);
+ req_id,
+ vmbus_flags,
+ kick_q);


What if kick_q is false but ret is -EAGAIN here? Looks like in this 
case host won't get notified at all. How about checking whether txq and 
kicking if it has been stopped like what other network driver did?


} else {
-   ret = vmbus_sendpacket(out_channel, sendMessage,
+   ret = vmbus_sendpacket_ctl(out_channel, sendMessage,
sizeof(struct nvsp_message),
req_id,
VM_PKT_DATA_INBAND,
-   VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+   vmbus_flags,
+   kick_q);
}
 
 	if (ret == 0) {
diff --git a/drivers/net/hyperv/netvsc_drv.c 
b/drivers/net/hyperv/netvsc_drv.c

index a06bd66..80b4b29 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -384,6 +384,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, 
struct net_device *net)

u32 net_trans_info;
u32 hash;
u32 skb_length = skb-len;
+   bool kick_q = !skb-xmit_more;
 
 
 	/* We will atmost need two pages to describe the rndis

@@ -556,7 +557,7 @@ do_send:
packet-page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
skb, packet-page_buf[0]);
 
-	ret = netvsc_send(net_device_ctx-device_ctx, packet);

+   ret = netvsc_send(net_device_ctx-device_ctx, packet, kick_q);
 
 drop:

if (ret == 0) {
diff --git a/drivers/net/hyperv/rndis_filter.c 
b/drivers/net/hyperv/rndis_filter.c

index ca81de0..05f3792 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -238,7 +238,7 @@ static int rndis_filter_send_request(struct 
rndis_device *dev,
 
 	packet-send_completion = NULL;
 
-	ret = netvsc_send(dev-net_dev-dev, packet);

+   ret = netvsc_send(dev-net_dev-dev, packet, true);
return ret;
 }
 
--

1.7.4.1



___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH] Drivers: hv: hv_balloon: keep locks balanced on add_memory() failure

2015-03-05 Thread Jason Wang


- Original Message -
 When add_memory() fails the following BUG is observed:
 [  743.646107] hv_balloon: hot_add memory failed error is -17
 [  743.679973]
 [  743.680930] =
 [  743.680930] [ BUG: bad unlock balance detected! ]
 [  743.680930] 3.19.0-rc5_bug1131426+ #552 Not tainted
 [  743.680930] -
 [  743.680930] kworker/0:2/255 is trying to release lock
 (dm_device.ha_region_mutex) at:
 [  743.680930] [81aae5fe] mutex_unlock+0xe/0x10
 [  743.680930] but there are no more locks to release!
 
 This happens as we don't acquire ha_region_mutex and hot_add_req() expects us
 to as it does unconditional mutex_unlock(). Acquire the lock on the error
 path.
 
 Signed-off-by: Vitaly Kuznetsov vkuzn...@redhat.com

Acked-by: Jason Wang jasow...@redhat.com
 ---
 This patch is dependent on the previously posted 'Drivers: hv: hv_balloon:
 eliminate the trylock path in acquire/release_region_mutex'.
 ---
  drivers/hv/hv_balloon.c | 1 +
  1 file changed, 1 insertion(+)
 
 diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
 index 1283035..771bf84 100644
 --- a/drivers/hv/hv_balloon.c
 +++ b/drivers/hv/hv_balloon.c
 @@ -654,6 +654,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned
 long size,
   }
   has-ha_end_pfn -= HA_CHUNK;
   has-covered_end_pfn -=  processed_pfn;
 + mutex_lock(dm_device.ha_region_mutex);
   break;
   }
  
 --
 1.9.3
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
 
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 1/4] Drivers: hv: vmbus: implement get/put usage workflow for vmbus channels

2015-02-04 Thread Jason Wang



On Wed, Feb 4, 2015 at 1:00 AM, Vitaly Kuznetsov vkuzn...@redhat.com 
wrote:
free_channel() function frees the channel unconditionally so we need 
to make
sure nobody has any link to it. This is not trivial and there are 
several

examples of races we have:

1) In vmbus_onoffer_rescind() we check for channel existence with
   relid2channel() and then use it. This can go wrong if we're in the 
middle

   of channel removal (free_channel() was already called).

2) In process_chn_event() we check for channel existence with
   pcpu_relid2channel() and then use it. This can also go wrong.

3) vmbus_free_channels() just frees all channels, in case we're in 
the middle

   of vmbus_process_rescind_offer() crash is possible.

The issue can be solved by holding vmbus_connection.channel_lock 
everywhere,
however, it looks like a way to deadlocks and performance 
degradation. Get/put

workflow fits here the best.

Implement vmbus_get_channel()/vmbus_put_channel() pair instead of
free_channel().

Signed-off-by: Vitaly Kuznetsov vkuzn...@redhat.com
---
 drivers/hv/channel_mgmt.c | 45 
++---

 drivers/hv/connection.c   |  7 +--
 drivers/hv/hyperv_vmbus.h |  4 
 include/linux/hyperv.h| 13 +
 4 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 36bacc7..eb9ce94 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -147,6 +147,8 @@ static struct vmbus_channel *alloc_channel(void)
return NULL;
 
 	channel-id = atomic_inc_return(chan_num);

+   atomic_set(channel-count, 1);
+
spin_lock_init(channel-inbound_lock);
spin_lock_init(channel-lock);
 
@@ -178,19 +180,47 @@ static void release_channel(struct work_struct 
*work)

 }
 
 /*
- * free_channel - Release the resources used by the vmbus channel 
object
+ * vmbus_put_channel - Decrease the channel usage counter and 
release the

+ * resources when this counter reaches zero.
  */
-static void free_channel(struct vmbus_channel *channel)
+void vmbus_put_channel(struct vmbus_channel *channel)
 {
+   unsigned long flags;
 
 	/*

 * We have to release the channel's workqueue/thread in the vmbus's
 * workqueue/thread context
 * ie we can't destroy ourselves.
 */
-   INIT_WORK(channel-work, release_channel);
-   queue_work(vmbus_connection.work_queue, channel-work);
+   spin_lock_irqsave(channel-lock, flags);
+   if (atomic_dec_and_test(channel-count)) {
+   channel-dying = true;
+   INIT_WORK(channel-work, release_channel);
+   spin_unlock_irqrestore(channel-lock, flags);
+   queue_work(vmbus_connection.work_queue, channel-work);
+   } else
+   spin_unlock_irqrestore(channel-lock, flags);
+}
+EXPORT_SYMBOL_GPL(vmbus_put_channel);
+
+/* vmbus_get_channel - Get additional reference to the channel */
+struct vmbus_channel *vmbus_get_channel(struct vmbus_channel 
*channel)

+{
+   unsigned long flags;
+   struct vmbus_channel *ret = NULL;
+
+   if (!channel)
+   return NULL;
+
+   spin_lock_irqsave(channel-lock, flags);
+   if (!channel-dying) {
+   atomic_inc(channel-count);
+   ret = channel;
+   }
+   spin_unlock_irqrestore(channel-lock, flags);


Looks like we can use atomic_inc_return_safe() here to avoid extra 
dying. And then there's also no need for the spinlock.


if (atomic_inc_return_safe(channel-count)  0)
return channel;
else
return NULL;


+   return ret;
 }
+EXPORT_SYMBOL_GPL(vmbus_get_channel);
 
 static void percpu_channel_enq(void *arg)

 {
@@ -253,7 +283,7 @@ static void vmbus_process_rescind_offer(struct 
work_struct *work)

list_del(channel-sc_list);
spin_unlock_irqrestore(primary_channel-lock, flags);
}
-   free_channel(channel);
+   vmbus_put_channel(channel);
 }
 
 void vmbus_free_channels(void)

@@ -262,7 +292,7 @@ void vmbus_free_channels(void)
 
 	list_for_each_entry(channel, vmbus_connection.chn_list, listentry) 
{

vmbus_device_unregister(channel-device_obj);
-   free_channel(channel);
+   vmbus_put_channel(channel);
}
 }
 
@@ -391,7 +421,7 @@ done_init_rescind:

spin_unlock_irqrestore(newchannel-lock, flags);
return;
 err_free_chan:
-   free_channel(newchannel);
+   vmbus_put_channel(newchannel);
 }
 
 enum {
@@ -549,6 +579,7 @@ static void vmbus_onoffer_rescind(struct 
vmbus_channel_message_header *hdr)

queue_work(channel-controlwq, channel-work);
 
 	spin_unlock_irqrestore(channel-lock, flags);

+   vmbus_put_channel(channel);
 }
 
 /*

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index c4acd1c..d1ce134 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -247,7 +247,8 @@ void 

RE: [PATCH net] hyperv: Fix the error processing in netvsc_send()

2015-02-03 Thread Jason Wang



On Tue, Feb 3, 2015 at 11:46 PM, Haiyang Zhang haiya...@microsoft.com 
wrote:




 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Monday, February 2, 2015 1:49 AM
   btw, I find during netvsc_start_xmit(), ret was change to 
-ENOSPC

  when
   queue_sends[q_idx]  1. But non of the caller check -ENOSPC in 
fact?

 
  In this case, we don't request re-send, so set ret to a value 
other

  than
  -EAGAIN.
 
 Why not? We have available slots for it to be sent now. Dropping the

 packet in this case may cause out of order sending.


The EAGAIN error doesn't normally happen, because we set the hi water 
mark

to stop send queue.


This is not true since only txq was stopped which means only network 
stack stop sending packets but not for control path e.g 
rndis_filter_send_request() or other callers who call 
vmbus_sendpacket() directly (e.g recv completion). 

For control path, user may meet several errors when they want to change 
mac address under heavy load. 

What's more serious is netvsc_send_recv_completion(), it can not even 
recover from more than 3 times of EAGAIN.


I must say mixing data packets with control packets with the same 
channel sounds really scary. Since control packets could be blocked or 
even dropped because of data packets already queued during heavy load, 
and you need to synchronize two paths carefully (e.g I didn't see any 
tx lock were held if rndis_filter_send_request() call netsc_send() 
which may stop or start a queue).



 If in really rare case, the ring buffer is full and there
is no outstanding sends, we can't stop queue here because there will 
be no
send-completion msg to wake it up. 


Confused, I believe only txq is stopped but we may still get completion 
interrupt in this case.


And, the ring buffer is likely to be 
occupied by other special msg, e.g. receive-completion msg (not a 
normal case),
so we can't assume there are available slots. 


Then why not checking hv_ringbuf_avail_percent() instead? And there's 
no need to check queue_sends since it does not count recv completion.



We don't request retry from
the upper layer in this case to avoid possible busy retry.


Can't we just do this by stopping txq and depending on tx interrupt to 
wake it?


Thanks

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v2 3/3] hv: vmbus_open(): reset the channel state on ENOMEM

2015-02-02 Thread Jason Wang



On Mon, Feb 2, 2015 at 12:37 PM, Dexuan Cui de...@microsoft.com wrote:
Without this patch, the state is put to CHANNEL_OPENING_STATE, and 
when
the driver is loaded next time, vmbus_open() will fail immediately 
due to

newchannel-state != CHANNEL_OPEN_STATE.

CC: K. Y. Srinivasan k...@microsoft.com
Signed-off-by: Dexuan Cui de...@microsoft.com
---

v2: this is a RESEND.

 drivers/hv/channel.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 2978f5e..26dcf26 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -89,9 +89,10 @@ int vmbus_open(struct vmbus_channel *newchannel, 
u32 send_ringbuffer_size,

out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
get_order(send_ringbuffer_size + recv_ringbuffer_size));
 
-	if (!out)

-   return -ENOMEM;
-
+   if (!out) {
+   err = -ENOMEM;
+   goto error0;
+   }
 
 	in = (void *)((unsigned long)out + send_ringbuffer_size);
 
@@ -199,6 +200,7 @@ error0:

free_pages((unsigned long)out,
get_order(send_ringbuffer_size + recv_ringbuffer_size));
kfree(open_info);
+   newchannel-state = CHANNEL_OPEN_STATE;
return err;
 }
 EXPORT_SYMBOL_GPL(vmbus_open);
--
1.9.1


Reviewed-by: Jason Wang jasow...@redhat.com

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v2 2/3] hv: vmbus_post_msg: retry the hypercall on some transient errors

2015-02-02 Thread Jason Wang



On Mon, Feb 2, 2015 at 12:36 PM, Dexuan Cui de...@microsoft.com wrote:
I got HV_STATUS_INVALID_CONNECTION_ID on Hyper-V 2008 R2 when keeping 
running
rmmod hv_netvsc; modprobe hv_netvsc; rmmod hv_utils; modprobe 
hv_utils
in a Linux guest. Looks the host has some kind of throttling 
mechanism if

some kinds of hypercalls are sent too frequently.


Better to clarify this kind of throttling. Looks like it only affects 
HVCALL_POST_MESSAGE. Other looks good.


Reviewed-by: Jason Wang jasow...@redhat.com


Without the patch, the driver can occasionally fail to load.

Also let's retry HV_STATUS_INSUFFICIENT_MEMORY, though we didn't get 
it

before.

Removed 'case -ENOMEM', since the hypervisor doesn't return this.

CC: K. Y. Srinivasan k...@microsoft.com
Signed-off-by: Dexuan Cui de...@microsoft.com
---

v2:
  updated the subject and changelog
  on HV_STATUS_INVALID_CONNECTION_ID: ret =  -EAGAIN;
  added HV_STATUS_INSUFFICIENT_MEMORY
  removed unreachable -ENOMEM
  changed the delay from 100ms to 1000ms
  
 arch/x86/include/uapi/asm/hyperv.h |  2 ++

 drivers/hv/connection.c| 11 +--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/uapi/asm/hyperv.h 
b/arch/x86/include/uapi/asm/hyperv.h

index 90c458e..ce6068d 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -225,6 +225,8 @@
 #define HV_STATUS_INVALID_HYPERCALL_CODE   2
 #define HV_STATUS_INVALID_HYPERCALL_INPUT  3
 #define HV_STATUS_INVALID_ALIGNMENT4
+#define HV_STATUS_INSUFFICIENT_MEMORY  11
+#define HV_STATUS_INVALID_CONNECTION_ID18
 #define HV_STATUS_INSUFFICIENT_BUFFERS 19
 
 typedef struct _HV_REFERENCE_TSC_PAGE {

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index c4acd1c..af2388f 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -440,9 +440,16 @@ int vmbus_post_msg(void *buffer, size_t buflen)
ret = hv_post_message(conn_id, 1, buffer, buflen);
 
 		switch (ret) {

+   case HV_STATUS_INVALID_CONNECTION_ID:
+   /*
+* We could get this if we send messages too
+* frequently.
+*/
+   ret = -EAGAIN;
+   break;
+   case HV_STATUS_INSUFFICIENT_MEMORY:
case HV_STATUS_INSUFFICIENT_BUFFERS:
ret = -ENOMEM;
-   case -ENOMEM:
break;
case HV_STATUS_SUCCESS:
return ret;
@@ -452,7 +459,7 @@ int vmbus_post_msg(void *buffer, size_t buflen)
}
 
 		retries++;

-   msleep(100);
+   msleep(1000);
}
return ret;
 }
--
1.9.1



___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v2 1/3] hv: hv_util: move vmbus_open() to a later place

2015-02-02 Thread Jason Wang



On Mon, Feb 2, 2015 at 12:35 PM, Dexuan Cui de...@microsoft.com wrote:
Before the line vmbus_open() returns, srv-util_cb can be already 
running
and the variables, like util_fw_version, are needed by the 
srv-util_cb.


A questions is why we do this for util only? Can callbacks of other 
devices be called before vmbus_open() returns?




So we have to make sure the variables are initialized before the 
vmbus_open().


CC: K. Y. Srinivasan k...@microsoft.com
Reviewed-by: Vitaly Kuznetsov vkuzn...@redhat.com
Signed-off-by: Dexuan Cui de...@microsoft.com
---

v2:
This is a RESEND.
I just added Vitaly's Reviewed-by.

 drivers/hv/hv_util.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 3b9c9ef..c5be773 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -340,12 +340,8 @@ static int util_probe(struct hv_device *dev,
 
 	set_channel_read_state(dev-channel, false);
 
-	ret = vmbus_open(dev-channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 
0,

-   srv-util_cb, dev-channel);
-   if (ret)
-   goto error;
-
hv_set_drvdata(dev, srv);
+


This seems unnecessary.


/*
 * Based on the host; initialize the framework and
 * service version numbers we will negotiate.
@@ -365,6 +361,11 @@ static int util_probe(struct hv_device *dev,
hb_srv_version = HB_VERSION;
}
 
+	ret = vmbus_open(dev-channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 
0,

+   srv-util_cb, dev-channel);
+   if (ret)
+   goto error;
+
return 0;
 
 error:

--
1.9.1



___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH v2 1/3] hv: hv_util: move vmbus_open() to a later place

2015-02-02 Thread Jason Wang



On Tue, Feb 3, 2015 at 11:30 AM, KY Srinivasan k...@microsoft.com 
wrote:




 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Monday, February 2, 2015 7:09 PM
 To: Dexuan Cui
 Cc: gre...@linuxfoundation.org; linux-ker...@vger.kernel.org; 
driverdev-

 de...@linuxdriverproject.org; o...@aepfle.de; a...@canonical.com; KY
 Srinivasan; vkuzn...@redhat.com; Haiyang Zhang
 Subject: RE: [PATCH v2 1/3] hv: hv_util: move vmbus_open() to a 
later place
 
 
 
 On Mon, Feb 2, 2015 at 6:09 PM, Dexuan Cui de...@microsoft.com 
wrote:

   -Original Message-
   From: Jason Wang [mailto:jasow...@redhat.com]
   Sent: Monday, February 2, 2015 17:36 PM
   To: Dexuan Cui
   Cc: gre...@linuxfoundation.org; linux-ker...@vger.kernel.org;
  driverdev-
   de...@linuxdriverproject.org; o...@aepfle.de; 
a...@canonical.com; KY

  Srinivasan; vkuzn...@redhat.com; Haiyang Zhang
   Subject: Re: [PATCH v2 1/3] hv: hv_util: move vmbus_open() to a
  later place
 
 
 
   On Mon, Feb 2, 2015 at 12:35 PM, Dexuan Cui 
de...@microsoft.com

  wrote:
Before the line vmbus_open() returns, srv-util_cb can be 
already
   running   and the variables, like util_fw_version, are needed 
by

  the   srv-util_cb.
 
   A questions is why we do this for util only? Can callbacks of 
other

  devices be called before vmbus_open() returns?
  The variables are used in vmbus_prep_negotiate_resp(), which is 
only

  for the util devices.
 
  I think the other devices should already handle the similar issue
  properly.
  If this is not the case, we need to fix them too.
 
 Better to check all the others, e.g in balloon_probe(), it call
 hv_set_drvdata() after vmbus_open() and dose several datas setups 
in the

 middle. If balloon_onchannelcallback() could be called before
 hv_set_drvdata(), the code looks wrong.


Jason,

For all other device types, the guest initiates the communication 
with the host and potentially
negotiates appropriate (supported) version with the host. For the 
services packaged in the util
driver, the flow is a little different - the host pushes the version 
information into the guest. So,

the fix Dexuan made is only needed in the util driver.

Regards,

K. Y 


Thanks, so you mean for other device, it won't get any interrupt before 
guest negotiate the version with host?


 
 Thanks
 
 




___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH v2 1/3] hv: hv_util: move vmbus_open() to a later place

2015-02-02 Thread Jason Wang



On Mon, Feb 2, 2015 at 6:09 PM, Dexuan Cui de...@microsoft.com wrote:

 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Monday, February 2, 2015 17:36 PM
 To: Dexuan Cui
 Cc: gre...@linuxfoundation.org; linux-ker...@vger.kernel.org; 
driverdev-

 de...@linuxdriverproject.org; o...@aepfle.de; a...@canonical.com; KY
 Srinivasan; vkuzn...@redhat.com; Haiyang Zhang
 Subject: Re: [PATCH v2 1/3] hv: hv_util: move vmbus_open() to a 
later place
 
 
 
 On Mon, Feb 2, 2015 at 12:35 PM, Dexuan Cui de...@microsoft.com 
wrote:

  Before the line vmbus_open() returns, srv-util_cb can be already
  running
  and the variables, like util_fw_version, are needed by the
  srv-util_cb.
 
 A questions is why we do this for util only? Can callbacks of other

 devices be called before vmbus_open() returns?
The variables are used in vmbus_prep_negotiate_resp(), which is only 
for

the util devices.

I think the other devices should already handle the similar issue 
properly.

If this is not the case, we need to fix them too.


Better to check all the others, e.g in balloon_probe(), it call 
hv_set_drvdata() after vmbus_open() and dose several datas setups in 
the middle. If balloon_onchannelcallback() could be called before 
hv_set_drvdata(), the code looks wrong.


Thanks



___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH v2 1/3] hv: hv_util: move vmbus_open() to a later place

2015-02-02 Thread Jason Wang



On Tue, Feb 3, 2015 at 11:40 AM, KY Srinivasan k...@microsoft.com 
wrote:




 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Monday, February 2, 2015 7:38 PM
 To: KY Srinivasan
 Cc: Dexuan Cui; gre...@linuxfoundation.org; 
linux-ker...@vger.kernel.org;

 driverdev-devel@linuxdriverproject.org; o...@aepfle.de;
 a...@canonical.com; vkuzn...@redhat.com; Haiyang Zhang
 Subject: RE: [PATCH v2 1/3] hv: hv_util: move vmbus_open() to a 
later place
 
 
 
 On Tue, Feb 3, 2015 at 11:30 AM, KY Srinivasan k...@microsoft.com

 wrote:
 
 
   -Original Message-
   From: Jason Wang [mailto:jasow...@redhat.com]
   Sent: Monday, February 2, 2015 7:09 PM
   To: Dexuan Cui
   Cc: gre...@linuxfoundation.org; linux-ker...@vger.kernel.org;
  driverdev-
   de...@linuxdriverproject.org; o...@aepfle.de; 
a...@canonical.com; KY

  Srinivasan; vkuzn...@redhat.com; Haiyang Zhang
   Subject: RE: [PATCH v2 1/3] hv: hv_util: move vmbus_open() to a
  later place
 
 
 
   On Mon, Feb 2, 2015 at 6:09 PM, Dexuan Cui de...@microsoft.com
  wrote:
 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]Sent: 
Monday,

  February 2, 2015 17:36 PMTo: Dexuan CuiCc:
  gre...@linuxfoundation.org; linux-ker...@vger.kernel.org;  
  driverdev-de...@linuxdriverproject.org; o...@aepfle.de;
  a...@canonical.com; KY   Srinivasan; vkuzn...@redhat.com; 
Haiyang
  ZhangSubject: Re: [PATCH v2 1/3] hv: hv_util: move 
vmbus_open()
  to a   later place  On Mon, Feb 2, 2015 at 
12:35

  PM, Dexuan Cui de...@microsoft.com   wrote:
  Before the line vmbus_open() returns, srv-util_cb can be
  alreadyrunning   and the variables, like 
util_fw_version, are

  needed by   the   srv-util_cb.
   
 A questions is why we do this for util only? Can callbacks 
of

  other   devices be called before vmbus_open() returns?
The variables are used in vmbus_prep_negotiate_resp(), which 
is

  only   for the util devices.
   
I think the other devices should already handle the similar 
issue

   properly.
If this is not the case, we need to fix them too.
 
   Better to check all the others, e.g in balloon_probe(), it call
   hv_set_drvdata() after vmbus_open() and dose several datas 
setups in
  the  middle. If balloon_onchannelcallback() could be called 
before

  hv_set_drvdata(), the code looks wrong.
 
  Jason,
 
  For all other device types, the guest initiates the communication 
with
  the host and potentially negotiates appropriate (supported) 
version
  with the host. For the services packaged in the util driver, the 
flow
  is a little different - the host pushes the version information 
into
  the guest. So, the fix Dexuan made is only needed in the util 
driver.

 
  Regards,
 
  K. Y
 
 Thanks, so you mean for other device, it won't get any interrupt 
before guest

 negotiate the version with host?


Yes, the guest initiates the version negotiation. Are you seeing 
something different?


K. Y


No, thanks.

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v2 1/3] hv: hv_util: move vmbus_open() to a later place

2015-02-02 Thread Jason Wang



On Mon, Feb 2, 2015 at 12:35 PM, Dexuan Cui de...@microsoft.com wrote:
Before the line vmbus_open() returns, srv-util_cb can be already 
running
and the variables, like util_fw_version, are needed by the 
srv-util_cb.


So we have to make sure the variables are initialized before the 
vmbus_open().


CC: K. Y. Srinivasan k...@microsoft.com
Reviewed-by: Vitaly Kuznetsov vkuzn...@redhat.com
Signed-off-by: Dexuan Cui de...@microsoft.com
---

v2:
This is a RESEND.
I just added Vitaly's Reviewed-by.

 drivers/hv/hv_util.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 3b9c9ef..c5be773 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -340,12 +340,8 @@ static int util_probe(struct hv_device *dev,
 
 	set_channel_read_state(dev-channel, false);
 
-	ret = vmbus_open(dev-channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 
0,

-   srv-util_cb, dev-channel);
-   if (ret)
-   goto error;
-
hv_set_drvdata(dev, srv);
+
/*
 * Based on the host; initialize the framework and
 * service version numbers we will negotiate.
@@ -365,6 +361,11 @@ static int util_probe(struct hv_device *dev,
hb_srv_version = HB_VERSION;
}
 
+	ret = vmbus_open(dev-channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 
0,

+   srv-util_cb, dev-channel);
+   if (ret)
+   goto error;
+
return 0;
 
 error:

--
1.9.1


Reviewed-by: Jason Wang jasow...@redhat.com

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH net] hyperv: Fix the error processing in netvsc_send()

2015-02-01 Thread Jason Wang



On Fri, Jan 30, 2015 at 11:05 PM, Haiyang Zhang 
haiya...@microsoft.com wrote:




 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Friday, January 30, 2015 5:25 AM
  + if (ret != 0) {
  + if (section_index != NETVSC_INVALID_INDEX)
  + netvsc_free_send_slot(net_device, section_index);
 
 What if ret is -EINVAL or -ENOSPC? Looks like we need free the skb 
in

 this case also.


In these cases, skb is freed in netvsc_start_xmit().



 
  + } else if (skb) {
  + dev_kfree_skb_any(skb);
 
 The caller - netvsc_start_xmit() do this also, may be handle this in
 caller is better since netvsc_start_xmit() is the only user that 
tries

 to send a skb?


When the packet is sent out normally, we frees it in netvsc_send() if 
it's
copied to send-buffer. The free is done in netvsc_send(), because the 
copy
is also in this function. If it's not copied, it will be freed in 
another

function -- netvsc_xmit_completion().

netvsc_start_xmit() only does free skb in error case.


Ok.



 btw, I find during netvsc_start_xmit(), ret was change to -ENOSPC 
when

 queue_sends[q_idx]  1. But non of the caller check -ENOSPC in fact?


In this case, we don't request re-send, so set ret to a value other 
than
-EAGAIN. 


Why not? We have available slots for it to be sent now. Dropping the 
packet in this case may cause out of order sending.

It's handled in the same way as errors != -EAGAIN, so we don't
need to check this value specifically.


Thanks

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net] hyperv: Fix the error processing in netvsc_send()

2015-01-30 Thread Jason Wang



On Fri, Jan 30, 2015 at 4:34 AM, Haiyang Zhang haiya...@microsoft.com 
wrote:
The existing code frees the skb in EAGAIN case, in which the skb will 
be

retried from upper layer and used again.
Also, the existing code doesn't free send buffer slot in error case, 
because

there is no completion message for unsent packets.
This patch fixes these problems.

(Please also include this patch for stable trees. Thanks!)

Signed-off-by: Haiyang Zhang haiya...@microsoft.com
Reviewed-by: K. Y. Srinivasan k...@microsoft.com
---
 drivers/net/hyperv/netvsc.c |   11 ---
 1 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 9f49c01..7cd4eb3 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -716,7 +716,7 @@ int netvsc_send(struct hv_device *device,
u64 req_id;
unsigned int section_index = NETVSC_INVALID_INDEX;
u32 msg_size = 0;
-   struct sk_buff *skb;
+   struct sk_buff *skb = NULL;
u16 q_idx = packet-q_idx;
 
 
@@ -743,8 +743,6 @@ int netvsc_send(struct hv_device *device,

   packet);
skb = (struct sk_buff *)
  (unsigned long)packet-send_completion_tid;
-   if (skb)
-   dev_kfree_skb_any(skb);
packet-page_buf_cnt = 0;
}
}
@@ -810,6 +808,13 @@ int netvsc_send(struct hv_device *device,
   packet, ret);
}
 
+	if (ret != 0) {

+   if (section_index != NETVSC_INVALID_INDEX)
+   netvsc_free_send_slot(net_device, section_index);


What if ret is -EINVAL or -ENOSPC? Looks like we need free the skb in 
this case also.


+   } else if (skb) {
+   dev_kfree_skb_any(skb);


The caller - netvsc_start_xmit() do this also, may be handle this in 
caller is better since netvsc_start_xmit() is the only user that tries 
to send a skb?


btw, I find during netvsc_start_xmit(), ret was change to -ENOSPC when 
queue_sends[q_idx]  1. But non of the caller check -ENOSPC in fact?


Thanks


+   }
+
return ret;
 }
 
--

1.7.4.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH v3 3/3] Drivers: hv: vmbus: serialize Offer and Rescind offer

2015-01-29 Thread Jason Wang



On Wed, Jan 28, 2015 at 7:57 PM, Dexuan Cui de...@microsoft.com wrote:

 -Original Message-
 From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
 Sent: Tuesday, January 20, 2015 23:45 PM
 To: KY Srinivasan; de...@linuxdriverproject.org
 Cc: Haiyang Zhang; linux-ker...@vger.kernel.org; Dexuan Cui; Jason 
Wang;

 Radim Krčmář; Dan Carpenter
 Subject: [PATCH v3 3/3] Drivers: hv: vmbus: serialize Offer and 
Rescind offer
 
 Commit 4b2f9abea52a (staging: hv: convert channel_mgmt.c to not 
call
 osd_schedule_callback)' was written under an assumption that we 
never

 receive
 Rescind offer while we're still processing the initial Offer 
request. However,
 the issue we fixed in 04a258c162a8 could be caused by this 
assumption not

 always being true.
 
 In particular, we need to protect against the following:
 1) Receiving a Rescind offer after we do queue_work() for 
processing an

 Offer
request and before we actually enter vmbus_process_offer(). 
work.func

 points
to vmbus_process_offer() at this moment and in 
vmbus_onoffer_rescind()

 we do
another queue_work() without a check so we'll enter
 vmbus_process_offer()
twice.
 2) Receiving a Rescind offer after we enter vmbus_process_offer() 
and
especially after we set state = CHANNEL_OPEN_STATE. Many things 
can go
wrong in that case, e.g. we can call free_channel() while we're 
still using

it.
 
 Implement the required protection by changing work-func at the 
very end

 of
 vmbus_process_offer() and checking work-func in 
vmbus_onoffer_rescind().

 In
 case we receive rescind offer during or before 
vmbus_process_offer() is

 done
 we set rescind flag to true and we check it at the end of
 vmbus_process_offer()
 so such offer will not get lost.
 
 Suggested-by: Radim Krčmář rkrc...@redhat.com

 Signed-off-by: Vitaly Kuznetsov vkuzn...@redhat.com
 ---
  drivers/hv/channel_mgmt.c | 30 ++
  1 file changed, 22 insertions(+), 8 deletions(-)
 
 diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c

 index c6fdd74..877a944 100644
 --- a/drivers/hv/channel_mgmt.c
 +++ b/drivers/hv/channel_mgmt.c
 @@ -279,9 +279,6 @@ static void vmbus_process_offer(struct 
work_struct

 *work)
int ret;
unsigned long flags;
 
 -	/* The next possible work is rescind handling */

 -  INIT_WORK(newchannel-work, vmbus_process_rescind_offer);
 -
/* Make sure this is a new offer */
spin_lock_irqsave(vmbus_connection.channel_lock, flags);
 
 @@ -341,7 +338,7 @@ static void vmbus_process_offer(struct 
work_struct

 *work)
if (channel-sc_creation_callback != NULL)
channel-sc_creation_callback(newchannel);
 
 -			goto out;

 +  goto done_init_rescind;
}
 
  		goto err_free_chan;
 @@ -382,7 +379,14 @@ static void vmbus_process_offer(struct 
work_struct

 *work)
kfree(newchannel-device_obj);
goto err_free_chan;
}
 -out:
 +done_init_rescind:
 +  spin_lock_irqsave(newchannel-lock, flags);
 +  /* The next possible work is rescind handling */
 +  INIT_WORK(newchannel-work, vmbus_process_rescind_offer);
 +  /* Check if rescind offer was already received */
 +  if (newchannel-rescind)
 +  queue_work(newchannel-controlwq, newchannel-work);
 +  spin_unlock_irqrestore(newchannel-lock, flags);
return;
  err_free_chan:
free_channel(newchannel);
 @@ -520,6 +524,7 @@ static void vmbus_onoffer_rescind(struct
 vmbus_channel_message_header *hdr)
  {
struct vmbus_channel_rescind_offer *rescind;
struct vmbus_channel *channel;
 +  unsigned long flags;
 
  	rescind = (struct vmbus_channel_rescind_offer *)hdr;

channel = relid2channel(rescind-child_relid);
 @@ -528,11 +533,20 @@ static void vmbus_onoffer_rescind(struct
 vmbus_channel_message_header *hdr)
/* Just return here, no channel found */
return;
 
 +	spin_lock_irqsave(channel-lock, flags);

channel-rescind = true;
 +  /*
 +   * channel-work.func != vmbus_process_rescind_offer means we
 are still
 +	 * processing offer request and the rescind offer processing 
should

 be
 +   * postponed. It will be done at the very end of
 vmbus_process_offer()
 +   * as rescind flag is being checked there.
 +   */
 +  if (channel-work.func == vmbus_process_rescind_offer)
 +  /* work is initialized for vmbus_process_rescind_offer() from
 +   * vmbus_process_offer() where the channel got created */
 +  queue_work(channel-controlwq, channel-work);
 
 -	/* work is initialized for vmbus_process_rescind_offer() from

 -   * vmbus_process_offer() where the channel got created */
 -  queue_work(channel-controlwq, channel-work);
 +  spin_unlock_irqrestore(channel-lock, flags);
  }
 
  /*

 --


Hi Vitaly and all,
I have 2 questions

Re: [PATCH v3 3/3] Drivers: hv: vmbus: serialize Offer and Rescind offer

2015-01-29 Thread Jason Wang



On Wed, Jan 28, 2015 at 9:09 PM, Vitaly Kuznetsov vkuzn...@redhat.com 
wrote:

Dexuan Cui de...@microsoft.com writes:


 -Original Message-
 From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
 Sent: Wednesday, January 28, 2015 20:09 PM
 To: Dexuan Cui
 Cc: KY Srinivasan; de...@linuxdriverproject.org; Haiyang Zhang; 
linux-

 ker...@vger.kernel.org; Jason Wang; Radim Krčmář; Dan Carpenter
 Subject: Re: [PATCH v3 3/3] Drivers: hv: vmbus: serialize Offer 
and Rescind

 offer
 
 Dexuan Cui de...@microsoft.com writes:
 
  -Original Message-

  From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
  Sent: Tuesday, January 20, 2015 23:45 PM
  To: KY Srinivasan; de...@linuxdriverproject.org
  Cc: Haiyang Zhang; linux-ker...@vger.kernel.org; Dexuan Cui; 
Jason Wang;

  Radim Krčmář; Dan Carpenter
  Subject: [PATCH v3 3/3] Drivers: hv: vmbus: serialize Offer and 
Rescind

 offer
 ...
 
  Hi Vitaly and all,
  I have 2 questions:
  In vmbus_process_offer(),  in the cases of goto err_free_chan,
  should we consider the possibility a rescind message could be 
pending for

  the new channel?
  In the cases,  because we don't run
  INIT_WORK(newchannel-work, vmbus_process_rescind_offer); ,
  vmbus_onoffer_rescind() will do nothing and as a result,
  vmbus_process_rescind_offer() won't be invoked.
 
 Yes, but processing the rescind offer results in freeing the 
channel

 (and this processing supposes the channel wasn't freed before) so
 there is no difference... or is it?
 
 

  Question 2: in vmbus_process_offer(), in the case
  vmbus_device_register() fails, we'll run
  list_del(newchannel-listentry); -- just after this line,
   what will happen at this time if relid2channel() returns NULL
  in vmbus_onoffer_rescind()?
 
  I think we'll lose the rescind message.
 
 
 Yes, but same logic applies - we already freed the channes so no 
rescind

 proccessing required.
 free_channel() and vmbus_process_rescind_offer() are different, 
because
  the latter does more work, e.g., sending the host a message 
 CHANNELMSG_RELID_RELEASED.


 In the cases of goto err_free_chan + a pending rescind message,
 the host may expect the message CHANNELMSG_RELID_RELEASED and
 could reoffer the channel once the message is received.

 It would be better if the VM doesn't lose the rescind message
 here. :-)


Ah, I see, CHANNELMSG_RELID_RELEASED is expected from us in any
case. I'll doing that in a separate patch is noone objects.


All the evil come from the un-serialized processing of message. I 
wonder whether we can do all the processing in workqueue and then those 
were automatically serialized.




Thanks for the review,




  If we still need to do something we need to
 add support for already freed channel to the rescind offer 
processing path.
 


 Thanks,
 -- Dexuan


--
  Vitaly
--
To unsubscribe from this list: send the line unsubscribe 
linux-kernel in

the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH v3 3/3] Drivers: hv: vmbus: serialize Offer and Rescind offer

2015-01-29 Thread Jason Wang



On Wed, Jan 28, 2015 at 8:51 PM, Dexuan Cui de...@microsoft.com wrote:

 -Original Message-
 From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
 Sent: Wednesday, January 28, 2015 20:09 PM
 To: Dexuan Cui
 Cc: KY Srinivasan; de...@linuxdriverproject.org; Haiyang Zhang; 
linux-

 ker...@vger.kernel.org; Jason Wang; Radim Krčmář; Dan Carpenter
 Subject: Re: [PATCH v3 3/3] Drivers: hv: vmbus: serialize Offer and 
Rescind

 offer
 
 Dexuan Cui de...@microsoft.com writes:
 
  -Original Message-

  From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
  Sent: Tuesday, January 20, 2015 23:45 PM
  To: KY Srinivasan; de...@linuxdriverproject.org
  Cc: Haiyang Zhang; linux-ker...@vger.kernel.org; Dexuan Cui; 
Jason Wang;

  Radim Krčmář; Dan Carpenter
  Subject: [PATCH v3 3/3] Drivers: hv: vmbus: serialize Offer and 
Rescind

 offer
 ...
 
  Hi Vitaly and all,
  I have 2 questions:
  In vmbus_process_offer(),  in the cases of goto err_free_chan,
  should we consider the possibility a rescind message could be 
pending for

  the new channel?
  In the cases,  because we don't run
  INIT_WORK(newchannel-work, vmbus_process_rescind_offer); ,
  vmbus_onoffer_rescind() will do nothing and as a result,
  vmbus_process_rescind_offer() won't be invoked.
 
 Yes, but processing the rescind offer results in freeing the channel

 (and this processing supposes the channel wasn't freed before) so
 there is no difference... or is it?
 
 

  Question 2: in vmbus_process_offer(), in the case
  vmbus_device_register() fails, we'll run
  list_del(newchannel-listentry); -- just after this line,
   what will happen at this time if relid2channel() returns NULL
  in vmbus_onoffer_rescind()?
 
  I think we'll lose the rescind message.
 
 
 Yes, but same logic applies - we already freed the channes so no 
rescind

 proccessing required.
free_channel() and vmbus_process_rescind_offer() are different, 
because
 the latter does more work, e.g., sending the host a message 
CHANNELMSG_RELID_RELEASED.


In the cases of goto err_free_chan + a pending rescind message,
the host may expect the message CHANNELMSG_RELID_RELEASED and
could reoffer the channel once the message is received.

It would be better if the VM doesn't lose the rescind message here. 
:-)


It's interesting that rescind needs a ack from guest. But looks like 
the offer does not need it? Is there a spec for this for us for 
reference?


Thanks




  If we still need to do something we need to
 add support for already freed channel to the rescind offer 
processing path.
 


Thanks,
-- Dexuan


___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 2/3] hv: vmbus_post_msg: retry the hypercall on HV_STATUS_INVALID_CONNECTION_ID

2015-01-29 Thread Jason Wang



On Thu, Jan 29, 2015 at 7:02 PM, Dexuan Cui de...@microsoft.com wrote:

I got the hypercall error code on Hyper-V 2008 R2 when keeping running
rmmod hv_netvsc; modprobe hv_netvsc; rmmod hv_utils; modprobe 
hv_utils

in a Linux guest.

Without the patch, the driver can occasionally fail to load.

CC: K. Y. Srinivasan k...@microsoft.com
Signed-off-by: Dexuan Cui de...@microsoft.com
---
 arch/x86/include/uapi/asm/hyperv.h | 1 +
 drivers/hv/connection.c| 9 +
 2 files changed, 10 insertions(+)

diff --git a/arch/x86/include/uapi/asm/hyperv.h 
b/arch/x86/include/uapi/asm/hyperv.h

index 90c458e..b9daffb 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -225,6 +225,7 @@
 #define HV_STATUS_INVALID_HYPERCALL_CODE   2
 #define HV_STATUS_INVALID_HYPERCALL_INPUT  3
 #define HV_STATUS_INVALID_ALIGNMENT4
+#define HV_STATUS_INVALID_CONNECTION_ID18
 #define HV_STATUS_INSUFFICIENT_BUFFERS 19
 
 typedef struct _HV_REFERENCE_TSC_PAGE {

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index c4acd1c..8bd05f3 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -440,6 +440,15 @@ int vmbus_post_msg(void *buffer, size_t buflen)
ret = hv_post_message(conn_id, 1, buffer, buflen);
 
 		switch (ret) {

+   case HV_STATUS_INVALID_CONNECTION_ID:
+   /*
+* We could get this if we send messages too
+* frequently or the host is under low resource
+* conditions: let's wait 1 more second before
+* retrying the hypercall.
+*/


The name HV_STATUS_INVALID_CONNECTION_ID is really confusing in this 
case. Since it does not show any meaning of lacking resources. 

+   msleep(1000);
+   break;
case HV_STATUS_INSUFFICIENT_BUFFERS:
ret = -ENOMEM;


I thought host should return this error value when lacking resources?


case -ENOMEM:
--
1.9.1

--
To unsubscribe from this list: send the line unsubscribe 
linux-kernel in

the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v3 2/3] Drivers: hv: rename sc_lock to the more generic lock

2015-01-20 Thread Jason Wang



On Tue, Jan 20, 2015 at 11:45 PM, Vitaly Kuznetsov 
vkuzn...@redhat.com wrote:
sc_lock spinlock in struct vmbus_channel is being used to not only 
protect the
sc_list field, e.g. vmbus_open() function uses it to implement 
test-and-set
access to the state field. Rename it to the more generic 'lock' and 
add the

description.

Signed-off-by: Vitaly Kuznetsov vkuzn...@redhat.com
---
 drivers/hv/channel.c  |  6 +++---
 drivers/hv/channel_mgmt.c | 10 +-
 include/linux/hyperv.h|  7 ++-
 3 files changed, 14 insertions(+), 9 deletions(-)


Acked-by: Jason Wang jasow...@redhat.com


diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 433f72a..8608ed1 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -73,14 +73,14 @@ int vmbus_open(struct vmbus_channel *newchannel, 
u32 send_ringbuffer_size,

unsigned long flags;
int ret, t, err = 0;
 
-	spin_lock_irqsave(newchannel-sc_lock, flags);

+   spin_lock_irqsave(newchannel-lock, flags);
if (newchannel-state == CHANNEL_OPEN_STATE) {
newchannel-state = CHANNEL_OPENING_STATE;
} else {
-   spin_unlock_irqrestore(newchannel-sc_lock, flags);
+   spin_unlock_irqrestore(newchannel-lock, flags);
return -EINVAL;
}
-   spin_unlock_irqrestore(newchannel-sc_lock, flags);
+   spin_unlock_irqrestore(newchannel-lock, flags);
 
 	newchannel-onchannel_callback = onchannelcallback;

newchannel-channel_callback_context = context;
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 01f2c2b..c6fdd74 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -146,7 +146,7 @@ static struct vmbus_channel *alloc_channel(void)
return NULL;
 
 	spin_lock_init(channel-inbound_lock);

-   spin_lock_init(channel-sc_lock);
+   spin_lock_init(channel-lock);
 
 	INIT_LIST_HEAD(channel-sc_list);

INIT_LIST_HEAD(channel-percpu_list);
@@ -246,9 +246,9 @@ static void vmbus_process_rescind_offer(struct 
work_struct *work)

spin_unlock_irqrestore(vmbus_connection.channel_lock, flags);
} else {
primary_channel = channel-primary_channel;
-   spin_lock_irqsave(primary_channel-sc_lock, flags);
+   spin_lock_irqsave(primary_channel-lock, flags);
list_del(channel-sc_list);
-   spin_unlock_irqrestore(primary_channel-sc_lock, flags);
+   spin_unlock_irqrestore(primary_channel-lock, flags);
}
free_channel(channel);
 }
@@ -323,9 +323,9 @@ static void vmbus_process_offer(struct 
work_struct *work)

 * Process the sub-channel.
 */
newchannel-primary_channel = channel;
-   spin_lock_irqsave(channel-sc_lock, flags);
+   spin_lock_irqsave(channel-lock, flags);
list_add_tail(newchannel-sc_list, channel-sc_list);
-   spin_unlock_irqrestore(channel-sc_lock, flags);
+   spin_unlock_irqrestore(channel-lock, flags);
 
 			if (newchannel-target_cpu != get_cpu()) {

put_cpu();
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 476c685..02dd978 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -722,7 +722,12 @@ struct vmbus_channel {
 */
void (*sc_creation_callback)(struct vmbus_channel *new_sc);
 
-	spinlock_t sc_lock;

+   /*
+	 * The spinlock to protect the structure. It is being used to 
protect
+	 * test-and-set access to various attributes of the structure as 
well

+* as all sc_list operations.
+*/
+   spinlock_t lock;
/*
 * All Sub-channels of a primary channel are linked here.
 */
--
1.9.3



___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v3 1/3] Drivers: hv: check vmbus_device_create() return value in vmbus_process_offer()

2015-01-20 Thread Jason Wang



On Tue, Jan 20, 2015 at 11:45 PM, Vitaly Kuznetsov 
vkuzn...@redhat.com wrote:
vmbus_device_create() result is not being checked in 
vmbus_process_offer() and
it can fail if kzalloc() fails. Add the check and do minor cleanup to 
avoid

additional duplication of free_channel(); return; block.

Reported-by: Jason Wang jasow...@redhat.com
Signed-off-by: Vitaly Kuznetsov vkuzn...@redhat.com
---
 drivers/hv/channel_mgmt.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)


Acked-by: Jason Wang jasow...@redhat.com


diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 2c59f03..01f2c2b 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -341,11 +341,10 @@ static void vmbus_process_offer(struct 
work_struct *work)

if (channel-sc_creation_callback != NULL)
channel-sc_creation_callback(newchannel);
 
-			return;

+   goto out;
}
 
-		free_channel(newchannel);

-   return;
+   goto err_free_chan;
}
 
 	/*
@@ -364,6 +363,8 @@ static void vmbus_process_offer(struct 
work_struct *work)

newchannel-offermsg.offer.if_type,
newchannel-offermsg.offer.if_instance,
newchannel);
+   if (!newchannel-device_obj)
+   goto err_free_chan;
 
 	/*

 * Add the new device to the bus. This will kick off device-driver
@@ -379,9 +380,12 @@ static void vmbus_process_offer(struct 
work_struct *work)

list_del(newchannel-listentry);
spin_unlock_irqrestore(vmbus_connection.channel_lock, flags);
kfree(newchannel-device_obj);
-
-   free_channel(newchannel);
+   goto err_free_chan;
}
+out:
+   return;
+err_free_chan:
+   free_channel(newchannel);
 }
 
 enum {

--
1.9.3



___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v3 3/3] Drivers: hv: vmbus: serialize Offer and Rescind offer

2015-01-20 Thread Jason Wang



On Tue, Jan 20, 2015 at 11:45 PM, Vitaly Kuznetsov 
vkuzn...@redhat.com wrote:

Commit 4b2f9abea52a (staging: hv: convert channel_mgmt.c to not call
osd_schedule_callback)' was written under an assumption that we 
never receive
Rescind offer while we're still processing the initial Offer request. 
However,
the issue we fixed in 04a258c162a8 could be caused by this assumption 
not

always being true.

In particular, we need to protect against the following:
1) Receiving a Rescind offer after we do queue_work() for processing 
an Offer
   request and before we actually enter vmbus_process_offer(). 
work.func points
   to vmbus_process_offer() at this moment and in 
vmbus_onoffer_rescind() we do
   another queue_work() without a check so we'll enter 
vmbus_process_offer()

   twice.
2) Receiving a Rescind offer after we enter vmbus_process_offer() and
   especially after we set state = CHANNEL_OPEN_STATE. Many things 
can go
   wrong in that case, e.g. we can call free_channel() while we're 
still using

   it.

Implement the required protection by changing work-func at the very 
end of
vmbus_process_offer() and checking work-func in 
vmbus_onoffer_rescind(). In
case we receive rescind offer during or before vmbus_process_offer() 
is done
we set rescind flag to true and we check it at the end of 
vmbus_process_offer()

so such offer will not get lost.

Suggested-by: Radim Krčmář rkrc...@redhat.com
Signed-off-by: Vitaly Kuznetsov vkuzn...@redhat.com
---


Acked-by: Jason Wang jasow...@redhat.com


 drivers/hv/channel_mgmt.c | 30 ++
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index c6fdd74..877a944 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -279,9 +279,6 @@ static void vmbus_process_offer(struct 
work_struct *work)

int ret;
unsigned long flags;
 
-	/* The next possible work is rescind handling */

-   INIT_WORK(newchannel-work, vmbus_process_rescind_offer);
-
/* Make sure this is a new offer */
spin_lock_irqsave(vmbus_connection.channel_lock, flags);
 
@@ -341,7 +338,7 @@ static void vmbus_process_offer(struct 
work_struct *work)

if (channel-sc_creation_callback != NULL)
channel-sc_creation_callback(newchannel);
 
-			goto out;

+   goto done_init_rescind;
}
 
 		goto err_free_chan;
@@ -382,7 +379,14 @@ static void vmbus_process_offer(struct 
work_struct *work)

kfree(newchannel-device_obj);
goto err_free_chan;
}
-out:
+done_init_rescind:
+   spin_lock_irqsave(newchannel-lock, flags);
+   /* The next possible work is rescind handling */
+   INIT_WORK(newchannel-work, vmbus_process_rescind_offer);
+   /* Check if rescind offer was already received */
+   if (newchannel-rescind)
+   queue_work(newchannel-controlwq, newchannel-work);
+   spin_unlock_irqrestore(newchannel-lock, flags);
return;
 err_free_chan:
free_channel(newchannel);
@@ -520,6 +524,7 @@ static void vmbus_onoffer_rescind(struct 
vmbus_channel_message_header *hdr)

 {
struct vmbus_channel_rescind_offer *rescind;
struct vmbus_channel *channel;
+   unsigned long flags;
 
 	rescind = (struct vmbus_channel_rescind_offer *)hdr;

channel = relid2channel(rescind-child_relid);
@@ -528,11 +533,20 @@ static void vmbus_onoffer_rescind(struct 
vmbus_channel_message_header *hdr)

/* Just return here, no channel found */
return;
 
+	spin_lock_irqsave(channel-lock, flags);

channel-rescind = true;
+   /*
+	 * channel-work.func != vmbus_process_rescind_offer means we are 
still
+	 * processing offer request and the rescind offer processing should 
be
+	 * postponed. It will be done at the very end of 
vmbus_process_offer()

+* as rescind flag is being checked there.
+*/
+   if (channel-work.func == vmbus_process_rescind_offer)
+   /* work is initialized for vmbus_process_rescind_offer() from
+* vmbus_process_offer() where the channel got created */
+   queue_work(channel-controlwq, channel-work);
 
-	/* work is initialized for vmbus_process_rescind_offer() from

-* vmbus_process_offer() where the channel got created */
-   queue_work(channel-controlwq, channel-work);
+   spin_unlock_irqrestore(channel-lock, flags);
 }
 
 /*

--
1.9.3



___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH v3] hv: hv_fcopy: drop the obsolete message on transfer failure

2015-01-13 Thread Jason Wang



On Wed, Jan 14, 2015 at 9:43 AM, Dexuan Cui de...@microsoft.com wrote:

 -Original Message-
 From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
 Sent: Tuesday, January 13, 2015 21:52 PM
 To: Dexuan Cui; KY Srinivasan
 Cc: gre...@linuxfoundation.org; linux-ker...@vger.kernel.org; 
driverdev-

 de...@linuxdriverproject.org; o...@aepfle.de; a...@canonical.com;
 jasow...@redhat.com; Haiyang Zhang
 Subject: Re: [PATCH v3] hv: hv_fcopy: drop the obsolete message on 
transfer

 failure
 
 Dexuan Cui de...@microsoft.com writes:
 
  In the case the user-space daemon crashes, hangs or is killed, we
  need to down the semaphore, otherwise, after the daemon starts 
next

  time, the obsolete data in fcopy_transaction.message or
  fcopy_transaction.fcopy_msg will be used immediately.
 
 It seems this patch got lost and I don't see it in recent 'resend'

 series. K.Y., Dexuan, can you please take a look?

Hi Vitaly, Jason,
The patch can't fix all the corner cases (it would need non-trivial 
efforts for that)
as we discussed, but I think it would be better for us to have it as 
it can indeed fix

an obvious issue and doesn't introduce new issues.

I think I can document the known discussed corner cases in the patch 
as TODOs

and resend the patch.

Please let me know if you have different opinions. :-) 


Thanks,
-- Dexuan


Yes, I think it's ok.
We can do other fixes on top.

Thanks

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH RESEND 1/1] X86: Mark the Hyper-V clocksource as being continuous

2015-01-12 Thread Jason Wang



On Tue, Jan 13, 2015 at 8:26 AM, K. Y. Srinivasan k...@microsoft.com 
wrote:

The Hyper-V clocksource is continuous; mark it accordingly.

Signed-off-by: K. Y. Srinivasan k...@microsoft.com
Cc: stable sta...@vger.kernel.org
---
 arch/x86/kernel/cpu/mshyperv.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)


Acked-by: Jason Wang jasow...@redhat.com


diff --git a/arch/x86/kernel/cpu/mshyperv.c 
b/arch/x86/kernel/cpu/mshyperv.c

index a450373..939155f 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -107,6 +107,7 @@ static struct clocksource hyperv_cs = {
.rating = 400, /* use this when running on Hyperv*/
.read   = read_hv_clock,
.mask   = CLOCKSOURCE_MASK(64),
+   .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 static void __init ms_hyperv_init_platform(void)

--
1.7.4.1

--
To unsubscribe from this list: send the line unsubscribe 
linux-kernel in

the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH V2 1/1] Drivers: hv: vmbus: Implement a clockevent device

2014-12-04 Thread Jason Wang
(hv_context.clk_evt[cpu]);
if (hv_context.synic_event_page[cpu])
free_page((unsigned long)hv_context.synic_event_page[cpu]);
if (hv_context.synic_message_page[cpu])
@@ -388,6 +457,15 @@ void hv_synic_init(void *arg)
hv_context.vp_index[cpu] = (u32)vp_index;
 
 	INIT_LIST_HEAD(hv_context.percpu_list[cpu]);

+
+   /*
+* Register the per-cpu clockevent source.
+*/
+   if (ms_hyperv.features  HV_X64_MSR_SYNTIMER_AVAILABLE)
+   clockevents_config_and_register(hv_context.clk_evt[cpu],
+   HV_TIMER_FREQUENCY,
+   HV_MIN_DELTA_TICKS,
+   HV_MAX_MAX_DELTA_TICKS);
return;
 }
 
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h

index c386d8d..44b1c94 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -178,6 +178,23 @@ struct hv_message_header {
};
 };
 
+/*

+ * Timer configuration register.
+ */
+union hv_timer_config {
+   u64 as_uint64;
+   struct {
+   u64 enable:1;
+   u64 periodic:1;
+   u64 lazy:1;
+   u64 auto_enable:1;
+   u64 reserved_z0:12;
+   u64 sintx:4;
+   u64 reserved_z1:44;
+   };
+};
+
+
 /* Define timer message payload structure. */
 struct hv_timer_message_payload {
u32 timer_index;
@@ -519,6 +536,10 @@ struct hv_context {
 * buffer to post messages to the host.
 */
void *post_msg_page[NR_CPUS];
+   /*
+* Support PV clockevent device.
+*/
+   struct clock_event_device *clk_evt[NR_CPUS];
 };
 
 extern struct hv_context hv_context;

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 4d6b269..9e57c07 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -32,6 +32,7 @@
 #include linux/completion.h
 #include linux/hyperv.h
 #include linux/kernel_stat.h
+#include linux/clockchips.h
 #include asm/hyperv.h
 #include asm/hypervisor.h
 #include asm/mshyperv.h
@@ -578,6 +579,37 @@ static void vmbus_onmessage_work(struct 
work_struct *work)

kfree(ctx);
 }
 
+void hv_process_timer_expiration(struct hv_message *msg, int cpu)

+{
+   struct clock_event_device *dev = hv_context.clk_evt[cpu];
+
+   if (msg-header.message_type == HVMSG_NONE)
+   return;


Nitpick: caller has checked this.


+
+   if (dev-event_handler)
+   dev-event_handler(dev);
+
+   msg-header.message_type = HVMSG_NONE;
+
+   /*
+* Make sure the write to MessageType (ie set to
+* HVMSG_NONE) happens before we read the
+* MessagePending and EOMing. Otherwise, the EOMing
+* will not deliver any more messages since there is
+* no empty slot
+*/
+   mb();
+
+   if (msg-header.message_flags.msg_pending) {
+   /*
+* This will cause message queue rescan to
+* possibly deliver another msg from the
+* hypervisor
+*/
+   wrmsrl(HV_X64_MSR_EOM, 0);
+   }
+}
+
 static void vmbus_on_msg_dpc(unsigned long data)
 {
int cpu = smp_processor_id();
@@ -667,8 +699,12 @@ static void vmbus_isr(void)
msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
 
 	/* Check if there are actual msgs to be processed */

-   if (msg-header.message_type != HVMSG_NONE)
-   tasklet_schedule(msg_dpc);
+   if (msg-header.message_type != HVMSG_NONE) {
+   if (msg-header.message_type == HVMSG_TIMER_EXPIRED)
+   hv_process_timer_expiration(msg, cpu);
+   else
+   tasklet_schedule(msg_dpc);
+   }
 }
 
 /*

--
1.7.4.1


Reviewed-by: Jason Wang jasow...@redhat.com

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH v3] hv: hv_fcopy: drop the obsolete message on transfer failure

2014-12-01 Thread Jason Wang



On Fri, Nov 28, 2014 at 7:54 PM, Dexuan Cui de...@microsoft.com wrote:

 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Friday, November 28, 2014 18:13 PM
 To: Dexuan Cui
 Cc: gre...@linuxfoundation.org; linux-ker...@vger.kernel.org; 
driverdev-

 de...@linuxdriverproject.org; o...@aepfle.de; a...@canonical.com; KY
 Srinivasan; vkuzn...@redhat.com; Haiyang Zhang
 Subject: RE: [PATCH v3] hv: hv_fcopy: drop the obsolete message on 
transfer

 failure
 On Fri, Nov 28, 2014 at 4:36 PM, Dexuan Cui de...@microsoft.com 
wrote:

   -Original Message-
   From: Jason Wang [mailto:jasow...@redhat.com]
   Sent: Friday, November 28, 2014 14:47 PM
   To: Dexuan Cui
   Cc: gre...@linuxfoundation.org; linux-ker...@vger.kernel.org;
  driverdev-
   de...@linuxdriverproject.org; o...@aepfle.de; 
a...@canonical.com; KY

   Srinivasan; vkuzn...@redhat.com; Haiyang Zhang
   Subject: Re: [PATCH v3] hv: hv_fcopy: drop the obsolete message 
on

  transfer
   failure
   On Thu, Nov 27, 2014 at 9:09 PM, Dexuan Cui 
de...@microsoft.com

  wrote:
In the case the user-space daemon crashes, hangs or is 
killed, we

need to down the semaphore, otherwise, after the daemon starts
  next
time, the obsolete data in fcopy_transaction.message or
fcopy_transaction.fcopy_msg will be used immediately.
   
Cc: Jason Wang jasow...@redhat.com
Cc: Vitaly Kuznetsov vkuzn...@redhat.com
Cc: K. Y. Srinivasan k...@microsoft.com
Signed-off-by: Dexuan Cui de...@microsoft.com
---
   
v2: I removed the FCP prefix as Greg asked.
   
I also updated the output message a little:
FCP: failed to acquire the semaphore --
can not acquire the semaphore: it is benign
   
v3: I added the code in fcopy_release() as Jason Wang 
suggested.

I removed the pr_debug (it isn't so meaningful)and added a
comment instead.
   
 drivers/hv/hv_fcopy.c | 19 +++
 1 file changed, 19 insertions(+)
   
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 23b2ce2..faa6ba6 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -86,6 +86,18 @@ static void fcopy_work_func(struct 
work_struct

*dummy)
 * process the pending transaction.
 */
fcopy_respond_to_host(HV_E_FAIL);
+
+   /* In the case the user-space daemon crashes, hangs or is
  killed, we
+* need to down the semaphore, otherwise, after the daemon
  starts
next
+* time, the obsolete data in fcopy_transaction.message or
+* fcopy_transaction.fcopy_msg will be used immediately.
+*
+	 * NOTE: fcopy_read() happens to get the semaphore (very 
rare)?

We're
+* still OK, because we've reported the failure to the host.
+*/
+   if (down_trylock(fcopy_transaction.read_sema))
+   ;
 
   Sorry, I'm not quite understand how if () ; can help here.
 
   Btw, a question not relate to this patch.
 
   What happens if a daemon is resume from SIGSTOP and expires the
  check
   here?
  Hi Jason,
  My idea is: here we need down_trylock(), but in case we can't get 
the

  semaphore, it's OK anyway:
 
  Scenario 1):
  1.1: when the daemon is blocked on the pread(), the daemon 
receives

  SIGSTOP;
  1.2: the host user runs the PowerShell Copy-VMFile command;
  1.3.1: the driver reports the failure to the host user in 5s and
  1.3.2: the driver down()-es the semaphore;
  1.4: the daemon receives SIGCONT and it will be still blocked on 
the

  pread().
  Without the down_trylock(), in 1.4, the daemon can receive an
  obsolete message.
  NOTE: in this scenario, the daemon is not killed.
 
  Scenario 2):
  In senario 1), if the daemon receives SIGCONT between 1.3.1 and 
1.3.2

  and
  do down() in fcopy_read(), it will receive the message but: the
  driver has
  reported the failure to the host user and the driver's 1.3.2 can't
  get the
  semaphore -- IMO this is acceptably OK, though in the VM, an
  incomplete
  file will be left there.
  BTW, I think in the daemon's hv_start_fcopy() we should add a
  close(target_fd) before open()-ing a new one.
 
 Right, but how about the case when resuming from SIGSTOP but no 
timeout?

Sorry, I don't understand this:
if no timeout, fcopy_read() will get the semaphore and fcopy_write()
will try to cancel fcopy_work.


Yes.




 Looks like in this case userspace() may wait in down_interruptible()
 until timeout. We probably need something like this:
 
 if (down_interruptible(fcopy_transaction.read_sema)) {

 up(fcopy_transaction.read_sema);
 return -EINTR;
 }

until timeout?
if the daemon can't get the semaphore, it can only be wake by a 
signal(the
daemon doesn't install handler, so by default most signals will kill 
the daemon).
In case a signal waking up the daemon doesn't kill the daemon, why 
should

we do up()?


True

RE: [PATCH v3] hv: hv_fcopy: drop the obsolete message on transfer failure

2014-12-01 Thread Jason Wang



On Mon, Dec 1, 2014 at 5:47 PM, Dexuan Cui de...@microsoft.com wrote:

 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Monday, December 1, 2014 16:23 PM
 To: Dexuan Cui
 Cc: gre...@linuxfoundation.org; linux-ker...@vger.kernel.org; 
driverdev-

 de...@linuxdriverproject.org; o...@aepfle.de; a...@canonical.com; KY
 Srinivasan; vkuzn...@redhat.com; Haiyang Zhang
 Subject: RE: [PATCH v3] hv: hv_fcopy: drop the obsolete message on 
transfer

 failure
 On Fri, Nov 28, 2014 at 7:54 PM, Dexuan Cui de...@microsoft.com 
wrote:

   -Original Message-
   From: Jason Wang [mailto:jasow...@redhat.com]
   Sent: Friday, November 28, 2014 18:13 PM
   To: Dexuan Cui
   Cc: gre...@linuxfoundation.org; linux-ker...@vger.kernel.org;
  driverdev-
   de...@linuxdriverproject.org; o...@aepfle.de; 
a...@canonical.com; KY

   Srinivasan; vkuzn...@redhat.com; Haiyang Zhang
   Subject: RE: [PATCH v3] hv: hv_fcopy: drop the obsolete message 
on

  transfer
   failure
   On Fri, Nov 28, 2014 at 4:36 PM, Dexuan Cui 
de...@microsoft.com

  wrote:
 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Friday, November 28, 2014 14:47 PM
 To: Dexuan Cui
 Cc: gre...@linuxfoundation.org; 
linux-ker...@vger.kernel.org;

driverdev-
 de...@linuxdriverproject.org; o...@aepfle.de;
  a...@canonical.com; KY
 Srinivasan; vkuzn...@redhat.com; Haiyang Zhang
 Subject: Re: [PATCH v3] hv: hv_fcopy: drop the obsolete 
message

  on
transfer
 failure
 On Thu, Nov 27, 2014 at 9:09 PM, Dexuan Cui
  de...@microsoft.com
wrote:
  In the case the user-space daemon crashes, hangs or is
  killed, we
  need to down the semaphore, otherwise, after the daemon 
starts

next
  time, the obsolete data in fcopy_transaction.message or
  fcopy_transaction.fcopy_msg will be used immediately.
 
  Cc: Jason Wang jasow...@redhat.com
  Cc: Vitaly Kuznetsov vkuzn...@redhat.com
  Cc: K. Y. Srinivasan k...@microsoft.com
  Signed-off-by: Dexuan Cui de...@microsoft.com
  ---
 
  v2: I removed the FCP prefix as Greg asked.
 
  I also updated the output message a little:
  FCP: failed to acquire the semaphore --
  can not acquire the semaphore: it is benign
 
  v3: I added the code in fcopy_release() as Jason Wang
  suggested.
  I removed the pr_debug (it isn't so meaningful)and 
added a

  comment instead.
 
   drivers/hv/hv_fcopy.c | 19 +++
   1 file changed, 19 insertions(+)
 
  diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
  index 23b2ce2..faa6ba6 100644
  --- a/drivers/hv/hv_fcopy.c
  +++ b/drivers/hv/hv_fcopy.c
  @@ -86,6 +86,18 @@ static void fcopy_work_func(struct
  work_struct
  *dummy)
 * process the pending transaction.
 */
fcopy_respond_to_host(HV_E_FAIL);
  +
  +	/* In the case the user-space daemon crashes, hangs or 
is

killed, we
  +	 * need to down the semaphore, otherwise, after the 
daemon

starts
  next
  +	 * time, the obsolete data in fcopy_transaction.message 
or

  +  * fcopy_transaction.fcopy_msg will be used immediately.
  +  *
  +  * NOTE: fcopy_read() happens to get the semaphore (very
  rare)?
  We're
  +	 * still OK, because we've reported the failure to the 
host.

  +  */
  + if (down_trylock(fcopy_transaction.read_sema))
  + ;
   
 Sorry, I'm not quite understand how if () ; can help here.
   
 Btw, a question not relate to this patch.
   
 What happens if a daemon is resume from SIGSTOP and expires 
the

check
 here?
Hi Jason,
My idea is: here we need down_trylock(), but in case we can't 
get

  the
semaphore, it's OK anyway:
   
Scenario 1):
1.1: when the daemon is blocked on the pread(), the daemon
  receives
SIGSTOP;
1.2: the host user runs the PowerShell Copy-VMFile command;
1.3.1: the driver reports the failure to the host user in 5s 
and

1.3.2: the driver down()-es the semaphore;
1.4: the daemon receives SIGCONT and it will be still blocked 
on

  the
pread().
Without the down_trylock(), in 1.4, the daemon can receive an
obsolete message.
NOTE: in this scenario, the daemon is not killed.
   
Scenario 2):
In senario 1), if the daemon receives SIGCONT between 1.3.1 
and

  1.3.2
and
do down() in fcopy_read(), it will receive the message but: 
the

driver has
reported the failure to the host user and the driver's 1.3.2 
can't

get the
semaphore -- IMO this is acceptably OK, though in the VM, an
incomplete
file will be left there.
BTW, I think in the daemon's hv_start_fcopy() we should add a
close(target_fd) before open()-ing a new one.
 
   Right, but how about the case when resuming from SIGSTOP

RE: [PATCH v3] hv: hv_fcopy: drop the obsolete message on transfer failure

2014-12-01 Thread Jason Wang



On Tue, Dec 2, 2014 at 1:33 PM, Dexuan Cui de...@microsoft.com wrote:

 -Original Message-
 From: KY Srinivasan
 Sent: Monday, December 1, 2014 23:55 PM
 To: Dexuan Cui; Jason Wang
 Cc: gre...@linuxfoundation.org; linux-ker...@vger.kernel.org; 
driverdev-

 de...@linuxdriverproject.org; o...@aepfle.de; a...@canonical.com;
 vkuzn...@redhat.com; Haiyang Zhang
 Subject: RE: [PATCH v3] hv: hv_fcopy: drop the obsolete message on 
transfer

 failure
 
 
 
  -Original Message-

  From: Dexuan Cui
  Sent: Monday, December 1, 2014 3:01 AM
  To: Jason Wang
  Cc: gre...@linuxfoundation.org; linux-ker...@vger.kernel.org; 
driverdev-
  de...@linuxdriverproject.org; o...@aepfle.de; a...@canonical.com; 
KY

  Srinivasan; vkuzn...@redhat.com; Haiyang Zhang
  Subject: RE: [PATCH v3] hv: hv_fcopy: drop the obsolete message 
on transfer

  failure
 
   -Original Message-
   From: Jason Wang [mailto:jasow...@redhat.com]
   Sent: Monday, December 1, 2014 18:18 PM
   To: Dexuan Cui
   Cc: gre...@linuxfoundation.org; linux-ker...@vger.kernel.org;
   driverdev- de...@linuxdriverproject.org; o...@aepfle.de;
   a...@canonical.com; KY Srinivasan; vkuzn...@redhat.com; Haiyang
 Zhang
   Subject: RE: [PATCH v3] hv: hv_fcopy: drop the obsolete message 
on

   transfer failure
  
  
  
   On Mon, Dec 1, 2014 at 5:47 PM, Dexuan Cui de...@microsoft.com
  wrote:
 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Monday, December 1, 2014 16:23 PM
 To: Dexuan Cui
 Cc: gre...@linuxfoundation.org; 
linux-ker...@vger.kernel.org;

driverdev-
 de...@linuxdriverproject.org; o...@aepfle.de; 
a...@canonical.com;

KY  Srinivasan; vkuzn...@redhat.com; Haiyang Zhang
 Subject: RE: [PATCH v3] hv: hv_fcopy: drop the obsolete 
message on
transfer  failure  On Fri, Nov 28, 2014 at 7:54 PM, Dexuan 
Cui

de...@microsoft.com
wrote:
   -Original Message-
   From: Jason Wang [mailto:jasow...@redhat.com]Sent:
Friday, November 28, 2014 18:13 PMTo: Dexuan Cui
Cc:

gre...@linuxfoundation.org; linux-ker...@vger.kernel.org;  
driverdev-de...@linuxdriverproject.org; o...@aepfle.de;
a...@canonical.com; KYSrinivasan; vkuzn...@redhat.com;
 Haiyang
ZhangSubject: RE: [PATCH v3] hv: hv_fcopy: drop the 
obsolete
message on   transferfailureOn Fri, Nov 28, 
2014 at

4:36 PM, Dexuan Cui de...@microsoft.com   wrote:
 -Original Message-  From: Jason Wang
[mailto:jasow...@redhat.com]  Sent: Friday, November 
28,

2014 14:47 PM  To: Dexuan Cui  Cc:
gre...@linuxfoundation.org; linux-ker...@vger.kernel.org;  
  
driverdev-  de...@linuxdriverproject.org; 
o...@aepfle.de;
 a...@canonical.com; KY  Srinivasan; 
vkuzn...@redhat.com;
Haiyang Zhang  Subject: Re: [PATCH v3] hv: hv_fcopy: 
drop
the obsolete message   on transfer  
failure  

  On Thu, Nov 27, 2014 at 9:09 PM, Dexuan Cui  
de...@microsoft.com wrote:
  In the case the user-space daemon crashes, hangs 
or is
 killed, we   need to down the semaphore, 
otherwise,
after the daemon starts next   time, the 
obsolete

data in fcopy_transaction.message or  
fcopy_transaction.fcopy_msg will be used immediately.
 
  Cc: Jason Wang jasow...@redhat.com   
Cc:

Vitaly Kuznetsov vkuzn...@redhat.com   Cc: K. Y.
Srinivasan k...@microsoft.com   Signed-off-by: 
Dexuan Cui
de...@microsoft.com   --- 
v2: I

removed the FCP prefix as Greg asked.
 
  I also updated the output message a little:
  FCP: failed to acquire the semaphore --
  can not acquire the semaphore: it is benign
 
  v3: I added the code in fcopy_release() as Jason 
Wang

 suggested.
  I removed the pr_debug (it isn't so 
meaningful)and

added a
  comment instead.
 
   drivers/hv/hv_fcopy.c | 19 +++  
  
  1 file changed, 19 insertions(+) 
diff
--git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
  

index 23b2ce2..faa6ba6 100644   ---
a/drivers/hv/hv_fcopy.c   +++ 
b/drivers/hv/hv_fcopy.c  
   @@ -86,6 +86,18 @@ static void fcopy_work_func(struct  


work_struct   *dummy)
* process the pending transaction.
*/
   fcopy_respond_to_host(HV_E_FAIL);
  +
  +/* In the case the user-space daemon crashes, 
hangs

  or
is
killed, we
  + * need to down the semaphore, otherwise, after
  the
daemon
starts
  next
  + * time, the obsolete data in
  fcopy_transaction.message
or
  + * fcopy_transaction.fcopy_msg will be used
  immediately.
  + *
  + * NOTE: fcopy_read() happens to get the
  semaphore (very
  rare)?
  We're

RE: [PATCH v2] hv: hv_fcopy: drop the obsolete message on transfer failure

2014-11-27 Thread Jason Wang



On Thu, Nov 27, 2014 at 4:50 PM, Dexuan Cui de...@microsoft.com wrote:

 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Thursday, November 27, 2014 15:15 PM
 To: Dexuan Cui
 Cc: gre...@linuxfoundation.org; linux-ker...@vger.kernel.org; 
driverdev-

 de...@linuxdriverproject.org; o...@aepfle.de; a...@canonical.com; KY
 Srinivasan; vkuzn...@redhat.com; Haiyang Zhang
 Subject: Re: [PATCH v2] hv: hv_fcopy: drop the obsolete message on 
transfer

 failure
 - Original Message -
  In the case the user-space daemon crashes, hangs or is killed, we
  need to down the semaphore, otherwise, after the daemon starts 
next

  time, the obsolete data in fcopy_transaction.message or
  fcopy_transaction.fcopy_msg will be used immediately.
 
  Reviewed-by: Vitaly Kuznetsov vkuzn...@redhat.com
  Cc: K. Y. Srinivasan k...@microsoft.com
  Signed-off-by: Dexuan Cui de...@microsoft.com
  ---
 
  v2: I removed the FCP prefix as Greg asked.
 
  I also updated the output message a little:
  FCP: failed to acquire the semaphore --
  can not acquire the semaphore: it is benign
 
   drivers/hv/hv_fcopy.c | 9 +
   1 file changed, 9 insertions(+)
 
  diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
  index 23b2ce2..c518ad9 100644
  --- a/drivers/hv/hv_fcopy.c
  +++ b/drivers/hv/hv_fcopy.c
  @@ -86,6 +86,15 @@ static void fcopy_work_func(struct work_struct
 *dummy)
 * process the pending transaction.
 */
fcopy_respond_to_host(HV_E_FAIL);
  +
  +	/* In the case the user-space daemon crashes, hangs or is 
killed, we
  +	 * need to down the semaphore, otherwise, after the daemon 
starts

 next
  +  * time, the obsolete data in fcopy_transaction.message or
  +  * fcopy_transaction.fcopy_msg will be used immediately.
  +  */
 
 Looks still racy, what happens if the daemon start before 
down_trylock()

 but after fcopy_respont_to_host() here?

Jason,
Thanks for pointing this out!
IMO we can resolve this by adding down_trylock() in fcopy_release().
What's your opinion?



Looks better and need to cancel the timeout also here?



 
  +	if (down_trylock(fcopy_transaction.read_sema))

  + pr_debug(can not acquire the semaphore: it is benign\n);
 
 typo

  +
   }

Sorry -- what typo do you mean?


s/benign/begin/ ?


Thanks,
-- Dexuan
�NrybXǧv^)޺{.n+{zXܨ}Ơzj:+vzZ++zfh~izw?)ߢf^jǫym@Aa0hi


___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v3] hv: hv_fcopy: drop the obsolete message on transfer failure

2014-11-27 Thread Jason Wang



On Thu, Nov 27, 2014 at 9:09 PM, Dexuan Cui de...@microsoft.com wrote:

In the case the user-space daemon crashes, hangs or is killed, we
need to down the semaphore, otherwise, after the daemon starts next
time, the obsolete data in fcopy_transaction.message or
fcopy_transaction.fcopy_msg will be used immediately.

Cc: Jason Wang jasow...@redhat.com
Cc: Vitaly Kuznetsov vkuzn...@redhat.com
Cc: K. Y. Srinivasan k...@microsoft.com
Signed-off-by: Dexuan Cui de...@microsoft.com
---

v2: I removed the FCP prefix as Greg asked.

I also updated the output message a little:
FCP: failed to acquire the semaphore -- 
can not acquire the semaphore: it is benign


v3: I added the code in fcopy_release() as Jason Wang suggested.
I removed the pr_debug (it isn't so meaningful)and added a 
comment instead.


 drivers/hv/hv_fcopy.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 23b2ce2..faa6ba6 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -86,6 +86,18 @@ static void fcopy_work_func(struct work_struct 
*dummy)

 * process the pending transaction.
 */
fcopy_respond_to_host(HV_E_FAIL);
+
+   /* In the case the user-space daemon crashes, hangs or is killed, we
+	 * need to down the semaphore, otherwise, after the daemon starts 
next

+* time, the obsolete data in fcopy_transaction.message or
+* fcopy_transaction.fcopy_msg will be used immediately.
+*
+	 * NOTE: fcopy_read() happens to get the semaphore (very rare)? 
We're

+* still OK, because we've reported the failure to the host.
+*/
+   if (down_trylock(fcopy_transaction.read_sema))
+   ;


Sorry, I'm not quite understand how if () ; can help here.

Btw, a question not relate to this patch.

What happens if a daemon is resume from SIGSTOP and expires the check 
here?


+
 }
 
 static int fcopy_handle_handshake(u32 version)
@@ -351,6 +363,13 @@ static int fcopy_release(struct inode *inode, 
struct file *f)

 */
in_hand_shake = true;
opened = false;
+
+   if (cancel_delayed_work_sync(fcopy_work)) {
+   /* We haven't up()-ed the semaphore(very rare)? */
+   if (down_trylock(fcopy_transaction.read_sema))
+   ;


And this.


+   fcopy_respond_to_host(HV_E_FAIL);
+   }
return 0;
 }
 
--

1.9.1

--
To unsubscribe from this list: send the line unsubscribe 
linux-kernel in

the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v2] hv: hv_fcopy: drop the obsolete message on transfer failure

2014-11-26 Thread Jason Wang


- Original Message -
 In the case the user-space daemon crashes, hangs or is killed, we
 need to down the semaphore, otherwise, after the daemon starts next
 time, the obsolete data in fcopy_transaction.message or
 fcopy_transaction.fcopy_msg will be used immediately.
 
 Reviewed-by: Vitaly Kuznetsov vkuzn...@redhat.com
 Cc: K. Y. Srinivasan k...@microsoft.com
 Signed-off-by: Dexuan Cui de...@microsoft.com
 ---
 
 v2: I removed the FCP prefix as Greg asked.
 
 I also updated the output message a little:
 FCP: failed to acquire the semaphore --
 can not acquire the semaphore: it is benign
 
  drivers/hv/hv_fcopy.c | 9 +
  1 file changed, 9 insertions(+)
 
 diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
 index 23b2ce2..c518ad9 100644
 --- a/drivers/hv/hv_fcopy.c
 +++ b/drivers/hv/hv_fcopy.c
 @@ -86,6 +86,15 @@ static void fcopy_work_func(struct work_struct *dummy)
* process the pending transaction.
*/
   fcopy_respond_to_host(HV_E_FAIL);
 +
 + /* In the case the user-space daemon crashes, hangs or is killed, we
 +  * need to down the semaphore, otherwise, after the daemon starts next
 +  * time, the obsolete data in fcopy_transaction.message or
 +  * fcopy_transaction.fcopy_msg will be used immediately.
 +  */

Looks still racy, what happens if the daemon start before down_trylock()
but after fcopy_respont_to_host() here?

 + if (down_trylock(fcopy_transaction.read_sema))
 + pr_debug(can not acquire the semaphore: it is benign\n);

typo
 +
  }
  
  static int fcopy_handle_handshake(u32 version)
 --
 1.9.1
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
 
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH] hv: hv_balloon: avoid memory leak on alloc_error of 2MB memory block

2014-11-24 Thread Jason Wang
On 11/24/2014 03:54 PM, Dexuan Cui wrote:
 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Monday, November 24, 2014 15:28 PM
 To: Dexuan Cui; gre...@linuxfoundation.org; linux-ker...@vger.kernel.org;
 driverdev-devel@linuxdriverproject.org; o...@aepfle.de;
 a...@canonical.com; KY Srinivasan
 Cc: Haiyang Zhang
 Subject: Re: [PATCH] hv: hv_balloon: avoid memory leak on alloc_error of
 2MB memory block

 On 11/24/2014 02:08 PM, Dexuan Cui wrote:
 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Monday, November 24, 2014 13:18 PM
 To: Dexuan Cui; gre...@linuxfoundation.org; linux-
 ker...@vger.kernel.org;
 driverdev-devel@linuxdriverproject.org; o...@aepfle.de;
 a...@canonical.com; KY Srinivasan
 Cc: Haiyang Zhang
 Subject: Re: [PATCH] hv: hv_balloon: avoid memory leak on
 alloc_error of
 2MB memory block

 On 11/24/2014 01:56 PM, Dexuan Cui wrote:
 If num_ballooned is not 0, we shouldn't neglect the already-
 allocated
 2MB
 memory block(s).

 Cc: K. Y. Srinivasan k...@microsoft.com
 Cc: sta...@vger.kernel.org
 Signed-off-by: Dexuan Cui de...@microsoft.com
 ---
  drivers/hv/hv_balloon.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

 diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
 index 5e90c5d..cba2d3b 100644
 --- a/drivers/hv/hv_balloon.c
 +++ b/drivers/hv/hv_balloon.c
 @@ -1091,6 +1091,8 @@ static void balloon_up(struct
 work_struct
 *dummy)
 bool done = false;
 int i;

 +   /* The host does balloon_up in 2MB. */
 +   WARN_ON(num_pages % PAGES_IN_2M != 0);

 /*
  * We will attempt 2M allocations. However, if we fail to
 @@ -,7 +1113,7 @@ static void balloon_up(struct
 work_struct
 *dummy)
 bl_resp, alloc_unit,
  alloc_error);

 -   if ((alloc_error)  (alloc_unit != 1)) {
 +   if (alloc_error  (alloc_unit != 1) 
 num_ballooned == 0)
 {
 alloc_unit = 1;
 continue;
 }
 Before the change, we may retry the 4K allocation when part or all 2M
 allocations were failed. This makes sense when memory is fragmented.
 But
 Yes, but all the partially-allocated 2MB memory blocks are lost(mem leak).

 after the change, if part of 2M allocation were failed, we won't retry
 4K allocation. Is this expected?
 Hi Jason,
 The patch doesn't break the try 2MB first; then try 4K logic:

 With the change, we'll retry the 2MB allocation in the next iteration of the
 same while (!done) loop -- we expect this retry will cause
 alloc_error  (alloc_unit != 1)  num_ballooned == 0 to be true,
 so we'll later try 4K allocation, as we did before.
 If I read the code correctly, if part of 2M allocation fails.
 alloc_balloon_pages() will have a non zero return value with alloc_error
 set. Then it will match the following check:

 if ((alloc_error) || (num_ballooned == num_pages))
 {

 which will set done to true. So there's no second iteration of while
 (!done) loop?
 Oh... I see the issue in my patch.
 Thanks for pointing this out, Jason!

 Probably you need something like:

 if ((alloc_unit != 1)  (num_ballooned == 0)) {
 alloc_unit = 1;
 continue;
 }

 if ((alloc_unit == 1) || (num_ballooned == num_pages)) {
 ...
 }
 Your code is good, except for one thing:
 alloc_balloon_pages() can return due to:

 if (bl_resp-hdr.size + sizeof(union dm_mem_page_range) 
  PAGE_SIZE)
 return i * alloc_unit;

 In this case, alloc_unit == 1 is true, but we shouldn't run done = true. 

 How do you like this? I made a few changes based on your code.

 @@ -1086,16 +1088,18 @@ static void balloon_up(struct work_struct *dummy)
 num_pages -= num_ballooned;
 +   alloc_error = false;
 num_ballooned = alloc_balloon_pages(dm_device, num_pages,
 bl_resp, alloc_unit,
  alloc_error);

 -   if ((alloc_error)  (alloc_unit != 1)) {
 +   if (alloc_unit != 1  num_ballooned == 0) {
 alloc_unit = 1;
 continue;
 }

 -   if ((alloc_error) || (num_ballooned == num_pages)) {
 +   if ((alloc_unit == 1  alloc_error) ||
 +   (num_ballooned == num_pages)) {
 bl_resp-more_pages = 0;
 done = true;
 dm_device.state = DM_INITIALIZED;


 If you're Ok with this, I'll send out a v2 patch.

 Thanks,
 -- Dexuan

Looks good.

Thanks
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v2] hv: hv_balloon: avoid memory leak on alloc_error of 2MB memory block

2014-11-24 Thread Jason Wang
On 11/25/2014 12:32 PM, Dexuan Cui wrote:
 If num_ballooned is not 0, we shouldn't neglect the
 already-partially-allocated 2MB memory block(s).

 Cc: Jason Wang jasow...@redhat.com
 Cc: K. Y. Srinivasan k...@microsoft.com
 Signed-off-by: Dexuan Cui de...@microsoft.com
 ---

 v2: I fixed the logic error in v1, pointed by Jason Wang:
   In v1: in the case of partially-allocated 2MB, alloc_error is true,
   so we'll run done = true and hence we won't proceed with
   the next iteration of trying 4K allocation.

 I also changed the WARN_ON to WARN_ON_ONCE in case the host behavior
 changes in the future.

  drivers/hv/hv_balloon.c | 10 +++---
  1 file changed, 7 insertions(+), 3 deletions(-)

 diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
 index 5e90c5d..b958ded 100644
 --- a/drivers/hv/hv_balloon.c
 +++ b/drivers/hv/hv_balloon.c
 @@ -1087,10 +1087,12 @@ static void balloon_up(struct work_struct *dummy)
   struct dm_balloon_response *bl_resp;
   int alloc_unit;
   int ret;
 - bool alloc_error = false;
 + bool alloc_error;
   bool done = false;
   int i;
  
 + /* The host balloons pages in 2M granularity. */
 + WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0);
  
   /*
* We will attempt 2M allocations. However, if we fail to
 @@ -1107,16 +1109,18 @@ static void balloon_up(struct work_struct *dummy)
  
  
   num_pages -= num_ballooned;
 + alloc_error = false;
   num_ballooned = alloc_balloon_pages(dm_device, num_pages,
   bl_resp, alloc_unit,
alloc_error);
  
 - if ((alloc_error)  (alloc_unit != 1)) {
 + if (alloc_unit != 1  num_ballooned == 0) {
   alloc_unit = 1;
   continue;
   }
  
 - if ((alloc_error) || (num_ballooned == num_pages)) {
 + if ((alloc_unit == 1  alloc_error) ||
 + (num_ballooned == num_pages)) {
   bl_resp-more_pages = 0;
   done = true;
   dm_device.state = DM_INITIALIZED;

Acked-by: Jason Wang jasow...@redhat.com

Thanks
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH] hv: hv_balloon: avoid memory leak on alloc_error of 2MB memory block

2014-11-23 Thread Jason Wang
On 11/24/2014 02:08 PM, Dexuan Cui wrote:
 -Original Message-
  From: Jason Wang [mailto:jasow...@redhat.com]
  Sent: Monday, November 24, 2014 13:18 PM
  To: Dexuan Cui; gre...@linuxfoundation.org; linux-ker...@vger.kernel.org;
  driverdev-devel@linuxdriverproject.org; o...@aepfle.de;
  a...@canonical.com; KY Srinivasan
  Cc: Haiyang Zhang
  Subject: Re: [PATCH] hv: hv_balloon: avoid memory leak on alloc_error of
  2MB memory block
  
  On 11/24/2014 01:56 PM, Dexuan Cui wrote:
   If num_ballooned is not 0, we shouldn't neglect the already-allocated
  2MB
   memory block(s).
  
   Cc: K. Y. Srinivasan k...@microsoft.com
   Cc: sta...@vger.kernel.org
   Signed-off-by: Dexuan Cui de...@microsoft.com
   ---
drivers/hv/hv_balloon.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
  
   diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
   index 5e90c5d..cba2d3b 100644
   --- a/drivers/hv/hv_balloon.c
   +++ b/drivers/hv/hv_balloon.c
   @@ -1091,6 +1091,8 @@ static void balloon_up(struct work_struct
  *dummy)
   bool done = false;
   int i;
  
   +   /* The host does balloon_up in 2MB. */
   +   WARN_ON(num_pages % PAGES_IN_2M != 0);
  
   /*
* We will attempt 2M allocations. However, if we fail to
   @@ -,7 +1113,7 @@ static void balloon_up(struct work_struct
  *dummy)
   bl_resp, alloc_unit,
alloc_error);
  
   -   if ((alloc_error)  (alloc_unit != 1)) {
   +   if (alloc_error  (alloc_unit != 1)  num_ballooned 
   == 0)
  {
   alloc_unit = 1;
   continue;
   }
  
  Before the change, we may retry the 4K allocation when part or all 2M
  allocations were failed. This makes sense when memory is fragmented. But
 Yes, but all the partially-allocated 2MB memory blocks are lost(mem leak).

  after the change, if part of 2M allocation were failed, we won't retry
  4K allocation. Is this expected?
 Hi Jason,
 The patch doesn't break the try 2MB first; then try 4K logic:

 With the change, we'll retry the 2MB allocation in the next iteration of the
 same while (!done) loop -- we expect this retry will cause
 alloc_error  (alloc_unit != 1)  num_ballooned == 0 to be true,
 so we'll later try 4K allocation, as we did before.

If I read the code correctly, if part of 2M allocation fails.
alloc_balloon_pages() will have a non zero return value with alloc_error
set. Then it will match the following check:

if ((alloc_error) || (num_ballooned == num_pages))
{   

which will set done to true. So there's no second iteration of while
(!done) loop?

Probably you need something like:

if ((alloc_unit != 1)  (num_ballooned == 0)) {
alloc_unit = 1;
continue;
}

if ((alloc_unit == 1) || (num_ballooned = num_pages)) {
...
}

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 1/1] Drivers: hv: util: Properly pack the data for file copy functionality

2014-09-03 Thread Jason Wang
On 09/03/2014 10:21 AM, K. Y. Srinivasan wrote:
 Properly pack the data for file copy functionality. Patch based on
 investigation done by Matej Muzila mmuz...@redhat.com

 Signed-off-by: K. Y. Srinivasan k...@microsoft.com
 Reported-by: q...@redhat.com
 Cc: sta...@vger.kernel.org
 ---
  include/uapi/linux/hyperv.h |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)

 diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h
 index 78e4a86..0a8e6ba 100644
 --- a/include/uapi/linux/hyperv.h
 +++ b/include/uapi/linux/hyperv.h
 @@ -137,7 +137,7 @@ struct hv_do_fcopy {
   __u64   offset;
   __u32   size;
   __u8data[DATA_FRAGMENT];
 -};
 +} __attribute__((packed));
  
  /*
   * An implementation of HyperV key value pair (KVP) functionality for Linux.

Acked-by: Jason Wang jasow...@redhat.com
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] hyperv: remove meaningless pr_err() in vmbus_recvpacket_raw()

2014-06-30 Thread Jason Wang
All its callers depends on the return value of -ENOBUFS to reallocate a
bigger buffer and retry the receiving. So there's no need to call
pr_err() here since it was not a real issue, otherwise syslog will be
flooded by this false warning.

Cc: K. Y. Srinivasan k...@microsoft.com
Cc: Haiyang Zhang haiya...@microsoft.com
Signed-off-by: Jason Wang jasow...@redhat.com
---
 drivers/hv/channel.c |6 +-
 1 files changed, 1 insertions(+), 5 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 284cf66..531a593 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -808,12 +808,8 @@ int vmbus_recvpacket_raw(struct vmbus_channel *channel, 
void *buffer,
 
*buffer_actual_len = packetlen;
 
-   if (packetlen  bufferlen) {
-   pr_err(Buffer too small - needed %d bytes but 
-   got space for only %d bytes\n,
-   packetlen, bufferlen);
+   if (packetlen  bufferlen)
return -ENOBUFS;
-   }
 
*requestid = desc.trans_id;
 
-- 
1.7.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net-next] hyperv: Move state setting for link query

2014-03-04 Thread Jason Wang
On 03/05/2014 12:57 AM, Haiyang Zhang wrote:

 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Monday, March 3, 2014 10:10 PM
 To: Haiyang Zhang; da...@davemloft.net; net...@vger.kernel.org
 Cc: KY Srinivasan; o...@aepfle.de; linux-ker...@vger.kernel.org; driverdev-
 de...@linuxdriverproject.org
 Subject: Re: [PATCH net-next] hyperv: Move state setting for link query

 On 03/04/2014 07:54 AM, Haiyang Zhang wrote:
 It moves the state setting for query into rndis_filter_receive_response().
 All callbacks including query-complete and status-callback are
 synchronized by channel-inbound_lock. This prevents pentential race
 between them.

 This still looks racy to me. The problem is workqueue is not synchronized 
 with
 those here.

 Consider the following case in netvsc_link_change():

 if (rdev-link_state) {
 ... receive interrupt ...
 rndis_filter_receice_response() which changes rdev-link_state
 ...
 netif_carrier_off()
 }

 And also it need to schedule a work otherwise the link status is out of sync.
 The rndis_filter_query_device_link_status() makes the query and wait for the
 complete message, including set state, before returning.

 The rndis_filter_query_device_link_status() is called from 
 rndis_filter_device_add(),
 which is called from either netvsc_change_mtu() or netvsc_probe().

 The change_mtu() and netvsc_link_change() are synchronized by rtnl_lock().
 In netvsc_probe(), the status query  complete happens before 
 register_netdev(), and
 the netvsc_linkstatus_callback() schedules the work only after netdevice is 
 registered.
 So, there are no race in either case.

 The carrier_on/off work will be scheduled when netvsc_open() is called. Then,
 the status will be updated based on the latest link_state.

 Thanks,
 - Haiyang


I see. Then if the link status is changing during mtu changing in guest.
Looks like we may miss a link status updating?
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net-next] hyperv: Move state setting for link query

2014-03-03 Thread Jason Wang
On 03/04/2014 07:54 AM, Haiyang Zhang wrote:
 It moves the state setting for query into rndis_filter_receive_response().
 All callbacks including query-complete and status-callback are synchronized
 by channel-inbound_lock. This prevents pentential race between them.

This still looks racy to me. The problem is workqueue is not
synchronized with those here.

Consider the following case in netvsc_link_change():

if (rdev-link_state) {
... receive interrupt ...
rndis_filter_receice_response() which changes rdev-link_state
...
netif_carrier_off()
}

And also it need to schedule a work otherwise the link status is out of
sync.
 Signed-off-by: Haiyang Zhang haiya...@microsoft.com
 ---
  drivers/net/hyperv/rndis_filter.c |   21 -
  1 files changed, 20 insertions(+), 1 deletions(-)

 diff --git a/drivers/net/hyperv/rndis_filter.c 
 b/drivers/net/hyperv/rndis_filter.c
 index f0cc8ef..6a9f602 100644
 --- a/drivers/net/hyperv/rndis_filter.c
 +++ b/drivers/net/hyperv/rndis_filter.c
 @@ -240,6 +240,22 @@ static int rndis_filter_send_request(struct rndis_device 
 *dev,
   return ret;
  }
  
 +static void rndis_set_link_state(struct rndis_device *rdev,
 +  struct rndis_request *request)
 +{
 + u32 link_status;
 + struct rndis_query_complete *query_complete;
 +
 + query_complete = request-response_msg.msg.query_complete;
 +
 + if (query_complete-status == RNDIS_STATUS_SUCCESS 
 + query_complete-info_buflen == sizeof(u32)) {
 + memcpy(link_status, (void *)((unsigned long)query_complete +
 +query_complete-info_buf_offset), sizeof(u32));
 + rdev-link_state = link_status != 0;
 + }
 +}
 +
  static void rndis_filter_receive_response(struct rndis_device *dev,
  struct rndis_message *resp)
  {
 @@ -269,6 +285,10 @@ static void rndis_filter_receive_response(struct 
 rndis_device *dev,
   sizeof(struct rndis_message) + RNDIS_EXT_LEN) {
   memcpy(request-response_msg, resp,
  resp-msg_len);
 + if (request-request_msg.ndis_msg_type ==
 + RNDIS_MSG_QUERY  request-request_msg.msg.
 + query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS)
 + rndis_set_link_state(dev, request);
   } else {
   netdev_err(ndev,
   rndis response buffer overflow 
 @@ -617,7 +637,6 @@ static int rndis_filter_query_device_link_status(struct 
 rndis_device *dev)
   ret = rndis_filter_query_device(dev,
 RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
 link_status, size);
 - dev-link_state = (link_status != 0) ? true : false;
  
   return ret;
  }

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH RESEND] x86, hyperv: bypass the timer_irq_works() check

2014-02-27 Thread Jason Wang
This patch bypass the timer_irq_works() check for hyperv guest since:

- It was guaranteed to work.
- timer_irq_works() may fail sometime due to the lpj calibration were inaccurate
  in a hyperv guest or a buggy host.

In the future, we should get the tsc frequency from hypervisor and use preset
lpj instead.

Cc: K. Y. Srinivasan k...@microsoft.com
Cc: Haiyang Zhang haiya...@microsoft.com
Cc: sta...@vger.kernel.org
Acked-by: K. Y. Srinivasan k...@microsoft.com
Signed-off-by: Jason Wang jasow...@redhat.com
---
 arch/x86/kernel/cpu/mshyperv.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 9f7ca26..832d05a 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -26,6 +26,7 @@
 #include asm/irq_regs.h
 #include asm/i8259.h
 #include asm/apic.h
+#include asm/timer.h
 
 struct ms_hyperv_info ms_hyperv;
 EXPORT_SYMBOL_GPL(ms_hyperv);
@@ -105,6 +106,11 @@ static void __init ms_hyperv_init_platform(void)
 
if (ms_hyperv.features  HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
clocksource_register_hz(hyperv_cs, NSEC_PER_SEC/100);
+
+#ifdef CONFIG_X86_IO_APIC
+   no_timer_check = 1;
+#endif
+
 }
 
 const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
-- 
1.8.3.2

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net,v3] hyperv: Fix the carrier status setting

2014-02-13 Thread Jason Wang
On 02/13/2014 11:04 PM, Haiyang Zhang wrote:

 -Original Message-
 From: Jason Wang [mailto:jasow...@redhat.com]
 Sent: Wednesday, February 12, 2014 10:52 PM
 To: Haiyang Zhang; da...@davemloft.net; net...@vger.kernel.org
 Cc: KY Srinivasan; o...@aepfle.de; linux-ker...@vger.kernel.org; driverdev-
 de...@linuxdriverproject.org
 Subject: Re: [PATCH net,v3] hyperv: Fix the carrier status setting

 On 02/13/2014 08:54 AM, Haiyang Zhang wrote:
 Without this patch, the cat /sys/class/net/ethN/operstate shows
 unknown, and ethtool ethN shows Link detected: yes, when VM
 boots up with or without vNIC connected.

 This patch fixed the problem.

 Signed-off-by: Haiyang Zhang haiya...@microsoft.com
 Reviewed-by: K. Y. Srinivasan k...@microsoft.com
 ---
  drivers/net/hyperv/netvsc_drv.c |   53
 ---
  1 files changed, 38 insertions(+), 15 deletions(-)

 diff --git a/drivers/net/hyperv/netvsc_drv.c
 b/drivers/net/hyperv/netvsc_drv.c index 7756118..7141a19 100644
 --- a/drivers/net/hyperv/netvsc_drv.c
 +++ b/drivers/net/hyperv/netvsc_drv.c
 @@ -88,8 +88,12 @@ static int netvsc_open(struct net_device *net)  {
 struct net_device_context *net_device_ctx = netdev_priv(net);
 struct hv_device *device_obj = net_device_ctx-device_ctx;
 +   struct netvsc_device *nvdev;
 +   struct rndis_device *rdev;
 int ret = 0;

 +   netif_carrier_off(net);
 +
 /* Open up the device */
 ret = rndis_filter_open(device_obj);
 if (ret != 0) {
 @@ -99,6 +103,11 @@ static int netvsc_open(struct net_device *net)

 netif_start_queue(net);

 +   nvdev = hv_get_drvdata(device_obj);
 +   rdev = nvdev-extension;
 +   if (!rdev-link_state)
 +   netif_carrier_on(net);
 +
 Maybe you can just schedule the work here and then you can drop the
 rtnl_lock in netvsc_link_change() ?
 The rtnl_lock will still be necessary in the netvsc_link_change(), because 
 we want to prevent it getting wrong rdev pointer when netvsc_change_mtu
 is removing/adding rndis device.

Ok.
 +
 +   if (notify)
 +   netdev_notify_peers(net);
  }

 Looks like this forces arp_notify here. Is it expected?
 Yes, this is expected. It's required after live migration.

 Thanks,
 - Haiyang

Yes, this does not change the current behaviour. (arp_notify is
meaningless for netvsc).

Acked-by: Jason Wang jasow...@redhat.com

The patch is also needed for stable.

Thanks

 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net,v3] hyperv: Fix the carrier status setting

2014-02-12 Thread Jason Wang
On 02/13/2014 08:54 AM, Haiyang Zhang wrote:
 Without this patch, the cat /sys/class/net/ethN/operstate shows
 unknown, and ethtool ethN shows Link detected: yes, when VM
 boots up with or without vNIC connected.

 This patch fixed the problem.

 Signed-off-by: Haiyang Zhang haiya...@microsoft.com
 Reviewed-by: K. Y. Srinivasan k...@microsoft.com
 ---
  drivers/net/hyperv/netvsc_drv.c |   53 
 ---
  1 files changed, 38 insertions(+), 15 deletions(-)

 diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
 index 7756118..7141a19 100644
 --- a/drivers/net/hyperv/netvsc_drv.c
 +++ b/drivers/net/hyperv/netvsc_drv.c
 @@ -88,8 +88,12 @@ static int netvsc_open(struct net_device *net)
  {
   struct net_device_context *net_device_ctx = netdev_priv(net);
   struct hv_device *device_obj = net_device_ctx-device_ctx;
 + struct netvsc_device *nvdev;
 + struct rndis_device *rdev;
   int ret = 0;
  
 + netif_carrier_off(net);
 +
   /* Open up the device */
   ret = rndis_filter_open(device_obj);
   if (ret != 0) {
 @@ -99,6 +103,11 @@ static int netvsc_open(struct net_device *net)
  
   netif_start_queue(net);
  
 + nvdev = hv_get_drvdata(device_obj);
 + rdev = nvdev-extension;
 + if (!rdev-link_state)
 + netif_carrier_on(net);
 +

Maybe you can just schedule the work here and then you can drop the
rtnl_lock in netvsc_link_change() ?
   return ret;
  }
  
 @@ -229,23 +238,24 @@ void netvsc_linkstatus_callback(struct hv_device 
 *device_obj,
   struct net_device *net;
   struct net_device_context *ndev_ctx;
   struct netvsc_device *net_device;
 + struct rndis_device *rdev;
  
   net_device = hv_get_drvdata(device_obj);
 + rdev = net_device-extension;
 +
 + rdev-link_state = status != 1;
 +
   net = net_device-ndev;
  
 - if (!net) {
 - netdev_err(net, got link status but net device 
 - not initialized yet\n);
 + if (!net || net-reg_state != NETREG_REGISTERED)
   return;
 - }
  
 + ndev_ctx = netdev_priv(net);
   if (status == 1) {
 - netif_carrier_on(net);
 - ndev_ctx = netdev_priv(net);
   schedule_delayed_work(ndev_ctx-dwork, 0);
   schedule_delayed_work(ndev_ctx-dwork, msecs_to_jiffies(20));
   } else {
 - netif_carrier_off(net);
 + schedule_delayed_work(ndev_ctx-dwork, 0);
   }
  }
  
 @@ -388,17 +398,35 @@ static const struct net_device_ops device_ops = {
   * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add
   * another netif_notify_peers() into a delayed work, otherwise GARP packet
   * will not be sent after quick migration, and cause network disconnection.
 + * Also, we update the carrier status here.
   */
 -static void netvsc_send_garp(struct work_struct *w)
 +static void netvsc_link_change(struct work_struct *w)
  {
   struct net_device_context *ndev_ctx;
   struct net_device *net;
   struct netvsc_device *net_device;
 + struct rndis_device *rdev;
 + bool notify;
 +
 + rtnl_lock();
  
   ndev_ctx = container_of(w, struct net_device_context, dwork.work);
   net_device = hv_get_drvdata(ndev_ctx-device_ctx);
 + rdev = net_device-extension;
   net = net_device-ndev;
 - netdev_notify_peers(net);
 +
 + if (rdev-link_state) {
 + netif_carrier_off(net);
 + notify = false;
 + } else {
 + netif_carrier_on(net);
 + notify = true;
 + }
 +
 + rtnl_unlock();
 +
 + if (notify)
 + netdev_notify_peers(net);
  }
  

Looks like this forces arp_notify here. Is it expected?

Other looks good.
  
 @@ -414,13 +442,10 @@ static int netvsc_probe(struct hv_device *dev,
   if (!net)
   return -ENOMEM;
  
 - /* Set initial state */
 - netif_carrier_off(net);
 -
   net_device_ctx = netdev_priv(net);
   net_device_ctx-device_ctx = dev;
   hv_set_drvdata(dev, net);
 - INIT_DELAYED_WORK(net_device_ctx-dwork, netvsc_send_garp);
 + INIT_DELAYED_WORK(net_device_ctx-dwork, netvsc_link_change);
   INIT_WORK(net_device_ctx-work, do_set_multicast);
  
   net-netdev_ops = device_ops;
 @@ -443,8 +468,6 @@ static int netvsc_probe(struct hv_device *dev,
   }
   memcpy(net-dev_addr, device_info.mac_adr, ETH_ALEN);
  
 - netif_carrier_on(net);
 -
   ret = register_netdev(net);
   if (ret != 0) {
   pr_err(Unable to register netdev.\n);

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH] x86, hyperv: bypass the timer_irq_works() check

2014-02-11 Thread Jason Wang
On 01/26/2014 12:42 PM, Jason Wang wrote:
 On 01/25/2014 05:20 AM, H. Peter Anvin wrote:
 On 01/23/2014 10:02 PM, Jason Wang wrote:
 This patch bypass the timer_irq_works() check for hyperv guest since:

 - It was guaranteed to work.
 - timer_irq_works() may fail sometime due to the lpj calibration were 
 inaccurate
   in a hyperv guest or a buggy host.

 In the future, we should get the tsc frequency from hypervisor and use 
 preset
 lpj instead.

 Cc: K. Y. Srinivasan k...@microsoft.com
 Cc: Haiyang Zhang haiya...@microsoft.com
 Signed-off-by: Jason Wang jasow...@redhat.com
 This should be in -stable, right?

  -hpa


 Oh, right.

 Cc: sta...@vger.kernel.org

Ping, need I resend the patch or it's ok for you to apply?
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net,v2] hyperv: Fix the carrier status setting

2014-02-11 Thread Jason Wang
On 02/11/2014 02:15 AM, Haiyang Zhang wrote:
 Without this patch, the cat /sys/class/net/ethN/operstate shows
 unknown, and ethtool ethN shows Link detected: yes, when VM
 boots up with or without vNIC connected.

 This patch fixed the problem.

 Signed-off-by: Haiyang Zhang haiya...@microsoft.com
 Reviewed-by: K. Y. Srinivasan k...@microsoft.com
 ---
  drivers/net/hyperv/netvsc_drv.c |   24 +++-
  1 files changed, 15 insertions(+), 9 deletions(-)

 diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
 index 7756118..18916f7 100644
 --- a/drivers/net/hyperv/netvsc_drv.c
 +++ b/drivers/net/hyperv/netvsc_drv.c
 @@ -88,8 +88,12 @@ static int netvsc_open(struct net_device *net)
  {
   struct net_device_context *net_device_ctx = netdev_priv(net);
   struct hv_device *device_obj = net_device_ctx-device_ctx;
 + struct netvsc_device *nvdev;
 + struct rndis_device *rdev;
   int ret = 0;
  
 + netif_carrier_off(net);
 +
   /* Open up the device */
   ret = rndis_filter_open(device_obj);
   if (ret != 0) {
 @@ -99,6 +103,11 @@ static int netvsc_open(struct net_device *net)
  
   netif_start_queue(net);
  
 + nvdev = hv_get_drvdata(device_obj);
 + rdev = nvdev-extension;
 + if (!rdev-link_state)

What if the link status interrupt comes here at this time?
 + netif_carrier_on(net);
 +
   return ret;
  }
  
 @@ -229,15 +238,17 @@ void netvsc_linkstatus_callback(struct hv_device 
 *device_obj,
   struct net_device *net;
   struct net_device_context *ndev_ctx;
   struct netvsc_device *net_device;
 + struct rndis_device *rdev;
  
   net_device = hv_get_drvdata(device_obj);
 + rdev = net_device-extension;
 +
 + rdev-link_state = status != 1;
 +
   net = net_device-ndev;
  
 - if (!net) {
 - netdev_err(net, got link status but net device 
 - not initialized yet\n);
 + if (!net || net-reg_state != NETREG_REGISTERED)
   return;
 - }
  
   if (status == 1) {
   netif_carrier_on(net);
 @@ -414,9 +425,6 @@ static int netvsc_probe(struct hv_device *dev,
   if (!net)
   return -ENOMEM;
  
 - /* Set initial state */
 - netif_carrier_off(net);
 -
   net_device_ctx = netdev_priv(net);
   net_device_ctx-device_ctx = dev;
   hv_set_drvdata(dev, net);
 @@ -443,8 +451,6 @@ static int netvsc_probe(struct hv_device *dev,
   }
   memcpy(net-dev_addr, device_info.mac_adr, ETH_ALEN);
  
 - netif_carrier_on(net);
 -
   ret = register_netdev(net);
   if (ret != 0) {
   pr_err(Unable to register netdev.\n);

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net] net: hyperv: initialize link status correctly

2014-01-27 Thread Jason Wang
On 01/27/2014 04:35 PM, David Miller wrote:
 From: Jason Wang jasow...@redhat.com
 Date: Mon, 27 Jan 2014 15:30:54 +0800

 Call netif_carrier_on() after register_device(). Otherwise it won't work 
 since
 the device was still in NETREG_UNINITIALIZED state.

 Fixes a68f9614614749727286f675d15f1e09d13cb54a
 (hyperv: Fix race between probe and open calls)

 Cc: Haiyang Zhang haiya...@microsoft.com
 Cc: K. Y. Srinivasan k...@microsoft.com
 Reported-by: Di Nie d...@redhat.com
 Tested-by: Di Nie d...@redhat.com
 Signed-off-by: Jason Wang jasow...@redhat.com
 A device up can occur at the moment you call register_netdevice(),
 therefore that up call can see the carrier as down and fail or
 similar.  So you really cannot resolve the carrier to be on in this
 way.

True, we need a workqueue to synchronize them.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net] net: hyperv: initialize link status correctly

2014-01-27 Thread Jason Wang
On 01/27/2014 06:22 PM, Ben Hutchings wrote:
 On Mon, 2014-01-27 at 17:40 +0800, Jason Wang wrote:
 On 01/27/2014 04:35 PM, David Miller wrote:
 From: Jason Wang jasow...@redhat.com
 Date: Mon, 27 Jan 2014 15:30:54 +0800

 Call netif_carrier_on() after register_device(). Otherwise it won't work 
 since
 the device was still in NETREG_UNINITIALIZED state.

 Fixes a68f9614614749727286f675d15f1e09d13cb54a
 (hyperv: Fix race between probe and open calls)

 Cc: Haiyang Zhang haiya...@microsoft.com
 Cc: K. Y. Srinivasan k...@microsoft.com
 Reported-by: Di Nie d...@redhat.com
 Tested-by: Di Nie d...@redhat.com
 Signed-off-by: Jason Wang jasow...@redhat.com
 A device up can occur at the moment you call register_netdevice(),
 therefore that up call can see the carrier as down and fail or
 similar.  So you really cannot resolve the carrier to be on in this
 way.
 True, we need a workqueue to synchronize them.
 Whatever for?  All you need to do is:

   rtnl_lock();
   register_netdevice();
   netif_carrier_on();
   rtnl_unlock();

 It would be nice if we could make the current code work with a change in
 the core, though.

 Ben.


Looks like the link status interrupt may happen during this (after
netvsc_device_add() was called by rndis_filter_device_add()) without any
synchronization. This may lead a wrong link status here.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net] net: hyperv: initialize link status correctly

2014-01-27 Thread Jason Wang
On 01/27/2014 06:30 PM, Ben Hutchings wrote:
 On Mon, 2014-01-27 at 18:28 +0800, Jason Wang wrote:
 On 01/27/2014 06:22 PM, Ben Hutchings wrote:
 On Mon, 2014-01-27 at 17:40 +0800, Jason Wang wrote:
 On 01/27/2014 04:35 PM, David Miller wrote:
 From: Jason Wang jasow...@redhat.com
 Date: Mon, 27 Jan 2014 15:30:54 +0800

 Call netif_carrier_on() after register_device(). Otherwise it won't work 
 since
 the device was still in NETREG_UNINITIALIZED state.

 Fixes a68f9614614749727286f675d15f1e09d13cb54a
 (hyperv: Fix race between probe and open calls)

 Cc: Haiyang Zhang haiya...@microsoft.com
 Cc: K. Y. Srinivasan k...@microsoft.com
 Reported-by: Di Nie d...@redhat.com
 Tested-by: Di Nie d...@redhat.com
 Signed-off-by: Jason Wang jasow...@redhat.com
 A device up can occur at the moment you call register_netdevice(),
 therefore that up call can see the carrier as down and fail or
 similar.  So you really cannot resolve the carrier to be on in this
 way.
 True, we need a workqueue to synchronize them.
 Whatever for?  All you need to do is:

 rtnl_lock();
 register_netdevice();
 netif_carrier_on();
 rtnl_unlock();

 It would be nice if we could make the current code work with a change in
 the core, though.

 Ben.

 Looks like the link status interrupt may happen during this (after
 netvsc_device_add() was called by rndis_filter_device_add()) without any
 synchronization. This may lead a wrong link status here.
 Now I'm confused - if there's a link status interrupt, why are you
 setting the carrier on initially?

 Ben.


I realize that setting carrier on initially was a bug after David's
comment. So I think we need a workqueue.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net] net: hyperv: initialize link status correctly

2014-01-26 Thread Jason Wang
Call netif_carrier_on() after register_device(). Otherwise it won't work since
the device was still in NETREG_UNINITIALIZED state.

Fixes a68f9614614749727286f675d15f1e09d13cb54a
(hyperv: Fix race between probe and open calls)

Cc: Haiyang Zhang haiya...@microsoft.com
Cc: K. Y. Srinivasan k...@microsoft.com
Reported-by: Di Nie d...@redhat.com
Tested-by: Di Nie d...@redhat.com
Signed-off-by: Jason Wang jasow...@redhat.com
---
 drivers/net/hyperv/netvsc_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 71baeb3..dc11601 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -444,13 +444,13 @@ static int netvsc_probe(struct hv_device *dev,
}
memcpy(net-dev_addr, device_info.mac_adr, ETH_ALEN);
 
-   netif_carrier_on(net);
-
ret = register_netdev(net);
if (ret != 0) {
pr_err(Unable to register netdev.\n);
rndis_filter_device_remove(dev);
free_netdev(net);
+   } else {
+   netif_carrier_on(net);
}
 
return ret;
-- 
1.8.3.2

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH] x86, hyperv: bypass the timer_irq_works() check

2014-01-25 Thread Jason Wang
On 01/25/2014 05:20 AM, H. Peter Anvin wrote:
 On 01/23/2014 10:02 PM, Jason Wang wrote:
 This patch bypass the timer_irq_works() check for hyperv guest since:

 - It was guaranteed to work.
 - timer_irq_works() may fail sometime due to the lpj calibration were 
 inaccurate
   in a hyperv guest or a buggy host.

 In the future, we should get the tsc frequency from hypervisor and use preset
 lpj instead.

 Cc: K. Y. Srinivasan k...@microsoft.com
 Cc: Haiyang Zhang haiya...@microsoft.com
 Signed-off-by: Jason Wang jasow...@redhat.com
 This should be in -stable, right?

   -hpa



Oh, right.

Cc: sta...@vger.kernel.org
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] x86, hyperv: bypass the timer_irq_works() check

2014-01-23 Thread Jason Wang
This patch bypass the timer_irq_works() check for hyperv guest since:

- It was guaranteed to work.
- timer_irq_works() may fail sometime due to the lpj calibration were inaccurate
  in a hyperv guest or a buggy host.

In the future, we should get the tsc frequency from hypervisor and use preset
lpj instead.

Cc: K. Y. Srinivasan k...@microsoft.com
Cc: Haiyang Zhang haiya...@microsoft.com
Signed-off-by: Jason Wang jasow...@redhat.com
---
 arch/x86/kernel/cpu/mshyperv.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 9f7ca26..832d05a 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -26,6 +26,7 @@
 #include asm/irq_regs.h
 #include asm/i8259.h
 #include asm/apic.h
+#include asm/timer.h
 
 struct ms_hyperv_info ms_hyperv;
 EXPORT_SYMBOL_GPL(ms_hyperv);
@@ -105,6 +106,11 @@ static void __init ms_hyperv_init_platform(void)
 
if (ms_hyperv.features  HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
clocksource_register_hz(hyperv_cs, NSEC_PER_SEC/100);
+
+#ifdef CONFIG_X86_IO_APIC
+   no_timer_check = 1;
+#endif
+
 }
 
 const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
-- 
1.8.3.2

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net] netvsc: don't flush peers notifying work during setting mtu

2013-12-13 Thread Jason Wang
There's a possible deadlock if we flush the peers notifying work during setting
mtu:

[   22.991149] ==
[   22.991173] [ INFO: possible circular locking dependency detected ]
[   22.991198] 3.10.0-54.0.1.el7.x86_64.debug #1 Not tainted
[   22.991219] ---
[   22.991243] ip/974 is trying to acquire lock:
[   22.991261]  (((net_device_ctx-dwork)-work)){+.+.+.}, at: 
[8108af95] flush_work+0x5/0x2e0
[   22.991307]
but task is already holding lock:
[   22.991330]  (rtnl_mutex){+.+.+.}, at: [81539deb] 
rtnetlink_rcv+0x1b/0x40
[   22.991367]
which lock already depends on the new lock.

[   22.991398]
the existing dependency chain (in reverse order) is:
[   22.991426]
- #1 (rtnl_mutex){+.+.+.}:
[   22.991449][810dfdd9] __lock_acquire+0xb19/0x1260
[   22.991477][810e0d12] lock_acquire+0xa2/0x1f0
[   22.991501][81673659] mutex_lock_nested+0x89/0x4f0
[   22.991529][815392b7] rtnl_lock+0x17/0x20
[   22.991552][815230b2] netdev_notify_peers+0x12/0x30
[   22.991579][a0340212] netvsc_send_garp+0x22/0x30 
[hv_netvsc]
[   22.991610][8108d251] process_one_work+0x211/0x6e0
[   22.991637][8108d83b] worker_thread+0x11b/0x3a0
[   22.991663][81095e5d] kthread+0xed/0x100
[   22.991686][81681c6c] ret_from_fork+0x7c/0xb0
[   22.991715]
- #0 (((net_device_ctx-dwork)-work)){+.+.+.}:
[   22.991715][810de817] check_prevs_add+0x967/0x970
[   22.991715][810dfdd9] __lock_acquire+0xb19/0x1260
[   22.991715][810e0d12] lock_acquire+0xa2/0x1f0
[   22.991715][8108afde] flush_work+0x4e/0x2e0
[   22.991715][8108e1b5] __cancel_work_timer+0x95/0x130
[   22.991715][8108e303] cancel_delayed_work_sync+0x13/0x20
[   22.991715][a03404e4] netvsc_change_mtu+0x84/0x200 
[hv_netvsc]
[   22.991715][815233d4] dev_set_mtu+0x34/0x80
[   22.991715][8153bc2a] do_setlink+0x23a/0xa00
[   22.991715][8153d054] rtnl_newlink+0x394/0x5e0
[   22.991715][81539eac] rtnetlink_rcv_msg+0x9c/0x260
[   22.991715][8155cdd9] netlink_rcv_skb+0xa9/0xc0
[   22.991715][81539dfa] rtnetlink_rcv+0x2a/0x40
[   22.991715][8155c41d] netlink_unicast+0xdd/0x190
[   22.991715][8155c807] netlink_sendmsg+0x337/0x750
[   22.991715][8150d219] sock_sendmsg+0x99/0xd0
[   22.991715][8150d63e] ___sys_sendmsg+0x39e/0x3b0
[   22.991715][8150eba2] __sys_sendmsg+0x42/0x80
[   22.991715][8150ebf2] SyS_sendmsg+0x12/0x20
[   22.991715][81681d19] system_call_fastpath+0x16/0x1b

This is because we hold the rtnl_lock() before ndo_change_mtu() and try to flush
the work in netvsc_change_mtu(), in the mean time, netdev_notify_peers() may be
called from worker and also trying to hold the rtnl_lock. This will lead the
flush won't succeed forever. Solve this by not canceling and flushing the work,
this is safe because the transmission done by NETDEV_NOTIFY_PEERS was
synchronized with the netif_tx_disable() called by netvsc_change_mtu().

Reported-by: Yaju Cao ya...@redhat.com
Tested-by: Yaju Cao ya...@redhat.com
Cc: K. Y. Srinivasan k...@microsoft.com
Cc: Haiyang Zhang haiya...@microsoft.com
Signed-off-by: Jason Wang jasow...@redhat.com
---
The patch is needed for stable.
---
 drivers/net/hyperv/netvsc_drv.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 524f713..f813572 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -327,7 +327,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int 
mtu)
return -EINVAL;
 
nvdev-start_remove = true;
-   cancel_delayed_work_sync(ndevctx-dwork);
cancel_work_sync(ndevctx-work);
netif_tx_disable(ndev);
rndis_filter_device_remove(hdev);
-- 
1.8.3.2

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH V2 4/4] x86: correctly detect hypervisor

2013-08-04 Thread Jason Wang
On 07/25/2013 04:54 PM, Jason Wang wrote:
 We try to handle the hypervisor compatibility mode by detecting hypervisor
 through a specific order. This is not robust, since hypervisors may implement
 each others features.

 This patch tries to handle this situation by always choosing the last one in 
 the
 CPUID leaves. This is done by letting .detect() returns a priority instead of
 true/false and just re-using the CPUID leaf where the signature were found as
 the priority (or 1 if it was found by DMI). Then we can just pick hypervisor 
 who
 has the highest priority. Other sophisticated detection method could also be
 implemented on top.

 Suggested by H. Peter Anvin and Paolo Bonzini.

 Cc: Thomas Gleixner t...@linutronix.de
 Cc: Ingo Molnar mi...@redhat.com
 Cc: H. Peter Anvin h...@zytor.com
 Cc: x...@kernel.org
 Cc: K. Y. Srinivasan k...@microsoft.com
 Cc: Haiyang Zhang haiya...@microsoft.com
 Cc: Konrad Rzeszutek Wilk konrad.w...@oracle.com
 Cc: Jeremy Fitzhardinge jer...@goop.org
 Cc: Doug Covelli dcove...@vmware.com
 Cc: Borislav Petkov b...@suse.de
 Cc: Dan Hecht dhe...@vmware.com
 Cc: Paul Gortmaker paul.gortma...@windriver.com
 Cc: Marcelo Tosatti mtosa...@redhat.com
 Cc: Gleb Natapov g...@redhat.com
 Cc: Paolo Bonzini pbonz...@redhat.com
 Cc: Frederic Weisbecker fweis...@gmail.com
 Cc: linux-ker...@vger.kernel.org
 Cc: de...@linuxdriverproject.org
 Cc: k...@vger.kernel.org
 Cc: xen-de...@lists.xensource.com
 Cc: virtualizat...@lists.linux-foundation.org
 Signed-off-by: Jason Wang jasow...@redhat.com
 ---

Ping, any comments and acks for this series?

Thanks
  arch/x86/include/asm/hypervisor.h |2 +-
  arch/x86/kernel/cpu/hypervisor.c  |   15 +++
  arch/x86/kernel/cpu/mshyperv.c|   13 -
  arch/x86/kernel/cpu/vmware.c  |8 
  arch/x86/kernel/kvm.c |6 ++
  arch/x86/xen/enlighten.c  |9 +++--
  6 files changed, 25 insertions(+), 28 deletions(-)

 diff --git a/arch/x86/include/asm/hypervisor.h 
 b/arch/x86/include/asm/hypervisor.h
 index 2d4b5e6..e42f758 100644
 --- a/arch/x86/include/asm/hypervisor.h
 +++ b/arch/x86/include/asm/hypervisor.h
 @@ -33,7 +33,7 @@ struct hypervisor_x86 {
   const char  *name;
  
   /* Detection routine */
 - bool(*detect)(void);
 + uint32_t(*detect)(void);
  
   /* Adjust CPU feature bits (run once per CPU) */
   void(*set_cpu_features)(struct cpuinfo_x86 *);
 diff --git a/arch/x86/kernel/cpu/hypervisor.c 
 b/arch/x86/kernel/cpu/hypervisor.c
 index 8727921..36ce402 100644
 --- a/arch/x86/kernel/cpu/hypervisor.c
 +++ b/arch/x86/kernel/cpu/hypervisor.c
 @@ -25,11 +25,6 @@
  #include asm/processor.h
  #include asm/hypervisor.h
  
 -/*
 - * Hypervisor detect order.  This is specified explicitly here because
 - * some hypervisors might implement compatibility modes for other
 - * hypervisors and therefore need to be detected in specific sequence.
 - */
  static const __initconst struct hypervisor_x86 * const hypervisors[] =
  {
  #ifdef CONFIG_XEN_PVHVM
 @@ -49,15 +44,19 @@ static inline void __init
  detect_hypervisor_vendor(void)
  {
   const struct hypervisor_x86 *h, * const *p;
 + uint32_t pri, max_pri = 0;
  
   for (p = hypervisors; p  hypervisors + ARRAY_SIZE(hypervisors); p++) {
   h = *p;
 - if (h-detect()) {
 + pri = h-detect();
 + if (pri != 0  pri  max_pri) {
 + max_pri = pri;
   x86_hyper = h;
 - printk(KERN_INFO Hypervisor detected: %s\n, h-name);
 - break;
   }
   }
 +
 + if (max_pri)
 + printk(KERN_INFO Hypervisor detected: %s\n, x86_hyper-name);
  }
  
  void init_hypervisor(struct cpuinfo_x86 *c)
 diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
 index 8f4be53..71a39f3 100644
 --- a/arch/x86/kernel/cpu/mshyperv.c
 +++ b/arch/x86/kernel/cpu/mshyperv.c
 @@ -27,20 +27,23 @@
  struct ms_hyperv_info ms_hyperv;
  EXPORT_SYMBOL_GPL(ms_hyperv);
  
 -static bool __init ms_hyperv_platform(void)
 +static uint32_t  __init ms_hyperv_platform(void)
  {
   u32 eax;
   u32 hyp_signature[3];
  
   if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
 - return false;
 + return 0;
  
   cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
 eax, hyp_signature[0], hyp_signature[1], hyp_signature[2]);
  
 - return eax = HYPERV_CPUID_MIN 
 - eax = HYPERV_CPUID_MAX 
 - !memcmp(Microsoft Hv, hyp_signature, 12);
 + if (eax = HYPERV_CPUID_MIN 
 + eax = HYPERV_CPUID_MAX 
 + !memcmp(Microsoft Hv, hyp_signature, 12))
 + return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
 +
 + return 0;
  }
  
  static cycle_t read_hv_clock(struct clocksource *arg)
 diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
 index 7076878..628a059 100644
 --- a/arch

[PATCH V2 4/4] x86: correctly detect hypervisor

2013-07-25 Thread Jason Wang
We try to handle the hypervisor compatibility mode by detecting hypervisor
through a specific order. This is not robust, since hypervisors may implement
each others features.

This patch tries to handle this situation by always choosing the last one in the
CPUID leaves. This is done by letting .detect() returns a priority instead of
true/false and just re-using the CPUID leaf where the signature were found as
the priority (or 1 if it was found by DMI). Then we can just pick hypervisor who
has the highest priority. Other sophisticated detection method could also be
implemented on top.

Suggested by H. Peter Anvin and Paolo Bonzini.

Cc: Thomas Gleixner t...@linutronix.de
Cc: Ingo Molnar mi...@redhat.com
Cc: H. Peter Anvin h...@zytor.com
Cc: x...@kernel.org
Cc: K. Y. Srinivasan k...@microsoft.com
Cc: Haiyang Zhang haiya...@microsoft.com
Cc: Konrad Rzeszutek Wilk konrad.w...@oracle.com
Cc: Jeremy Fitzhardinge jer...@goop.org
Cc: Doug Covelli dcove...@vmware.com
Cc: Borislav Petkov b...@suse.de
Cc: Dan Hecht dhe...@vmware.com
Cc: Paul Gortmaker paul.gortma...@windriver.com
Cc: Marcelo Tosatti mtosa...@redhat.com
Cc: Gleb Natapov g...@redhat.com
Cc: Paolo Bonzini pbonz...@redhat.com
Cc: Frederic Weisbecker fweis...@gmail.com
Cc: linux-ker...@vger.kernel.org
Cc: de...@linuxdriverproject.org
Cc: k...@vger.kernel.org
Cc: xen-de...@lists.xensource.com
Cc: virtualizat...@lists.linux-foundation.org
Signed-off-by: Jason Wang jasow...@redhat.com
---
 arch/x86/include/asm/hypervisor.h |2 +-
 arch/x86/kernel/cpu/hypervisor.c  |   15 +++
 arch/x86/kernel/cpu/mshyperv.c|   13 -
 arch/x86/kernel/cpu/vmware.c  |8 
 arch/x86/kernel/kvm.c |6 ++
 arch/x86/xen/enlighten.c  |9 +++--
 6 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/arch/x86/include/asm/hypervisor.h 
b/arch/x86/include/asm/hypervisor.h
index 2d4b5e6..e42f758 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -33,7 +33,7 @@ struct hypervisor_x86 {
const char  *name;
 
/* Detection routine */
-   bool(*detect)(void);
+   uint32_t(*detect)(void);
 
/* Adjust CPU feature bits (run once per CPU) */
void(*set_cpu_features)(struct cpuinfo_x86 *);
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 8727921..36ce402 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -25,11 +25,6 @@
 #include asm/processor.h
 #include asm/hypervisor.h
 
-/*
- * Hypervisor detect order.  This is specified explicitly here because
- * some hypervisors might implement compatibility modes for other
- * hypervisors and therefore need to be detected in specific sequence.
- */
 static const __initconst struct hypervisor_x86 * const hypervisors[] =
 {
 #ifdef CONFIG_XEN_PVHVM
@@ -49,15 +44,19 @@ static inline void __init
 detect_hypervisor_vendor(void)
 {
const struct hypervisor_x86 *h, * const *p;
+   uint32_t pri, max_pri = 0;
 
for (p = hypervisors; p  hypervisors + ARRAY_SIZE(hypervisors); p++) {
h = *p;
-   if (h-detect()) {
+   pri = h-detect();
+   if (pri != 0  pri  max_pri) {
+   max_pri = pri;
x86_hyper = h;
-   printk(KERN_INFO Hypervisor detected: %s\n, h-name);
-   break;
}
}
+
+   if (max_pri)
+   printk(KERN_INFO Hypervisor detected: %s\n, x86_hyper-name);
 }
 
 void init_hypervisor(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 8f4be53..71a39f3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -27,20 +27,23 @@
 struct ms_hyperv_info ms_hyperv;
 EXPORT_SYMBOL_GPL(ms_hyperv);
 
-static bool __init ms_hyperv_platform(void)
+static uint32_t  __init ms_hyperv_platform(void)
 {
u32 eax;
u32 hyp_signature[3];
 
if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
-   return false;
+   return 0;
 
cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
  eax, hyp_signature[0], hyp_signature[1], hyp_signature[2]);
 
-   return eax = HYPERV_CPUID_MIN 
-   eax = HYPERV_CPUID_MAX 
-   !memcmp(Microsoft Hv, hyp_signature, 12);
+   if (eax = HYPERV_CPUID_MIN 
+   eax = HYPERV_CPUID_MAX 
+   !memcmp(Microsoft Hv, hyp_signature, 12))
+   return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
+
+   return 0;
 }
 
 static cycle_t read_hv_clock(struct clocksource *arg)
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 7076878..628a059 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -93,7 +93,7 @@ static void __init vmware_platform_setup(void)
  * serial key should be enough