[PATCH 2/2] virtio_balloon: do not change memory amount visible via /proc/meminfo

2015-08-19 Thread Denis V. Lunev
The balloon device is frequently used as a means of cooperative memory control
between guest and host to manage memory overcommitment. This is the typical
case for any hosting workload where a KVM guest is provided to an end user.

There is a problem in this setup, though. The end user and the hosting provider
have signed an SLA in which some amount of memory is guaranteed to the guest.
The good thing is that this memory will be given back to the guest when the
guest really needs it (e.g. on guest OOM, with the
VIRTIO_BALLOON_F_DEFLATE_ON_OOM feature flag set). The bad thing is that the
end user does not know this.

By default, the balloon reduces the amount of memory exposed to the end user
each time a page is taken from the guest (and raises it again when the page is
returned) by calling adjust_managed_page_count(), so /proc/meminfo shows a
reduced amount of memory.

Fortunately, the solution is simple: just avoid calling
adjust_managed_page_count() when VIRTIO_BALLOON_F_DEFLATE_ON_OOM is set.

Signed-off-by: Denis V. Lunev d...@openvz.org
CC: Michael S. Tsirkin m...@redhat.com
---
 drivers/virtio/virtio_balloon.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 8543c9a..7efc329 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -157,7 +157,9 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
}
set_page_pfns(vb->pfns + vb->num_pfns, page);
vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
-   adjust_managed_page_count(page, -1);
+   if (!virtio_has_feature(vb->vdev,
+   VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+   adjust_managed_page_count(page, -1);
}
 
/* Did we get any? */
@@ -173,7 +175,9 @@ static void release_pages_balloon(struct virtio_balloon *vb)
/* Find pfns pointing at start of each page, get pages and free them. */
for (i = 0; i < vb->num_pfns; i += VIRTIO_BALLOON_PAGES_PER_PAGE) {
struct page *page = balloon_pfn_to_page(vb->pfns[i]);
-   adjust_managed_page_count(page, 1);
+   if (!virtio_has_feature(vb->vdev,
+   VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+   adjust_managed_page_count(page, 1);
put_page(page); /* balloon reference */
}
 }
-- 
2.1.4



[PATCH 0/2] virtio_balloon: do not change memory amount visible via /proc/meminfo

2015-08-19 Thread Denis V. Lunev
There is a problem in this setup, though. The end user and the hosting provider
have signed an SLA in which some amount of memory is guaranteed to the guest.
The good thing is that this memory will be given back to the guest when the
guest really needs it (e.g. on guest OOM, with the
VIRTIO_BALLOON_F_DEFLATE_ON_OOM feature flag set). The bad thing is that the
end user does not know this.

By default, the balloon reduces the amount of memory exposed to the end user
each time a page is taken from the guest (and raises it again when the page is
returned) by calling adjust_managed_page_count(), so /proc/meminfo shows a
reduced amount of memory.

Fortunately, the solution is simple: just avoid calling
adjust_managed_page_count() when VIRTIO_BALLOON_F_DEFLATE_ON_OOM is set.

Please note that neither the VMware balloon nor the Hyper-V balloon cares
about proper handling of adjust_managed_page_count() at all.

Signed-off-by: Denis V. Lunev d...@openvz.org
CC: Michael S. Tsirkin m...@redhat.com
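
For a quick check of the guest-visible effect, here is a small userspace sketch
(not part of this series) that reads MemTotal from /proc/meminfo inside the
guest; with this series and VIRTIO_BALLOON_F_DEFLATE_ON_OOM set, the printed
value should stay constant while the balloon inflates:

/* Sketch only: print the guest's MemTotal so the effect of ballooning
 * on /proc/meminfo can be observed from inside the guest. */
#include <stdio.h>

int main(void)
{
	char line[256];
	unsigned long kb;
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "MemTotal: %lu kB", &kb) == 1) {
			printf("MemTotal: %lu kB\n", kb);
			break;
		}
	}
	fclose(f);
	return 0;
}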


[PATCH 1/2] virtio_balloon: change signature of release_pages_by_pfn

2015-08-19 Thread Denis V. Lunev
and rename it to release_pages_balloon. The function originally took an array
of pfns; now it takes a pointer to struct virtio_balloon. This change is
necessary to be able to call adjust_managed_page_count() conditionally in the
next patch.

Signed-off-by: Denis V. Lunev d...@openvz.org
CC: Michael S. Tsirkin m...@redhat.com
---
 drivers/virtio/virtio_balloon.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 82e80e0..8543c9a 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -166,13 +166,13 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
mutex_unlock(&vb->balloon_lock);
 }
 
-static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
+static void release_pages_balloon(struct virtio_balloon *vb)
 {
unsigned int i;
 
/* Find pfns pointing at start of each page, get pages and free them. */
-   for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) {
-   struct page *page = balloon_pfn_to_page(pfns[i]);
+   for (i = 0; i < vb->num_pfns; i += VIRTIO_BALLOON_PAGES_PER_PAGE) {
+   struct page *page = balloon_pfn_to_page(vb->pfns[i]);
adjust_managed_page_count(page, 1);
put_page(page); /* balloon reference */
}
@@ -206,7 +206,7 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
if (vb->num_pfns != 0)
tell_host(vb, vb->deflate_vq);
mutex_unlock(&vb->balloon_lock);
-   release_pages_by_pfn(vb->pfns, vb->num_pfns);
+   release_pages_balloon(vb);
return num_freed_pages;
 }
 
-- 
2.1.4



Re: [PATCH v2 1/2] virtio-net: rephrase devconf fields description

2015-08-19 Thread Jason Wang


On 08/19/2015 07:54 PM, Victor Kaplansky wrote:
> On Mon, Aug 17, 2015 at 10:43:46AM +0800, Jason Wang wrote:
>
>> On 08/16/2015 09:42 PM, Victor Kaplansky wrote:
>>> Clarify general description of the mac, status and
>>> max_virtqueue_pairs fields. Specifically, the old description is
>>> vague about configuration layout and fields offsets when some of
>>> the fields are non valid.
>>>
>>> Also clarify that validity of two status bits depends on two
>>> different feature flags.
>>>
>>> Signed-off-by: Victor Kaplansky vict...@redhat.com
>>> ---
>>> +
>>> +\item [\field{max_virtqueue_pairs}] tells the driver the maximum
>>> +number of each of virtqueues (receiveq1\ldots receiveqN and
>>> +transmitq1\ldots transmitqN respectively) that can be configured
>>> +on the device once VIRTIO_NET_F_MQ is negotiated.
>>> +\field{max_virtqueue_pairs} is valid only if VIRTIO_NET_F_MQ is
>>> +set and can be read by the driver.
>>> +
>>
>> I don't get the point that adding can be read by the driver. Looks
>> like it's hard for hypervisor to detect this?
>
> AFAIU, if the device sets VIRTIO_NET_F_MQ, the device also sets
> the value of 'max_virtqueue_pairs' even before driver negotiated
> VIRTIO_NET_F_MQ. If so, the driver can read the value of
> 'max_virtqueue_pairs' during negotiation and potentially this
> value can even affect negotiation decision of the driver.

Looks not, it's legal to negotiate VIRTIO_NET_F_MQ with
max_virtqueue_pairs = 1.


> If above is correct, I'll change the description to make this
> point more clear.
>
> Thanks,
> -- Victor

Driver also sets 'mac' before driver negotiated VIRTIO_NET_F_MAC, so I'm
not sure this is really needed.

Thanks
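
For reference, a minimal driver-side sketch of the behaviour being discussed,
reading max_virtqueue_pairs only when VIRTIO_NET_F_MQ is offered and falling
back to a single queue pair otherwise (this is not code from the thread; it
assumes the standard Linux virtio_has_feature()/virtio_cread16() helpers):

/* Sketch only: how a driver could pick up max_virtqueue_pairs at probe. */
#include <linux/virtio_config.h>
#include <linux/virtio_net.h>

static u16 sketch_read_max_vq_pairs(struct virtio_device *vdev)
{
	u16 max_vq_pairs = 1;	/* default when VIRTIO_NET_F_MQ is absent */

	if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ))
		max_vq_pairs = virtio_cread16(vdev,
			offsetof(struct virtio_net_config,
				 max_virtqueue_pairs));

	/* A value of 1 is still legal even with VIRTIO_NET_F_MQ negotiated. */
	return max_vq_pairs;
}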




Re: [PATCH v2 2/2] virtio-net: add default_mtu configuration field

2015-08-19 Thread Jason Wang


On 08/19/2015 07:31 PM, Victor Kaplansky wrote:
> On Mon, Aug 17, 2015 at 11:07:15AM +0800, Jason Wang wrote:
>
>> On 08/16/2015 09:42 PM, Victor Kaplansky wrote:
>>> @@ -3128,6 +3134,7 @@ struct virtio_net_config {
>>>  u8 mac[6];
>>>  le16 status;
>>>  le16 max_virtqueue_pairs;
>>> +le16 default_mtu;
>>
>> Looks like mtu is ok, consider we use mac instead of default_mac.
>
> Good point. I'll change the name in the next version of the patch.
>
>>>  };
>>>  \end{lstlisting}
>>>
>>> @@ -3158,6 +3165,15 @@ by the driver after negotiation.
>>>  \field{max_virtqueue_pairs} is valid only if VIRTIO_NET_F_MQ is
>>>  set and can be read by the driver.
>>>
>>> +\item [\field{default_mtu}] is a hint to the driver set by the
>>> +device. It is valid during feature negotiation only if
>>> +VIRTIO_NET_F_DEFAULT_MTU is offered and holds the initial value
>>> +of MTU to be used by the driver. If VIRTIO_NET_F_DEFAULT_MTU is
>>> +negotiated, the driver uses the \field{default_mtu} as an initial
>>> +value, and also reports MTU changes to the device by writes to
>>> +\field{default_mtu}. Such reporting can be used for debugging,
>>> +or it can be used for tunning MTU along the network.
>>> +
>>
>> I vaguely remember that config is read only in some arch or transport
>> and that's why we introduce another vq cmd to confirm the announcement.
>> Probably we should do same for this?
>
> If so, we need to add one more feature bit to confirm the ability
> of the driver to report MTU, or we can weaken the requirement in
> conformance statement and write the driver may report the MTU.
> What do you say?
>
> -- Victor

Either looks good to me. (Need to confirm the read-only issue with the
editors of the other transports or archs.)

Thanks
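
For reference, a minimal driver-side sketch of the read/write behaviour under
discussion, using the names proposed in this patch (VIRTIO_NET_F_DEFAULT_MTU
and the default_mtu config offset below are assumptions for illustration, not
in-tree definitions; it is not code from the thread):

/* Sketch only: take the device's MTU hint at probe time, and report a later
 * MTU change back with a config-space write, per the proposal above. */
#include <linux/netdevice.h>
#include <linux/virtio_config.h>
#include <linux/virtio_net.h>

#define VIRTIO_NET_F_DEFAULT_MTU	25	/* hypothetical feature bit */
#define VIRTIO_NET_CFG_DEFAULT_MTU_OFF	10	/* after max_virtqueue_pairs in the proposed layout */

static void sketch_take_mtu_hint(struct virtio_device *vdev,
				 struct net_device *dev)
{
	if (virtio_has_feature(vdev, VIRTIO_NET_F_DEFAULT_MTU))
		dev->mtu = virtio_cread16(vdev, VIRTIO_NET_CFG_DEFAULT_MTU_OFF);
}

static void sketch_report_mtu(struct virtio_device *vdev, u16 new_mtu)
{
	if (virtio_has_feature(vdev, VIRTIO_NET_F_DEFAULT_MTU))
		virtio_cwrite16(vdev, VIRTIO_NET_CFG_DEFAULT_MTU_OFF, new_mtu);
}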


Re: [PATCH 1/4] virtio_net: use DECLARE_EWMA

2015-08-19 Thread Johannes Berg
Sorry, forgot to Cc Michael and the virt list - patch reproduced below
in full.

johannes

From 22500fbcf722748fe3471b2e4c6156db47aade15 Mon Sep 17 00:00:00 2001
From: Johannes Berg johannes.b...@intel.com
Date: Wed, 19 Aug 2015 09:25:18 +0200
Subject: [PATCH] virtio_net: use DECLARE_EWMA

Instead of using the out-of-line EWMA calculation, use DECLARE_EWMA()
to create static inlines. On x86/64 this results in no change in code
size for me, but reduces the struct receive_queue size by the two
unsigned long values that store the parameters.
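
For context, a minimal usage sketch of DECLARE_EWMA() (the ewma_latency_*
name here is hypothetical; the generated helpers mirror the ewma_pkt_len_*
calls in the diff below):

/* Sketch only: DECLARE_EWMA(latency, 1, 64) expands to struct ewma_latency
 * plus static inline ewma_latency_init/_add/_read helpers. */
#include <linux/average.h>

DECLARE_EWMA(latency, 1, 64)

static unsigned long sketch_avg_latency(const unsigned long *samples, int n)
{
	struct ewma_latency avg;
	int i;

	ewma_latency_init(&avg);		/* start from zero */
	for (i = 0; i < n; i++)
		ewma_latency_add(&avg, samples[i]);	/* fold in each sample */
	return ewma_latency_read(&avg);		/* current weighted average */
}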

Signed-off-by: Johannes Berg johannes.b...@intel.com
---
 drivers/net/Kconfig  |  1 -
 drivers/net/virtio_net.c | 22 +++---
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index e58468b02987..f50373645ab4 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -282,7 +282,6 @@ config VETH
 config VIRTIO_NET
	tristate "Virtio network driver"
depends on VIRTIO
-   select AVERAGE
---help---
  This is the virtual network driver for virtio.  It can be used with
  lguest or QEMU based VMMs (like KVM or Xen).  Say Y or M.
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 546b669fbfdd..9b950f2db836 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -40,12 +40,12 @@ module_param(gso, bool, 0444);
 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
 #define GOOD_COPY_LEN  128
 
-/* Weight used for the RX packet size EWMA. The average packet size is used to
- * determine the packet buffer size when refilling RX rings. As the entire RX
- * ring may be refilled at once, the weight is chosen so that the EWMA will be
- * insensitive to short-term, transient changes in packet size.
+/* RX packet size EWMA. The average packet size is used to determine the packet
+ * buffer size when refilling RX rings. As the entire RX ring may be refilled
+ * at once, the weight is chosen so that the EWMA will be insensitive to short-
+ * term, transient changes in packet size.
  */
-#define RECEIVE_AVG_WEIGHT 64
+DECLARE_EWMA(pkt_len, 1, 64)
 
 /* Minimum alignment for mergeable packet buffers. */
 #define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, 256)
@@ -85,7 +85,7 @@ struct receive_queue {
struct page *pages;
 
/* Average packet length for mergeable receive buffers. */
-   struct ewma mrg_avg_pkt_len;
+   struct ewma_pkt_len mrg_avg_pkt_len;
 
/* Page frag for packet buffer allocation. */
struct page_frag alloc_frag;
@@ -407,7 +407,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
}
}
 
-   ewma_add(&rq->mrg_avg_pkt_len, head_skb->len);
+   ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
return head_skb;
 
 err_skb:
@@ -600,12 +600,12 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
return err;
 }
 
-static unsigned int get_mergeable_buf_len(struct ewma *avg_pkt_len)
+static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len)
 {
const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
unsigned int len;
 
-   len = hdr_len + clamp_t(unsigned int, ewma_read(avg_pkt_len),
+   len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
 }
@@ -1615,7 +1615,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
napi_hash_add(&vi->rq[i].napi);
 
sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
-   ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
+   ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
}
 
@@ -1658,7 +1658,7 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
{
struct virtnet_info *vi = netdev_priv(queue->dev);
unsigned int queue_index = get_netdev_rx_queue_index(queue);
-   struct ewma *avg;
+   struct ewma_pkt_len *avg;
 
BUG_ON(queue_index >= vi->max_queue_pairs);
avg = &vi->rq[queue_index].mrg_avg_pkt_len;

