On Tue, Sep 28, 2010 at 9:49 PM, Dave Young <[email protected]> wrote:
> On Tue, Sep 28, 2010 at 9:34 PM, Anthony Liguori <[email protected]> 
> wrote:
>> On 09/28/2010 08:19 AM, Dave Young wrote:
>>>
>>> Ballooning could cause OOM killing and a panic in the guest. If we disable
>>> the OOM killer, things will be better: at least we avoid a guest panic.
>>>
>>> If the allocation fails, we can just adjust the balloon target to be equal
>>> to the current number by calling vdev->config->set.
>>>
>>> But during testing I found that config->set on num_pages does not actually
>>> change the config. Should I add hacks in userspace as well? If so, where
>>> should I start hacking?
>>>
>>
>
> Hi,
>
> Thanks for your comments.
>
>> The guest is not supposed to set the target field in its config.  This is a
>> host read/write, guest read-only field.
>
> Could you tell me where it is set? If so, IMHO the set-config API should
> fail, shouldn't it?

Got it — in hw/virtio-balloon.c, function virtio_balloon_set_config.

>
>>
>> The problem with your approach generally speaking is that it's unclear
>> whether this is the right policy.  For instance, what if another application
>> held a very large allocation which caused the fill request to stop but then
>> shortly afterwards, the aforementioned application released that allocation.
>>  If instead of just stopping, we paused and tried again later, we could
>> potentially succeed.
>
> Yes, it is possible. But maybe it would be better to do the ballooning from the qemu monitor later?
>
>>
>> I think a better approach might be a graceful back off.  For instance, when
>> you hit this condition, deflate the balloon by 10% based on the assumption
>> that you have probably already gone too far.  Before you attempt to allocate to
>> the target again, set a timeout that increases in duration exponentially
>> until you reach some maximum (say 10s).
>
> I'm afraid that most of the time it will keep inflating and deflating in a cycle.
>
>>
>> This isn't the only approach, but hopefully it conveys the idea of
>> gracefully backing off without really giving up.
>>
>> Regards,
>>
>> Anthony Liguori
>>
>>> Thanks
>>>
>>> --- linux-2.6.orig/drivers/virtio/virtio_balloon.c      2010-09-25
>>> 20:58:14.190000001 +0800
>>> +++ linux-2.6/drivers/virtio/virtio_balloon.c   2010-09-28
>>> 21:05:42.203333675 +0800
>>> @@ -25,6 +25,7 @@
>>>  #include<linux/freezer.h>
>>>  #include<linux/delay.h>
>>>  #include<linux/slab.h>
>>> +#include<linux/oom.h>
>>>
>>>  struct virtio_balloon
>>>  {
>>> @@ -97,8 +98,22 @@ static void tell_host(struct virtio_ball
>>>        wait_for_completion(&vb->acked);
>>>  }
>>>
>>> -static void fill_balloon(struct virtio_balloon *vb, size_t num)
>>> +static int cblimit(int times)
>>>  {
>>> +       static int t;
>>> +
>>> +       if (t<  times)
>>> +               t++;
>>> +       else
>>> +               t = 0;
>>> +
>>> +       return !t;
>>> +}
>>> +
>>> +static int fill_balloon(struct virtio_balloon *vb, size_t num)
>>> +{
>>> +       int ret = 0;
>>> +
>>>        /* We can only do one array worth at a time. */
>>>        num = min(num, ARRAY_SIZE(vb->pfns));
>>>
>>> @@ -106,10 +121,13 @@ static void fill_balloon(struct virtio_b
>>>                struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY
>>> |
>>>                                        __GFP_NOMEMALLOC | __GFP_NOWARN);
>>>                if (!page) {
>>> -                       if (printk_ratelimit())
>>> +                       if (cblimit(5)) {
>>>                                dev_printk(KERN_INFO,&vb->vdev->dev,
>>>                                           "Out of puff! Can't get %zu
>>> pages\n",
>>>                                           num);
>>> +                               ret = -ENOMEM;
>>> +                               goto out;
>>> +                       }
>>>                        /* Sleep for at least 1/5 of a second before retry.
>>> */
>>>                        msleep(200);
>>>                        break;
>>> @@ -120,11 +138,11 @@ static void fill_balloon(struct virtio_b
>>>                list_add(&page->lru,&vb->pages);
>>>        }
>>>
>>> -       /* Didn't get any?  Oh well. */
>>> -       if (vb->num_pfns == 0)
>>> -               return;
>>> +out:
>>> +       if (vb->num_pfns)
>>> +               tell_host(vb, vb->inflate_vq);
>>>
>>> -       tell_host(vb, vb->inflate_vq);
>>> +       return ret;
>>>  }
>>>
>>>  static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
>>> @@ -251,6 +269,14 @@ static void update_balloon_size(struct v
>>>                        &actual, sizeof(actual));
>>>  }
>>>
>>> +static void update_balloon_target(struct virtio_balloon *vb)
>>> +{
>>> +       __le32 num_pages = cpu_to_le32(vb->num_pages);
>>> +       vb->vdev->config->set(vb->vdev,
>>> +                             offsetof(struct virtio_balloon_config,
>>> num_pages),
>>> +                       &num_pages, sizeof(num_pages));
>>> +}
>>> +
>>>  static int balloon(void *_vballoon)
>>>  {
>>>        struct virtio_balloon *vb = _vballoon;
>>> @@ -267,9 +293,14 @@ static int balloon(void *_vballoon)
>>>                                         || freezing(current));
>>>                if (vb->need_stats_update)
>>>                        stats_handle_request(vb);
>>> -               if (diff>  0)
>>> -                       fill_balloon(vb, diff);
>>> -               else if (diff<  0)
>>> +               if (diff>  0) {
>>> +                       int oom;
>>> +                       oom_killer_disable();
>>> +                       oom = fill_balloon(vb, diff);
>>> +                       oom_killer_enable();
>>> +                       if (oom)
>>> +                               update_balloon_target(vb);
>>> +               } else if (diff<  0)
>>>                        leak_balloon(vb, -diff);
>>>                update_balloon_size(vb);
>>>        }
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>>> the body of a message to [email protected]
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
>>
>>
>
>
>
> --
> Regards
> dave
>



-- 
Regards
dave
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to