On 5/26/2025 5:17 PM, David Hildenbrand wrote:
> On 20.05.25 12:28, Chenyi Qiang wrote:
>> The current error handling is simple with the following assumption:
>> - QEMU will quit instead of resuming the guest if kvm_convert_memory()
>>    fails, thus no need to do rollback.
>> - The convert range is required to be in the desired state. It is not
>>    allowed to handle the mixture case.
>> - The conversion from shared to private is a non-failure operation.
>>
>> This is sufficient for now as complex error handling is not required.
>> For future extension, add some potential error handling.
>> - For private to shared conversion, do the rollback operation if
>>    ram_block_attribute_notify_to_populated() fails.
>> - For shared to private conversion, still assert it as a non-failure
>>    operation for now. It could be an easy fail path with in-place
>>    conversion, which will likely have to retry the conversion until it
>>    works in the future.
>> - For mixture case, process individual blocks for ease of rollback.
>>
>> Signed-off-by: Chenyi Qiang <chenyi.qi...@intel.com>
>> ---
>>   system/ram-block-attribute.c | 116 +++++++++++++++++++++++++++--------
>>   1 file changed, 90 insertions(+), 26 deletions(-)
>>
>> diff --git a/system/ram-block-attribute.c b/system/ram-block-attribute.c
>> index 387501b569..0af3396aa4 100644
>> --- a/system/ram-block-attribute.c
>> +++ b/system/ram-block-attribute.c
>> @@ -289,7 +289,12 @@ static int
>> ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
>>           }
>>           ret = rdl->notify_discard(rdl, &tmp);
>>           if (ret) {
>> -            break;
>> +            /*
>> +             * The current to_private listeners (VFIO dma_unmap and
>> +             * KVM set_attribute_private) are non-failing operations.
>> +             * TODO: add rollback operations if it is allowed to fail.
>> +             */
>> +            g_assert(ret);
>>           }
>>       }
>>   
> 
> If it's not allowed to fail for now, then patch #8 does not make sense
> and should be dropped :)

It was intended for future extension, as in-place conversion to_private
allows the operation to fail. That is why I added patch #8.

But as you mentioned, since the conversion path is changing, it may be
easier to handle this from the KVM code directly. Let me drop patch #8
and wait for the in-place conversion support to mature.

> 
> The implementations (vfio) should likely exit() instead on unexpected
> errors when discarding.

After dropping patch #8, maybe we should keep the vfio discard handling
as it was. Adding an additional exit() is also OK to me, since it is a
non-failure case.

> 
> 
> 
> Why not squash all the below into the corresponding patch? Looks mostly
> like handling partial conversions correctly (as discussed previously)?

I extracted these two kinds of handling, 1) mixture conversion and
2) operation rollback, into this individual patch because they are not
practical cases and are untested.

For 1), I still don't see any real case that converts a range with
mixed attributes.

For 2), the current failure of memory conversion (as seen in
kvm_cpu_exec() -> kvm_convert_memory()) causes QEMU to quit instead of
resuming the guest. Doing the rollback seems useless at present.

> 
>> @@ -300,7 +305,7 @@ static int
>>   ram_block_attribute_notify_to_populated(RamBlockAttribute *attr,
>>                                           uint64_t offset, uint64_t size)
>>   {
>> -    RamDiscardListener *rdl;
>> +    RamDiscardListener *rdl, *rdl2;
>>       int ret = 0;
>>         QLIST_FOREACH(rdl, &attr->rdl_list, next) {
>> @@ -315,6 +320,20 @@
>> ram_block_attribute_notify_to_populated(RamBlockAttribute *attr,
>>           }
>>       }
>>   +    if (ret) {
>> +        /* Notify all already-notified listeners. */
>> +        QLIST_FOREACH(rdl2, &attr->rdl_list, next) {
>> +            MemoryRegionSection tmp = *rdl2->section;
>> +
>> +            if (rdl == rdl2) {
>> +                break;
>> +            }
>> +            if (!memory_region_section_intersect_range(&tmp, offset,
>> size)) {
>> +                continue;
>> +            }
>> +            rdl2->notify_discard(rdl2, &tmp);
>> +        }
>> +    }
>>       return ret;
>>   }
>>   @@ -353,6 +372,9 @@ int
>> ram_block_attribute_state_change(RamBlockAttribute *attr, uint64_t
>> offset,
>>       const int block_size = ram_block_attribute_get_block_size(attr);
>>       const unsigned long first_bit = offset / block_size;
>>       const unsigned long nbits = size / block_size;
>> +    const uint64_t end = offset + size;
>> +    unsigned long bit;
>> +    uint64_t cur;
>>       int ret = 0;
>>         if (!ram_block_attribute_is_valid_range(attr, offset, size)) {
>> @@ -361,32 +383,74 @@ int
>> ram_block_attribute_state_change(RamBlockAttribute *attr, uint64_t
>> offset,
>>           return -1;
>>       }
>>   -    /* Already discard/populated */
>> -    if ((ram_block_attribute_is_range_discard(attr, offset, size) &&
>> -         to_private) ||
>> -        (ram_block_attribute_is_range_populated(attr, offset, size) &&
>> -         !to_private)) {
>> -        return 0;
>> -    }
>> -
>> -    /* Unexpected mixture */
>> -    if ((!ram_block_attribute_is_range_populated(attr, offset, size) &&
>> -         to_private) ||
>> -        (!ram_block_attribute_is_range_discard(attr, offset, size) &&
>> -         !to_private)) {
>> -        error_report("%s, the range is not all in the desired state: "
>> -                     "(offset 0x%lx, size 0x%lx), %s",
>> -                     __func__, offset, size,
>> -                     to_private ? "private" : "shared");
>> -        return -1;
>> -    }
>> -
>>       if (to_private) {
>> -        bitmap_clear(attr->bitmap, first_bit, nbits);
>> -        ret = ram_block_attribute_notify_to_discard(attr, offset, size);
>> +        if (ram_block_attribute_is_range_discard(attr, offset, size)) {
>> +            /* Already private */
>> +        } else if (!ram_block_attribute_is_range_populated(attr, offset,
>> +                                                           size)) {
>> +            /* Unexpected mixture: process individual blocks */
>> +            for (cur = offset; cur < end; cur += block_size) {
>> +                bit = cur / block_size;
>> +                if (!test_bit(bit, attr->bitmap)) {
>> +                    continue;
>> +                }
>> +                clear_bit(bit, attr->bitmap);
>> +                ram_block_attribute_notify_to_discard(attr, cur,
>> block_size);
>> +            }
>> +        } else {
>> +            /* Completely shared */
>> +            bitmap_clear(attr->bitmap, first_bit, nbits);
>> +            ram_block_attribute_notify_to_discard(attr, offset, size);
>> +        }
>>       } else {
>> -        bitmap_set(attr->bitmap, first_bit, nbits);
>> -        ret = ram_block_attribute_notify_to_populated(attr, offset,
>> size);
>> +        if (ram_block_attribute_is_range_populated(attr, offset,
>> size)) {
>> +            /* Already shared */
>> +        } else if (!ram_block_attribute_is_range_discard(attr,
>> offset, size)) {
>> +            /* Unexpected mixture: process individual blocks */
>> +            unsigned long *modified_bitmap = bitmap_new(nbits);
>> +
>> +            for (cur = offset; cur < end; cur += block_size) {
>> +                bit = cur / block_size;
>> +                if (test_bit(bit, attr->bitmap)) {
>> +                    continue;
>> +                }
>> +                set_bit(bit, attr->bitmap);
>> +                ret = ram_block_attribute_notify_to_populated(attr, cur,
>> +                                                           block_size);
>> +                if (!ret) {
>> +                    set_bit(bit - first_bit, modified_bitmap);
>> +                    continue;
>> +                }
>> +                clear_bit(bit, attr->bitmap);
>> +                break;
>> +            }
>> +
>> +            if (ret) {
>> +                /*
>> +                 * Very unexpected: something went wrong. Revert to
>> the old
>> +                 * state, marking only the blocks as private that we
>> converted
>> +                 * to shared.
>> +                 */
>> +                for (cur = offset; cur < end; cur += block_size) {
>> +                    bit = cur / block_size;
>> +                    if (!test_bit(bit - first_bit, modified_bitmap)) {
>> +                        continue;
>> +                    }
>> +                    assert(test_bit(bit, attr->bitmap));
>> +                    clear_bit(bit, attr->bitmap);
>> +                    ram_block_attribute_notify_to_discard(attr, cur,
>> +                                                          block_size);
>> +                }
>> +            }
>> +            g_free(modified_bitmap);
>> +        } else {
>> +            /* Complete private */
>> +            bitmap_set(attr->bitmap, first_bit, nbits);
>> +            ret = ram_block_attribute_notify_to_populated(attr,
>> offset, size);
>> +            if (ret) {
>> +                bitmap_clear(attr->bitmap, first_bit, nbits);
>> +            }
>> +        }
>>       }
>>         return ret;
> 
> 


Reply via email to