On 20/5/25 20:28, Chenyi Qiang wrote:
The current error handling is simple with the following assumption:
- QEMU will quit instead of resuming the guest if kvm_convert_memory()
   fails, thus no need to do rollback.
- The convert range is required to be in the desired state. It is not
   allowed to handle the mixture case.
- The conversion from shared to private is a non-failure operation.

This is sufficient for now as complext error handling is not required.
For future extension, add some potential error handling.
- For private to shared conversion, do the rollback operation if
   ram_block_attribute_notify_to_populated() fails.
- For shared to private conversion, still assert it as a non-failure
   operation for now. It could be an easy fail path with in-place
   conversion, which will likely have to retry the conversion until it
   works in the future.
- For mixture case, process individual blocks for ease of rollback.

Signed-off-by: Chenyi Qiang <chenyi.qi...@intel.com>
---
  system/ram-block-attribute.c | 116 +++++++++++++++++++++++++++--------
  1 file changed, 90 insertions(+), 26 deletions(-)

diff --git a/system/ram-block-attribute.c b/system/ram-block-attribute.c
index 387501b569..0af3396aa4 100644
--- a/system/ram-block-attribute.c
+++ b/system/ram-block-attribute.c
@@ -289,7 +289,12 @@ static int 
ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
          }
          ret = rdl->notify_discard(rdl, &tmp);
          if (ret) {
-            break;
+            /*
+             * The current to_private listeners (VFIO dma_unmap and
+             * KVM set_attribute_private) are non-failing operations.
+             * TODO: add rollback operations if it is allowed to fail.
+             */
+            g_assert(ret);
          }
      }
@@ -300,7 +305,7 @@ static int
  ram_block_attribute_notify_to_populated(RamBlockAttribute *attr,
                                          uint64_t offset, uint64_t size)
  {
-    RamDiscardListener *rdl;
+    RamDiscardListener *rdl, *rdl2;
      int ret = 0;
QLIST_FOREACH(rdl, &attr->rdl_list, next) {
@@ -315,6 +320,20 @@ ram_block_attribute_notify_to_populated(RamBlockAttribute 
*attr,
          }
      }
+ if (ret) {
+        /* Notify all already-notified listeners. */
+        QLIST_FOREACH(rdl2, &attr->rdl_list, next) {
+            MemoryRegionSection tmp = *rdl2->section;
+
+            if (rdl == rdl2) {
+                break;
+            }
+            if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+                continue;
+            }
+            rdl2->notify_discard(rdl2, &tmp);
+        }
+    }
      return ret;
  }
@@ -353,6 +372,9 @@ int ram_block_attribute_state_change(RamBlockAttribute *attr, uint64_t offset,
      const int block_size = ram_block_attribute_get_block_size(attr);
      const unsigned long first_bit = offset / block_size;
      const unsigned long nbits = size / block_size;
+    const uint64_t end = offset + size;
+    unsigned long bit;
+    uint64_t cur;
      int ret = 0;
if (!ram_block_attribute_is_valid_range(attr, offset, size)) {
@@ -361,32 +383,74 @@ int ram_block_attribute_state_change(RamBlockAttribute 
*attr, uint64_t offset,
          return -1;
      }
- /* Already discard/populated */
-    if ((ram_block_attribute_is_range_discard(attr, offset, size) &&
-         to_private) ||
-        (ram_block_attribute_is_range_populated(attr, offset, size) &&
-         !to_private)) {
-        return 0;
-    }
-
-    /* Unexpected mixture */
-    if ((!ram_block_attribute_is_range_populated(attr, offset, size) &&
-         to_private) ||
-        (!ram_block_attribute_is_range_discard(attr, offset, size) &&
-         !to_private)) {
-        error_report("%s, the range is not all in the desired state: "
-                     "(offset 0x%lx, size 0x%lx), %s",
-                     __func__, offset, size,
-                     to_private ? "private" : "shared");
-        return -1;
-    }

David is right, this needs to be squashed where you added the above hunk.

-
      if (to_private) {
-        bitmap_clear(attr->bitmap, first_bit, nbits);
-        ret = ram_block_attribute_notify_to_discard(attr, offset, size);
+        if (ram_block_attribute_is_range_discard(attr, offset, size)) {
+            /* Already private */
+        } else if (!ram_block_attribute_is_range_populated(attr, offset,
+                                                           size)) {
+            /* Unexpected mixture: process individual blocks */


Is an "expected mix" situation possible?
May be just always run the code for "unexpected mix", or refuse mixing and let 
the VM deal with it?


+            for (cur = offset; cur < end; cur += block_size) {
+                bit = cur / block_size;
+                if (!test_bit(bit, attr->bitmap)) {
+                    continue;
+                }
+                clear_bit(bit, attr->bitmap);
+                ram_block_attribute_notify_to_discard(attr, cur, block_size);
+            }
+        } else {
+            /* Completely shared */
+            bitmap_clear(attr->bitmap, first_bit, nbits);
+            ram_block_attribute_notify_to_discard(attr, offset, size);
+        }
      } else {
-        bitmap_set(attr->bitmap, first_bit, nbits);
-        ret = ram_block_attribute_notify_to_populated(attr, offset, size);
+        if (ram_block_attribute_is_range_populated(attr, offset, size)) {
+            /* Already shared */
+        } else if (!ram_block_attribute_is_range_discard(attr, offset, size)) {
+            /* Unexpected mixture: process individual blocks */
+            unsigned long *modified_bitmap = bitmap_new(nbits);
+
+            for (cur = offset; cur < end; cur += block_size) {
+                bit = cur / block_size;
+                if (test_bit(bit, attr->bitmap)) {
+                    continue;
+                }
+                set_bit(bit, attr->bitmap);
+                ret = ram_block_attribute_notify_to_populated(attr, cur,
+                                                           block_size);
+                if (!ret) {
+                    set_bit(bit - first_bit, modified_bitmap);
+                    continue;
+                }
+                clear_bit(bit, attr->bitmap);
+                break;
+            }
+
+            if (ret) {
+                /*
+                 * Very unexpected: something went wrong. Revert to the old
+                 * state, marking only the blocks as private that we converted
+                 * to shared.


If something went wrong... well, on my AMD machine this usually means the fw is 
really unhappy and recovery is hardly possible and the machine needs reboot. 
Probably stopping the VM would make more sense for now (or stop the device so 
the user could save work from the VM, dunno).


+                 */
+                for (cur = offset; cur < end; cur += block_size) {
+                    bit = cur / block_size;
+                    if (!test_bit(bit - first_bit, modified_bitmap)) {
+                        continue;
+                    }
+                    assert(test_bit(bit, attr->bitmap));
+                    clear_bit(bit, attr->bitmap);
+                    ram_block_attribute_notify_to_discard(attr, cur,
+                                                          block_size);
+                }
+            }
+            g_free(modified_bitmap);
+        } else {
+            /* Complete private */

I'd swap this hunk with the previous one. Thanks,

+            bitmap_set(attr->bitmap, first_bit, nbits);
+            ret = ram_block_attribute_notify_to_populated(attr, offset, size);
+            if (ret) {
+                bitmap_clear(attr->bitmap, first_bit, nbits);
+            }
+        }
      }
return ret;

--
Alexey


Reply via email to