[PATCH v4 06/10] migration: Introduce dirty-limit capability

2023-02-16 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce the migration dirty-limit capability, which can
be turned on before live migration and limits the dirty
page rate during live migration.

Introduce the migrate_dirty_limit function to help check
whether the dirty-limit capability is enabled during live migration.

Meanwhile, refactor vcpu_dirty_rate_stat_collect
so that the period can be configured instead of hardcoded.

The dirty-limit capability is similar to auto-converge
but uses the dirty limit instead of the traditional
cpu-throttle to throttle the guest down. To enable this
feature, turn on the dirty-limit capability before live
migration using migrate-set-capabilities, and set the
parameters "x-vcpu-dirty-limit-period" and "vcpu-dirty-limit"
suitably to speed up convergence.
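
For example, an illustrative QMP sequence (the capability and parameter
names come from this series; the concrete values are only examples):

  {"execute": "migrate-set-capabilities",
   "arguments": {"capabilities": [
       {"capability": "dirty-limit", "state": true}]}}
  {"execute": "migrate-set-parameters",
   "arguments": {"x-vcpu-dirty-limit-period": 500,
                 "vcpu-dirty-limit": 50}}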

Signed-off-by: Hyman Huang(黄勇) 
Acked-by: Peter Xu 
---
 migration/migration.c | 25 +
 migration/migration.h |  1 +
 qapi/migration.json   |  4 +++-
 softmmu/dirtylimit.c  | 11 ++-
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index e479c86575..f890e5966a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -64,6 +64,7 @@
 #include "yank_functions.h"
 #include "sysemu/qtest.h"
 #include "ui/qemu-spice.h"
+#include "sysemu/kvm.h"
 
 #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling 
*/
 
@@ -1444,6 +1445,20 @@ static bool migrate_caps_check(bool *cap_list,
 }
 }
 
+if (cap_list[MIGRATION_CAPABILITY_DIRTY_LIMIT]) {
+if (cap_list[MIGRATION_CAPABILITY_AUTO_CONVERGE]) {
+error_setg(errp, "dirty-limit conflicts with auto-converge,"
+   " only one of them can be enabled currently");
+return false;
+}
+
+if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
+error_setg(errp, "dirty-limit requires KVM with accelerator"
+   " property 'dirty-ring-size' set");
+return false;
+}
+}
+
 return true;
 }
 
@@ -2635,6 +2650,15 @@ bool migrate_auto_converge(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
 }
 
+bool migrate_dirty_limit(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_LIMIT];
+}
+
 bool migrate_zero_blocks(void)
 {
 MigrationState *s;
@@ -4583,6 +4607,7 @@ static Property migration_properties[] = {
 DEFINE_PROP_MIG_CAP("x-zero-copy-send",
 MIGRATION_CAPABILITY_ZERO_COPY_SEND),
 #endif
+DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT),
 
 DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/migration/migration.h b/migration/migration.h
index 2da2f8a164..cd2e9bfeea 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -418,6 +418,7 @@ bool migrate_ignore_shared(void);
 bool migrate_validate_uuid(void);
 
 bool migrate_auto_converge(void);
+bool migrate_dirty_limit(void);
 bool migrate_use_multifd(void);
 bool migrate_pause_before_switchover(void);
 int migrate_multifd_channels(void);
diff --git a/qapi/migration.json b/qapi/migration.json
index d33cc2d582..b7a92be055 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -477,6 +477,8 @@
 #will be handled faster.  This is a performance feature and
 #should not affect the correctness of postcopy migration.
 #(since 7.1)
+# @dirty-limit: Use dirty-limit to throttle down the guest if enabled.
+#   (since 8.0)
 #
 # Features:
 # @unstable: Members @x-colo and @x-ignore-shared are experimental.
@@ -492,7 +494,7 @@
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
'validate-uuid', 'background-snapshot',
-   'zero-copy-send', 'postcopy-preempt'] }
+   'zero-copy-send', 'postcopy-preempt', 'dirty-limit'] }
 
 ##
 # @MigrationCapabilityStatus:
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index dcab9bf2b1..52d1b2c6fa 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -22,6 +22,8 @@
 #include "exec/memory.h"
 #include "hw/boards.h"
 #include "sysemu/kvm.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
 #include "trace.h"
 
 /*
@@ -74,11 +76,18 @@ static bool dirtylimit_quit;
 
 static void vcpu_dirty_rate_stat_collect(void)
 {
+MigrationState *s = migrate_get_current();
 VcpuStat stat;
 int i = 0;
+int64_t period = DIRTYLIMIT_CALC_TIME_MS;
+
+if (migrate_dirty_limit() &&
+migration_is_active(s)) {
+period = s->parameters.x_vcpu_dirty_limit_period;
+}
 
 /* calculate vcpu dirtyrate */
-vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS,
+vcpu_calculate_dirtyrate(period,
 &stat,
  GLOBAL_DIRTY_LIMIT,
  false);
-- 
2.17.1




[PATCH v4 08/10] migration: Implement dirty-limit convergence algo

2023-02-16 Thread huangy81
From: Hyman Huang(黄勇) 

Implement the dirty-limit convergence algorithm for live migration,
which is similar to the auto-converge algorithm but uses dirty-limit
instead of cpu throttle to make migration converge.

Enable the dirty page limit if dirty_rate_high_cnt is greater than 2
while the dirty-limit capability is enabled; disable dirty-limit if
migration is cancelled.

Note that the "set_vcpu_dirty_limit" and "cancel_vcpu_dirty_limit"
commands are not allowed during dirty-limit live migration.
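
For instance, a hypothetical QMP exchange while a dirty-limit migration
is in progress (the exact error text comes from the dirtylimit checks
added by this patch and is elided here):

  {"execute": "set-vcpu-dirty-limit", "arguments": {"dirty-rate": 200}}
  {"error": {"class": "GenericError", "desc": "..."}}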

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Markus Armbruster 
---
 migration/migration.c  |  3 ++
 migration/ram.c| 63 --
 migration/trace-events |  1 +
 softmmu/dirtylimit.c   | 22 +++
 4 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index f890e5966a..7ccbc07257 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -256,6 +256,9 @@ void migration_cancel(const Error *error)
 if (error) {
 migrate_set_error(current_migration, error);
 }
+if (migrate_dirty_limit()) {
+qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
+}
 migrate_fd_cancel(current_migration);
 }
 
diff --git a/migration/ram.c b/migration/ram.c
index 3e5dff4068..24d26b5135 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -45,6 +45,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-types-migration.h"
 #include "qapi/qapi-events-migration.h"
+#include "qapi/qapi-commands-migration.h"
 #include "qapi/qmp/qerror.h"
 #include "trace.h"
 #include "exec/ram_addr.h"
@@ -57,6 +58,8 @@
 #include "qemu/iov.h"
 #include "multifd.h"
 #include "sysemu/runstate.h"
+#include "sysemu/dirtylimit.h"
+#include "sysemu/kvm.h"
 
 #include "hw/boards.h" /* for machine_dump_guest_core() */
 
@@ -1188,6 +1191,30 @@ static void migration_update_rates(RAMState *rs, int64_t 
end_time)
 }
 }
 
+/*
+ * Enable dirty-limit to throttle down the guest
+ */
+static void migration_dirty_limit_guest(void)
+{
+static int64_t quota_dirtyrate;
+MigrationState *s = migrate_get_current();
+
+/*
+ * Do nothing if the dirty limit is already in service and the
+ * migration parameter vcpu-dirty-limit is untouched.
+ */
+if (dirtylimit_in_service() &&
+quota_dirtyrate == s->parameters.vcpu_dirty_limit) {
+return;
+}
+
+quota_dirtyrate = s->parameters.vcpu_dirty_limit;
+
+/* Set or update quota dirty limit */
+qmp_set_vcpu_dirty_limit(false, -1, quota_dirtyrate, NULL);
+trace_migration_dirty_limit_guest(quota_dirtyrate);
+}
+
 static void migration_trigger_throttle(RAMState *rs)
 {
 MigrationState *s = migrate_get_current();
@@ -1197,26 +1224,32 @@ static void migration_trigger_throttle(RAMState *rs)
 uint64_t bytes_dirty_period = rs->num_dirty_pages_period * 
TARGET_PAGE_SIZE;
 uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
 
-/* During block migration the auto-converge logic incorrectly detects
- * that ram migration makes no progress. Avoid this by disabling the
- * throttling logic during the bulk phase of block migration. */
-if (blk_mig_bulk_active()) {
-return;
-}
+/*
+ * The following detection logic can be refined later. For now:
+ * Check to see if the ratio between dirtied bytes and the approx.
+ * amount of bytes that just got transferred since the last time
+ * we were in this routine reaches the threshold. If that happens
+ * twice, start or increase throttling.
+ */
 
-if (migrate_auto_converge()) {
-/* The following detection logic can be refined later. For now:
-   Check to see if the ratio between dirtied bytes and the approx.
-   amount of bytes that just got transferred since the last time
-   we were in this routine reaches the threshold. If that happens
-   twice, start or increase throttling. */
+if ((bytes_dirty_period > bytes_dirty_threshold) &&
+(++rs->dirty_rate_high_cnt >= 2)) {
+rs->dirty_rate_high_cnt = 0;
+/*
+ * During block migration the auto-converge logic incorrectly detects
+ * that ram migration makes no progress. Avoid this by disabling the
+ * throttling logic during the bulk phase of block migration
+ */
+if (blk_mig_bulk_active()) {
+return;
+}
 
-if ((bytes_dirty_period > bytes_dirty_threshold) &&
-(++rs->dirty_rate_high_cnt >= 2)) {
+if (migrate_auto_converge()) {
 trace_migration_throttle();
-rs->dirty_rate_high_cnt = 0;
 mig_throttle_guest_down(bytes_dirty_period,
 bytes_dirty_threshold);
+} else if (migrate_dirty_limit()) {
+migration_dirty_limit_guest();
 }
 }
 }
diff --git a/migration/trace-events b/migration/trace-events
index 67b65a70ff..a689807a49 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -91,6 

[PATCH v4 10/10] tests: Add migration dirty-limit capability test

2023-02-16 Thread huangy81
From: Hyman Huang(黄勇) 

Add a migration dirty-limit capability test if the kernel supports
dirty ring.

The migration dirty-limit capability introduces the dirty limit
capability; two parameters, x-vcpu-dirty-limit-period and
vcpu-dirty-limit, are introduced to implement live migration
with dirty limit.

The test case does the following things:
1. start src, dst vm and enable dirty-limit capability
2. start migration and then cancel it to check if dirty limit
   stops working.
3. restart dst vm
4. start migration and enable dirty-limit capability
5. check if migration satisfies the convergence condition
   during the pre-switchover phase.

Signed-off-by: Hyman Huang(黄勇) 
---
 tests/qtest/migration-test.c | 157 +++
 1 file changed, 157 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 109bc8e7b1..6aad86e572 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2434,6 +2434,161 @@ static void test_vcpu_dirty_limit(void)
 dirtylimit_stop_vm(vm);
 }
 
+static void migrate_dirty_limit_wait_showup(QTestState *from,
+const int64_t period,
+const int64_t value)
+{
+/* Enable dirty limit capability */
+migrate_set_capability(from, "dirty-limit", true);
+
+/* Set dirty limit parameters */
+migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period);
+migrate_set_parameter_int(from, "vcpu-dirty-limit", value);
+
+/* Make sure migrate can't converge */
+migrate_ensure_non_converge(from);
+
+/* To check limit rate after precopy */
+migrate_set_capability(from, "pause-before-switchover", true);
+
+/* Wait for the serial output from the source */
+wait_for_serial("src_serial");
+}
+
+/*
+ * This test does:
+ *  source   target
+ *   migrate_incoming
+ * migrate
+ * migrate_cancel
+ *   restart target
+ * migrate
+ *
+ *  And check that dirty limit works correctly
+ */
+static void test_migrate_dirty_limit(void)
+{
+g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
+QTestState *from, *to;
+int64_t remaining, throttle_us_per_full;
+/*
+ * We want the test to be stable and as fast as possible.
+ * E.g., with 1Gb/s bandwidth migration may pass without dirty limit,
+ * so we need to decrease the bandwidth.
+ */
+const int64_t dirtylimit_period = 1000, dirtylimit_value = 50;
+const int64_t max_bandwidth = 4; /* ~400Mb/s */
+const int64_t downtime_limit = 250; /* 250ms */
+/*
+ * We migrate through unix-socket (> 500Mb/s).
+ * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s).
+ * So, we can predict expected_threshold
+ */
+const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000;
+int max_try_count = 10;
+MigrateCommon args = {
+.start = {
+.hide_stderr = true,
+.use_dirty_ring = true,
+},
+.listen_uri = uri,
+.connect_uri = uri,
+};
+
+/* Start src, dst vm */
+if (test_migrate_start(&from, &to, args.listen_uri, &args.start)) {
+return;
+}
+
+/* Prepare for dirty limit migration and wait for the src vm to show up */
+migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value);
+
+/* Start migrate */
+migrate_qmp(from, uri, "{}");
+
+/* Wait for dirty limit throttle begin */
+throttle_us_per_full = 0;
+while (throttle_us_per_full == 0) {
+throttle_us_per_full =
+read_migrate_property_int(from,
+"dirty-limit-throttle-time-per-round");
+usleep(100);
+g_assert_false(got_stop);
+}
+
+/* Now cancel migrate and wait for dirty limit throttle switch off */
+migrate_cancel(from);
+wait_for_migration_status(from, "cancelled", NULL);
+
+/* Check if dirty limit throttle switched off, set timeout 1ms */
+do {
+throttle_us_per_full =
+read_migrate_property_int(from,
+"dirty-limit-throttle-time-per-round");
+usleep(100);
+g_assert_false(got_stop);
+} while (throttle_us_per_full != 0 && --max_try_count);
+
+/* Assert dirty limit is not in service */
+g_assert_cmpint(throttle_us_per_full, ==, 0);
+
+args = (MigrateCommon) {
+.start = {
+.only_target = true,
+.use_dirty_ring = true,
+},
+.listen_uri = uri,
+.connect_uri = uri,
+};
+
+/* Restart dst vm, src vm already showed up so we needn't wait anymore */
+if (test_migrate_start(&from, &to, args.listen_uri, &args.start)) {
+return;
+}
+
+/* Start migrate */
+migrate_qmp(from, uri, "{}");
+
+/* Wait for dirty limit throttle begin */
+throttle_us_per_full = 0;
+while (throttle_us_per_full == 0) {
+throttle_us_per_full =
+read_migrate_property_int(from,
+  

[PATCH v4 05/10] qapi/migration: Introduce vcpu-dirty-limit parameters

2023-02-16 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce "vcpu-dirty-limit" migration parameter used
to limit dirty page rate during live migration.

"vcpu-dirty-limit" and "x-vcpu-dirty-limit-period" are
two dirty-limit-related migration parameters, which can
be set before and during live migration by qmp
migrate-set-parameters.

This two parameters are used to help implement the dirty
page rate limit algo of migration.
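
For example, the defaults introduced by this series can be inspected
with query-migrate-parameters (other parameters elided in this sketch):

  {"execute": "query-migrate-parameters"}
  {"return": {"x-vcpu-dirty-limit-period": 1000,
              "vcpu-dirty-limit": 1,
              ...}}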

Signed-off-by: Hyman Huang(黄勇) 
Acked-by: Peter Xu 
---
 migration/migration-hmp-cmds.c |  8 
 migration/migration.c  | 23 +++
 qapi/migration.json| 18 +++---
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 3bc751bec9..a61ec80d9d 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -349,6 +349,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 monitor_printf(mon, "%s: %" PRIu64 " ms\n",
 MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD),
 params->x_vcpu_dirty_limit_period);
+
+monitor_printf(mon, "%s: %" PRIu64 " MB/s\n",
+MigrationParameter_str(MIGRATION_PARAMETER_VCPU_DIRTY_LIMIT),
+params->vcpu_dirty_limit);
 }
 
 qapi_free_MigrationParameters(params);
@@ -609,6 +613,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 p->has_x_vcpu_dirty_limit_period = true;
visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err);
 break;
+case MIGRATION_PARAMETER_VCPU_DIRTY_LIMIT:
+p->has_vcpu_dirty_limit = true;
+visit_type_size(v, param, &p->vcpu_dirty_limit, &err);
+break;
 default:
 assert(0);
 }
diff --git a/migration/migration.c b/migration/migration.c
index 6162f048ae..e479c86575 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -120,6 +120,7 @@
 #define DEFAULT_MIGRATE_ANNOUNCE_STEP    100
 
 #define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 1000    /* milliseconds */
+#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT        1       /* MB/s */
 
 static NotifierList migration_state_notifiers =
 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -1021,6 +1022,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
 params->has_x_vcpu_dirty_limit_period = true;
 params->x_vcpu_dirty_limit_period = 
s->parameters.x_vcpu_dirty_limit_period;
 
+params->has_vcpu_dirty_limit = true;
+params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit;
+
 return params;
 }
 
@@ -1674,6 +1678,14 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 return false;
 }
 
+if (params->has_vcpu_dirty_limit &&
+(params->vcpu_dirty_limit < 1)) {
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+   "vcpu_dirty_limit",
+   "is invalid, it must greater then 1 MB/s");
+return false;
+}
+
 return true;
 }
 
@@ -1777,6 +1789,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 if (params->has_x_vcpu_dirty_limit_period) {
 dest->x_vcpu_dirty_limit_period = params->x_vcpu_dirty_limit_period;
 }
+
+if (params->has_vcpu_dirty_limit) {
+dest->vcpu_dirty_limit = params->vcpu_dirty_limit;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1904,6 +1920,9 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 s->parameters.x_vcpu_dirty_limit_period =
 params->x_vcpu_dirty_limit_period;
 }
+if (params->has_vcpu_dirty_limit) {
+s->parameters.vcpu_dirty_limit = params->vcpu_dirty_limit;
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -4539,6 +4558,9 @@ static Property migration_properties[] = {
 DEFINE_PROP_UINT64("x-vcpu-dirty-limit-period", MigrationState,
parameters.x_vcpu_dirty_limit_period,
DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD),
+DEFINE_PROP_UINT64("vcpu-dirty-limit", MigrationState,
+   parameters.vcpu_dirty_limit,
+   DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -4629,6 +4651,7 @@ static void migration_instance_init(Object *obj)
 params->has_announce_rounds = true;
 params->has_announce_step = true;
 params->has_x_vcpu_dirty_limit_period = true;
+params->has_vcpu_dirty_limit = true;
 
 qemu_sem_init(>postcopy_pause_sem, 0);
 qemu_sem_init(>postcopy_pause_rp_sem, 0);
diff --git a/qapi/migration.json b/qapi/migration.json
index f43e4061b4..d33cc2d582 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -779,6 +779,9 @@
 # live migration. Should be in the range 1 to 
1000ms,
 #  

[PATCH v4 09/10] migration: Extend query-migrate to provide dirty page limit info

2023-02-16 Thread huangy81
From: Hyman Huang(黄勇) 

Extend query-migrate to provide the throttle time and estimated
ring full time with the dirty-limit capability enabled, through which
we can observe whether dirty limit takes effect during live migration.
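
For example, with dirty limit in service, query-migrate might return
something like this (values are illustrative, other fields elided):

  {"execute": "query-migrate"}
  {"return": {"status": "active",
              "dirty-limit-throttle-time-per-round": 500,
              "dirty-limit-ring-full-time": 800,
              ...}}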

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Markus Armbruster 
---
 include/sysemu/dirtylimit.h|  2 ++
 migration/migration-hmp-cmds.c | 10 +
 migration/migration.c  | 10 +
 qapi/migration.json| 15 -
 softmmu/dirtylimit.c   | 39 ++
 5 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h
index 8d2c1f3a6b..410a2bc0b6 100644
--- a/include/sysemu/dirtylimit.h
+++ b/include/sysemu/dirtylimit.h
@@ -34,4 +34,6 @@ void dirtylimit_set_vcpu(int cpu_index,
 void dirtylimit_set_all(uint64_t quota,
 bool enable);
 void dirtylimit_vcpu_execute(CPUState *cpu);
+int64_t dirtylimit_throttle_time_per_round(void);
+int64_t dirtylimit_ring_full_time(void);
 #endif
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index a61ec80d9d..1f090faec5 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -171,6 +171,16 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->cpu_throttle_percentage);
 }
 
+if (info->has_dirty_limit_throttle_time_per_round) {
+monitor_printf(mon, "dirty-limit throttle time: %" PRIi64 " us\n",
+   info->dirty_limit_throttle_time_per_round);
+}
+
+if (info->has_dirty_limit_ring_full_time) {
+monitor_printf(mon, "dirty-limit ring full time: %" PRIi64 " us\n",
+   info->dirty_limit_ring_full_time);
+}
+
 if (info->has_postcopy_blocktime) {
 monitor_printf(mon, "postcopy blocktime: %u\n",
info->postcopy_blocktime);
diff --git a/migration/migration.c b/migration/migration.c
index 7ccbc07257..1f1e1f2268 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -65,6 +65,7 @@
 #include "sysemu/qtest.h"
 #include "ui/qemu-spice.h"
 #include "sysemu/kvm.h"
+#include "sysemu/dirtylimit.h"
 
 #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling 
*/
 
@@ -1203,6 +1204,15 @@ static void populate_ram_info(MigrationInfo *info, 
MigrationState *s)
 info->ram->remaining = ram_bytes_remaining();
 info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
 }
+
+if (migrate_dirty_limit() && dirtylimit_in_service()) {
+info->has_dirty_limit_throttle_time_per_round = true;
+info->dirty_limit_throttle_time_per_round =
+dirtylimit_throttle_time_per_round();
+
+info->has_dirty_limit_ring_full_time = true;
+info->dirty_limit_ring_full_time = dirtylimit_ring_full_time();
+}
 }
 
 static void populate_disk_info(MigrationInfo *info)
diff --git a/qapi/migration.json b/qapi/migration.json
index b7a92be055..f511771101 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -242,6 +242,17 @@
 #   Present and non-empty when migration is blocked.
 #   (since 6.0)
 #
+# @dirty-limit-throttle-time-per-round: Maximum throttle time
+#   (in microseconds) of virtual CPUs each dirty ring full round,
+#   which shows how the MigrationCapability dirty-limit affects
+#   the guest during live migration. (since 8.0)
+#
+# @dirty-limit-ring-full-time: Estimated average dirty ring full time
+#   (in microseconds) each dirty ring full round. Note that the value
+#   equals the dirty ring memory size divided by the average dirty
+#   page rate of the virtual CPU, which can be used to observe the
+#   average memory load of the virtual CPU indirectly. (since 8.0)
+#
 # Since: 0.14
 ##
 { 'struct': 'MigrationInfo',
@@ -259,7 +270,9 @@
'*postcopy-blocktime' : 'uint32',
'*postcopy-vcpu-blocktime': ['uint32'],
'*compression': 'CompressionStats',
-   '*socket-address': ['SocketAddress'] } }
+   '*socket-address': ['SocketAddress'],
+   '*dirty-limit-throttle-time-per-round': 'int64',
+   '*dirty-limit-ring-full-time': 'int64'} }
 
 ##
 # @query-migrate:
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index ae77ebe5c5..3c07844a11 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -568,6 +568,45 @@ static struct DirtyLimitInfo *dirtylimit_query_vcpu(int 
cpu_index)
 return info;
 }
 
+/* Return the max throttle time of each virtual CPU */
+int64_t dirtylimit_throttle_time_per_round(void)
+{
+CPUState *cpu;
+int64_t max = 0;
+
+CPU_FOREACH(cpu) {
+if (cpu->throttle_us_per_full > max) {
+max 

[PATCH v4 03/10] kvm: dirty-ring: Fix race with vcpu creation

2023-02-16 Thread huangy81
From: Peter Xu 

It's possible that we want to reap a dirty ring on a vcpu that is during
creation, because the vcpu is put onto list (CPU_FOREACH visible) before
initialization of the structures.  In this case:

qemu_init_vcpu
x86_cpu_realizefn
cpu_exec_realizefn
cpu_list_add  <--- can be probed by CPU_FOREACH
qemu_init_vcpu
cpus_accel->create_vcpu_thread(cpu);
kvm_init_vcpu
map kvm_dirty_gfns  <--- kvm_dirty_gfns valid

Don't try to reap dirty ring on vcpus during creation or it'll crash.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124756
Reported-by: Xiaohui Li 
Signed-off-by: Peter Xu 
---
 accel/kvm/kvm-all.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 9b26582655..47483cdfa0 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -685,6 +685,15 @@ static uint32_t kvm_dirty_ring_reap_one(KVMState *s, 
CPUState *cpu)
 uint32_t ring_size = s->kvm_dirty_ring_size;
 uint32_t count = 0, fetch = cpu->kvm_fetch_index;
 
+/*
+ * It's possible that we race with vcpu creation code where the vcpu is
+ * put onto the vcpus list but not yet initialized the dirty ring
+ * structures.  If so, skip it.
+ */
+if (!cpu->created) {
+return 0;
+}
+
 assert(dirty_gfns && ring_size);
 trace_kvm_dirty_ring_reap_vcpu(cpu->cpu_index);
 
-- 
2.17.1




[PATCH v4 00/10] migration: introduce dirtylimit capability

2023-02-16 Thread huangy81
From: Hyman Huang(黄勇) 

v4:
1. Polish the docs and update the release version suggested by Markus 
2. Rename the migrate exported info "dirty-limit-throttle-time-per-round"
   to "dirty-limit-throttle-time-per-full". 

The following 5 commits haven't been acked or reviewed yet:

kvm: dirty-ring: Fix race with vcpu creation
qapi/migration: Introduce x-vcpu-dirty-limit-period parameter
migration: Implement dirty-limit convergence algo
migration: Extend query-migrate to provide dirty page limit info
tests: Add migration dirty-limit capability test

Ping David and Juan. 

Please review if you have time. Thanks. 

Yong

v3(resend):
- fix the syntax error of the topic.

v3:
This version make some modifications inspired by Peter and Markus
as following:
1. Do the code clean up in [PATCH v2 02/11] suggested by Markus 
2. Replace the [PATCH v2 03/11] with a much simpler patch posted by
   Peter to fix the following bug:
   https://bugzilla.redhat.com/show_bug.cgi?id=2124756
3. Fix the error path of migrate_params_check in [PATCH v2 04/11]
   pointed out by Markus. Enrich the commit message to explain why
   x-vcpu-dirty-limit-period is an unstable parameter.
4. Refactor the dirty-limit convergence algo in [PATCH v2 07/11] 
   suggested by Peter:
   a. apply blk_mig_bulk_active check before enable dirty-limit
   b. drop the unhelpful check function before enable dirty-limit
   c. change the migration_cancel logic, just cancel dirty-limit
  only if the dirty-limit capability is turned on.
   d. abstract a code clean commit [PATCH v3 07/10] to adjust
  the check order before enable auto-converge 
5. Change the name of observing indexes during dirty-limit live
   migration to make them easier to understand. Use the
   maximum throttle time of vcpus as "dirty-limit-throttle-time-per-full"
6. Fix some grammatical and spelling errors pointed out by Markus
   and enrich the document about the dirty-limit live migration
   observing indexes "dirty-limit-ring-full-time"
   and "dirty-limit-throttle-time-per-full"
7. Change the default value of x-vcpu-dirty-limit-period to 1000ms,
   which is the optimal value pointed out in the cover letter for that
   testing environment.
8. Drop the 2 guestperf test commits [PATCH v2 10/11],
   [PATCH v2 11/11] and post them with a standalone series in the
   future.

Thanks Peter and Markus sincerely for the passionate, efficient
and careful comments and suggestions.

Please review.  

Yong

v2: 
This version make a little bit modifications comparing with
version 1 as following:
1. fix the overflow issue reported by Peter Maydell
2. add parameter check for hmp "set_vcpu_dirty_limit" command
3. fix the racing issue between dirty ring reaper thread and
   Qemu main thread.
4. add migrate parameter check for x-vcpu-dirty-limit-period
   and vcpu-dirty-limit.
5. add the logic to forbid hmp/qmp commands set_vcpu_dirty_limit,
   cancel_vcpu_dirty_limit during dirty-limit live migration when
   implement dirty-limit convergence algo.
6. add capability check to ensure auto-converge and dirty-limit
   are mutually exclusive.
7. pre-check if kvm dirty ring size is configured before setting
   dirty-limit migrate parameter 

A more comprehensive test was done comparing with version 1.

The following is the test environment:
-
a. Host hardware info:

CPU:
Intel(R) Xeon(R) Gold 5218 CPU @ 2.30GHz

CPU(s):  64
On-line CPU(s) list: 0-63
Thread(s) per core:  2
Core(s) per socket:  16
Socket(s):   2
NUMA node(s):2

NUMA node0 CPU(s):   0-15,32-47
NUMA node1 CPU(s):   16-31,48-63

Memory:
Hynix  503Gi

Interface:
Intel Corporation Ethernet Connection X722 for 1GbE (rev 09)
Speed: 1000Mb/s

b. Host software info:

OS: ctyunos release 2
Kernel: 4.19.90-2102.2.0.0066.ctl2.x86_64
Libvirt baseline version:  libvirt-6.9.0
Qemu baseline version: qemu-5.0

c. vm scale
CPU: 4
Memory: 4G
-

All the supplementary test data shown below is based on the
above test environment.

In version 1, we posted test data from unixbench as follows:

$ taskset -c 8-15 ./Run -i 2 -c 8 {unixbench test item}

host cpu: Intel(R) Xeon(R) Platinum 8378A
host interface speed: 1000Mb/s
  |---------------------+--------+------------+---------------|
  | UnixBench test item | Normal | Dirtylimit | Auto-converge |
  |---------------------+--------+------------+---------------|
  | dhry2reg            | 32800  | 32786      | 25292         |
  | whetstone-double    | 10326  | 10315      | 9847          |
  | pipe                | 15442  | 15271      | 14506         |
  | context1            | 7260   | 6235       | 4514          |
  | spawn               | 3663   | 3317       | 3249          |
  | syscall             | 4669   | 4667       | 3841          |
  

[PATCH v4 04/10] qapi/migration: Introduce x-vcpu-dirty-limit-period parameter

2023-02-16 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce "x-vcpu-dirty-limit-period" migration experimental
parameter, which is in the range of 1 to 1000ms and used to
make dirtyrate calculation period configurable.

Currently with the "x-vcpu-dirty-limit-period" varies, the
total time of live migration changes, test results show the
optimal value of "x-vcpu-dirty-limit-period" ranges from
500ms to 1000 ms. "x-vcpu-dirty-limit-period" should be made
stable once it proves best value can not be determined with
developer's experiments.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Markus Armbruster 
---
 migration/migration-hmp-cmds.c |  8 
 migration/migration.c  | 27 +++
 qapi/migration.json| 33 ++---
 3 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index ef25bc8929..3bc751bec9 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -345,6 +345,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 }
 }
 }
+
+monitor_printf(mon, "%s: %" PRIu64 " ms\n",
+MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD),
+params->x_vcpu_dirty_limit_period);
 }
 
 qapi_free_MigrationParameters(params);
@@ -601,6 +605,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 error_setg(, "The block-bitmap-mapping parameter can only be set "
"through QMP");
 break;
+case MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD:
+p->has_x_vcpu_dirty_limit_period = true;
+visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err);
+break;
 default:
 assert(0);
 }
diff --git a/migration/migration.c b/migration/migration.c
index 90fca70cb7..6162f048ae 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -119,6 +119,8 @@
 #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
 #define DEFAULT_MIGRATE_ANNOUNCE_STEP    100
 
+#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 1000    /* milliseconds */
+
 static NotifierList migration_state_notifiers =
 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
 
@@ -1016,6 +1018,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
s->parameters.block_bitmap_mapping);
 }
 
+params->has_x_vcpu_dirty_limit_period = true;
+params->x_vcpu_dirty_limit_period = 
s->parameters.x_vcpu_dirty_limit_period;
+
 return params;
 }
 
@@ -1660,6 +1665,15 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 }
 #endif
 
+if (params->has_x_vcpu_dirty_limit_period &&
+(params->x_vcpu_dirty_limit_period < 1 ||
+ params->x_vcpu_dirty_limit_period > 1000)) {
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+   "x-vcpu-dirty-limit-period",
+   "a value between 1 and 1000");
+return false;
+}
+
 return true;
 }
 
@@ -1759,6 +1773,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 dest->has_block_bitmap_mapping = true;
 dest->block_bitmap_mapping = params->block_bitmap_mapping;
 }
+
+if (params->has_x_vcpu_dirty_limit_period) {
+dest->x_vcpu_dirty_limit_period = params->x_vcpu_dirty_limit_period;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1881,6 +1899,11 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 QAPI_CLONE(BitmapMigrationNodeAliasList,
params->block_bitmap_mapping);
 }
+
+if (params->has_x_vcpu_dirty_limit_period) {
+s->parameters.x_vcpu_dirty_limit_period =
+params->x_vcpu_dirty_limit_period;
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -4513,6 +4536,9 @@ static Property migration_properties[] = {
 DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
 DEFINE_PROP_STRING("tls-hostname", MigrationState, 
parameters.tls_hostname),
 DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),
+DEFINE_PROP_UINT64("x-vcpu-dirty-limit-period", MigrationState,
+   parameters.x_vcpu_dirty_limit_period,
+   DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -4602,6 +4628,7 @@ static void migration_instance_init(Object *obj)
 params->has_announce_max = true;
 params->has_announce_rounds = true;
 params->has_announce_step = true;
+params->has_x_vcpu_dirty_limit_period = true;
 
 qemu_sem_init(>postcopy_pause_sem, 0);
 qemu_sem_init(>postcopy_pause_rp_sem, 0);
diff --git a/qapi/migration.json b/qapi/migration.json
index c84fa10e86..f43e4061b4 100644
--- 

[PATCH v4 07/10] migration: Refactor auto-converge capability logic

2023-02-16 Thread huangy81
From: Hyman Huang(黄勇) 

Check if block migration is running before throttling
the guest down in the auto-converge way.

Note that this modification is kind of a code cleanup,
because block migration does not depend on the auto-converge
capability, so the order of checks can be adjusted.

Signed-off-by: Hyman Huang(黄勇) 
Acked-by: Peter Xu 
---
 migration/ram.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/migration/ram.c b/migration/ram.c
index 521912385d..3e5dff4068 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1200,7 +1200,11 @@ static void migration_trigger_throttle(RAMState *rs)
 /* During block migration the auto-converge logic incorrectly detects
  * that ram migration makes no progress. Avoid this by disabling the
  * throttling logic during the bulk phase of block migration. */
-if (migrate_auto_converge() && !blk_mig_bulk_active()) {
+if (blk_mig_bulk_active()) {
+return;
+}
+
+if (migrate_auto_converge()) {
 /* The following detection logic can be refined later. For now:
Check to see if the ratio between dirtied bytes and the approx.
amount of bytes that just got transferred since the last time
-- 
2.17.1




[PATCH v4 01/10] dirtylimit: Fix overflow when computing MB

2023-02-16 Thread huangy81
From: Hyman Huang(黄勇) 

Coverity points out an overflow problem when computing MB:
dirty_ring_size and TARGET_PAGE_SIZE are both 32 bits, so the
multiplication will be done as a 32-bit operation, which could
overflow. Simplify the formula: (dirty_ring_size * TARGET_PAGE_SIZE)
>> 20 becomes dirty_ring_size >> (20 - TARGET_PAGE_BITS), which avoids
the 32-bit multiplication (e.g. with TARGET_PAGE_BITS == 12 this is
dirty_ring_size >> 8).

Meanwhile, fix a spelling mistake in a variable name.

Reported-by: Peter Maydell 
Signed-off-by: Peter Maydell 
Signed-off-by: Richard Henderson 
Signed-off-by: Hyman Huang(黄勇) 
Reviewed-by: Peter Xu 
---
 softmmu/dirtylimit.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index c56f0f58c8..065ed18afc 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -235,14 +235,14 @@ static inline int64_t 
dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
 {
 static uint64_t max_dirtyrate;
 uint32_t dirty_ring_size = kvm_dirty_ring_size();
-uint64_t dirty_ring_size_meory_MB =
-dirty_ring_size * TARGET_PAGE_SIZE >> 20;
+uint32_t dirty_ring_size_memory_MB =
+dirty_ring_size >> (20 - TARGET_PAGE_BITS);
 
 if (max_dirtyrate < dirtyrate) {
 max_dirtyrate = dirtyrate;
 }
 
-return dirty_ring_size_meory_MB * 100 / max_dirtyrate;
+return dirty_ring_size_memory_MB * 100ULL / max_dirtyrate;
 }
 
 static inline bool dirtylimit_done(uint64_t quota,
-- 
2.17.1




[PATCH v4 02/10] softmmu/dirtylimit: Add parameter check for hmp "set_vcpu_dirty_limit"

2023-02-16 Thread huangy81
From: Hyman Huang(黄勇) 

The dirty_rate parameter of the hmp command "set_vcpu_dirty_limit" is
invalid if it is less than 0, so add a parameter check for it.

Note that this patch also deletes the unsolicited help message and
cleans up the code.
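
The HMP command wraps the QMP set-vcpu-dirty-limit command; a minimal
QMP sketch of the same operation (the 200 MB/s value is only an example):

  {"execute": "set-vcpu-dirty-limit", "arguments": {"dirty-rate": 200}}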

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Markus Armbruster 
Reviewed-by: Peter Xu 
---
 softmmu/dirtylimit.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 065ed18afc..dcab9bf2b1 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -514,14 +514,15 @@ void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict 
*qdict)
 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
 Error *err = NULL;
 
-qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
-if (err) {
-hmp_handle_error(mon, err);
-return;
+if (dirty_rate < 0) {
+error_setg(&err, "invalid dirty page limit %ld", dirty_rate);
+goto out;
 }
 
-monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
-   "dirty limit for virtual CPU]\n");
+qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
+
+out:
+hmp_handle_error(mon, err);
 }
 
 static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
-- 
2.17.1




[PATCH v6 2/3] vhost-user: Refactor the chr_closed_bh

2022-12-21 Thread huangy81
From: Hyman Huang(黄勇) 

Use vhost_user_save_acked_features to implement acked features
saving.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 net/vhost-user.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/net/vhost-user.c b/net/vhost-user.c
index f5cb095..5993e4a 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -260,11 +260,7 @@ static void chr_closed_bh(void *opaque)
 s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
 
 for (i = queues -1; i >= 0; i--) {
-s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
-
-if (s->vhost_net) {
-s->acked_features = vhost_net_get_acked_features(s->vhost_net);
-}
+vhost_user_save_acked_features(ncs[i]);
 }
 
 qmp_set_link(name, false, &err);
-- 
1.8.3.1




[PATCH v6 3/3] vhost-user: Fix the virtio features negotiation flaw

2022-12-21 Thread huangy81
From: Hyman Huang(黄勇) 

This patch aims to fix unexpected negotiated features for
the vhost-user netdev interface.

When openvswitch reconnects to Qemu after an unexpected disconnection
and Qemu therefore starts the vhost_dev, the acked_features field in
vhost_dev is initialized with the value fetched from the acked_features
field in NetVhostUserState, which should be up-to-date at that
moment, but Qemu cannot guarantee that during the time window
of virtio feature negotiation.

So we save the acked_features right after they are configured by the
guest virtio driver, so they can be used to restore the acked_features
field in vhost_dev correctly.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
Signed-off-by: Liuxiangdong 
---
 hw/net/vhost_net-stub.c | 5 +
 hw/net/vhost_net.c  | 9 +
 hw/net/virtio-net.c | 6 ++
 include/net/vhost_net.h | 2 ++
 4 files changed, 22 insertions(+)

diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c
index 9f7daae..66ed5f0 100644
--- a/hw/net/vhost_net-stub.c
+++ b/hw/net/vhost_net-stub.c
@@ -113,3 +113,8 @@ int vhost_net_virtqueue_restart(VirtIODevice *vdev, 
NetClientState *nc,
 {
 return 0;
 }
+
+void vhost_net_save_acked_features(NetClientState *nc)
+{
+
+}
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 043058f..984b130 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -144,6 +144,15 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net)
 return net->dev.acked_features;
 }
 
+void vhost_net_save_acked_features(NetClientState *nc)
+{
+#ifdef CONFIG_VHOST_NET_USER
+if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
+vhost_user_save_acked_features(nc);
+}
+#endif
+}
+
 static int vhost_net_get_fd(NetClientState *backend)
 {
 switch (backend->info->type) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 9cbdfa5..105fc30 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -980,6 +980,12 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint64_t features)
 continue;
 }
 vhost_net_ack_features(get_vhost_net(nc->peer), features);
+
+/*
+ * keep acked_features in NetVhostUserState up-to-date so it
+ * can't miss any features configured by guest virtio driver.
+ */
+vhost_net_save_acked_features(nc->peer);
 }
 
 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 40b9a40..dfb1375 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -52,4 +52,6 @@ void vhost_net_virtqueue_reset(VirtIODevice *vdev, 
NetClientState *nc,
int vq_index);
 int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
 int vq_index);
+
+void vhost_net_save_acked_features(NetClientState *nc);
 #endif
-- 
1.8.3.1




[PATCH v6 1/3] vhost-user: Refactor vhost acked features saving

2022-12-21 Thread huangy81
From: Hyman Huang(黄勇) 

Abstract vhost acked features saving into
vhost_user_save_acked_features and export it as a util function.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 include/net/vhost-user.h |  1 +
 net/vhost-user.c | 21 +++--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
index 5bcd8a6..35bf619 100644
--- a/include/net/vhost-user.h
+++ b/include/net/vhost-user.h
@@ -14,5 +14,6 @@
 struct vhost_net;
 struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
 uint64_t vhost_user_get_acked_features(NetClientState *nc);
+void vhost_user_save_acked_features(NetClientState *nc);
 
 #endif /* VHOST_USER_H */
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 3a6b90d..f5cb095 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -45,10 +45,23 @@ uint64_t vhost_user_get_acked_features(NetClientState *nc)
 return s->acked_features;
 }
 
-static void vhost_user_stop(int queues, NetClientState *ncs[])
+void vhost_user_save_acked_features(NetClientState *nc)
 {
 NetVhostUserState *s;
+
+s = DO_UPCAST(NetVhostUserState, nc, nc);
+if (s->vhost_net) {
+uint64_t features = vhost_net_get_acked_features(s->vhost_net);
+if (features) {
+s->acked_features = features;
+}
+}
+}
+
+static void vhost_user_stop(int queues, NetClientState *ncs[])
+{
 int i;
+NetVhostUserState *s;
 
 for (i = 0; i < queues; i++) {
 assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);
@@ -56,11 +69,7 @@ static void vhost_user_stop(int queues, NetClientState 
*ncs[])
 s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
 
 if (s->vhost_net) {
-/* save acked features */
-uint64_t features = vhost_net_get_acked_features(s->vhost_net);
-if (features) {
-s->acked_features = features;
-}
+vhost_user_save_acked_features(ncs[i]);
 vhost_net_cleanup(s->vhost_net);
 }
 }
-- 
1.8.3.1




[PATCH v6 0/3] Fix the virtio features negotiation flaw

2022-12-21 Thread huangy81
From: Hyman Huang(黄勇) 

v6:
-rebase on master
-fix the compiling issue:
 the vhost_user_save_acked_features symbol only exists when
 "vhost-user" is configured. Add a macro guard before
 calling vhost_user_save_acked_features.

Thanks Michael for pointing out that issue in time.

Please review,

Yong

v5(resend):
-rebase on master

v5:
-fix the assert statement in [PATCH v4 3/3], reported by
 xiangdong. 

v4:
-rebase on master
-add stub function to fix build errors
-code clean on [PATCH v2 1/2]: drop 'cleanup' parameter in
 vhost_user_save_acked_features.
-code clean on [PATCH v2 2/2]: make refactor of chr_closed_bh
 a standalone patch.

Above changes are suggested by Michael and thanks very much.

Please review,

Yong

v3:
-rebase on master
-code clean on [PATCH v2 1/2]: keep the commit self-consistent and
 do not modify the logic of saving acked_features. Just abstract the
 util function.
-modify the [PATCH v2 2/2] logic: change the behavior of saving
 acked_features in chr_closed_bh: saving acked_features only if
 features aren't 0. For the case of 0, we implement it in
 virtio_net_set_features function, which will save the acked_features
 in advance, including assign 0 to acked_features.

v2:
Fix the typo in subject of [PATCH v2 2/2] 

v1:
This is version 1 of the series and it is exactly the same as the
RFC version, but fixes a typo in the subject, which was reported by Michael.

As for the test for the behavior suggested by Michael, IMHO, it could be
posted in another series, since I found that testing the negotiation
behavior using the QGraph Test Framework requires more work than I thought.

The test patch may implement the following logic...
1. Introduce a fresh new qmp command to query netdev info, which shows
   the NetClient status including guest features and acked_features.
2. Use the vhost-user QGraph Test to check the behavior of the vhost user
   protocol cmd VHOST_USER_SET_FEATURES.
3. Add acked_features into TestServer, which receives the features
   set by QEMU.
4. Compare the acked_features in TestServer with the acked_features
   in the output of the qmp query command.

Patch for RFC can be found in the following:
https://patchew.org/QEMU/20220926063641.25038-1-huang...@chinatelecom.cn/

This patchset aims to fix the unexpected negotiated features for
the vhost-user netdev interface.

Steps to reproduce the issue:
Prepare a vm (CentOS 8 in my work scenario) with a vhost-user
backend interface and configure qemu in server mode, so dpdk
would connect to qemu's unix socket periodically.

1. start the vm in the background and restart the openvswitch service
   concurrently and repeatedly while the vm is starting.

2. check if the negotiated virtio features of the port are "0x4000" at
   the dpdk side by executing:
   ovs-vsctl list interface | grep features | grep {port_socket_path}

3. if the features equal "0x4000", go into the vm and check if sending
   an arp packet works, executing:
   arping {IP_ADDR}
   if the vm interface is configured to boot with the dhcp protocol, it
   would get no ip.

After doing the above steps, we'll find that arping does not work; the
ovs on the host side has forwarded unexpected arp packets, which have 0x
prepended to the head of the ethernet frame.  Though qemu reports some
errors when reading/writing cmds of the vhost protocol during the process
of vm start, like the following:

"Failed to set msg fds"
"vhost VQ 0 ring restore failed: -22: Invalid argument (22)"

The vm does not stop or report a more suggestive error message; it seems
that everything is ok.

The root cause is that the dpdk port negotiated nothing but the single
VHOST_USER_F_PROTOCOL_FEATURES feature with the vhost-user interface at
the qemu side, which is unexpected behavior. qemu only loads
VHOST_USER_F_PROTOCOL_FEATURES on VHOST_USER_SET_FEATURES and loses
the guest features configured by the front-end virtio driver via the
VIRTIO_PCI_COMMON_GF addr, which are stored in the acked_features field
of struct vhost_dev.

To explain how the acked_features disappear, we may need to know the
lifecycle of acked_features in vhost_dev during feature negotiation.

1. qemu initializes the acked_features field of struct vhost_dev in
   vhost_net_init() by calling vhost_net_ack_features(); the init value is
   fetched from the acked_features field of struct NetVhostUserState, which
   plays the backup role after vhost stops or the unix socket is closed.
   The first time, the acked_features of struct NetVhostUserState is 0,
   so the init value of vhost_dev's acked_features is also 0.

2. when the guest virtio driver sets features, qemu accepts the features
   and calls virtio_set_features to store the features as acked_features
   in vhost_dev.

3. when the unix socket is closed or the vhost_dev device doesn't work and
   is stopped unexpectedly, qemu will call chr_closed_bh or vhost_user_stop,
   which will copy acked_features from vhost_dev to NetVhostUserState and
   clean up the vhost_dev. Since the virtio driver is not allowed to set
   features once the status of the virtio device changes to
   VIRTIO_CONFIG_S_FEATURE_OK, qemu needs to back up 

[PATCH RESEND v5 3/3] vhost-user: Fix the virtio features negotiation flaw

2022-12-20 Thread huangy81
From: Hyman Huang(黄勇) 

This patch aims to fix unexpected negotiated features for
the vhost-user netdev interface.

When openvswitch reconnects to Qemu after an unexpected disconnection
and Qemu therefore starts the vhost_dev, the acked_features field in
vhost_dev is initialized with the value fetched from the acked_features
field in NetVhostUserState, which should be up-to-date at that
moment, but Qemu cannot guarantee that during the time window
of virtio feature negotiation.

So we save the acked_features right after they are configured by the
guest virtio driver, so they can be used to restore the acked_features
field in vhost_dev correctly.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
Signed-off-by: Liuxiangdong 
---
 hw/net/vhost_net-stub.c | 5 +
 hw/net/vhost_net.c  | 7 +++
 hw/net/virtio-net.c | 6 ++
 include/net/vhost_net.h | 2 ++
 4 files changed, 20 insertions(+)

diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c
index 9f7daae..66ed5f0 100644
--- a/hw/net/vhost_net-stub.c
+++ b/hw/net/vhost_net-stub.c
@@ -113,3 +113,8 @@ int vhost_net_virtqueue_restart(VirtIODevice *vdev, 
NetClientState *nc,
 {
 return 0;
 }
+
+void vhost_net_save_acked_features(NetClientState *nc)
+{
+
+}
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 043058f..89866c3 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -144,6 +144,13 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net)
 return net->dev.acked_features;
 }
 
+void vhost_net_save_acked_features(NetClientState *nc)
+{
+if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
+vhost_user_save_acked_features(nc);
+}
+}
+
 static int vhost_net_get_fd(NetClientState *backend)
 {
 switch (backend->info->type) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 9cbdfa5..105fc30 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -980,6 +980,12 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint64_t features)
 continue;
 }
 vhost_net_ack_features(get_vhost_net(nc->peer), features);
+
+/*
+ * keep acked_features in NetVhostUserState up-to-date so it
+ * can't miss any features configured by guest virtio driver.
+ */
+vhost_net_save_acked_features(nc->peer);
 }
 
 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 40b9a40..dfb1375 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -52,4 +52,6 @@ void vhost_net_virtqueue_reset(VirtIODevice *vdev, 
NetClientState *nc,
int vq_index);
 int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
 int vq_index);
+
+void vhost_net_save_acked_features(NetClientState *nc);
 #endif
-- 
1.8.3.1




[PATCH RESEND v5 2/3] vhost-user: Refactor the chr_closed_bh

2022-12-20 Thread huangy81
From: Hyman Huang(黄勇) 

Use vhost_user_save_acked_features to implement acked features
saving.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 net/vhost-user.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/net/vhost-user.c b/net/vhost-user.c
index f5cb095..5993e4a 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -260,11 +260,7 @@ static void chr_closed_bh(void *opaque)
 s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
 
 for (i = queues -1; i >= 0; i--) {
-s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
-
-if (s->vhost_net) {
-s->acked_features = vhost_net_get_acked_features(s->vhost_net);
-}
+vhost_user_save_acked_features(ncs[i]);
 }
 
 qmp_set_link(name, false, &err);
-- 
1.8.3.1




[PATCH RESEND v5 1/3] vhost-user: Refactor vhost acked features saving

2022-12-20 Thread huangy81
From: Hyman Huang(黄勇) 

Abstract vhost acked features saving into
vhost_user_save_acked_features and export it as a util function.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 include/net/vhost-user.h |  1 +
 net/vhost-user.c | 21 +++--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
index 5bcd8a6..35bf619 100644
--- a/include/net/vhost-user.h
+++ b/include/net/vhost-user.h
@@ -14,5 +14,6 @@
 struct vhost_net;
 struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
 uint64_t vhost_user_get_acked_features(NetClientState *nc);
+void vhost_user_save_acked_features(NetClientState *nc);
 
 #endif /* VHOST_USER_H */
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 3a6b90d..f5cb095 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -45,10 +45,23 @@ uint64_t vhost_user_get_acked_features(NetClientState *nc)
 return s->acked_features;
 }
 
-static void vhost_user_stop(int queues, NetClientState *ncs[])
+void vhost_user_save_acked_features(NetClientState *nc)
 {
 NetVhostUserState *s;
+
+s = DO_UPCAST(NetVhostUserState, nc, nc);
+if (s->vhost_net) {
+uint64_t features = vhost_net_get_acked_features(s->vhost_net);
+if (features) {
+s->acked_features = features;
+}
+}
+}
+
+static void vhost_user_stop(int queues, NetClientState *ncs[])
+{
 int i;
+NetVhostUserState *s;
 
 for (i = 0; i < queues; i++) {
 assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);
@@ -56,11 +69,7 @@ static void vhost_user_stop(int queues, NetClientState 
*ncs[])
 s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
 
 if (s->vhost_net) {
-/* save acked features */
-uint64_t features = vhost_net_get_acked_features(s->vhost_net);
-if (features) {
-s->acked_features = features;
-}
+vhost_user_save_acked_features(ncs[i]);
 vhost_net_cleanup(s->vhost_net);
 }
 }
-- 
1.8.3.1




[PATCH RESEND v5 0/3] Fix the virtio features negotiation flaw

2022-12-20 Thread huangy81
From: Hyman Huang(黄勇) 

v5(resend):
-rebase on master

v5:
-fix the assert statement in [PATCH v4 3/3], reported by
 xiangdong. 

v4:
-rebase on master
-add stub function to fix build errors
-code clean on [PATCH v2 1/2]: drop 'cleanup' parameter in
 vhost_user_save_acked_features.
-code clean on [PATCH v2 2/2]: make refactor of chr_closed_bh
 a standalone patch.

Above changes are suggested by Michael and thanks very much.

Please review,

Yong

v3:
-rebase on master
-code clean on [PATCH v2 1/2]: keep the commit self-consistent and
 do not modify the logic of saving acked_features. Just abstract the
 util function.
-modify the [PATCH v2 2/2] logic: change the behavior of saving
 acked_features in chr_closed_bh: saving acked_features only if
 features aren't 0. For the case of 0, we implement it in
 virtio_net_set_features function, which will save the acked_features
 in advance, including assign 0 to acked_features.

v2:
Fix the typo in subject of [PATCH v2 2/2] 

v1:
This is version 1 of the series and it is exactly the same as the
RFC version, but fixes a typo in the subject, which was reported by Michael.

As for the test for the behavior suggested by Michael, IMHO, it could be
posted in another series, since I found that testing the negotiation
behavior using the QGraph Test Framework requires more work than I thought.

The test patch may implement the following logic...
1. Introduce a fresh new qmp command to query netdev info, which shows
   the NetClient status including guest features and acked_features.
2. Use the vhost-user QGraph Test to check the behavior of the vhost user
   protocol cmd VHOST_USER_SET_FEATURES.
3. Add acked_features into TestServer, which receives the features
   set by QEMU.
4. Compare the acked_features in TestServer with the acked_features
   in the output of the qmp query command.

Patch for RFC can be found in the following:
https://patchew.org/QEMU/20220926063641.25038-1-huang...@chinatelecom.cn/

This patchset aims to fix the unexpected negotiated features for
the vhost-user netdev interface.

Steps to reproduce the issue:
Prepare a vm (CentOS 8 in my work scenario) with a vhost-user
backend interface and configure qemu in server mode, so dpdk
would connect to qemu's unix socket periodically.

1. start the vm in the background and restart the openvswitch service
   concurrently and repeatedly while the vm is starting.

2. check if the negotiated virtio features of the port are "0x4000" at
   the dpdk side by executing:
   ovs-vsctl list interface | grep features | grep {port_socket_path}

3. if the features equal "0x4000", go into the vm and check if sending
   an arp packet works, executing:
   arping {IP_ADDR}
   if the vm interface is configured to boot with the dhcp protocol, it
   would get no ip.

After doing the above steps, we'll find that arping does not work; the
ovs on the host side has forwarded unexpected arp packets, which have 0x
prepended to the head of the ethernet frame.  Though qemu reports some
errors when reading/writing cmds of the vhost protocol during the process
of vm start, like the following:

"Failed to set msg fds"
"vhost VQ 0 ring restore failed: -22: Invalid argument (22)"

The vm does not stop or report a more suggestive error message; it seems
that everything is ok.

The root cause is that the DPDK port negotiated nothing but the single
VHOST_USER_F_PROTOCOL_FEATURES feature with the vhost-user interface on the
QEMU side, which is unexpected behavior. QEMU only loads
VHOST_USER_F_PROTOCOL_FEATURES on VHOST_USER_SET_FEATURES and loses
the guest features configured by the front-end virtio driver via the
VIRTIO_PCI_COMMON_GF address, which are stored in the acked_features field
of struct vhost_dev.

To explain how the acked_features disappear, we may need to know the
lifecycle of acked_features in vhost_dev during feature negotiation.

1. QEMU initializes the acked_features field of struct vhost_dev in
   vhost_net_init() by calling vhost_net_ack_features(); the initial value is
   fetched from the acked_features field of struct NetVhostUserState, which
   plays the backup role after vhost stops or the unix socket is closed.
   The first time, the acked_features of struct NetVhostUserState is 0,
   so the initial value of vhost_dev's acked_features is also 0.

2. when the guest virtio driver sets features, QEMU accepts them and
   calls virtio_set_features to store the features as acked_features in
   vhost_dev.

3. when the unix socket is closed, or the vhost_dev device stops working and
   is stopped unexpectedly, QEMU calls chr_closed_bh or vhost_user_stop,
   which copy acked_features from vhost_dev to NetVhostUserState and
   clean up the vhost_dev. Since the virtio driver is not allowed to set
   features once the status of the virtio device changes to
   VIRTIO_CONFIG_S_FEATURE_OK, QEMU needs to back them up in case of loss.

4. once the unix socket returns to normal and gets connected, QEMU calls
   vhost_user_start to restore the vhost_dev and fetch the
   acked_features stored in NetVhostUserState previously (a minimal
   sketch of this flow follows below).
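
To make the flow concrete, here is a minimal standalone sketch (not QEMU
code: the structs and helpers below are simplified stand-ins for struct
vhost_dev, struct NetVhostUserState and the functions named in steps 1-4)
of how acked_features is backed up on disconnect and restored on reconnect:

/* Simplified stand-ins; field names mirror the real structs. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct vhost_dev_sim      { uint64_t acked_features; };  /* stand-in for struct vhost_dev */
struct net_vhost_user_sim { uint64_t acked_features; };  /* stand-in for struct NetVhostUserState */

/* step 2: the guest virtio driver sets features, stored in the live vhost_dev */
static void guest_set_features(struct vhost_dev_sim *dev, uint64_t features)
{
    dev->acked_features = features;
}

/* step 3: on disconnect, back the features up into NetVhostUserState
 * (before this series, only done here and only when features != 0) */
static void backend_disconnect(struct net_vhost_user_sim *s, struct vhost_dev_sim *dev)
{
    if (dev->acked_features) {
        s->acked_features = dev->acked_features;
    }
    dev->acked_features = 0;            /* the vhost_dev is cleaned up */
}

/* steps 1 and 4: on (re)connect, the vhost_dev is initialized from the backup */
static void backend_reconnect(struct net_vhost_user_sim *s, struct vhost_dev_sim *dev)
{
    dev->acked_features = s->acked_features;
}

int main(void)
{
    struct net_vhost_user_sim s = { 0 };
    struct vhost_dev_sim dev = { 0 };

    backend_reconnect(&s, &dev);           /* first connect: the backup is still 0 */
    guest_set_features(&dev, 0x130ef183);  /* guest negotiates features (arbitrary example value) */
    backend_disconnect(&s, &dev);          /* socket closes: features get backed up */
    backend_reconnect(&s, &dev);           /* reconnect: features are restored */

    printf("restored acked_features: 0x%" PRIx64 "\n", dev.acked_features);
    return 0;
}

The point the series addresses is that step 3 is the only place where the
backup gets refreshed, so a reconnect that happens before it restores stale
(or zero) features; patch 3/3 therefore saves the features again at
virtio_net_set_features time so the backup is always current.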

The above flow works fine in the normal scenarios, but it 

[PATCH v5 3/3] vhost-user: Fix the virtio features negotiation flaw

2022-12-19 Thread huangy81
From: Hyman Huang(黄勇) 

This patch aims to fix unexpected feature negotiation for the
vhost-user netdev interface.

When openvswitch reconnects to QEMU after an unexpected disconnection
and QEMU therefore restarts the vhost_dev, the acked_features field in
vhost_dev is initialized with the value fetched from the acked_features
field in NetVhostUserState, which should be up-to-date at that
moment, but QEMU cannot actually guarantee that during the time window
of virtio feature negotiation.

So we save the acked_features right after they are configured by the
guest virtio driver, so they can be used to restore the acked_features
field in vhost_dev correctly.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
Signed-off-by: Liuxiangdong 
---
 hw/net/vhost_net-stub.c | 5 +
 hw/net/vhost_net.c  | 7 +++
 hw/net/virtio-net.c | 6 ++
 include/net/vhost_net.h | 1 +
 4 files changed, 19 insertions(+)

diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c
index 89d71cf..199b099 100644
--- a/hw/net/vhost_net-stub.c
+++ b/hw/net/vhost_net-stub.c
@@ -101,3 +101,8 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
 {
 return 0;
 }
+
+void vhost_net_save_acked_features(NetClientState *nc)
+{
+
+}
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 30379d2..4c2698d 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -141,6 +141,13 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net)
 return net->dev.acked_features;
 }
 
+void vhost_net_save_acked_features(NetClientState *nc)
+{
+if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
+vhost_user_save_acked_features(nc);
+}
+}
+
 static int vhost_net_get_fd(NetClientState *backend)
 {
 switch (backend->info->type) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 1067e72..5f22865 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -917,6 +917,12 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint64_t features)
 continue;
 }
 vhost_net_ack_features(get_vhost_net(nc->peer), features);
+
+/*
+ * keep acked_features in NetVhostUserState up-to-date so it
+ * can't miss any features configured by guest virtio driver.
+ */
+vhost_net_save_acked_features(nc->peer);
 }
 
 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 387e913..7bdbf48 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -48,4 +48,5 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net);
 
 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
 
+void vhost_net_save_acked_features(NetClientState *nc);
 #endif
-- 
1.8.3.1




[PATCH v5 1/3] vhost-user: Refactor vhost acked features saving

2022-12-19 Thread huangy81
From: Hyman Huang(黄勇) 

Abstract the saving of vhost acked features into
vhost_user_save_acked_features, and export it as a utility function.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 include/net/vhost-user.h |  1 +
 net/vhost-user.c | 21 +++--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
index 5bcd8a6..35bf619 100644
--- a/include/net/vhost-user.h
+++ b/include/net/vhost-user.h
@@ -14,5 +14,6 @@
 struct vhost_net;
 struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
 uint64_t vhost_user_get_acked_features(NetClientState *nc);
+void vhost_user_save_acked_features(NetClientState *nc);
 
 #endif /* VHOST_USER_H */
diff --git a/net/vhost-user.c b/net/vhost-user.c
index b1a0247..40a2fe8 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -45,10 +45,23 @@ uint64_t vhost_user_get_acked_features(NetClientState *nc)
 return s->acked_features;
 }
 
-static void vhost_user_stop(int queues, NetClientState *ncs[])
+void vhost_user_save_acked_features(NetClientState *nc)
 {
 NetVhostUserState *s;
+
+s = DO_UPCAST(NetVhostUserState, nc, nc);
+if (s->vhost_net) {
+uint64_t features = vhost_net_get_acked_features(s->vhost_net);
+if (features) {
+s->acked_features = features;
+}
+}
+}
+
+static void vhost_user_stop(int queues, NetClientState *ncs[])
+{
 int i;
+NetVhostUserState *s;
 
 for (i = 0; i < queues; i++) {
 assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);
@@ -56,11 +69,7 @@ static void vhost_user_stop(int queues, NetClientState 
*ncs[])
 s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
 
 if (s->vhost_net) {
-/* save acked features */
-uint64_t features = vhost_net_get_acked_features(s->vhost_net);
-if (features) {
-s->acked_features = features;
-}
+vhost_user_save_acked_features(ncs[i]);
 vhost_net_cleanup(s->vhost_net);
 }
 }
-- 
1.8.3.1




[PATCH v5 2/3] vhost-user: Refactor the chr_closed_bh

2022-12-19 Thread huangy81
From: Hyman Huang(黄勇) 

Use vhost_user_save_acked_features to implement the saving of
acked features.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 net/vhost-user.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/net/vhost-user.c b/net/vhost-user.c
index 40a2fe8..4185cfa 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -260,11 +260,7 @@ static void chr_closed_bh(void *opaque)
 s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
 
 for (i = queues -1; i >= 0; i--) {
-s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
-
-if (s->vhost_net) {
-s->acked_features = vhost_net_get_acked_features(s->vhost_net);
-}
+vhost_user_save_acked_features(ncs[i]);
 }
 
 qmp_set_link(name, false, &err);
-- 
1.8.3.1




[PATCH v5 0/3] Fix the virtio features negotiation flaw

2022-12-19 Thread huangy81
From: Hyman Huang(黄勇) 

v5:
-fix the assert statement in [PATCH v4 3/3], reported by
 xiangdong. 

v4:
-rebase on master
-add stub function to fix build errors
-code clean on [PATCH v2 1/2]: drop 'cleanup' parameter in
 vhost_user_save_acked_features.
-code clean on [PATCH v2 2/2]: make refactor of chr_closed_bh
 a standalone patch.

The above changes were suggested by Michael; thanks very much.

Please review,

Yong

v3:
-rebase on master
-code clean on [PATCH v2 1/2]: keep the commit self-consistent and
 do not modify the logic of saving acked_features. Just abstract the
 util function.
-modify the [PATCH v2 2/2] logic: change the behavior of saving
 acked_features in chr_closed_bh: save acked_features only if
 the features aren't 0. For the case of 0, we handle it in the
 virtio_net_set_features function, which saves the acked_features
 in advance, including assigning 0 to acked_features.

v2:
Fix the typo in the subject of [PATCH v2 2/2].

v1:
This is version 1 of the series and it is exactly the same as the
RFC version, except for fixing a typo in the subject reported by Michael.

As for the test of the behavior suggested by Michael, IMHO it could be
posted in another series, since I found that testing the negotiation
behavior using the QGraph Test Framework requires more work than I thought.

The test patch may implement the following logic:
1. Introduce a fresh new QMP command to query netdev info, which shows
   the NetClient status including guest features and acked_features.
2. Use the vhost-user QGraph test to check the behavior of the vhost-user
   protocol command VHOST_USER_SET_FEATURES.
3. Add acked_features to TestServer, which receives the features
   set by QEMU.
4. Compare the acked_features in TestServer with the acked_features
   in the output of the QMP query command.

Patch for RFC can be found in the following:
https://patchew.org/QEMU/20220926063641.25038-1-huang...@chinatelecom.cn/

This patchset aims to fix unexpected feature negotiation for the
vhost-user netdev interface.

Steps to reproduce the issue:
Prepare a VM (CentOS 8 in my work scenario) with a vhost-user
backend interface and configure QEMU in server mode, so DPDK
will connect to QEMU's unix socket periodically.

1. start the VM in the background and restart the openvswitch service
   concurrently and repeatedly while the VM is starting.

2. check whether the negotiated virtio features of the port are "0x4000" on
   the DPDK side by executing:
   ovs-vsctl list interface | grep features | grep {port_socket_path}

3. if the features equal "0x4000", go into the VM and check whether sending
   ARP packets works by executing:
   arping {IP_ADDR}
   if the VM interface is configured to get its address via DHCP, it
   will get no IP.

After doing the above steps, we'll find that arping does not work: the OVS on
the host side has forwarded unexpected ARP packets, which have 0x…
prepended at the head of the ethernet frame.  QEMU does report some errors
when reading/writing vhost protocol commands during VM start,
like the following:

"Failed to set msg fds"
"vhost VQ 0 ring restore failed: -22: Invalid argument (22)"

However, the VM does not stop or report a more suggestive error message; it
seems that everything is OK.

The root cause is that the DPDK port negotiated nothing but the single
VHOST_USER_F_PROTOCOL_FEATURES feature with the vhost-user interface on the
QEMU side, which is unexpected behavior. QEMU only loads
VHOST_USER_F_PROTOCOL_FEATURES on VHOST_USER_SET_FEATURES and loses
the guest features configured by the front-end virtio driver via the
VIRTIO_PCI_COMMON_GF address, which are stored in the acked_features field
of struct vhost_dev.

To explain how the acked_features disappear, we may need to know the
lifecycle of acked_features in vhost_dev during feature negotiation.

1. QEMU initializes the acked_features field of struct vhost_dev in
   vhost_net_init() by calling vhost_net_ack_features(); the initial value is
   fetched from the acked_features field of struct NetVhostUserState, which
   plays the backup role after vhost stops or the unix socket is closed.
   The first time, the acked_features of struct NetVhostUserState is 0,
   so the initial value of vhost_dev's acked_features is also 0.

2. when the guest virtio driver sets features, QEMU accepts them and
   calls virtio_set_features to store the features as acked_features in
   vhost_dev.

3. when the unix socket is closed, or the vhost_dev device stops working and
   is stopped unexpectedly, QEMU calls chr_closed_bh or vhost_user_stop,
   which copy acked_features from vhost_dev to NetVhostUserState and
   clean up the vhost_dev. Since the virtio driver is not allowed to set
   features once the status of the virtio device changes to
   VIRTIO_CONFIG_S_FEATURE_OK, QEMU needs to back them up in case of loss.

4. once the unix socket returns to normal and gets connected, QEMU calls
   vhost_user_start to restore the vhost_dev and fetch the
   acked_features stored in NetVhostUserState previously.

The above flow works fine in the normal scenarios, but it doesn't cover
the scenario that 

[PATCH RESEND v3 02/10] softmmu/dirtylimit: Add parameter check for hmp "set_vcpu_dirty_limit"

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

The dirty_rate parameter of the hmp command "set_vcpu_dirty_limit" is invalid
if it is less than 0, so add a parameter check for it.

Note that this patch also deletes the unsolicited help message and
cleans up the code.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Markus Armbruster 
Reviewed-by: Peter Xu 
---
 softmmu/dirtylimit.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 940d238..53b66d5 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -515,14 +515,15 @@ void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict 
*qdict)
 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
 Error *err = NULL;
 
-qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
-if (err) {
-hmp_handle_error(mon, err);
-return;
+if (dirty_rate < 0) {
+error_setg(&err, "invalid dirty page limit %ld", dirty_rate);
+goto out;
 }
 
-monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
-   "dirty limit for virtual CPU]\n");
+qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
+
+out:
+hmp_handle_error(mon, err);
 }
 
 static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
-- 
1.8.3.1




[PATCH RESEND v3 06/10] migration: Introduce dirty-limit capability

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce the migration dirty-limit capability, which can
be turned on before live migration to limit the dirty
page rate during live migration.

Introduce the migrate_dirty_limit function to help check
whether the dirty-limit capability is enabled during live migration.

Meanwhile, refactor vcpu_dirty_rate_stat_collect
so that period can be configured instead of hardcoded.

The dirty-limit capability is kind of like auto-converge,
but it uses the dirty limit instead of the traditional cpu-throttle
to throttle the guest down. To enable this feature, turn on
the dirty-limit capability before live migration using
migrate-set-capabilities, and set the parameters
"x-vcpu-dirty-limit-period" and "vcpu-dirty-limit" suitably
to speed up convergence.

Signed-off-by: Hyman Huang(黄勇) 
Acked-by: Peter Xu 
---
 migration/migration.c | 25 +
 migration/migration.h |  1 +
 qapi/migration.json   |  4 +++-
 softmmu/dirtylimit.c  | 11 ++-
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index fd11c63..702e7f4 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -61,6 +61,7 @@
 #include "sysemu/cpus.h"
 #include "yank_functions.h"
 #include "sysemu/qtest.h"
+#include "sysemu/kvm.h"
 
 #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling 
*/
 
@@ -1366,6 +1367,20 @@ static bool migrate_caps_check(bool *cap_list,
 }
 }
 
+if (cap_list[MIGRATION_CAPABILITY_DIRTY_LIMIT]) {
+if (cap_list[MIGRATION_CAPABILITY_AUTO_CONVERGE]) {
+error_setg(errp, "dirty-limit conflicts with auto-converge"
+   " only one of them is available currently");
+return false;
+}
+
+if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
+error_setg(errp, "dirty-limit requires KVM with accelerator"
+   " property 'dirty-ring-size' set");
+return false;
+}
+}
+
 return true;
 }
 
@@ -2544,6 +2559,15 @@ bool migrate_auto_converge(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
 }
 
+bool migrate_dirty_limit(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_LIMIT];
+}
+
 bool migrate_zero_blocks(void)
 {
 MigrationState *s;
@@ -4473,6 +4497,7 @@ static Property migration_properties[] = {
 DEFINE_PROP_MIG_CAP("x-zero-copy-send",
 MIGRATION_CAPABILITY_ZERO_COPY_SEND),
 #endif
+DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT),
 
 DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/migration/migration.h b/migration/migration.h
index cdad8ac..7fbb9f8 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -409,6 +409,7 @@ bool migrate_ignore_shared(void);
 bool migrate_validate_uuid(void);
 
 bool migrate_auto_converge(void);
+bool migrate_dirty_limit(void);
 bool migrate_use_multifd(void);
 bool migrate_pause_before_switchover(void);
 int migrate_multifd_channels(void);
diff --git a/qapi/migration.json b/qapi/migration.json
index 7e868a1..6055fdc 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -477,6 +477,8 @@
 #will be handled faster.  This is a performance feature and
 #should not affect the correctness of postcopy migration.
 #(since 7.1)
+# @dirty-limit: Use dirty-limit to throttle down guest if enabled.
+#   (since 7.3)
 #
 # Features:
 # @unstable: Members @x-colo and @x-ignore-shared are experimental.
@@ -492,7 +494,7 @@
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
'validate-uuid', 'background-snapshot',
-   'zero-copy-send', 'postcopy-preempt'] }
+   'zero-copy-send', 'postcopy-preempt', 'dirty-limit'] }
 
 ##
 # @MigrationCapabilityStatus:
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 53b66d5..2a07200 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -23,6 +23,8 @@
 #include "exec/memory.h"
 #include "hw/boards.h"
 #include "sysemu/kvm.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
 #include "trace.h"
 
 /*
@@ -75,11 +77,18 @@ static bool dirtylimit_quit;
 
 static void vcpu_dirty_rate_stat_collect(void)
 {
+MigrationState *s = migrate_get_current();
 VcpuStat stat;
 int i = 0;
+int64_t period = DIRTYLIMIT_CALC_TIME_MS;
+
+if (migrate_dirty_limit() &&
+migration_is_active(s)) {
+period = s->parameters.x_vcpu_dirty_limit_period;
+}
 
 /* calculate vcpu dirtyrate */
-vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS,
+vcpu_calculate_dirtyrate(period,
  &stat,
  GLOBAL_DIRTY_LIMIT,
  false);
-- 
1.8.3.1




[PATCH RESEND v3 09/10] migration: Export dirty-limit time info for observation

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Export the dirty limit throttle time and the estimated ring full
time, through which we can observe whether the dirty limit takes
effect during live migration.
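
As a back-of-envelope illustration of the ring-full-time estimate (example
numbers only, not QEMU code): the time for a vCPU to fill its dirty ring is
roughly the memory covered by the ring divided by that vCPU's dirty page rate.

#include <stdio.h>

int main(void)
{
    unsigned ring_slots   = 65536;   /* kvm dirty ring entries (example value) */
    unsigned page_kib     = 4;       /* 4 KiB pages (example) */
    unsigned dirtyrate_mb = 512;     /* measured vCPU dirty rate in MB/s (example) */

    unsigned ring_mb = ring_slots * page_kib / 1024;  /* 256 MB covered by the ring */
    unsigned long long full_us = (unsigned long long)ring_mb * 1000000 / dirtyrate_mb;

    printf("ring covers %u MB; at %u MB/s it fills in ~%llu us\n",
           ring_mb, dirtyrate_mb, full_us);
    return 0;
}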

Signed-off-by: Hyman Huang(黄勇) 
---
 include/sysemu/dirtylimit.h |  2 ++
 migration/migration.c   | 10 ++
 monitor/hmp-cmds.c  | 10 ++
 qapi/migration.json | 15 ++-
 softmmu/dirtylimit.c| 39 +++
 5 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h
index 8d2c1f3..f15e01d 100644
--- a/include/sysemu/dirtylimit.h
+++ b/include/sysemu/dirtylimit.h
@@ -34,4 +34,6 @@ void dirtylimit_set_vcpu(int cpu_index,
 void dirtylimit_set_all(uint64_t quota,
 bool enable);
 void dirtylimit_vcpu_execute(CPUState *cpu);
+int64_t dirtylimit_throttle_time_per_full(void);
+int64_t dirtylimit_ring_full_time(void);
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index 127d0fe..3f92389 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -62,6 +62,7 @@
 #include "yank_functions.h"
 #include "sysemu/qtest.h"
 #include "sysemu/kvm.h"
+#include "sysemu/dirtylimit.h"
 
 #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling 
*/
 
@@ -1114,6 +1115,15 @@ static void populate_ram_info(MigrationInfo *info, 
MigrationState *s)
 info->ram->remaining = ram_bytes_remaining();
 info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
 }
+
+if (migrate_dirty_limit() && dirtylimit_in_service()) {
+info->has_dirty_limit_throttle_time_per_full = true;
+info->dirty_limit_throttle_time_per_full =
+dirtylimit_throttle_time_per_full();
+
+info->has_dirty_limit_ring_full_time = true;
+info->dirty_limit_ring_full_time = dirtylimit_us_ring_full();
+}
 }
 
 static void populate_disk_info(MigrationInfo *info)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 9ad6ee5..c3aaba3 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -339,6 +339,16 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->cpu_throttle_percentage);
 }
 
+if (info->has_dirty_limit_throttle_time_per_full) {
+monitor_printf(mon, "dirty-limit throttle time: %" PRIi64 " us\n",
+   info->dirty_limit_throttle_time_per_full);
+}
+
+if (info->has_dirty_limit_ring_full_time) {
+monitor_printf(mon, "dirty-limit ring full time: %" PRIi64 " us\n",
+   info->dirty_limit_ring_full_time);
+}
+
 if (info->has_postcopy_blocktime) {
 monitor_printf(mon, "postcopy blocktime: %u\n",
info->postcopy_blocktime);
diff --git a/qapi/migration.json b/qapi/migration.json
index 6055fdc..ae7d22d 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -242,6 +242,17 @@
 #   Present and non-empty when migration is blocked.
 #   (since 6.0)
 #
+# @dirty-limit-throttle-time-per-full: Maximum throttle time (in microseconds) of virtual
+#  CPUs each dirty ring full round, used to observe
+#  if dirty-limit take effect during live migration.
+#  (since 7.3)
+#
+# @dirty-limit-ring-full-time: Estimated average dirty ring full time (in microseconds)
+#  each dirty ring full round, note that the value equals
+#  dirty ring memory size divided by average dirty page rate
+#  of virtual CPU, which can be used to observe the average
+#  memory load of virtual CPU indirectly. (since 7.3)
+#
 # Since: 0.14
 ##
 { 'struct': 'MigrationInfo',
@@ -259,7 +270,9 @@
'*postcopy-blocktime' : 'uint32',
'*postcopy-vcpu-blocktime': ['uint32'],
'*compression': 'CompressionStats',
-   '*socket-address': ['SocketAddress'] } }
+   '*socket-address': ['SocketAddress'],
+   '*dirty-limit-throttle-time-per-full': 'int64',
+   '*dirty-limit-ring-full-time': 'int64'} }
 
 ##
 # @query-migrate:
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index b63032c..06de099 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -569,6 +569,45 @@ static struct DirtyLimitInfo *dirtylimit_query_vcpu(int 
cpu_index)
 return info;
 }
 
+/* Return the max throttle time of each virtual CPU */
+int64_t dirtylimit_throttle_time_per_full(void)
+{
+CPUState *cpu;
+int64_t max = 0;
+
+CPU_FOREACH(cpu) {
+if (cpu->throttle_us_per_full > max) {
+max = cpu->throttle_us_per_full;
+}
+}
+
+return max;
+}
+
+/*
+ * Estimate average dirty ring full time of each virtual CPU.
+ * Return -1 if guest doesn't dirty memory.
+ */
+int64_t 

[PATCH RESEND v3 01/10] dirtylimit: Fix overflow when computing MB

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Coverity points out an overflow problem when computing MB:
dirty_ring_size and TARGET_PAGE_SIZE are both 32 bits, so the
multiplication will be done as a 32-bit operation, which
could overflow. Simplify the formula.

Meanwhile, fix a spelling mistake in a variable name.
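
For illustration, a standalone demonstration of the overflow and of the
reworked formula (example values, not the QEMU code itself; page_size and
page_bits stand in for TARGET_PAGE_SIZE and TARGET_PAGE_BITS):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t dirty_ring_size = 65536;  /* ring slots (example) */
    uint32_t page_size = 65536;        /* a 64 KiB target page, worst case */
    uint32_t page_bits = 16;

    /* old formula: 65536 * 65536 is computed in 32 bits and wraps to 0 */
    uint64_t old_mb = dirty_ring_size * page_size >> 20;
    /* new formula: shift only, no multiplication, cannot wrap */
    uint32_t new_mb = dirty_ring_size >> (20 - page_bits);

    printf("old formula: %" PRIu64 " MB (wrapped)\n", old_mb);
    printf("new formula: %" PRIu32 " MB\n", new_mb);
    return 0;
}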

Reported-by: Peter Maydell 
Signed-off-by: Peter Maydell 
Signed-off-by: Richard Henderson 
Signed-off-by: Hyman Huang(黄勇) 
Reviewed-by: Peter Xu 
---
 softmmu/dirtylimit.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 1266855..940d238 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -236,14 +236,14 @@ static inline int64_t 
dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
 {
 static uint64_t max_dirtyrate;
 uint32_t dirty_ring_size = kvm_dirty_ring_size();
-uint64_t dirty_ring_size_meory_MB =
-dirty_ring_size * TARGET_PAGE_SIZE >> 20;
+uint32_t dirty_ring_size_memory_MB =
+dirty_ring_size >> (20 - TARGET_PAGE_BITS);
 
 if (max_dirtyrate < dirtyrate) {
 max_dirtyrate = dirtyrate;
 }
 
-return dirty_ring_size_meory_MB * 100 / max_dirtyrate;
+return dirty_ring_size_memory_MB * 100ULL / max_dirtyrate;
 }
 
 static inline bool dirtylimit_done(uint64_t quota,
-- 
1.8.3.1




[PATCH RESEND v3 00/10] migration: introduce dirtylimit capability

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

v3(resend):
- fix the syntax error of the topic.

v3:
This version makes some modifications inspired by Peter and Markus,
as follows:
1. Do the code clean up in [PATCH v2 02/11] suggested by Markus 
2. Replace the [PATCH v2 03/11] with a much simpler patch posted by
   Peter to fix the following bug:
   https://bugzilla.redhat.com/show_bug.cgi?id=2124756
3. Fix the error path of migrate_params_check in [PATCH v2 04/11]
   pointed out by Markus. Enrich the commit message to explain why
   x-vcpu-dirty-limit-period is an unstable parameter.
4. Refactor the dirty-limit convergence algo in [PATCH v2 07/11] 
   suggested by Peter:
   a. apply blk_mig_bulk_active check before enable dirty-limit
   b. drop the unhelpful check function before enable dirty-limit
   c. change the migration_cancel logic, just cancel dirty-limit
  only if dirty-limit capability turned on. 
   d. abstract a code clean commit [PATCH v3 07/10] to adjust
  the check order before enable auto-converge 
5. Change the names of the observed indexes during dirty-limit live
   migration to make them easier to understand. Use the
   maximum throttle time of vCPUs as "dirty-limit-throttle-time-per-full"
6. Fix some grammatical and spelling errors pointed out by Markus
   and enrich the document about the dirty-limit live migration
   observing indexes "dirty-limit-ring-full-time"
   and "dirty-limit-throttle-time-per-full"
7. Change the default value of x-vcpu-dirty-limit-period to 1000ms,
   which is optimal value pointed out in cover letter in that
   testing environment.
8. Drop the 2 guestperf test commits [PATCH v2 10/11],
   [PATCH v2 11/11] and post them with a standalone series in the
   future.

Sincere thanks to Peter and Markus for the passionate, efficient,
and careful comments and suggestions.

Please review.  

Yong

v2: 
This version makes a few modifications compared with
version 1, as follows:
1. fix the overflow issue reported by Peter Maydell
2. add parameter check for hmp "set_vcpu_dirty_limit" command
3. fix the racing issue between dirty ring reaper thread and
   Qemu main thread.
4. add migrate parameter check for x-vcpu-dirty-limit-period
   and vcpu-dirty-limit.
5. add the logic to forbid hmp/qmp commands set_vcpu_dirty_limit,
   cancel_vcpu_dirty_limit during dirty-limit live migration when
   implement dirty-limit convergence algo.
6. add capability check to ensure auto-converge and dirty-limit
   are mutually exclusive.
7. pre-check if kvm dirty ring size is configured before setting
   dirty-limit migrate parameter 

A more comprehensive test was done compared with version 1.

The following is the test environment:
-
a. Host hardware info:

CPU:
Intel(R) Xeon(R) Gold 5218 CPU @ 2.30GHz

CPU(s):  64
On-line CPU(s) list: 0-63
Thread(s) per core:  2
Core(s) per socket:  16
Socket(s):   2
NUMA node(s):2

NUMA node0 CPU(s):   0-15,32-47
NUMA node1 CPU(s):   16-31,48-63

Memory:
Hynix  503Gi

Interface:
Intel Corporation Ethernet Connection X722 for 1GbE (rev 09)
Speed: 1000Mb/s

b. Host software info:

OS: ctyunos release 2
Kernel: 4.19.90-2102.2.0.0066.ctl2.x86_64
Libvirt baseline version:  libvirt-6.9.0
Qemu baseline version: qemu-5.0

c. vm scale
CPU: 4
Memory: 4G
-

All the supplementary test data shown below are based on
the above test environment.

In version 1, we posted test data from UnixBench as follows:

$ taskset -c 8-15 ./Run -i 2 -c 8 {unixbench test item}

host cpu: Intel(R) Xeon(R) Platinum 8378A
host interface speed: 1000Mb/s
  |---------------------+--------+------------+---------------|
  | UnixBench test item | Normal | Dirtylimit | Auto-converge |
  |---------------------+--------+------------+---------------|
  | dhry2reg            | 32800  | 32786      | 25292         |
  | whetstone-double    | 10326  | 10315      | 9847          |
  | pipe                | 15442  | 15271      | 14506         |
  | context1            | 7260   | 6235       | 4514          |
  | spawn               | 3663   | 3317       | 3249          |
  | syscall             | 4669   | 4667       | 3841          |
  |---------------------+--------+------------+---------------|

In version 2, we post supplementary test data that does not use
taskset, making the scenario more general; see below:

$ ./Run

per-vcpu data:
  |---------------------+--------+------------+---------------|
  | UnixBench test item | Normal | Dirtylimit | Auto-converge |
  |---------------------+--------+------------+---------------|
  | dhry2reg            | 2991   | 2902       | 1722          |
  | whetstone-double    | 1018   | 1006       | 627           |
  | Execl Throughput    | 955    | 320        | 660           |
  | File Copy - 1       | 2362   | 805        | 1325

[PATCH RESEND v3 10/10] tests: Add migration dirty-limit capability test

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Add a migration dirty-limit capability test if the kernel supports
the dirty ring.

The migration dirty-limit capability introduces dirty limit
support; two parameters, x-vcpu-dirty-limit-period and
vcpu-dirty-limit, are introduced to implement live
migration with dirty limit.

The test case does the following things:
1. start src and dst VMs and enable the dirty-limit capability
2. start migration and then cancel it to check that dirty limit
   stops working.
3. restart the dst VM
4. start migration and enable the dirty-limit capability
5. check that migration satisfies the convergence condition
   during the pre-switchover phase.

Signed-off-by: Hyman Huang(黄勇) 
---
 tests/qtest/migration-test.c | 154 +++
 1 file changed, 154 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 442998d..03b47f5 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2422,6 +2422,158 @@ static void test_vcpu_dirty_limit(void)
 dirtylimit_stop_vm(vm);
 }
 
+static void migrate_dirty_limit_wait_showup(QTestState *from,
+const int64_t period,
+const int64_t value)
+{
+/* Enable dirty limit capability */
+migrate_set_capability(from, "dirty-limit", true);
+
+/* Set dirty limit parameters */
+migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period);
+migrate_set_parameter_int(from, "vcpu-dirty-limit", value);
+
+/* Make sure migrate can't converge */
+migrate_ensure_non_converge(from);
+
+/* To check limit rate after precopy */
+migrate_set_capability(from, "pause-before-switchover", true);
+
+/* Wait for the serial output from the source */
+wait_for_serial("src_serial");
+}
+
+/*
+ * This test does:
+ *  source   target
+ *   migrate_incoming
+ * migrate
+ * migrate_cancel
+ *   restart target
+ * migrate
+ *
+ *  And see that if dirty limit works correctly
+ */
+static void test_migrate_dirty_limit(void)
+{
+g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
+QTestState *from, *to;
+int64_t remaining, throttle_us_per_full;
+/*
+ * We want the test to be stable and as fast as possible.
+ * E.g., with 1Gb/s bandwidth migration may pass without dirty limit,
+ * so we need to decrease the bandwidth.
+ */
+const int64_t dirtylimit_period = 1000, dirtylimit_value = 50;
+const int64_t max_bandwidth = 4; /* ~400Mb/s */
+const int64_t downtime_limit = 250; /* 250ms */
+/*
+ * We migrate through unix-socket (> 500Mb/s).
+ * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s).
+ * So, we can predict expected_threshold
+ */
+const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000;
+int max_try_count = 10;
+MigrateCommon args = {
+.start = {
+.hide_stderr = true,
+.use_dirty_ring = true,
+},
+.listen_uri = uri,
+.connect_uri = uri,
+};
+
+/* Start src, dst vm */
+if (test_migrate_start(&from, &to, args.listen_uri, &args.start)) {
+return;
+}
+
+/* Prepare for dirty limit migration and wait src vm show up */
+migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value);
+
+/* Start migrate */
+migrate_qmp(from, uri, "{}");
+
+/* Wait for dirty limit throttle begin */
+throttle_us_per_full = 0;
+while (throttle_us_per_full == 0) {
+throttle_us_per_full = read_migrate_property_int(from,
+"dirty-limit-throttle-time-per-full");
+usleep(100);
+g_assert_false(got_stop);
+}
+
+/* Now cancel migrate and wait for dirty limit throttle switch off */
+migrate_cancel(from);
+wait_for_migration_status(from, "cancelled", NULL);
+
+/* Check if dirty limit throttle switched off, set timeout 1ms */
+do {
+throttle_us_per_full = read_migrate_property_int(from,
+"dirty-limit-throttle-time-per-full");
+usleep(100);
+g_assert_false(got_stop);
+} while (throttle_us_per_full != 0 && --max_try_count);
+
+/* Assert dirty limit is not in service */
+g_assert_cmpint(throttle_us_per_full, ==, 0);
+
+args = (MigrateCommon) {
+.start = {
+.only_target = true,
+.use_dirty_ring = true,
+},
+.listen_uri = uri,
+.connect_uri = uri,
+};
+
+/* Restart dst vm, src vm already show up so we needn't wait anymore */
+if (test_migrate_start(&from, &to, args.listen_uri, &args.start)) {
+return;
+}
+
+/* Start migrate */
+migrate_qmp(from, uri, "{}");
+
+/* Wait for dirty limit throttle begin */
+throttle_us_per_full = 0;
+while (throttle_us_per_full == 0) {
+throttle_us_per_full = read_migrate_property_int(from,
+

[PATCH RESEND v3 07/10] migration: Refactor auto-converge capability logic

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Check if block migration is running before throttling the
guest down in the auto-converge way.

Note that this modification is kind of like a code cleanup,
because block migration does not depend on the auto-converge
capability, so the order of the checks can be adjusted.

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/ram.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/migration/ram.c b/migration/ram.c
index 1338e47..5e66652 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1151,7 +1151,11 @@ static void migration_trigger_throttle(RAMState *rs)
 /* During block migration the auto-converge logic incorrectly detects
  * that ram migration makes no progress. Avoid this by disabling the
  * throttling logic during the bulk phase of block migration. */
-if (migrate_auto_converge() && !blk_mig_bulk_active()) {
+if (blk_mig_bulk_active()) {
+return;
+}
+
+if (migrate_auto_converge()) {
 /* The following detection logic can be refined later. For now:
Check to see if the ratio between dirtied bytes and the approx.
amount of bytes that just got transferred since the last time
-- 
1.8.3.1




[PATCH RESEND v3 03/10] kvm: dirty-ring: Fix race with vcpu creation

2022-12-03 Thread huangy81
From: Peter Xu 

It's possible that we want to reap a dirty ring on a vcpu that is still under
creation, because the vcpu is put onto the list (CPU_FOREACH visible) before
initialization of its structures.  In this case:

qemu_init_vcpu
x86_cpu_realizefn
cpu_exec_realizefn
cpu_list_add  <---- can be probed by CPU_FOREACH
qemu_init_vcpu
cpus_accel->create_vcpu_thread(cpu);
kvm_init_vcpu
map kvm_dirty_gfns  <--- kvm_dirty_gfns valid

Don't try to reap dirty ring on vcpus during creation or it'll crash.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124756
Reported-by: Xiaohui Li 
Signed-off-by: Peter Xu 
---
 accel/kvm/kvm-all.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f99b0be..ff26b07 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -683,6 +683,15 @@ static uint32_t kvm_dirty_ring_reap_one(KVMState *s, 
CPUState *cpu)
 uint32_t ring_size = s->kvm_dirty_ring_size;
 uint32_t count = 0, fetch = cpu->kvm_fetch_index;
 
+/*
+ * It's possible that we race with vcpu creation code where the vcpu is
+ * put onto the vcpus list but not yet initialized the dirty ring
+ * structures.  If so, skip it.
+ */
+if (!cpu->created) {
+return 0;
+}
+
 assert(dirty_gfns && ring_size);
 trace_kvm_dirty_ring_reap_vcpu(cpu->cpu_index);
 
-- 
1.8.3.1




[PATCH RESEND v3 05/10] qapi/migration: Introduce vcpu-dirty-limit parameters

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce "vcpu-dirty-limit" migration parameter used
to limit dirty page rate during live migration.

"vcpu-dirty-limit" and "x-vcpu-dirty-limit-period" are
two dirty-limit-related migration parameters, which can
be set before and during live migration by qmp
migrate-set-parameters.

These two parameters are used to help implement the dirty
page rate limit algorithm of migration.

Signed-off-by: Hyman Huang(黄勇) 
Acked-by: Peter Xu 
---
 migration/migration.c | 23 +++
 monitor/hmp-cmds.c|  8 
 qapi/migration.json   | 18 +++---
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 1439d61..fd11c63 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -117,6 +117,7 @@
 #define DEFAULT_MIGRATE_ANNOUNCE_STEP100
 
 #define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 1000    /* microsecond */
+#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT        1       /* MB/s */
 
 static NotifierList migration_state_notifiers =
 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -968,6 +969,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
 params->has_x_vcpu_dirty_limit_period = true;
 params->x_vcpu_dirty_limit_period = 
s->parameters.x_vcpu_dirty_limit_period;
 
+params->has_vcpu_dirty_limit = true;
+params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit;
+
 return params;
 }
 
@@ -1596,6 +1600,14 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 return false;
 }
 
+if (params->has_vcpu_dirty_limit &&
+(params->vcpu_dirty_limit < 1)) {
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+   "vcpu-dirty-limit",
+   "a value greater than or equal to 1");
+return false;
+}
+
 return true;
 }
 
@@ -1699,6 +1711,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 if (params->has_x_vcpu_dirty_limit_period) {
 dest->x_vcpu_dirty_limit_period = params->x_vcpu_dirty_limit_period;
 }
+
+if (params->has_vcpu_dirty_limit) {
+dest->vcpu_dirty_limit = params->vcpu_dirty_limit;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1825,6 +1841,9 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 s->parameters.x_vcpu_dirty_limit_period =
 params->x_vcpu_dirty_limit_period;
 }
+if (params->has_vcpu_dirty_limit) {
+s->parameters.vcpu_dirty_limit = params->vcpu_dirty_limit;
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -4429,6 +4448,9 @@ static Property migration_properties[] = {
 DEFINE_PROP_UINT64("x-vcpu-dirty-limit-period", MigrationState,
parameters.x_vcpu_dirty_limit_period,
DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD),
+DEFINE_PROP_UINT64("vcpu-dirty-limit", MigrationState,
+   parameters.vcpu_dirty_limit,
+   DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -4521,6 +4543,7 @@ static void migration_instance_init(Object *obj)
 params->has_tls_hostname = true;
 params->has_tls_authz = true;
 params->has_x_vcpu_dirty_limit_period = true;
+params->has_vcpu_dirty_limit = true;
 
 qemu_sem_init(&s->postcopy_pause_sem, 0);
 qemu_sem_init(&s->postcopy_pause_rp_sem, 0);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index a3170ca..9ad6ee5 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -517,6 +517,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 monitor_printf(mon, "%s: %" PRIu64 " ms\n",
 MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD),
 params->x_vcpu_dirty_limit_period);
+
+monitor_printf(mon, "%s: %" PRIu64 " MB/s\n",
+MigrationParameter_str(MIGRATION_PARAMETER_VCPU_DIRTY_LIMIT),
+params->vcpu_dirty_limit);
 }
 
 qapi_free_MigrationParameters(params);
@@ -1340,6 +1344,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 p->has_x_vcpu_dirty_limit_period = true;
 visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err);
 break;
+case MIGRATION_PARAMETER_VCPU_DIRTY_LIMIT:
+p->has_vcpu_dirty_limit = true;
+visit_type_size(v, param, &p->vcpu_dirty_limit, &err);
+break;
 default:
 assert(0);
 }
diff --git a/qapi/migration.json b/qapi/migration.json
index c428bcd..7e868a1 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -780,6 +780,9 @@
 #live migration. Should be in the range 1 to 1000ms,
 #defaults to 1000ms. (Since 7.3)
 #
+# @vcpu-dirty-limit: Dirtyrate limit (MB/s) 

[PATCH RESEND v3 08/10] migration: Implement dirty-limit convergence algo

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Implement the dirty-limit convergence algorithm for live migration,
which is kind of like the auto-converge algorithm but uses dirty-limit
instead of cpu throttling to make migration converge.

Enable the dirty page limit if dirty_rate_high_cnt is greater than 2
when the dirty-limit capability is enabled, and disable dirty-limit if
migration is cancelled.

Note that the "set_vcpu_dirty_limit" and "cancel_vcpu_dirty_limit"
commands are not allowed during dirty-limit live migration.
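
As a condensed, standalone restatement of the trigger logic in the hunk
below (stub functions and example values only; the real code also skips
throttling during the bulk phase of block migration):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static int dirty_rate_high_cnt;

static void throttle_cpu(void)
{
    puts("auto-converge: raise the cpu throttle");
}

static void set_vcpu_dirty_limit(uint64_t mbps)
{
    printf("dirty-limit: cap the vCPU dirty page rate at %llu MB/s\n",
           (unsigned long long)mbps);
}

/* Called once per dirty-sync period with the bytes dirtied and the
 * threshold derived from the bytes actually transferred in that period. */
static void trigger_throttle(uint64_t bytes_dirty_period,
                             uint64_t bytes_dirty_threshold,
                             bool auto_converge, bool dirty_limit,
                             uint64_t vcpu_dirty_limit)
{
    if (bytes_dirty_period > bytes_dirty_threshold &&
        ++dirty_rate_high_cnt >= 2) {
        dirty_rate_high_cnt = 0;
        if (auto_converge) {
            throttle_cpu();
        } else if (dirty_limit) {
            set_vcpu_dirty_limit(vcpu_dirty_limit);
        }
    }
}

int main(void)
{
    /* two consecutive "dirtying outpaces sending" periods trip the throttle */
    trigger_throttle(300ULL << 20, 100ULL << 20, false, true, 1);
    trigger_throttle(300ULL << 20, 100ULL << 20, false, true, 1);
    return 0;
}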

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c  |  3 +++
 migration/ram.c| 63 ++
 migration/trace-events |  1 +
 softmmu/dirtylimit.c   | 22 ++
 4 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 702e7f4..127d0fe 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -240,6 +240,9 @@ void migration_cancel(const Error *error)
 if (error) {
 migrate_set_error(current_migration, error);
 }
+if (migrate_dirty_limit()) {
+qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
+}
 migrate_fd_cancel(current_migration);
 }
 
diff --git a/migration/ram.c b/migration/ram.c
index 5e66652..78b9167 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -45,6 +45,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-types-migration.h"
 #include "qapi/qapi-events-migration.h"
+#include "qapi/qapi-commands-migration.h"
 #include "qapi/qmp/qerror.h"
 #include "trace.h"
 #include "exec/ram_addr.h"
@@ -57,6 +58,8 @@
 #include "qemu/iov.h"
 #include "multifd.h"
 #include "sysemu/runstate.h"
+#include "sysemu/dirtylimit.h"
+#include "sysemu/kvm.h"
 
 #include "hw/boards.h" /* for machine_dump_guest_core() */
 
@@ -1139,6 +1142,30 @@ static void migration_update_rates(RAMState *rs, int64_t 
end_time)
 }
 }
 
+/*
+ * Enable dirty-limit to throttle down the guest
+ */
+static void migration_dirty_limit_guest(void)
+{
+static int64_t quota_dirtyrate;
+MigrationState *s = migrate_get_current();
+
+/*
+ * If dirty limit already enabled and migration parameter
+ * vcpu-dirty-limit untouched.
+ */
+if (dirtylimit_in_service() &&
+quota_dirtyrate == s->parameters.vcpu_dirty_limit) {
+return;
+}
+
+quota_dirtyrate = s->parameters.vcpu_dirty_limit;
+
+/* Set or update quota dirty limit */
+qmp_set_vcpu_dirty_limit(false, -1, quota_dirtyrate, NULL);
+trace_migration_dirty_limit_guest(quota_dirtyrate);
+}
+
 static void migration_trigger_throttle(RAMState *rs)
 {
 MigrationState *s = migrate_get_current();
@@ -1148,26 +1175,32 @@ static void migration_trigger_throttle(RAMState *rs)
 uint64_t bytes_dirty_period = rs->num_dirty_pages_period * 
TARGET_PAGE_SIZE;
 uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
 
-/* During block migration the auto-converge logic incorrectly detects
- * that ram migration makes no progress. Avoid this by disabling the
- * throttling logic during the bulk phase of block migration. */
-if (blk_mig_bulk_active()) {
-return;
-}
+/*
+ * The following detection logic can be refined later. For now:
+ * Check to see if the ratio between dirtied bytes and the approx.
+ * amount of bytes that just got transferred since the last time
+ * we were in this routine reaches the threshold. If that happens
+ * twice, start or increase throttling.
+ */
 
-if (migrate_auto_converge()) {
-/* The following detection logic can be refined later. For now:
-   Check to see if the ratio between dirtied bytes and the approx.
-   amount of bytes that just got transferred since the last time
-   we were in this routine reaches the threshold. If that happens
-   twice, start or increase throttling. */
+if ((bytes_dirty_period > bytes_dirty_threshold) &&
+(++rs->dirty_rate_high_cnt >= 2)) {
+rs->dirty_rate_high_cnt = 0;
+/*
+ * During block migration the auto-converge logic incorrectly detects
+ * that ram migration makes no progress. Avoid this by disabling the
+ * throttling logic during the bulk phase of block migration
+ */
+if (blk_mig_bulk_active()) {
+return;
+}
 
-if ((bytes_dirty_period > bytes_dirty_threshold) &&
-(++rs->dirty_rate_high_cnt >= 2)) {
+if (migrate_auto_converge()) {
 trace_migration_throttle();
-rs->dirty_rate_high_cnt = 0;
 mig_throttle_guest_down(bytes_dirty_period,
 bytes_dirty_threshold);
+} else if (migrate_dirty_limit()) {
+migration_dirty_limit_guest();
 }
 }
 }
diff --git a/migration/trace-events b/migration/trace-events
index 57003ed..33a2666 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -91,6 +91,7 @@ 

[PATCH RESEND v3 04/10] qapi/migration: Introduce x-vcpu-dirty-limit-period parameter

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce the experimental migration parameter
"x-vcpu-dirty-limit-period", which is in the range of 1 to 1000ms and
is used to make the dirty rate calculation period configurable.

Currently, as "x-vcpu-dirty-limit-period" varies, the
total time of live migration changes; test results show the
optimal value of "x-vcpu-dirty-limit-period" ranges from
500ms to 1000ms. "x-vcpu-dirty-limit-period" should be made
stable once it is clear that the best value cannot be determined by the
developer's experiments.

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c | 26 ++
 monitor/hmp-cmds.c|  8 
 qapi/migration.json   | 34 +++---
 3 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index f485eea..1439d61 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -116,6 +116,8 @@
 #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS5
 #define DEFAULT_MIGRATE_ANNOUNCE_STEP100
 
+#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 1000    /* microsecond */
+
 static NotifierList migration_state_notifiers =
 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
 
@@ -963,6 +965,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
s->parameters.block_bitmap_mapping);
 }
 
+params->has_x_vcpu_dirty_limit_period = true;
+params->x_vcpu_dirty_limit_period = 
s->parameters.x_vcpu_dirty_limit_period;
+
 return params;
 }
 
@@ -1582,6 +1587,15 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 }
 #endif
 
+if (params->has_x_vcpu_dirty_limit_period &&
+(params->x_vcpu_dirty_limit_period < 1 ||
+ params->x_vcpu_dirty_limit_period > 1000)) {
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+   "x-vcpu-dirty-limit-period",
+   "a value between 1 and 1000");
+return false;
+}
+
 return true;
 }
 
@@ -1681,6 +1695,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 dest->has_block_bitmap_mapping = true;
 dest->block_bitmap_mapping = params->block_bitmap_mapping;
 }
+
+if (params->has_x_vcpu_dirty_limit_period) {
+dest->x_vcpu_dirty_limit_period = params->x_vcpu_dirty_limit_period;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1803,6 +1821,10 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 QAPI_CLONE(BitmapMigrationNodeAliasList,
params->block_bitmap_mapping);
 }
+if (params->has_x_vcpu_dirty_limit_period) {
+s->parameters.x_vcpu_dirty_limit_period =
+params->x_vcpu_dirty_limit_period;
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -4404,6 +4426,9 @@ static Property migration_properties[] = {
 DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
 DEFINE_PROP_STRING("tls-hostname", MigrationState, 
parameters.tls_hostname),
 DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),
+DEFINE_PROP_UINT64("x-vcpu-dirty-limit-period", MigrationState,
+   parameters.x_vcpu_dirty_limit_period,
+   DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -4495,6 +4520,7 @@ static void migration_instance_init(Object *obj)
 params->has_tls_creds = true;
 params->has_tls_hostname = true;
 params->has_tls_authz = true;
+params->has_x_vcpu_dirty_limit_period = true;
 
 qemu_sem_init(&s->postcopy_pause_sem, 0);
 qemu_sem_init(&s->postcopy_pause_rp_sem, 0);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 01b789a..a3170ca 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -513,6 +513,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 }
 }
 }
+
+monitor_printf(mon, "%s: %" PRIu64 " ms\n",
+MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD),
+params->x_vcpu_dirty_limit_period);
 }
 
 qapi_free_MigrationParameters(params);
@@ -1332,6 +1336,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 error_setg(&err, "The block-bitmap-mapping parameter can only be set "
"through QMP");
 break;
+case MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD:
+p->has_x_vcpu_dirty_limit_period = true;
+visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err);
+break;
 default:
 assert(0);
 }
diff --git a/qapi/migration.json b/qapi/migration.json
index 88ecf86..c428bcd 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -776,8 +776,13 @@
 #block device name if there is one, and to 

[PATCH v3 03/10] kvm: dirty-ring: Fix race with vcpu creation

2022-12-03 Thread huangy81
From: Peter Xu 

It's possible that we want to reap a dirty ring on a vcpu that is during
creation, because the vcpu is put onto list (CPU_FOREACH visible) before
initialization of the structures.  In this case:

qemu_init_vcpu
x86_cpu_realizefn
cpu_exec_realizefn
cpu_list_add  < can be probed by CPU_FOREACH
qemu_init_vcpu
cpus_accel->create_vcpu_thread(cpu);
kvm_init_vcpu
map kvm_dirty_gfns  <--- kvm_dirty_gfns valid

Don't try to reap dirty ring on vcpus during creation or it'll crash.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124756
Reported-by: Xiaohui Li 
Signed-off-by: Peter Xu 
---
 accel/kvm/kvm-all.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f99b0be..ff26b07 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -683,6 +683,15 @@ static uint32_t kvm_dirty_ring_reap_one(KVMState *s, 
CPUState *cpu)
 uint32_t ring_size = s->kvm_dirty_ring_size;
 uint32_t count = 0, fetch = cpu->kvm_fetch_index;
 
+/*
+ * It's possible that we race with vcpu creation code where the vcpu is
+ * put onto the vcpus list but not yet initialized the dirty ring
+ * structures.  If so, skip it.
+ */
+if (!cpu->created) {
+return 0;
+}
+
 assert(dirty_gfns && ring_size);
 trace_kvm_dirty_ring_reap_vcpu(cpu->cpu_index);
 
-- 
1.8.3.1




[PATCH v3 07/10] migration: Refactor auto-converge capability logic

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Check if block migration is running before throttling
guest down in auto-converge way.

Note that this modification is kind of like code clean,
because block migration does not depend on auto-converge
capability, so the order of checks can be adjusted.

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/ram.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/migration/ram.c b/migration/ram.c
index 1338e47..5e66652 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1151,7 +1151,11 @@ static void migration_trigger_throttle(RAMState *rs)
 /* During block migration the auto-converge logic incorrectly detects
  * that ram migration makes no progress. Avoid this by disabling the
  * throttling logic during the bulk phase of block migration. */
-if (migrate_auto_converge() && !blk_mig_bulk_active()) {
+if (blk_mig_bulk_active()) {
+return;
+}
+
+if (migrate_auto_converge()) {
 /* The following detection logic can be refined later. For now:
Check to see if the ratio between dirtied bytes and the approx.
amount of bytes that just got transferred since the last time
-- 
1.8.3.1




[PATCH v3 09/10] migration: Export dirty-limit time info for observation

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Export dirty limit throttle time and estimated ring full
time, through which we can observe if dirty limit take
effect during live migration.

Signed-off-by: Hyman Huang(黄勇) 
---
 include/sysemu/dirtylimit.h |  2 ++
 migration/migration.c   | 10 ++
 monitor/hmp-cmds.c  | 10 ++
 qapi/migration.json | 15 ++-
 softmmu/dirtylimit.c| 39 +++
 5 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h
index 8d2c1f3..f15e01d 100644
--- a/include/sysemu/dirtylimit.h
+++ b/include/sysemu/dirtylimit.h
@@ -34,4 +34,6 @@ void dirtylimit_set_vcpu(int cpu_index,
 void dirtylimit_set_all(uint64_t quota,
 bool enable);
 void dirtylimit_vcpu_execute(CPUState *cpu);
+int64_t dirtylimit_throttle_time_per_full(void);
+int64_t dirtylimit_ring_full_time(void);
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index 127d0fe..3f92389 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -62,6 +62,7 @@
 #include "yank_functions.h"
 #include "sysemu/qtest.h"
 #include "sysemu/kvm.h"
+#include "sysemu/dirtylimit.h"
 
 #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling 
*/
 
@@ -1114,6 +1115,15 @@ static void populate_ram_info(MigrationInfo *info, 
MigrationState *s)
 info->ram->remaining = ram_bytes_remaining();
 info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
 }
+
+if (migrate_dirty_limit() && dirtylimit_in_service()) {
+info->has_dirty_limit_throttle_time_per_full = true;
+info->dirty_limit_throttle_time_per_full =
+dirtylimit_throttle_time_per_full();
+
+info->has_dirty_limit_ring_full_time = true;
+info->dirty_limit_ring_full_time = dirtylimit_us_ring_full();
+}
 }
 
 static void populate_disk_info(MigrationInfo *info)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 9ad6ee5..c3aaba3 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -339,6 +339,16 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->cpu_throttle_percentage);
 }
 
+if (info->has_dirty_limit_throttle_time_per_full) {
+monitor_printf(mon, "dirty-limit throttle time: %" PRIi64 " us\n",
+   info->dirty_limit_throttle_time_per_full);
+}
+
+if (info->has_dirty_limit_ring_full_time) {
+monitor_printf(mon, "dirty-limit ring full time: %" PRIi64 " us\n",
+   info->dirty_limit_ring_full_time);
+}
+
 if (info->has_postcopy_blocktime) {
 monitor_printf(mon, "postcopy blocktime: %u\n",
info->postcopy_blocktime);
diff --git a/qapi/migration.json b/qapi/migration.json
index 6055fdc..ae7d22d 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -242,6 +242,17 @@
 #   Present and non-empty when migration is blocked.
 #   (since 6.0)
 #
+# @dirty-limit-throttle-time-per-full: Maximum throttle time (in microseconds) 
of virtual
+#  CPUs each dirty ring full round, used 
to observe
+#  if dirty-limit take effect during live 
migration.
+#  (since 7.3)
+#
+# @dirty-limit-ring-full-time: Estimated average dirty ring full time (in 
microseconds)
+#  each dirty ring full round, note that the value 
equals
+#  dirty ring memory size divided by average dirty 
page rate
+#  of virtual CPU, which can be used to observe 
the average
+#  memory load of virtual CPU indirectly. (since 
7.3)
+#
 # Since: 0.14
 ##
 { 'struct': 'MigrationInfo',
@@ -259,7 +270,9 @@
'*postcopy-blocktime' : 'uint32',
'*postcopy-vcpu-blocktime': ['uint32'],
'*compression': 'CompressionStats',
-   '*socket-address': ['SocketAddress'] } }
+   '*socket-address': ['SocketAddress'],
+   '*dirty-limit-throttle-time-per-full': 'int64',
+   '*dirty-limit-ring-full-time': 'int64'} }
 
 ##
 # @query-migrate:
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index b63032c..06de099 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -569,6 +569,45 @@ static struct DirtyLimitInfo *dirtylimit_query_vcpu(int 
cpu_index)
 return info;
 }
 
+/* Return the max throttle time of each virtual CPU */
+int64_t dirtylimit_throttle_time_per_full(void)
+{
+CPUState *cpu;
+int64_t max = 0;
+
+CPU_FOREACH(cpu) {
+if (cpu->throttle_us_per_full > max) {
+max = cpu->throttle_us_per_full;
+}
+}
+
+return max;
+}
+
+/*
+ * Estimate average dirty ring full time of each virtaul CPU.
+ * Return -1 if guest doesn't dirty memory.
+ */
+int64_t 

[PATCH v3 01/10] dirtylimit: Fix overflow when computing MB

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Coverity points out a overflow problem when computing MB,
dirty_ring_size and TARGET_PAGE_SIZE are both 32 bits,
multiplication will be done as a 32-bit operation, which
could overflow. Simplify the formula.

Meanwhile, fix spelling mistake of variable name.

Reported-by: Peter Maydell 
Signed-off-by: Peter Maydell 
Signed-off-by: Richard Henderson 
Signed-off-by: Hyman Huang(黄勇) 
Reviewed-by: Peter Xu 
---
 softmmu/dirtylimit.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 1266855..940d238 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -236,14 +236,14 @@ static inline int64_t 
dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
 {
 static uint64_t max_dirtyrate;
 uint32_t dirty_ring_size = kvm_dirty_ring_size();
-uint64_t dirty_ring_size_meory_MB =
-dirty_ring_size * TARGET_PAGE_SIZE >> 20;
+uint32_t dirty_ring_size_memory_MB =
+dirty_ring_size >> (20 - TARGET_PAGE_BITS);
 
 if (max_dirtyrate < dirtyrate) {
 max_dirtyrate = dirtyrate;
 }
 
-return dirty_ring_size_meory_MB * 100 / max_dirtyrate;
+return dirty_ring_size_memory_MB * 100ULL / max_dirtyrate;
 }
 
 static inline bool dirtylimit_done(uint64_t quota,
-- 
1.8.3.1




[PATCH v3 00/10] migration: introduce dirtylimit capabilit

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

v3:
This version makes some modifications inspired by Peter and Markus,
as follows:
1. Do the code clean up in [PATCH v2 02/11] suggested by Markus 
2. Replace the [PATCH v2 03/11] with a much simpler patch posted by
   Peter to fix the following bug:
   https://bugzilla.redhat.com/show_bug.cgi?id=2124756
3. Fix the error path of migrate_params_check in [PATCH v2 04/11]
   pointed out by Markus. Enrich the commit message to explain why
   x-vcpu-dirty-limit-period is an unstable parameter.
4. Refactor the dirty-limit convergence algo in [PATCH v2 07/11] 
   suggested by Peter:
   a. apply blk_mig_bulk_active check before enable dirty-limit
   b. drop the unhelpful check function before enable dirty-limit
   c. change the migration_cancel logic, cancel dirty-limit
  only if the dirty-limit capability is turned on. 
   d. abstract a code clean commit [PATCH v3 07/10] to adjust
  the check order before enable auto-converge 
5. Change the name of observing indexes during dirty-limit live
   migration to make them easier to understand. Use the
   maximum throttle time of vcpus as "dirty-limit-throttle-time-per-full"
6. Fix some grammatical and spelling errors pointed out by Markus
   and enrich the document about the dirty-limit live migration
   observing indexes "dirty-limit-ring-full-time"
   and "dirty-limit-throttle-time-per-full"
7. Change the default value of x-vcpu-dirty-limit-period to 1000ms,
   which is the optimal value pointed out in the cover letter for that
   testing environment.
8. Drop the 2 guestperf test commits [PATCH v2 10/11],
   [PATCH v2 11/11] and post them with a standalone series in the
   future.

Thanks Peter and Markus sincerely for the passionate, efficient
and careful comments and suggestions.

Please review.  

Yong

v2: 
This version makes a few modifications compared with
version 1, as follows:
1. fix the overflow issue reported by Peter Maydell
2. add parameter check for hmp "set_vcpu_dirty_limit" command
3. fix the racing issue between dirty ring reaper thread and
   Qemu main thread.
4. add migrate parameter check for x-vcpu-dirty-limit-period
   and vcpu-dirty-limit.
5. add the logic to forbid hmp/qmp commands set_vcpu_dirty_limit,
   cancel_vcpu_dirty_limit during dirty-limit live migration when
   implementing the dirty-limit convergence algo.
6. add capability check to ensure auto-converge and dirty-limit
   are mutually exclusive.
7. pre-check if kvm dirty ring size is configured before setting
   dirty-limit migrate parameter 

A more comprehensive test was done compared with version 1.

The following is the test environment:
-
a. Host hardware info:

CPU:
Intel(R) Xeon(R) Gold 5218 CPU @ 2.30GHz

CPU(s):  64
On-line CPU(s) list: 0-63
Thread(s) per core:  2
Core(s) per socket:  16
Socket(s):   2
NUMA node(s):2

NUMA node0 CPU(s):   0-15,32-47
NUMA node1 CPU(s):   16-31,48-63

Memory:
Hynix  503Gi

Interface:
Intel Corporation Ethernet Connection X722 for 1GbE (rev 09)
Speed: 1000Mb/s

b. Host software info:

OS: ctyunos release 2
Kernel: 4.19.90-2102.2.0.0066.ctl2.x86_64
Libvirt baseline version:  libvirt-6.9.0
Qemu baseline version: qemu-5.0

c. vm scale
CPU: 4
Memory: 4G
-

All the supplementary test data shown below are based on the
above test environment.

In version 1, we post test data from unixbench as follows:

$ taskset -c 8-15 ./Run -i 2 -c 8 {unixbench test item}

host cpu: Intel(R) Xeon(R) Platinum 8378A
host interface speed: 1000Mb/s
  |-+++---|
  | UnixBench test item | Normal | Dirtylimit | Auto-converge |
  |-+++---|
  | dhry2reg| 32800  | 32786  | 25292 |
  | whetstone-double| 10326  | 10315  | 9847  |
  | pipe| 15442  | 15271  | 14506 |
  | context1| 7260   | 6235   | 4514  |
  | spawn   | 3663   | 3317   | 3249  |
  | syscall | 4669   | 4667   | 3841  |
  |-+++---|

In version 2, we post supplementary test data that does not use
taskset, making the scenario more general, as follows:

$ ./Run

per-vcpu data:
  |-+++---|
  | UnixBench test item | Normal | Dirtylimit | Auto-converge |
  |-+++---|
  | dhry2reg| 2991   | 2902   | 1722  |
  | whetstone-double| 1018   | 1006   | 627   |
  | Execl Throughput| 955| 320| 660   |
  | File Copy - 1   | 2362   | 805| 1325  |
  | File Copy - 2   | 1500   | 1406  

[PATCH v3 10/10] tests: Add migration dirty-limit capability test

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Add a migration dirty-limit capability test if the kernel supports
dirty ring.

The migration dirty-limit capability introduces two parameters,
x-vcpu-dirty-limit-period and vcpu-dirty-limit, which are used to
implement live migration with dirty limit.

The test case does the following things:
1. start src, dst vm and enable dirty-limit capability
2. start migration and then cancel it to check if dirty limit
   stops working.
3. restart dst vm
4. start migrate and enable dirty-limit capability
5. check if migration satisfies the convergence condition
   during the pre-switchover phase.

Signed-off-by: Hyman Huang(黄勇) 
---
 tests/qtest/migration-test.c | 154 +++
 1 file changed, 154 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 442998d..03b47f5 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2422,6 +2422,158 @@ static void test_vcpu_dirty_limit(void)
 dirtylimit_stop_vm(vm);
 }
 
+static void migrate_dirty_limit_wait_showup(QTestState *from,
+const int64_t period,
+const int64_t value)
+{
+/* Enable dirty limit capability */
+migrate_set_capability(from, "dirty-limit", true);
+
+/* Set dirty limit parameters */
+migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period);
+migrate_set_parameter_int(from, "vcpu-dirty-limit", value);
+
+/* Make sure migrate can't converge */
+migrate_ensure_non_converge(from);
+
+/* To check limit rate after precopy */
+migrate_set_capability(from, "pause-before-switchover", true);
+
+/* Wait for the serial output from the source */
+wait_for_serial("src_serial");
+}
+
+/*
+ * This test does:
+ *  source   target
+ *   migrate_incoming
+ * migrate
+ * migrate_cancel
+ *   restart target
+ * migrate
+ *
+ *  And see that if dirty limit works correctly
+ */
+static void test_migrate_dirty_limit(void)
+{
+g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
+QTestState *from, *to;
+int64_t remaining, throttle_us_per_full;
+/*
+ * We want the test to be stable and as fast as possible.
+ * E.g., with 1Gb/s bandwidth migration may pass without dirty limit,
+ * so we need to decrease the bandwidth.
+ */
+const int64_t dirtylimit_period = 1000, dirtylimit_value = 50;
+const int64_t max_bandwidth = 4; /* ~400Mb/s */
+const int64_t downtime_limit = 250; /* 250ms */
+/*
+ * We migrate through unix-socket (> 500Mb/s).
+ * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s).
+ * So, we can predict expected_threshold
+ */
+const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000;
+int max_try_count = 10;
+MigrateCommon args = {
+.start = {
+.hide_stderr = true,
+.use_dirty_ring = true,
+},
+.listen_uri = uri,
+.connect_uri = uri,
+};
+
+/* Start src, dst vm */
+if (test_migrate_start(&from, &to, args.listen_uri, &args)) {
+return;
+}
+
+/* Prepare for dirty limit migration and wait src vm show up */
+migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value);
+
+/* Start migrate */
+migrate_qmp(from, uri, "{}");
+
+/* Wait for dirty limit throttle begin */
+throttle_us_per_full = 0;
+while (throttle_us_per_full == 0) {
+throttle_us_per_full = read_migrate_property_int(from,
+"dirty-limit-throttle-time-per-full");
+usleep(100);
+g_assert_false(got_stop);
+}
+
+/* Now cancel migrate and wait for dirty limit throttle switch off */
+migrate_cancel(from);
+wait_for_migration_status(from, "cancelled", NULL);
+
+/* Check if dirty limit throttle switched off, set timeout 1ms */
+do {
+throttle_us_per_full = read_migrate_property_int(from,
+"dirty-limit-throttle-time-per-full");
+usleep(100);
+g_assert_false(got_stop);
+} while (throttle_us_per_full != 0 && --max_try_count);
+
+/* Assert dirty limit is not in service */
+g_assert_cmpint(throttle_us_per_full, ==, 0);
+
+args = (MigrateCommon) {
+.start = {
+.only_target = true,
+.use_dirty_ring = true,
+},
+.listen_uri = uri,
+.connect_uri = uri,
+};
+
+/* Restart dst vm, src vm already show up so we needn't wait anymore */
+if (test_migrate_start(&from, &to, args.listen_uri, &args)) {
+return;
+}
+
+/* Start migrate */
+migrate_qmp(from, uri, "{}");
+
+/* Wait for dirty limit throttle begin */
+throttle_us_per_full = 0;
+while (throttle_us_per_full == 0) {
+throttle_us_per_full = read_migrate_property_int(from,
+

[PATCH v3 04/10] qapi/migration: Introduce x-vcpu-dirty-limit-period parameter

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce "x-vcpu-dirty-limit-period" migration experimental
parameter, which is in the range of 1 to 1000ms and used to
make dirtyrate calculation period configurable.

Currently, as "x-vcpu-dirty-limit-period" varies, the
total time of live migration changes; test results show the
optimal value of "x-vcpu-dirty-limit-period" ranges from
500ms to 1000ms. "x-vcpu-dirty-limit-period" should be made
stable once it is proved that the best value cannot be determined
by the developer's experiments.
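
A minimal QMP sketch of tuning the parameter (the value 1000 is only an
example within the allowed 1 to 1000ms range):

    { "execute": "migrate-set-parameters",
      "arguments": { "x-vcpu-dirty-limit-period": 1000 } }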

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c | 26 ++
 monitor/hmp-cmds.c|  8 
 qapi/migration.json   | 34 +++---
 3 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index f485eea..1439d61 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -116,6 +116,8 @@
 #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS5
 #define DEFAULT_MIGRATE_ANNOUNCE_STEP100
 
+#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 1000/* microsecond */
+
 static NotifierList migration_state_notifiers =
 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
 
@@ -963,6 +965,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
s->parameters.block_bitmap_mapping);
 }
 
+params->has_x_vcpu_dirty_limit_period = true;
+params->x_vcpu_dirty_limit_period = 
s->parameters.x_vcpu_dirty_limit_period;
+
 return params;
 }
 
@@ -1582,6 +1587,15 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 }
 #endif
 
+if (params->has_x_vcpu_dirty_limit_period &&
+(params->x_vcpu_dirty_limit_period < 1 ||
+ params->x_vcpu_dirty_limit_period > 1000)) {
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+   "x-vcpu-dirty-limit-period",
+   "a value between 1 and 1000");
+return false;
+}
+
 return true;
 }
 
@@ -1681,6 +1695,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 dest->has_block_bitmap_mapping = true;
 dest->block_bitmap_mapping = params->block_bitmap_mapping;
 }
+
+if (params->has_x_vcpu_dirty_limit_period) {
+dest->x_vcpu_dirty_limit_period = params->x_vcpu_dirty_limit_period;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1803,6 +1821,10 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 QAPI_CLONE(BitmapMigrationNodeAliasList,
params->block_bitmap_mapping);
 }
+if (params->has_x_vcpu_dirty_limit_period) {
+s->parameters.x_vcpu_dirty_limit_period =
+params->x_vcpu_dirty_limit_period;
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -4404,6 +4426,9 @@ static Property migration_properties[] = {
 DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
 DEFINE_PROP_STRING("tls-hostname", MigrationState, 
parameters.tls_hostname),
 DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),
+DEFINE_PROP_UINT64("x-vcpu-dirty-limit-period", MigrationState,
+   parameters.x_vcpu_dirty_limit_period,
+   DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -4495,6 +4520,7 @@ static void migration_instance_init(Object *obj)
 params->has_tls_creds = true;
 params->has_tls_hostname = true;
 params->has_tls_authz = true;
+params->has_x_vcpu_dirty_limit_period = true;
 
 qemu_sem_init(&s->postcopy_pause_sem, 0);
 qemu_sem_init(&s->postcopy_pause_rp_sem, 0);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 01b789a..a3170ca 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -513,6 +513,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 }
 }
 }
+
+monitor_printf(mon, "%s: %" PRIu64 " ms\n",
+MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD),
+params->x_vcpu_dirty_limit_period);
 }
 
 qapi_free_MigrationParameters(params);
@@ -1332,6 +1336,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 error_setg(, "The block-bitmap-mapping parameter can only be set "
"through QMP");
 break;
+case MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD:
+p->has_x_vcpu_dirty_limit_period = true;
+visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err);
+break;
 default:
 assert(0);
 }
diff --git a/qapi/migration.json b/qapi/migration.json
index 88ecf86..c428bcd 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -776,8 +776,13 @@
 #block device name if there is one, and to 

[PATCH v3 06/10] migration: Introduce dirty-limit capability

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce migration dirty-limit capability, which can
be turned on before live migration and limits the dirty
page rate during live migration.

Introduce the migrate_dirty_limit function to help check
if the dirty-limit capability is enabled during live migration.

Meanwhile, refactor vcpu_dirty_rate_stat_collect
so that period can be configured instead of hardcoded.

dirty-limit capability is kind of like auto-converge
but using dirty limit instead of traditional cpu-throttle
to throttle guest down. To enable this feature, turn on
the dirty-limit capability before live migration using
migrate-set-capabilities, and set the parameters
"x-vcpu-dirty-limit-period", "vcpu-dirty-limit" suitably
to speed up convergence.
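
For instance, a sketch of the QMP sequence on the source before issuing
"migrate" (the parameter values are illustrative only):

    { "execute": "migrate-set-capabilities",
      "arguments": { "capabilities": [
          { "capability": "dirty-limit", "state": true } ] } }
    { "execute": "migrate-set-parameters",
      "arguments": { "x-vcpu-dirty-limit-period": 1000,
                     "vcpu-dirty-limit": 50 } }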

Signed-off-by: Hyman Huang(黄勇) 
Acked-by: Peter Xu 
---
 migration/migration.c | 25 +
 migration/migration.h |  1 +
 qapi/migration.json   |  4 +++-
 softmmu/dirtylimit.c  | 11 ++-
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index fd11c63..702e7f4 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -61,6 +61,7 @@
 #include "sysemu/cpus.h"
 #include "yank_functions.h"
 #include "sysemu/qtest.h"
+#include "sysemu/kvm.h"
 
 #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling 
*/
 
@@ -1366,6 +1367,20 @@ static bool migrate_caps_check(bool *cap_list,
 }
 }
 
+if (cap_list[MIGRATION_CAPABILITY_DIRTY_LIMIT]) {
+if (cap_list[MIGRATION_CAPABILITY_AUTO_CONVERGE]) {
+error_setg(errp, "dirty-limit conflicts with auto-converge"
+   " only one of them is available currently");
+return false;
+}
+
+if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
+error_setg(errp, "dirty-limit requires KVM with accelerator"
+   " property 'dirty-ring-size' set");
+return false;
+}
+}
+
 return true;
 }
 
@@ -2544,6 +2559,15 @@ bool migrate_auto_converge(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
 }
 
+bool migrate_dirty_limit(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_LIMIT];
+}
+
 bool migrate_zero_blocks(void)
 {
 MigrationState *s;
@@ -4473,6 +4497,7 @@ static Property migration_properties[] = {
 DEFINE_PROP_MIG_CAP("x-zero-copy-send",
 MIGRATION_CAPABILITY_ZERO_COPY_SEND),
 #endif
+DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT),
 
 DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/migration/migration.h b/migration/migration.h
index cdad8ac..7fbb9f8 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -409,6 +409,7 @@ bool migrate_ignore_shared(void);
 bool migrate_validate_uuid(void);
 
 bool migrate_auto_converge(void);
+bool migrate_dirty_limit(void);
 bool migrate_use_multifd(void);
 bool migrate_pause_before_switchover(void);
 int migrate_multifd_channels(void);
diff --git a/qapi/migration.json b/qapi/migration.json
index 7e868a1..6055fdc 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -477,6 +477,8 @@
 #will be handled faster.  This is a performance feature and
 #should not affect the correctness of postcopy migration.
 #(since 7.1)
+# @dirty-limit: Use dirty-limit to throttle down guest if enabled.
+#   (since 7.3)
 #
 # Features:
 # @unstable: Members @x-colo and @x-ignore-shared are experimental.
@@ -492,7 +494,7 @@
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
'validate-uuid', 'background-snapshot',
-   'zero-copy-send', 'postcopy-preempt'] }
+   'zero-copy-send', 'postcopy-preempt', 'dirty-limit'] }
 
 ##
 # @MigrationCapabilityStatus:
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 53b66d5..2a07200 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -23,6 +23,8 @@
 #include "exec/memory.h"
 #include "hw/boards.h"
 #include "sysemu/kvm.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
 #include "trace.h"
 
 /*
@@ -75,11 +77,18 @@ static bool dirtylimit_quit;
 
 static void vcpu_dirty_rate_stat_collect(void)
 {
+MigrationState *s = migrate_get_current();
 VcpuStat stat;
 int i = 0;
+int64_t period = DIRTYLIMIT_CALC_TIME_MS;
+
+if (migrate_dirty_limit() &&
+migration_is_active(s)) {
+period = s->parameters.x_vcpu_dirty_limit_period;
+}
 
 /* calculate vcpu dirtyrate */
-vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS,
+vcpu_calculate_dirtyrate(period,
  ,
  GLOBAL_DIRTY_LIMIT,
  false);
-- 
1.8.3.1




[PATCH v3 05/10] qapi/migration: Introduce vcpu-dirty-limit parameters

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce "vcpu-dirty-limit" migration parameter used
to limit dirty page rate during live migration.

"vcpu-dirty-limit" and "x-vcpu-dirty-limit-period" are
two dirty-limit-related migration parameters, which can
be set before and during live migration by qmp
migrate-set-parameters.

These two parameters are used to help implement the dirty
page rate limit algo of migration.
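
For example, the limit can also be tightened while migration is already
running (a sketch; 20 MB/s is an arbitrary illustrative value):

    { "execute": "migrate-set-parameters",
      "arguments": { "vcpu-dirty-limit": 20 } }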

Signed-off-by: Hyman Huang(黄勇) 
Acked-by: Peter Xu 
---
 migration/migration.c | 23 +++
 monitor/hmp-cmds.c|  8 
 qapi/migration.json   | 18 +++---
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 1439d61..fd11c63 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -117,6 +117,7 @@
 #define DEFAULT_MIGRATE_ANNOUNCE_STEP100
 
 #define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 1000/* microsecond */
+#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT1   /* MB/s */
 
 static NotifierList migration_state_notifiers =
 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -968,6 +969,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
 params->has_x_vcpu_dirty_limit_period = true;
 params->x_vcpu_dirty_limit_period = 
s->parameters.x_vcpu_dirty_limit_period;
 
+params->has_vcpu_dirty_limit = true;
+params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit;
+
 return params;
 }
 
@@ -1596,6 +1600,14 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 return false;
 }
 
+if (params->has_vcpu_dirty_limit &&
+(params->vcpu_dirty_limit < 1)) {
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+   "vcpu-dirty-limit",
+   "a value greater than or equal to 1");
+return false;
+}
+
 return true;
 }
 
@@ -1699,6 +1711,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 if (params->has_x_vcpu_dirty_limit_period) {
 dest->x_vcpu_dirty_limit_period = params->x_vcpu_dirty_limit_period;
 }
+
+if (params->has_vcpu_dirty_limit) {
+dest->vcpu_dirty_limit = params->vcpu_dirty_limit;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1825,6 +1841,9 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 s->parameters.x_vcpu_dirty_limit_period =
 params->x_vcpu_dirty_limit_period;
 }
+if (params->has_vcpu_dirty_limit) {
+s->parameters.vcpu_dirty_limit = params->vcpu_dirty_limit;
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -4429,6 +4448,9 @@ static Property migration_properties[] = {
 DEFINE_PROP_UINT64("x-vcpu-dirty-limit-period", MigrationState,
parameters.x_vcpu_dirty_limit_period,
DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD),
+DEFINE_PROP_UINT64("vcpu-dirty-limit", MigrationState,
+   parameters.vcpu_dirty_limit,
+   DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -4521,6 +4543,7 @@ static void migration_instance_init(Object *obj)
 params->has_tls_hostname = true;
 params->has_tls_authz = true;
 params->has_x_vcpu_dirty_limit_period = true;
+params->has_vcpu_dirty_limit = true;
 
 qemu_sem_init(&s->postcopy_pause_sem, 0);
 qemu_sem_init(&s->postcopy_pause_rp_sem, 0);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index a3170ca..9ad6ee5 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -517,6 +517,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 monitor_printf(mon, "%s: %" PRIu64 " ms\n",
 MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD),
 params->x_vcpu_dirty_limit_period);
+
+monitor_printf(mon, "%s: %" PRIu64 " MB/s\n",
+MigrationParameter_str(MIGRATION_PARAMETER_VCPU_DIRTY_LIMIT),
+params->vcpu_dirty_limit);
 }
 
 qapi_free_MigrationParameters(params);
@@ -1340,6 +1344,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 p->has_x_vcpu_dirty_limit_period = true;
 visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err);
 break;
+case MIGRATION_PARAMETER_VCPU_DIRTY_LIMIT:
+p->has_vcpu_dirty_limit = true;
+visit_type_size(v, param, &p->vcpu_dirty_limit, &err);
+break;
 default:
 assert(0);
 }
diff --git a/qapi/migration.json b/qapi/migration.json
index c428bcd..7e868a1 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -780,6 +780,9 @@
 # live migration. Should be in the range 1 to 
1000ms,
 # defaults to 1000ms. (Since 7.3)
 #
+# @vcpu-dirty-limit: Dirtyrate limit (MB/s) 

[PATCH v3 08/10] migration: Implement dirty-limit convergence algo

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

Implement dirty-limit convergence algo for live migration,
which is kind of like auto-converge algo but using dirty-limit
instead of cpu throttle to make migration convergent.

Enable the dirty page limit if dirty_rate_high_cnt is greater than 2
when the dirty-limit capability is enabled; disable dirty-limit if
migration is cancelled.

Note that "set_vcpu_dirty_limit", "cancel_vcpu_dirty_limit"
commands are not allowed during dirty-limit live migration.

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c  |  3 +++
 migration/ram.c| 63 ++
 migration/trace-events |  1 +
 softmmu/dirtylimit.c   | 22 ++
 4 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 702e7f4..127d0fe 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -240,6 +240,9 @@ void migration_cancel(const Error *error)
 if (error) {
 migrate_set_error(current_migration, error);
 }
+if (migrate_dirty_limit()) {
+qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
+}
 migrate_fd_cancel(current_migration);
 }
 
diff --git a/migration/ram.c b/migration/ram.c
index 5e66652..78b9167 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -45,6 +45,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-types-migration.h"
 #include "qapi/qapi-events-migration.h"
+#include "qapi/qapi-commands-migration.h"
 #include "qapi/qmp/qerror.h"
 #include "trace.h"
 #include "exec/ram_addr.h"
@@ -57,6 +58,8 @@
 #include "qemu/iov.h"
 #include "multifd.h"
 #include "sysemu/runstate.h"
+#include "sysemu/dirtylimit.h"
+#include "sysemu/kvm.h"
 
 #include "hw/boards.h" /* for machine_dump_guest_core() */
 
@@ -1139,6 +1142,30 @@ static void migration_update_rates(RAMState *rs, int64_t 
end_time)
 }
 }
 
+/*
+ * Enable dirty-limit to throttle down the guest
+ */
+static void migration_dirty_limit_guest(void)
+{
+static int64_t quota_dirtyrate;
+MigrationState *s = migrate_get_current();
+
+/*
+ * If dirty limit already enabled and migration parameter
+ * vcpu-dirty-limit untouched.
+ */
+if (dirtylimit_in_service() &&
+quota_dirtyrate == s->parameters.vcpu_dirty_limit) {
+return;
+}
+
+quota_dirtyrate = s->parameters.vcpu_dirty_limit;
+
+/* Set or update quota dirty limit */
+qmp_set_vcpu_dirty_limit(false, -1, quota_dirtyrate, NULL);
+trace_migration_dirty_limit_guest(quota_dirtyrate);
+}
+
 static void migration_trigger_throttle(RAMState *rs)
 {
 MigrationState *s = migrate_get_current();
@@ -1148,26 +1175,32 @@ static void migration_trigger_throttle(RAMState *rs)
 uint64_t bytes_dirty_period = rs->num_dirty_pages_period * 
TARGET_PAGE_SIZE;
 uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
 
-/* During block migration the auto-converge logic incorrectly detects
- * that ram migration makes no progress. Avoid this by disabling the
- * throttling logic during the bulk phase of block migration. */
-if (blk_mig_bulk_active()) {
-return;
-}
+/*
+ * The following detection logic can be refined later. For now:
+ * Check to see if the ratio between dirtied bytes and the approx.
+ * amount of bytes that just got transferred since the last time
+ * we were in this routine reaches the threshold. If that happens
+ * twice, start or increase throttling.
+ */
 
-if (migrate_auto_converge()) {
-/* The following detection logic can be refined later. For now:
-   Check to see if the ratio between dirtied bytes and the approx.
-   amount of bytes that just got transferred since the last time
-   we were in this routine reaches the threshold. If that happens
-   twice, start or increase throttling. */
+if ((bytes_dirty_period > bytes_dirty_threshold) &&
+(++rs->dirty_rate_high_cnt >= 2)) {
+rs->dirty_rate_high_cnt = 0;
+/*
+ * During block migration the auto-converge logic incorrectly detects
+ * that ram migration makes no progress. Avoid this by disabling the
+ * throttling logic during the bulk phase of block migration
+ */
+if (blk_mig_bulk_active()) {
+return;
+}
 
-if ((bytes_dirty_period > bytes_dirty_threshold) &&
-(++rs->dirty_rate_high_cnt >= 2)) {
+if (migrate_auto_converge()) {
 trace_migration_throttle();
-rs->dirty_rate_high_cnt = 0;
 mig_throttle_guest_down(bytes_dirty_period,
 bytes_dirty_threshold);
+} else if (migrate_dirty_limit()) {
+migration_dirty_limit_guest();
 }
 }
 }
diff --git a/migration/trace-events b/migration/trace-events
index 57003ed..33a2666 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -91,6 +91,7 @@ 

[PATCH v3 02/10] softmmu/dirtylimit: Add parameter check for hmp "set_vcpu_dirty_limit"

2022-12-03 Thread huangy81
From: Hyman Huang(黄勇) 

The dirty_rate parameter of the hmp command "set_vcpu_dirty_limit" is
invalid if less than 0, so add a parameter check for it.

Note that this patch also deletes the unsolicited help message and
cleans up the code.
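
For reference, a sketch of the HMP usage this check guards (the value is
illustrative; a negative dirty_rate is now rejected with an error instead
of the old help hint):

    (qemu) set_vcpu_dirty_limit 200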

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Markus Armbruster 
Reviewed-by: Peter Xu 
---
 softmmu/dirtylimit.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 940d238..53b66d5 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -515,14 +515,15 @@ void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict 
*qdict)
 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
 Error *err = NULL;
 
-qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
-if (err) {
-hmp_handle_error(mon, err);
-return;
+if (dirty_rate < 0) {
+error_setg(, "invalid dirty page limit %ld", dirty_rate);
+goto out;
 }
 
-monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
-   "dirty limit for virtual CPU]\n");
+qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
+
+out:
+hmp_handle_error(mon, err);
 }
 
 static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
-- 
1.8.3.1




[PATCH v2 08/11] migration: Export dirty-limit time info

2022-11-21 Thread huangy81
From: Hyman Huang(黄勇) 

Export dirty limit throttle time and estimated ring full
time, through which we can observe the process of dirty
limit during live migration.
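
A sketch of how the new fields surface in QMP (the numbers are purely
illustrative, and other MigrationInfo members are omitted):

    { "execute": "query-migrate" }
    <- { "return": { "status": "active",
                     "dirty-limit-throttle-us-per-full": 500,
                     "dirty-limit-us-ring-full": 1200 } }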

Signed-off-by: Hyman Huang(黄勇) 
---
 include/sysemu/dirtylimit.h |  2 ++
 migration/migration.c   | 10 ++
 monitor/hmp-cmds.c  | 10 ++
 qapi/migration.json | 10 +-
 softmmu/dirtylimit.c| 31 +++
 5 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h
index 8d2c1f3..98cc4a6 100644
--- a/include/sysemu/dirtylimit.h
+++ b/include/sysemu/dirtylimit.h
@@ -34,4 +34,6 @@ void dirtylimit_set_vcpu(int cpu_index,
 void dirtylimit_set_all(uint64_t quota,
 bool enable);
 void dirtylimit_vcpu_execute(CPUState *cpu);
+int64_t dirtylimit_throttle_us_per_full(void);
+int64_t dirtylimit_us_ring_full(void);
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index 096b61a..886c25d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -62,6 +62,7 @@
 #include "yank_functions.h"
 #include "sysemu/qtest.h"
 #include "sysemu/kvm.h"
+#include "sysemu/dirtylimit.h"
 
 #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling 
*/
 
@@ -1112,6 +1113,15 @@ static void populate_ram_info(MigrationInfo *info, 
MigrationState *s)
 info->ram->remaining = ram_bytes_remaining();
 info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
 }
+
+if (migrate_dirty_limit() && dirtylimit_in_service()) {
+info->has_dirty_limit_throttle_us_per_full = true;
+info->dirty_limit_throttle_us_per_full =
+dirtylimit_throttle_us_per_full();
+
+info->has_dirty_limit_us_ring_full = true;
+info->dirty_limit_us_ring_full = dirtylimit_us_ring_full();
+}
 }
 
 static void populate_disk_info(MigrationInfo *info)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 9ad6ee5..9d02baf 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -339,6 +339,16 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->cpu_throttle_percentage);
 }
 
+if (info->has_dirty_limit_throttle_us_per_full) {
+monitor_printf(mon, "dirty-limit throttle time: %" PRIi64 " us\n",
+   info->dirty_limit_throttle_us_per_full);
+}
+
+if (info->has_dirty_limit_us_ring_full) {
+monitor_printf(mon, "dirty-limit ring full time: %" PRIi64 " us\n",
+   info->dirty_limit_us_ring_full);
+}
+
 if (info->has_postcopy_blocktime) {
 monitor_printf(mon, "postcopy blocktime: %u\n",
info->postcopy_blocktime);
diff --git a/qapi/migration.json b/qapi/migration.json
index af6b2da..62db5cb 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -242,6 +242,12 @@
 #   Present and non-empty when migration is blocked.
 #   (since 6.0)
 #
+# @dirty-limit-throttle-us-per-full: Throttle time (us) during the period of
+#dirty ring full (since 7.1)
+#
+# @dirty-limit-us-ring-full: Estimated periodic time (us) of dirty ring full.
+#(since 7.1)
+#
 # Since: 0.14
 ##
 { 'struct': 'MigrationInfo',
@@ -259,7 +265,9 @@
'*postcopy-blocktime' : 'uint32',
'*postcopy-vcpu-blocktime': ['uint32'],
'*compression': 'CompressionStats',
-   '*socket-address': ['SocketAddress'] } }
+   '*socket-address': ['SocketAddress'],
+   '*dirty-limit-throttle-us-per-full': 'int64',
+   '*dirty-limit-us-ring-full': 'int64'} }
 
 ##
 # @query-migrate:
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 3f3c405..9d1df9b 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -573,6 +573,37 @@ static struct DirtyLimitInfo *dirtylimit_query_vcpu(int 
cpu_index)
 return info;
 }
 
+/* Pick up first vcpu throttle time by default */
+int64_t dirtylimit_throttle_us_per_full(void)
+{
+CPUState *cpu = first_cpu;
+return cpu->throttle_us_per_full;
+}
+
+/*
+ * Estimate dirty ring full time under current dirty page rate.
+ * Return -1 if guest doesn't dirty memory.
+ */
+int64_t dirtylimit_us_ring_full(void)
+{
+CPUState *cpu;
+uint64_t curr_rate = 0;
+int nvcpus = 0;
+
+CPU_FOREACH(cpu) {
+if (cpu->running) {
+nvcpus++;
+curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
+}
+}
+
+if (!curr_rate || !nvcpus) {
+return -1;
+}
+
+return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
+}
+
 static struct DirtyLimitInfoList *dirtylimit_query_all(void)
 {
 int i, index;
-- 
1.8.3.1




[PATCH v2 07/11] migration: Implement dirty-limit convergence algo

2022-11-21 Thread huangy81
From: Hyman Huang(黄勇) 

Implement dirty-limit convergence algo for live migration,
which is kind of like auto-converge algo but using dirty-limit
instead of cpu throttle to make migration convergent.

Enable the dirty page limit if dirty_rate_high_cnt is greater than 2
when the dirty-limit capability is enabled; disable dirty-limit if
migration is cancelled.

Note that "set_vcpu_dirty_limit", "cancel_vcpu_dirty_limit"
commands are not allowed during dirty-limit live migration.

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c  |  1 +
 migration/ram.c| 62 +++---
 migration/trace-events |  1 +
 softmmu/dirtylimit.c   | 22 ++
 4 files changed, 73 insertions(+), 13 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 86950a1..096b61a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -240,6 +240,7 @@ void migration_cancel(const Error *error)
 if (error) {
 migrate_set_error(current_migration, error);
 }
+qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
 migrate_fd_cancel(current_migration);
 }
 
diff --git a/migration/ram.c b/migration/ram.c
index dc1de9d..94516b7 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -45,6 +45,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-types-migration.h"
 #include "qapi/qapi-events-migration.h"
+#include "qapi/qapi-commands-migration.h"
 #include "qapi/qmp/qerror.h"
 #include "trace.h"
 #include "exec/ram_addr.h"
@@ -57,6 +58,8 @@
 #include "qemu/iov.h"
 #include "multifd.h"
 #include "sysemu/runstate.h"
+#include "sysemu/dirtylimit.h"
+#include "sysemu/kvm.h"
 
 #include "hw/boards.h" /* for machine_dump_guest_core() */
 
@@ -1139,6 +1142,30 @@ static void migration_update_rates(RAMState *rs, int64_t 
end_time)
 }
 }
 
+/*
+ * Enable dirty-limit to throttle down the guest
+ */
+static void migration_dirty_limit_guest(void)
+{
+static int64_t quota_dirtyrate;
+MigrationState *s = migrate_get_current();
+
+/*
+ * If dirty limit already enabled and migration parameter
+ * vcpu-dirty-limit untouched.
+ */
+if (dirtylimit_in_service() &&
+quota_dirtyrate == s->parameters.vcpu_dirty_limit) {
+return;
+}
+
+quota_dirtyrate = s->parameters.vcpu_dirty_limit;
+
+/* Set or update quota dirty limit */
+qmp_set_vcpu_dirty_limit(false, -1, quota_dirtyrate, NULL);
+trace_migration_dirty_limit_guest(quota_dirtyrate);
+}
+
 static void migration_trigger_throttle(RAMState *rs)
 {
 MigrationState *s = migrate_get_current();
@@ -1148,22 +1175,31 @@ static void migration_trigger_throttle(RAMState *rs)
 uint64_t bytes_dirty_period = rs->num_dirty_pages_period * 
TARGET_PAGE_SIZE;
 uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
 
-/* During block migration the auto-converge logic incorrectly detects
- * that ram migration makes no progress. Avoid this by disabling the
- * throttling logic during the bulk phase of block migration. */
-if (migrate_auto_converge() && !blk_mig_bulk_active()) {
-/* The following detection logic can be refined later. For now:
-   Check to see if the ratio between dirtied bytes and the approx.
-   amount of bytes that just got transferred since the last time
-   we were in this routine reaches the threshold. If that happens
-   twice, start or increase throttling. */
-
-if ((bytes_dirty_period > bytes_dirty_threshold) &&
-(++rs->dirty_rate_high_cnt >= 2)) {
+/*
+ * The following detection logic can be refined later. For now:
+ * Check to see if the ratio between dirtied bytes and the approx.
+ * amount of bytes that just got transferred since the last time
+ * we were in this routine reaches the threshold. If that happens
+ * twice, start or increase throttling.
+ */
+
+if ((bytes_dirty_period > bytes_dirty_threshold) &&
+(++rs->dirty_rate_high_cnt >= 2)) {
+rs->dirty_rate_high_cnt = 0;
+/*
+ * During block migration the auto-converge logic incorrectly detects
+ * that ram migration makes no progress. Avoid this by disabling the
+ * throttling logic during the bulk phase of block migration
+ */
+
+if (migrate_auto_converge() && !blk_mig_bulk_active()) {
 trace_migration_throttle();
-rs->dirty_rate_high_cnt = 0;
 mig_throttle_guest_down(bytes_dirty_period,
 bytes_dirty_threshold);
+} else if (migrate_dirty_limit() &&
+   kvm_dirty_ring_enabled() &&
+   migration_is_active(s)) {
+migration_dirty_limit_guest();
 }
 }
 }
diff --git a/migration/trace-events b/migration/trace-events
index 57003ed..33a2666 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -91,6 +91,7 @@ migration_bitmap_sync_start(void) ""
 

[PATCH v2 10/11] tests/migration: Introduce dirty-ring-size option into guestperf

2022-11-21 Thread huangy81
From: Hyman Huang(黄勇) 

The guestperf tool does not enable the dirty ring feature when testing
migration by default.

To support dirty ring migration performance tests, introduce the
dirty-ring-size option into the guestperf tools, which ranges over
[1024, 65536].

To set dirty ring size with 4096 during migration test:
$ ./tests/migration/guestperf.py --dirty-ring-size 4096 xxx

Signed-off-by: Hyman Huang(黄勇) 
---
 tests/migration/guestperf/engine.py   | 7 ++-
 tests/migration/guestperf/hardware.py | 8 ++--
 tests/migration/guestperf/shell.py| 7 ++-
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/tests/migration/guestperf/engine.py 
b/tests/migration/guestperf/engine.py
index 59fca2c..d7b75b9 100644
--- a/tests/migration/guestperf/engine.py
+++ b/tests/migration/guestperf/engine.py
@@ -303,7 +303,6 @@ def _get_common_args(self, hardware, tunnelled=False):
 cmdline = "'" + cmdline + "'"
 
 argv = [
-"-accel", "kvm",
 "-cpu", "host",
 "-kernel", self._kernel,
 "-initrd", self._initrd,
@@ -314,6 +313,12 @@ def _get_common_args(self, hardware, tunnelled=False):
 "-smp", str(hardware._cpus),
 ]
 
+if hardware._dirty_ring_size:
+argv.extend(["-accel", "kvm,dirty-ring-size=%s" %
+ hardware._dirty_ring_size])
+else:
+argv.extend(["-accel", "kvm"])
+
 if self._debug:
 argv.extend(["-device", "sga"])
 
diff --git a/tests/migration/guestperf/hardware.py 
b/tests/migration/guestperf/hardware.py
index 3145785..f779cc0 100644
--- a/tests/migration/guestperf/hardware.py
+++ b/tests/migration/guestperf/hardware.py
@@ -23,7 +23,8 @@ def __init__(self, cpus=1, mem=1,
  src_cpu_bind=None, src_mem_bind=None,
  dst_cpu_bind=None, dst_mem_bind=None,
  prealloc_pages = False,
- huge_pages=False, locked_pages=False):
+ huge_pages=False, locked_pages=False,
+ dirty_ring_size=0):
 self._cpus = cpus
 self._mem = mem # GiB
 self._src_mem_bind = src_mem_bind # List of NUMA nodes
@@ -33,6 +34,7 @@ def __init__(self, cpus=1, mem=1,
 self._prealloc_pages = prealloc_pages
 self._huge_pages = huge_pages
 self._locked_pages = locked_pages
+self._dirty_ring_size = dirty_ring_size
 
 
 def serialize(self):
@@ -46,6 +48,7 @@ def serialize(self):
 "prealloc_pages": self._prealloc_pages,
 "huge_pages": self._huge_pages,
 "locked_pages": self._locked_pages,
+"dirty_ring_size": self._dirty_ring_size,
 }
 
 @classmethod
@@ -59,4 +62,5 @@ def deserialize(cls, data):
 data["dst_mem_bind"],
 data["prealloc_pages"],
 data["huge_pages"],
-data["locked_pages"])
+data["locked_pages"],
+data["dirty_ring_size"])
diff --git a/tests/migration/guestperf/shell.py 
b/tests/migration/guestperf/shell.py
index 8a809e3..559616f 100644
--- a/tests/migration/guestperf/shell.py
+++ b/tests/migration/guestperf/shell.py
@@ -60,6 +60,8 @@ def __init__(self):
 parser.add_argument("--prealloc-pages", dest="prealloc_pages", 
default=False)
 parser.add_argument("--huge-pages", dest="huge_pages", default=False)
 parser.add_argument("--locked-pages", dest="locked_pages", 
default=False)
+parser.add_argument("--dirty-ring-size", dest="dirty_ring_size",
+default=0, type=int)
 
 self._parser = parser
 
@@ -89,7 +91,10 @@ def split_map(value):
 
 locked_pages=args.locked_pages,
 huge_pages=args.huge_pages,
-prealloc_pages=args.prealloc_pages)
+prealloc_pages=args.prealloc_pages,
+
+dirty_ring_size=args.dirty_ring_size)
+
 
 
 class Shell(BaseShell):
-- 
1.8.3.1




[PATCH v2 03/11] kvm-all: Do not allow reap vcpu dirty ring buffer if not ready

2022-11-21 Thread huangy81
From: Hyman Huang(黄勇) 

When testing a vm with a large number of vcpus with the dirtylimit
feature, Qemu crashed due to the assertion in kvm_dirty_ring_reap_one,
which asserts that the vcpu's kvm_dirty_gfns has been allocated and is
not NULL.

Because the dirty ring reaper thread races with the Qemu main thread,
the reaper may reap a vcpu's dirty ring buffer before the main thread
has completed vcpu instantiation. So add waiting logic in the reaper
thread and start reaping only after vcpu instantiation is completed.

Signed-off-by: Hyman Huang(黄勇) 
---
 accel/kvm/kvm-all.c | 36 
 1 file changed, 36 insertions(+)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f99b0be..9457715 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1401,6 +1401,35 @@ out:
 kvm_slots_unlock();
 }
 
+/*
+ * test if dirty ring has been initialized by checking if vcpu
+ * has been initialized and gfns was allocated correspondingly.
+ * return true if dirty ring has been initialized, false otherwise.
+ */
+static bool kvm_vcpu_dirty_ring_initialized(void)
+{
+CPUState *cpu;
+MachineState *ms = MACHINE(qdev_get_machine());
+int ncpus = ms->smp.cpus;
+
+/*
+ * assume vcpu has not been initialized if the generation
+ * id is less than the number of vcpus
+ */
+if (ncpus > cpu_list_generation_id_get()) {
+return false;
+}
+
+CPU_FOREACH(cpu) {
+if (!cpu->kvm_dirty_gfns) {
+return false;
+}
+}
+
+return true;
+}
+
+
 static void *kvm_dirty_ring_reaper_thread(void *data)
 {
 KVMState *s = data;
@@ -1410,6 +1439,13 @@ static void *kvm_dirty_ring_reaper_thread(void *data)
 
 trace_kvm_dirty_ring_reaper("init");
 
+retry:
+/* don't allow reaping dirty ring if ring buffer hasn't been mapped */
+if (!kvm_vcpu_dirty_ring_initialized()) {
+sleep(1);
+goto retry;
+}
+
 while (true) {
 r->reaper_state = KVM_DIRTY_RING_REAPER_WAIT;
 trace_kvm_dirty_ring_reaper("wait");
-- 
1.8.3.1




[PATCH v2 05/11] qapi/migration: Introduce vcpu-dirty-limit parameters

2022-11-21 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce "vcpu-dirty-limit" migration parameter used
to limit dirty page rate during live migration.

"vcpu-dirty-limit" and "x-vcpu-dirty-limit-period" are
two dirty-limit-related migration parameters, which can
be set before and during live migration by qmp
migrate-set-parameters.

These two parameters are used to help implement the dirty
page rate limit algo of migration.

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c | 23 +++
 monitor/hmp-cmds.c|  8 
 qapi/migration.json   | 18 +++---
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 701267c..e2aada2 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -117,6 +117,7 @@
 #define DEFAULT_MIGRATE_ANNOUNCE_STEP100
 
 #define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 500 /* ms */
+#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT1   /* MB/s */
 
 static NotifierList migration_state_notifiers =
 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -968,6 +969,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
 params->has_x_vcpu_dirty_limit_period = true;
 params->x_vcpu_dirty_limit_period = 
s->parameters.x_vcpu_dirty_limit_period;
 
+params->has_vcpu_dirty_limit = true;
+params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit;
+
 return params;
 }
 
@@ -1578,6 +1582,14 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 return false;
 }
 
+if (params->has_vcpu_dirty_limit &&
+(params->vcpu_dirty_limit < 1)) {
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+   "vcpu_dirty_limit",
+   "is invalid, it must greater then 1 MB/s");
+return false;
+}
+
 return true;
 }
 
@@ -1681,6 +1693,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 if (params->has_x_vcpu_dirty_limit_period) {
 dest->x_vcpu_dirty_limit_period = params->x_vcpu_dirty_limit_period;
 }
+
+if (params->has_vcpu_dirty_limit) {
+dest->vcpu_dirty_limit = params->vcpu_dirty_limit;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1807,6 +1823,9 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 s->parameters.x_vcpu_dirty_limit_period =
 params->x_vcpu_dirty_limit_period;
 }
+if (params->has_vcpu_dirty_limit) {
+s->parameters.vcpu_dirty_limit = params->vcpu_dirty_limit;
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -4411,6 +4430,9 @@ static Property migration_properties[] = {
 DEFINE_PROP_UINT64("x-vcpu-dirty-limit-period", MigrationState,
parameters.x_vcpu_dirty_limit_period,
DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD),
+DEFINE_PROP_UINT64("vcpu-dirty-limit", MigrationState,
+   parameters.vcpu_dirty_limit,
+   DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -4503,6 +4525,7 @@ static void migration_instance_init(Object *obj)
 params->has_tls_hostname = true;
 params->has_tls_authz = true;
 params->has_x_vcpu_dirty_limit_period = true;
+params->has_vcpu_dirty_limit = true;
 
 qemu_sem_init(&s->postcopy_pause_sem, 0);
 qemu_sem_init(&s->postcopy_pause_rp_sem, 0);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index a3170ca..9ad6ee5 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -517,6 +517,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 monitor_printf(mon, "%s: %" PRIu64 " ms\n",
 MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD),
 params->x_vcpu_dirty_limit_period);
+
+monitor_printf(mon, "%s: %" PRIu64 " MB/s\n",
+MigrationParameter_str(MIGRATION_PARAMETER_VCPU_DIRTY_LIMIT),
+params->vcpu_dirty_limit);
 }
 
 qapi_free_MigrationParameters(params);
@@ -1340,6 +1344,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 p->has_x_vcpu_dirty_limit_period = true;
 visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err);
 break;
+case MIGRATION_PARAMETER_VCPU_DIRTY_LIMIT:
+p->has_vcpu_dirty_limit = true;
+visit_type_size(v, param, &p->vcpu_dirty_limit, &err);
+break;
 default:
 assert(0);
 }
diff --git a/qapi/migration.json b/qapi/migration.json
index 5175779..dd667dd 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -780,6 +780,9 @@
 # Should be in the range 1 to 1000ms, defaults to 
500ms.
 # (Since 7.1)
 #
+# @vcpu-dirty-limit: Dirtyrate limit (MB/s) during live migration.
+#

[PATCH v2 04/11] qapi/migration: Introduce x-vcpu-dirty-limit-period parameter

2022-11-21 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce "x-vcpu-dirty-limit-period" migration experimental
parameter, which is in the range of 1 to 1000ms and used to
make dirtyrate calculation period configurable.

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c | 26 ++
 monitor/hmp-cmds.c|  8 
 qapi/migration.json   | 34 +++---
 3 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 739bb68..701267c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -116,6 +116,8 @@
 #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS5
 #define DEFAULT_MIGRATE_ANNOUNCE_STEP100
 
+#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 500 /* ms */
+
 static NotifierList migration_state_notifiers =
 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
 
@@ -963,6 +965,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
s->parameters.block_bitmap_mapping);
 }
 
+params->has_x_vcpu_dirty_limit_period = true;
+params->x_vcpu_dirty_limit_period = 
s->parameters.x_vcpu_dirty_limit_period;
+
 return params;
 }
 
@@ -1564,6 +1569,15 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 }
 #endif
 
+if (params->has_x_vcpu_dirty_limit_period &&
+(params->x_vcpu_dirty_limit_period < 1 ||
+ params->x_vcpu_dirty_limit_period > 1000)) {
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+   "x_vcpu_dirty_limit_period",
+   "is invalid, it must be in the range of 1 to 1000 ms");
+return false;
+}
+
 return true;
 }
 
@@ -1663,6 +1677,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 dest->has_block_bitmap_mapping = true;
 dest->block_bitmap_mapping = params->block_bitmap_mapping;
 }
+
+if (params->has_x_vcpu_dirty_limit_period) {
+dest->x_vcpu_dirty_limit_period = params->x_vcpu_dirty_limit_period;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1785,6 +1803,10 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 QAPI_CLONE(BitmapMigrationNodeAliasList,
params->block_bitmap_mapping);
 }
+if (params->has_x_vcpu_dirty_limit_period) {
+s->parameters.x_vcpu_dirty_limit_period =
+params->x_vcpu_dirty_limit_period;
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -4386,6 +4408,9 @@ static Property migration_properties[] = {
 DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
 DEFINE_PROP_STRING("tls-hostname", MigrationState, 
parameters.tls_hostname),
 DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),
+DEFINE_PROP_UINT64("x-vcpu-dirty-limit-period", MigrationState,
+   parameters.x_vcpu_dirty_limit_period,
+   DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -4477,6 +4502,7 @@ static void migration_instance_init(Object *obj)
 params->has_tls_creds = true;
 params->has_tls_hostname = true;
 params->has_tls_authz = true;
+params->has_x_vcpu_dirty_limit_period = true;
 
 qemu_sem_init(&s->postcopy_pause_sem, 0);
 qemu_sem_init(&s->postcopy_pause_rp_sem, 0);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 01b789a..a3170ca 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -513,6 +513,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 }
 }
 }
+
+monitor_printf(mon, "%s: %" PRIu64 " ms\n",
+MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD),
+params->x_vcpu_dirty_limit_period);
 }
 
 qapi_free_MigrationParameters(params);
@@ -1332,6 +1336,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 error_setg(, "The block-bitmap-mapping parameter can only be set "
"through QMP");
 break;
+case MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD:
+p->has_x_vcpu_dirty_limit_period = true;
+visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err);
+break;
 default:
 assert(0);
 }
diff --git a/qapi/migration.json b/qapi/migration.json
index 88ecf86..5175779 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -776,8 +776,13 @@
 #block device name if there is one, and to their node 
name
 #otherwise. (Since 5.2)
 #
+# @x-vcpu-dirty-limit-period: Periodic time (ms) of dirty limit during live 
migration.
+# Should be in the range 1 to 1000ms, defaults to 
500ms.
+# (Since 7.1)
+#
 # Features:
-# 

[PATCH v2 09/11] tests: Add migration dirty-limit capability test

2022-11-21 Thread huangy81
From: Hyman Huang(黄勇) 

Add a migration dirty-limit capability test if the kernel supports
dirty ring.

The migration dirty-limit capability introduces two parameters,
x-vcpu-dirty-limit-period and vcpu-dirty-limit, which are used to
implement live migration with dirty limit.

The test case does the following things:
1. start src, dst vm and enable dirty-limit capability
2. start migration and then cancel it to check if dirty limit
   stops working.
3. restart dst vm
4. start migrate and enable dirty-limit capability
5. check if migration satisfies the convergence condition
   during the pre-switchover phase.

Signed-off-by: Hyman Huang(黄勇) 
---
 tests/qtest/migration-test.c | 154 +++
 1 file changed, 154 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 442998d..baa614c 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2422,6 +2422,158 @@ static void test_vcpu_dirty_limit(void)
 dirtylimit_stop_vm(vm);
 }
 
+static void migrate_dirty_limit_wait_showup(QTestState *from,
+const int64_t period,
+const int64_t value)
+{
+/* Enable dirty limit capability */
+migrate_set_capability(from, "dirty-limit", true);
+
+/* Set dirty limit parameters */
+migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period);
+migrate_set_parameter_int(from, "vcpu-dirty-limit", value);
+
+/* Make sure migrate can't converge */
+migrate_ensure_non_converge(from);
+
+/* To check limit rate after precopy */
+migrate_set_capability(from, "pause-before-switchover", true);
+
+/* Wait for the serial output from the source */
+wait_for_serial("src_serial");
+}
+
+/*
+ * This test does:
+ *  source   target
+ *   migrate_incoming
+ * migrate
+ * migrate_cancel
+ *   restart target
+ * migrate
+ *
+ *  And see that if dirty limit works correctly
+ */
+static void test_migrate_dirty_limit(void)
+{
+g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
+QTestState *from, *to;
+int64_t remaining, throttle_us_per_full;
+/*
+ * We want the test to be stable and as fast as possible.
+ * E.g., with 1Gb/s bandwidth migration may pass without dirty limit,
+ * so we need to decrease the bandwidth.
+ */
+const int64_t dirtylimit_period = 1000, dirtylimit_value = 50;
+const int64_t max_bandwidth = 4; /* ~400Mb/s */
+const int64_t downtime_limit = 250; /* 250ms */
+/*
+ * We migrate through unix-socket (> 500Mb/s).
+ * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s).
+ * So, we can predict expected_threshold
+ */
+const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000;
+int max_try_count = 10;
+MigrateCommon args = {
+.start = {
+.hide_stderr = true,
+.use_dirty_ring = true,
+},
+.listen_uri = uri,
+.connect_uri = uri,
+};
+
+/* Start src, dst vm */
+if (test_migrate_start(&from, &to, args.listen_uri, &args)) {
+return;
+}
+
+/* Prepare for dirty limit migration and wait src vm show up */
+migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value);
+
+/* Start migrate */
+migrate_qmp(from, uri, "{}");
+
+/* Wait for dirty limit throttle begin */
+throttle_us_per_full = 0;
+while (throttle_us_per_full == 0) {
+throttle_us_per_full =
+read_migrate_property_int(from, 
"dirty-limit-throttle-us-per-full");
+usleep(100);
+g_assert_false(got_stop);
+}
+
+/* Now cancel migrate and wait for dirty limit throttle switch off */
+migrate_cancel(from);
+wait_for_migration_status(from, "cancelled", NULL);
+
+/* Check if dirty limit throttle switched off, set timeout 1ms */
+do {
+throttle_us_per_full =
+read_migrate_property_int(from, 
"dirty-limit-throttle-us-per-full");
+usleep(100);
+g_assert_false(got_stop);
+} while (throttle_us_per_full != 0 && --max_try_count);
+
+/* Assert dirty limit is not in service */
+g_assert_cmpint(throttle_us_per_full, ==, 0);
+
+args = (MigrateCommon) {
+.start = {
+.only_target = true,
+.use_dirty_ring = true,
+},
+.listen_uri = uri,
+.connect_uri = uri,
+};
+
+/* Restart dst vm, src vm already show up so we needn't wait anymore */
+if (test_migrate_start(&from, &to, args.listen_uri, &args)) {
+return;
+}
+
+/* Start migrate */
+migrate_qmp(from, uri, "{}");
+
+/* Wait for dirty limit throttle begin */
+throttle_us_per_full = 0;
+while (throttle_us_per_full == 0) {
+throttle_us_per_full =
+read_migrate_property_int(from, 
"dirty-limit-throttle-us-per-full");
+  

[PATCH v2 11/11] tests/migration: Introduce dirty-limit into guestperf

2022-11-21 Thread huangy81
From: Hyman Huang(黄勇) 

The guestperf tool does not currently cover dirty-limit migration;
add support for this feature.

To enable dirty-limit, set x-vcpu-dirty-limit-period
to 500ms and vcpu-dirty-limit to 10MB/s:
$ ./tests/migration/guestperf.py \
--dirty-limit --x-vcpu-dirty-limit-period 500 \
--vcpu-dirty-limit 10 --output output.json \

To run the entire standardized set of dirty-limit-enabled
comparisons, with unix migration:
$ ./tests/migration/guestperf-batch.py \
--dst-host localhost --transport unix \
--filter compr-dirty-limit* --output outputdir

Signed-off-by: Hyman Huang(黄勇) 
---
 tests/migration/guestperf/comparison.py | 24 
 tests/migration/guestperf/engine.py | 17 +
 tests/migration/guestperf/progress.py   | 17 +++--
 tests/migration/guestperf/scenario.py   | 11 ++-
 tests/migration/guestperf/shell.py  | 18 +-
 5 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/tests/migration/guestperf/comparison.py 
b/tests/migration/guestperf/comparison.py
index c03b3f6..ad403f9 100644
--- a/tests/migration/guestperf/comparison.py
+++ b/tests/migration/guestperf/comparison.py
@@ -135,4 +135,28 @@ def __init__(self, name, scenarios):
 Scenario("compr-multifd-channels-64",
  multifd=True, multifd_channels=64),
 ]),
+
+
+# Looking at effect of dirty-limit with
+# varying x_vcpu_dirty_limit_period
+Comparison("compr-dirty-limit-period", scenarios = [
+Scenario("compr-dirty-limit-period-100",
+ dirty_limit=True, x_vcpu_dirty_limit_period=100),
+Scenario("compr-dirty-limit-period-500",
+ dirty_limit=True, x_vcpu_dirty_limit_period=500),
+Scenario("compr-dirty-limit-period-1000",
+ dirty_limit=True, x_vcpu_dirty_limit_period=1000),
+]),
+
+
+# Looking at effect of dirty-limit with
+# varying vcpu_dirty_limit
+Comparison("compr-dirty-limit", scenarios = [
+Scenario("compr-dirty-limit-10MB",
+ dirty_limit=True, vcpu_dirty_limit=10),
+Scenario("compr-dirty-limit-20MB",
+ dirty_limit=True, vcpu_dirty_limit=20),
+Scenario("compr-dirty-limit-50MB",
+ dirty_limit=True, vcpu_dirty_limit=50),
+]),
 ]
diff --git a/tests/migration/guestperf/engine.py 
b/tests/migration/guestperf/engine.py
index d7b75b9..e3940bf 100644
--- a/tests/migration/guestperf/engine.py
+++ b/tests/migration/guestperf/engine.py
@@ -102,6 +102,8 @@ def _migrate_progress(self, vm):
 info.get("expected-downtime", 0),
 info.get("setup-time", 0),
 info.get("cpu-throttle-percentage", 0),
+info.get("dirty-limit-throttle-us-per-full", 0),
+info.get("dirty-limit-us-ring-full", 0),
 )
 
 def _migrate(self, hardware, scenario, src, dst, connect_uri):
@@ -203,6 +205,21 @@ def _migrate(self, hardware, scenario, src, dst, 
connect_uri):
 resp = dst.command("migrate-set-parameters",
multifd_channels=scenario._multifd_channels)
 
+if scenario._dirty_limit:
+if not hardware._dirty_ring_size:
+raise Exception("dirty ring size must be configured when "
+"testing dirty limit migration")
+
+resp = src.command("migrate-set-capabilities",
+   capabilities = [
+   { "capability": "dirty-limit",
+ "state": True }
+   ])
+resp = src.command("migrate-set-parameters",
+x_vcpu_dirty_limit_period=scenario._x_vcpu_dirty_limit_period)
+resp = src.command("migrate-set-parameters",
+   vcpu_dirty_limit=scenario._vcpu_dirty_limit)
+
 resp = src.command("migrate", uri=connect_uri)
 
 post_copy = False
diff --git a/tests/migration/guestperf/progress.py 
b/tests/migration/guestperf/progress.py
index ab1ee57..dd5d86b 100644
--- a/tests/migration/guestperf/progress.py
+++ b/tests/migration/guestperf/progress.py
@@ -81,7 +81,9 @@ def __init__(self,
  downtime,
  downtime_expected,
  setup_time,
- throttle_pcent):
+ throttle_pcent,
+ dirty_limit_throttle_us_per_full,
+ dirty_limit_us_ring_full):
 
 self._status = status
 self._ram = ram
@@ -91,6 +93,11 @@ def __init__(self,
 self._downtime_expected = downtime_expected
 self._setup_time = setup_time
 self._throttle_pcent = throttle_pcent
+self._dirty_limit_throttle_us_per_full = \
+dirty_limit_throttle_us_per_full
+self._dirty_limit_us_ring_full = \
+dirty_limit_us_ring_full
+
 
 def serialize(self):
 return {
@@ -102,6 +109,10 @@ def 

[PATCH v2 06/11] migration: Introduce dirty-limit capability

2022-11-21 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce the migration dirty-limit capability, which can
be turned on before live migration to limit the dirty
page rate during live migration.

Introduce the migrate_dirty_limit function to help check
whether the dirty-limit capability is enabled during live migration.

Meanwhile, refactor vcpu_dirty_rate_stat_collect
so that the period can be configured instead of being hardcoded.

The dirty-limit capability is similar to auto-converge
but uses the dirty limit instead of the traditional cpu-throttle
to throttle the guest down. To enable this feature, turn on
the dirty-limit capability before live migration using
migrate-set-capabilities, and set the parameters
"x-vcpu-dirty-limit-period" and "vcpu-dirty-limit" suitably
to speed up convergence.
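
A hypothetical sketch (not part of this patch) of how a convergence
path might pick between the two throttling mechanisms once the
capability is negotiated; dirtylimit_start_throttle() and
cpu_throttle_raise() are assumed placeholder names, only
migrate_dirty_limit() and migrate_auto_converge() come from this
series and the existing code:

static void migration_throttle_guest(void)
{
    if (migrate_dirty_limit()) {
        /* cap each vCPU's dirty page rate via the KVM dirty ring */
        dirtylimit_start_throttle();
    } else if (migrate_auto_converge()) {
        /* legacy behaviour: stall vCPUs for a growing share of CPU time */
        cpu_throttle_raise();
    }
}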

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c | 25 +
 migration/migration.h |  1 +
 qapi/migration.json   |  4 +++-
 softmmu/dirtylimit.c  | 11 ++-
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index e2aada2..86950a1 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -61,6 +61,7 @@
 #include "sysemu/cpus.h"
 #include "yank_functions.h"
 #include "sysemu/qtest.h"
+#include "sysemu/kvm.h"
 
 #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling 
*/
 
@@ -1348,6 +1349,20 @@ static bool migrate_caps_check(bool *cap_list,
 }
 }
 
+if (cap_list[MIGRATION_CAPABILITY_DIRTY_LIMIT]) {
+if (cap_list[MIGRATION_CAPABILITY_AUTO_CONVERGE]) {
+error_setg(errp, "dirty-limit conflicts with auto-converge"
+   " either of then available currently");
+return false;
+}
+
+if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
+error_setg(errp, "dirty-limit requires KVM with accelerator"
+   " property 'dirty-ring-size' set");
+return false;
+}
+}
+
 return true;
 }
 
@@ -2526,6 +2541,15 @@ bool migrate_auto_converge(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
 }
 
+bool migrate_dirty_limit(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_LIMIT];
+}
+
 bool migrate_zero_blocks(void)
 {
 MigrationState *s;
@@ -4455,6 +4479,7 @@ static Property migration_properties[] = {
 DEFINE_PROP_MIG_CAP("x-zero-copy-send",
 MIGRATION_CAPABILITY_ZERO_COPY_SEND),
 #endif
+DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT),
 
 DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/migration/migration.h b/migration/migration.h
index cdad8ac..7fbb9f8 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -409,6 +409,7 @@ bool migrate_ignore_shared(void);
 bool migrate_validate_uuid(void);
 
 bool migrate_auto_converge(void);
+bool migrate_dirty_limit(void);
 bool migrate_use_multifd(void);
 bool migrate_pause_before_switchover(void);
 int migrate_multifd_channels(void);
diff --git a/qapi/migration.json b/qapi/migration.json
index dd667dd..af6b2da 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -477,6 +477,8 @@
 #will be handled faster.  This is a performance feature and
 #should not affect the correctness of postcopy migration.
 #(since 7.1)
+# @dirty-limit: Use dirty-limit to throttle down guest if enabled.
+#   (since 7.1)
 #
 # Features:
 # @unstable: Members @x-colo and @x-ignore-shared are experimental.
@@ -492,7 +494,7 @@
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
'validate-uuid', 'background-snapshot',
-   'zero-copy-send', 'postcopy-preempt'] }
+   'zero-copy-send', 'postcopy-preempt', 'dirty-limit'] }
 
 ##
 # @MigrationCapabilityStatus:
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index c42eddd..4537c51 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -23,6 +23,8 @@
 #include "exec/memory.h"
 #include "hw/boards.h"
 #include "sysemu/kvm.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
 #include "trace.h"
 
 /*
@@ -75,11 +77,18 @@ static bool dirtylimit_quit;
 
 static void vcpu_dirty_rate_stat_collect(void)
 {
+MigrationState *s = migrate_get_current();
 VcpuStat stat;
 int i = 0;
+int64_t period = DIRTYLIMIT_CALC_TIME_MS;
+
+if (migrate_dirty_limit() &&
+migration_is_active(s)) {
+period = s->parameters.x_vcpu_dirty_limit_period;
+}
 
 /* calculate vcpu dirtyrate */
-vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS,
+vcpu_calculate_dirtyrate(period,
 &stat,
  GLOBAL_DIRTY_LIMIT,
  false);
-- 
1.8.3.1




[PATCH v2 00/11] migration: introduce dirtylimit capability

2022-11-21 Thread huangy81
From: Hyman Huang(黄勇) 

v2: 
This version makes a few modifications compared with
version 1, as follows:
1. fix the overflow issue reported by Peter Maydell
2. add parameter check for hmp "set_vcpu_dirty_limit" command
3. fix the racing issue between dirty ring reaper thread and
   Qemu main thread.
4. add migrate parameter check for x-vcpu-dirty-limit-period
   and vcpu-dirty-limit.
5. add the logic to forbid hmp/qmp commands set_vcpu_dirty_limit,
   cancel_vcpu_dirty_limit during dirty-limit live migration when
   implementing the dirty-limit convergence algorithm.
6. add capability check to ensure auto-converge and dirty-limit
   are mutually exclusive.
7. pre-check if kvm dirty ring size is configured before setting
   dirty-limit migrate parameter 

A more comprehensive test was done comparing with version 1.

The following are test environment:
-
a. Host hardware info:

CPU:
Intel(R) Xeon(R) Gold 5218 CPU @ 2.30GHz

CPU(s):  64
On-line CPU(s) list: 0-63
Thread(s) per core:  2
Core(s) per socket:  16
Socket(s):   2
NUMA node(s):2

NUMA node0 CPU(s):   0-15,32-47
NUMA node1 CPU(s):   16-31,48-63

Memory:
Hynix  503Gi

Interface:
Intel Corporation Ethernet Connection X722 for 1GbE (rev 09)
Speed: 1000Mb/s

b. Host software info:

OS: ctyunos release 2
Kernel: 4.19.90-2102.2.0.0066.ctl2.x86_64
Libvirt baseline version:  libvirt-6.9.0
Qemu baseline version: qemu-5.0

c. vm scale
CPU: 4
Memory: 4G
-

All the supplementary test data shown below are based on the
above test environment.

In version 1, we posted test data from UnixBench as follows:

$ taskset -c 8-15 ./Run -i 2 -c 8 {unixbench test item}

host cpu: Intel(R) Xeon(R) Platinum 8378A
host interface speed: 1000Mb/s
  |---------------------+--------+------------+---------------|
  | UnixBench test item | Normal | Dirtylimit | Auto-converge |
  |---------------------+--------+------------+---------------|
  | dhry2reg            | 32800  | 32786      | 25292         |
  | whetstone-double    | 10326  | 10315      | 9847          |
  | pipe                | 15442  | 15271      | 14506         |
  | context1            | 7260   | 6235       | 4514          |
  | spawn               | 3663   | 3317       | 3249          |
  | syscall             | 4669   | 4667       | 3841          |
  |---------------------+--------+------------+---------------|

In version 2, we post supplementary test data that does not use
taskset and makes the scenario more general, as follows:

$ ./Run

per-vcpu data:
  |---------------------+--------+------------+---------------|
  | UnixBench test item | Normal | Dirtylimit | Auto-converge |
  |---------------------+--------+------------+---------------|
  | dhry2reg            | 2991   | 2902       | 1722          |
  | whetstone-double    | 1018   | 1006       | 627           |
  | Execl Throughput    | 955    | 320        | 660           |
  | File Copy - 1       | 2362   | 805        | 1325          |
  | File Copy - 2       | 1500   | 1406       | 643           |
  | File Copy - 3       | 4778   | 2160       | 1047          |
  | Pipe Throughput     | 1181   | 1170       | 842           |
  | Context Switching   | 192    | 224        | 198           |
  | Process Creation    | 490    | 145        | 95            |
  | Shell Scripts - 1   | 1284   | 565        | 610           |
  | Shell Scripts - 2   | 2368   | 900        | 1040          |
  | System Call Overhead| 983    | 948        | 698           |
  | Index Score         | 1263   | 815        | 600           |
  |---------------------+--------+------------+---------------|
Note:
  File Copy - 1: File Copy 1024 bufsize 2000 maxblocks
  File Copy - 2: File Copy 256 bufsize 500 maxblocks 
  File Copy - 3: File Copy 4096 bufsize 8000 maxblocks 
  Shell Scripts - 1: Shell Scripts (1 concurrent)
  Shell Scripts - 2: Shell Scripts (8 concurrent)

Based on the above data, we can conclude that dirty-limit
greatly improves the in-guest benchmark results in almost every respect;
the "System Benchmarks Index Score" shows about a 35% performance improvement
compared with auto-converge during live migration.

4-vcpu parallel data (we run a test vm at 4c4g scale):
  |---------------------+--------+------------+---------------|
  | UnixBench test item | Normal | Dirtylimit | Auto-converge |
  |---------------------+--------+------------+---------------|
  | dhry2reg            | 7975   | 7146       | 5071          |
  | whetstone-double    | 3982   | 3561       | 2124          |
  | Execl Throughput    | 1882   | 1205       | 768           |
  | File Copy - 1       | 1061   | 865        | 498           |
  | File Copy - 2       | 676    | 491        | 519           |
  | File Copy - 3       | 2260   | 923        | 1329          |
  | Pipe 

[PATCH v2 01/11] dirtylimit: Fix overflow when computing MB

2022-11-21 Thread huangy81
From: Hyman Huang(黄勇) 

Coverity points out an overflow problem when computing MB:
dirty_ring_size and TARGET_PAGE_SIZE are both 32 bits, so the
multiplication is done as a 32-bit operation, which
could overflow. Simplify the formula.

Meanwhile, fix a spelling mistake in the variable name.
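
To illustrate why the shift form is safe, a minimal standalone
snippet (the values are made up and intentionally oversized just to
show the 32-bit wrap; dirty_ring_size and page_size here are local
illustration variables, not the QEMU ones):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t dirty_ring_size = 1u << 20;   /* hypothetical ring size, in pages */
    uint32_t page_size       = 1u << 12;   /* 4 KiB target page size           */

    /* 32-bit multiply: 2^32 wraps to 0 before the result is widened */
    uint64_t wrong = dirty_ring_size * page_size >> 20;
    /* shifting by (20 - TARGET_PAGE_BITS) avoids the multiply entirely */
    uint64_t right = dirty_ring_size >> (20 - 12);

    printf("wrong=%llu MB right=%llu MB\n",
           (unsigned long long)wrong, (unsigned long long)right);
    return 0;
}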

Reported-by: Peter Maydell 
Signed-off-by: Peter Maydell 
Signed-off-by: Richard Henderson 
Signed-off-by: Hyman Huang(黄勇) 
---
 softmmu/dirtylimit.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 1266855..940d238 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -236,14 +236,14 @@ static inline int64_t 
dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
 {
 static uint64_t max_dirtyrate;
 uint32_t dirty_ring_size = kvm_dirty_ring_size();
-uint64_t dirty_ring_size_meory_MB =
-dirty_ring_size * TARGET_PAGE_SIZE >> 20;
+uint32_t dirty_ring_size_memory_MB =
+dirty_ring_size >> (20 - TARGET_PAGE_BITS);
 
 if (max_dirtyrate < dirtyrate) {
 max_dirtyrate = dirtyrate;
 }
 
-return dirty_ring_size_meory_MB * 100 / max_dirtyrate;
+return dirty_ring_size_memory_MB * 100ULL / max_dirtyrate;
 }
 
 static inline bool dirtylimit_done(uint64_t quota,
-- 
1.8.3.1




[PATCH v2 02/11] softmmu/dirtylimit: Add parameter check for hmp "set_vcpu_dirty_limit"

2022-11-21 Thread huangy81
From: Hyman Huang(黄勇) 

The dirty_rate parameter of the hmp command "set_vcpu_dirty_limit" is invalid
if it is less than 0, so add a parameter check for it.

Signed-off-by: Hyman Huang(黄勇) 
---
 softmmu/dirtylimit.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 940d238..c42eddd 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -515,6 +515,11 @@ void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict 
*qdict)
 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
 Error *err = NULL;
 
+if (dirty_rate < 0) {
+monitor_printf(mon, "invalid dirty page limit %ld\n", dirty_rate);
+return;
+}
+
 qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
 if (err) {
 hmp_handle_error(mon, err);
-- 
1.8.3.1




[PATCH v4 2/3] vhost-user: Refactor the chr_closed_bh

2022-11-17 Thread huangy81
From: Hyman Huang(黄勇) 

Use vhost_user_save_acked_features to implement acked features
saving.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 net/vhost-user.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/net/vhost-user.c b/net/vhost-user.c
index f5cb095..5993e4a 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -260,11 +260,7 @@ static void chr_closed_bh(void *opaque)
 s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
 
 for (i = queues -1; i >= 0; i--) {
-s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
-
-if (s->vhost_net) {
-s->acked_features = vhost_net_get_acked_features(s->vhost_net);
-}
+vhost_user_save_acked_features(ncs[i]);
 }
 
 qmp_set_link(name, false, &err);
-- 
1.8.3.1




[PATCH v4 0/3] Fix the virtio features negotiation flaw

2022-11-17 Thread huangy81
From: Hyman Huang(黄勇) 

v4:
-rebase on master
-add stub function to fix build errors
-code clean on [PATCH v2 1/2]: drop 'cleanup' parameter in
 vhost_user_save_acked_features.
-code clean on [PATCH v2 2/2]: make refactor of chr_closed_bh
 a standalone patch.

Above changes are suggested by Michael and thanks very much.

Please review,

Yong

v3:
-rebase on master
-code clean on [PATCH v2 1/2]: keep the commit self-consistent and
 do not modify the logic of saving acked_features. Just abstract the
 util function.
-modify the [PATCH v2 2/2] logic: change the behavior of saving
 acked_features in chr_closed_bh: saving acked_features only if
 features aren't 0. For the case of 0, we implement it in
 virtio_net_set_features function, which will save the acked_features
 in advance, including assign 0 to acked_features.

v2:
Fix the typo in subject of [PATCH v2 2/2] 

v1:
This is version 1 of the series and it is exactly the same as the
RFC version, but fixes a typo in the subject, which was reported by Michael.

As for the test of the behavior suggested by Michael, IMHO, it could be
posted in another series, since I found that testing the negotiation
behavior using the QGraph Test Framework requires more work than I thought.

The test patch may implement the following logic...
1. Introduce a fresh new qmp command to query netdev info, which show
   the NetClient status including guest features and acked_features.
2. Using vhost-user QGraph Test to check the behavior of the vhost user
   protocol cmd VHOST_USER_SET_FEATURES. 
3. Adding acked_features into TestServer, which receive the features
   set by QEMU.
4. Compare the acked_feature in TestServer with the acked_features 
   in the output of qmp query command.

Patch for RFC can be found in the following:
https://patchew.org/QEMU/20220926063641.25038-1-huang...@chinatelecom.cn/

This patchset aim to fix the unexpected negotiation features for
vhost-user netdev interface. 

Steps to reproduce the issue:
Prepare a vm (CentOS 8 in my work scenario) with vhost-user
backend interface and configure qemu as server mode. So dpdk
would connect qemu's unix socket periodically.

1. start vm in background and restart openvswitch service 
   concurrently and repeatedly in the process of vm start. 

2. check if negotiated virtio features of port is "0x4000" at
   dpdk side by executing:
   ovs-vsctl list interface | grep features | grep {port_socket_path}
   
3. if features equals "0x4000", go to the vm and check if sending 
   arp package works, executing:
   arping {IP_ADDR}
   if vm interface is configured to boot with dhcp protocol, it
   would get no ip. 

After doing the above steps, we'll find that arping does not work: the ovs on
the host side has forwarded unexpected arp packages, which had 0x
prepended to the head of the ethernet frame.  Though qemu reports some errors
when reading/writing cmds of the vhost protocol during the process of vm start,
like the following:

"Failed to set msg fds"
"vhost VQ 0 ring restore failed: -22: Invalid argument (22)"

The vm does not stop or report a more suggestive error message; it
seems that everything is ok.

The root cause is that the dpdk port negotiated nothing but the single
VHOST_USER_F_PROTOCOL_FEATURES feature with the vhost-user interface at
the qemu side, which is unexpected behavior. qemu only loads
VHOST_USER_F_PROTOCOL_FEATURES on VHOST_USER_SET_FEATURES and loses
the guest features configured by the front-end virtio driver via the
VIRTIO_PCI_COMMON_GF addr, which are stored in the acked_features field
of struct vhost_dev.

To explain how the acked_features disappear, we may need to know the
lifecycle of acked_features in vhost_dev during feature negotiation
(a condensed code sketch follows the list below).

1. qemu init acked_features field of struct vhost_dev in vhost_net_init()
   by calling vhost_net_ack_features(), the init value fetched from
   acked_features field of struct NetVhostUserState, which is the backup
   role after vhost stopping or unix socket closed.
   In the first time, the acked_features of struct NetVhostUserState is 0
   so the init value of vhost_dev's acked_features also 0. 

2. when guest virtio driver set features, qemu accept the features and
   call virtio_set_features to store the features as acked_features in
   vhost_dev.

3. when unix socket closed or vhost_dev device doesn't work and be
   stopped unexpectedly, qemu will call chr_closed_bh or vhost_user_stop,
   which will copy acked_features from vhost_dev to NetVhostUserState and
   cleanup the vhost_dev. Since virtio driver not allowed to set features
   once status of virtio device changes to VIRTIO_CONFIG_S_FEATURE_OK,
   qemu need to backup it in case of loss. 

4. once unix socket return to normal and get connected, qemu will
   call vhost_user_start to restore the vhost_dev and fetch the
   acked_features stored in NetVhostUserState previously. 
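
A condensed, hypothetical sketch of the backup/restore cycle above (not
code from this series; the vhost_net_* helpers named are the existing
QEMU ones, but the wrapper functions and control flow are simplified
purely for illustration):

/* (3) on disconnect, back up whatever was negotiated so far */
static void backup_acked_features(NetVhostUserState *s)
{
    if (s->vhost_net) {
        uint64_t features = vhost_net_get_acked_features(s->vhost_net);
        if (features) {
            s->acked_features = features;   /* survives vhost_dev teardown */
        }
    }
}

/* (4) on reconnect, the recreated vhost_dev is re-initialized from the backup */
static void restore_acked_features(NetVhostUserState *s, struct vhost_net *net)
{
    vhost_net_ack_features(net, s->acked_features);
}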

The above flow works fine in the normal scenarios, but it doesn't cover
the scenario that openvswitch service restart in the same time of
virtio features 

[PATCH v4 1/3] vhost-user: Refactor vhost acked features saving

2022-11-17 Thread huangy81
From: Hyman Huang(黄勇) 

Abstract vhost acked features saving into
vhost_user_save_acked_features, export it as util function.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 include/net/vhost-user.h |  1 +
 net/vhost-user.c | 21 +++--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
index 5bcd8a6..35bf619 100644
--- a/include/net/vhost-user.h
+++ b/include/net/vhost-user.h
@@ -14,5 +14,6 @@
 struct vhost_net;
 struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
 uint64_t vhost_user_get_acked_features(NetClientState *nc);
+void vhost_user_save_acked_features(NetClientState *nc);
 
 #endif /* VHOST_USER_H */
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 3a6b90d..f5cb095 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -45,10 +45,23 @@ uint64_t vhost_user_get_acked_features(NetClientState *nc)
 return s->acked_features;
 }
 
-static void vhost_user_stop(int queues, NetClientState *ncs[])
+void vhost_user_save_acked_features(NetClientState *nc)
 {
 NetVhostUserState *s;
+
+s = DO_UPCAST(NetVhostUserState, nc, nc);
+if (s->vhost_net) {
+uint64_t features = vhost_net_get_acked_features(s->vhost_net);
+if (features) {
+s->acked_features = features;
+}
+}
+}
+
+static void vhost_user_stop(int queues, NetClientState *ncs[])
+{
 int i;
+NetVhostUserState *s;
 
 for (i = 0; i < queues; i++) {
 assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);
@@ -56,11 +69,7 @@ static void vhost_user_stop(int queues, NetClientState 
*ncs[])
 s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
 
 if (s->vhost_net) {
-/* save acked features */
-uint64_t features = vhost_net_get_acked_features(s->vhost_net);
-if (features) {
-s->acked_features = features;
-}
+vhost_user_save_acked_features(ncs[i]);
 vhost_net_cleanup(s->vhost_net);
 }
 }
-- 
1.8.3.1




[PATCH v4 3/3] vhost-user: Fix the virtio features negotiation flaw

2022-11-17 Thread huangy81
From: Hyman Huang(黄勇) 

This patch aims to fix unexpected feature negotiation for the
vhost-user netdev interface.

When openvswitch reconnects to Qemu after an unexpected disconnection
and Qemu therefore restarts the vhost_dev, the acked_features field in
vhost_dev is initialized with the value fetched from the acked_features
field in NetVhostUserState, which should be up-to-date at that
moment, but Qemu cannot guarantee that during the time window of
virtio feature negotiation.

So we save the acked_features right after they are configured by the
guest virtio driver so they can be used to restore the acked_features
field in vhost_dev correctly.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 hw/net/vhost_net-stub.c | 5 +
 hw/net/vhost_net.c  | 6 ++
 hw/net/virtio-net.c | 6 ++
 include/net/vhost_net.h | 2 ++
 4 files changed, 19 insertions(+)

diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c
index 9f7daae..66ed5f0 100644
--- a/hw/net/vhost_net-stub.c
+++ b/hw/net/vhost_net-stub.c
@@ -113,3 +113,8 @@ int vhost_net_virtqueue_restart(VirtIODevice *vdev, 
NetClientState *nc,
 {
 return 0;
 }
+
+void vhost_net_save_acked_features(NetClientState *nc)
+{
+
+}
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index feda448..ceb962c 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -143,6 +143,12 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net)
 return net->dev.acked_features;
 }
 
+void vhost_net_save_acked_features(NetClientState *nc)
+{
+assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
+vhost_user_save_acked_features(nc);
+}
+
 static int vhost_net_get_fd(NetClientState *backend)
 {
 switch (backend->info->type) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index aba1275..91cbd0c 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -981,6 +981,12 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint64_t features)
 continue;
 }
 vhost_net_ack_features(get_vhost_net(nc->peer), features);
+
+/*
+ * keep acked_features in NetVhostUserState up-to-date so it
+ * can't miss any features configured by guest virtio driver.
+ */
+vhost_net_save_acked_features(nc->peer);
 }
 
 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 40b9a40..dfb1375 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -52,4 +52,6 @@ void vhost_net_virtqueue_reset(VirtIODevice *vdev, 
NetClientState *nc,
int vq_index);
 int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
 int vq_index);
+
+void vhost_net_save_acked_features(NetClientState *nc);
 #endif
-- 
1.8.3.1




[PATCH RFC 2/4] hmp: Add "info netdev" cmd

2022-10-31 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce "info netdev" command so developers can play with
it easier.
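
For illustration only, the output produced by the monitor_printf calls
below would look roughly like the following (device name and feature
word are hypothetical):

(qemu) info netdev
hostnet0: vhost-user device, ufo supported, vnet-hdr supported, vnet-hdr-len supported, acked-features 0x40008000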

Signed-off-by: Hyman Huang(黄勇) 
---
 hmp-commands-info.hx  | 14 ++
 include/monitor/hmp.h |  1 +
 net/net.c | 31 +++
 3 files changed, 46 insertions(+)

diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 754b1e8..217843c 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -880,6 +880,20 @@ SRST
 Display the vcpu dirty page limit information.
 ERST
 
+{
+.name   = "netdev",
+.args_type  = "",
+.params = "",
+.help   = "show information about netdev, guest acked features are "
+  "also printed if supporting virtio-net dataplane offloading",
+.cmd= hmp_info_netdev,
+},
+
+SRST
+  ``info netdev``
+Display information about netdev.
+ERST
+
 #if defined(TARGET_I386)
 {
 .name   = "sgx",
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index a9cf064..0bd496a 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -142,5 +142,6 @@ void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict 
*qdict);
 void hmp_human_readable_text_helper(Monitor *mon,
 HumanReadableText *(*qmp_handler)(Error 
**));
 void hmp_info_stats(Monitor *mon, const QDict *qdict);
+void hmp_info_netdev(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/net/net.c b/net/net.c
index 5d11674..c27ebfa 100644
--- a/net/net.c
+++ b/net/net.c
@@ -55,6 +55,7 @@
 #include "net/filter.h"
 #include "net/vhost-user.h"
 #include "qapi/string-output-visitor.h"
+#include "monitor/hmp.h"
 
 /* Net bridge is currently not supported for W32. */
 #if !defined(_WIN32)
@@ -1268,6 +1269,36 @@ NetDevInfoList *qmp_query_netdev(Error **errp)
 return head;
 }
 
+void hmp_info_netdev(Monitor *mon, const QDict *qdict)
+{
+NetDevInfoList *info, *head, *info_list = NULL;
+Error *err = NULL;
+
+info_list = qmp_query_netdev(&err);
+if (err) {
+hmp_handle_error(mon, err);
+return;
+}
+
+head = info_list;
+for (info = head; info != NULL; info = info->next) {
+monitor_printf(mon, "%s: %s device, "
+"ufo %s, vnet-hdr %s, vnet-hdr-len %s",
+info->value->name,
+NetClientDriver_str(info->value->type),
+info->value->ufo ? "supported" : "unsupported",
+info->value->vnet_hdr ? "supported" : "unsupported",
+info->value->vnet_hdr_len ? "supported" : "unsupported");
+if (info->value->has_acked_features) {
+monitor_printf(mon, ", acked-features 0x%" PRIx64,
+info->value->acked_features);
+}
+monitor_printf(mon, "\n");
+}
+
+g_free(info_list);
+}
+
 static void netfilter_print_info(Monitor *mon, NetFilterState *nf)
 {
 char *str;
-- 
1.8.3.1




[PATCH RFC 1/4] net: Introduce qmp cmd "query-netdev"

2022-10-31 Thread huangy81
From: Hyman Huang(黄勇) 

For netdev devices that can offload the virtio-net dataplane to a slave,
such as vhost-net, vhost-user and vhost-vdpa, exporting their
capability information and acked features would be more friendly for
developers. This information can be analyzed and compared directly to the
slave capabilities provided by, e.g., dpdk or other slaves, helping to
draw conclusions about whether the vm network interface works normally,
whether the vm can be migrated to another feature-compatible destination,
or whatever else.

For developers who devote themselves to offloading the virtio-net dataplane
to a DPU and make efforts to migrate a vm lively from a software-based source
host to a DPU-offload destination host smoothly, virtio-net feature
compatibility is a serious issue; exporting the key capabilities
and acked_features of the netdev could also help greatly with debugging.

So we export the key capabilities of the netdev, which may affect
the final negotiated virtio-net features; meanwhile, the backed-up
acked_features are also exported, which are used to initialize or
restore the features negotiated between qemu and the vhost slave when
starting the vhost_dev device.

Signed-off-by: Hyman Huang(黄勇) 
---
 net/net.c | 44 +++
 qapi/net.json | 66 +++
 2 files changed, 110 insertions(+)

diff --git a/net/net.c b/net/net.c
index 2db160e..5d11674 100644
--- a/net/net.c
+++ b/net/net.c
@@ -53,6 +53,7 @@
 #include "sysemu/runstate.h"
 #include "net/colo-compare.h"
 #include "net/filter.h"
+#include "net/vhost-user.h"
 #include "qapi/string-output-visitor.h"
 
 /* Net bridge is currently not supported for W32. */
@@ -1224,6 +1225,49 @@ void qmp_netdev_del(const char *id, Error **errp)
 }
 }
 
+static NetDevInfo *query_netdev(NetClientState *nc)
+{
+NetDevInfo *info = NULL;
+
+if (!nc || !nc->is_netdev) {
+return NULL;
+}
+
+info = g_malloc0(sizeof(*info));
+info->name = g_strdup(nc->name);
+info->type = nc->info->type;
+info->ufo = nc->info->has_ufo;
+info->vnet_hdr = nc->info->has_vnet_hdr;
+info->vnet_hdr_len = nc->info->has_vnet_hdr_len;
+
+if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
+info->has_acked_features = true;
+info->acked_features = vhost_user_get_acked_features(nc);
+}
+
+return info;
+}
+
+NetDevInfoList *qmp_query_netdev(Error **errp)
+{
+NetClientState *nc;
+NetDevInfo *info = NULL;
+NetDevInfoList *head = NULL, **tail = &head;
+
+QTAILQ_FOREACH(nc, &net_clients, next) {
+if (nc->info->type == NET_CLIENT_DRIVER_NIC) {
+continue;
+}
+
+info = query_netdev(nc);
+if (info) {
+QAPI_LIST_APPEND(tail, info);
+}
+}
+
+return head;
+}
+
 static void netfilter_print_info(Monitor *mon, NetFilterState *nf)
 {
 char *str;
diff --git a/qapi/net.json b/qapi/net.json
index dd088c0..76a6513 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -631,6 +631,72 @@
'if': 'CONFIG_VMNET' } } }
 
 ##
+# @NetDevInfo:
+#
+# NetDev information.  This structure describes a NetDev information, including
+# capabilities and negotiated features.
+#
+# @name: The NetDev name.
+#
+# @type: Type of NetDev.
+#
+# @ufo: True if NetDev has ufo capability.
+#
+# @vnet-hdr: True if NetDev has vnet_hdr.
+#
+# @vnet-hdr-len: True if given length can be assigned to NetDev.
+#
+# @acked-features: Negotiated features with vhost slave device if device 
support
+#  dataplane offload.
+#
+# Since:  7.1
+##
+{'struct': 'NetDevInfo',
+ 'data': {
+'name': 'str',
+'type': 'NetClientDriver',
+'ufo':'bool',
+'vnet-hdr':'bool',
+'vnet-hdr-len':'bool',
+'*acked-features': 'uint64' } }
+
+##
+# @query-netdev:
+#
+# Get a list of NetDevInfo for all virtual netdev peer devices.
+#
+# Returns: a list of @NetDevInfo describing each virtual netdev peer device.
+#
+# Since: 7.1
+#
+# Example:
+#
+# -> { "execute": "query-netdev" }
+# <- {
+#   "return":[
+#  {
+# "name":"hostnet0",
+# "type":"vhost-user",
+# "ufo":true,
+# "vnet-hdr":true,
+# "vnet-hdr-len":true,
+# "acked-features":"5111807907",
+#  },
+#  {
+# "name":"hostnet1",
+# "type":"vhost-user",
+# "ufo":true,
+# "vnet-hdr":true,
+# "vnet-hdr-len":true,
+# "acked-features":"5111807907",
+#  }
+#   ]
+#}
+#
+##
+{ 'command': 'query-netdev', 'returns': ['NetDevInfo'] }
+
+##
 # @RxState:
 #
 # Packets receiving state
-- 
1.8.3.1




[PATCH RFC 3/4] hmp: Add netdev information into output of hmp cmd "info network"

2022-10-31 Thread huangy81
From: Hyman Huang(黄勇) 

Add netdev information into output of hmp command hmp_info_network
so developers can analyze interface capability more easily.

Signed-off-by: Hyman Huang(黄勇) 
---
 net/net.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/net/net.c b/net/net.c
index c27ebfa..9325628 100644
--- a/net/net.c
+++ b/net/net.c
@@ -1330,6 +1330,21 @@ void print_net_client(Monitor *mon, NetClientState *nc)
nc->queue_index,
NetClientDriver_str(nc->info->type),
nc->info_str);
+if (nc->info->type != NET_CLIENT_DRIVER_NIC) {
+NetDevInfo *info = query_netdev(nc);
+if (info) {
+monitor_printf(mon, "netdev info: ufo=%s, vnet-hdr=%s, "
+"vnet-hdr-len=%s", info->ufo ? "on" : "off",
+info->vnet_hdr ? "on" : "off",
+info->vnet_hdr_len ? "on" : "off");
+if (info->has_acked_features) {
+monitor_printf(mon, ", acked-features=0x%" PRIx64,
+info->acked_features);
+}
+monitor_printf(mon, "\n");
+g_free(info);
+}
+}
 if (!QTAILQ_EMPTY(&nc->filters)) {
 monitor_printf(mon, "filters:\n");
 }
-- 
1.8.3.1




[PATCH RFC 4/4] vhost-user-test: Add negotiated features check

2022-10-31 Thread huangy81
From: Hyman Huang(黄勇) 

For a vhost-user network device, Qemu backs up the final features as
acked_features internally after the guest acknowledges the features during
virtio-net driver initialization, so the acked_features can be
used as input of the VHOST_USER_SET_FEATURES command when the slave device
restores from an unexpected failure.

The negotiated features check simply asserts that the acked_features in Qemu
are exactly the same as the features in the vhost slave device, which
checks whether features are negotiated correctly via the vhost-user protocol.

Signed-off-by: Hyman Huang(黄勇) 
---
 tests/qtest/vhost-user-test.c | 67 +++
 1 file changed, 67 insertions(+)

diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c
index bf9f7c4..f8bf2fa 100644
--- a/tests/qtest/vhost-user-test.c
+++ b/tests/qtest/vhost-user-test.c
@@ -29,6 +29,9 @@
 #include "libqos/qgraph_internal.h"
 #include "hw/virtio/virtio-net.h"
 
+#include "migration-helpers.h"
+#include "qapi/qmp/qlist.h"
+
 #include "standard-headers/linux/vhost_types.h"
 #include "standard-headers/linux/virtio_ids.h"
 #include "standard-headers/linux/virtio_net.h"
@@ -167,6 +170,7 @@ typedef struct TestServer {
 int test_flags;
 int queues;
 struct vhost_user_ops *vu_ops;
+uint64_t features;
 } TestServer;
 
 struct vhost_user_ops {
@@ -796,6 +800,64 @@ static void test_read_guest_mem(void *obj, void *arg, 
QGuestAllocator *alloc)
 read_guest_mem_server(global_qtest, server);
 }
 
+static QDict *query_netdev(QTestState *who)
+{
+QDict *rsp;
+
+rsp = qtest_qmp(who, "{ 'execute': 'query-netdev' }");
+g_assert(!qdict_haskey(rsp, "error"));
+g_assert(qdict_haskey(rsp, "return"));
+
+return rsp;
+}
+
+static uint64_t get_acked_features(QTestState *who)
+{
+QDict *rsp_return;
+QList *info_list;
+const QListEntry *entry;
+QDict *info;
+uint64_t acked_features;
+
+rsp_return = query_netdev(who);
+g_assert(rsp_return);
+
+info_list = qdict_get_qlist(rsp_return, "return");
+g_assert(info_list && !qlist_empty(info_list));
+
+entry = qlist_first(info_list);
+g_assert(entry);
+
+info = qobject_to(QDict, qlist_entry_obj(entry));
+g_assert(info);
+
+acked_features = qdict_get_try_int(info, "acked-features", 0);
+
+qobject_unref(rsp_return);
+return acked_features;
+}
+
+static void read_acked_features(QTestState *qts, TestServer *s)
+{
+uint64_t acked_features;
+
+acked_features = get_acked_features(qts);
+g_assert_cmpint(acked_features, ==, s->features);
+}
+
+static void test_read_acked_features(void *obj,
+ void *arg,
+ QGuestAllocator *alloc)
+{
+TestServer *server = arg;
+
+if (!wait_for_fds(server)) {
+return;
+}
+
+read_acked_features(global_qtest, server);
+}
+
 static void test_migrate(void *obj, void *arg, QGuestAllocator *alloc)
 {
 TestServer *s = arg;
@@ -1037,6 +1099,7 @@ static void vu_net_set_features(TestServer *s, 
CharBackend *chr,
 qemu_chr_fe_disconnect(chr);
 s->test_flags = TEST_FLAGS_BAD;
 }
+s->features = msg->payload.u64;
 }
 
 static void vu_net_get_protocol_features(TestServer *s, CharBackend *chr,
@@ -1078,6 +1141,10 @@ static void register_vhost_user_test(void)
  "virtio-net",
  test_read_guest_mem, );
 
+qos_add_test("vhost-user/read_acked_features",
+ "virtio-net",
+ test_read_acked_features, );
+
 if (qemu_memfd_check(MFD_ALLOW_SEALING)) {
 opts.before = vhost_user_test_setup_memfd;
 qos_add_test("vhost-user/read-guest-mem/memfd",
-- 
1.8.3.1




[PATCH RFC 0/4] Export netdev capabilities and information

2022-10-31 Thread huangy81
From: Hyman Huang(黄勇) 

This series is enlightened by Michael when we fixed a virtio features
negotiation flaw, see the details here:
https://lore.kernel.org/qemu-devel/cover.1667136717.git.huang...@chinatelecom.cn/

And a test is suggested to be added to check the behavior of virtio-net feature
negotiation (if I understand correctly); see the details here:
https://lore.kernel.org/qemu-devel/20221026105516-mutt-send-email-...@kernel.org/

Indeed, Qemu does not export interface capabilities such as ufo, vnet-hdr or
negotiated features to developers. By comparison, OVS-DPDK will show interface
status such as features, mode, ring_size and so on if we execute
"ovs-vsctl list interface". It would be more friendly to export the above
capabilities and information for developers, especially for those who devote
themselves to offloading the virtio-net dataplane to a DPU and make efforts to
migrate vms lively from a software-based source to a DPU-offload destination
smoothly; virtio-net feature compatibility is a serious issue, and exporting
the key capabilities and acked_features of the netdev could help greatly with
debugging.

This series exports the key capabilities of the netdev, which may affect the
final negotiated virtio-net features; meanwhile, the backed-up acked_features
are also exported, which are used to initialize or restore the features
negotiated between qemu and the vhost slave when starting the vhost_dev device.

Another thing the patchset does is add a virtio-net features check test, which
uses the fresh new qmp interface "query-netdev" to check whether features are
negotiated correctly via the vhost-user protocol.

This patchset depends on the previous patchset, which is in the process of code
review. So this post aims to request comments, as the subject says; any
suggestions and comments are welcome and I would appreciate them a lot.

Please review, thanks,

Hyman Huang (4):
  net: Introduce qmp cmd "query-netdev"
  hmp: Add "info netdev" cmd
  hmp: Add netdev information into output of hmp cmd "info network"
  vhost-user-test: Add negotiated features check

 hmp-commands-info.hx  | 14 +++
 include/monitor/hmp.h |  1 +
 net/net.c | 90 +++
 qapi/net.json | 66 +++
 tests/qtest/vhost-user-test.c | 67 
 5 files changed, 238 insertions(+)

-- 
1.8.3.1




[PATCH v3 0/2] Fix the virtio features negotiation flaw

2022-10-30 Thread huangy81
From: Hyman Huang(黄勇) 

v3:
-rebase on master
-code clean on [PATCH v2 1/2]: keep the commit self-consistent and
 do not modify the logic of saving acked_features. Just abstract the
 util function.
-modify the [PATCH v2 2/2] logic: change the behavior of saving
 acked_features in chr_closed_bh: saving acked_features only if
 features aren't 0. For the case of 0, we implement it in
 virtio_net_set_features function, which will save the acked_features
 in advance, including assign 0 to acked_features.

Thanks Michael for the comments and suggestions about the self-consistent
of commits. :)

Please review,

Yong

v2:
Fix the typo in subject of [PATCH v2 2/2] 

v1:
This is version 1 of the series and it is exactly the same as the
RFC version, but fixes a typo in the subject, which was reported by Michael.

As for the test of the behavior suggested by Michael, IMHO, it could be
posted in another series, since I found that testing the negotiation
behavior using the QGraph Test Framework requires more work than I thought.

The test patch may implement the following logic...
1. Introduce a fresh new qmp command to query netdev info, which show
   the NetClient status including guest features and acked_features.
2. Using vhost-user QGraph Test to check the behavior of the vhost user
   protocol cmd VHOST_USER_SET_FEATURES. 
3. Adding acked_features into TestServer, which receive the features
   set by QEMU.
4. Compare the acked_feature in TestServer with the acked_features 
   in the output of qmp query command.

Anyway, the idea above can be discussed in the future and any suggestions
are welcome. Let's fix the existing bug first, :)

Please review,

Yong

Patch for RFC can be found in the following:
https://patchew.org/QEMU/20220926063641.25038-1-huang...@chinatelecom.cn/

This patchset aim to fix the unexpected negotiation features for
vhost-user netdev interface. 

Steps to reproduce the issue:
Prepare a vm (CentOS 8 in my work scenario) with vhost-user
backend interface and configure qemu as server mode. So dpdk
would connect qemu's unix socket periodically.

1. start vm in background and restart openvswitch service 
   concurrently and repeatedly in the process of vm start. 

2. check if negotiated virtio features of port is "0x4000" at
   dpdk side by executing:
   ovs-vsctl list interface | grep features | grep {port_socket_path}
   
3. if features equals "0x4000", go to the vm and check if sending 
   arp package works, executing:
   arping {IP_ADDR}
   if vm interface is configured to boot with dhcp protocol, it
   would get no ip. 

After doing the above steps, we'll find that arping does not work: the ovs on
the host side has forwarded unexpected arp packages, which had 0x
prepended to the head of the ethernet frame.  Though qemu reports some errors
when reading/writing cmds of the vhost protocol during the process of vm start,
like the following:

"Failed to set msg fds"
"vhost VQ 0 ring restore failed: -22: Invalid argument (22)"

The vm does not stop or report a more suggestive error message; it
seems that everything is ok.

The root cause is that the dpdk port negotiated nothing but the single
VHOST_USER_F_PROTOCOL_FEATURES feature with the vhost-user interface at
the qemu side, which is unexpected behavior. qemu only loads
VHOST_USER_F_PROTOCOL_FEATURES on VHOST_USER_SET_FEATURES and loses
the guest features configured by the front-end virtio driver via the
VIRTIO_PCI_COMMON_GF addr, which are stored in the acked_features field
of struct vhost_dev.

To explain how the acked_features disappear, we may need to know the
lifecycle of acked_features in vhost_dev during feature negotiation.

1. qemu init acked_features field of struct vhost_dev in vhost_net_init()
   by calling vhost_net_ack_features(), the init value fetched from
   acked_features field of struct NetVhostUserState, which is the backup
   role after vhost stopping or unix socket closed.
   In the first time, the acked_features of struct NetVhostUserState is 0
   so the init value of vhost_dev's acked_features also 0. 

2. when guest virtio driver set features, qemu accept the features and
   call virtio_set_features to store the features as acked_features in
   vhost_dev.

3. when unix socket closed or vhost_dev device doesn't work and be
   stopped unexpectedly, qemu will call chr_closed_bh or vhost_user_stop,
   which will copy acked_features from vhost_dev to NetVhostUserState and
   cleanup the vhost_dev. Since virtio driver not allowed to set features
   once status of virtio device changes to VIRTIO_CONFIG_S_FEATURE_OK,
   qemu need to backup it in case of loss. 

4. once unix socket return to normal and get connected, qemu will
   call vhost_user_start to restore the vhost_dev and fetch the
   acked_features stored in NetVhostUserState previously. 

The above flow works fine in the normal scenarios, but it doesn't cover
the scenario that openvswitch service restart in the same time of
virtio features negotiation.

Let's analyze such scenario: 
   qemu 

[PATCH v3 2/2] vhost-net: Fix the virtio features negotiation flaw

2022-10-30 Thread huangy81
From: Hyman Huang(黄勇) 

Save the acked_features once they are configured by the guest
virtio driver so no features are missed.

Note that this patch also changes the feature-saving logic
in chr_closed_bh, which originally backed up features no matter
whether the features were 0 or not, but now does it only if
the features aren't 0.

As for resetting acked_features to 0 if needed, Qemu always
keeps the backup acked_features up-to-date and saves the
acked_features right after virtio_net_set_features in advance,
including assigning 0 to acked_features, so that behavior is
also covered.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 hw/net/vhost_net.c  | 9 +
 hw/net/virtio-net.c | 5 +
 include/net/vhost_net.h | 2 ++
 net/vhost-user.c| 6 +-
 4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index d28f8b9..2bffc27 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -141,6 +141,15 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net)
 return net->dev.acked_features;
 }
 
+void vhost_net_save_acked_features(NetClientState *nc)
+{
+if (nc->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
+return;
+}
+
+vhost_user_save_acked_features(nc, false);
+}
+
 static int vhost_net_get_fd(NetClientState *backend)
 {
 switch (backend->info->type) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index e9f696b..5f8f788 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -924,6 +924,11 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint64_t features)
 continue;
 }
 vhost_net_ack_features(get_vhost_net(nc->peer), features);
+/*
+ * keep acked_features in NetVhostUserState up-to-date so it
+ * can't miss any features configured by guest virtio driver.
+ */
+vhost_net_save_acked_features(nc->peer);
 }
 
 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 387e913..3a5579b 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -46,6 +46,8 @@ int vhost_set_vring_enable(NetClientState * nc, int enable);
 
 uint64_t vhost_net_get_acked_features(VHostNetState *net);
 
+void vhost_net_save_acked_features(NetClientState *nc);
+
 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
 
 #endif
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 74f349c..c512cc9 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -258,11 +258,7 @@ static void chr_closed_bh(void *opaque)
 s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
 
 for (i = queues -1; i >= 0; i--) {
-s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
-
-if (s->vhost_net) {
-s->acked_features = vhost_net_get_acked_features(s->vhost_net);
-}
+vhost_user_save_acked_features(ncs[i], false);
 }
 
 qmp_set_link(name, false, &err);
-- 
1.8.3.1




[PATCH v3 1/2] vhost-user: Refactor vhost acked features saving

2022-10-30 Thread huangy81
From: Hyman Huang(黄勇) 

Abstract vhost acked features saving into
vhost_user_save_acked_features, export it as util function.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 include/net/vhost-user.h |  2 ++
 net/vhost-user.c | 29 ++---
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
index 5bcd8a6..00d4661 100644
--- a/include/net/vhost-user.h
+++ b/include/net/vhost-user.h
@@ -14,5 +14,7 @@
 struct vhost_net;
 struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
 uint64_t vhost_user_get_acked_features(NetClientState *nc);
+void vhost_user_save_acked_features(NetClientState *nc,
+bool cleanup);
 
 #endif /* VHOST_USER_H */
diff --git a/net/vhost-user.c b/net/vhost-user.c
index b1a0247..74f349c 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -45,24 +45,31 @@ uint64_t vhost_user_get_acked_features(NetClientState *nc)
 return s->acked_features;
 }
 
-static void vhost_user_stop(int queues, NetClientState *ncs[])
+void vhost_user_save_acked_features(NetClientState *nc, bool cleanup)
 {
 NetVhostUserState *s;
+
+s = DO_UPCAST(NetVhostUserState, nc, nc);
+if (s->vhost_net) {
+uint64_t features = vhost_net_get_acked_features(s->vhost_net);
+if (features) {
+s->acked_features = features;
+}
+
+if (cleanup) {
+vhost_net_cleanup(s->vhost_net);
+}
+}
+}
+
+static void vhost_user_stop(int queues, NetClientState *ncs[])
+{
 int i;
 
 for (i = 0; i < queues; i++) {
 assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);
 
-s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
-
-if (s->vhost_net) {
-/* save acked features */
-uint64_t features = vhost_net_get_acked_features(s->vhost_net);
-if (features) {
-s->acked_features = features;
-}
-vhost_net_cleanup(s->vhost_net);
-}
+vhost_user_save_acked_features(ncs[i], true);
 }
 }
 
-- 
1.8.3.1




[PATCH v2 1/2] vhost-user: Refactor vhost acked features saving

2022-10-28 Thread huangy81
From: Hyman Huang(黄勇) 

Abstract vhost acked features saving into
vhost_user_save_acked_features, export it as util function.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 include/net/vhost-user.h |  2 ++
 net/vhost-user.c | 35 +++
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
index 5bcd8a6..00d4661 100644
--- a/include/net/vhost-user.h
+++ b/include/net/vhost-user.h
@@ -14,5 +14,7 @@
 struct vhost_net;
 struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
 uint64_t vhost_user_get_acked_features(NetClientState *nc);
+void vhost_user_save_acked_features(NetClientState *nc,
+bool cleanup);
 
 #endif /* VHOST_USER_H */
diff --git a/net/vhost-user.c b/net/vhost-user.c
index b1a0247..c512cc9 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -45,24 +45,31 @@ uint64_t vhost_user_get_acked_features(NetClientState *nc)
 return s->acked_features;
 }
 
-static void vhost_user_stop(int queues, NetClientState *ncs[])
+void vhost_user_save_acked_features(NetClientState *nc, bool cleanup)
 {
 NetVhostUserState *s;
+
+s = DO_UPCAST(NetVhostUserState, nc, nc);
+if (s->vhost_net) {
+uint64_t features = vhost_net_get_acked_features(s->vhost_net);
+if (features) {
+s->acked_features = features;
+}
+
+if (cleanup) {
+vhost_net_cleanup(s->vhost_net);
+}
+}
+}
+
+static void vhost_user_stop(int queues, NetClientState *ncs[])
+{
 int i;
 
 for (i = 0; i < queues; i++) {
 assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);
 
-s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
-
-if (s->vhost_net) {
-/* save acked features */
-uint64_t features = vhost_net_get_acked_features(s->vhost_net);
-if (features) {
-s->acked_features = features;
-}
-vhost_net_cleanup(s->vhost_net);
-}
+vhost_user_save_acked_features(ncs[i], true);
 }
 }
 
@@ -251,11 +258,7 @@ static void chr_closed_bh(void *opaque)
 s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
 
 for (i = queues -1; i >= 0; i--) {
-s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
-
-if (s->vhost_net) {
-s->acked_features = vhost_net_get_acked_features(s->vhost_net);
-}
+vhost_user_save_acked_features(ncs[i], false);
 }
 
 qmp_set_link(name, false, &err);
-- 
1.8.3.1




[PATCH v2 2/2] vhost-net: Fix the virtio features negotiation flaw

2022-10-28 Thread huangy81
From: Hyman Huang(黄勇) 

Save the acked_features once they are configured by the guest
virtio driver so no features are missed.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 hw/net/vhost_net.c  | 9 +
 hw/net/virtio-net.c | 5 +
 include/net/vhost_net.h | 2 ++
 3 files changed, 16 insertions(+)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index d28f8b9..2bffc27 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -141,6 +141,15 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net)
 return net->dev.acked_features;
 }
 
+void vhost_net_save_acked_features(NetClientState *nc)
+{
+if (nc->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
+return;
+}
+
+vhost_user_save_acked_features(nc, false);
+}
+
 static int vhost_net_get_fd(NetClientState *backend)
 {
 switch (backend->info->type) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index e9f696b..5f8f788 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -924,6 +924,11 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint64_t features)
 continue;
 }
 vhost_net_ack_features(get_vhost_net(nc->peer), features);
+/*
+ * keep acked_features in NetVhostUserState up-to-date so it
+ * can't miss any features configured by guest virtio driver.
+ */
+vhost_net_save_acked_features(nc->peer);
 }
 
 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 387e913..3a5579b 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -46,6 +46,8 @@ int vhost_set_vring_enable(NetClientState * nc, int enable);
 
 uint64_t vhost_net_get_acked_features(VHostNetState *net);
 
+void vhost_net_save_acked_features(NetClientState *nc);
+
 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
 
 #endif
-- 
1.8.3.1




[PATCH v2 0/2] Fix the virtio features negotiation flaw

2022-10-28 Thread huangy81
From: Hyman Huang(黄勇) 

v2:
Fix the typo in subject of [PATCH v2 2/2] 

v1:
This is version 1 of the series and it is exactly the same as the
RFC version, but fixes a typo in the subject, which was reported by Michael.

As for the test of the behavior suggested by Michael, IMHO, it could be
posted in another series, since I found that testing the negotiation
behavior using the QGraph Test Framework requires more work than I thought.

The test patch may implement the following logic...
1. Introduce a fresh new qmp command to query netdev info, which show
   the NetClient status including guest features and acked_features.
2. Using vhost-user QGraph Test to check the behavior of the vhost user
   protocol cmd VHOST_USER_SET_FEATURES. 
3. Adding acked_features into TestServer, which receive the features
   set by QEMU.
4. Compare the acked_feature in TestServer with the acked_features 
   in the output of qmp query command.

Anyway, the idea above can be discussed in the future and any suggestions
are welcome. Let's fix the existing bug first, :)

Please review,

Yong

Patch for RFC can be found in the following:
https://patchew.org/QEMU/20220926063641.25038-1-huang...@chinatelecom.cn/

This patchset aim to fix the unexpected negotiation features for
vhost-user netdev interface. 

Steps to reproduce the issue:
Prepare a vm (CentOS 8 in my work scenario) with vhost-user
backend interface and configure qemu as server mode. So dpdk
would connect qemu's unix socket periodically.

1. start vm in background and restart openvswitch service 
   concurrently and repeatedly in the process of vm start. 

2. check if negotiated virtio features of port is "0x4000" at
   dpdk side by executing:
   ovs-vsctl list interface | grep features | grep {port_socket_path}
   
3. if features equals "0x4000", go to the vm and check if sending 
   arp package works, executing:
   arping {IP_ADDR}
   if vm interface is configured to boot with dhcp protocol, it
   would get no ip. 

After doing the above steps, we'll find that arping does not work: the ovs on
the host side has forwarded unexpected arp packages, which had 0x
prepended to the head of the ethernet frame.  Though qemu reports some errors
when reading/writing cmds of the vhost protocol during the process of vm start,
like the following:

"Failed to set msg fds"
"vhost VQ 0 ring restore failed: -22: Invalid argument (22)"

The vm does not stop or report a more suggestive error message; it
seems that everything is ok.

The root cause is that the dpdk port negotiated nothing but the single
VHOST_USER_F_PROTOCOL_FEATURES feature with the vhost-user interface at
the qemu side, which is unexpected behavior. qemu only loads
VHOST_USER_F_PROTOCOL_FEATURES on VHOST_USER_SET_FEATURES and loses
the guest features configured by the front-end virtio driver via the
VIRTIO_PCI_COMMON_GF addr, which are stored in the acked_features field
of struct vhost_dev.

To explain how the acked_features disappear, we may need to know the
lifecycle of acked_features in vhost_dev during feature negotiation.

1. qemu init acked_features field of struct vhost_dev in vhost_net_init()
   by calling vhost_net_ack_features(), the init value fetched from
   acked_features field of struct NetVhostUserState, which is the backup
   role after vhost stopping or unix socket closed.
   In the first time, the acked_features of struct NetVhostUserState is 0
   so the init value of vhost_dev's acked_features also 0. 

2. when guest virtio driver set features, qemu accept the features and
   call virtio_set_features to store the features as acked_features in
   vhost_dev.

3. when unix socket closed or vhost_dev device doesn't work and be
   stopped unexpectedly, qemu will call chr_closed_bh or vhost_user_stop,
   which will copy acked_features from vhost_dev to NetVhostUserState and
   cleanup the vhost_dev. Since virtio driver not allowed to set features
   once status of virtio device changes to VIRTIO_CONFIG_S_FEATURE_OK,
   qemu need to backup it in case of loss. 

4. once unix socket return to normal and get connected, qemu will
   call vhost_user_start to restore the vhost_dev and fetch the
   acked_features stored in NetVhostUserState previously. 

The above flow works fine in the normal scenarios, but it doesn't cover
the scenario that openvswitch service restart in the same time of
virtio features negotiation.

Let's analyze such scenario: 
   qemu                                  dpdk

   vhost_net_init()
     |                                   systemctl stop openvswitch.service
   virtio_set_features()
     |                                   systemctl start openvswitch.service
   virtio_set_status()

Ovs stop service before guset setting virtio features, chr_closed_bh()
be called and fetch acked_features in vhost_dev, if may store the
incomplete features to NetVhostUserState since it doesn't include
guest features, eg "0x4000". 

Guest set virtio features with another features, eg "0x7060a782",

[PATCH v1 2/2] vhost-net: Fix the virtio features negotiation flaw

2022-10-28 Thread huangy81
From: Hyman Huang(黄勇) 

Save the acked_features once they are configured by the guest
virtio driver so that no features can be missed.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 hw/net/vhost_net.c  | 9 +
 hw/net/virtio-net.c | 5 +
 include/net/vhost_net.h | 2 ++
 3 files changed, 16 insertions(+)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index d28f8b9..2bffc27 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -141,6 +141,15 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net)
 return net->dev.acked_features;
 }
 
+void vhost_net_save_acked_features(NetClientState *nc)
+{
+if (nc->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
+return;
+}
+
+vhost_user_save_acked_features(nc, false);
+}
+
 static int vhost_net_get_fd(NetClientState *backend)
 {
 switch (backend->info->type) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index e9f696b..5f8f788 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -924,6 +924,11 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint64_t features)
 continue;
 }
 vhost_net_ack_features(get_vhost_net(nc->peer), features);
+/*
+ * keep acked_features in NetVhostUserState up-to-date so it
+ * can't miss any features configured by guest virtio driver.
+ */
+vhost_net_save_acked_features(nc->peer);
 }
 
 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 387e913..3a5579b 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -46,6 +46,8 @@ int vhost_set_vring_enable(NetClientState * nc, int enable);
 
 uint64_t vhost_net_get_acked_features(VHostNetState *net);
 
+void vhost_net_save_acked_features(NetClientState *nc);
+
 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
 
 #endif
-- 
1.8.3.1




[PATCH v1 1/2] vhost-user: Refactor vhost acked features saving

2022-10-28 Thread huangy81
From: Hyman Huang(黄勇) 

Abstract the saving of vhost acked features into
vhost_user_save_acked_features and export it as a utility function.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 include/net/vhost-user.h |  2 ++
 net/vhost-user.c | 35 +++
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
index 5bcd8a6..00d4661 100644
--- a/include/net/vhost-user.h
+++ b/include/net/vhost-user.h
@@ -14,5 +14,7 @@
 struct vhost_net;
 struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
 uint64_t vhost_user_get_acked_features(NetClientState *nc);
+void vhost_user_save_acked_features(NetClientState *nc,
+bool cleanup);
 
 #endif /* VHOST_USER_H */
diff --git a/net/vhost-user.c b/net/vhost-user.c
index b1a0247..c512cc9 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -45,24 +45,31 @@ uint64_t vhost_user_get_acked_features(NetClientState *nc)
 return s->acked_features;
 }
 
-static void vhost_user_stop(int queues, NetClientState *ncs[])
+void vhost_user_save_acked_features(NetClientState *nc, bool cleanup)
 {
 NetVhostUserState *s;
+
+s = DO_UPCAST(NetVhostUserState, nc, nc);
+if (s->vhost_net) {
+uint64_t features = vhost_net_get_acked_features(s->vhost_net);
+if (features) {
+s->acked_features = features;
+}
+
+if (cleanup) {
+vhost_net_cleanup(s->vhost_net);
+}
+}
+}
+
+static void vhost_user_stop(int queues, NetClientState *ncs[])
+{
 int i;
 
 for (i = 0; i < queues; i++) {
 assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);
 
-s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
-
-if (s->vhost_net) {
-/* save acked features */
-uint64_t features = vhost_net_get_acked_features(s->vhost_net);
-if (features) {
-s->acked_features = features;
-}
-vhost_net_cleanup(s->vhost_net);
-}
+vhost_user_save_acked_features(ncs[i], true);
 }
 }
 
@@ -251,11 +258,7 @@ static void chr_closed_bh(void *opaque)
 s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
 
 for (i = queues -1; i >= 0; i--) {
-s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
-
-if (s->vhost_net) {
-s->acked_features = vhost_net_get_acked_features(s->vhost_net);
-}
+vhost_user_save_acked_features(ncs[i], false);
 }
 
qmp_set_link(name, false, &err);
-- 
1.8.3.1




[PATCH v1 0/2] Fix the virtio features negotiation flaw

2022-10-28 Thread huangy81
From: Hyman Huang(黄勇) 

v1:
This is version 1 of the series and it is exactly the same as the RFC
version, except for fixing a typo in the subject, which was reported by
Michael.

As for the test for the behavior suggested by Michael, IMHO it could be
posted in another series, since I found that testing the negotiation
behavior using the QGraph Test Framework requires more work than I
thought.

The test patch may implement the following logic:
1. Introduce a fresh new QMP command to query netdev info, which shows
   the NetClient status including guest features and acked_features.
2. Use a vhost-user QGraph test to check the behavior of the vhost-user
   protocol command VHOST_USER_SET_FEATURES.
3. Add acked_features into TestServer, which receives the features
   set by QEMU.
4. Compare the acked_features in TestServer with the acked_features
   in the output of the QMP query command.

Anyway, the idea above can be discussed in the future and any
suggestions are welcome. Let's fix the existing bug first. :)

Please review,

Yong

The RFC version of the patch can be found at:
https://patchew.org/QEMU/20220926063641.25038-1-huang...@chinatelecom.cn/

This patchset aims to fix unexpected feature negotiation for the
vhost-user netdev interface.

Steps to reproduce the issue:
Prepare a VM (CentOS 8 in my work scenario) with a vhost-user
backend interface and configure qemu in server mode, so that dpdk
connects to qemu's unix socket periodically.

1. Start the VM in the background and restart the openvswitch service
   concurrently and repeatedly while the VM is starting.

2. Check whether the negotiated virtio features of the port are "0x4000"
   on the dpdk side by executing:
   ovs-vsctl list interface | grep features | grep {port_socket_path}

3. If the features equal "0x4000", go into the VM and check whether
   sending ARP packets works, executing:
   arping {IP_ADDR}
   If the VM interface is configured to get its address via DHCP, it
   will obtain no IP.

After doing the above steps, we'll find that arping does not work: the
ovs on the host side has forwarded unexpected ARP packets, which have a
bogus 0x value prepended to the Ethernet frame header. qemu does report
some errors when reading/writing vhost protocol commands while the VM
starts, like the following:

"Failed to set msg fds"
"vhost VQ 0 ring restore failed: -22: Invalid argument (22)"

The VM does not stop or report a more suggestive error message; it
seems that everything is OK.

The root cause is that the dpdk port negotiated nothing but the single
VHOST_USER_F_PROTOCOL_FEATURES feature with the vhost-user interface on
the qemu side, which is unexpected behavior. qemu only loads
VHOST_USER_F_PROTOCOL_FEATURES at VHOST_USER_SET_FEATURES time and
loses the guest features configured by the front-end virtio driver
through the VIRTIO_PCI_COMMON_GF address, which are stored in the
acked_features field of struct vhost_dev.

To explain how the acked_features disappear, we need to know the
lifecycle of acked_features in vhost_dev during feature negotiation.

1. qemu initializes the acked_features field of struct vhost_dev in
   vhost_net_init() by calling vhost_net_ack_features(); the initial
   value is fetched from the acked_features field of struct
   NetVhostUserState, which acts as the backup copy after vhost stops
   or the unix socket is closed.
   The first time around, the acked_features of struct NetVhostUserState
   is 0, so the initial value of vhost_dev's acked_features is also 0.

2. When the guest virtio driver sets features, qemu accepts the features
   and calls virtio_set_features to store them as acked_features in
   vhost_dev.

3. When the unix socket is closed, or the vhost_dev device stops working
   and is stopped unexpectedly, qemu calls chr_closed_bh or
   vhost_user_stop, which copy acked_features from vhost_dev to
   NetVhostUserState and clean up the vhost_dev. Since the virtio driver
   is not allowed to set features once the status of the virtio device
   changes to VIRTIO_CONFIG_S_FEATURES_OK, qemu needs to back them up in
   case they get lost.

4. Once the unix socket returns to normal and gets connected again, qemu
   calls vhost_user_start to restore the vhost_dev and fetches the
   acked_features previously stored in NetVhostUserState.

The above flow works fine in normal scenarios, but it doesn't cover the
scenario where the openvswitch service restarts at the same time as
virtio feature negotiation.

Let's analyze such a scenario:

   qemu                         dpdk

   vhost_net_init()
         |                      systemctl stop openvswitch.service
   virtio_set_features()        |
         |                      systemctl start openvswitch.service
   virtio_set_status()

Ovs stops its service before the guest sets the virtio features;
chr_closed_bh() is called and fetches the acked_features from vhost_dev.
It may store incomplete features into NetVhostUserState, since they do
not include the guest features yet, e.g. "0x4000".

The guest then sets the virtio features to another value, e.g. "0x7060a782";
this value will be stored in the acked_features of

[PATCH 0/2] Fix the virtio features negotiation flaw

2022-09-26 Thread huangy81
From: "Hyman Huang(黄勇)" 

This patchset aims to fix unexpected feature negotiation for the
vhost-user netdev interface.

Steps to reproduce the issue:
Prepare a VM (CentOS 8 in my work scenario) with a vhost-user
backend interface and configure qemu in server mode, so that dpdk
connects to qemu's unix socket periodically.

1. Start the VM in the background and restart the openvswitch service
   concurrently and repeatedly while the VM is starting.

2. Check whether the negotiated virtio features of the port are "0x4000"
   on the dpdk side by executing:
   ovs-vsctl list interface | grep features | grep {port_socket_path}

3. If the features equal "0x4000", go into the VM and check whether
   sending ARP packets works, executing:
   arping {IP_ADDR}
   If the VM interface is configured to get its address via DHCP, it
   will obtain no IP.

After doing the above steps, we'll find that arping does not work: the
ovs on the host side has forwarded unexpected ARP packets, which have a
bogus 0x value prepended to the Ethernet frame header. qemu does report
some errors when reading/writing vhost protocol commands while the VM
starts, like the following:

"Failed to set msg fds"
"vhost VQ 0 ring restore failed: -22: Invalid argument (22)"

The VM does not stop or report a more suggestive error message; it
seems that everything is OK.

The root cause is that the dpdk port negotiated nothing but the single
VHOST_USER_F_PROTOCOL_FEATURES feature with the vhost-user interface on
the qemu side, which is unexpected behavior. qemu only loads
VHOST_USER_F_PROTOCOL_FEATURES at VHOST_USER_SET_FEATURES time and
loses the guest features configured by the front-end virtio driver
through the VIRTIO_PCI_COMMON_GF address, which are stored in the
acked_features field of struct vhost_dev.

To explain how the acked_features disappear, we need to know the
lifecycle of acked_features in vhost_dev during feature negotiation.

1. qemu initializes the acked_features field of struct vhost_dev in
   vhost_net_init() by calling vhost_net_ack_features(); the initial
   value is fetched from the acked_features field of struct
   NetVhostUserState, which acts as the backup copy after vhost stops
   or the unix socket is closed.
   The first time around, the acked_features of struct NetVhostUserState
   is 0, so the initial value of vhost_dev's acked_features is also 0.

2. When the guest virtio driver sets features, qemu accepts the features
   and calls virtio_set_features to store them as acked_features in
   vhost_dev.

3. When the unix socket is closed, or the vhost_dev device stops working
   and is stopped unexpectedly, qemu calls chr_closed_bh or
   vhost_user_stop, which copy acked_features from vhost_dev to
   NetVhostUserState and clean up the vhost_dev. Since the virtio driver
   is not allowed to set features once the status of the virtio device
   changes to VIRTIO_CONFIG_S_FEATURES_OK, qemu needs to back them up in
   case they get lost.

4. Once the unix socket returns to normal and gets connected again, qemu
   calls vhost_user_start to restore the vhost_dev and fetches the
   acked_features previously stored in NetVhostUserState.

The above flow works fine in normal scenarios, but it doesn't cover the
scenario where the openvswitch service restarts at the same time as
virtio feature negotiation.

Let's analyze such a scenario:

   qemu                         dpdk

   vhost_net_init()
         |                      systemctl stop openvswitch.service
   virtio_set_features()        |
         |                      systemctl start openvswitch.service
   virtio_set_status()

Ovs stops its service before the guest sets the virtio features;
chr_closed_bh() is called and fetches the acked_features from vhost_dev.
It may store incomplete features into NetVhostUserState, since they do
not include the guest features yet, e.g. "0x4000".

The guest then sets the virtio features to another value, e.g.
"0x7060a782"; this value is stored in the acked_features of vhost_dev,
which is the correct and up-to-date set of features.

After the ovs service shows up again, qemu's unix socket gets connected
and vhost_user_start() is called, which restores the acked_features of
vhost_dev from NetVhostUserState, so the obsolete "0x4000" is loaded.

The guest then sets the virtio device status, and therefore qemu finally
calls virtio_net_vhost_status, which checks whether the vhost-net device
has started and starts it if not; consequently the obsolete
acked_features "0x4000" are negotiated after calling
vhost_dev_set_features().

So the key point of solving this issue is keeping the acked_features
in NetVhostUserState up-to-date; this patchset provides that
solution.

[PATCH 1/2]: Abstract the existing code for saving acked_features
 into vhost_user_save_acked_features so that the next
 patch stays clean.

[PATCH 2/2]: Save the acked_features to NetVhostUserState once the
 guest virtio driver has configured them. This step keeps the
 acked_features in NetVhostUserState up-to-date.

Please review, any comments and suggestions are welcome. 

Best regards.

Yong

Hyman Huang (2):
  vhost-user: 

[PATCH 1/2] vhost-user: Refactor vhost acked features saving

2022-09-26 Thread huangy81
From: Hyman Huang(黄勇) 

Abstract the saving of vhost acked features into
vhost_user_save_acked_features and export it as a utility function.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 include/net/vhost-user.h |  2 ++
 net/vhost-user.c | 35 +++
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
index 5bcd8a6285..00d46613d3 100644
--- a/include/net/vhost-user.h
+++ b/include/net/vhost-user.h
@@ -14,5 +14,7 @@
 struct vhost_net;
 struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
 uint64_t vhost_user_get_acked_features(NetClientState *nc);
+void vhost_user_save_acked_features(NetClientState *nc,
+bool cleanup);
 
 #endif /* VHOST_USER_H */
diff --git a/net/vhost-user.c b/net/vhost-user.c
index b1a0247b59..c512cc9727 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -45,24 +45,31 @@ uint64_t vhost_user_get_acked_features(NetClientState *nc)
 return s->acked_features;
 }
 
-static void vhost_user_stop(int queues, NetClientState *ncs[])
+void vhost_user_save_acked_features(NetClientState *nc, bool cleanup)
 {
 NetVhostUserState *s;
+
+s = DO_UPCAST(NetVhostUserState, nc, nc);
+if (s->vhost_net) {
+uint64_t features = vhost_net_get_acked_features(s->vhost_net);
+if (features) {
+s->acked_features = features;
+}
+
+if (cleanup) {
+vhost_net_cleanup(s->vhost_net);
+}
+}
+}
+
+static void vhost_user_stop(int queues, NetClientState *ncs[])
+{
 int i;
 
 for (i = 0; i < queues; i++) {
 assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);
 
-s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
-
-if (s->vhost_net) {
-/* save acked features */
-uint64_t features = vhost_net_get_acked_features(s->vhost_net);
-if (features) {
-s->acked_features = features;
-}
-vhost_net_cleanup(s->vhost_net);
-}
+vhost_user_save_acked_features(ncs[i], true);
 }
 }
 
@@ -251,11 +258,7 @@ static void chr_closed_bh(void *opaque)
 s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
 
 for (i = queues -1; i >= 0; i--) {
-s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
-
-if (s->vhost_net) {
-s->acked_features = vhost_net_get_acked_features(s->vhost_net);
-}
+vhost_user_save_acked_features(ncs[i], false);
 }
 
qmp_set_link(name, false, &err);
-- 
2.27.0




[PATCH 2/2] vhost-net: Fix the virtio features negotiation flaw

2022-09-26 Thread huangy81
From: Hyman Huang(黄勇) 

Save the acked_features once they are configured by the guest
virtio driver so that no features can be missed.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
 hw/net/vhost_net.c  | 9 +
 hw/net/virtio-net.c | 5 +
 include/net/vhost_net.h | 2 ++
 3 files changed, 16 insertions(+)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index d28f8b974b..2bffc276b9 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -141,6 +141,15 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net)
 return net->dev.acked_features;
 }
 
+void vhost_net_save_acked_features(NetClientState *nc)
+{
+if (nc->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
+return;
+}
+
+vhost_user_save_acked_features(nc, false);
+}
+
 static int vhost_net_get_fd(NetClientState *backend)
 {
 switch (backend->info->type) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index dd0d056fde..69c00b4e74 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -918,6 +918,11 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint64_t features)
 continue;
 }
 vhost_net_ack_features(get_vhost_net(nc->peer), features);
+/*
+ * keep acked_features in NetVhostUserState up-to-date so it
+ * can't miss any features configured by guest virtio driver.
+ */
+vhost_net_save_acked_features(nc->peer);
 }
 
 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 387e913e4e..3a5579b075 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -46,6 +46,8 @@ int vhost_set_vring_enable(NetClientState * nc, int enable);
 
 uint64_t vhost_net_get_acked_features(VHostNetState *net);
 
+void vhost_net_save_acked_features(NetClientState *nc);
+
 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
 
 #endif
-- 
2.27.0




[PATCH v1 0/8] migration: introduce dirtylimit capability

2022-09-01 Thread huangy81
From: Hyman Huang(黄勇) 

v1:
- make the vcpu-dirty-limit parameter experimental
- switch dirty limit off when migration is cancelled
- add cancel logic to the migration test

Please review, thanks,

Yong 

Abstract


This series adds a new migration capability called "dirtylimit". It can
be enabled when the dirty ring is enabled, and it improves vCPU
performance during migration. It is based on the previous patchset:
https://lore.kernel.org/qemu-devel/cover.1656177590.git.huang...@chinatelecom.cn/

As mentioned in the patchset "support dirty restraint on vCPU", the
dirtylimit way of migration avoids penalizing processes that only read
memory. This series wires up the vcpu dirty limit and wraps it as the
dirtylimit capability of migration. I introduce two parameters,
vcpu-dirtylimit-period and vcpu-dirtylimit, to implement the setup of
dirtylimit during live migration.
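
Conceptually the two parameters drive a feedback loop: every
vcpu-dirtylimit-period the per-vCPU dirty rate is sampled, and while it
stays above vcpu-dirtylimit the vCPU is throttled harder. The following
stand-alone sketch only illustrates that idea; all names and numbers are
made up, and the real logic lives in softmmu/dirtylimit.c:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t period_ms = 500;     /* vcpu-dirtylimit-period */
        uint64_t quota_mbps = 1;      /* vcpu-dirtylimit */
        uint64_t current_mbps = 150;  /* pretend measurement over the last period */
        uint64_t sleep_us = 0;        /* extra time the vCPU is kept off the CPU */
        int i;

        for (i = 0; i < 5; i++) {     /* one iteration per sampling period */
            if (current_mbps > quota_mbps) {
                sleep_us += 1000;     /* raise the penalty while above the quota */
                current_mbps /= 2;    /* pretend the guest dirties less when throttled */
            } else if (sleep_us >= 1000) {
                sleep_us -= 1000;     /* relax the penalty once below the quota */
            }
            printf("period %d (%" PRIu64 " ms): rate %" PRIu64 " MB/s, throttle %" PRIu64 " us\n",
                   i, period_ms, current_mbps, sleep_us);
        }
        return 0;
    }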

To validate the implementation, I tested live migration of a 32-vCPU VM
with the following model: only vcpu0 and vcpu1 are dirtied with a heavy
memory workload while the rest of the vcpus are left untouched, and
unixbench runs on vcpu8-vcpu15 with the cpu affinity set up by the
following command:
taskset -c 8-15 ./Run -i 2 -c 8 {unixbench test item}

The following are the results:

host cpu: Intel(R) Xeon(R) Platinum 8378A
host interface speed: 1000Mb/s
  |----------------------+--------+------------+---------------|
  | UnixBench test item  | Normal | Dirtylimit | Auto-converge |
  |----------------------+--------+------------+---------------|
  | dhry2reg             | 32800  | 32786      | 25292         |
  | whetstone-double     | 10326  | 10315      | 9847          |
  | pipe                 | 15442  | 15271      | 14506         |
  | context1             | 7260   | 6235       | 4514          |
  | spawn                | 3663   | 3317       | 3249          |
  | syscall              | 4669   | 4667       | 3841          |
  |----------------------+--------+------------+---------------|
From the data above we can draw the conclusion that vcpus that do not
dirty memory in the VM are almost unaffected during dirtylimit
migration, whereas the auto-converge way does affect them.

I also tested the total time of dirtylimit migration with variable dirty
memory sizes in the VM.

scenario 1:
host cpu: Intel(R) Xeon(R) Platinum 8378A
host interface speed: 1000Mb/s
  |-----------------------+----------------+-------------------|
  | dirty memory size(MB) | Dirtylimit(ms) | Auto-converge(ms) |
  |-----------------------+----------------+-------------------|
  | 60                    | 2014           | 2131              |
  | 70                    | 5381           | 12590             |
  | 90                    | 6037           | 33545             |
  | 110                   | 7660           | [*]               |
  |-----------------------+----------------+-------------------|
  [*]: This case means migration does not converge.

scenario 2:
host cpu: Intel(R) Xeon(R) CPU E5-2650
host interface speed: 1Mb/s
  |-----------------------+----------------+-------------------|
  | dirty memory size(MB) | Dirtylimit(ms) | Auto-converge(ms) |
  |-----------------------+----------------+-------------------|
  | 1600                  | 15842          | 27548             |
  | 2000                  | 19026          | 38447             |
  | 2400                  | 19897          | 46381             |
  | 2800                  | 22338          | 57149             |
  |-----------------------+----------------+-------------------|
The above data shows that the dirtylimit way of migration can also
reduce the total migration time, and it achieves convergence more easily
in some cases.

In addition to implementing the dirtylimit capability itself, this
series adds 3 test items for migration, so that developers can easily
play around with it:
 1. a qtest for dirty limit migration
 2. support for the dirty ring way of migration in the guestperf tool
 3. support for dirty limit migration in the guestperf tool

Please review, thanks!

Hyman Huang (8):
  qapi/migration: Introduce x-vcpu-dirty-limit-period parameter
  qapi/migration: Introduce x-vcpu-dirty-limit parameters
  migration: Introduce dirty-limit capability
  migration: Implement dirty-limit convergence algo
  migration: Export dirty-limit time info
  tests: Add migration dirty-limit capability test
  tests/migration: Introduce dirty-ring-size option into guestperf
  tests/migration: Introduce dirty-limit into guestperf

 include/sysemu/dirtylimit.h |   2 +
 migration/migration.c   |  51 +++
 migration/migration.h   |   1 +
 migration/ram.c |  53 ---
 migration/trace-events  |   1 +
 monitor/hmp-cmds.c  |  26 ++
 qapi/migration.json |  57 ++--
 softmmu/dirtylimit.c|  33 ++-
 tests/migration/guestperf/comparison.py |  24 +
 tests/migration/guestperf/engine.py |  33 ++-
 tests/migration/guestperf/hardware.py   |   8 +-
 

[PATCH v1 3/8] migration: Introduce dirty-limit capability

2022-09-01 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce the migration dirty-limit capability, which can
be turned on before live migration and limits the dirty
page rate during live migration.

Introduce the migrate_dirty_limit function to help check
whether the dirty-limit capability is enabled during live migration.

Meanwhile, refactor vcpu_dirty_rate_stat_collect
so that the period can be configured instead of hardcoded.

The dirty-limit capability is kind of like auto-converge,
but it uses the dirty limit instead of the traditional cpu-throttle
to throttle the guest down. To enable this feature, turn on
the dirty-limit capability before live migration using
migrate-set-capabilities, and set the parameters
"x-vcpu-dirty-limit-period" and "vcpu-dirty-limit" suitably
to speed up convergence.

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c | 10 ++
 migration/migration.h |  1 +
 qapi/migration.json   |  4 +++-
 softmmu/dirtylimit.c  | 11 ++-
 4 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index a748fe5..d117bb4 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2508,6 +2508,15 @@ bool migrate_auto_converge(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
 }
 
+bool migrate_dirty_limit(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_LIMIT];
+}
+
 bool migrate_zero_blocks(void)
 {
 MigrationState *s;
@@ -4437,6 +4446,7 @@ static Property migration_properties[] = {
 DEFINE_PROP_MIG_CAP("x-zero-copy-send",
 MIGRATION_CAPABILITY_ZERO_COPY_SEND),
 #endif
+DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT),
 
 DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/migration/migration.h b/migration/migration.h
index cdad8ac..7fbb9f8 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -409,6 +409,7 @@ bool migrate_ignore_shared(void);
 bool migrate_validate_uuid(void);
 
 bool migrate_auto_converge(void);
+bool migrate_dirty_limit(void);
 bool migrate_use_multifd(void);
 bool migrate_pause_before_switchover(void);
 int migrate_multifd_channels(void);
diff --git a/qapi/migration.json b/qapi/migration.json
index 8554d33..bc4bc96 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -477,6 +477,8 @@
 #will be handled faster.  This is a performance feature and
 #should not affect the correctness of postcopy migration.
 #(since 7.1)
+# @dirty-limit: Use dirty-limit to throttle down guest if enabled.
+#   (since 7.1)
 #
 # Features:
 # @unstable: Members @x-colo and @x-ignore-shared are experimental.
@@ -492,7 +494,7 @@
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
'validate-uuid', 'background-snapshot',
-   'zero-copy-send', 'postcopy-preempt'] }
+   'zero-copy-send', 'postcopy-preempt', 'dirty-limit'] }
 
 ##
 # @MigrationCapabilityStatus:
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 8d98cb7..1fdd8c6 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -23,6 +23,8 @@
 #include "exec/memory.h"
 #include "hw/boards.h"
 #include "sysemu/kvm.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
 #include "trace.h"
 
 /*
@@ -75,11 +77,18 @@ static bool dirtylimit_quit;
 
 static void vcpu_dirty_rate_stat_collect(void)
 {
+MigrationState *s = migrate_get_current();
 VcpuStat stat;
 int i = 0;
+int64_t period = DIRTYLIMIT_CALC_TIME_MS;
+
+if (migrate_dirty_limit() &&
+migration_is_active(s)) {
+period = s->parameters.x_vcpu_dirty_limit_period;
+}
 
 /* calculate vcpu dirtyrate */
-vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS,
+vcpu_calculate_dirtyrate(period,
 &stat,
  GLOBAL_DIRTY_LIMIT,
  false);
-- 
1.8.3.1




[PATCH v1 8/8] tests/migration: Introduce dirty-limit into guestperf

2022-09-01 Thread huangy81
From: Hyman Huang(黄勇) 

The guestperf tool does not currently cover dirty-limit
migration; add support for this feature.

To enable dirty-limit, set x-vcpu-dirty-limit-period
to 500ms and x-vcpu-dirty-limit to 10MB/s:
$ ./tests/migration/guestperf.py \
--dirty-limit --x-vcpu-dirty-limit-period 500 \
--x-vcpu-dirty-limit 10 --output output.json \

To run the entire standardized set of dirty-limit-enabled
comparisons, with unix migration:
$ ./tests/migration/guestperf-batch.py \
--dst-host localhost --transport unix \
--filter compr-dirty-limit* --output outputdir

Signed-off-by: Hyman Huang(黄勇) 
---
 tests/migration/guestperf/comparison.py | 24 
 tests/migration/guestperf/engine.py | 26 ++
 tests/migration/guestperf/progress.py   | 17 +++--
 tests/migration/guestperf/scenario.py   | 11 ++-
 tests/migration/guestperf/shell.py  | 18 +-
 5 files changed, 92 insertions(+), 4 deletions(-)

diff --git a/tests/migration/guestperf/comparison.py 
b/tests/migration/guestperf/comparison.py
index c03b3f6..d082a54 100644
--- a/tests/migration/guestperf/comparison.py
+++ b/tests/migration/guestperf/comparison.py
@@ -135,4 +135,28 @@ def __init__(self, name, scenarios):
 Scenario("compr-multifd-channels-64",
  multifd=True, multifd_channels=64),
 ]),
+
+
+# Looking at effect of dirty-limit with
+# varying x_vcpu_dirty_limit_period
+Comparison("compr-dirty-limit-period", scenarios = [
+Scenario("compr-dirty-limit-period-100",
+ dirty_limit=True, x_vcpu_dirty_limit_period=100),
+Scenario("compr-dirty-limit-period-500",
+ dirty_limit=True, x_vcpu_dirty_limit_period=500),
+Scenario("compr-dirty-limit-period-1000",
+ dirty_limit=True, x_vcpu_dirty_limit_period=1000),
+]),
+
+
+# Looking at effect of dirty-limit with
+# varying x_vcpu_dirty_limit
+Comparison("compr-dirty-limit", scenarios = [
+Scenario("compr-dirty-limit-10MB",
+ dirty_limit=True, x_vcpu_dirty_limit=10),
+Scenario("compr-dirty-limit-20MB",
+ dirty_limit=True, x_vcpu_dirty_limit=20),
+Scenario("compr-dirty-limit-50MB",
+ dirty_limit=True, x_vcpu_dirty_limit=50),
+]),
 ]
diff --git a/tests/migration/guestperf/engine.py 
b/tests/migration/guestperf/engine.py
index 2b98f00..c6b9bb1 100644
--- a/tests/migration/guestperf/engine.py
+++ b/tests/migration/guestperf/engine.py
@@ -103,6 +103,8 @@ def _migrate_progress(self, vm):
 info.get("expected-downtime", 0),
 info.get("setup-time", 0),
 info.get("cpu-throttle-percentage", 0),
+info.get("dirty-limit-throttle-us-per-full", 0),
+info.get("dirty-limit-us-ring-full", 0),
 )
 
 def _migrate(self, hardware, scenario, src, dst, connect_uri):
@@ -204,6 +206,30 @@ def _migrate(self, hardware, scenario, src, dst, 
connect_uri):
 resp = dst.command("migrate-set-parameters",
multifd_channels=scenario._multifd_channels)
 
+if scenario._dirty_limit:
+if not hardware._dirty_ring_size:
+raise Exception("dirty ring size must be configured when "
+"testing dirty limit migration")
+
+resp = src.command("migrate-set-capabilities",
+   capabilities = [
+   { "capability": "dirty-limit",
+ "state": True }
+   ])
+resp = src.command("migrate-set-parameters",
+x_vcpu_dirty_limit_period=scenario._x_vcpu_dirty_limit_period)
+resp = src.command("migrate-set-parameters",
+   x_vcpu_dirty_limit=scenario._x_vcpu_dirty_limit)
+resp = dst.command("migrate-set-capabilities",
+   capabilities = [
+   { "capability": "dirty-limit",
+ "state": True }
+   ])
+resp = dst.command("migrate-set-parameters",
+x_vcpu_dirty_limit_period=scenario._x_vcpu_dirty_limit_period)
+resp = dst.command("migrate-set-parameters",
+   x_vcpu_dirty_limit=scenario._x_vcpu_dirty_limit)
+
 resp = src.command("migrate", uri=connect_uri)
 
 post_copy = False
diff --git a/tests/migration/guestperf/progress.py 
b/tests/migration/guestperf/progress.py
index ab1ee57..dd5d86b 100644
--- a/tests/migration/guestperf/progress.py
+++ b/tests/migration/guestperf/progress.py
@@ -81,7 +81,9 @@ def __init__(self,
  downtime,
  downtime_expected,
  setup_time,
- throttle_pcent):
+ throttle_pcent,
+ 

[PATCH v1 4/8] migration: Implement dirty-limit convergence algo

2022-09-01 Thread huangy81
From: Hyman Huang(黄勇) 

Implement the dirty-limit convergence algorithm for live migration,
which is kind of like the auto-converge algorithm but uses dirty-limit
instead of cpu throttling to make migration converge.

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c  |  1 +
 migration/ram.c| 53 +-
 migration/trace-events |  1 +
 3 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index d117bb4..64696de 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -239,6 +239,7 @@ void migration_cancel(const Error *error)
 if (error) {
 migrate_set_error(current_migration, error);
 }
+qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
 migrate_fd_cancel(current_migration);
 }
 
diff --git a/migration/ram.c b/migration/ram.c
index dc1de9d..cc19c5e 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -45,6 +45,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-types-migration.h"
 #include "qapi/qapi-events-migration.h"
+#include "qapi/qapi-commands-migration.h"
 #include "qapi/qmp/qerror.h"
 #include "trace.h"
 #include "exec/ram_addr.h"
@@ -57,6 +58,8 @@
 #include "qemu/iov.h"
 #include "multifd.h"
 #include "sysemu/runstate.h"
+#include "sysemu/dirtylimit.h"
+#include "sysemu/kvm.h"
 
 #include "hw/boards.h" /* for machine_dump_guest_core() */
 
@@ -1139,6 +1142,21 @@ static void migration_update_rates(RAMState *rs, int64_t 
end_time)
 }
 }
 
+/*
+ * Enable dirty-limit to throttle down the guest
+ */
+static void migration_dirty_limit_guest(void)
+{
+if (!dirtylimit_in_service()) {
+MigrationState *s = migrate_get_current();
+int64_t quota_dirtyrate = s->parameters.x_vcpu_dirty_limit;
+
+/* Set quota dirtyrate if dirty limit not in service */
+qmp_set_vcpu_dirty_limit(false, -1, quota_dirtyrate, NULL);
+trace_migration_dirty_limit_guest(quota_dirtyrate);
+}
+}
+
 static void migration_trigger_throttle(RAMState *rs)
 {
 MigrationState *s = migrate_get_current();
@@ -1148,22 +1166,31 @@ static void migration_trigger_throttle(RAMState *rs)
 uint64_t bytes_dirty_period = rs->num_dirty_pages_period * 
TARGET_PAGE_SIZE;
 uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
 
-/* During block migration the auto-converge logic incorrectly detects
- * that ram migration makes no progress. Avoid this by disabling the
- * throttling logic during the bulk phase of block migration. */
-if (migrate_auto_converge() && !blk_mig_bulk_active()) {
-/* The following detection logic can be refined later. For now:
-   Check to see if the ratio between dirtied bytes and the approx.
-   amount of bytes that just got transferred since the last time
-   we were in this routine reaches the threshold. If that happens
-   twice, start or increase throttling. */
-
-if ((bytes_dirty_period > bytes_dirty_threshold) &&
-(++rs->dirty_rate_high_cnt >= 2)) {
+/*
+ * The following detection logic can be refined later. For now:
+ * Check to see if the ratio between dirtied bytes and the approx.
+ * amount of bytes that just got transferred since the last time
+ * we were in this routine reaches the threshold. If that happens
+ * twice, start or increase throttling.
+ */
+
+if ((bytes_dirty_period > bytes_dirty_threshold) &&
+(++rs->dirty_rate_high_cnt >= 2)) {
+rs->dirty_rate_high_cnt = 0;
+/*
+ * During block migration the auto-converge logic incorrectly detects
+ * that ram migration makes no progress. Avoid this by disabling the
+ * throttling logic during the bulk phase of block migration
+ */
+
+if (migrate_auto_converge() && !blk_mig_bulk_active()) {
 trace_migration_throttle();
-rs->dirty_rate_high_cnt = 0;
 mig_throttle_guest_down(bytes_dirty_period,
 bytes_dirty_threshold);
+} else if (migrate_dirty_limit() &&
+   kvm_dirty_ring_enabled() &&
+   migration_is_active(s)) {
+migration_dirty_limit_guest();
 }
 }
 }
diff --git a/migration/trace-events b/migration/trace-events
index 57003ed..33a2666 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -91,6 +91,7 @@ migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
 migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, 
unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx"
 migration_throttle(void) ""
+migration_dirty_limit_guest(int64_t dirtyrate) "guest dirty page rate limit %" 
PRIi64 " MB/s"
 ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: 
%" PRIx64 " %zx"
 ram_load_loop(const char *rbname, uint64_t addr, int flags, void *host) "%s: 

[PATCH v1 5/8] migration: Export dirty-limit time info

2022-09-01 Thread huangy81
From: Hyman Huang(黄勇) 

Export the dirty limit throttle time and the estimated ring full
time, through which we can observe the dirty limit process
during live migration.

Signed-off-by: Hyman Huang(黄勇) 
---
 include/sysemu/dirtylimit.h |  2 ++
 migration/migration.c   | 10 ++
 monitor/hmp-cmds.c  | 10 ++
 qapi/migration.json | 10 +-
 softmmu/dirtylimit.c| 22 ++
 5 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h
index 8d2c1f3..98cc4a6 100644
--- a/include/sysemu/dirtylimit.h
+++ b/include/sysemu/dirtylimit.h
@@ -34,4 +34,6 @@ void dirtylimit_set_vcpu(int cpu_index,
 void dirtylimit_set_all(uint64_t quota,
 bool enable);
 void dirtylimit_vcpu_execute(CPUState *cpu);
+int64_t dirtylimit_throttle_us_per_full(void);
+int64_t dirtylimit_us_ring_full(void);
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index 64696de..22ba197 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -61,6 +61,7 @@
 #include "sysemu/cpus.h"
 #include "yank_functions.h"
 #include "sysemu/qtest.h"
+#include "sysemu/dirtylimit.h"
 
 #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling 
*/
 
@@ -1110,6 +,15 @@ static void populate_ram_info(MigrationInfo *info, 
MigrationState *s)
 info->ram->remaining = ram_bytes_remaining();
 info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
 }
+
+if (migrate_dirty_limit() && dirtylimit_in_service()) {
+info->has_dirty_limit_throttle_us_per_full = true;
+info->dirty_limit_throttle_us_per_full =
+dirtylimit_throttle_us_per_full();
+
+info->has_dirty_limit_us_ring_full = true;
+info->dirty_limit_us_ring_full = dirtylimit_us_ring_full();
+}
 }
 
 static void populate_disk_info(MigrationInfo *info)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index b362fae..23c3f48 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -358,6 +358,16 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->cpu_throttle_percentage);
 }
 
+if (info->has_dirty_limit_throttle_us_per_full) {
+monitor_printf(mon, "dirty-limit throttle time: %" PRIu64 " us\n",
+   info->dirty_limit_throttle_us_per_full);
+}
+
+if (info->has_dirty_limit_us_ring_full) {
+monitor_printf(mon, "dirty-limit ring full time: %" PRIu64 " us\n",
+   info->dirty_limit_us_ring_full);
+}
+
 if (info->has_postcopy_blocktime) {
 monitor_printf(mon, "postcopy blocktime: %u\n",
info->postcopy_blocktime);
diff --git a/qapi/migration.json b/qapi/migration.json
index bc4bc96..c263d54 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -242,6 +242,12 @@
 #   Present and non-empty when migration is blocked.
 #   (since 6.0)
 #
+# @dirty-limit-throttle-us-per-full: Throttle time (us) during the period of
+#dirty ring full (since 7.0)
+#
+# @dirty-limit-us-ring-full: Estimated periodic time (us) of dirty ring full.
+#(since 7.0)
+#
 # Since: 0.14
 ##
 { 'struct': 'MigrationInfo',
@@ -259,7 +265,9 @@
'*postcopy-blocktime' : 'uint32',
'*postcopy-vcpu-blocktime': ['uint32'],
'*compression': 'CompressionStats',
-   '*socket-address': ['SocketAddress'] } }
+   '*socket-address': ['SocketAddress'],
+   '*dirty-limit-throttle-us-per-full': 'int64',
+   '*dirty-limit-us-ring-full': 'int64'} }
 
 ##
 # @query-migrate:
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 1fdd8c6..1251b27 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -546,6 +546,28 @@ static struct DirtyLimitInfo *dirtylimit_query_vcpu(int 
cpu_index)
 return info;
 }
 
+/* Pick up first vcpu throttle time by default */
+int64_t dirtylimit_throttle_us_per_full(void)
+{
+CPUState *cpu = first_cpu;
+return cpu->throttle_us_per_full;
+}
+
+/*
+ * Estimate dirty ring full time under current dirty page rate.
+ * Return -1 if guest doesn't dirty memory.
+ */
+int64_t dirtylimit_us_ring_full(void)
+{
+uint64_t curr_rate = vcpu_dirty_rate_get(0);
+
+if (!curr_rate) {
+return -1;
+}
+
+return dirtylimit_dirty_ring_full_time(curr_rate);
+}
+
 static struct DirtyLimitInfoList *dirtylimit_query_all(void)
 {
 int i, index;
-- 
1.8.3.1




[PATCH v1 7/8] tests/migration: Introduce dirty-ring-size option into guestperf

2022-09-01 Thread huangy81
From: Hyman Huang(黄勇) 

The guestperf tool does not enable the dirty ring feature by default
when testing migration.

To support dirty ring migration performance tests, introduce the
dirty-ring-size option into the guestperf tools; the value ranges
in [1024, 65536].
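
Besides this range, KVM additionally expects the ring size to be a power
of two. The following stand-alone sketch of such a validity check is
illustrative only and is not the guestperf or QEMU code:

    #include <stdbool.h>
    #include <stdio.h>

    /* accept only a power of two within [1024, 65536] */
    static bool dirty_ring_size_ok(unsigned v)
    {
        return v >= 1024 && v <= 65536 && (v & (v - 1)) == 0;
    }

    int main(void)
    {
        unsigned candidates[] = { 512, 1024, 4096, 5000, 65536 };
        unsigned i;

        for (i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++) {
            printf("%u -> %s\n", candidates[i],
                   dirty_ring_size_ok(candidates[i]) ? "ok" : "rejected");
        }
        return 0;
    }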

To set the dirty ring size to 4096 during a migration test:
$ ./tests/migration/guestperf.py --dirty-ring-size 4096 xxx

Signed-off-by: Hyman Huang(黄勇) 
---
 tests/migration/guestperf/engine.py   | 7 ++-
 tests/migration/guestperf/hardware.py | 8 ++--
 tests/migration/guestperf/shell.py| 7 ++-
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/tests/migration/guestperf/engine.py 
b/tests/migration/guestperf/engine.py
index 87a6ab2..2b98f00 100644
--- a/tests/migration/guestperf/engine.py
+++ b/tests/migration/guestperf/engine.py
@@ -304,7 +304,6 @@ def _get_common_args(self, hardware, tunnelled=False):
 cmdline = "'" + cmdline + "'"
 
 argv = [
-"-accel", "kvm",
 "-cpu", "host",
 "-kernel", self._kernel,
 "-initrd", self._initrd,
@@ -315,6 +314,12 @@ def _get_common_args(self, hardware, tunnelled=False):
 "-smp", str(hardware._cpus),
 ]
 
+if hardware._dirty_ring_size:
+argv.extend(["-accel", "kvm,dirty-ring-size=%s" %
+ hardware._dirty_ring_size])
+else:
+argv.extend(["-accel", "kvm"])
+
 if self._debug:
 argv.extend(["-device", "sga"])
 
diff --git a/tests/migration/guestperf/hardware.py 
b/tests/migration/guestperf/hardware.py
index 3145785..f779cc0 100644
--- a/tests/migration/guestperf/hardware.py
+++ b/tests/migration/guestperf/hardware.py
@@ -23,7 +23,8 @@ def __init__(self, cpus=1, mem=1,
  src_cpu_bind=None, src_mem_bind=None,
  dst_cpu_bind=None, dst_mem_bind=None,
  prealloc_pages = False,
- huge_pages=False, locked_pages=False):
+ huge_pages=False, locked_pages=False,
+ dirty_ring_size=0):
 self._cpus = cpus
 self._mem = mem # GiB
 self._src_mem_bind = src_mem_bind # List of NUMA nodes
@@ -33,6 +34,7 @@ def __init__(self, cpus=1, mem=1,
 self._prealloc_pages = prealloc_pages
 self._huge_pages = huge_pages
 self._locked_pages = locked_pages
+self._dirty_ring_size = dirty_ring_size
 
 
 def serialize(self):
@@ -46,6 +48,7 @@ def serialize(self):
 "prealloc_pages": self._prealloc_pages,
 "huge_pages": self._huge_pages,
 "locked_pages": self._locked_pages,
+"dirty_ring_size": self._dirty_ring_size,
 }
 
 @classmethod
@@ -59,4 +62,5 @@ def deserialize(cls, data):
 data["dst_mem_bind"],
 data["prealloc_pages"],
 data["huge_pages"],
-data["locked_pages"])
+data["locked_pages"],
+data["dirty_ring_size"])
diff --git a/tests/migration/guestperf/shell.py 
b/tests/migration/guestperf/shell.py
index 8a809e3..559616f 100644
--- a/tests/migration/guestperf/shell.py
+++ b/tests/migration/guestperf/shell.py
@@ -60,6 +60,8 @@ def __init__(self):
 parser.add_argument("--prealloc-pages", dest="prealloc_pages", 
default=False)
 parser.add_argument("--huge-pages", dest="huge_pages", default=False)
 parser.add_argument("--locked-pages", dest="locked_pages", 
default=False)
+parser.add_argument("--dirty-ring-size", dest="dirty_ring_size",
+default=0, type=int)
 
 self._parser = parser
 
@@ -89,7 +91,10 @@ def split_map(value):
 
 locked_pages=args.locked_pages,
 huge_pages=args.huge_pages,
-prealloc_pages=args.prealloc_pages)
+prealloc_pages=args.prealloc_pages,
+
+dirty_ring_size=args.dirty_ring_size)
+
 
 
 class Shell(BaseShell):
-- 
1.8.3.1




[PATCH v1 2/8] qapi/migration: Introduce x-vcpu-dirty-limit parameters

2022-09-01 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce "x-vcpu-dirty-limit" migration parameter used
to limit dirty page rate during live migration.

"x-vcpu-dirty-limit" and "x-vcpu-dirty-limit-period" are
two dirty-limit-related migration parameters, which can
be set before and during live migration by qmp
migrate-set-parameters.

This two parameters are used to help implement the dirty
page rate limit algo of migration.

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c | 14 ++
 monitor/hmp-cmds.c|  8 
 qapi/migration.json   | 18 +++---
 3 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index a8a8065..a748fe5 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -117,6 +117,7 @@
 #define DEFAULT_MIGRATE_ANNOUNCE_STEP100
 
 #define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 500 /* ms */
+#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT1   /* MB/s */
 
 static NotifierList migration_state_notifiers =
 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -967,6 +968,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
 params->has_x_vcpu_dirty_limit_period = true;
 params->x_vcpu_dirty_limit_period = 
s->parameters.x_vcpu_dirty_limit_period;
 
+params->has_x_vcpu_dirty_limit = true;
+params->x_vcpu_dirty_limit = s->parameters.x_vcpu_dirty_limit;
+
 return params;
 }
 
@@ -1671,6 +1675,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 if (params->has_x_vcpu_dirty_limit_period) {
 dest->x_vcpu_dirty_limit_period = params->x_vcpu_dirty_limit_period;
 }
+
+if (params->has_x_vcpu_dirty_limit) {
+dest->x_vcpu_dirty_limit = params->x_vcpu_dirty_limit;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1797,6 +1805,9 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 s->parameters.x_vcpu_dirty_limit_period =
 params->x_vcpu_dirty_limit_period;
 }
+if (params->has_x_vcpu_dirty_limit) {
+s->parameters.x_vcpu_dirty_limit = params->x_vcpu_dirty_limit;
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -4401,6 +4412,9 @@ static Property migration_properties[] = {
 DEFINE_PROP_UINT64("x-vcpu-dirty-limit-period", MigrationState,
parameters.x_vcpu_dirty_limit_period,
DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD),
+DEFINE_PROP_UINT64("vcpu-dirty-limit", MigrationState,
+   parameters.x_vcpu_dirty_limit,
+   DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 7569859..b362fae 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -536,6 +536,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 monitor_printf(mon, "%s: %" PRIu64 " ms\n",
 MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD),
 params->x_vcpu_dirty_limit_period);
+
+monitor_printf(mon, "%s: %" PRIu64 " MB/s\n",
+MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT),
+params->x_vcpu_dirty_limit);
 }
 
 qapi_free_MigrationParameters(params);
@@ -1359,6 +1363,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 p->has_x_vcpu_dirty_limit_period = true;
visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err);
 break;
+case MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT:
+p->has_x_vcpu_dirty_limit = true;
visit_type_size(v, param, &p->x_vcpu_dirty_limit, &err);
+break;
 default:
 assert(0);
 }
diff --git a/qapi/migration.json b/qapi/migration.json
index 332c087..8554d33 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -779,6 +779,9 @@
 # @x-vcpu-dirty-limit-period: Periodic time (ms) of dirty limit during live 
migration.
 # Defaults to 500ms. (Since 7.1)
 #
+# @x-vcpu-dirty-limit: Dirtyrate limit (MB/s) during live migration.
+#  Defaults to 1. (Since 7.1)
+#
 # Features:
 # @unstable: Member @x-checkpoint-delay and @x-vcpu-dirty-limit-period
 #are experimental.
@@ -801,7 +804,8 @@
'max-cpu-throttle', 'multifd-compression',
'multifd-zlib-level', 'multifd-zstd-level',
'block-bitmap-mapping',
-   { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] } ] 
}
+   { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] },
+   'x-vcpu-dirty-limit'] }
 
 ##
 # @MigrateSetParameters:
@@ -949,6 +953,9 @@
 # @x-vcpu-dirty-limit-period: Periodic time (ms) of dirty limit during live 
migration.
 # Defaults to 500ms. (Since 

[PATCH v1 6/8] tests: Add migration dirty-limit capability test

2022-09-01 Thread huangy81
From: Hyman Huang(黄勇) 

Add a migration dirty-limit capability test if the kernel supports
the dirty ring.

The migration dirty-limit capability introduces the dirty limit
capability; two parameters, x-vcpu-dirty-limit-period and
x-vcpu-dirty-limit, are introduced to implement live
migration with dirty limit.

The test case does the following things:
1. start the src and dst VMs and enable the dirty-limit capability
2. start migration and then cancel it to check whether dirty limit
   stops working
3. restart the dst VM
4. start migration and enable the dirty-limit capability
5. check whether migration satisfies the convergence condition
   during the pre-switchover phase

Signed-off-by: Hyman Huang(黄勇) 
---
 tests/qtest/migration-test.c | 154 +++
 1 file changed, 154 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 4728d52..f3bfd85 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2409,6 +2409,158 @@ static void test_vcpu_dirty_limit(void)
 dirtylimit_stop_vm(vm);
 }
 
+static void migrate_dirty_limit_wait_showup(QTestState *from,
+const int64_t period,
+const int64_t value)
+{
+/* Enable dirty limit capability */
+migrate_set_capability(from, "dirty-limit", true);
+
+/* Set dirty limit parameters */
+migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period);
+migrate_set_parameter_int(from, "x-vcpu-dirty-limit", value);
+
+/* Make sure migrate can't converge */
+migrate_ensure_non_converge(from);
+
+/* To check limit rate after precopy */
+migrate_set_capability(from, "pause-before-switchover", true);
+
+/* Wait for the serial output from the source */
+wait_for_serial("src_serial");
+}
+
+/*
+ * This test does:
+ *  source   target
+ *   migrate_incoming
+ * migrate
+ * migrate_cancel
+ *   restart target
+ * migrate
+ *
+ *  And see that if dirty limit works correctly
+ */
+static void test_migrate_dirty_limit(void)
+{
+g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
+QTestState *from, *to;
+int64_t remaining, throttle_us_per_full;
+/*
+ * We want the test to be stable and as fast as possible.
 * E.g., with 1Gb/s bandwidth migration may pass without dirty limit,
 * so we need to decrease the bandwidth.
+ */
+const int64_t dirtylimit_period = 1000, dirtylimit_value = 50;
+const int64_t max_bandwidth = 4; /* ~400Mb/s */
+const int64_t downtime_limit = 250; /* 250ms */
+/*
+ * We migrate through unix-socket (> 500Mb/s).
+ * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s).
+ * So, we can predict expected_threshold
+ */
+const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000;
+int max_try_count = 10;
+MigrateCommon args = {
+.start = {
+.hide_stderr = true,
+.use_dirty_ring = true,
+},
+.listen_uri = uri,
+.connect_uri = uri,
+};
+
+/* Start src, dst vm */
if (test_migrate_start(&from, &to, args.listen_uri, &args.start)) {
+return;
+}
+
+/* Prepare for dirty limit migration and wait src vm show up */
+migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value);
+
+/* Start migrate */
+migrate_qmp(from, uri, "{}");
+
+/* Wait for dirty limit throttle begin */
+throttle_us_per_full = 0;
+while (throttle_us_per_full == 0) {
+throttle_us_per_full =
+read_migrate_property_int(from, 
"dirty-limit-throttle-us-per-full");
+usleep(100);
+g_assert_false(got_stop);
+}
+
+/* Now cancel migrate and wait for dirty limit throttle switch off */
+migrate_cancel(from);
+wait_for_migration_status(from, "cancelled", NULL);
+
+/* Check if dirty limit throttle switched off, set timeout 1ms */
+do {
+throttle_us_per_full =
+read_migrate_property_int(from, 
"dirty-limit-throttle-us-per-full");
+usleep(100);
+g_assert_false(got_stop);
+} while (throttle_us_per_full != 0 && --max_try_count);
+
+/* Assert dirty limit is not in service */
+g_assert_cmpint(throttle_us_per_full, ==, 0);
+
+args = (MigrateCommon) {
+.start = {
+.only_target = true,
+.use_dirty_ring = true,
+},
+.listen_uri = uri,
+.connect_uri = uri,
+};
+
+/* Restart dst vm, src vm already show up so we needn't wait anymore */
if (test_migrate_start(&from, &to, args.listen_uri, &args.start)) {
+return;
+}
+
+/* Start migrate */
+migrate_qmp(from, uri, "{}");
+
+/* Wait for dirty limit throttle begin */
+throttle_us_per_full = 0;
+while (throttle_us_per_full == 0) {
+throttle_us_per_full =
+read_migrate_property_int(from, 
"dirty-limit-throttle-us-per-full");

[PATCH v1 1/8] qapi/migration: Introduce x-vcpu-dirty-limit-period parameter

2022-09-01 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce "x-vcpu-dirty-limit-period" migration experimental
parameter, which is used to make dirtyrate calculation period
configurable.

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c | 16 
 monitor/hmp-cmds.c|  8 
 qapi/migration.json   | 31 ---
 3 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index bb8bbdd..a8a8065 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -116,6 +116,8 @@
 #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS5
 #define DEFAULT_MIGRATE_ANNOUNCE_STEP100
 
+#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 500 /* ms */
+
 static NotifierList migration_state_notifiers =
 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
 
@@ -962,6 +964,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
s->parameters.block_bitmap_mapping);
 }
 
+params->has_x_vcpu_dirty_limit_period = true;
+params->x_vcpu_dirty_limit_period = 
s->parameters.x_vcpu_dirty_limit_period;
+
 return params;
 }
 
@@ -1662,6 +1667,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 dest->has_block_bitmap_mapping = true;
 dest->block_bitmap_mapping = params->block_bitmap_mapping;
 }
+
+if (params->has_x_vcpu_dirty_limit_period) {
+dest->x_vcpu_dirty_limit_period = params->x_vcpu_dirty_limit_period;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1784,6 +1793,10 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 QAPI_CLONE(BitmapMigrationNodeAliasList,
params->block_bitmap_mapping);
 }
+if (params->has_x_vcpu_dirty_limit_period) {
+s->parameters.x_vcpu_dirty_limit_period =
+params->x_vcpu_dirty_limit_period;
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -4385,6 +4398,9 @@ static Property migration_properties[] = {
 DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
 DEFINE_PROP_STRING("tls-hostname", MigrationState, 
parameters.tls_hostname),
 DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),
+DEFINE_PROP_UINT64("x-vcpu-dirty-limit-period", MigrationState,
+   parameters.x_vcpu_dirty_limit_period,
+   DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index c6cd6f9..7569859 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -532,6 +532,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 }
 }
 }
+
+monitor_printf(mon, "%s: %" PRIu64 " ms\n",
+MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD),
+params->x_vcpu_dirty_limit_period);
 }
 
 qapi_free_MigrationParameters(params);
@@ -1351,6 +1355,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 error_setg(, "The block-bitmap-mapping parameter can only be set "
"through QMP");
 break;
+case MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD:
+p->has_x_vcpu_dirty_limit_period = true;
visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err);
+break;
 default:
 assert(0);
 }
diff --git a/qapi/migration.json b/qapi/migration.json
index 81185d4..332c087 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -776,8 +776,12 @@
 #block device name if there is one, and to their node 
name
 #otherwise. (Since 5.2)
 #
+# @x-vcpu-dirty-limit-period: Periodic time (ms) of dirty limit during live 
migration.
+# Defaults to 500ms. (Since 7.1)
+#
 # Features:
-# @unstable: Member @x-checkpoint-delay is experimental.
+# @unstable: Member @x-checkpoint-delay and @x-vcpu-dirty-limit-period
+#are experimental.
 #
 # Since: 2.4
 ##
@@ -795,8 +799,9 @@
'multifd-channels',
'xbzrle-cache-size', 'max-postcopy-bandwidth',
'max-cpu-throttle', 'multifd-compression',
-   'multifd-zlib-level' ,'multifd-zstd-level',
-   'block-bitmap-mapping' ] }
+   'multifd-zlib-level', 'multifd-zstd-level',
+   'block-bitmap-mapping',
+   { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] } ] 
}
 
 ##
 # @MigrateSetParameters:
@@ -941,8 +946,12 @@
 #block device name if there is one, and to their node 
name
 #otherwise. (Since 5.2)
 #
+# @x-vcpu-dirty-limit-period: Periodic time (ms) of dirty limit during live 
migration.
+# Defaults to 500ms. 

[PATCH v1] dirtylimit: Fix overflow when computing MB

2022-07-29 Thread huangy81
From: Hyman Huang(黄勇) 

Coverity points out an overflow problem when computing MB:
dirty_ring_size and TARGET_PAGE_SIZE are both 32 bits, so the
multiplication will be done as a 32-bit operation, which
could overflow. Simplify the formula.

Meanwhile, fix a spelling mistake in a variable name.
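
For illustration, here is a stand-alone sketch of the wrap-around,
assuming a hypothetical target with 64 KiB pages (TARGET_PAGE_BITS == 16)
and the maximum dirty ring size of 65536 entries:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t dirty_ring_size = 65536;   /* entries */
        uint32_t page_bits = 16;            /* hypothetical 64 KiB pages */

        /* 32-bit multiply: 65536 * 65536 wraps to 0 before the >> 20 */
        uint32_t broken = dirty_ring_size * (1u << page_bits) >> 20;

        /* shift-only form stays in range: 65536 >> (20 - 16) = 4096 MB */
        uint32_t fixed = dirty_ring_size >> (20u - page_bits);

        printf("broken = %u MB, fixed = %u MB\n", broken, fixed);
        return 0;
    }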

Reported-by: Peter Maydell 
Signed-off-by: Peter Maydell 
Signed-off-by: Richard Henderson 
Signed-off-by: Hyman Huang(黄勇) 
---
 softmmu/dirtylimit.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 8d98cb7..1423225 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -236,14 +236,14 @@ static inline int64_t 
dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
 {
 static uint64_t max_dirtyrate;
 uint32_t dirty_ring_size = kvm_dirty_ring_size();
-uint64_t dirty_ring_size_meory_MB =
-dirty_ring_size * TARGET_PAGE_SIZE >> 20;
+uint32_t dirty_ring_size_memory_MB =
+dirty_ring_size >> (20 - TARGET_PAGE_BITS);
 
 if (max_dirtyrate < dirtyrate) {
 max_dirtyrate = dirtyrate;
 }
 
-return dirty_ring_size_meory_MB * 100 / max_dirtyrate;
+return dirty_ring_size_memory_MB * 100ULL / max_dirtyrate;
 }
 
 static inline bool dirtylimit_done(uint64_t quota,
-- 
1.8.3.1




[PATCH] dirtylimit: Fix overflow when computing MB

2022-07-29 Thread huangy81
From: Hyman Huang(黄勇) 

Coverity points out an overflow problem when computing MB:
dirty_ring_size and TARGET_PAGE_SIZE are both 32 bits, so the
multiplication is done as a 32-bit operation, which could
overflow. Simplify the formula.

Meanwhile, fix a spelling mistake in a variable name.

Reported-by: Peter Maydell 
Signed-off-by: Richard Henderson 
Signed-off-by: Hyman Huang(黄勇) 
---
 softmmu/dirtylimit.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 8d98cb7..ab62f29 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -236,14 +236,14 @@ static inline int64_t 
dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
 {
 static uint64_t max_dirtyrate;
 uint32_t dirty_ring_size = kvm_dirty_ring_size();
-uint64_t dirty_ring_size_meory_MB =
-dirty_ring_size * TARGET_PAGE_SIZE >> 20;
+uint32_t dirty_ring_size_memory_MB =
+dirty_ring_size >> (20 - TARGET_PAGE_BITS);
 
 if (max_dirtyrate < dirtyrate) {
 max_dirtyrate = dirtyrate;
 }
 
-return dirty_ring_size_meory_MB * 100 / max_dirtyrate;
+return dirty_ring_size_memory_MB * 100 / max_dirtyrate;
 }
 
 static inline bool dirtylimit_done(uint64_t quota,
-- 
1.8.3.1




[PATCH 8/8] tests/migration: Introduce dirty-limit into guestperf

2022-07-23 Thread huangy81
From: Hyman Huang(黄勇) 

The guestperf tool does not currently cover dirty-limit
migration; add support for this feature.

To enable dirty-limit, set x-vcpu-dirty-limit-period
to 500ms and vcpu-dirty-limit to 10MB/s:
$ ./tests/migration/guestperf.py \
--dirty-limit --x-vcpu-dirty-limit-period 500 \
--dirty-limit --vcpu-dirty-limit 10 \
--output output.json

To run the entire standardized set of dirty-limit-enabled
comparisons, with unix migration:
$ ./tests/migration/guestperf-batch.py \
--dst-host localhost --transport unix \
--filter compr-dirty-limit-period* --output outputdir

Signed-off-by: Hyman Huang(黄勇) 
---
 tests/migration/guestperf/comparison.py | 14 ++
 tests/migration/guestperf/engine.py | 26 ++
 tests/migration/guestperf/progress.py   | 17 +++--
 tests/migration/guestperf/scenario.py   | 11 ++-
 tests/migration/guestperf/shell.py  | 18 +-
 5 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/tests/migration/guestperf/comparison.py 
b/tests/migration/guestperf/comparison.py
index c03b3f6..ccc0db9 100644
--- a/tests/migration/guestperf/comparison.py
+++ b/tests/migration/guestperf/comparison.py
@@ -135,4 +135,18 @@ def __init__(self, name, scenarios):
 Scenario("compr-multifd-channels-64",
  multifd=True, multifd_channels=64),
 ]),
+
+
+# Looking at effect of dirty-limit with
+# varying x_vcpu_dirty_limit_period
+Comparison("compr-dirty-limit", scenarios = [
+Scenario("compr-dirty-limit-period-100",
+ dirty_limit=True, x_vcpu_dirty_limit_period=100),
+Scenario("compr-dirty-limit-period-500",
+ dirty_limit=True, x_vcpu_dirty_limit_period=500),
+Scenario("compr-dirty-limit-period-1000",
+ dirty_limit=True, x_vcpu_dirty_limit_period=1000),
+]),
+
+
 ]
diff --git a/tests/migration/guestperf/engine.py 
b/tests/migration/guestperf/engine.py
index 2b98f00..2f29471 100644
--- a/tests/migration/guestperf/engine.py
+++ b/tests/migration/guestperf/engine.py
@@ -103,6 +103,8 @@ def _migrate_progress(self, vm):
 info.get("expected-downtime", 0),
 info.get("setup-time", 0),
 info.get("cpu-throttle-percentage", 0),
+info.get("dirty-limit-throttle-us-per-full", 0),
+info.get("dirty-limit-us-ring-full", 0),
 )
 
 def _migrate(self, hardware, scenario, src, dst, connect_uri):
@@ -204,6 +206,30 @@ def _migrate(self, hardware, scenario, src, dst, 
connect_uri):
 resp = dst.command("migrate-set-parameters",
multifd_channels=scenario._multifd_channels)
 
+if scenario._dirty_limit:
+if not hardware._dirty_ring_size:
+raise Exception("dirty ring size must be configured when "
+"testing dirty limit migration")
+
+resp = src.command("migrate-set-capabilities",
+   capabilities = [
+   { "capability": "dirty-limit",
+ "state": True }
+   ])
+resp = src.command("migrate-set-parameters",
+x_vcpu_dirty_limit_period=scenario._x_vcpu_dirty_limit_period)
+resp = src.command("migrate-set-parameters",
+   vcpu_dirty_limit=scenario._vcpu_dirty_limit)
+resp = dst.command("migrate-set-capabilities",
+   capabilities = [
+   { "capability": "dirty-limit",
+ "state": True }
+   ])
+resp = dst.command("migrate-set-parameters",
+x_vcpu_dirty_limit_period=scenario._x_vcpu_dirty_limit_period)
+resp = dst.command("migrate-set-parameters",
+   vcpu_dirty_limit=scenario._vcpu_dirty_limit)
+
 resp = src.command("migrate", uri=connect_uri)
 
 post_copy = False
diff --git a/tests/migration/guestperf/progress.py 
b/tests/migration/guestperf/progress.py
index ab1ee57..dd5d86b 100644
--- a/tests/migration/guestperf/progress.py
+++ b/tests/migration/guestperf/progress.py
@@ -81,7 +81,9 @@ def __init__(self,
  downtime,
  downtime_expected,
  setup_time,
- throttle_pcent):
+ throttle_pcent,
+ dirty_limit_throttle_us_per_full,
+ dirty_limit_us_ring_full):
 
 self._status = status
 self._ram = ram
@@ -91,6 +93,11 @@ def __init__(self,
 self._downtime_expected = downtime_expected
 self._setup_time = setup_time
 self._throttle_pcent = throttle_pcent
+self._dirty_limit_throttle_us_per_full = dirty_limit_throttle_us_per_full
+self._dirty_limit_us_ring_full = dirty_limit_us_ring_full

[PATCH 6/8] tests: Add migration dirty-limit capability test

2022-07-23 Thread huangy81
From: Hyman Huang(黄勇) 

Add a migration dirty-limit capability test, run only if the
kernel supports the dirty ring.

The migration dirty-limit capability introduces dirty-limit
throttling; two parameters, x-vcpu-dirty-limit-period and
vcpu-dirty-limit, are introduced to implement live migration
with a dirty limit.

The test case enables the capability and sets the corresponding
parameters to test migration. When migration switches to the
pre-switchover phase, it checks, as the auto-converge test does,
whether migration satisfies the convergence condition.

Signed-off-by: Hyman Huang(黄勇) 
---
 tests/qtest/migration-test.c | 92 
 1 file changed, 92 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 71595a7..88503a1 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2412,6 +2412,96 @@ static void test_vcpu_dirty_limit(void)
 dirtylimit_stop_vm(vm);
 }
 
+static void test_migrate_dirty_limit(void)
+{
+g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
+QTestState *from, *to;
+int64_t remaining, throttle_us_per_full;
+/*
+ * We want the test to be stable and as fast as possible.
+ * E.g., with 1Gb/s bandwidth migration may pass without the dirty limit,
+ * so we need to decrease the bandwidth.
+ */
+const int64_t dirtylimit_period = 1000, dirtylimit_value = 50;
+const int64_t max_bandwidth = 4; /* ~400Mb/s */
+const int64_t downtime_limit = 250; /* 250ms */
+/*
+ * We migrate through unix-socket (> 500Mb/s).
+ * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s).
+ * So, we can predict expected_threshold
+ */
+const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000;
+MigrateCommon args = {
+.start = {
+.use_dirty_ring = true,
+},
+.listen_uri = uri,
+.connect_uri = uri,
+};
+
+if (test_migrate_start(&from, &to, args.listen_uri, &args)) {
+return;
+}
+
+migrate_set_capability(from, "dirty-limit", true);
+migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period",
+  dirtylimit_period);
+migrate_set_parameter_int(from, "vcpu-dirty-limit", dirtylimit_value);
+
+/*
+ * Set the initial parameters so that the migration could not converge
+ * without dirty limit.
+ */
+migrate_set_parameter_int(from, "downtime-limit", 1);
+migrate_set_parameter_int(from, "max-bandwidth", 1); /* ~100Mb/s */
+
+/* To check limit rate after precopy */
+migrate_set_capability(from, "pause-before-switchover", true);
+
+/* Wait for the first serial output from the source */
+wait_for_serial("src_serial");
+
+migrate_qmp(from, uri, "{}");
+
+/* Wait until dirty limit throttling begins */
+throttle_us_per_full = 0;
+while (throttle_us_per_full == 0) {
+throttle_us_per_full =
+read_migrate_property_int(from, 
"dirty-limit-throttle-us-per-full");
+usleep(100);
+g_assert_false(got_stop);
+}
+
+/*
+ * The dirty limit rate should equal the return value of
+ * query-vcpu-dirty-limit if the dirty-limit capability is set
+ */
+g_assert_cmpint(dirtylimit_value, ==, get_limit_rate(from));
+
+/* Now that we have verified the dirty limit works, let migration converge */
+migrate_set_parameter_int(from, "downtime-limit", downtime_limit);
+migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth);
+
+/*
+ * Wait for pre-switchover status to check if migration
+ * satisfies the convergence condition
+ */
+wait_for_migration_status(from, "pre-switchover", NULL);
+
+remaining = read_ram_property_int(from, "remaining");
+g_assert_cmpint(remaining, <,
+(expected_threshold + expected_threshold / 100));
+
+migrate_continue(from, "pre-switchover");
+
+qtest_qmp_eventwait(to, "RESUME");
+
+wait_for_serial("dest_serial");
+wait_for_migration_complete(from);
+
+test_migrate_end(from, to, true);
+}
+
 static bool kvm_dirty_ring_supported(void)
 {
 #if defined(__linux__) && defined(HOST_X86_64)
@@ -2577,6 +2667,8 @@ int main(int argc, char **argv)
test_precopy_unix_dirty_ring);
 qtest_add_func("/migration/vcpu_dirty_limit",
test_vcpu_dirty_limit);
+qtest_add_func("/migration/dirty_limit",
+   test_migrate_dirty_limit);
 }
 
 ret = g_test_run();
-- 
1.8.3.1




[PATCH 5/8] migration: Export dirty-limit time info

2022-07-23 Thread huangy81
From: Hyman Huang(黄勇) 

Export dirty limit throttle time and estimated ring full
time, through which we can observe the process of dirty
limit during live migration.
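
As an illustration only (the numbers below are made up), query-migrate is
expected to report the two new fields while the dirty limit is in service:

{ "execute": "query-migrate" }
{ "return": { "status": "active",
              "dirty-limit-throttle-us-per-full": 120000,
              "dirty-limit-us-ring-full": 250000,
              ... other MigrationInfo fields ... } }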

Signed-off-by: Hyman Huang(黄勇) 
---
 include/sysemu/dirtylimit.h |  2 ++
 migration/migration.c   | 10 ++
 monitor/hmp-cmds.c  | 10 ++
 qapi/migration.json | 10 +-
 softmmu/dirtylimit.c| 22 ++
 5 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h
index 8d2c1f3..98cc4a6 100644
--- a/include/sysemu/dirtylimit.h
+++ b/include/sysemu/dirtylimit.h
@@ -34,4 +34,6 @@ void dirtylimit_set_vcpu(int cpu_index,
 void dirtylimit_set_all(uint64_t quota,
 bool enable);
 void dirtylimit_vcpu_execute(CPUState *cpu);
+int64_t dirtylimit_throttle_us_per_full(void);
+int64_t dirtylimit_us_ring_full(void);
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index 84b592e..81a46e2 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -61,6 +61,7 @@
 #include "sysemu/cpus.h"
 #include "yank_functions.h"
 #include "sysemu/qtest.h"
+#include "sysemu/dirtylimit.h"
 
 #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling 
*/
 
@@ -1109,6 +1110,15 @@ static void populate_ram_info(MigrationInfo *info, 
MigrationState *s)
 info->ram->remaining = ram_bytes_remaining();
 info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
 }
+
+if (migrate_dirty_limit() && dirtylimit_in_service()) {
+info->has_dirty_limit_throttle_us_per_full = true;
+info->dirty_limit_throttle_us_per_full =
+dirtylimit_throttle_us_per_full();
+
+info->has_dirty_limit_us_ring_full = true;
+info->dirty_limit_us_ring_full = dirtylimit_us_ring_full();
+}
 }
 
 static void populate_disk_info(MigrationInfo *info)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index acbc5e8..accc869 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -358,6 +358,16 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->cpu_throttle_percentage);
 }
 
+if (info->has_dirty_limit_throttle_us_per_full) {
+monitor_printf(mon, "dirty-limit throttle time: %" PRIu64 " us\n",
+   info->dirty_limit_throttle_us_per_full);
+}
+
+if (info->has_dirty_limit_us_ring_full) {
+monitor_printf(mon, "dirty-limit ring full time: %" PRIu64 " us\n",
+   info->dirty_limit_us_ring_full);
+}
+
 if (info->has_postcopy_blocktime) {
 monitor_printf(mon, "postcopy blocktime: %u\n",
info->postcopy_blocktime);
diff --git a/qapi/migration.json b/qapi/migration.json
index 39e5f5e..6b8283f 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -242,6 +242,12 @@
 #   Present and non-empty when migration is blocked.
 #   (since 6.0)
 #
+# @dirty-limit-throttle-us-per-full: Throttle time (us) during the period of
+#dirty ring full (since 7.0)
+#
+# @dirty-limit-us-ring-full: Estimated periodic time (us) of dirty ring full.
+#(since 7.0)
+#
 # Since: 0.14
 ##
 { 'struct': 'MigrationInfo',
@@ -259,7 +265,9 @@
'*postcopy-blocktime' : 'uint32',
'*postcopy-vcpu-blocktime': ['uint32'],
'*compression': 'CompressionStats',
-   '*socket-address': ['SocketAddress'] } }
+   '*socket-address': ['SocketAddress'],
+   '*dirty-limit-throttle-us-per-full': 'int64',
+   '*dirty-limit-us-ring-full': 'int64'} }
 
 ##
 # @query-migrate:
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 1fdd8c6..1251b27 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -546,6 +546,28 @@ static struct DirtyLimitInfo *dirtylimit_query_vcpu(int 
cpu_index)
 return info;
 }
 
+/* Pick up first vcpu throttle time by default */
+int64_t dirtylimit_throttle_us_per_full(void)
+{
+CPUState *cpu = first_cpu;
+return cpu->throttle_us_per_full;
+}
+
+/*
+ * Estimate dirty ring full time under current dirty page rate.
+ * Return -1 if guest doesn't dirty memory.
+ */
+int64_t dirtylimit_us_ring_full(void)
+{
+uint64_t curr_rate = vcpu_dirty_rate_get(0);
+
+if (!curr_rate) {
+return -1;
+}
+
+return dirtylimit_dirty_ring_full_time(curr_rate);
+}
+
 static struct DirtyLimitInfoList *dirtylimit_query_all(void)
 {
 int i, index;
-- 
1.8.3.1




[PATCH 3/8] migration: Introduce dirty-limit capability

2022-07-23 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce migration dirty-limit capability, which can
be turned on before live migration and limit dirty
page rate during live migration.

Introduce migrate_dirty_limit function to help check
if dirty-limit capability enabled during live migration.

Meanwhile, refactor vcpu_dirty_rate_stat_collect
so that period can be configured instead of hardcoded.

dirty-limit capability is kind of like auto-converge
but using dirty limit instead of traditional cpu-throttle
to throttle guest down. To enable this feature, turn on
the dirty-limit capability before live migration using
migrate-set-capabilities, and set the parameters
"x-vcpu-dirty-limit-period", "vcpu-dirty-limit" suitably
to speed up convergence.
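
For example, the capability can be turned on via QMP before starting the
migration (a usage sketch, not taken from the patch itself):

{ "execute": "migrate-set-capabilities",
  "arguments": { "capabilities": [
      { "capability": "dirty-limit", "state": true } ] } }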

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c | 10 ++
 migration/migration.h |  1 +
 qapi/migration.json   |  4 +++-
 softmmu/dirtylimit.c  | 11 ++-
 4 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index ed1a47b..84b592e 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2508,6 +2508,15 @@ bool migrate_auto_converge(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
 }
 
+bool migrate_dirty_limit(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_LIMIT];
+}
+
 bool migrate_zero_blocks(void)
 {
 MigrationState *s;
@@ -4436,6 +4445,7 @@ static Property migration_properties[] = {
 DEFINE_PROP_MIG_CAP("x-zero-copy-send",
 MIGRATION_CAPABILITY_ZERO_COPY_SEND),
 #endif
+DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT),
 
 DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/migration/migration.h b/migration/migration.h
index cdad8ac..7fbb9f8 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -409,6 +409,7 @@ bool migrate_ignore_shared(void);
 bool migrate_validate_uuid(void);
 
 bool migrate_auto_converge(void);
+bool migrate_dirty_limit(void);
 bool migrate_use_multifd(void);
 bool migrate_pause_before_switchover(void);
 int migrate_multifd_channels(void);
diff --git a/qapi/migration.json b/qapi/migration.json
index 0963bab..39e5f5e 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -477,6 +477,8 @@
 #will be handled faster.  This is a performance feature and
 #should not affect the correctness of postcopy migration.
 #(since 7.1)
+# @dirty-limit: Use dirty-limit to throttle down guest if enabled.
+#   (since 7.1)
 #
 # Features:
 # @unstable: Members @x-colo and @x-ignore-shared are experimental.
@@ -492,7 +494,7 @@
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
'validate-uuid', 'background-snapshot',
-   'zero-copy-send', 'postcopy-preempt'] }
+   'zero-copy-send', 'postcopy-preempt', 'dirty-limit'] }
 
 ##
 # @MigrationCapabilityStatus:
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 8d98cb7..1fdd8c6 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -23,6 +23,8 @@
 #include "exec/memory.h"
 #include "hw/boards.h"
 #include "sysemu/kvm.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
 #include "trace.h"
 
 /*
@@ -75,11 +77,18 @@ static bool dirtylimit_quit;
 
 static void vcpu_dirty_rate_stat_collect(void)
 {
+MigrationState *s = migrate_get_current();
 VcpuStat stat;
 int i = 0;
+int64_t period = DIRTYLIMIT_CALC_TIME_MS;
+
+if (migrate_dirty_limit() &&
+migration_is_active(s)) {
+period = s->parameters.x_vcpu_dirty_limit_period;
+}
 
 /* calculate vcpu dirtyrate */
-vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS,
+vcpu_calculate_dirtyrate(period,
 &stat,
  GLOBAL_DIRTY_LIMIT,
  false);
-- 
1.8.3.1




[PATCH 1/8] qapi/migration: Introduce x-vcpu-dirty-limit-period parameter

2022-07-23 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce "x-vcpu-dirty-limit-period" migration experimental
parameter, which is used to make dirtyrate calculation period
configurable.

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c | 16 
 monitor/hmp-cmds.c|  8 
 qapi/migration.json   | 31 ---
 3 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index e03f698..7b19f85 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -116,6 +116,8 @@
 #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS5
 #define DEFAULT_MIGRATE_ANNOUNCE_STEP100
 
+#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 500 /* ms */
+
 static NotifierList migration_state_notifiers =
 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
 
@@ -962,6 +964,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
s->parameters.block_bitmap_mapping);
 }
 
+params->has_x_vcpu_dirty_limit_period = true;
+params->x_vcpu_dirty_limit_period = 
s->parameters.x_vcpu_dirty_limit_period;
+
 return params;
 }
 
@@ -1662,6 +1667,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 dest->has_block_bitmap_mapping = true;
 dest->block_bitmap_mapping = params->block_bitmap_mapping;
 }
+
+if (params->has_x_vcpu_dirty_limit_period) {
+dest->x_vcpu_dirty_limit_period = params->x_vcpu_dirty_limit_period;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1784,6 +1793,10 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 QAPI_CLONE(BitmapMigrationNodeAliasList,
params->block_bitmap_mapping);
 }
+if (params->has_x_vcpu_dirty_limit_period) {
+s->parameters.x_vcpu_dirty_limit_period =
+params->x_vcpu_dirty_limit_period;
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -4384,6 +4397,9 @@ static Property migration_properties[] = {
 DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
 DEFINE_PROP_STRING("tls-hostname", MigrationState, 
parameters.tls_hostname),
 DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),
+DEFINE_PROP_UINT64("x-vcpu-dirty-limit-period", MigrationState,
+   parameters.x_vcpu_dirty_limit_period,
+   DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index a6dc79e..64c996c 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -532,6 +532,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 }
 }
 }
+
+monitor_printf(mon, "%s: %" PRIu64 " ms\n",
+MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD),
+params->x_vcpu_dirty_limit_period);
 }
 
 qapi_free_MigrationParameters(params);
@@ -1351,6 +1355,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 error_setg(, "The block-bitmap-mapping parameter can only be set "
"through QMP");
 break;
+case MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD:
+p->has_x_vcpu_dirty_limit_period = true;
+visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err);
+break;
 default:
 assert(0);
 }
diff --git a/qapi/migration.json b/qapi/migration.json
index 81185d4..332c087 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -776,8 +776,12 @@
 #block device name if there is one, and to their node 
name
 #otherwise. (Since 5.2)
 #
+# @x-vcpu-dirty-limit-period: Periodic time (ms) of dirty limit during live 
migration.
+# Defaults to 500ms. (Since 7.1)
+#
 # Features:
-# @unstable: Member @x-checkpoint-delay is experimental.
+# @unstable: Members @x-checkpoint-delay and @x-vcpu-dirty-limit-period
+#are experimental.
 #
 # Since: 2.4
 ##
@@ -795,8 +799,9 @@
'multifd-channels',
'xbzrle-cache-size', 'max-postcopy-bandwidth',
'max-cpu-throttle', 'multifd-compression',
-   'multifd-zlib-level' ,'multifd-zstd-level',
-   'block-bitmap-mapping' ] }
+   'multifd-zlib-level', 'multifd-zstd-level',
+   'block-bitmap-mapping',
+   { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] } ] 
}
 
 ##
 # @MigrateSetParameters:
@@ -941,8 +946,12 @@
 #block device name if there is one, and to their node 
name
 #otherwise. (Since 5.2)
 #
+# @x-vcpu-dirty-limit-period: Periodic time (ms) of dirty limit during live 
migration.
+# Defaults to 500ms. 

[PATCH 4/8] migration: Implement dirty-limit convergence algo

2022-07-23 Thread huangy81
From: Hyman Huang(黄勇) 

Implement the dirty-limit convergence algorithm for live migration,
which is similar to the auto-converge algorithm but uses the dirty
limit instead of CPU throttling to make migration converge.

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/ram.c| 53 +-
 migration/trace-events |  1 +
 2 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index b94669b..2a5cd23 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -45,6 +45,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-types-migration.h"
 #include "qapi/qapi-events-migration.h"
+#include "qapi/qapi-commands-migration.h"
 #include "qapi/qmp/qerror.h"
 #include "trace.h"
 #include "exec/ram_addr.h"
@@ -57,6 +58,8 @@
 #include "qemu/iov.h"
 #include "multifd.h"
 #include "sysemu/runstate.h"
+#include "sysemu/dirtylimit.h"
+#include "sysemu/kvm.h"
 
 #include "hw/boards.h" /* for machine_dump_guest_core() */
 
@@ -1139,6 +1142,21 @@ static void migration_update_rates(RAMState *rs, int64_t 
end_time)
 }
 }
 
+/*
+ * Enable dirty-limit to throttle down the guest
+ */
+static void migration_dirty_limit_guest(void)
+{
+if (!dirtylimit_in_service()) {
+MigrationState *s = migrate_get_current();
+int64_t quota_dirtyrate = s->parameters.vcpu_dirty_limit;
+
+/* Set quota dirtyrate if dirty limit not in service */
+qmp_set_vcpu_dirty_limit(false, -1, quota_dirtyrate, NULL);
+trace_migration_dirty_limit_guest(quota_dirtyrate);
+}
+}
+
 static void migration_trigger_throttle(RAMState *rs)
 {
 MigrationState *s = migrate_get_current();
@@ -1148,22 +1166,31 @@ static void migration_trigger_throttle(RAMState *rs)
 uint64_t bytes_dirty_period = rs->num_dirty_pages_period * 
TARGET_PAGE_SIZE;
 uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
 
-/* During block migration the auto-converge logic incorrectly detects
- * that ram migration makes no progress. Avoid this by disabling the
- * throttling logic during the bulk phase of block migration. */
-if (migrate_auto_converge() && !blk_mig_bulk_active()) {
-/* The following detection logic can be refined later. For now:
-   Check to see if the ratio between dirtied bytes and the approx.
-   amount of bytes that just got transferred since the last time
-   we were in this routine reaches the threshold. If that happens
-   twice, start or increase throttling. */
-
-if ((bytes_dirty_period > bytes_dirty_threshold) &&
-(++rs->dirty_rate_high_cnt >= 2)) {
+/*
+ * The following detection logic can be refined later. For now:
+ * Check to see if the ratio between dirtied bytes and the approx.
+ * amount of bytes that just got transferred since the last time
+ * we were in this routine reaches the threshold. If that happens
+ * twice, start or increase throttling.
+ */
+
+if ((bytes_dirty_period > bytes_dirty_threshold) &&
+(++rs->dirty_rate_high_cnt >= 2)) {
+rs->dirty_rate_high_cnt = 0;
+/*
+ * During block migration the auto-converge logic incorrectly detects
+ * that ram migration makes no progress. Avoid this by disabling the
+ * throttling logic during the bulk phase of block migration
+ */
+
+if (migrate_auto_converge() && !blk_mig_bulk_active()) {
 trace_migration_throttle();
-rs->dirty_rate_high_cnt = 0;
 mig_throttle_guest_down(bytes_dirty_period,
 bytes_dirty_threshold);
+} else if (migrate_dirty_limit() &&
+   kvm_dirty_ring_enabled() &&
+   migration_is_active(s)) {
+migration_dirty_limit_guest();
 }
 }
 }
diff --git a/migration/trace-events b/migration/trace-events
index a34afe7..3eb4b0d 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -89,6 +89,7 @@ migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
 migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, 
unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx"
 migration_throttle(void) ""
+migration_dirty_limit_guest(int64_t dirtyrate) "guest dirty page rate limit %" 
PRIi64 " MB/s"
 ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: 
%" PRIx64 " %zx"
 ram_load_loop(const char *rbname, uint64_t addr, int flags, void *host) "%s: 
addr: 0x%" PRIx64 " flags: 0x%x host: %p"
 ram_load_postcopy_loop(int channel, uint64_t addr, int flags) "chan=%d 
addr=0x%" PRIx64 " flags=0x%x"
-- 
1.8.3.1




[PATCH 7/8] tests/migration: Introduce dirty-ring-size option into guestperf

2022-07-23 Thread huangy81
From: Hyman Huang(黄勇) 

The guestperf tool does not enable the dirty ring feature when
testing migration by default.

To support dirty ring migration performance tests, introduce a
dirty-ring-size option into the guestperf tools; its value ranges
over [1024, 65536].

To set the dirty ring size to 4096 during a migration test:
$ ./tests/migration/guestperf.py --dirty-ring-size 4096 xxx
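
With that option, the tool is expected to start QEMU with the dirty ring
enabled on the KVM accelerator, roughly:

    -accel kvm,dirty-ring-size=4096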

Signed-off-by: Hyman Huang(黄勇) 
---
 tests/migration/guestperf/engine.py   | 7 ++-
 tests/migration/guestperf/hardware.py | 8 ++--
 tests/migration/guestperf/shell.py| 7 ++-
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/tests/migration/guestperf/engine.py 
b/tests/migration/guestperf/engine.py
index 87a6ab2..2b98f00 100644
--- a/tests/migration/guestperf/engine.py
+++ b/tests/migration/guestperf/engine.py
@@ -304,7 +304,6 @@ def _get_common_args(self, hardware, tunnelled=False):
 cmdline = "'" + cmdline + "'"
 
 argv = [
-"-accel", "kvm",
 "-cpu", "host",
 "-kernel", self._kernel,
 "-initrd", self._initrd,
@@ -315,6 +314,12 @@ def _get_common_args(self, hardware, tunnelled=False):
 "-smp", str(hardware._cpus),
 ]
 
+if hardware._dirty_ring_size:
+argv.extend(["-accel", "kvm,dirty-ring-size=%s" %
+ hardware._dirty_ring_size])
+else:
+argv.extend(["-accel", "kvm"])
+
 if self._debug:
 argv.extend(["-device", "sga"])
 
diff --git a/tests/migration/guestperf/hardware.py 
b/tests/migration/guestperf/hardware.py
index 3145785..f779cc0 100644
--- a/tests/migration/guestperf/hardware.py
+++ b/tests/migration/guestperf/hardware.py
@@ -23,7 +23,8 @@ def __init__(self, cpus=1, mem=1,
  src_cpu_bind=None, src_mem_bind=None,
  dst_cpu_bind=None, dst_mem_bind=None,
  prealloc_pages = False,
- huge_pages=False, locked_pages=False):
+ huge_pages=False, locked_pages=False,
+ dirty_ring_size=0):
 self._cpus = cpus
 self._mem = mem # GiB
 self._src_mem_bind = src_mem_bind # List of NUMA nodes
@@ -33,6 +34,7 @@ def __init__(self, cpus=1, mem=1,
 self._prealloc_pages = prealloc_pages
 self._huge_pages = huge_pages
 self._locked_pages = locked_pages
+self._dirty_ring_size = dirty_ring_size
 
 
 def serialize(self):
@@ -46,6 +48,7 @@ def serialize(self):
 "prealloc_pages": self._prealloc_pages,
 "huge_pages": self._huge_pages,
 "locked_pages": self._locked_pages,
+"dirty_ring_size": self._dirty_ring_size,
 }
 
 @classmethod
@@ -59,4 +62,5 @@ def deserialize(cls, data):
 data["dst_mem_bind"],
 data["prealloc_pages"],
 data["huge_pages"],
-data["locked_pages"])
+data["locked_pages"],
+data["dirty_ring_size"])
diff --git a/tests/migration/guestperf/shell.py 
b/tests/migration/guestperf/shell.py
index 8a809e3..559616f 100644
--- a/tests/migration/guestperf/shell.py
+++ b/tests/migration/guestperf/shell.py
@@ -60,6 +60,8 @@ def __init__(self):
 parser.add_argument("--prealloc-pages", dest="prealloc_pages", 
default=False)
 parser.add_argument("--huge-pages", dest="huge_pages", default=False)
 parser.add_argument("--locked-pages", dest="locked_pages", 
default=False)
+parser.add_argument("--dirty-ring-size", dest="dirty_ring_size",
+default=0, type=int)
 
 self._parser = parser
 
@@ -89,7 +91,10 @@ def split_map(value):
 
 locked_pages=args.locked_pages,
 huge_pages=args.huge_pages,
-prealloc_pages=args.prealloc_pages)
+prealloc_pages=args.prealloc_pages,
+
+dirty_ring_size=args.dirty_ring_size)
+
 
 
 class Shell(BaseShell):
-- 
1.8.3.1




[PATCH 2/8] qapi/migration: Introduce vcpu-dirty-limit parameters

2022-07-23 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce "vcpu-dirty-limit" migration parameter used
to limit dirty page rate during live migration.

"vcpu-dirty-limit" and "x-vcpu-dirty-limit-period" are
two dirty-limit-related migration parameters, which can
be set before and during live migration by qmp
migrate-set-parameters.

This two parameters are used to help implement the dirty
page rate limit algo of migration.
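
For example (the values are illustrative only), both parameters can be set
in one QMP call:

{ "execute": "migrate-set-parameters",
  "arguments": { "x-vcpu-dirty-limit-period": 500,
                 "vcpu-dirty-limit": 200 } }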

Signed-off-by: Hyman Huang(黄勇) 
---
 migration/migration.c | 14 ++
 monitor/hmp-cmds.c|  8 
 qapi/migration.json   | 18 +++---
 3 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 7b19f85..ed1a47b 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -117,6 +117,7 @@
 #define DEFAULT_MIGRATE_ANNOUNCE_STEP100
 
 #define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 500 /* ms */
+#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT1   /* MB/s */
 
 static NotifierList migration_state_notifiers =
 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -967,6 +968,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
 params->has_x_vcpu_dirty_limit_period = true;
 params->x_vcpu_dirty_limit_period = 
s->parameters.x_vcpu_dirty_limit_period;
 
+params->has_vcpu_dirty_limit = true;
+params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit;
+
 return params;
 }
 
@@ -1671,6 +1675,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 if (params->has_x_vcpu_dirty_limit_period) {
 dest->x_vcpu_dirty_limit_period = params->x_vcpu_dirty_limit_period;
 }
+
+if (params->has_vcpu_dirty_limit) {
+dest->vcpu_dirty_limit = params->vcpu_dirty_limit;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1797,6 +1805,9 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 s->parameters.x_vcpu_dirty_limit_period =
 params->x_vcpu_dirty_limit_period;
 }
+if (params->has_vcpu_dirty_limit) {
+s->parameters.vcpu_dirty_limit = params->vcpu_dirty_limit;
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -4400,6 +4411,9 @@ static Property migration_properties[] = {
 DEFINE_PROP_UINT64("x-vcpu-dirty-limit-period", MigrationState,
parameters.x_vcpu_dirty_limit_period,
DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD),
+DEFINE_PROP_UINT64("vcpu-dirty-limit", MigrationState,
+   parameters.vcpu_dirty_limit,
+   DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 64c996c..acbc5e8 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -536,6 +536,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 monitor_printf(mon, "%s: %" PRIu64 " ms\n",
 MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD),
 params->x_vcpu_dirty_limit_period);
+
+monitor_printf(mon, "%s: %" PRIu64 " MB/s\n",
+MigrationParameter_str(MIGRATION_PARAMETER_VCPU_DIRTY_LIMIT),
+params->vcpu_dirty_limit);
 }
 
 qapi_free_MigrationParameters(params);
@@ -1359,6 +1363,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 p->has_x_vcpu_dirty_limit_period = true;
+visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err);
 break;
+case MIGRATION_PARAMETER_VCPU_DIRTY_LIMIT:
+p->has_vcpu_dirty_limit = true;
+visit_type_size(v, param, &p->vcpu_dirty_limit, &err);
+break;
 default:
 assert(0);
 }
diff --git a/qapi/migration.json b/qapi/migration.json
index 332c087..0963bab 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -779,6 +779,9 @@
 # @x-vcpu-dirty-limit-period: Periodic time (ms) of dirty limit during live 
migration.
 # Defaults to 500ms. (Since 7.1)
 #
+# @vcpu-dirty-limit: Dirtyrate limit (MB/s) during live migration.
+#Defaults to 1. (Since 7.1)
+#
 # Features:
 # @unstable: Member @x-checkpoint-delay and @x-vcpu-dirty-limit-period
 #are experimental.
@@ -801,7 +804,8 @@
'max-cpu-throttle', 'multifd-compression',
'multifd-zlib-level', 'multifd-zstd-level',
'block-bitmap-mapping',
-   { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] } ] 
}
+   { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] },
+   'vcpu-dirty-limit'] }
 
 ##
 # @MigrateSetParameters:
@@ -949,6 +953,9 @@
 # @x-vcpu-dirty-limit-period: Periodic time (ms) of dirty limit during live 
migration.
 # Defaults to 500ms. (Since 7.1)
 #
+# @vcpu-dirty-limit: Dirtyrate 

[PATCH 0/8] migration: introduce dirtylimit capability

2022-07-23 Thread huangy81
From: Hyman Huang(黄勇) 

Abstract


This series adds a new migration capability called "dirtylimit". It can
be enabled when the dirty ring is enabled, and it improves vCPU performance
during migration. It is based on the previous patchset:
https://lore.kernel.org/qemu-devel/cover.1656177590.git.huang...@chinatelecom.cn/

As mentioned in the patchset "support dirty restraint on vCPU", the dirtylimit
way of migration avoids penalizing processes that only read memory. This series
wires up the vCPU dirty limit and wraps it as the dirtylimit capability of
migration. Two parameters, vcpu-dirtylimit-period and vcpu-dirtylimit, are
introduced to set up dirtylimit during live migration.

To validate the implementation, I tested live migration of a 32 vCPU VM with
the following model: only vcpu0 and vcpu1 are dirtied with a heavy memory
workload while the rest of the vCPUs are left untouched, and UnixBench runs
on vcpu8-vcpu15 with the CPU affinity set up by the following command:
taskset -c 8-15 ./Run -i 2 -c 8 {unixbench test item}

The following are results:

host cpu: Intel(R) Xeon(R) Platinum 8378A
host interface speed: 1000Mb/s
  |---------------------+--------+------------+---------------|
  | UnixBench test item | Normal | Dirtylimit | Auto-converge |
  |---------------------+--------+------------+---------------|
  | dhry2reg            | 32800  | 32786      | 25292         |
  | whetstone-double    | 10326  | 10315      | 9847          |
  | pipe                | 15442  | 15271      | 14506         |
  | context1            | 7260   | 6235       | 4514          |
  | spawn               | 3663   | 3317       | 3249          |
  | syscall             | 4669   | 4667       | 3841          |
  |---------------------+--------+------------+---------------|
From the data above we can conclude that vCPUs that do not dirty memory in
the VM are almost unaffected during dirtylimit migration, whereas they are
affected under auto-converge.

I also tested the total time of dirtylimit migration with varying dirty
memory sizes in the VM.

scenario 1:
host cpu: Intel(R) Xeon(R) Platinum 8378A
host interface speed: 1000Mb/s
  |-----------------------+----------------+-------------------|
  | dirty memory size(MB) | Dirtylimit(ms) | Auto-converge(ms) |
  |-----------------------+----------------+-------------------|
  | 60                    | 2014           | 2131              |
  | 70                    | 5381           | 12590             |
  | 90                    | 6037           | 33545             |
  | 110                   | 7660           | [*]               |
  |-----------------------+----------------+-------------------|
  [*]: This case means migration is not convergent.

scenario 2:
host cpu: Intel(R) Xeon(R) CPU E5-2650
host interface speed: 1Mb/s
  |-----------------------+----------------+-------------------|
  | dirty memory size(MB) | Dirtylimit(ms) | Auto-converge(ms) |
  |-----------------------+----------------+-------------------|
  | 1600                  | 15842          | 27548             |
  | 2000                  | 19026          | 38447             |
  | 2400                  | 19897          | 46381             |
  | 2800                  | 22338          | 57149             |
  |-----------------------+----------------+-------------------|
The data above shows that the dirtylimit way of migration can also reduce
the total migration time and achieves convergence more easily in some cases.

In addition to implementing the dirtylimit capability itself, this series
adds 3 migration tests so developers can easily experiment with it:
 1. a qtest for dirty-limit migration
 2. dirty ring migration support for the guestperf tool
 3. dirty-limit migration support for the guestperf tool

Please review, thanks !

Hyman Huang (8):
  qapi/migration: Introduce x-vcpu-dirty-limit-period parameter
  qapi/migration: Introduce vcpu-dirty-limit parameters
  migration: Introduce dirty-limit capability
  migration: Implement dirty-limit convergence algo
  migration: Export dirty-limit time info
  tests: Add migration dirty-limit capability test
  tests/migration: Introduce dirty-ring-size option into guestperf
  tests/migration: Introduce dirty-limit into guestperf

 include/sysemu/dirtylimit.h |  2 +
 migration/migration.c   | 50 ++
 migration/migration.h   |  1 +
 migration/ram.c | 53 ++-
 migration/trace-events  |  1 +
 monitor/hmp-cmds.c  | 26 ++
 qapi/migration.json | 57 
 softmmu/dirtylimit.c| 33 +++-
 tests/migration/guestperf/comparison.py | 14 +
 tests/migration/guestperf/engine.py | 33 +++-
 tests/migration/guestperf/hardware.py   |  8 ++-
 tests/migration/guestperf/progress.py   | 17 +-
 tests/migration/guestperf/scenario.py   | 11 +++-
 tests/migration/guestperf/shell.py  | 25 -
 

[PATCH v25 6/8] softmmu/dirtylimit: Implement virtual CPU throttle

2022-06-25 Thread huangy81
From: Hyman Huang(黄勇) 

Set up a negative feedback system for when the vCPU thread
handles a KVM_EXIT_DIRTY_RING_FULL exit, by introducing a
throttle_us_per_full field in struct CPUState. Sleep
throttle_us_per_full microseconds to throttle the vCPU
if dirtylimit is in service.
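
As a rough standalone sketch of the idea (this is not the QEMU
implementation; the constants, names and the fake guest model below are
made up for illustration):

/*
 * Toy negative feedback loop: after each "dirty ring full" event, compare
 * the measured dirty page rate with the quota and nudge the vCPU sleep
 * time up or down until the rate settles around the quota.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define TOLERANCE_MBPS 25    /* leave the throttle alone inside this band */
#define STEP_US        1000  /* fixed adjustment step */

static int64_t throttle_us_per_full;

static void throttle_adjust(uint64_t quota_mbps, uint64_t current_mbps)
{
    uint64_t diff = current_mbps > quota_mbps ?
                    current_mbps - quota_mbps : quota_mbps - current_mbps;

    if (diff <= TOLERANCE_MBPS) {
        return;                              /* close enough to the quota */
    }
    if (current_mbps > quota_mbps) {
        throttle_us_per_full += STEP_US;     /* too fast: sleep longer */
    } else if (throttle_us_per_full >= STEP_US) {
        throttle_us_per_full -= STEP_US;     /* too slow: sleep less */
    }
}

int main(void)
{
    /* Fake guest: starts at 400 MB/s against a 100 MB/s quota; each
     * adjustment is assumed to shave 60 MB/s off the dirty rate. */
    uint64_t quota = 100, rate = 400;

    while (rate > quota + TOLERANCE_MBPS) {
        throttle_adjust(quota, rate);
        rate -= 60;
        printf("sleep %" PRId64 " us, rate %" PRIu64 " MB/s\n",
               throttle_us_per_full, rate);
    }
    return 0;
}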

Signed-off-by: Hyman Huang(黄勇) 
Reviewed-by: Peter Xu 
---
 accel/kvm/kvm-all.c |  20 ++-
 include/hw/core/cpu.h   |   6 +
 include/sysemu/dirtylimit.h |  15 +++
 softmmu/dirtylimit.c| 291 
 softmmu/trace-events|   7 ++
 5 files changed, 338 insertions(+), 1 deletion(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 59b8ea1..18e67af 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -45,6 +45,7 @@
 #include "qemu/guest-random.h"
 #include "sysemu/hw_accel.h"
 #include "kvm-cpus.h"
+#include "sysemu/dirtylimit.h"
 
 #include "hw/boards.h"
 #include "monitor/stats.h"
@@ -477,6 +478,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
 cpu->kvm_state = s;
 cpu->vcpu_dirty = true;
 cpu->dirty_pages = 0;
+cpu->throttle_us_per_full = 0;
 
 mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
 if (mmap_size < 0) {
@@ -1470,6 +1472,11 @@ static void *kvm_dirty_ring_reaper_thread(void *data)
  */
 sleep(1);
 
+/* keep sleeping so that the dirtylimit is not interfered with by the reaper */
+if (dirtylimit_in_service()) {
+continue;
+}
+
 trace_kvm_dirty_ring_reaper("wakeup");
 r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING;
 
@@ -2975,8 +2982,19 @@ int kvm_cpu_exec(CPUState *cpu)
  */
 trace_kvm_dirty_ring_full(cpu->cpu_index);
 qemu_mutex_lock_iothread();
-kvm_dirty_ring_reap(kvm_state, NULL);
+/*
+ * We throttle the vCPU by making it sleep once it exits from the
+ * kernel due to a full dirty ring. In the dirtylimit scenario,
+ * reaping all vCPUs after a single vCPU's dirty ring gets full would
+ * miss the sleep, so just reap the vCPU whose ring is full.
+ */
+if (dirtylimit_in_service()) {
+kvm_dirty_ring_reap(kvm_state, cpu);
+} else {
+kvm_dirty_ring_reap(kvm_state, NULL);
+}
 qemu_mutex_unlock_iothread();
+dirtylimit_vcpu_execute(cpu);
 ret = 0;
 break;
 case KVM_EXIT_SYSTEM_EVENT:
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 996f940..500503d 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -418,6 +418,12 @@ struct CPUState {
  */
 bool throttle_thread_scheduled;
 
+/*
+ * Sleep throttle_us_per_full microseconds once dirty ring is full
+ * if dirty page rate limit is enabled.
+ */
+int64_t throttle_us_per_full;
+
 bool ignore_memory_transaction_failures;
 
 /* Used for user-only emulation of prctl(PR_SET_UNALIGN). */
diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h
index da459f0..8d2c1f3 100644
--- a/include/sysemu/dirtylimit.h
+++ b/include/sysemu/dirtylimit.h
@@ -19,4 +19,19 @@ void vcpu_dirty_rate_stat_start(void);
 void vcpu_dirty_rate_stat_stop(void);
 void vcpu_dirty_rate_stat_initialize(void);
 void vcpu_dirty_rate_stat_finalize(void);
+
+void dirtylimit_state_lock(void);
+void dirtylimit_state_unlock(void);
+void dirtylimit_state_initialize(void);
+void dirtylimit_state_finalize(void);
+bool dirtylimit_in_service(void);
+bool dirtylimit_vcpu_index_valid(int cpu_index);
+void dirtylimit_process(void);
+void dirtylimit_change(bool start);
+void dirtylimit_set_vcpu(int cpu_index,
+ uint64_t quota,
+ bool enable);
+void dirtylimit_set_all(uint64_t quota,
+bool enable);
+void dirtylimit_vcpu_execute(CPUState *cpu);
 #endif
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index ebdc064..e5a4f97 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -18,6 +18,26 @@
 #include "sysemu/dirtylimit.h"
 #include "exec/memory.h"
 #include "hw/boards.h"
+#include "sysemu/kvm.h"
+#include "trace.h"
+
+/*
+ * Dirtylimit stops working if the dirty page rate error
+ * value is less than DIRTYLIMIT_TOLERANCE_RANGE
+ */
+#define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
+/*
+ * Increase or decrease the vcpu sleep time linearly if the
+ * dirty page rate error percentage is over
+ * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
+ * Otherwise, increase or decrease by a fixed vcpu sleep time.
+ */
+#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50
+/*
+ * Max vcpu sleep time percentage during a cycle
+ * composed of dirty ring full and sleep time.
+ */
+#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
 
 struct {
 VcpuStat stat;
@@ -25,6 +45,30 @@ struct {
 QemuThread thread;
 } *vcpu_dirty_rate_stat;
 
+typedef struct VcpuDirtyLimitState {
+int cpu_index;
+bool enabled;
+/*
+ * Quota dirty page rate, unit is MB/s

[PATCH v25 8/8] tests: Add dirty page rate limit test

2022-06-25 Thread huangy81
From: Hyman Huang(黄勇) 

Add a dirty page rate limit test if the kernel supports the dirty ring.

The following qmp commands are covered by this test case:
"calc-dirty-rate", "query-dirty-rate", "set-vcpu-dirty-limit",
"cancel-vcpu-dirty-limit" and "query-vcpu-dirty-limit".

Signed-off-by: Hyman Huang(黄勇) 
Acked-by: Peter Xu 
---
 tests/qtest/migration-helpers.c |  22 
 tests/qtest/migration-helpers.h |   2 +
 tests/qtest/migration-test.c| 256 
 3 files changed, 280 insertions(+)

diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
index a6aa59e..4849cba 100644
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -76,6 +76,28 @@ QDict *wait_command(QTestState *who, const char *command, 
...)
 }
 
 /*
+ * Execute the qmp command only
+ */
+QDict *qmp_command(QTestState *who, const char *command, ...)
+{
+va_list ap;
+QDict *resp, *ret;
+
+va_start(ap, command);
+resp = qtest_vqmp(who, command, ap);
+va_end(ap);
+
+g_assert(!qdict_haskey(resp, "error"));
+g_assert(qdict_haskey(resp, "return"));
+
+ret = qdict_get_qdict(resp, "return");
+qobject_ref(ret);
+qobject_unref(resp);
+
+return ret;
+}
+
+/*
  * Send QMP command "migrate".
  * Arguments are built from @fmt... (formatted like
  * qobject_from_jsonf_nofail()) with "uri": @uri spliced in.
diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
index 78587c2..5956189 100644
--- a/tests/qtest/migration-helpers.h
+++ b/tests/qtest/migration-helpers.h
@@ -23,6 +23,8 @@ QDict *wait_command_fd(QTestState *who, int fd, const char 
*command, ...);
 G_GNUC_PRINTF(2, 3)
 QDict *wait_command(QTestState *who, const char *command, ...);
 
+QDict *qmp_command(QTestState *who, const char *command, ...);
+
 G_GNUC_PRINTF(3, 4)
 void migrate_qmp(QTestState *who, const char *uri, const char *fmt, ...);
 
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index d33e806..4cd87ef 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -24,6 +24,7 @@
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qobject-output-visitor.h"
 #include "crypto/tlscredspsk.h"
+#include "qapi/qmp/qlist.h"
 
 #include "migration-helpers.h"
 #include "tests/migration/migration-test.h"
@@ -49,6 +50,12 @@ static bool uffd_feature_thread_id;
 /* A downtime where the test really should converge */
 #define CONVERGE_DOWNTIME 1000
 
+/*
+ * Dirtylimit stops working if the dirty page rate error
+ * value is less than DIRTYLIMIT_TOLERANCE_RANGE
+ */
+#define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
+
 #if defined(__linux__)
 #include 
 #include 
@@ -2070,6 +2077,253 @@ static void test_multifd_tcp_cancel(void)
 test_migrate_end(from, to2, true);
 }
 
+static void calc_dirty_rate(QTestState *who, uint64_t calc_time)
+{
+qobject_unref(qmp_command(who,
+  "{ 'execute': 'calc-dirty-rate',"
+  "'arguments': { "
+  "'calc-time': %ld,"
+  "'mode': 'dirty-ring' }}",
+  calc_time));
+}
+
+static QDict *query_dirty_rate(QTestState *who)
+{
+return qmp_command(who, "{ 'execute': 'query-dirty-rate' }");
+}
+
+static void dirtylimit_set_all(QTestState *who, uint64_t dirtyrate)
+{
+qobject_unref(qmp_command(who,
+  "{ 'execute': 'set-vcpu-dirty-limit',"
+  "'arguments': { "
+  "'dirty-rate': %ld } }",
+  dirtyrate));
+}
+
+static void cancel_vcpu_dirty_limit(QTestState *who)
+{
+qobject_unref(qmp_command(who,
+  "{ 'execute': 'cancel-vcpu-dirty-limit' }"));
+}
+
+static QDict *query_vcpu_dirty_limit(QTestState *who)
+{
+QDict *rsp;
+
+rsp = qtest_qmp(who, "{ 'execute': 'query-vcpu-dirty-limit' }");
+g_assert(!qdict_haskey(rsp, "error"));
+g_assert(qdict_haskey(rsp, "return"));
+
+return rsp;
+}
+
+static bool calc_dirtyrate_ready(QTestState *who)
+{
+QDict *rsp_return;
+gchar *status;
+
+rsp_return = query_dirty_rate(who);
+g_assert(rsp_return);
+
+status = g_strdup(qdict_get_str(rsp_return, "status"));
+g_assert(status);
+
+return g_strcmp0(status, "measuring");
+}
+
+static void wait_for_calc_dirtyrate_complete(QTestState *who,
+ int64_t time_s)
+{
+int max_try_count = 1;
+usleep(time_s * 100);
+
+while (!calc_dirtyrate_ready(who) && max_try_count--) {
+usleep(1000);
+}
+
+/*
+ * Set the timeout with 10 s(max_try_count * 1000us),
+ * if dirtyrate measurement not complete, fail test.
+ */
+g_assert_cmpint(max_try_count, !=, 0);
+}
+
+static int64_t get_dirty_rate(QTestState *who)
+{
+QDict *rsp_return;
+gchar *status;
+QList *rates;
+const QListEntry *entry;
+QDict *rate;
+int64_t dirtyrate;
+
+rsp_return = query_dirty_rate(who);
+g_assert(rsp_return);
+
+status = 
