FYI,

Sharing my local COLO test steps/scripts, with both VMs running on the same host.



S1: ./primary.sh
S2: ./secondary.sh
S3: cat secondary-cmd.json | nc localhost 55555
S4: cat primary-cmd.json | nc localhost 25555

At this point, both the primary and secondary VMs have entered the COLO state.
We can then trigger the failover with one of the following:

(Primary takeover) S5_1: killall -9 secondary; sleep 1; cat primary-failover.json | nc localhost 25555
or
(Secondary takeover) S5_2: killall -9 primary; sleep 1; cat secondary-failover.json | nc localhost 55555

=========scripts=============
# cat primary.sh
cmd="./primary -enable-kvm -cpu qemu64,kvmclock=on -m 4096 -smp 1 -device 
piix3-usb-uhci -device usb-tablet -name primary -netdev 
tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown -device 
e1000,id=e0,netdev=hn0 -chardev 
socket,id=mirror0,host=0.0.0.0,port=9003,server=on,wait=off -chardev 
socket,id=compare1,host=0.0.0.0,port=9004,server=on,wait=on -chardev 
socket,id=compare0,host=127.0.0.1,port=9001,server=on,wait=off -chardev 
socket,id=compare0-0,host=127.0.0.1,port=9001 -chardev 
socket,id=compare_out,host=127.0.0.1,port=9005,server=on,wait=off -chardev 
socket,id=compare_out0,host=127.0.0.1,port=9005 -object 
filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 -object 
filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out -object 
filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 -object 
iothread,id=iothread1 -object 
colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,iothread=iothread1
 -drive 
if=ide,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,children.0.file.filename=/home/lizhijian/images/colo/primary/primary.qcow2,children.0.driver=qcow2
 -nographic -monitor telnet:127.0.0.1:15555,server,nowait -qmp 
telnet:127.0.0.1:25555,server,nowait -S"

echo $cmd
exec $cmd

# cat secondary.sh
cmd="./secondary -enable-kvm -cpu qemu64,kvmclock=on -m 4096 -smp 1 -qmp 
telnet:127.0.0.1:55555,server,nowait -device piix3-usb-uhci -device usb-tablet 
-name secondary -netdev 
tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown -device 
e1000,id=e0,netdev=hn0 -chardev 
socket,id=red0,host=127.0.0.1,port=9003,reconnect-ms=1 -chardev 
socket,id=red1,host=127.0.0.1,port=9004,reconnect-ms=1 -object 
filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 -object 
filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 -object 
filter-rewriter,id=rew0,netdev=hn0,queue=all -drive 
if=none,id=parent0,file.filename=/home/lizhijian/images/colo/secondary/primary.qcow2,driver=qcow2
 -drive 
if=none,id=childs0,driver=replication,mode=secondary,file.driver=qcow2,top-id=colo-disk0,file.file.filename=/home/lizhijian/images/colo/secondary/secondary-active.qcow2,file.backing.driver=qcow2,file.backing.file.filename=/home/lizhijian/images/colo/secondary/secondary-hidden.qcow2,file.backing.backing=parent0
 -drive 
if=ide,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,children.0=childs0
 -incoming tcp:0.0.0.0:9998 -nographic -monitor 
telnet:127.0.0.1:55554,server,nowait"

echo $cmd
exec $cmd

# cat secondary-cmd.json
{"execute":"qmp_capabilities"}
{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ 
{"capability": "x-colo", "state": true} ] } }
{"execute": "nbd-server-start", "arguments": {"addr": {"type": "inet", "data": 
{"host": "0.0.0.0", "port": "9999"} } } }
{"execute": "nbd-server-add", "arguments": {"device": "parent0", "writable": 
true } }
{'execute': 'trace-event-set-state', 'arguments': {'name': 'colo*', 'enable': 
true} }

# cat primary-cmd.json
{"execute":"qmp_capabilities"}
{'execute': 'trace-event-set-state', 'arguments': {'name': 'colo*', 'enable': 
true} }
{'execute': 'trace-event-set-state', 'arguments': {'name': 'migrat*', 'enable': 
true} }
{"execute": "human-monitor-command", "arguments": {"command-line": "drive_add 
-n buddy 
driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0"}}
{"execute": "x-blockdev-change", "arguments":{"parent": "colo-disk0", "node": 
"replication0" } }
{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ 
{"capability": "x-colo", "state": true } ] } }
{"execute": "migrate", "arguments": {"uri": "tcp:127.0.0.2:9998" } }

# cat primary-failover.json
{"execute":"qmp_capabilities"}
{"execute": "x-blockdev-change", "arguments":{ "parent": "colo-disk0", "child": 
"children.1"} }
{"execute": "human-monitor-command", "arguments":{ "command-line": "drive_del 
replication0" } }
{"execute": "object-del", "arguments":{ "id": "comp0" } }
{"execute": "object-del", "arguments":{ "id": "iothread1" } }
{"execute": "object-del", "arguments":{ "id": "m0" } }
{"execute": "object-del", "arguments":{ "id": "redire0" } }
{"execute": "object-del", "arguments":{ "id": "redire1" } }
{"execute": "x-colo-lost-heartbeat" }

# cat secondary-failover.json
{"execute":"qmp_capabilities"}
{"execute": "nbd-server-stop"}
{"execute": "x-colo-lost-heartbeat"}

{"execute": "object-del", "arguments":{ "id": "f2" } }
{"execute": "object-del", "arguments":{ "id": "f1" } }
{"execute": "chardev-remove", "arguments":{ "id": "red1" } }
{"execute": "chardev-remove", "arguments":{ "id": "red0" } }

{"execute": "chardev-add", "arguments":{ "id": "mirror0", "backend": {"type": 
"socket", "data": {"addr": { "type": "inet", "data": { "host": "0.0.0.0", 
"port": "9003" } }, "server": true } } } }
{"execute": "chardev-add", "arguments":{ "id": "compare1", "backend": {"type": 
"socket", "data": {"addr": { "type": "inet", "data": { "host": "0.0.0.0", 
"port": "9004" } }, "server": true } } } }
{"execute": "chardev-add", "arguments":{ "id": "compare0", "backend": {"type": 
"socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1", 
"port": "9001" } }, "server": true } } } }
{"execute": "chardev-add", "arguments":{ "id": "compare0-0", "backend": 
{"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": 
"127.0.0.1", "port": "9001" } }, "server": false } } } }
{"execute": "chardev-add", "arguments":{ "id": "compare_out", "backend": 
{"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": 
"127.0.0.1", "port": "9005" } }, "server": true } } } }
{"execute": "chardev-add", "arguments":{ "id": "compare_out0", "backend": 
{"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": 
"127.0.0.1", "port": "9005" } }, "server": false } } } }


On 04/11/2025 09:36, Li Zhijian wrote:
> Commit 4881411136 ("migration: Always set DEVICE state") set a new DEVICE
> state before completed during migration, which broke the original transition
> to COLO. The migration flow for precopy has changed to:
> active -> pre-switchover -> device -> completed.
> 
> This patch updates the transition state to ensure that the Pre-COLO
> state corresponds to DEVICE state correctly.
> 
> Fixes: 4881411136 ("migration: Always set DEVICE state")
> Signed-off-by: Li Zhijian <[email protected]>
> ---
>   migration/migration.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index a63b46bbef..6ec7f3cec8 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -3095,9 +3095,9 @@ static void migration_completion(MigrationState *s)
>           goto fail;
>       }
>   
> -    if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) {
> +    if (migrate_colo() && s->state == MIGRATION_STATUS_DEVICE) {
>           /* COLO does not support postcopy */
> -        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> +        migrate_set_state(&s->state, MIGRATION_STATUS_DEVICE,
>                             MIGRATION_STATUS_COLO);
>       } else {
>           migration_completion_end(s);

Reply via email to