struct MigrationTest QEMU

一个关于 migration 的 test case 使用结构体 MigrationTest 来表示。

/* One registered migration test case: the test path plus the function to run. */
typedef struct {
    /* Test path name, e.g. /migration/postcopy/plain */
    char *name;
    /* Test body, e.g. test_postcopy() */
    void (*func)(void);
} MigrationTest;

ufd_version_check() QEMU

/*
 * Probe whether userfaultfd on this host is usable by the tests.
 *
 * Opens a userfaultfd, negotiates UFFD_API and verifies that both the
 * REGISTER and UNREGISTER ioctls are offered.  As a side effect, records in
 * uffd_feature_thread_id whether UFFD_FEATURE_THREAD_ID was reported.
 *
 * Returns true when every check passes; otherwise emits a "Skipping test"
 * message and returns false.  The probe descriptor is closed on every path
 * so that repeated calls do not leak file descriptors.
 */
static bool ufd_version_check(void)
{
    struct uffdio_api api_struct;
    uint64_t ioctl_mask;
    bool usable = false;

    int ufd = uffd_open(O_CLOEXEC);

    if (ufd == -1) {
        g_test_message("Skipping test: userfaultfd not available");
        return false;
    }

    api_struct.api = UFFD_API;
    api_struct.features = 0;
    if (ioctl(ufd, UFFDIO_API, &api_struct)) {
        g_test_message("Skipping test: UFFDIO_API failed");
        goto out;
    }
    uffd_feature_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID;

    ioctl_mask = (1ULL << _UFFDIO_REGISTER) | (1ULL << _UFFDIO_UNREGISTER);
    if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
        g_test_message("Skipping test: Missing userfault feature");
        goto out;
    }

    usable = true;

out:
    /* The descriptor was only needed for probing. */
    close(ufd);
    return usable;
}

migration_test_add() QEMU

/*
 * Register one migration test with the qtest harness.
 *
 * path: test path (suite and case); a copy is stored as the test's name
 * fn:   function that implements the test
 */
void migration_test_add(const char *path, void (*fn)(void))
{
    /* Zero-initialised descriptor handed to the harness as test data. */
    MigrationTest *entry = g_new0(MigrationTest, 1);

    entry->name = g_strdup(path);
    entry->func = fn;

    qtest_add_data_func_full(path, entry,
                             migration_test_wrapper,
                             migration_test_destroy);
}

migration_test_wrapper() QEMU

/*
 * Trampoline registered with the test harness: recovers the MigrationTest
 * passed as opaque test data and runs its function.
 */
static void migration_test_wrapper(const void *data)
{
    /* The opaque data pointer is the MigrationTest registered for this test */
    MigrationTest *test = (MigrationTest *)data;
    //..
    /* Run the actual test body */
    test->func();
}

bootfile_create() / x86_bootsect QEMU

为什么 x86_bootsect 这 512 bytes 要设计成这样的呢?这是从这个汇编文件中 dump 出来的:tests/migration/i386/a-b-bootblock.S

/*
 * Raw 512-byte x86 boot sector image used as the guest workload.
 * Per the accompanying notes it is dumped from
 * tests/migration/i386/a-b-bootblock.S.  The image ends with the
 * 0x55 0xaa boot signature; bootfile_create() patches one byte of it
 * (the suspend_me variable) before writing it to disk.
 */
unsigned char x86_bootsect[] = {
  0xfa, 0x0f, 0x01, 0x16, 0xb8, 0x7c, 0x66, 0xb8, 0x01, 0x00, 0x00, 0x00,
  0x0f, 0x22, 0xc0, 0x66, 0xea, 0x20, 0x7c, 0x00, 0x00, 0x08, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe4, 0x92, 0x0c, 0x02,
  0xe6, 0x92, 0xb8, 0x10, 0x00, 0x00, 0x00, 0x8e, 0xd8, 0x66, 0xb8, 0x41,
  0x00, 0x66, 0xba, 0xf8, 0x03, 0xee, 0xb3, 0x00, 0xb8, 0x00, 0x00, 0x10,
  0x00, 0xc6, 0x00, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x3d, 0x00, 0x00,
  0x40, 0x06, 0x7c, 0xf1, 0xb8, 0x00, 0x00, 0x10, 0x00, 0xfe, 0x00, 0x05,
  0x00, 0x10, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x40, 0x06, 0x7c, 0xf2, 0xfe,
  0xc3, 0x80, 0xe3, 0x3f, 0x75, 0xe6, 0x66, 0xb8, 0x42, 0x00, 0x66, 0xba,
  0xf8, 0x03, 0xee, 0xa1, 0xbe, 0x7c, 0x00, 0x00, 0x83, 0xf8, 0x00, 0x74,
  0xd3, 0xb8, 0x04, 0x00, 0x10, 0x00, 0x8b, 0x00, 0x83, 0xf8, 0x01, 0x74,
  0xc7, 0xb0, 0xf1, 0xe6, 0xb2, 0xb8, 0x04, 0x00, 0x10, 0x00, 0xc7, 0x00,
  0x01, 0x00, 0x00, 0x00, 0x66, 0xb8, 0x01, 0x24, 0x66, 0xba, 0x04, 0x06,
  0x66, 0xef, 0x66, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00, 0xff, 0xff, 0x00, 0x00,
  0x00, 0x92, 0xcf, 0x00, 0x27, 0x00, 0xa0, 0x7c, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0xaa
};

The boot code assembly in tests/migration/i386/a-b-bootblock.S:

This assembly code repeatedly increments the first byte of each page in a 100MB range. Outputs an initial 'A' on serial followed by repeated 'B's.

这之中有一个变量其实是可以修改的,比如 suspend_me。默认是 0 也就是永远不会 suspend(sleep),如果注入成为了 1,那么就会 suspend。

# Boot sector used as the guest workload: after entering protected mode it
# repeatedly increments the first byte of every 4096-byte page between
# TEST_MEM_START and TEST_MEM_END, writing 'A' once and then 'B' every 64
# passes to the serial port at 0x3f8.

# Instructs the assembler to use 16-bit instructions initially.
.code16
.org 0x7c00
        .file   "fill.s"
        .text
        .globl  start
        .type   start, @function
start:             # at 0x7c00 ?
        cli
        lgdt gdtdesc
        # Enables protected mode
        mov $1,%eax
        mov %eax,%cr0  # Protected mode enable

        # Far jump through selector 8 (the code segment of the GDT below) to
        # offset 0x7c20, where the 32-bit code starts.
        data32 ljmp $8,$0x7c20

.org 0x7c20
.code32
        # A20 enable - not sure I actually need this
        # Sets bit 1 of port 0x92 to enable the A20 gate, which controls
        # access to memory above 1MB.
        inb $0x92,%al
        or  $2,%al
        outb %al, $0x92

        # set up DS for the whole of RAM (needed on KVM)
        # Selector 16 is the data segment of the GDT below.
        mov $16,%eax
        mov %eax,%ds

# Start from 1MB
.set TEST_MEM_START, X86_TEST_MEM_START
.set TEST_MEM_END, X86_TEST_MEM_END

        # Output "A" (65) to the serial port
        mov $65,%ax
        mov $0x3f8,%dx
        outb %al,%dx

        # bl keeps a counter so we limit the output speed
        mov $0, %bl

# Walk memory from TEST_MEM_START to TEST_MEM_END one page at a time.
pre_zero:
        mov $TEST_MEM_START,%eax

# do_zero is a loop that sets the first byte of every page to 0.
do_zero:
        movb $0, (%eax)
        add $4096,%eax
        cmp $TEST_MEM_END,%eax
        jl do_zero

# Increment the first byte of every page by 1.
mainloop:
        mov $TEST_MEM_START,%eax
innerloop:
        incb (%eax)
        add $4096,%eax
        cmp $TEST_MEM_END,%eax
        jl innerloop

        # bl counts completed passes modulo 64 (mask 0x3f); execution only
        # falls through to the code below once every 64 passes.
        inc %bl
        andb $0x3f,%bl
        jnz mainloop

        # These 3 lines write the value 66 (from %al) to the I/O port
        # whose address 0x3f8 is held in %dx.
        # Write the character "B" to serial port
        mov $66,%ax
        mov $0x3f8,%dx
        outb %al,%dx

        # should this test suspend?
        mov (suspend_me),%eax
        cmp $0,%eax
        # suspend_me == 0: never suspend, keep looping
        je mainloop

        # Are we waking after suspend?  do not suspend again.
        # "suspended" is a memory address; load the value stored there.
        mov $suspended,%eax
        mov (%eax),%eax
        # is (suspended) == 1?
        cmp $1,%eax
        je mainloop

        # enable acpi
        mov $ACPI_ENABLE,%al
        outb %al,$ACPI_PORT_SMI_CMD

        # suspend to ram
        # store 1 to (suspended) so that we only suspend once
        mov $suspended,%eax
        movl $1,(%eax)

        # Interact with ACPI to suspend
        mov $SLEEP,%ax
        mov $(ACPI_PM_BASE + PM1A_CNT_OFFSET),%dx
        outw %ax,%dx
        # not reached.  The wakeup causes reset and restart at 0x7c00, and we
        # do not save and restore registers as a real kernel would do.

        # GDT magic from old (GPLv2)  Grub startup.S
        .p2align        2       /* force 4-byte alignment */
gdt:
        .word   0, 0
        .byte   0, 0, 0, 0

        /* -- code segment --
         * base = 0x00000000, limit = 0xFFFFF (4 KiB Granularity), present
         * type = 32bit code execute/read, DPL = 0
         */
        .word   0xFFFF, 0
        .byte   0, 0x9A, 0xCF, 0

        /* -- data segment --
         * base = 0x00000000, limit 0xFFFFF (4 KiB Granularity), present
         * type = 32 bit data read/write, DPL = 0
         */
        .word   0xFFFF, 0
        .byte   0, 0x92, 0xCF, 0

gdtdesc:
        .word   0x27                    /* limit */
        .long   gdt                     /* addr */

        /* test launcher can poke a 1 here to exercise suspend */
suspend_me:
        .int  0

/* I'm a bootable disk */
.org 0x7dfe
        .byte 0x55
        .byte 0xAA
/*
 * Write the guest boot sector to "<dir>/bootsect" and record that path in
 * bootpath.
 *
 * The boot file modifies memory area in [start_address, end_address)
 * repeatedly. It outputs a 'B' at a fixed rate while it's still running.
 *
 * dir:        directory in which the boot file is created
 * suspend_me: value poked into the x86 image's suspend_me variable, which
 *             the guest code reads to decide whether to suspend
 *
 * Aborts the test (via g_assert*) if the file cannot be opened, fully
 * written or closed.
 */
static void bootfile_create(char *dir, bool suspend_me)
{
    const char *arch = qtest_get_arch();
    unsigned char *content;
    size_t len;

    bootpath = g_strdup_printf("%s/bootsect", dir);
    if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
        //...
        /* Runtime switch: patch the suspend_me variable inside the image. */
        x86_bootsect[SYM_suspend_me - SYM_start] = suspend_me;
        content = x86_bootsect;
        len = sizeof(x86_bootsect);
    }
    // different architectures:
    // ...

    /*
     * Write the boot sector code to the file.  The caller typically passes
     * the test's temporary directory here (the variable is named tmpfs, but
     * it is just a temporary directory, not necessarily a tmpfs mount).
     */
    FILE *bootfile = fopen(bootpath, "wb");
    /* Fail with a clear assertion instead of passing NULL to fwrite(). */
    g_assert_nonnull(bootfile);
    g_assert_cmpint(fwrite(content, len, 1, bootfile), ==, 1);
    /* fclose() flushes buffered data, so a failure here means a short file. */
    g_assert_cmpint(fclose(bootfile), ==, 0);
}

struct MigrateStart QEMU

这个结构体控制整个 migration 测试的一些参数,不是 specific to source 和 destination 的。

/*
 * Start-up parameters for a migration test as a whole; they are not
 * specific to the source or the destination side.
 */
typedef struct {
    /*
     * QTEST_LOG=1 may override this.  When QTEST_LOG=1, we always dump errors
     * unconditionally, because it means the user would like to be verbose.
     */
    bool hide_stderr;
    /* Back guest memory with a shared file under /dev/shm (see test_migrate_start()) */
    bool use_shmem;
    /* only launch the target process */
    bool only_target;
    /* Use dirty ring if true; dirty logging otherwise */
    bool use_dirty_ring;
    /* Extra command-line options appended for the source QEMU */
    const char *opts_source;
    /* Extra command-line options appended for the destination QEMU */
    const char *opts_target;
    /* suspend the src before migrating to dest. */
    bool suspend_me;
} MigrateStart;

struct QTestMigrationState QEMU

表示一端 migration 的状态,比如 source 端或者 destination 端的。

/* Migration state of one side (source or destination) as tracked by the tests. */
typedef struct QTestMigrationState {
    /* NOTE(review): the *_seen flags are presumably set by the QMP event callback (migrate_watch_for_events) -- confirm */
    bool stop_seen;
    bool resume_seen;
    /* Checked by wait_for_suspend() before waiting for a SUSPEND event */
    bool suspend_seen;
    /* For the source, copied from MigrateStart.suspend_me in test_migrate_start() */
    bool suspend_me;
} QTestMigrationState;

test_migrate_start() QEMU

/*
 * Launch the source and destination QEMU instances for a migration test.
 *
 * from: out-parameter receiving the source QTestState; it is not written
 *       when args->only_target is set
 * to:   out-parameter receiving the destination QTestState
 * uri:  URI handed to the destination's -incoming option
 * args: start-up parameters (see MigrateStart)
 *
 * Returns 0 on the path visible here.
 */
static int test_migrate_start(QTestState **from, QTestState **to, const char *uri, MigrateStart *args)
{
    g_autofree gchar *arch_source = NULL;
    g_autofree gchar *arch_target = NULL;
    /* options for source and target */
    g_autofree gchar *arch_opts = NULL;
    g_autofree gchar *cmd_source = NULL;
    g_autofree gchar *cmd_target = NULL;
    const gchar *ignore_stderr;
    g_autofree char *shmem_opts = NULL;
    g_autofree char *shmem_path = NULL;
    const char *kvm_opts = NULL;
    const char *arch = qtest_get_arch();
    const char *memory_size;
    const char *machine_alias, *machine_opts = "";
    g_autofree char *machine = NULL;

    //...
    /* Reset the per-side state before every run */
    dst_state = (QTestMigrationState) { };
    src_state = (QTestMigrationState) { };
    /* Create the boot file under tmpfs */
    bootfile_create(tmpfs, args->suspend_me);
    src_state.suspend_me = args->suspend_me;

    if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
        memory_size = "150M";

        if (g_str_equal(arch, "i386")) {
            machine_alias = "pc";
        } else {
            machine_alias = "q35";
        }
        /* bootpath was set by the bootfile_create() call above */
        arch_opts = g_strdup_printf(
            "-drive if=none,id=d0,file=%s,format=raw "
            "-device ide-hd,drive=d0,secs=1,cyls=1,heads=1", bootpath);
        start_address = X86_TEST_MEM_START;
        end_address = X86_TEST_MEM_END;
    } 
    // other architectures
    //...

    // Log-related (elided; presumably where ignore_stderr is assigned -- confirm)
    //...
    // use shmem as the memory backend
    if (args->use_shmem) {
        shmem_path = g_strdup_printf("/dev/shm/qemu-%d", getpid());
        shmem_opts = g_strdup_printf(
            "-object memory-backend-file,id=mem0,size=%s"
            ",mem-path=%s,share=on -numa node,memdev=mem0",
            memory_size, shmem_path);
    }

    if (args->use_dirty_ring) {
        kvm_opts = ",dirty-ring-size=4096";
    }

    // sanity checks...
    // ...
    machine = resolve_machine_version(machine_alias, QEMU_ENV_SRC, QEMU_ENV_DST);

    //...
    /* Command line used to start the source QEMU */
    cmd_source = g_strdup_printf("-accel kvm%s -accel tcg "
                                 "-machine %s,%s "
                                 "-name source,debug-threads=on "
                                 "-m %s "
                                 "-serial file:%s/src_serial "
                                 "%s %s %s %s %s",
                                 kvm_opts ? kvm_opts : "",
                                 machine, machine_opts,
                                 memory_size, tmpfs,
                                 arch_opts ? arch_opts : "",
                                 arch_source ? arch_source : "",
                                 shmem_opts ? shmem_opts : "",
                                 args->opts_source ? args->opts_source : "",
                                 ignore_stderr);
    /* Launch the source unless only the destination was requested */
    if (!args->only_target) {
        *from = qtest_init_with_env(QEMU_ENV_SRC, cmd_source);
        qtest_qmp_set_event_callback(*from, migrate_watch_for_events, &src_state);
    }

    /* Command line for the destination: same shape plus -incoming <uri> */
    cmd_target = g_strdup_printf("-accel kvm%s -accel tcg "
                                 "-machine %s,%s "
                                 "-name target,debug-threads=on "
                                 "-m %s "
                                 "-serial file:%s/dest_serial "
                                 "-incoming %s "
                                 "%s %s %s %s %s",
                                 kvm_opts ? kvm_opts : "",
                                 machine, machine_opts,
                                 memory_size, tmpfs, uri,
                                 arch_opts ? arch_opts : "",
                                 arch_target ? arch_target : "",
                                 shmem_opts ? shmem_opts : "",
                                 args->opts_target ? args->opts_target : "",
                                 ignore_stderr);
    *to = qtest_init_with_env(QEMU_ENV_DST, cmd_target);
    qtest_qmp_set_event_callback(*to, migrate_watch_for_events, &dst_state);

    /*
     * Remove shmem file immediately to avoid memory leak in test failed case.
     * It's valid because QEMU has already opened this file
     */
    if (args->use_shmem) {
        unlink(shmem_path);
    }

    return 0;
}

struct MigrateCommon QEMU

包含了一个 MigrateStart 也就是启动的参数。MigrateStart 是为了更加精细的参数配置才用的,一般来说我们还是用 MigrateCommon 这个结构体(可以参考下面的内容):

// 传进来的参数是 MigrateCommon 类型
static void test_precopy_common(MigrateCommon *args)
{
    // args->start 是 MigrateStart 类型的
    if (test_migrate_start(&from, &to, args->listen_uri, &args->start))
        return;
    //...
/*
 * Parameters of one migration test.  Embeds the MigrateStart start-up
 * parameters and adds URIs, hooks and the expected outcome.
 */
typedef struct {
    /* Optional: fine tune start parameters */
    MigrateStart start;

    /* Required: the URI for the dst QEMU to listen on */
    const char *listen_uri;

    /*
     * Optional: the URI for the src QEMU to connect to
     * If NULL, then it will query the dst QEMU for its actual
     * listening address and use that as the connect address.
     * This allows for dynamically picking a free TCP port.
     */
    const char *connect_uri;

    /*
     * Optional: JSON-formatted list of src QEMU URIs. If a port is
     * defined as '0' in any QDict key a value of '0' will be
     * automatically converted to the correct destination port.
     */
    const char *connect_channels;

    /* Optional: callback to run at start to set migration parameters */
    TestMigrateStartHook start_hook;
    /* Optional: callback to run at finish to cleanup */
    TestMigrateFinishHook finish_hook;

    /*
     * Optional: normally we expect the migration process to complete.
     *
     * There can be a variety of reasons and stages in which failure
     * can happen during tests.
     *
     * If a failure is expected to happen at time of establishing
     * the connection, then MIG_TEST_FAIL will indicate that the dst
     * QEMU is expected to stay running and accept future migration
     * connections.
     *
     * If a failure is expected to happen while processing the
     * migration stream, then MIG_TEST_FAIL_DEST_QUIT_ERR will indicate
     * that the dst QEMU is expected to quit with non-zero exit status
     */
    enum {
        /* This test should succeed, the default */
        MIG_TEST_SUCCEED = 0,
        /* This test should fail, dest qemu should keep alive */
        MIG_TEST_FAIL,
        /* This test should fail, dest qemu should fail with abnormal status */
        MIG_TEST_FAIL_DEST_QUIT_ERR,
        /* The QMP command for this migration should fail with an error */
        MIG_TEST_QMP_ERROR,
    } result;

    /*
     * Optional: set number of migration passes to wait for, if live==true.
     * If zero, then merely wait for a few MB of dirty data
     */
    unsigned int iterations;

    /*
     * Optional: whether the guest CPUs should be running during a precopy
     * migration test.  We used to always run with live but it took much
     * longer so we reduced live tests to only the ones that have solid
     * reason to be tested live-only.  For each of the new test cases for
     * precopy please provide justifications to use live explicitly (please
     * refer to existing ones with live=true), or use live=off by default.
     */
    /* Left false by default, for performance reasons (see above). */
    bool live;

    /* Postcopy specific fields */
    void *postcopy_data;
    bool postcopy_preempt;
    /* Set when the postcopy recovery is designed to fail */
    bool postcopy_recovery_test_fail;
} MigrateCommon;

wait_for_serial() QEMU

/*
 * Wait for some output in the serial output file,
 * we get an 'A' followed by an endless string of 'B's
 * but on the destination we won't have the A (unless we enabled suspend/resume)
 *
 * side: file name of the serial log inside tmpfs ("src_serial" or
 *       "dest_serial")
 *
 * Returns once a 'B' has been read.  Aborts the test if the serial file
 * cannot be opened or if an unexpected character is read.
 */
static void wait_for_serial(const char *side)
{
    g_autofree char *serialpath = g_strdup_printf("%s/%s", tmpfs, side);
    FILE *serialfile = fopen(serialpath, "r");
    const char *arch = qtest_get_arch();
    /*
     * Only the ppc64 source prints a firmware banner first; there, output is
     * ignored until the '_' marker has been seen.
     */
    const bool needs_marker = strcmp(side, "src_serial") == 0 &&
                              strcmp(arch, "ppc64") == 0;
    int started = needs_marker ? 0 : 1;

    /* Fail with a clear assertion instead of passing NULL to fgetc(). */
    g_assert_nonnull(serialfile);

    do {
        int readvalue = fgetc(serialfile);

        if (!started) {
            /* SLOF prints its banner before starting test,
             * to ignore it, mark the start of the test with '_',
             * ignore all characters until this marker
             */
            switch (readvalue) {
            case '_':
                started = 1;
                break;
            case EOF:
                /* Nothing more yet: rewind and retry shortly. */
                fseek(serialfile, 0, SEEK_SET);
                usleep(1000);
                break;
            default:
                /* Banner character: skip it. */
                break;
            }
            continue;
        }
        switch (readvalue) {
        case 'A':
            /* Fine */
            break;

        case 'B':
            /* It's alive! */
            /* A 'B' proves the guest is running, so we are done. */
            fclose(serialfile);
            return;

        case EOF:
            /* Rewinding means the marker (if any) must be found again. */
            started = needs_marker ? 0 : 1;
            fseek(serialfile, 0, SEEK_SET);
            usleep(1000);
            break;

        default:
            fprintf(stderr, "Unexpected %d on %s serial\n", readvalue, side);
            g_assert_not_reached();
        }
    } while (true);
}

wait_for_suspend() QEMU

/*
 * Block until @who reports a SUSPEND event, but only when this side was
 * asked to suspend (state->suspend_me) and the suspend has not been
 * observed yet (state->suspend_seen).
 */
static void wait_for_suspend(QTestState *who, QTestMigrationState *state)
{
    /* Nothing to wait for: no suspend requested, or it was already seen. */
    if (!state->suspend_me || state->suspend_seen) {
        return;
    }

    qtest_qmp_eventwait(who, "SUSPEND");
}

migrate_qmp_fail() QEMU

/*
 * Issue a QMP 'migrate' command on @who and assert that the command fails.
 *
 * uri:      optional value for the 'uri' argument (skipped when NULL)
 * channels: optional JSON text for the 'channels' argument (skipped when NULL)
 * fmt, ...: JSON format string (plus its arguments) supplying the base
 *           arguments dictionary
 */
void migrate_qmp_fail(QTestState *who, const char *uri, const char *channels, const char *fmt, ...)
{
    va_list ap;
    QDict *args, *err;

    //...
    /* Build the arguments QDict from the JSON format string */
    args = qdict_from_vjsonf_nofail(fmt, ap);

    //...
    if (uri)
        qdict_put_str(args, "uri", uri);

    //...
    if (channels) {
        /* Parse the JSON text; error_abort makes a parse failure fatal */
        QObject *channels_obj = qobject_from_json(channels, &error_abort);
        qdict_put_obj(args, "channels", channels_obj);
    }

    /* Assert that the 'migrate' command is rejected; the error reply is kept in err */
    err = qtest_qmp_assert_failure_ref(who, "{ 'execute': 'migrate', 'arguments': %p}", args);
    //...
}

migrate_prepare_for_dirty_mem() / migrate_wait_for_dirty_mem() QEMU

This function is to ensure the migration goes through at least 2 rounds.

  1. Before the migration, we write a magic marker in the given offset in src.
  2. Watch the dst until the marker appears. This is proof that the offset has been transferred.
  3. Dirty the offset in src. (This is done by the guest bootfile)
  4. Go back to the source and keep reading a byte located before the marker until its value changes.

这样我们就可以保证有第二轮迁移。

/*
 * Step 1 of the dirty-memory check: plant MAGIC_MARKER in source guest
 * memory before the migration starts.
 *
 * The guest workload walks from start_address to end_address and writes
 * one byte every TEST_MEM_PAGE_SIZE bytes.  A location that is NOT a
 * multiple of TEST_MEM_PAGE_SIZE is therefore never touched by the guest,
 * so a marker written there does not conflict with the workload and can
 * later be used to tell when the data has reached the destination.
 */
static void migrate_prepare_for_dirty_mem(QTestState *from)
{
    const uint64_t marker_address = start_address + MAGIC_OFFSET;

    qtest_writeq(from, marker_address, MAGIC_MARKER);
}

/*
 * Steps 2-4 of the dirty-memory check: wait until the marker written on the
 * source shows up on the destination, then wait until the source guest has
 * modified an already watched byte again.
 */
static void migrate_wait_for_dirty_mem(QTestState *from, QTestState *to)
{
    const uint64_t marker_address = start_address + MAGIC_OFFSET;
    const uint64_t watch_address = start_address + MAGIC_OFFSET_BASE;
    uint8_t initial_byte;

    /* Poll the destination every 10ms until the marker has been transferred. */
    for (;;) {
        usleep(1000 * 10);
        if (qtest_readq(to, marker_address) == MAGIC_MARKER) {
            break;
        }
    }

    /* If suspended, src only iterates once, and the watched byte may never change */
    if (src_state.suspend_me) {
        return;
    }

    /*
     * Now make sure the guest workload dirties memory again.  The guest
     * boot code keeps incrementing the first byte of each page, so the
     * watched byte is expected to change.  The byte value wraps around and
     * may by chance equal the initial value at the moment we look; that is
     * harmless because a different value is eventually seen if we keep
     * polling.
     */
    initial_byte = qtest_readb(from, watch_address);
    for (;;) {
        usleep(1000 * 10);
        if (qtest_readb(from, watch_address) != initial_byte) {
            break;
        }
    }
}

test_precopy_common() QEMU

/*
 * Run one precopy migration test described by @args: start both QEMUs,
 * issue the migrate command, and check that the outcome matches
 * args->result.
 */
static void test_precopy_common(MigrateCommon *args)
{
    QTestState *from, *to;
    void *data_hook = NULL;

    /* Create the src and dst QEMU */
    if (test_migrate_start(&from, &to, args->listen_uri, &args->start)) {
        return;
    }

    if (args->start_hook) {
        data_hook = args->start_hook(from, to);
    }

    /* Only done when the test is expected to succeed. */
    /* Wait for the first serial output from the source */
    if (args->result == MIG_TEST_SUCCEED) {
        wait_for_serial("src_serial");
        /* Wait for a SUSPEND event (returns at once unless suspend_me is set) */
        wait_for_suspend(from, &src_state);
    }

    /* Live migration requested */
    if (args->live) {
        /* Keep the migration from converging for now (presumably by limiting downtime/bandwidth -- confirm in the helper) */
        migrate_ensure_non_converge(from);
        /* Write the magic marker first, so that at least two passes can be verified later */
        migrate_prepare_for_dirty_mem(from);
    } else {
        /*
         * Testing non-live migration, we allow it to run at
         * full speed to ensure short test case duration.
         * For tests expected to fail, we don't need to
         * change anything.
         */
        /* non-live migration */
        if (args->result == MIG_TEST_SUCCEED) {
            /* Stop the source machine, since we are performing non-live migration */
            qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}");
            /* Wait for the source stop */
            wait_for_stop(from, &src_state);
            /* Let the migration converge (presumably sets bandwidth and downtime limits -- confirm) */
            migrate_ensure_converge(from);
        }
    }

    if (args->result == MIG_TEST_QMP_ERROR) {
        /*
         * The QMP command itself is expected to be rejected; this is
         * different from the migration failing later on.
         */
        migrate_qmp_fail(from, args->connect_uri, args->connect_channels, "{}");
        goto finish;
    }

    migrate_qmp(from, to, args->connect_uri, args->connect_channels, "{}");

    if (args->result != MIG_TEST_SUCCEED) {
        bool allow_active = args->result == MIG_TEST_FAIL;
        wait_for_migration_fail(from, allow_active);

        if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR)
            qtest_set_expected_status(to, EXIT_FAILURE);
    } else {
        if (args->live) {
            /*
             * For initial iteration(s) we must do a full pass,
             * but for the final iteration, we need only wait
             * for some dirty mem before switching to converge
             */
            /* Wait for several iterations first (note: decrements args->iterations in place) */
            while (args->iterations > 1) {
                wait_for_migration_pass(from);
                args->iterations--;
            }

            /* Wait until guest memory is seen dirty again */
            migrate_wait_for_dirty_mem(from, to);
            /* Allow convergence again (presumably restores bandwidth and downtime limit -- confirm) */
            migrate_ensure_converge(from);

            /*
             * We do this first, as it has a timeout to stop us
             * hanging forever if migration didn't converge
             */
            wait_for_migration_complete(from);

            /* The source is expected to stop once the migration has finished */
            wait_for_stop(from, &src_state);
        } else {
            wait_for_migration_complete(from);
            /*
             * Must wait for dst to finish reading all incoming
             * data on the socket before issuing 'cont' otherwise
             * it'll be ignored
             */
            wait_for_migration_complete(to);
            qtest_qmp_assert_success(to, "{ 'execute' : 'cont'}");
        }

        /* The destination starts running */
        wait_for_resume(to, &dst_state);

        /* If the guest was suspended, wake it up on the destination */
        if (args->start.suspend_me) {
            /* wakeup succeeds only if guest is suspended */
            qtest_qmp_assert_success(to, "{'execute': 'system_wakeup'}");
        }

        /* Check the output on the destination's serial port */
        wait_for_serial("dest_serial");
    }

finish:
    if (args->finish_hook) {
        args->finish_hook(from, to, data_hook);
    }

    test_migrate_end(from, to, args->result == MIG_TEST_SUCCEED);
}

test_file_common() QEMU

/*
 * Run one file-based migration test described by @args.
 *
 * stop_src: when true, the source VM is stopped before migrating and the
 *           destination is explicitly continued afterwards
 */
static void test_file_common(MigrateCommon *args, bool stop_src)
{
    QTestState *from, *to;
    void *data_hook = NULL;

    /* Create the src and dst QEMU */
    if (test_migrate_start(&from, &to, args->listen_uri, &args->start))
        return;

    /*
     * File migration is never live. We can keep the source VM running
     * during migration, but the destination will not be running
     * concurrently.
     */
    /*
     * "Not live" does not forbid the source vCPUs from running while
     * migrating; it means the destination cannot start running right
     * after the save completes.
     */
    g_assert_false(args->live);

    if (args->start_hook)
        data_hook = args->start_hook(from, to);

    migrate_ensure_converge(from);
    wait_for_serial("src_serial");

    /*
     * Should we stop the source while migrating?  Note that even if the
     * source is not stopped, this is still not called a live migration.
     */
    if (stop_src) {
        qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}");
        wait_for_stop(from, &src_state);
    }

    /* The QMP command itself is expected to fail */
    if (args->result == MIG_TEST_QMP_ERROR) {
        migrate_qmp_fail(from, args->connect_uri, NULL, "{}");
        goto finish;
    }

    migrate_qmp(from, to, args->connect_uri, NULL, "{}");
    wait_for_migration_complete(from);

    /* We need to wait for the source to finish before starting the destination. */
    migrate_incoming_qmp(to, args->connect_uri, "{}");
    wait_for_migration_complete(to);

    /* The source was stopped before migrating, so explicitly 'cont' the destination */
    if (stop_src) {
        qtest_qmp_assert_success(to, "{ 'execute' : 'cont'}");
    }
    wait_for_resume(to, &dst_state);

    /* Ensure the destination guest produces serial output again */
    wait_for_serial("dest_serial");

finish:
    if (args->finish_hook) {
        args->finish_hook(from, to, data_hook);
    }

    test_migrate_end(from, to, args->result == MIG_TEST_SUCCEED);
}

migrate_postcopy_start() QEMU

/*
 * Switch an ongoing migration to postcopy: issue 'migrate-start-postcopy'
 * on the source, wait for the source to stop and for the destination to
 * emit a RESUME event.
 */
static void migrate_postcopy_start(QTestState *from, QTestState *to)
{
    /* This QMP command is asserted to succeed */
    qtest_qmp_assert_success(from, "{ 'execute': 'migrate-start-postcopy' }");

    wait_for_stop(from, &src_state);
    /* Wait for the destination's RESUME event */
    qtest_qmp_eventwait(to, "RESUME");
}

migrate_postcopy_complete() QEMU

Wait for the migration to complete, then check the serial output on the destination side.

/*
 * Finish a postcopy test: wait for the source to report completion, wake
 * the destination guest if it was suspended, check its serial output, run
 * the finish hook and tear both QEMUs down.
 */
static void migrate_postcopy_complete(QTestState *from, QTestState *to, MigrateCommon *args)
{
    wait_for_migration_complete(from);

    /* Wake up the destination guest if the test suspended it */
    if (args->start.suspend_me) {
        /* wakeup succeeds only if guest is suspended */
        qtest_qmp_assert_success(to, "{'execute': 'system_wakeup'}");
    }

    /* Make sure we get at least one "B" on destination */
    wait_for_serial("dest_serial");

    /* Only read the blocktime when UFFD_FEATURE_THREAD_ID was detected */
    if (uffd_feature_thread_id) {
        read_blocktime(to);
    }

    if (args->finish_hook) {
        args->finish_hook(from, to, args->postcopy_data);
        /* Clear the pointer once it has been handed to the finish hook */
        args->postcopy_data = NULL;
    }

    test_migrate_end(from, to, true);
}

test_postcopy_common() QEMU

/*
 * Run a complete postcopy test: prepare both sides, switch to postcopy and
 * wait for completion.  Does nothing further when preparation fails.
 */
static void test_postcopy_common(MigrateCommon *args)
{
    QTestState *from, *to;

    if (migrate_postcopy_prepare(&from, &to, args) == 0) {
        migrate_postcopy_start(from, to);
        migrate_postcopy_complete(from, to, args);
    }
}

migrate_postcopy_prepare() QEMU

/*
 * Bring up source and destination for a postcopy test and start the
 * migration, leaving it running so that the caller can switch to postcopy.
 *
 * from_ptr, to_ptr: out-parameters receiving the source/destination handles
 * args:             test parameters; args->postcopy_data receives the
 *                   start hook's return value
 *
 * Returns 0 on success, -1 if test_migrate_start() fails.
 */
static int migrate_postcopy_prepare(QTestState **from_ptr,
                                    QTestState **to_ptr,
                                    MigrateCommon *args)
{
    QTestState *from, *to;

    /* Start the destination with -incoming defer */
    if (test_migrate_start(&from, &to, "defer", &args->start)) {
        return -1;
    }

    if (args->start_hook)
        args->postcopy_data = args->start_hook(from, to);

    migrate_set_capability(from, "postcopy-ram", true);
    migrate_set_capability(to, "postcopy-ram", true);
    migrate_set_capability(to, "postcopy-blocktime", true);

    /* Enable postcopy-preempt on both sides when requested */
    if (args->postcopy_preempt) {
        migrate_set_capability(from, "postcopy-preempt", true);
        migrate_set_capability(to, "postcopy-preempt", true);
    }

    /* Keep the migration from converging for now */
    migrate_ensure_non_converge(from);
    /* Write the magic marker into source guest memory */
    migrate_prepare_for_dirty_mem(from);

    /* Tell the destination where to listen: an inet socket on 127.0.0.1, port '0' */
    qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming',"
                             "  'arguments': { "
                             "      'channels': [ { 'channel-type': 'main',"
                             "      'addr': { 'transport': 'socket',"
                             "                'type': 'inet',"
                             "                'host': '127.0.0.1',"
                             "                'port': '0' } } ] } }");

    /* Wait for the first serial output from the source */
    wait_for_serial("src_serial");
    /* Wait for the source to suspend (only when suspend_me is set) */
    wait_for_suspend(from, &src_state);

    /* Issue the migrate command on the source, then wait for dirty memory */
    migrate_qmp(from, to, NULL, NULL, "{}");
    migrate_wait_for_dirty_mem(from, to);

    *from_ptr = from;
    *to_ptr = to;

    return 0;
}

postcopy_recover_fail() QEMU

Recover the postcopy migration then pause it again. So the state PAUSED -> RUNNING -> PAUSED.

不过 recover 使用的 socket 其实是一个错误的 socket。This tests the case where a wrong socket is specified for recovery, then the ability to kick the migration out of that state and continue with a correct socket.

/*
 * Attempt a postcopy recovery over a deliberately mismatched channel, then
 * kick both sides back out of it with migrate-pause.
 *
 * Each QEMU is handed one end of a *different* socketpair, so the two
 * instances are never actually connected to each other.  Both are expected
 * to reach "postcopy-recover" and, after migrate-pause, to go back to
 * "postcopy-paused".
 *
 * @from: QEMU instance acting as migration source
 * @to:   QEMU instance acting as migration destination
 */
static void postcopy_recover_fail(QTestState *from, QTestState *to)
{
    int ret, pair1[2], pair2[2];
    char c;

    /* Create two unrelated socketpairs; the 2 fds in each pair are connected */
    ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair1);
    g_assert_cmpint(ret, ==, 0);

    ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair2);
    g_assert_cmpint(ret, ==, 0);

    /*
     * Give the guests unpaired ends of the sockets, so they'll all be
     * blocked at reading.  This mimics a wrong channel being established:
     * pair1[0] goes to the source QEMU, pair2[0] to the destination QEMU.
     */
    qtest_qmp_fds_assert_success(from, &pair1[0], 1,
                                 "{ 'execute': 'getfd',"
                                 "  'arguments': { 'fdname': 'fd-mig' }}");
    qtest_qmp_fds_assert_success(to, &pair2[0], 1,
                                 "{ 'execute': 'getfd',"
                                 "  'arguments': { 'fdname': 'fd-mig' }}");

    /*
     * Write the 1st byte as QEMU_VM_COMMAND (0x8) for the dest socket, to
     * emulate the 1st byte of a real recovery, but stops from there to
     * keep dest QEMU in RECOVER.  This is needed so that we can kick off
     * the recover process on dest QEMU (by triggering the G_IO_IN event).
     *
     * NOTE: this trick is not needed on src QEMUs, because src doesn't
     * rely on an pre-existing G_IO_IN event, so it will always trigger the
     * upcoming recovery anyway even if it can read nothing.
     */
#define QEMU_VM_COMMAND              0x08
    c = QEMU_VM_COMMAND;
    /* Feed that single byte into the peer of the destination's fd */
    ret = send(pair2[1], &c, 1, 0);
    g_assert_cmpint(ret, ==, 1);

    /*
     * Start the (doomed) recovery on both sides.  The caller,
     * test_postcopy_recovery_common(), has already waited for both sides
     * to be in "postcopy-paused" before calling this helper.
     */
    migrate_recover(to, "fd:fd-mig");
    migrate_qmp(from, to, "fd:fd-mig", NULL, "{'resume': true}");

    /*
     * Make sure both QEMU instances will go into RECOVER stage, then test
     * kicking them out using migrate-pause.
     */
    wait_for_postcopy_status(from, "postcopy-recover");
    wait_for_postcopy_status(to, "postcopy-recover");

    /*
     * This would be issued by the admin upon noticing the hang, we should
     * make sure we're able to kick this out.
     */
    migrate_pause(from);
    wait_for_postcopy_status(from, "postcopy-paused");

    /* Do the same test on dest */
    migrate_pause(to);
    wait_for_postcopy_status(to, "postcopy-paused");

    /* Drop the test process's copies of all four descriptors */
    close(pair1[0]);
    close(pair1[1]);
    close(pair2[0]);
    close(pair2[1]);
}

test_postcopy_recovery_common() / QEMU

/*
 * Shared body of the postcopy "pause then recover" tests.
 *
 * Starts a slow postcopy, pauses it from the source, optionally goes through
 * one failed recovery attempt (args->postcopy_recovery_test_fail), and then
 * recovers over a fresh unix socket and lets the migration complete.
 */
static void test_postcopy_recovery_common(MigrateCommon *args)
{
    QTestState *from, *to;
    g_autofree char *recover_uri = NULL;

    /* Errors are expected in recovery tests, so keep stderr quiet. */
    args->start.hide_stderr = true;

    if (migrate_postcopy_prepare(&from, &to, args)) {
        return;
    }

    /* Throttle postcopy: 4K/s is slow enough on any machine. */
    migrate_set_parameter_int(from, "max-postcopy-bandwidth", 4096);

    /* Kick off the postcopy phase. */
    migrate_postcopy_start(from, to);

    /*
     * migrate-pause is only accepted while a postcopy is in progress, so
     * wait for the source to report that it really has started.
     */
    wait_for_migration_status(from, "postcopy-active", NULL);

    /* Interrupt the migration, emulating a failure of the migration socket. */
    migrate_pause(from);

    /*
     * migrate-recover can only succeed once the destination is paused;
     * wait for the destination first, then for the source.
     */
    wait_for_postcopy_status(to, "postcopy-paused");
    wait_for_postcopy_status(from, "postcopy-paused");

    if (args->postcopy_recovery_test_fail) {
        /*
         * First try a recovery over a wrong socket and check that it can
         * be kicked out; the good recovery follows below.
         */
        postcopy_recover_fail(from, to);
    }

    /*
     * Use a new socket, distinct from the broken migration channel, and
     * tell the destination to listen on it.
     */
    recover_uri = g_strdup_printf("unix:%s/migsocket-recover", tmpfs);
    migrate_recover(to, recover_uri);

    /* Re-establish the channel from the source using the resume flag. */
    migrate_qmp(from, to, recover_uri, NULL, "{'resume': true}");

    /* Lift the postcopy bandwidth limit again (0 means unlimited). */
    migrate_set_parameter_int(from, "max-postcopy-bandwidth", 0);

    migrate_postcopy_complete(from, to, args);
}

do_test_validate_uuid() QEMU

/*
 * Shared body of the validate-uuid tests.
 *
 * @args:        start options (carries the -uuid options of each side)
 * @should_fail: true when the migration is expected to fail
 */
static void do_test_validate_uuid(MigrateStart *args, bool should_fail)
{
    g_autofree char *sock_uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
    QTestState *from, *to;

    if (test_migrate_start(&from, &to, sock_uri, args)) {
        return;
    }

    /*
     * The UUID is validated at the very beginning of a migration, so the
     * bulk of the migration is of no interest here: allow a huge downtime
     * to get it over with quickly.
     */
    migrate_set_parameter_int(from, "downtime-limit", 1000000);

    /* The capability under test. */
    migrate_set_capability(from, "validate-uuid", true);

    /* Let the source print its first serial output before migrating. */
    wait_for_serial("src_serial");

    migrate_qmp(from, to, sock_uri, NULL, "{}");

    if (!should_fail) {
        wait_for_migration_complete(from);
    } else {
        qtest_set_expected_status(to, EXIT_FAILURE);
        wait_for_migration_fail(from, true);
    }

    test_migrate_end(from, to, false);
}

do_test_validate_uri_channel() QEMU

connect_uri and connect_channels.

注定就是要 fail,因为 connect_uri 和 connect_channels 这两个不能同时被 set。

/*
 * Issue a migrate command with the given connect_uri / connect_channels and
 * expect it to be rejected (migrate_qmp_fail).
 */
static void do_test_validate_uri_channel(MigrateCommon *args)
{
    QTestState *from, *to;

    if (test_migrate_start(&from, &to, args->listen_uri, &args->start)) {
        return;
    }

    /* Let the source print its first serial output first. */
    wait_for_serial("src_serial");

    /*
     * The 'uri' / 'channels' arguments are validated even before the
     * migration starts.
     */
    migrate_qmp_fail(from, args->connect_uri, args->connect_channels, "{}");

    test_migrate_end(from, to, false);
}

QEMU Migration Test Cases

test_baddest() / "/migration/bad_dest" / QEMU

因为 destination bind 的是一个 dummy url,所以注定迁移会失败。

/*
 * "/migration/bad_dest": migrate to an address nobody listens on and check
 * that the migration fails.
 */
static void test_baddest(void)
{
    /*
     * Port 0 on the listening side means "pick any free port", so the
     * destination ends up on some port this test does not know.  (After
     * migrate-incoming is set, the real socket address can be inspected
     * with "info migrate" in the destination's HMP.)
     */
    const char *dummy_uri = "tcp:127.0.0.1:0";
    MigrateStart start_opts = {
        .hide_stderr = true
    };
    QTestState *from, *to;

    /* Launch both machines; the destination listens on dummy_uri. */
    if (test_migrate_start(&from, &to, dummy_uri, &start_opts)) {
        return;
    }

    /*
     * On the connecting side port 0 is taken literally.  It is a reserved
     * port in TCP/IP and is not meant to be used in TCP or UDP messages,
     * so this migration is expected to fail.
     */
    migrate_qmp(from, to, dummy_uri, NULL, "{}");
    wait_for_migration_fail(from, false);
    test_migrate_end(from, to, false);
}

test_analyze_script() / "/migration/analyze-script" / QEMU

/*
 * "/migration/analyze-script": dump a migration stream into a file and run
 * the analysis script (ANALYZE_SCRIPT) on it with the interpreter named by
 * the PYTHON environment variable.  The test fails if the script does not
 * exit with status 0.
 */
static void test_analyze_script(void)
{
    MigrateStart args = {
        .opts_source = "-uuid 11111111-1111-1111-1111-111111111111",
    };
    QTestState *from, *to;
    g_autofree char *uri = NULL;
    g_autofree char *file = NULL;
    int pid, wstatus;
    const char *python = g_getenv("PYTHON");

    /*
     * ... (code elided in these notes)
     * NOTE(review): python is NULL when PYTHON is unset; presumably the
     * elided code handles that before execl() below is reached -- confirm.
     */
    /* Create source and destination QEMU; the destination gets a dummy URI */
    if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", &args)) {
        return;
    }

    /*
     * Setting these two capabilities causes the "configuration"
     * vmstate to include subsections for them. The script needs to
     * parse those subsections properly.
     */
    migrate_set_capability(from, "validate-uuid", true);
    migrate_set_capability(from, "x-ignore-shared", true);

    /*
     * The stream is not migrated to the destination QEMU: it is piped
     * through "cat" into <tmpfs>/migfile so that it can be analysed.
     */
    file = g_strdup_printf("%s/migfile", tmpfs);
    uri = g_strdup_printf("exec:cat > %s", file);

    /* Presumably tunes max-bandwidth and downtime-limit so that it converges */
    migrate_ensure_converge(from);
    migrate_qmp(from, to, uri, NULL, "{}");
    wait_for_migration_complete(from);

    pid = fork();
    if (!pid) {
        /*
         * Child process: close stdout and open /dev/null right after, so
         * the script's output is discarded (assumes the freed fd 1 is the
         * one open() hands back).
         */
        close(1);
        open("/dev/null", O_WRONLY);
        /* Analyse the "migfile" file; execl() only returns on failure */
        execl(python, python, ANALYZE_SCRIPT, "-f", file, NULL);
        g_assert_not_reached();
    }

    /* Parent: wait for the analysis script and check its exit status */
    g_assert(waitpid(pid, &wstatus, 0) == pid);
    if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) {
        g_test_message("Failed to analyze the migration stream");
        g_test_fail();
    }
    test_migrate_end(from, to, false);
    cleanup("migfile");
}

test_precopy_unix_suspend_live() / "/migration/precopy/unix/suspend/live" / QEMU

这个 case 会测试 suspend / live-migration 的情况。

static void test_precopy_unix_suspend_live(void)
{
    g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
    MigrateCommon args = {
        // src 和 dst 通过这个 uri 进行连接。
        .listen_uri = uri,
        .connect_uri = uri,
        /*
         * despite being live, the test is fast because the src
         * suspends immediately.
         */
        .live = true,
        // we need to suspend
        .start.suspend_me = true,
    };

    test_precopy_common(&args);
}

test_precopy_unix_suspend_notlive() / "/migration/precopy/unix/suspend/notlive" / QEMU

和上一个没什么区别,就是改成了 non-live migration。

static void test_precopy_unix_suspend_notlive(void)
{
    g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
    MigrateCommon args = {
        .listen_uri = uri,
        .connect_uri = uri,
        .start.suspend_me = true,
    };

    test_precopy_common(&args);
}

test_postcopy() / "/migration/postcopy/plain" / QEMU

Plain post copy test.

/* "/migration/postcopy/plain": postcopy with all-default settings. */
static void test_postcopy(void)
{
    MigrateCommon defaults = { };

    test_postcopy_common(&defaults);
}

test_postcopy_recovery() / "/migration/postcopy/recovery/plain" / QEMU

test postcopy pause then recover.

/*
 * "/migration/postcopy/recovery/plain": pause a postcopy, then recover it,
 * with all-default settings.
 */
static void test_postcopy_recovery(void)
{
    MigrateCommon defaults = { };

    test_postcopy_recovery_common(&defaults);
}

test_postcopy_preempt() / "/migration/postcopy/preempt/plain" / QEMU

/* "/migration/postcopy/preempt/plain": postcopy with postcopy_preempt set. */
static void test_postcopy_preempt(void)
{
    MigrateCommon cfg = {
        .postcopy_preempt = true,
    };

    test_postcopy_common(&cfg);
}

test_postcopy_preempt_recovery() / "/migration/postcopy/preempt/recovery/plain" / QEMU

/*
 * "/migration/postcopy/preempt/recovery/plain": the pause/recover scenario
 * with postcopy_preempt set.
 */
static void test_postcopy_preempt_recovery(void)
{
    MigrateCommon cfg = {
        .postcopy_preempt = true,
    };

    test_postcopy_recovery_common(&cfg);
}

test_postcopy_recovery_double_fail() / "/migration/postcopy/recovery/double-failures" / QEMU

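The first recovery attempt fails on purpose (a wrong socket is used, see postcopy_recover_fail()); the test then recovers again with a correct socket.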

/*
 * "/migration/postcopy/recovery/double-failures": the pause/recover scenario
 * with postcopy_recovery_test_fail set, so that one failed recovery attempt
 * is made before the good one.
 */
static void test_postcopy_recovery_double_fail(void)
{
    MigrateCommon cfg = {
        .postcopy_recovery_test_fail = true,
    };

    test_postcopy_recovery_common(&cfg);
}

test_postcopy_suspend() / "/migration/postcopy/suspend" / QEMU

/* "/migration/postcopy/suspend": postcopy with start.suspend_me set. */
static void test_postcopy_suspend(void)
{
    MigrateCommon cfg = {
        .start.suspend_me = true,
    };

    test_postcopy_common(&cfg);
}

test_precopy_unix_plain() / /migration/precopy/unix/plain / QEMU

static void test_precopy_unix_plain(void)
{
    g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
    MigrateCommon args = {
        .listen_uri = uri,
        .connect_uri = uri,
        /*
         * The simplest use case of precopy, covering smoke tests of
         * get-dirty-log dirty tracking.
         */
        .live = true,
    };

    test_precopy_common(&args);
}

test_validate_uuid() / "/migration/validate_uuid" / QEMU

/*
 * "/migration/validate_uuid": source and destination are started with the
 * same UUID; the migration is expected to succeed.
 */
static void test_validate_uuid(void)
{
    MigrateStart start_opts = {
        .opts_target = "-uuid 11111111-1111-1111-1111-111111111111",
        .opts_source = "-uuid 11111111-1111-1111-1111-111111111111",
    };

    /* should_fail == false: success expected */
    do_test_validate_uuid(&start_opts, false);
}

test_validate_uuid_error() / "/migration/validate_uuid_error" / QEMU

/*
 * "/migration/validate_uuid_error": source and destination are started with
 * different UUIDs; the migration is expected to fail.
 */
static void test_validate_uuid_error(void)
{
    MigrateStart start_opts = {
        .hide_stderr = true,
        .opts_target = "-uuid 22222222-2222-2222-2222-222222222222",
        .opts_source = "-uuid 11111111-1111-1111-1111-111111111111",
    };

    /* should_fail == true: failure expected */
    do_test_validate_uuid(&start_opts, true);
}

test_validate_uuid_src_not_set() / "/migration/validate_uuid_src_not_set" / QEMU

/*
 * "/migration/validate_uuid_src_not_set": only the destination is given a
 * -uuid option; the migration is expected to succeed.
 */
static void test_validate_uuid_src_not_set(void)
{
    MigrateStart start_opts = {
        .hide_stderr = true,
        .opts_target = "-uuid 22222222-2222-2222-2222-222222222222",
    };

    /* should_fail == false: success expected */
    do_test_validate_uuid(&start_opts, false);
}

test_validate_uuid_dst_not_set() / "/migration/validate_uuid_dst_not_set" / QEMU

/*
 * "/migration/validate_uuid_dst_not_set": only the source is given a -uuid
 * option; the migration is expected to succeed.
 */
static void test_validate_uuid_dst_not_set(void)
{
    MigrateStart start_opts = {
        .hide_stderr = true,
        .opts_source = "-uuid 11111111-1111-1111-1111-111111111111",
    };

    /* should_fail == false: success expected */
    do_test_validate_uuid(&start_opts, false);
}

test_validate_uri_channels_both_set() / "/migration/validate_uri/channels/both_set" / QEMU

这个 case 同时设置了 connect_uri 和 connect_channels,所以 migrate 命令预期会失败(见 do_test_validate_uri_channel())。

/*
 * "/migration/validate_uri/channels/both_set": pass both a connect URI and
 * a channel list to the shared helper, which expects the migrate command to
 * be rejected.
 */
static void test_validate_uri_channels_both_set(void)
{
    MigrateCommon cfg = {
        .start.hide_stderr = true,
        .listen_uri = "defer",
        /* Both ways of naming the destination are filled in on purpose. */
        .connect_channels = "[ { 'channel-type': 'main',"
                            "    'addr': { 'transport': 'socket',"
                            "              'type': 'inet',"
                            "              'host': '127.0.0.1',"
                            "              'port': '0' } } ]",
        .connect_uri = "tcp:127.0.0.1:0",
    };

    do_test_validate_uri_channel(&cfg);
}

test_migrate_auto_converge() / "/migration/auto_converge" / QEMU

/*
 * The way auto_converge works, we need to do too many passes to
 * run this test.  Auto_converge logic is only run once every
 * three iterations, so:
 *
 * - 3 iterations without auto_converge enabled
 * - 3 iterations with pct = 5
 * - 3 iterations with pct = 30
 * - 3 iterations with pct = 55
 * - 3 iterations with pct = 80
 * - 3 iterations with pct = 95 (max(95, 80 + 25))
 *
 * To make things even worse, we need to run the initial stage at
 * 3MB/s so we enter autoconverge even when host is (over)loaded.
 */
static void test_migrate_auto_converge(void)
{
    g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
    MigrateStart args = {};
    QTestState *from, *to;
    int64_t percentage;

    /*
     * We want the test to be stable and as fast as possible.
     * E.g., with 1Gb/s bandwidth migration may pass without throttling,
     * so we need to decrease a bandwidth.
     */
    const int64_t init_pct = 5, inc_pct = 25, max_pct = 95;

    if (test_migrate_start(&from, &to, uri, &args)) {
        return;
    }

    migrate_set_capability(from, "auto-converge", true);
    migrate_set_parameter_int(from, "cpu-throttle-initial", init_pct);
    migrate_set_parameter_int(from, "cpu-throttle-increment", inc_pct);
    migrate_set_parameter_int(from, "max-cpu-throttle", max_pct);

    /*
     * Set the initial parameters so that the migration could not converge
     * without throttling.
     */
    migrate_ensure_non_converge(from);

    /* To check remaining size after precopy */
    migrate_set_capability(from, "pause-before-switchover", true);

    /* Wait for the first serial output from the source */
    wait_for_serial("src_serial");

    migrate_qmp(from, to, uri, NULL, "{}");

    /* Wait for throttling begins */
    percentage = 0;
    do {
        percentage = read_migrate_property_int(from, "cpu-throttle-percentage");
        if (percentage != 0) {
            break;
        }
        usleep(20);
        g_assert_false(src_state.stop_seen);
    } while (true);
    /* The first percentage of throttling should be at least init_pct */
    g_assert_cmpint(percentage, >=, init_pct);
    /* Now, when we tested that throttling works, let it converge */
    migrate_ensure_converge(from);

    /*
     * Wait for pre-switchover status to check last throttle percentage
     * and remaining. These values will be zeroed later
     */
    wait_for_migration_status(from, "pre-switchover", NULL);

    /* The final percentage of throttling shouldn't be greater than max_pct */
    percentage = read_migrate_property_int(from, "cpu-throttle-percentage");
    g_assert_cmpint(percentage, <=, max_pct);
    migrate_continue(from, "pre-switchover");

    qtest_qmp_eventwait(to, "RESUME");

    wait_for_serial("dest_serial");
    wait_for_migration_complete(from);

    test_migrate_end(from, to, true);
}

test_multifd_tcp_cancel() / "/migration/multifd/tcp/plain/cancel" / QEMU

/*
 * "/migration/multifd/tcp/plain/cancel": start a multifd migration to a
 * first destination, cancel it, then start a second destination ("to2") and
 * migrate the same source to it successfully.
 */
static void test_multifd_tcp_cancel(void)
{
    MigrateStart args = {
        .hide_stderr = true,
    };
    QTestState *from, *to, *to2;

    if (test_migrate_start(&from, &to, "defer", &args))
        return;

    /* Keep the first migration from finishing before it is cancelled */
    migrate_ensure_non_converge(from);
    migrate_prepare_for_dirty_mem(from);

    /* Enable multifd with 16 channels on both sides */
    migrate_set_parameter_int(from, "multifd-channels", 16);
    migrate_set_parameter_int(to, "multifd-channels", 16);
    migrate_set_capability(from, "multifd", true);
    migrate_set_capability(to, "multifd", true);

    /* Start incoming migration from the 1st socket */
    migrate_incoming_qmp(to, "tcp:127.0.0.1:0", "{}");

    /* Wait for the first serial output from the source */
    wait_for_serial("src_serial");
    migrate_qmp(from, to, NULL, NULL, "{}");
    migrate_wait_for_dirty_mem(from, to);

    /* Cancel the migration to "to"; the source can still migrate to "to2" */
    migrate_cancel(from);

    /* Make sure QEMU process "to" exited (an EXIT_FAILURE status is expected) */
    qtest_set_expected_status(to, EXIT_FAILURE);
    qtest_wait_qemu(to);

    /* Second round: only_target is set, and "from" is passed in again */
    args = (MigrateStart){
        .only_target = true,
    };

    if (test_migrate_start(&from, &to2, "defer", &args)) {
        return;
    }

    migrate_set_parameter_int(to2, "multifd-channels", 16);
    migrate_set_capability(to2, "multifd", true);

    /* Start incoming migration on the new destination */
    migrate_incoming_qmp(to2, "tcp:127.0.0.1:0", "{}");

    /* The source must have reached "cancelled" before migrating again */
    wait_for_migration_status(from, "cancelled", NULL);

    migrate_ensure_non_converge(from);

    migrate_qmp(from, to2, NULL, NULL, "{}");

    migrate_wait_for_dirty_mem(from, to2);

    /* Now let the second migration converge and finish */
    migrate_ensure_converge(from);

    wait_for_stop(from, &src_state);
    qtest_qmp_eventwait(to2, "RESUME");

    wait_for_serial("dest_serial");
    wait_for_migration_complete(from);
    test_migrate_end(from, to2, true);
}

test_precopy_tcp_switchover_ack() / "/migration/precopy/tcp/plain/switchover-ack" / QEMU

/*
 * "/migration/precopy/tcp/plain/switchover-ack": live precopy over TCP with
 * the switchover-ack start hook installed.
 */
static void test_precopy_tcp_switchover_ack(void)
{
    MigrateCommon cfg = {
        /*
         * The switchover ACK is only taken into account for the
         * switchover decision while the source VM is running.
         */
        .live = true,
        .start_hook = test_migrate_switchover_ack_start,
        .listen_uri = "tcp:127.0.0.1:0",
    };

    test_precopy_common(&cfg);
}

test_migrate_switchover_ack_start() QEMU

/*
 * Start hook: enable the "return-path" and "switchover-ack" capabilities on
 * both sides.
 *
 * Returns NULL; this hook has no private data to hand back to the caller.
 */
static void *test_migrate_switchover_ack_start(QTestState *from, QTestState *to)
{
    /* return-path has to be enabled first: switchover-ack depends on it. */
    migrate_set_capability(from, "return-path", true);
    migrate_set_capability(to, "return-path", true);

    migrate_set_capability(from, "switchover-ack", true);
    migrate_set_capability(to, "switchover-ack", true);

    /* The function is declared void *, so it must return a value. */
    return NULL;
}

test_migrate_precopy_fd_socket() / "/migration/precopy/fd/tcp" / QEMU

/*
 * "/migration/precopy/fd/tcp": precopy where both sides use a named file
 * descriptor ("fd-mig") that the start hook hands to them.
 */
static void test_migrate_precopy_fd_socket(void)
{
    MigrateCommon cfg = {
        .start_hook = test_migrate_fd_start_hook,
        .finish_hook = test_migrate_fd_finish_hook,
        .listen_uri = "defer",
        /*
         * The migration goes through an fd, so that fd has to be opened
         * and registered beforehand (done by the start hook).
         */
        .connect_uri = "fd:fd-mig",
    };

    test_precopy_common(&cfg);
}

test_migrate_fd_start_hook() QEMU

/*
 * Start hook for the fd-based precopy test: create a connected socketpair,
 * register one end as 'fd-mig' in each QEMU and start the incoming side.
 *
 * Returns NULL; this hook has no private data to hand back to the caller.
 */
static void *test_migrate_fd_start_hook(QTestState *from,
                                        QTestState *to)
{
    int ret;
    int pair[2];

    /* Create two connected sockets for migration */
    ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair);
    g_assert_cmpint(ret, ==, 0);

    /* Send the 1st socket to the target */
    qtest_qmp_fds_assert_success(to, &pair[0], 1,
                                 "{ 'execute': 'getfd',"
                                 "  'arguments': { 'fdname': 'fd-mig' }}");

    /* Tell the destination to use the 'fd-mig' fd for its incoming side */
    migrate_incoming_qmp(to, "fd:fd-mig", "{}");

    /*
     * Send the 2nd socket to the source.  The test sets
     * .connect_uri = "fd:fd-mig", so nothing else has to be specified
     * for the source here.
     */
    qtest_qmp_fds_assert_success(from, &pair[1], 1,
                                 "{ 'execute': 'getfd',"
                                 "  'arguments': { 'fdname': 'fd-mig' }}");

    /*
     * Close our own copies: the QEMU processes keep theirs, only the test
     * process no longer needs these descriptors.
     */
    close(pair[0]);
    close(pair[1]);

    /* The function is declared void *, so it must return a value. */
    return NULL;
}

test_migrate_fd_finish_hook() QEMU

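Check that the 'fd-mig' fd is already gone in both the source and the destination: the hook issues closefd on each side and expects it to fail with a "not found" error.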

/*
 * Finish hook for the fd-based tests.
 *
 * Issues closefd for 'fd-mig' on the source and then on the destination,
 * and asserts that each call fails with a "not found" error: QEMU is
 * assumed to have removed the named fd from its list already.
 *
 * @opaque is not used.
 */
static void test_migrate_fd_finish_hook(QTestState *from, QTestState *to, void *opaque)
{
    QTestState *sides[] = { from, to };
    unsigned int i;

    for (i = 0; i < sizeof(sides) / sizeof(sides[0]); i++) {
        const char *desc;
        QDict *reply = qtest_qmp(sides[i], "{ 'execute': 'closefd',"
                                           "  'arguments': { 'fdname': 'fd-mig' }}");

        /* The command must have produced an error ... */
        g_assert_true(qdict_haskey(reply, "error"));

        /* ... with exactly this description. */
        desc = qdict_get_str(qdict_get_qdict(reply, "error"), "desc");
        g_assert_cmpstr(desc, ==, "File descriptor named 'fd-mig' not found");

        qobject_unref(reply);
    }
}

test_migrate_precopy_fd_file() / "/migration/precopy/fd/file" / QEMU

如果是 socket 的方式("/migration/precopy/fd/tcp"),两个 fd 是一对互相连接的 socket,没有中间人。而 file 的方式则是两个 fd 各自打开了同一个文件。

/*
 * "/migration/precopy/fd/file": like the fd/socket test, but the two
 * 'fd-mig' descriptors are set up by migrate_precopy_fd_file_start().
 */
static void test_migrate_precopy_fd_file(void)
{
    MigrateCommon cfg = {
        .start_hook = migrate_precopy_fd_file_start,
        .finish_hook = test_migrate_fd_finish_hook,
        .listen_uri = "defer",
        .connect_uri = "fd:fd-mig",
    };

    test_file_common(&cfg, true);
}

migrate_precopy_fd_file_start() QEMU

/*
 * Start hook for the fd/file precopy test: open the test file twice and
 * register one descriptor as 'fd-mig' in each QEMU.
 *
 * Returns NULL; this hook has no private data to hand back to the caller.
 */
static void *migrate_precopy_fd_file_start(QTestState *from, QTestState *to)
{
    g_autofree char *file = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME);
    int src_flags = O_CREAT | O_RDWR;
    int dst_flags = O_CREAT | O_RDWR;
    int fds[2];

    /* Both descriptors refer to the same file; fail early if open() fails */
    fds[0] = open(file, src_flags, 0660);
    g_assert_cmpint(fds[0], !=, -1);

    fds[1] = open(file, dst_flags, 0660);
    g_assert_cmpint(fds[1], !=, -1);

    /* ... (code elided in these notes) */
    qtest_qmp_fds_assert_success(to, &fds[0], 1,
                                 "{ 'execute': 'getfd',"
                                 "  'arguments': { 'fdname': 'fd-mig' }}");
    qtest_qmp_fds_assert_success(from, &fds[1], 1,
                                 "{ 'execute': 'getfd',"
                                 "  'arguments': { 'fdname': 'fd-mig' }}");

    /* The test process no longer needs its own copies */
    close(fds[0]);
    close(fds[1]);

    return NULL;
}

test_migrate_fd_finish_hook() QEMU

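Check that the two 'fd-mig' fds are already gone: closefd is issued on both sides and is expected to fail with a "not found" error.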

/*
 * Finish hook for the fd-based tests.
 *
 * Issues closefd for 'fd-mig' on the source and on the destination and
 * asserts that both calls fail with a "not found" error.  @opaque is not
 * used.
 */
static void test_migrate_fd_finish_hook(QTestState *from, QTestState *to, void *opaque)
{
    QDict *rsp;
    const char *error_desc;

    /* Test closing fds */
    /* We assume, that QEMU removes named fd from its list,
     * so this should fail */
    rsp = qtest_qmp(from, "{ 'execute': 'closefd',"
                          "  'arguments': { 'fdname': 'fd-mig' }}");
    /* Source side: the reply must be an error with this exact description */
    g_assert_true(qdict_haskey(rsp, "error"));
    error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc");
    g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found");
    qobject_unref(rsp);

    /* Destination side: same command, same expected error */
    rsp = qtest_qmp(to, "{ 'execute': 'closefd',"
                        "  'arguments': { 'fdname': 'fd-mig' }}");
    g_assert_true(qdict_haskey(rsp, "error"));
    error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc");
    g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found");
    qobject_unref(rsp);
}