diff options
author | Frederick Mayle <fmayle@google.com> | 2024-04-10 17:02:39 -0700 |
---|---|---|
committer | crosvm LUCI <crosvm-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2024-04-11 15:51:23 +0000 |
commit | c64b320d356d6fbb7331a901a89fd3181aa78641 (patch) | |
tree | 9a0222a64f342cbed84674cdd56bdf68a2ccfa9f | |
parent | 5efff8bcddd0dedfa2de9026c9f091039141885a (diff) | |
download | crosvm-c64b320d356d6fbb7331a901a89fd3181aa78641.tar.gz |
e2e_tests: wait for restore to complete before sending commands
I can't reproduce the test flake, but I suspect the issue is that, in
the test infa machines, the restore is slow enough that it causes the 5
second timeout for sending a command to the guest to fail.
Instead of immediately proceeding with the test, wait for the restore
work to finish first. This allows us to use a longer timeout and also
should make it more obvious which step of the test is slow and/or flaky.
BUG=b:333090069
TEST=./tools/run_tests --dut=host --filter-expr="test(snapshot_restore) | test(suspend_resume)" --run-root-tests --no-unit-tests --repetitions 100
Change-Id: I75ad900dcd080d4fa76ceca14974ac044b41e7d5
Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/5446126
Reviewed-by: Dennis Kempin <denniskempin@google.com>
Commit-Queue: Frederick Mayle <fmayle@google.com>
Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
-rw-r--r-- | e2e_tests/fixture/src/vm.rs | 22 | ||||
-rw-r--r-- | e2e_tests/tests/suspend_resume.rs | 4 | ||||
-rw-r--r-- | e2e_tests/tests/vsock.rs | 10 | ||||
-rw-r--r-- | src/crosvm/cmdline.rs | 6 |
4 files changed, 32 insertions, 10 deletions
diff --git a/e2e_tests/fixture/src/vm.rs b/e2e_tests/fixture/src/vm.rs index 483fd6ab0..ee5197832 100644 --- a/e2e_tests/fixture/src/vm.rs +++ b/e2e_tests/fixture/src/vm.rs @@ -406,7 +406,27 @@ impl TestVm { TestVm::new_generic(TestVmSys::append_config_args, cfg, false) } - pub fn new_cold_restore(cfg: Config) -> Result<TestVm> { + /// Create `TestVm` from a snapshot, using `--restore` but NOT `--suspended`. + pub fn new_restore(cfg: Config) -> Result<TestVm> { + let mut vm = TestVm::new_generic_restore(TestVmSys::append_config_args, cfg, false)?; + // Send a resume request to wait for the restore to finish. + // We don't want to return from this function until the restore is complete, otherwise it + // will be difficult to differentiate between a slow restore and a slow response from the + // guest. + let vm = run_with_timeout( + move || { + vm.resume_full().expect("failed to resume after VM restore"); + vm + }, + Duration::from_secs(60), + ) + .expect("VM restore timeout"); + + Ok(vm) + } + + /// Create `TestVm` from a snapshot, using `--restore` AND `--suspended`. + pub fn new_restore_suspended(cfg: Config) -> Result<TestVm> { TestVm::new_generic_restore(TestVmSys::append_config_args, cfg, false) } diff --git a/e2e_tests/tests/suspend_resume.rs b/e2e_tests/tests/suspend_resume.rs index f893d2cf6..c814d39e8 100644 --- a/e2e_tests/tests/suspend_resume.rs +++ b/e2e_tests/tests/suspend_resume.rs @@ -119,7 +119,7 @@ fn suspend_resume_system(disabled_sandbox: bool) -> anyhow::Result<()> { // shut down VM drop(vm); // Start up VM with cold restore. - let mut vm = TestVm::new_cold_restore(new_config().extra_args(vec![ + let mut vm = TestVm::new_restore_suspended(new_config().extra_args(vec![ "--restore".to_string(), snap1_path.to_str().unwrap().to_string(), "--suspended".to_string(), @@ -213,7 +213,7 @@ fn snapshot_vhost_user() { snap_path.to_str().unwrap().to_string(), "--no-usb".to_string(), ]); - let _vm = TestVm::new_cold_restore(config).unwrap(); + let _vm = TestVm::new_restore(config).unwrap(); } fn create_net_config(socket: &Path) -> VuConfig { diff --git a/e2e_tests/tests/vsock.rs b/e2e_tests/tests/vsock.rs index 641f34f53..dfdb13c00 100644 --- a/e2e_tests/tests/vsock.rs +++ b/e2e_tests/tests/vsock.rs @@ -64,7 +64,6 @@ fn host_to_guest_disable_sandbox() { } #[test] -#[ignore = "b/333090177 test is flaky"] fn host_to_guest_snapshot_restore() { let guest_port = generate_vhost_port(); let guest_cid = generate_guest_cid(); @@ -90,7 +89,7 @@ fn host_to_guest_snapshot_restore() { ]) .with_stdout_hardware("legacy-virtio-console"); drop(vm); - vm = TestVm::new_cold_restore(config).unwrap(); + vm = TestVm::new_restore(config).unwrap(); host_to_guest_connection(&mut vm, guest_cid, guest_port); } @@ -120,7 +119,7 @@ fn host_to_guest_disable_sandbox_snapshot_restore() { ]) .with_stdout_hardware("legacy-virtio-console"); drop(vm); - vm = TestVm::new_cold_restore(config.disable_sandbox()).unwrap(); + vm = TestVm::new_restore(config.disable_sandbox()).unwrap(); host_to_guest_connection(&mut vm, guest_cid, guest_port); } @@ -175,7 +174,6 @@ fn guest_to_host_disable_sandbox() { } #[test] -#[ignore = "b/333090177 test is flaky"] fn guest_to_host_snapshot_restore() { let host_port = generate_vhost_port(); let guest_cid = generate_guest_cid(); @@ -201,7 +199,7 @@ fn guest_to_host_snapshot_restore() { ]) .with_stdout_hardware("legacy-virtio-console"); drop(vm); - vm = TestVm::new_cold_restore(config).unwrap(); + vm = TestVm::new_restore(config).unwrap(); guest_to_host_connection(&mut vm, host_port); } @@ -232,7 +230,7 @@ fn guest_to_host_disable_sandbox_snapshot_restore() { ]) .with_stdout_hardware("legacy-virtio-console"); drop(vm); - vm = TestVm::new_cold_restore(config.disable_sandbox()).unwrap(); + vm = TestVm::new_restore(config.disable_sandbox()).unwrap(); guest_to_host_connection(&mut vm, host_port); } diff --git a/src/crosvm/cmdline.rs b/src/crosvm/cmdline.rs index 04f343dcd..3033ee77e 100644 --- a/src/crosvm/cmdline.rs +++ b/src/crosvm/cmdline.rs @@ -289,7 +289,11 @@ pub struct MakeRTCommand { #[derive(FromArgs)] #[argh(subcommand, name = "resume")] -/// Resumes the crosvm instance +/// Resumes the crosvm instance. No-op if already running. When starting crosvm with `--restore`, +/// this command can be used to wait until the restore is complete +// Implementation note: All the restore work happens before crosvm becomes able to process incoming +// commands, so really all commands can be used to wait for restore to complete, but few are side +// effect free. pub struct ResumeCommand { #[argh(positional, arg_name = "VM_SOCKET")] /// VM Socket path |