From c52827d42e48739690b39f01ab619b089b701502 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Thu, 3 Oct 2024 11:41:15 -0700 Subject: [PATCH] [nexus] allow reconfiguring auto-restart policies (#6743) This commit extends the `instance-reconfigure` API endpoint added in #6585 to also allow setting instance auto-restart policies (as added in #6503). I've also added the actual auto-restart policy to the external API instance view, along with the boolean `auto_restart_enabled` added in #6503. This way, it's possible to change just the boot disk by providing the current auto-restart policy in an instance POST. --- common/src/api/external/mod.rs | 18 +- nexus/db-model/src/instance.rs | 5 + nexus/db-queries/src/db/datastore/instance.rs | 330 +++++++++++------- nexus/src/app/instance.rs | 4 +- nexus/src/app/sagas/instance_create.rs | 11 +- nexus/tests/integration_tests/endpoints.rs | 5 +- nexus/tests/integration_tests/instances.rs | 297 +++++++++++----- nexus/types/src/external_api/params.rs | 5 + openapi/nexus-internal.json | 28 ++ openapi/nexus.json | 18 + 10 files changed, 490 insertions(+), 231 deletions(-) diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index a34f5b71ac..d43bcbb8d4 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -1215,6 +1215,20 @@ pub struct InstanceAutoRestartStatus { #[serde(rename = "auto_restart_enabled")] pub enabled: bool, + /// The auto-restart policy configured for this instance, or `None` if no + /// explicit policy is configured. + /// + /// If this is not present, then this instance uses the default auto-restart + /// policy, which may or may not allow it to be restarted. The + /// `auto_restart_enabled` field indicates whether the instance will be + /// automatically restarted. + // + // Rename this field, as the struct is `#[serde(flatten)]`ed into the + // `Instance` type, and we would like the field to be prefixed with + // `auto_restart`. + #[serde(rename = "auto_restart_policy")] + pub policy: Option, + /// The time at which the auto-restart cooldown period for this instance /// completes, permitting it to be automatically restarted again. If the /// instance enters the `Failed` state, it will not be restarted until after @@ -1233,7 +1247,9 @@ pub struct InstanceAutoRestartStatus { /// A policy determining when an instance should be automatically restarted by /// the control plane. -#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, Eq, PartialEq, +)] #[serde(rename_all = "snake_case")] pub enum InstanceAutoRestartPolicy { /// The instance should not be automatically restarted by the control plane diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index 2842a28f8f..e7aa989971 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -537,4 +537,9 @@ mod optional_time_delta { pub struct InstanceUpdate { #[diesel(column_name = boot_disk_id)] pub boot_disk_id: Option, + + /// The auto-restart policy for this instance. If this is `None`, it will + /// set the instance's auto-restart policy to `NULL`. + #[diesel(column_name = auto_restart_policy)] + pub auto_restart_policy: Option, } diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index e89cd8f234..1662504865 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -223,16 +223,23 @@ impl From for external::Instance { }, ); - let policy = value - .instance - .auto_restart - .policy - .unwrap_or(InstanceAutoRestart::DEFAULT_POLICY); - let enabled = match policy { + let policy = value.instance.auto_restart.policy; + // The active policy for this instance --- either its configured + // policy or the default. We report the configured policy as the + // instance's policy, but we must use this to determine whether it + // will be auto-restarted, since it may have no configured policy. + let active_policy = + policy.unwrap_or(InstanceAutoRestart::DEFAULT_POLICY); + + let enabled = match active_policy { InstanceAutoRestartPolicy::Never => false, InstanceAutoRestartPolicy::BestEffort => true, }; - external::InstanceAutoRestartStatus { enabled, cooldown_expiration } + external::InstanceAutoRestartStatus { + enabled, + policy: policy.map(Into::into), + cooldown_expiration, + } }; Self { @@ -572,6 +579,27 @@ impl DataStore { ) -> LookupResult { opctx.authorize(authz::Action::Read, authz_instance).await?; + self.instance_fetch_with_vmm_on_conn( + &*self.pool_connection_authorized(opctx).await?, + authz_instance, + ) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Instance, + LookupType::ById(authz_instance.id()), + ), + ) + }) + } + + async fn instance_fetch_with_vmm_on_conn( + &self, + conn: &async_bb8_diesel::Connection, + authz_instance: &authz::Instance, + ) -> Result { use db::schema::instance::dsl as instance_dsl; use db::schema::vmm::dsl as vmm_dsl; @@ -585,19 +613,8 @@ impl DataStore { .and(vmm_dsl::time_deleted.is_null())), ) .select((Instance::as_select(), Option::::as_select())) - .get_result_async::<(Instance, Option)>( - &*self.pool_connection_authorized(opctx).await?, - ) - .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByLookup( - ResourceType::Instance, - LookupType::ById(authz_instance.id()), - ), - ) - })?; + .get_result_async::<(Instance, Option)>(conn) + .await?; Ok(InstanceAndActiveVmm { instance, vmm }) } @@ -1013,134 +1030,39 @@ impl DataStore { ) -> Result { opctx.authorize(authz::Action::Modify, authz_instance).await?; - use crate::db::model::InstanceState; - - use db::schema::disk::dsl as disk_dsl; use db::schema::instance::dsl as instance_dsl; - use db::schema::vmm::dsl as vmm_dsl; let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; - let (instance, vmm) = self + let instance_and_vmm = self .transaction_retry_wrapper("reconfigure_instance") .transaction(&conn, |conn| { let err = err.clone(); - let update = update.clone(); + let InstanceUpdate { boot_disk_id, auto_restart_policy } = + update.clone(); async move { - // * Allow reconfiguration in NoVmm because there is no VMM - // to contend with. - // * Allow reconfiguration in Failed to allow changing the - // boot disk of a failed instance and free its boot disk - // for detach. - // * Allow reconfiguration in Creating because one of the - // last steps of instance creation, while the instance is - // still in Creating, is to reconfigure the instance to - // the desired boot disk. - let ok_to_reconfigure_instance_states = [ - InstanceState::NoVmm, - InstanceState::Failed, - InstanceState::Creating, - ]; - - let instance_state = instance_dsl::instance - .filter(instance_dsl::id.eq(authz_instance.id())) - .filter(instance_dsl::time_deleted.is_null()) - .select(instance_dsl::state) - .first_async::(&conn) - .await; - - match instance_state { - Ok(state) => { - let state_ok = ok_to_reconfigure_instance_states - .contains(&state); - - if !state_ok { - return Err(err.bail(Error::conflict( - "instance must be stopped to update", - ))); - } - } - Err(diesel::NotFound) => { - // If the instance simply doesn't exist, we - // shouldn't retry. Bail with a useful error. - return Err(err.bail(Error::not_found_by_id( - ResourceType::Instance, - &authz_instance.id(), - ))); - } - Err(e) => { - return Err(e); - } - } - - if let Some(disk_id) = update.boot_disk_id { - // Ensure the disk is currently attached before updating - // the database. - let expected_state = api::external::DiskState::Attached( - authz_instance.id(), - ); - - let attached_disk: Option = disk_dsl::disk - .filter(disk_dsl::id.eq(disk_id)) - .filter( - disk_dsl::attach_instance_id - .eq(authz_instance.id()), - ) - .filter( - disk_dsl::disk_state.eq(expected_state.label()), - ) - .select(disk_dsl::id) - .first_async::(&conn) - .await - .optional()?; - - if attached_disk.is_none() { - return Err(err.bail(Error::conflict( - "boot disk must be attached", - ))); - } - } - - // if and when `Update` can update other fields, set them - // here. - // - // NOTE: from this point forward it is OK if we update the - // instance's `boot_disk_id` column with the updated value - // again. It will have already been assigned with constraint - // checking performed above, so updates will just be - // repetitive, not harmful. - - // Update the row. We don't care about the returned - // UpdateStatus, either way the database has been updated - // with the state we're setting. + // Set the auto-restart policy. diesel::update(instance_dsl::instance) .filter(instance_dsl::id.eq(authz_instance.id())) - .set(update) + .set( + instance_dsl::auto_restart_policy + .eq(auto_restart_policy), + ) .execute_async(&conn) .await?; - // TODO: dedupe this query and `instance_fetch_with_vmm`. - // At the moment, we're only allowing instance - // reconfiguration in states that would have no VMM, but - // load it anyway so that we return correct data if this is - // relaxed in the future... - let (instance, vmm) = instance_dsl::instance - .filter(instance_dsl::id.eq(authz_instance.id())) - .filter(instance_dsl::time_deleted.is_null()) - .left_join( - vmm_dsl::vmm.on(vmm_dsl::id - .nullable() - .eq(instance_dsl::active_propolis_id) - .and(vmm_dsl::time_deleted.is_null())), - ) - .select(( - Instance::as_select(), - Option::::as_select(), - )) - .get_result_async(&conn) - .await?; + // Next, set the boot disk if needed. + self.instance_set_boot_disk_on_conn( + &conn, + &err, + authz_instance, + boot_disk_id, + ) + .await?; - Ok((instance, vmm)) + // Finally, fetch the new instance state. + self.instance_fetch_with_vmm_on_conn(&conn, authz_instance) + .await } }) .await @@ -1152,7 +1074,145 @@ impl DataStore { public_error_from_diesel(e, ErrorHandler::Server) })?; - Ok(InstanceAndActiveVmm { instance, vmm }) + Ok(instance_and_vmm) + } + + pub async fn instance_set_boot_disk( + &self, + opctx: &OpContext, + authz_instance: &authz::Instance, + boot_disk_id: Option, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Modify, authz_instance).await?; + + let err = OptionalError::new(); + let conn = self.pool_connection_authorized(opctx).await?; + self.transaction_retry_wrapper("instance_set_boot_disk") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + self.instance_set_boot_disk_on_conn( + &conn, + &err, + authz_instance, + boot_disk_id, + ) + .await?; + Ok(()) + } + }) + .await + .map_err(|e| { + if let Some(err) = err.take() { + return err; + } + + public_error_from_diesel(e, ErrorHandler::Server) + }) + } + + /// Set an instance's boot disk to the provided `boot_disk_id` (or unset it, + /// if `boot_disk_id` is `None`), within an existing transaction. + /// + /// This is factored out as it is used by both + /// [`DataStore::instance_reconfigure`], which mutates many instance fields, + /// and [`DataStore::instance_set_boot_disk`], which only touches the boot + /// disk. + async fn instance_set_boot_disk_on_conn( + &self, + conn: &async_bb8_diesel::Connection, + err: &OptionalError, + authz_instance: &authz::Instance, + boot_disk_id: Option, + ) -> Result<(), diesel::result::Error> { + use db::schema::disk::dsl as disk_dsl; + use db::schema::instance::dsl as instance_dsl; + + // * Allow setting the boot disk in NoVmm because there is no VMM to + // contend with. + // * Allow setting the boot disk in Failed to allow changing the boot + // disk of a failed instance and free its boot disk for detach. + // * Allow setting the boot disk in Creating because one of the last + // steps of instance creation, while the instance is still in + // Creating, is to reconfigure the instance to the desired boot disk. + const OK_TO_SET_BOOT_DISK_STATES: &'static [InstanceState] = &[ + InstanceState::NoVmm, + InstanceState::Failed, + InstanceState::Creating, + ]; + + let maybe_instance = instance_dsl::instance + .filter(instance_dsl::id.eq(authz_instance.id())) + .filter(instance_dsl::time_deleted.is_null()) + .select(Instance::as_select()) + .first_async::(conn) + .await; + let instance = match maybe_instance { + Ok(i) => i, + Err(diesel::NotFound) => { + // If the instance simply doesn't exist, we + // shouldn't retry. Bail with a useful error. + return Err(err.bail(Error::not_found_by_id( + ResourceType::Instance, + &authz_instance.id(), + ))); + } + Err(e) => return Err(e), + }; + + // If the desired boot disk is already set, we're good here, and can + // elide the check that the instance is in an acceptable state to change + // the boot disk. + if instance.boot_disk_id == boot_disk_id { + return Ok(()); + } + + if let Some(disk_id) = boot_disk_id { + // Ensure the disk is currently attached before updating + // the database. + let expected_state = + api::external::DiskState::Attached(authz_instance.id()); + + let attached_disk: Option = disk_dsl::disk + .filter(disk_dsl::id.eq(disk_id)) + .filter(disk_dsl::attach_instance_id.eq(authz_instance.id())) + .filter(disk_dsl::disk_state.eq(expected_state.label())) + .select(disk_dsl::id) + .first_async::(conn) + .await + .optional()?; + + if attached_disk.is_none() { + return Err( + err.bail(Error::conflict("boot disk must be attached")) + ); + } + } + // + // NOTE: from this point forward it is OK if we update the + // instance's `boot_disk_id` column with the updated value + // again. It will have already been assigned with constraint + // checking performed above, so updates will just be + // repetitive, not harmful. + + let r = diesel::update(instance_dsl::instance) + .filter(instance_dsl::id.eq(authz_instance.id())) + .filter(instance_dsl::state.eq_any(OK_TO_SET_BOOT_DISK_STATES)) + .set(instance_dsl::boot_disk_id.eq(boot_disk_id)) + .check_if_exists::(authz_instance.id()) + .execute_and_check(&conn) + .await?; + match r.status { + UpdateStatus::NotUpdatedButExists => { + // This should be the only reason the query would fail... + debug_assert!(!OK_TO_SET_BOOT_DISK_STATES + .contains(&r.found.runtime().nexus_state)); + Err(err.bail(Error::conflict( + "instance must be stopped to set boot disk", + ))) + } + UpdateStatus::Updated => Ok(()), + } } pub async fn project_delete_instance( diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index ca8c441a41..b4ad778735 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -330,7 +330,9 @@ impl super::Nexus { None => None, }; - let update = InstanceUpdate { boot_disk_id }; + let auto_restart_policy = params.auto_restart_policy.map(Into::into); + + let update = InstanceUpdate { boot_disk_id, auto_restart_policy }; self.datastore() .instance_reconfigure(opctx, &authz_instance, update) .await diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index c8680701b1..07f7911ef5 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -44,7 +44,6 @@ pub(crate) struct Params { pub create_params: params::InstanceCreate, pub boundary_switches: HashSet, } - // Several nodes in this saga are wrapped in their own subsaga so that they can // have a parameter that denotes which node they are (e.g., which NIC or which // external IP). They also need the outer saga's parameters. @@ -1077,11 +1076,8 @@ async fn sic_set_boot_disk( .await .map_err(ActionError::action_failed)?; - let initial_configuration = - nexus_db_model::InstanceUpdate { boot_disk_id: Some(authz_disk.id()) }; - datastore - .instance_reconfigure(&opctx, &authz_instance, initial_configuration) + .instance_set_boot_disk(&opctx, &authz_instance, Some(authz_disk.id())) .await .map_err(ActionError::action_failed)?; @@ -1109,11 +1105,8 @@ async fn sic_set_boot_disk_undo( // If there was a boot disk, clear it. If there was not a boot disk, // this is a no-op. - let undo_configuration = - nexus_db_model::InstanceUpdate { boot_disk_id: None }; - datastore - .instance_reconfigure(&opctx, &authz_instance, undo_configuration) + .instance_set_boot_disk(&opctx, &authz_instance, None) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index ff4c5a8712..1aac5a186c 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -434,7 +434,10 @@ pub static DEMO_INSTANCE_CREATE: Lazy = auto_restart_policy: Default::default(), }); pub static DEMO_INSTANCE_UPDATE: Lazy = - Lazy::new(|| params::InstanceUpdate { boot_disk: None }); + Lazy::new(|| params::InstanceUpdate { + boot_disk: None, + auto_restart_policy: None, + }); // The instance needs a network interface, too. pub static DEMO_INSTANCE_NIC_NAME: Lazy = diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 6ca8052bfb..d5f37d62be 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -4157,19 +4157,12 @@ async fn test_cannot_detach_boot_disk(cptestctx: &ControlPlaneTestContext) { assert_eq!(err.message, "boot disk cannot be detached"); // Change the instance's boot disk. - let url_instance_update = format!("/v1/instances/{}", instance.identity.id); - - let builder = - RequestBuilder::new(client, http::Method::PUT, &url_instance_update) - .body(Some(¶ms::InstanceUpdate { boot_disk: None })) - .expect_status(Some(http::StatusCode::OK)); - let response = NexusRequest::new(builder) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("can attempt to reconfigure the instance"); - - let instance = response.parsed_body::().unwrap(); + let instance = expect_instance_reconfigure_ok( + &client, + &instance.identity.id, + params::InstanceUpdate { boot_disk: None, auto_restart_policy: None }, + ) + .await; assert_eq!(instance.boot_disk_id, None); // Now try to detach `disks[0]` again. This should succeed. @@ -4188,14 +4181,32 @@ async fn test_cannot_detach_boot_disk(cptestctx: &ControlPlaneTestContext) { } #[nexus_test] -async fn test_updating_running_instance_is_conflict( +async fn test_updating_running_instance_boot_disk_is_conflict( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; let instance_name = "immediately-running"; + // Test pre-reqs + DiskTest::new(&cptestctx).await; create_project_and_pool(&client).await; + create_disk(&client, PROJECT_NAME, "probablydata").await; + create_disk(&client, PROJECT_NAME, "alsodata").await; + + // Verify disk is there and currently detached + let disks: Vec = + NexusRequest::iter_collection_authn(client, &get_disks_url(), "", None) + .await + .expect("failed to list disks") + .all_items; + assert_eq!(disks.len(), 2); + assert_eq!(disks[0].state, DiskState::Detached); + assert_eq!(disks[1].state, DiskState::Detached); + + let probablydata = Name::try_from(String::from("probablydata")).unwrap(); + let alsodata = Name::try_from(String::from("alsodata")).unwrap(); + let instance_params = params::InstanceCreate { identity: IdentityMetadataCreateParams { name: Name::try_from(String::from(instance_name)).unwrap(), @@ -4208,8 +4219,17 @@ async fn test_updating_running_instance_is_conflict( ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], - disks: vec![], - boot_disk: None, + disks: vec![ + params::InstanceDiskAttachment::Attach( + params::InstanceDiskAttach { name: probablydata.clone() }, + ), + params::InstanceDiskAttachment::Attach( + params::InstanceDiskAttach { name: alsodata.clone() }, + ), + ], + boot_disk: Some(params::InstanceDiskAttachment::Attach( + params::InstanceDiskAttach { name: probablydata.clone() }, + )), start: true, auto_restart_policy: Default::default(), }; @@ -4234,12 +4254,69 @@ async fn test_updating_running_instance_is_conflict( instance_simulate(nexus, &instance_id).await; instance_wait_for_state(client, instance_id, InstanceState::Running).await; - let url_instance_update = format!("/v1/instances/{}", instance_id); + let error = expect_instance_reconfigure_err( + &client, + &instance_id.into_untyped_uuid(), + params::InstanceUpdate { + boot_disk: Some(alsodata.clone().into()), + auto_restart_policy: None, + }, + http::StatusCode::CONFLICT, + ) + .await; + assert_eq!(error.message, "instance must be stopped to set boot disk"); - let builder = - RequestBuilder::new(client, http::Method::PUT, &url_instance_update) - .body(Some(¶ms::InstanceUpdate { boot_disk: None })) - .expect_status(Some(http::StatusCode::CONFLICT)); + // However, we can freely change the auto-restart policy of a running + // instance. + expect_instance_reconfigure_ok( + &client, + &instance_id.into_untyped_uuid(), + params::InstanceUpdate { + // Leave the boot disk the same as the one with which the instance + // was created. + boot_disk: Some(probablydata.clone().into()), + auto_restart_policy: Some(InstanceAutoRestartPolicy::BestEffort), + }, + ) + .await; +} + +#[nexus_test] +async fn test_updating_missing_instance_is_not_found( + cptestctx: &ControlPlaneTestContext, +) { + const UUID_THAT_DOESNT_EXIST: Uuid = + Uuid::from_u128(0x12341234_4321_8765_1234_432143214321); + + let client = &cptestctx.external_client; + + let error = expect_instance_reconfigure_err( + &client, + &UUID_THAT_DOESNT_EXIST, + params::InstanceUpdate { boot_disk: None, auto_restart_policy: None }, + http::StatusCode::NOT_FOUND, + ) + .await; + assert_eq!( + error.message, + format!("not found: instance with id \"{}\"", UUID_THAT_DOESNT_EXIST) + ); +} + +async fn expect_instance_reconfigure_ok( + external_client: &ClientTestContext, + instance_id: &Uuid, + update: params::InstanceUpdate, +) -> Instance { + let url_instance_update = format!("/v1/instances/{instance_id}"); + + let builder = RequestBuilder::new( + external_client, + http::Method::PUT, + &url_instance_update, + ) + .body(Some(&update)) + .expect_status(Some(http::StatusCode::OK)); let response = NexusRequest::new(builder) .authn_as(AuthnMode::PrivilegedUser) @@ -4247,37 +4324,105 @@ async fn test_updating_running_instance_is_conflict( .await .expect("can attempt to reconfigure the instance"); - let error = response.parsed_body::().unwrap(); - assert_eq!(error.message, "instance must be stopped to update"); + response + .parsed_body::() + .expect("response should be parsed as an instance") } +async fn expect_instance_reconfigure_err( + external_client: &ClientTestContext, + instance_id: &Uuid, + update: params::InstanceUpdate, + status: http::StatusCode, +) -> HttpErrorResponseBody { + let url_instance_update = format!("/v1/instances/{instance_id}"); + + let builder = RequestBuilder::new( + external_client, + http::Method::PUT, + &url_instance_update, + ) + .body(Some(&update)) + .expect_status(Some(status)); + + let response = NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("can attempt to reconfigure the instance"); + + response + .parsed_body::() + .expect("error response should parse successfully") +} +// Test reconfiguring an instance's auto-restart policy. #[nexus_test] -async fn test_updating_missing_instance_is_not_found( +async fn test_auto_restart_policy_can_be_changed( cptestctx: &ControlPlaneTestContext, ) { - const UUID_THAT_DOESNT_EXIST: Uuid = - Uuid::from_u128(0x12341234_4321_8765_1234_432143214321); - let url_instance_update = - format!("/v1/instances/{}", UUID_THAT_DOESNT_EXIST); - let client = &cptestctx.external_client; + let instance_name = "reincarnation-station"; - let builder = - RequestBuilder::new(client, http::Method::PUT, &url_instance_update) - .body(Some(¶ms::InstanceUpdate { boot_disk: None })) - .expect_status(Some(http::StatusCode::NOT_FOUND)); + create_project_and_pool(&client).await; + + let instance_params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: String::from("stuff"), + }, + ncpus: InstanceCpuCount::try_from(2).unwrap(), + memory: ByteCount::from_gibibytes_u32(4), + hostname: instance_name.parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + boot_disk: None, + disks: Vec::new(), + start: true, + // Start out with None + auto_restart_policy: None, + }; + let builder = + RequestBuilder::new(client, http::Method::POST, &get_instances_url()) + .body(Some(&instance_params)) + .expect_status(Some(http::StatusCode::CREATED)); let response = NexusRequest::new(builder) .authn_as(AuthnMode::PrivilegedUser) .execute() .await - .expect("can attempt to reconfigure the instance"); + .expect("Expected instance creation to work!"); - let error = response.parsed_body::().unwrap(); - assert_eq!( - error.message, - format!("not found: instance with id \"{}\"", UUID_THAT_DOESNT_EXIST) - ); + let instance = response.parsed_body::().unwrap(); + + // Starts out as None. + assert_eq!(instance.auto_restart_status.policy, None); + + let assert_reconfigured = |auto_restart_policy| async move { + let instance = expect_instance_reconfigure_ok( + client, + &instance.identity.id, + dbg!(params::InstanceUpdate { + auto_restart_policy, + boot_disk: None, + }), + ) + .await; + assert_eq!( + dbg!(instance).auto_restart_status.policy, + auto_restart_policy, + ); + }; + + // Reconfigure to Never. + assert_reconfigured(Some(InstanceAutoRestartPolicy::Never)).await; + + // Reconfigure to BestEffort + assert_reconfigured(Some(InstanceAutoRestartPolicy::BestEffort)).await; + + // Reconfigure back to None. + assert_reconfigured(None).await; } // Create an instance with boot disk set to one of its attached disks, then set @@ -4346,23 +4491,17 @@ async fn test_boot_disk_can_be_changed(cptestctx: &ControlPlaneTestContext) { assert_eq!(instance.boot_disk_id, Some(disks[0].identity.id)); - // Change the instance's boot disk. - let url_instance_update = format!("/v1/instances/{}", instance.identity.id); - - let builder = - RequestBuilder::new(client, http::Method::PUT, &url_instance_update) - .body(Some(¶ms::InstanceUpdate { - boot_disk: Some(disks[1].identity.id.into()), - })) - .expect_status(Some(http::StatusCode::OK)); - - let response = NexusRequest::new(builder) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("can attempt to reconfigure the instance"); + // Change the instance's boot disk.ity.id); - let instance = response.parsed_body::().unwrap(); + let instance = expect_instance_reconfigure_ok( + &client, + &instance.identity.id, + params::InstanceUpdate { + boot_disk: Some(disks[1].identity.id.into()), + auto_restart_policy: None, + }, + ) + .await; assert_eq!(instance.boot_disk_id, Some(disks[1].identity.id)); } @@ -4418,22 +4557,16 @@ async fn test_boot_disk_must_be_attached(cptestctx: &ControlPlaneTestContext) { let instance = response.parsed_body::().unwrap(); // Update the instance's boot disk to the unattached disk. This should fail. - let url_instance_update = format!("/v1/instances/{}", instance.identity.id); - - let builder = - RequestBuilder::new(client, http::Method::PUT, &url_instance_update) - .body(Some(¶ms::InstanceUpdate { - boot_disk: Some(disks[0].identity.id.into()), - })) - .expect_status(Some(http::StatusCode::CONFLICT)); - let response = NexusRequest::new(builder) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("can attempt to reconfigure the instance"); - - let error = - response.parsed_body::().unwrap(); + let error = expect_instance_reconfigure_err( + &client, + &instance.identity.id, + params::InstanceUpdate { + boot_disk: Some(disks[0].identity.id.into()), + auto_restart_policy: None, + }, + http::StatusCode::CONFLICT, + ) + .await; assert_eq!(error.message, format!("boot disk must be attached")); @@ -4455,19 +4588,15 @@ async fn test_boot_disk_must_be_attached(cptestctx: &ControlPlaneTestContext) { .expect("can attempt to detach boot disk"); // And now it can be made the boot disk. - let builder = - RequestBuilder::new(client, http::Method::PUT, &url_instance_update) - .body(Some(¶ms::InstanceUpdate { - boot_disk: Some(disks[0].identity.id.into()), - })) - .expect_status(Some(http::StatusCode::OK)); - let response = NexusRequest::new(builder) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("can attempt to reconfigure the instance"); - - let instance = response.parsed_body::().unwrap(); + let instance = expect_instance_reconfigure_ok( + &client, + &instance.identity.id, + params::InstanceUpdate { + boot_disk: Some(disks[0].identity.id.into()), + auto_restart_policy: None, + }, + ) + .await; assert_eq!(instance.boot_disk_id, Some(disks[0].identity.id)); } diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 97d92d24a9..7e36d5f1f8 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1079,6 +1079,11 @@ pub struct InstanceUpdate { /// /// If not provided, unset the instance's boot disk. pub boot_disk: Option, + + /// The auto-restart policy for this instance. + /// + /// If not provided, unset the instance's auto-restart policy. + pub auto_restart_policy: Option, } #[inline] diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 9cb476a775..8228ffe28e 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -3382,6 +3382,15 @@ "description": "`true` if this instance's auto-restart policy will permit the control plane to automatically restart it if it enters the `Failed` state.", "type": "boolean" }, + "auto_restart_policy": { + "nullable": true, + "description": "The auto-restart policy configured for this instance, or `None` if no explicit policy is configured.\n\nIf this is not present, then this instance uses the default auto-restart policy, which may or may not allow it to be restarted. The `auto_restart_enabled` field indicates whether the instance will be automatically restarted.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceAutoRestartPolicy" + } + ] + }, "boot_disk_id": { "nullable": true, "description": "the ID of the disk used to boot this Instance, if a specific one is assigned.", @@ -3469,6 +3478,25 @@ "time_run_state_updated" ] }, + "InstanceAutoRestartPolicy": { + "description": "A policy determining when an instance should be automatically restarted by the control plane.", + "oneOf": [ + { + "description": "The instance should not be automatically restarted by the control plane if it fails.", + "type": "string", + "enum": [ + "never" + ] + }, + { + "description": "If this instance is running and unexpectedly fails (e.g. due to a host software crash or unexpected host reboot), the control plane will make a best-effort attempt to restart it. The control plane may choose not to restart the instance to preserve the overall availability of the system.", + "type": "string", + "enum": [ + "best_effort" + ] + } + ] + }, "InstanceCpuCount": { "description": "The number of CPUs in an Instance", "type": "integer", diff --git a/openapi/nexus.json b/openapi/nexus.json index e62d58d854..714e2a25e7 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -15156,6 +15156,15 @@ "description": "`true` if this instance's auto-restart policy will permit the control plane to automatically restart it if it enters the `Failed` state.", "type": "boolean" }, + "auto_restart_policy": { + "nullable": true, + "description": "The auto-restart policy configured for this instance, or `None` if no explicit policy is configured.\n\nIf this is not present, then this instance uses the default auto-restart policy, which may or may not allow it to be restarted. The `auto_restart_enabled` field indicates whether the instance will be automatically restarted.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceAutoRestartPolicy" + } + ] + }, "boot_disk_id": { "nullable": true, "description": "the ID of the disk used to boot this Instance, if a specific one is assigned.", @@ -15807,6 +15816,15 @@ "description": "Parameters of an `Instance` that can be reconfigured after creation.", "type": "object", "properties": { + "auto_restart_policy": { + "nullable": true, + "description": "The auto-restart policy for this instance.\n\nIf not provided, unset the instance's auto-restart policy.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceAutoRestartPolicy" + } + ] + }, "boot_disk": { "nullable": true, "description": "Name or ID of the disk the instance should be instructed to boot from.\n\nIf not provided, unset the instance's boot disk.",