diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index 455aa62192..7a4a3b1d44 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -18,8 +18,10 @@ use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::identity::Resource; use crate::db::lookup::LookupPath; +use crate::db::model::ByteCount; use crate::db::model::Generation; use crate::db::model::Instance; +use crate::db::model::InstanceCpuCount; use crate::db::model::InstanceRuntimeState; use crate::db::model::Migration; use crate::db::model::MigrationState; @@ -50,6 +52,7 @@ use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; use omicron_common::api::external::MessagePair; use omicron_common::api::external::ResourceType; +use omicron_common::api::external::UpdateResult; use omicron_common::bail_unless; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; @@ -793,6 +796,61 @@ impl DataStore { Ok(updated) } + pub async fn instance_resize( + &self, + opctx: &OpContext, + instance_id: InstanceUuid, + cpus: InstanceCpuCount, + memory: ByteCount, + ) -> UpdateResult<()> { + use db::model::InstanceState as DbInstanceState; + use db::schema::instance::dsl; + + const ALLOWED_RESIZE_STATES: &[DbInstanceState] = + &[DbInstanceState::NoVmm, DbInstanceState::Failed]; + + // Use check_if_exists to distinguish cases where the instance doesn't + // exist at all from cases where it exists but is in the wrong state. + let instance_id = instance_id.into_untyped_uuid(); + let updated = diesel::update(dsl::instance) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(instance_id)) + .filter(dsl::state.eq_any(ALLOWED_RESIZE_STATES)) + .set((dsl::ncpus.eq(cpus), dsl::memory.eq(memory))) + .check_if_exists::(instance_id.into_untyped_uuid()) + .execute_and_check(&*self.pool_connection_authorized(&opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Instance, + LookupType::ById(instance_id), + ), + ) + })?; + + match updated.status { + UpdateStatus::Updated => Ok(()), + UpdateStatus::NotUpdatedButExists => { + // This query should only fail to update an extant instance if + // it's in the wrong state. + let found_state = updated.found.runtime().nexus_state; + if !ALLOWED_RESIZE_STATES.contains(&found_state) { + let external_state: external::InstanceState = + found_state.into(); + Err(Error::conflict(format!( + "instance is in state {} but must be \ + stopped to be resized", + external_state + ))) + } else { + Err(Error::internal_error("failed to resize instance")) + } + } + } + } + /// Lists all instances on in-service sleds with active Propolis VMM /// processes, returning the instance along with the VMM on which it's /// running, the sled on which the VMM is running, and the project that owns diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 344d2688f7..a1ac744dd9 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -39,6 +39,7 @@ use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; +use omicron_common::api::external::InstanceCpuCount; use omicron_common::api::external::InstanceState; use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; @@ -241,6 +242,7 @@ impl super::Nexus { project_lookup.lookup_for(authz::Action::CreateChild).await?; // Validate parameters + check_instance_cpu_memory_sizes(params.ncpus, params.memory)?; if params.disks.len() > MAX_DISKS_PER_INSTANCE as usize { return Err(Error::invalid_request(&format!( "cannot attach more than {} disks to instance", @@ -253,12 +255,6 @@ impl super::Nexus { .await?; } } - if params.ncpus.0 > MAX_VCPU_PER_INSTANCE { - return Err(Error::invalid_request(&format!( - "cannot have more than {} vCPUs per instance", - MAX_VCPU_PER_INSTANCE - ))); - } if params.external_ips.len() > MAX_EXTERNAL_IPS_PER_INSTANCE { return Err(Error::invalid_request(&format!( "An instance may not have more than {} external IP addresses", @@ -303,43 +299,6 @@ impl super::Nexus { } } - // Reject instances where the memory is not at least - // MIN_MEMORY_BYTES_PER_INSTANCE - if params.memory.to_bytes() < u64::from(MIN_MEMORY_BYTES_PER_INSTANCE) { - return Err(Error::invalid_value( - "size", - format!( - "memory must be at least {}", - ByteCount::from(MIN_MEMORY_BYTES_PER_INSTANCE) - ), - )); - } - - // Reject instances where the memory is not divisible by - // MIN_MEMORY_BYTES_PER_INSTANCE - if (params.memory.to_bytes() % u64::from(MIN_MEMORY_BYTES_PER_INSTANCE)) - != 0 - { - return Err(Error::invalid_value( - "size", - format!( - "memory must be divisible by {}", - ByteCount::from(MIN_MEMORY_BYTES_PER_INSTANCE) - ), - )); - } - - // Reject instances where the memory is greater than the limit - if params.memory.to_bytes() > MAX_MEMORY_BYTES_PER_INSTANCE { - return Err(Error::invalid_value( - "size", - format!( - "memory must be less than or equal to {}", - ByteCount::try_from(MAX_MEMORY_BYTES_PER_INSTANCE).unwrap() - ), - )); - } - let actor = opctx.authn.actor_required().internal_context( "loading current user's ssh keys for new Instance", )?; @@ -649,6 +608,29 @@ impl super::Nexus { self.db_datastore.instance_fetch_with_vmm(opctx, &authz_instance).await } + /// Resizes an instance. + pub(crate) async fn instance_resize( + &self, + opctx: &OpContext, + instance_lookup: &lookup::Instance<'_>, + new_size: params::InstanceResize, + ) -> UpdateResult { + check_instance_cpu_memory_sizes(new_size.ncpus, new_size.memory)?; + let (.., authz_instance) = + instance_lookup.lookup_for(authz::Action::Modify).await?; + + self.db_datastore + .instance_resize( + opctx, + InstanceUuid::from_untyped_uuid(authz_instance.id()), + new_size.ncpus.into(), + new_size.memory.into(), + ) + .await?; + + self.db_datastore.instance_fetch_with_vmm(opctx, &authz_instance).await + } + /// Idempotently ensures that the sled specified in `db_instance` does not /// have a record of the instance. If the instance is currently running on /// this sled, this operation rudely terminates it. @@ -1884,6 +1866,57 @@ pub(crate) async fn notify_instance_updated( } } +/// Determines whether the supplied instance sizes (CPU count and memory size) +/// are acceptable. +fn check_instance_cpu_memory_sizes( + ncpus: InstanceCpuCount, + memory: ByteCount, +) -> Result<(), Error> { + if ncpus.0 > MAX_VCPU_PER_INSTANCE { + return Err(Error::invalid_request(&format!( + "cannot have more than {} vCPUs per instance", + MAX_VCPU_PER_INSTANCE + ))); + } + + // Reject instances where the memory is not at least + // MIN_MEMORY_BYTES_PER_INSTANCE + if memory.to_bytes() < u64::from(MIN_MEMORY_BYTES_PER_INSTANCE) { + return Err(Error::invalid_value( + "size", + format!( + "memory must be at least {}", + ByteCount::from(MIN_MEMORY_BYTES_PER_INSTANCE) + ), + )); + } + + // Reject instances where the memory is not divisible by + // MIN_MEMORY_BYTES_PER_INSTANCE + if (memory.to_bytes() % u64::from(MIN_MEMORY_BYTES_PER_INSTANCE)) != 0 { + return Err(Error::invalid_value( + "size", + format!( + "memory must be divisible by {}", + ByteCount::from(MIN_MEMORY_BYTES_PER_INSTANCE) + ), + )); + } + + // Reject instances where the memory is greater than the limit + if memory.to_bytes() > MAX_MEMORY_BYTES_PER_INSTANCE { + return Err(Error::invalid_value( + "size", + format!( + "memory must be less than or equal to {}", + ByteCount::try_from(MAX_MEMORY_BYTES_PER_INSTANCE).unwrap() + ), + )); + } + + Ok(()) +} + /// Determines the disposition of a request to start an instance given its state /// (and its current VMM's state, if it has one) in the database. fn instance_start_allowed( @@ -2178,4 +2211,39 @@ mod tests { assert!(instance_start_allowed(&logctx.log, &state).is_ok()); logctx.cleanup_successful(); } + + #[test] + fn test_invalid_instance_shapes() { + // Too many CPUs. + assert!(check_instance_cpu_memory_sizes( + InstanceCpuCount(MAX_VCPU_PER_INSTANCE + 1), + ByteCount::from_gibibytes_u32(1) + ) + .is_err()); + + // Too little memory. + assert!(check_instance_cpu_memory_sizes( + InstanceCpuCount(1), + ByteCount::from_mebibytes_u32(1) + ) + .is_err()); + + // Acceptable amount of memory, but not divisible into GiB. + assert!(check_instance_cpu_memory_sizes( + InstanceCpuCount(1), + ByteCount::from_mebibytes_u32(1024 + 1) + ) + .is_err()); + + let gib = + u32::try_from(MAX_MEMORY_BYTES_PER_INSTANCE / (1024 * 1024 * 1024)) + .unwrap(); + + // A whole number of GiB, but too many of them. + assert!(check_instance_cpu_memory_sizes( + InstanceCpuCount(1), + ByteCount::from_gibibytes_u32(gib + 1) + ) + .is_err()); + } } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a87bdd834d..085a724323 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -171,6 +171,7 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(instance_reboot)?; api.register(instance_start)?; api.register(instance_stop)?; + api.register(instance_resize)?; api.register(instance_disk_list)?; api.register(instance_disk_attach)?; api.register(instance_disk_detach)?; @@ -2866,6 +2867,42 @@ async fn instance_delete( .await } +/// Resize instance +#[endpoint { + method = PUT, + path = "/v1/instances/{instance}/resize", + tags = ["instances"], +}] +async fn instance_resize( + rqctx: RequestContext, + query_params: Query, + path_params: Path, + new_size: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let resize = new_size.into_inner(); + let instance_selector = params::InstanceSelector { + project: query.project, + instance: path.instance, + }; + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let instance_lookup = + nexus.instance_lookup(&opctx, instance_selector)?; + let instance = + nexus.instance_resize(&opctx, &instance_lookup, resize).await?; + Ok(HttpResponseOk(instance.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await +} + // TODO should this be in the public API? /// Migrate an instance #[endpoint { diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 6e4e59688a..6f18a44a0b 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -359,6 +359,12 @@ pub static DEMO_INSTANCE_REBOOT_URL: Lazy = Lazy::new(|| { *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR ) }); +pub static DEMO_INSTANCE_RESIZE_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/resize?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); pub static DEMO_INSTANCE_MIGRATE_URL: Lazy = Lazy::new(|| { format!( "/v1/instances/{}/migrate?{}", @@ -1823,6 +1829,17 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::Post(serde_json::Value::Null) ], }, + VerifyEndpoint { + url: &DEMO_INSTANCE_RESIZE_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Put(serde_json::to_value(params::InstanceResize { + ncpus: InstanceCpuCount(1), + memory: ByteCount::from_gibibytes_u32(1) + }).unwrap()) + ], + }, VerifyEndpoint { url: &DEMO_INSTANCE_MIGRATE_URL, visibility: Visibility::Protected, diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 2e41fac3a4..fca0fd6329 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -1611,6 +1611,167 @@ async fn test_instances_invalid_creation_returns_bad_request( )); } +#[nexus_test] +async fn test_instance_resize(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.server_context().nexus; + let instance_name = "elastic-band"; + + create_project_and_pool(&client).await; + + // Create a stopped instance. + let instance = create_instance_with( + client, + PROJECT_NAME, + instance_name, + ¶ms::InstanceNetworkInterfaceAttachment::None, + vec![], + vec![], + false, + ) + .await; + + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); + let resize_url = + format!("/v1/instances/{}/resize", instance_id.to_string()); + + // Resizing the instance right away, while it's still stopped, is legal. + let _ = NexusRequest::new( + RequestBuilder::new(client, Method::PUT, &resize_url) + .body(Some(¶ms::InstanceResize { + ncpus: InstanceCpuCount(8), + memory: ByteCount::from_gibibytes_u32(8), + })) + .expect_status(Some(StatusCode::OK)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::() + .unwrap(); + + // Start the instance; now resizing it should be illegal. + instance_post(&client, instance_name, InstanceOp::Start).await; + instance_simulate(nexus, &instance_id).await; + let _ = NexusRequest::new( + RequestBuilder::new(client, Method::PUT, &resize_url) + .body(Some(¶ms::InstanceResize { + ncpus: InstanceCpuCount(4), + memory: ByteCount::from_gibibytes_u32(2), + })) + .expect_status(Some(StatusCode::CONFLICT)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Once the instance has stopped, it's OK to resize it again. + instance_post(&client, instance_name, InstanceOp::Stop).await; + instance_simulate(nexus, &instance_id).await; + instance_wait_for_state(client, instance_id, InstanceState::Stopped).await; + let _ = NexusRequest::new( + RequestBuilder::new(client, Method::PUT, &resize_url) + .body(Some(¶ms::InstanceResize { + ncpus: InstanceCpuCount(4), + memory: ByteCount::from_gibibytes_u32(2), + })) + .expect_status(Some(StatusCode::OK)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::() + .unwrap(); +} + +#[nexus_test] +async fn test_instance_resize_invalid_parameters( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let instance_name = "elastic-band"; + + create_project_and_pool(&client).await; + let instance = create_instance_with( + client, + PROJECT_NAME, + instance_name, + ¶ms::InstanceNetworkInterfaceAttachment::None, + vec![], + vec![], + false, + ) + .await; + + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); + let resize_url = + format!("/v1/instances/{}/resize", instance_id.to_string()); + + // Too many vCPUs. + let _ = NexusRequest::new( + RequestBuilder::new(client, Method::PUT, &resize_url) + .body(Some(¶ms::InstanceResize { + ncpus: InstanceCpuCount(MAX_VCPU_PER_INSTANCE + 1), + memory: ByteCount::from_gibibytes_u32(4), + })) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Too little memory. + let _ = NexusRequest::new( + RequestBuilder::new(client, Method::PUT, &resize_url) + .body(Some(¶ms::InstanceResize { + ncpus: InstanceCpuCount(MAX_VCPU_PER_INSTANCE + 1), + memory: ByteCount::from_gibibytes_u32(0), + })) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Too much memory (but a proper multiple of the minimum memory size). + let max_gib = + u32::try_from(MAX_MEMORY_BYTES_PER_INSTANCE / (1024 * 1024 * 1024)) + .unwrap(); + let _ = NexusRequest::new( + RequestBuilder::new(client, Method::PUT, &resize_url) + .body(Some(¶ms::InstanceResize { + ncpus: InstanceCpuCount(1), + memory: ByteCount::from_gibibytes_u32(max_gib + 1), + })) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // An acceptable amount of memory, but not a multiple of the minimum + // memory size of 1 GiB. + let min_mib = MIN_MEMORY_BYTES_PER_INSTANCE / (1024 * 1024); + let _ = NexusRequest::new( + RequestBuilder::new(client, Method::PUT, &resize_url) + .body(Some(¶ms::InstanceResize { + ncpus: InstanceCpuCount(1), + memory: ByteCount::from_mebibytes_u32(min_mib + 1), + })) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); +} + #[nexus_test] async fn test_instance_using_image_from_other_project_fails( cptestctx: &ControlPlaneTestContext, diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 4af018c5af..98015b4f31 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -58,6 +58,7 @@ instance_network_interface_list GET /v1/network-interfaces instance_network_interface_update PUT /v1/network-interfaces/{interface} instance_network_interface_view GET /v1/network-interfaces/{interface} instance_reboot POST /v1/instances/{instance}/reboot +instance_resize PUT /v1/instances/{instance}/resize instance_serial_console GET /v1/instances/{instance}/serial-console instance_serial_console_stream GET /v1/instances/{instance}/serial-console/stream instance_ssh_public_key_list GET /v1/instances/{instance}/ssh-public-keys diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 8dcce913b3..03cf61d3f5 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1093,6 +1093,16 @@ impl JsonSchema for UserData { } } +/// Parameters for resizing an instance. +#[derive(Clone, Copy, Debug, Deserialize, Serialize, JsonSchema)] +pub struct InstanceResize { + /// The number of CPUs to assign to this instance. + pub ncpus: InstanceCpuCount, + + /// The amount of memory to assign to this instance. + pub memory: ByteCount, +} + /// Migration parameters for an `Instance` #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct InstanceMigrate { diff --git a/openapi/nexus.json b/openapi/nexus.json index da77eec2a8..7370731b78 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -2378,6 +2378,62 @@ } } }, + "/v1/instances/{instance}/resize": { + "put": { + "tags": [ + "instances" + ], + "summary": "Resize instance", + "operationId": "instance_resize", + "parameters": [ + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "path", + "name": "instance", + "description": "Name or ID of the instance", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceResize" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Instance" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/instances/{instance}/serial-console": { "get": { "tags": [ @@ -15519,6 +15575,32 @@ } } }, + "InstanceResize": { + "description": "Parameters for resizing an instance.", + "type": "object", + "properties": { + "memory": { + "description": "The amount of memory to assign to this instance.", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + }, + "ncpus": { + "description": "The number of CPUs to assign to this instance.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceCpuCount" + } + ] + } + }, + "required": [ + "memory", + "ncpus" + ] + }, "InstanceResultsPage": { "description": "A single page of results", "type": "object",