Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add external API to resize a stopped instance #6321

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions nexus/db-queries/src/db/datastore/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ use crate::db::error::public_error_from_diesel;
use crate::db::error::ErrorHandler;
use crate::db::identity::Resource;
use crate::db::lookup::LookupPath;
use crate::db::model::ByteCount;
use crate::db::model::Generation;
use crate::db::model::Instance;
use crate::db::model::InstanceCpuCount;
use crate::db::model::InstanceRuntimeState;
use crate::db::model::Migration;
use crate::db::model::MigrationState;
Expand Down Expand Up @@ -50,6 +52,7 @@ use omicron_common::api::external::LookupResult;
use omicron_common::api::external::LookupType;
use omicron_common::api::external::MessagePair;
use omicron_common::api::external::ResourceType;
use omicron_common::api::external::UpdateResult;
use omicron_common::bail_unless;
use omicron_uuid_kinds::GenericUuid;
use omicron_uuid_kinds::InstanceUuid;
Expand Down Expand Up @@ -793,6 +796,61 @@ impl DataStore {
Ok(updated)
}

pub async fn instance_resize(
&self,
opctx: &OpContext,
instance_id: InstanceUuid,
cpus: InstanceCpuCount,
memory: ByteCount,
) -> UpdateResult<()> {
use db::model::InstanceState as DbInstanceState;
use db::schema::instance::dsl;

const ALLOWED_RESIZE_STATES: &[DbInstanceState] =
&[DbInstanceState::NoVmm, DbInstanceState::Failed];

// Use check_if_exists to distinguish cases where the instance doesn't
// exist at all from cases where it exists but is in the wrong state.
let instance_id = instance_id.into_untyped_uuid();
let updated = diesel::update(dsl::instance)
.filter(dsl::time_deleted.is_null())
.filter(dsl::id.eq(instance_id))
.filter(dsl::state.eq_any(ALLOWED_RESIZE_STATES))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going to ask if we should also check for the propolis_id being null here, but I suppose we can rely on the check constraint that the instance can only be in NoVmm if it's null. Maybe worth commenting here?

.set((dsl::ncpus.eq(cpus), dsl::memory.eq(memory)))
.check_if_exists::<Instance>(instance_id.into_untyped_uuid())
.execute_and_check(&*self.pool_connection_authorized(&opctx).await?)
.await
.map_err(|e| {
public_error_from_diesel(
e,
ErrorHandler::NotFoundByLookup(
ResourceType::Instance,
LookupType::ById(instance_id),
),
)
})?;

match updated.status {
UpdateStatus::Updated => Ok(()),
UpdateStatus::NotUpdatedButExists => {
// This query should only fail to update an extant instance if
// it's in the wrong state.
let found_state = updated.found.runtime().nexus_state;
if !ALLOWED_RESIZE_STATES.contains(&found_state) {
let external_state: external::InstanceState =
found_state.into();
Err(Error::conflict(format!(
"instance is in state {} but must be \
stopped to be resized",
external_state
)))
} else {
Err(Error::internal_error("failed to resize instance"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC this case only happens if the instance's time_deleted is not null. Maybe this should also be a "not found" error, rather than 500ing? I think we generally treat deleted objects the same as nonexistent objects, right?

}
}
}
}

/// Lists all instances on in-service sleds with active Propolis VMM
/// processes, returning the instance along with the VMM on which it's
/// running, the sled on which the VMM is running, and the project that owns
Expand Down
154 changes: 111 additions & 43 deletions nexus/src/app/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use omicron_common::api::external::CreateResult;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::DeleteResult;
use omicron_common::api::external::Error;
use omicron_common::api::external::InstanceCpuCount;
use omicron_common::api::external::InstanceState;
use omicron_common::api::external::InternalContext;
use omicron_common::api::external::ListResultVec;
Expand Down Expand Up @@ -241,6 +242,7 @@ impl super::Nexus {
project_lookup.lookup_for(authz::Action::CreateChild).await?;

// Validate parameters
check_instance_cpu_memory_sizes(params.ncpus, params.memory)?;
if params.disks.len() > MAX_DISKS_PER_INSTANCE as usize {
return Err(Error::invalid_request(&format!(
"cannot attach more than {} disks to instance",
Expand All @@ -253,12 +255,6 @@ impl super::Nexus {
.await?;
}
}
if params.ncpus.0 > MAX_VCPU_PER_INSTANCE {
return Err(Error::invalid_request(&format!(
"cannot have more than {} vCPUs per instance",
MAX_VCPU_PER_INSTANCE
)));
}
if params.external_ips.len() > MAX_EXTERNAL_IPS_PER_INSTANCE {
return Err(Error::invalid_request(&format!(
"An instance may not have more than {} external IP addresses",
Expand Down Expand Up @@ -303,43 +299,6 @@ impl super::Nexus {
}
}

// Reject instances where the memory is not at least
// MIN_MEMORY_BYTES_PER_INSTANCE
if params.memory.to_bytes() < u64::from(MIN_MEMORY_BYTES_PER_INSTANCE) {
return Err(Error::invalid_value(
"size",
format!(
"memory must be at least {}",
ByteCount::from(MIN_MEMORY_BYTES_PER_INSTANCE)
),
));
}

// Reject instances where the memory is not divisible by
// MIN_MEMORY_BYTES_PER_INSTANCE
if (params.memory.to_bytes() % u64::from(MIN_MEMORY_BYTES_PER_INSTANCE))
!= 0
{
return Err(Error::invalid_value(
"size",
format!(
"memory must be divisible by {}",
ByteCount::from(MIN_MEMORY_BYTES_PER_INSTANCE)
),
));
}

// Reject instances where the memory is greater than the limit
if params.memory.to_bytes() > MAX_MEMORY_BYTES_PER_INSTANCE {
return Err(Error::invalid_value(
"size",
format!(
"memory must be less than or equal to {}",
ByteCount::try_from(MAX_MEMORY_BYTES_PER_INSTANCE).unwrap()
),
));
}

let actor = opctx.authn.actor_required().internal_context(
"loading current user's ssh keys for new Instance",
)?;
Expand Down Expand Up @@ -649,6 +608,29 @@ impl super::Nexus {
self.db_datastore.instance_fetch_with_vmm(opctx, &authz_instance).await
}

/// Resizes an instance.
pub(crate) async fn instance_resize(
&self,
opctx: &OpContext,
instance_lookup: &lookup::Instance<'_>,
new_size: params::InstanceResize,
) -> UpdateResult<InstanceAndActiveVmm> {
check_instance_cpu_memory_sizes(new_size.ncpus, new_size.memory)?;
let (.., authz_instance) =
instance_lookup.lookup_for(authz::Action::Modify).await?;

self.db_datastore
.instance_resize(
opctx,
InstanceUuid::from_untyped_uuid(authz_instance.id()),
new_size.ncpus.into(),
new_size.memory.into(),
)
.await?;

self.db_datastore.instance_fetch_with_vmm(opctx, &authz_instance).await
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This refetch is kind of sketchy (the instance's shape can change again between the resize query and the fetch). We may not actually need it: since the query is conditional on the instance being halted (i.e. being in NoVmm, or possibly in Failed once the failed-instance rework lands), there should be no VMM to influence the instance's state, which means we can construct the correct external instance state without having to consult the VMM table to get the active VMM's state. But that's itself kind of sketchy: the rule everywhere else is that you use an InstanceAndActiveVmm to get an external-API Instance to return, and I think this would be the first exception to that rule.

Another, simpler option would be to have this API return 204 No Content on success. (The caller knows what the updated instance's CPU and memory sizes are because it just set them, and nothing else about the instance is different.) OTOH several other instance routines (stop, reboot, etc.) return the posterior state of the instance, so maybe it's better to be consistent.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The 204 No Content approach makes less sense in view of the discussion below (where we've reached a consensus that this API should like other update APIs, which return the posterior states of the things being updated). I think it's probably reasonable to construct an InstanceAndActiveVmm in the relevant datastore routine and return it here (though this should have some explanatory comments as @hawkw mentioned above).

}

/// Idempotently ensures that the sled specified in `db_instance` does not
/// have a record of the instance. If the instance is currently running on
/// this sled, this operation rudely terminates it.
Expand Down Expand Up @@ -1884,6 +1866,57 @@ pub(crate) async fn notify_instance_updated(
}
}

/// Determines whether the supplied instance sizes (CPU count and memory size)
/// are acceptable.
fn check_instance_cpu_memory_sizes(
ncpus: InstanceCpuCount,
memory: ByteCount,
) -> Result<(), Error> {
if ncpus.0 > MAX_VCPU_PER_INSTANCE {
return Err(Error::invalid_request(&format!(
"cannot have more than {} vCPUs per instance",
MAX_VCPU_PER_INSTANCE
)));
}

// Reject instances where the memory is not at least
// MIN_MEMORY_BYTES_PER_INSTANCE
if memory.to_bytes() < u64::from(MIN_MEMORY_BYTES_PER_INSTANCE) {
return Err(Error::invalid_value(
"size",
format!(
"memory must be at least {}",
ByteCount::from(MIN_MEMORY_BYTES_PER_INSTANCE)
),
));
}

// Reject instances where the memory is not divisible by
// MIN_MEMORY_BYTES_PER_INSTANCE
if (memory.to_bytes() % u64::from(MIN_MEMORY_BYTES_PER_INSTANCE)) != 0 {
return Err(Error::invalid_value(
"size",
format!(
"memory must be divisible by {}",
ByteCount::from(MIN_MEMORY_BYTES_PER_INSTANCE)
),
));
}

// Reject instances where the memory is greater than the limit
if memory.to_bytes() > MAX_MEMORY_BYTES_PER_INSTANCE {
return Err(Error::invalid_value(
"size",
format!(
"memory must be less than or equal to {}",
ByteCount::try_from(MAX_MEMORY_BYTES_PER_INSTANCE).unwrap()
),
));
}

Ok(())
}

/// Determines the disposition of a request to start an instance given its state
/// (and its current VMM's state, if it has one) in the database.
fn instance_start_allowed(
Expand Down Expand Up @@ -2178,4 +2211,39 @@ mod tests {
assert!(instance_start_allowed(&logctx.log, &state).is_ok());
logctx.cleanup_successful();
}

#[test]
fn test_invalid_instance_shapes() {
// Too many CPUs.
assert!(check_instance_cpu_memory_sizes(
InstanceCpuCount(MAX_VCPU_PER_INSTANCE + 1),
ByteCount::from_gibibytes_u32(1)
)
.is_err());

// Too little memory.
assert!(check_instance_cpu_memory_sizes(
InstanceCpuCount(1),
ByteCount::from_mebibytes_u32(1)
)
.is_err());

// Acceptable amount of memory, but not divisible into GiB.
assert!(check_instance_cpu_memory_sizes(
InstanceCpuCount(1),
ByteCount::from_mebibytes_u32(1024 + 1)
)
.is_err());

let gib =
u32::try_from(MAX_MEMORY_BYTES_PER_INSTANCE / (1024 * 1024 * 1024))
.unwrap();

// A whole number of GiB, but too many of them.
assert!(check_instance_cpu_memory_sizes(
InstanceCpuCount(1),
ByteCount::from_gibibytes_u32(gib + 1)
)
.is_err());
}
}
37 changes: 37 additions & 0 deletions nexus/src/external_api/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ pub(crate) fn external_api() -> NexusApiDescription {
api.register(instance_reboot)?;
api.register(instance_start)?;
api.register(instance_stop)?;
api.register(instance_resize)?;
api.register(instance_disk_list)?;
api.register(instance_disk_attach)?;
api.register(instance_disk_detach)?;
Expand Down Expand Up @@ -2866,6 +2867,42 @@ async fn instance_delete(
.await
}

/// Resize instance
#[endpoint {
method = PUT,
path = "/v1/instances/{instance}/resize",
tags = ["instances"],
}]
async fn instance_resize(
rqctx: RequestContext<ApiContext>,
query_params: Query<params::OptionalProjectSelector>,
path_params: Path<params::InstancePath>,
new_size: TypedBody<params::InstanceResize>,
) -> Result<HttpResponseOk<Instance>, HttpError> {
let apictx = rqctx.context();
let nexus = &apictx.context.nexus;
let path = path_params.into_inner();
let query = query_params.into_inner();
let resize = new_size.into_inner();
let instance_selector = params::InstanceSelector {
project: query.project,
instance: path.instance,
};
let handler = async {
let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
let instance_lookup =
nexus.instance_lookup(&opctx, instance_selector)?;
let instance =
nexus.instance_resize(&opctx, &instance_lookup, resize).await?;
Ok(HttpResponseOk(instance.into()))
};
apictx
.context
.external_latencies
.instrument_dropshot_handler(&rqctx, handler)
.await
}

// TODO should this be in the public API?
/// Migrate an instance
#[endpoint {
Expand Down
17 changes: 17 additions & 0 deletions nexus/tests/integration_tests/endpoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,12 @@ pub static DEMO_INSTANCE_REBOOT_URL: Lazy<String> = Lazy::new(|| {
*DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR
)
});
pub static DEMO_INSTANCE_RESIZE_URL: Lazy<String> = Lazy::new(|| {
format!(
"/v1/instances/{}/resize?{}",
*DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR
)
});
pub static DEMO_INSTANCE_MIGRATE_URL: Lazy<String> = Lazy::new(|| {
format!(
"/v1/instances/{}/migrate?{}",
Expand Down Expand Up @@ -1823,6 +1829,17 @@ pub static VERIFY_ENDPOINTS: Lazy<Vec<VerifyEndpoint>> = Lazy::new(|| {
AllowedMethod::Post(serde_json::Value::Null)
],
},
VerifyEndpoint {
url: &DEMO_INSTANCE_RESIZE_URL,
visibility: Visibility::Protected,
unprivileged_access: UnprivilegedAccess::None,
allowed_methods: vec![
AllowedMethod::Put(serde_json::to_value(params::InstanceResize {
ncpus: InstanceCpuCount(1),
memory: ByteCount::from_gibibytes_u32(1)
}).unwrap())
],
},
VerifyEndpoint {
url: &DEMO_INSTANCE_MIGRATE_URL,
visibility: Visibility::Protected,
Expand Down
Loading
Loading