Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[move][move-vm][rewrite][cleanup 2/n] Add limits to arena and string interner #21050

Merged
merged 1 commit into from
Feb 5, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions external-crates/move/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion external-crates/move/Cargo.toml
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@ async-trait = "0.1.42"
bcs = "0.1.4"
better_any = "0.1.1"
bitvec = "0.19.4"
bumpalo = "3.16.0"
bumpalo = "3.17.0"
byteorder = "1.4.3"
bytes = "1.0.1"
chrono = "0.4.19"
2 changes: 2 additions & 0 deletions external-crates/move/crates/move-core-types/src/vm_status.rs
Original file line number Diff line number Diff line change
@@ -395,6 +395,8 @@ pub enum StatusCode {
MEMORY_LIMIT_EXCEEDED = 4028,
VM_MAX_TYPE_NODES_REACHED = 4029,
VARIANT_TAG_MISMATCH = 4030,
PACKAGE_ARENA_LIMIT_REACHED = 4031,
INTERNER_LIMIT_REACHED = 4032,

// A reserved status to represent an unknown vm status.
// this is std::u64::MAX, but we can't pattern match on that, so put the hardcoded value in
25 changes: 21 additions & 4 deletions external-crates/move/crates/move-vm-runtime/src/cache/arena.rs
Original file line number Diff line number Diff line change
@@ -3,6 +3,9 @@

#![allow(unsafe_code)]

use move_binary_format::errors::{PartialVMError, PartialVMResult};
use move_core_types::vm_status::StatusCode;

use bumpalo::Bump;

// -------------------------------------------------------------------------------------------------
@@ -11,13 +14,21 @@ use bumpalo::Bump;

pub struct Arena(Bump);

/// Size of a package arena.
/// This is 10 megabytes, which should be more than enough room for any pacakge on chain.
/// FIXME: Test this limit and validate. See how large packages are in backtesting and double that
/// limit, setting it here.
const ARENA_SIZE: usize = 10_000_000;

// -------------------------------------------------------------------------------------------------
// Impls
// -------------------------------------------------------------------------------------------------

impl Default for Arena {
fn default() -> Self {
Self::new()
let bump = Bump::new();
bump.set_allocation_limit(Some(ARENA_SIZE));
Arena(bump)
}
}

@@ -30,9 +41,15 @@ impl Arena {
/// threads during this call. This should be fine as the translation step that uses an arena
/// should happen in a thread that holds that arena, with no other contention for allocation
/// into it, and nothing should allocate into a LoadedModule after it is loaded.
pub fn alloc_slice<T>(&self, items: impl ExactSizeIterator<Item = T>) -> *mut [T] {
let slice = self.0.alloc_slice_fill_iter(items);
slice as *mut [T]
pub fn alloc_slice<T>(
&self,
items: impl ExactSizeIterator<Item = T>,
) -> PartialVMResult<*mut [T]> {
if let Ok(slice) = self.0.try_alloc_slice_fill_iter(items) {
Ok(slice)
} else {
Err(PartialVMError::new(StatusCode::PACKAGE_ARENA_LIMIT_REACHED))
}
}
}

Original file line number Diff line number Diff line change
@@ -10,12 +10,21 @@ use move_core_types::{
use lasso::{Spur, ThreadedRodeo};

/// A wrapper around a lasso ThreadedRoade with some niceties to make it easier to use in the VM.
#[derive(Debug, Default)]
#[derive(Debug)]
pub struct IdentifierInterner(ThreadedRodeo);

/// Maximum number of identifiers we can ever intern.
/// FIXME: Set to 1 billion, but should be experimentally determined based on actual run data.
const IDENTIFIER_SLOTS: usize = 1_000_000_000;

pub type IdentifierKey = Spur;

impl IdentifierInterner {
pub fn new() -> Self {
let rodeo = ThreadedRodeo::with_capacity(lasso::Capacity::for_strings(IDENTIFIER_SLOTS));
Self(rodeo)
}

/// Resolve a string in the interner or produce an invariant violation (as they should always be
/// there). The `key_type` is used to make a more-informative error message.
pub fn resolve_string(&self, key: &IdentifierKey, key_type: &str) -> PartialVMResult<String> {
@@ -46,10 +55,8 @@ impl IdentifierInterner {
fn get_or_intern_str(&self, string: &str) -> PartialVMResult<IdentifierKey> {
match self.0.try_get_or_intern(string) {
Ok(result) => Ok(result),
Err(err) => Err(
PartialVMError::new(StatusCode::UNKNOWN_INVARIANT_VIOLATION_ERROR)
.with_message(format!("Failed to intern string {string}; error: {err:?}.")),
),
Err(err) => Err(PartialVMError::new(StatusCode::INTERNER_LIMIT_REACHED)
.with_message(format!("Failed to intern string {string}; error: {err:?}."))),
}
}

Original file line number Diff line number Diff line change
@@ -6,8 +6,8 @@
// and publishing packages to the VM.

use crate::{
cache::identifier_interner::IdentifierInterner, jit, natives::functions::NativeFunctions,
shared::types::PackageStorageId, validation::verification,
jit, natives::functions::NativeFunctions, shared::types::PackageStorageId,
validation::verification,
};
use move_vm_config::runtime::VMConfig;
use parking_lot::RwLock;
@@ -44,7 +44,6 @@ pub struct MoveCache {
pub(crate) natives: Arc<NativeFunctions>,
pub(crate) vm_config: Arc<VMConfig>,
pub(crate) package_cache: Arc<RwLock<PackageCache>>,
pub(crate) string_cache: Arc<IdentifierInterner>,
}

// -------------------------------------------------------------------------------------------------
@@ -57,7 +56,6 @@ impl MoveCache {
natives,
vm_config,
package_cache: Arc::new(RwLock::new(HashMap::new())),
string_cache: Arc::new(IdentifierInterner::default()),
}
}

@@ -92,10 +90,6 @@ impl MoveCache {
pub fn package_cache(&self) -> &RwLock<PackageCache> {
&self.package_cache
}

pub fn string_interner(&self) -> &IdentifierInterner {
&self.string_cache
}
}

impl Package {
@@ -124,13 +118,11 @@ impl Clone for MoveCache {
natives,
vm_config,
package_cache,
string_cache,
} = self;
Self {
natives: natives.clone(),
vm_config: vm_config.clone(),
package_cache: package_cache.clone(),
string_cache: string_cache.clone(),
}
}
}
Original file line number Diff line number Diff line change
@@ -354,6 +354,7 @@ impl<'a> VMTracer<'a> {

/// Snapshot the value at the root of a location. This is used to create the value snapshots
/// for TraceValue references.
#[allow(clippy::only_used_in_recursion)]
fn root_location_snapshot(
&self,
vtables: &VMDispatchTables,
Original file line number Diff line number Diff line change
@@ -490,7 +490,7 @@ fn functions(
.collect::<PartialVMResult<Vec<_>>>()?;
let loaded_functions = package_context
.package_arena
.alloc_slice(prealloc_functions.into_iter());
.alloc_slice(prealloc_functions.into_iter())?;

package_context.insert_and_make_module_function_vtable(
self_id,
@@ -641,7 +641,7 @@ fn code(
.map(|bc| bytecode(context, bc))
.collect::<PartialVMResult<Vec<Bytecode>>>()?
.into_iter(),
);
)?;
Ok(result as *const [Bytecode])
}

Original file line number Diff line number Diff line change
@@ -94,7 +94,14 @@ fn blocks(

// This invariant is enforced by the verifier's control flow analysis, but we double-check it
// here for sanity.
assert!(blocks.iter().last().unwrap().1.last().unwrap().is_unconditional_branch());
assert!(blocks
.iter()
.last()
.unwrap()
.1
.last()
.unwrap()
.is_unconditional_branch());

// (B) Record any instruction that is a fall-through target
// NB: the Move bytecode verifier enforces that the last block
2 changes: 1 addition & 1 deletion external-crates/move/crates/move-vm-runtime/src/lib.rs
Original file line number Diff line number Diff line change
@@ -50,7 +50,7 @@ use std::sync::Arc;
/// TODO: Set up this; `lasso` supports it but we need to expose that interface.
/// 3. If absolutely necessary, the execution layer _can_ dump the interner.
static STRING_INTERNER: Lazy<Arc<IdentifierInterner>> =
Lazy::new(|| Arc::new(IdentifierInterner::default()));
Lazy::new(|| Arc::new(IdentifierInterner::new()));

/// Function to access the global StringInterner
fn string_interner() -> Arc<IdentifierInterner> {