Skip to content

Commit

Permalink
Replace reductions with time slices
Browse files Browse the repository at this point in the history
Instead of each process reducing a counter and yielding when the counter
reaches zero, each process now tracks the epoch in which it started
running. The epoch itself is a global counter, incremented every 10
milliseconds by a background thread. The compiler in turn inserts a
"preempt" instruction, compiled to code that checks the process' epoch
against the global epoch. If the two differ, the process yields control
back to the scheduler.

This new approach can improve performance by up to 40%, in part because
we no longer need function calls and instead compile the instruction
directly to machine code. In addition, we no longer insert the
preemption check after every method call, instead only inserting it at
the end of a loop iteration, or before a next or break.

This new setup is less fine-grained than the old approach, but that's OK
as the end goal is not to give each process the exact same amount of
fuel/time, but rather to prevent one process from forever holding on to
a scheduler thread.

This fixes #522.

Changelog: performance
  • Loading branch information
yorickpeterse committed Nov 20, 2023
1 parent 1a30de9 commit cc8c6fe
Show file tree
Hide file tree
Showing 19 changed files with 593 additions and 445 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ clean:
cargo clean

docs/install:
cd docs && poetry install
cd docs && poetry install --no-root

docs/build:
cd docs && poetry run mkdocs build
Expand Down
16 changes: 16 additions & 0 deletions compiler/src/llvm/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,22 @@ impl<'ctx> Builder<'ctx> {
.unwrap()
}

/// Loads the 32-bit integer stored at `variable` using an atomic load.
///
/// The load uses monotonic ordering and an explicit alignment of 4 bytes.
pub(crate) fn load_atomic_counter(
    &self,
    variable: PointerValue<'ctx>,
) -> IntValue<'ctx> {
    let counter_type = self.context.i32_type();
    let loaded = self.inner.build_load(counter_type, variable, "");
    let load_ins = loaded.as_instruction_value().unwrap();

    // LLVM compiles an atomic load to an __atomic_load() function call when
    // the alignment doesn't match the size of the value. Setting the
    // alignment here is technically redundant, but doing so explicitly
    // keeps the intent clear and guards against future changes.
    load_ins.set_alignment(4).unwrap();
    load_ins.set_atomic_ordering(AtomicOrdering::Monotonic).unwrap();

    loaded.into_int_value()
}

pub(crate) fn int_eq(
&self,
lhs: IntValue<'ctx>,
Expand Down
10 changes: 4 additions & 6 deletions compiler/src/llvm/constants.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
/// The offset to apply to access a regular field.
///
/// The object header occupies the first field (as an inline struct), so all
/// user-defined fields start at the next field.
pub(crate) const STATE_EPOCH_OFFSET: u32 = 4;
pub(crate) const PROCESS_EPOCH_OFFSET: u32 = 1;

pub(crate) const FIELD_OFFSET: usize = 1;

/// The offset to apply to access a process field.
pub(crate) const PROCESS_FIELD_OFFSET: usize = 2;
pub(crate) const PROCESS_FIELD_OFFSET: usize = 3;

pub(crate) const HEADER_CLASS_INDEX: u32 = 0;
pub(crate) const HEADER_REFS_INDEX: u32 = 1;
Expand Down
28 changes: 21 additions & 7 deletions compiler/src/llvm/layouts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ impl<'ctx> Layouts<'ctx> {
context.pointer_type().into(), // ByteArray class
context.pointer_type().into(), // hash_key0
context.pointer_type().into(), // hash_key1
context.i32_type().into(), // scheduler_epoch
]);

let context_layout = context.struct_type(&[
Expand Down Expand Up @@ -239,11 +240,16 @@ impl<'ctx> Layouts<'ctx> {
// Mutexes are smaller on Linux, resulting in a smaller process
// size, so we have to take that into account when calculating
// field offsets.
112
120
}
_ => 128,
_ => 136,
};

// The size of the process data that isn't exposed to the generated code
// (because it involves Rust types of which the layout isn't stable). The
// header and the 4-byte epoch are laid out as separate fields, so their
// sizes are subtracted here.
let process_private_size = process_size - HEADER_SIZE - 4;

for id in mir.classes.keys() {
// String is a built-in class, but it's defined like a regular one,
// so we _don't_ want to skip it here.
Expand All @@ -269,15 +275,23 @@ impl<'ctx> Layouts<'ctx> {
} else {
fields.push(header.into());

// For processes we need to take into account the space between
// the header and the first field. We don't actually care about
// that state in the generated code, so we just insert a single
// member that covers it.
// For processes, the memory layout is as follows:
//
// +--------------------------+
// | header (16 bytes) |
// +--------------------------+
// | start epoch (4 bytes) |
// +--------------------------+
// | private data (N bytes) |
// +--------------------------+
// | user-defined fields |
// +--------------------------+
if kind.is_async() {
fields.push(context.i32_type().into());
fields.push(
context
.i8_type()
.array_type(process_size - HEADER_SIZE)
.array_type(process_private_size)
.into(),
);
}
Expand Down
51 changes: 41 additions & 10 deletions compiler/src/llvm/passes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::llvm::constants::{
CONTEXT_ARGS_INDEX, CONTEXT_PROCESS_INDEX, CONTEXT_STATE_INDEX,
DROPPER_INDEX, FIELD_OFFSET, HEADER_CLASS_INDEX, HEADER_REFS_INDEX,
MESSAGE_ARGUMENTS_INDEX, METHOD_FUNCTION_INDEX, METHOD_HASH_INDEX,
PROCESS_FIELD_OFFSET,
PROCESS_EPOCH_OFFSET, PROCESS_FIELD_OFFSET, STATE_EPOCH_OFFSET,
};
use crate::llvm::context::Context;
use crate::llvm::layouts::Layouts;
Expand Down Expand Up @@ -2001,18 +2001,49 @@ impl<'a, 'b, 'ctx> LowerMethod<'a, 'b, 'ctx> {

self.builder.store(var, value);
}
Instruction::Reduce(ins) => {
let amount = self
Instruction::Preempt(_) => {
let state = self.builder.load_untyped_pointer(state_var);
let proc = self.builder.load_untyped_pointer(proc_var);

// To access the process' epoch we need a process layout. The epoch is
// in the same fixed place in every process layout, so it doesn't matter
// which one we use. We just use the layout of the main class, which is
// a process and is always present at this point.
let layout =
self.layouts.instances[&self.db.main_class().unwrap()];

let state_epoch_addr = self.builder.field_address(
self.layouts.state,
state,
STATE_EPOCH_OFFSET,
);

let state_epoch =
self.builder.load_atomic_counter(state_epoch_addr);

let proc_epoch = self
.builder
.context
.i16_type()
.const_int(ins.amount as u64, false)
.into();
let proc = self.builder.load_untyped_pointer(proc_var).into();
.load_field(layout, proc, PROCESS_EPOCH_OFFSET)
.into_int_value();

let is_eq = self.builder.int_eq(state_epoch, proc_epoch);
let cont_block = self.builder.add_block();
let yield_block = self.builder.add_block();

self.builder.branch(is_eq, cont_block, yield_block);

// The block to jump to if we need to yield back to the
// scheduler.
self.builder.switch_to_block(yield_block);

let func =
self.module.runtime_function(RuntimeFunction::Reduce);
self.module.runtime_function(RuntimeFunction::ProcessYield);

self.builder.call_void(func, &[proc.into()]);
self.builder.jump(cont_block);

self.builder.call_void(func, &[proc, amount]);
// The block to jump to if we can continue running.
self.builder.switch_to_block(cont_block);
}
Instruction::Finish(ins) => {
let proc = self.builder.load_untyped_pointer(proc_var).into();
Expand Down
9 changes: 4 additions & 5 deletions compiler/src/llvm/runtime_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub(crate) enum RuntimeFunction {
ProcessNew,
ProcessPanic,
ProcessSendMessage,
Reduce,
ProcessYield,
RuntimeDrop,
RuntimeNew,
RuntimeStart,
Expand All @@ -40,7 +40,7 @@ impl RuntimeFunction {
RuntimeFunction::ProcessNew => "inko_process_new",
RuntimeFunction::ProcessPanic => "inko_process_panic",
RuntimeFunction::ProcessSendMessage => "inko_process_send_message",
RuntimeFunction::Reduce => "inko_reduce",
RuntimeFunction::ProcessYield => "inko_process_yield",
RuntimeFunction::RuntimeDrop => "inko_runtime_drop",
RuntimeFunction::RuntimeNew => "inko_runtime_new",
RuntimeFunction::RuntimeStart => "inko_runtime_start",
Expand Down Expand Up @@ -70,12 +70,11 @@ impl RuntimeFunction {

ret.fn_type(&[val], false)
}
RuntimeFunction::Reduce => {
RuntimeFunction::ProcessYield => {
let proc = context.pointer_type().into();
let amount = context.i16_type().into();
let ret = context.void_type();

ret.fn_type(&[proc, amount], false)
ret.fn_type(&[proc], false)
}
RuntimeFunction::Allocate | RuntimeFunction::AllocateAtomic => {
let class = context.pointer_type().into();
Expand Down
20 changes: 6 additions & 14 deletions compiler/src/mir/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@ use types::{
TypeId, TypeRef, BOOL_ID, FLOAT_ID, INT_ID, NIL_ID,
};

/// The number of reductions to perform after calling a method.
const CALL_COST: u16 = 1;

/// The register ID of the register that stores `self`.
pub(crate) const SELF_ID: u32 = 0;

Expand Down Expand Up @@ -634,13 +631,9 @@ impl Block {
)));
}

pub(crate) fn reduce(&mut self, amount: u16, location: LocationId) {
pub(crate) fn preempt(&mut self, location: LocationId) {
self.instructions
.push(Instruction::Reduce(Box::new(Reduce { amount, location })))
}

pub(crate) fn reduce_call(&mut self, location: LocationId) {
self.reduce(CALL_COST, location);
.push(Instruction::Preempt(Box::new(Preempt { location })))
}

pub(crate) fn cast(
Expand Down Expand Up @@ -968,8 +961,7 @@ pub(crate) struct Spawn {
}

#[derive(Clone)]
pub(crate) struct Reduce {
pub(crate) amount: u16,
pub(crate) struct Preempt {
pub(crate) location: LocationId,
}

Expand Down Expand Up @@ -1102,7 +1094,7 @@ pub(crate) enum Instruction {
Allocate(Box<Allocate>),
Spawn(Box<Spawn>),
GetConstant(Box<GetConstant>),
Reduce(Box<Reduce>),
Preempt(Box<Preempt>),
Finish(Box<Finish>),
Cast(Box<Cast>),
Pointer(Box<Pointer>),
Expand Down Expand Up @@ -1146,7 +1138,7 @@ impl Instruction {
Instruction::Allocate(ref v) => v.location,
Instruction::Spawn(ref v) => v.location,
Instruction::GetConstant(ref v) => v.location,
Instruction::Reduce(ref v) => v.location,
Instruction::Preempt(ref v) => v.location,
Instruction::Finish(ref v) => v.location,
Instruction::Cast(ref v) => v.location,
Instruction::Pointer(ref v) => v.location,
Expand Down Expand Up @@ -1325,7 +1317,7 @@ impl Instruction {
v.id.name(db)
)
}
Instruction::Reduce(ref v) => format!("reduce {}", v.amount),
Instruction::Preempt(_) => "preempt".to_string(),
Instruction::Finish(v) => {
if v.terminate { "terminate" } else { "finish" }.to_string()
}
Expand Down
22 changes: 3 additions & 19 deletions compiler/src/mir/passes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ impl<'a> GenerateDropper<'a> {
None,
loc,
);
lower.reduce_call(TypeRef::nil(), loc);

lower.current_block_mut().return_value(nil_reg, loc);

assert_eq!(lower.method.arguments.len(), 1);
Expand Down Expand Up @@ -458,7 +458,6 @@ impl<'a> GenerateDropper<'a> {
None,
loc,
);
lower.reduce_call(typ, loc);
}

let variants = class.variants(lower.db());
Expand Down Expand Up @@ -545,8 +544,6 @@ impl<'a> GenerateDropper<'a> {
None,
loc,
);

lower.reduce_call(typ, loc);
}

for field in class.fields(lower.db()) {
Expand Down Expand Up @@ -1540,6 +1537,7 @@ impl<'a> LowerMethod<'a> {

if self.in_connected_block() {
self.add_edge(loop_end, loop_start);
self.current_block_mut().preempt(loc);
self.current_block_mut().goto(loop_start, loc);
}

Expand Down Expand Up @@ -1590,6 +1588,7 @@ impl<'a> LowerMethod<'a> {
let loc = self.add_location(location);

self.drop_loop_registers(loc);
self.current_block_mut().preempt(loc);
self.current_block_mut().goto(target, loc);
self.add_edge(source, target);
self.add_current_block();
Expand Down Expand Up @@ -1807,7 +1806,6 @@ impl<'a> LowerMethod<'a> {
let res = self.new_register(returns);

self.current_block_mut().call_closure(res, rec, args, loc);
self.reduce_call(returns, loc);

if returns.is_never(self.db()) {
self.add_current_block();
Expand Down Expand Up @@ -1891,8 +1889,6 @@ impl<'a> LowerMethod<'a> {
self.current_block_mut()
.call_static(result, info.id, arg_regs, targs, location);

self.reduce_call(info.returns, location);

return result;
}
};
Expand Down Expand Up @@ -1928,7 +1924,6 @@ impl<'a> LowerMethod<'a> {
.call_instance(result, rec, info.id, arg_regs, targs, location);
}

self.reduce_call(info.returns, location);
result
}

Expand Down Expand Up @@ -4324,17 +4319,6 @@ impl<'a> LowerMethod<'a> {
self.self_register != self.surrounding_type_register
}

fn reduce_call(&mut self, returns: TypeRef, location: LocationId) {
// If the method never returns there's no point in generating a
// reduction. Doing this can also break the LLVM code generation
// process.
if returns.is_never(self.db()) {
return;
}

self.current_block_mut().reduce_call(location);
}

fn warn_unreachable(&mut self, location: &SourceLocation) {
self.state.diagnostics.unreachable(self.file(), location.clone());
}
Expand Down
2 changes: 0 additions & 2 deletions compiler/src/mir/specialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1205,8 +1205,6 @@ impl<'a, 'b, 'c> ExpandDrop<'a, 'b, 'c> {
} else if !typ.is_permanent(self.db) {
self.block_mut(block).call_dropper(reg, value, location);
}

self.block_mut(block).reduce_call(location);
}

fn block_mut(&mut self, id: BlockId) -> &mut Block {
Expand Down
Loading

0 comments on commit cc8c6fe

Please sign in to comment.