Skip to content

Commit

Permalink
Enable LLVM optimizations
Browse files Browse the repository at this point in the history
This enables a set of LLVM optimization passes for the "balanced" and
"aggressive" profiles. Both are based on the "default<O2>" pipeline,
with the removal of some irrelevant passes. In the case of the
"balanced" profile we also remove some additional passes that aren't
likely to be useful in most cases.

This fixes #595.

Changelog: added
  • Loading branch information
yorickpeterse committed Jan 14, 2025
1 parent 63280d0 commit 08c6874
Show file tree
Hide file tree
Showing 3 changed files with 240 additions and 22 deletions.
1 change: 1 addition & 0 deletions compiler/src/llvm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ pub(crate) mod layouts;
pub(crate) mod method_hasher;
pub(crate) mod methods;
pub(crate) mod module;
pub(crate) mod opt;
pub(crate) mod passes;
pub(crate) mod runtime_function;
213 changes: 213 additions & 0 deletions compiler/src/llvm/opt.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
//! LLVM optimization passes to run for different compiler optimization levels.
//!
//! The pipelines here are based on the output of `opt -passes='default<O2>'
//! -print-pipeline-passes`, with various redundant passes (e.g. those related
//! to OpenMP) removed.
/// The LLVM pass pipeline used for the "balanced" optimization profile.
///
/// This is a comma-separated pass pipeline description in the textual syntax
/// understood by LLVM's pass builder (the same syntax accepted by
/// `opt -passes=...`). Every line of the literal ends in a `\` continuation,
/// so the value is a single line of text without embedded newlines.
///
/// The order of the passes is significant; don't reorder or reformat entries
/// without re-checking the resulting pipeline.
///
/// Compared to `AGGRESSIVE` this pipeline doesn't wrap the contents of the
/// `cgscc(...)` stage in `devirt<4>(...)`, and it leaves out the
/// `libcalls-shrinkwrap` pass.
pub(crate) const BALANCED: &str = "\
inferattrs,\
function<eager-inv>(\
lower-expect,\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,\
sroa<modify-cfg>,\
early-cse\
),\
ipsccp,\
called-value-propagation,\
globalopt,\
function<eager-inv>(\
mem2reg,\
instcombine<max-iterations=1000;no-use-loop-info>,\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>\
),\
require<globals-aa>,\
function(invalidate<aa>),\
cgscc(\
inline<only-mandatory>,\
inline,\
function-attrs<skip-non-recursive>,\
function<eager-inv;no-rerun>(\
sroa<modify-cfg>,\
early-cse<memssa>,\
speculative-execution,\
jump-threading,\
correlated-propagation,\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,\
instcombine<max-iterations=1000;no-use-loop-info>,\
aggressive-instcombine,\
constraint-elimination,\
tailcallelim,\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,\
reassociate,\
loop-mssa(\
loop-instsimplify,\
loop-simplifycfg,\
licm<no-allowspeculation>,\
loop-rotate<header-duplication;no-prepare-for-lto>,\
licm<allowspeculation>,\
simple-loop-unswitch<no-nontrivial;trivial>\
),\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,\
instcombine<max-iterations=1000;no-use-loop-info>,\
loop(loop-idiom,indvars,loop-deletion,loop-unroll-full),\
sroa<modify-cfg>,\
vector-combine,\
mldst-motion<no-split-footer-bb>,\
gvn,\
sccp,\
bdce,\
instcombine<max-iterations=1000;no-use-loop-info>,\
jump-threading,\
correlated-propagation,\
adce,\
memcpyopt,\
dse,\
move-auto-init,\
loop-mssa(licm<allowspeculation>),\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,\
instcombine<max-iterations=1000;no-use-loop-info>\
),\
function-attrs,\
function(require<should-not-run-function-passes>)\
),\
deadargelim,\
globalopt,\
globaldce,\
elim-avail-extern,\
rpo-function-attrs,\
recompute-globalsaa,\
function<eager-inv>(\
float2int,\
lower-constant-intrinsics,\
loop(loop-rotate<header-duplication;no-prepare-for-lto>,loop-deletion),\
loop-distribute,\
inject-tli-mappings,\
loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,\
loop-load-elim,\
instcombine<max-iterations=1000;no-use-loop-info>,\
simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,\
slp-vectorizer,\
vector-combine,\
instcombine<max-iterations=1000;no-use-loop-info>,\
loop-unroll<O2>,\
sroa<preserve-cfg>,\
instcombine<max-iterations=1000;no-use-loop-info>,\
loop-mssa(licm<allowspeculation>),\
alignment-from-assumptions,\
loop-sink,\
instsimplify,\
div-rem-pairs,\
tailcallelim,\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>\
),\
globaldce,\
constmerge,\
rel-lookup-table-converter,\
function(annotation-remarks),\
verify\
";

/// The LLVM pass pipeline used for the "aggressive" optimization profile.
///
/// This is a comma-separated pass pipeline description in the textual syntax
/// understood by LLVM's pass builder (the same syntax accepted by
/// `opt -passes=...`). Every line of the literal ends in a `\` continuation,
/// so the value is a single line of text without embedded newlines.
///
/// The order of the passes is significant; don't reorder or reformat entries
/// without re-checking the resulting pipeline.
///
/// This pipeline is the same as `BALANCED`, except for the following:
///
/// - The contents of the `cgscc(...)` stage are wrapped in `devirt<4>(...)`
/// - The `libcalls-shrinkwrap` pass is run after `constraint-elimination`
pub(crate) const AGGRESSIVE: &str = "\
inferattrs,\
function<eager-inv>(\
lower-expect,\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,\
sroa<modify-cfg>,\
early-cse\
),\
ipsccp,\
called-value-propagation,\
globalopt,\
function<eager-inv>(\
mem2reg,\
instcombine<max-iterations=1000;no-use-loop-info>,\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>\
),\
require<globals-aa>,\
function(invalidate<aa>),\
cgscc(\
devirt<4>(\
inline<only-mandatory>,\
inline,\
function-attrs<skip-non-recursive>,\
function<eager-inv;no-rerun>(\
sroa<modify-cfg>,\
early-cse<memssa>,\
speculative-execution,\
jump-threading,\
correlated-propagation,\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,\
instcombine<max-iterations=1000;no-use-loop-info>,\
aggressive-instcombine,\
constraint-elimination,\
libcalls-shrinkwrap,\
tailcallelim,\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,\
reassociate,\
loop-mssa(\
loop-instsimplify,\
loop-simplifycfg,\
licm<no-allowspeculation>,\
loop-rotate<header-duplication;no-prepare-for-lto>,\
licm<allowspeculation>,\
simple-loop-unswitch<no-nontrivial;trivial>\
),\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,\
instcombine<max-iterations=1000;no-use-loop-info>,\
loop(loop-idiom,indvars,loop-deletion,loop-unroll-full),\
sroa<modify-cfg>,\
vector-combine,\
mldst-motion<no-split-footer-bb>,\
gvn,\
sccp,\
bdce,\
instcombine<max-iterations=1000;no-use-loop-info>,\
jump-threading,\
correlated-propagation,\
adce,\
memcpyopt,\
dse,\
move-auto-init,\
loop-mssa(licm<allowspeculation>),\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,\
instcombine<max-iterations=1000;no-use-loop-info>\
),\
function-attrs,\
function(require<should-not-run-function-passes>)\
)\
),\
deadargelim,\
globalopt,\
globaldce,\
elim-avail-extern,\
rpo-function-attrs,\
recompute-globalsaa,\
function<eager-inv>(\
float2int,\
lower-constant-intrinsics,\
loop(loop-rotate<header-duplication;no-prepare-for-lto>,loop-deletion),\
loop-distribute,\
inject-tli-mappings,\
loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,\
loop-load-elim,\
instcombine<max-iterations=1000;no-use-loop-info>,\
simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,\
slp-vectorizer,\
vector-combine,\
instcombine<max-iterations=1000;no-use-loop-info>,\
loop-unroll<O2>,\
sroa<preserve-cfg>,\
instcombine<max-iterations=1000;no-use-loop-info>,\
loop-mssa(licm<allowspeculation>),\
alignment-from-assumptions,\
loop-sink,\
instsimplify,\
div-rem-pairs,\
tailcallelim,\
simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>\
),\
globaldce,\
constmerge,\
rel-lookup-table-converter,\
function(annotation-remarks),\
verify\
";
48 changes: 26 additions & 22 deletions compiler/src/llvm/passes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::llvm::layouts::{
};
use crate::llvm::methods::Methods;
use crate::llvm::module::Module;
use crate::llvm::opt;
use crate::llvm::runtime_function::RuntimeFunction;
use crate::mir::{
CastType, Constant, Instruction, InstructionLocation, Method, Mir,
Expand Down Expand Up @@ -300,22 +301,17 @@ pub(crate) fn lower_all(
}
}

// LLVM's optimisation level controls which passes to run, but some/many of
// those may not be relevant to Inko, while slowing down compile times. Thus
// instead of using this knob, we provide our own list of passes. Swift and
// Rust (and possibly others) take a similar approach.
// The code generation optimization level to use. This is separate from the
// optimization passes to run.
//
// For the aggressive mode we simply enable the full suite of LLVM
// optimizations, likely greatly increasing the compilation times.
// It's unclear what the difference is between Default and Aggressive, and
// we've not been able to measure a difference in runtime performance. Swift
// also appears to just use Default when optimizations are enabled
// (https://github.com/swiftlang/swift/blob/09d122af7c08e1a6e7fe76f122ddab05b0bbda59/lib/IRGen/IRGen.cpp#L929-L931),
// so we'll assume this is good enough.
let level = match state.config.opt {
Opt::None => OptimizationLevel::None,

// We have yet to figure out what optimizations we want to enable
// here, hence we don't apply any at all.
Opt::Balanced => OptimizationLevel::None,

// This is the equivalent of -O3 for clang.
Opt::Aggressive => OptimizationLevel::Aggressive,
_ => OptimizationLevel::Default,
};

// Our "queue" is just an atomic integer in the range 0..N where N is the
Expand Down Expand Up @@ -547,19 +543,27 @@ impl<'a> Worker<'a> {
fn run_passes(&self, module: &Module, layouts: &Layouts) {
let layout = layouts.target_data.get_data_layout();
let opts = PassBuilderOptions::create();
let passes = ["mem2reg"].join(",");

// The LLVM pipeline to run, including passes that we must run
// regardless of the optimization level.
//
// We need to scope pass names properly, otherwise we may run into
// issues similar to https://github.com/llvm/llvm-project/issues/81128.
let mut passes = ["function(mem2reg)"].join(",");
let extra = match self.shared.state.config.opt {
Opt::Balanced => Some(opt::BALANCED),
Opt::Aggressive => Some(opt::AGGRESSIVE),
_ => None,
};

if let Some(v) = extra {
passes.push(',');
passes.push_str(v);
}

module.set_data_layout(&layout);
module.set_triple(&self.machine.get_triple());
module.run_passes(passes.as_str(), &self.machine, opts).unwrap();

// The pass "aliases" such as "default<O3>" can't be combined together
// with other passes, so we have to handle them separately.
if let Opt::Aggressive = self.shared.state.config.opt {
let opts = PassBuilderOptions::create();

module.run_passes("default<O3>", &self.machine, opts).unwrap();
}
}

fn write_object_file(
Expand Down

0 comments on commit 08c6874

Please sign in to comment.