From 571ba8fec51120c8847f4434d194fbadf8dd1e56 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Tue, 14 Jan 2025 23:59:07 +0100 Subject: [PATCH] Enable LLVM optimizations This enables a set of LLVM optimization passes for the "balanced" and "aggressive" profiles. Both are based on the "default" pipeline, with the removal of some irrelevant passes. In the case of the "balanced" profile we also remove some additional passes that aren't likely to be useful in most cases. This fixes https://github.com/inko-lang/inko/issues/595. Changelog: added --- .github/workflows/tests.yml | 127 +++++++++++++------- ci/freebsd.sh | 19 +-- compiler/src/llvm.rs | 1 + compiler/src/llvm/opt.rs | 215 ++++++++++++++++++++++++++++++++++ compiler/src/llvm/passes.rs | 48 ++++---- docs/source/design/goals.md | 5 +- docs/source/references/cli.md | 9 +- 7 files changed, 345 insertions(+), 79 deletions(-) create mode 100644 compiler/src/llvm/opt.rs diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0da26d930..e4050c1b3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -63,49 +63,65 @@ jobs: - name: Check the formatting run: 'cd std && cargo run -- fmt --check' - amd64-linux-gnu: + compiler-linux: + strategy: + matrix: + include: + - image: 'ci:fedora' + target: amd64-linux-gnu + - image: 'ci:alpine' + target: amd64-linux-musl + name: ${{ matrix.target }} compiler timeout-minutes: 15 runs-on: ubuntu-24.04 container: - image: ghcr.io/inko-lang/ci:fedora + image: ghcr.io/inko-lang/${{ matrix.image }} steps: - uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: '${{ env.CARGO_HOME }}' - key: amd64-linux-gnu-${{ hashFiles('Cargo.lock', 'rust-toolchain.toml') }} + key: ${{ matrix.target }}-${{ hashFiles('Cargo.lock', 'rust-toolchain.toml') }} - name: Run compiler tests run: cargo test - - name: Run stdlib tests with default optimizations - run: 'cd std && cargo run -- test' - - name: Run stdlib tests without optimizations - run: 'cd 
std && cargo run -- test --opt=none' - - name: Run stdlib tests with aggressive optimizations - run: 'cd std && cargo run -- test --opt=aggressive' - amd64-linux-musl: + std-linux: + strategy: + matrix: + level: + - none + - balanced + - aggressive + target:  # real axis, crossed with level; include entries would overwrite each other + - image: 'ci:fedora' + name: amd64-linux-gnu + - image: 'ci:alpine' + name: amd64-linux-musl + name: ${{ matrix.target.name }} std --opt=${{ matrix.level }} timeout-minutes: 15 runs-on: ubuntu-24.04 container: - image: ghcr.io/inko-lang/ci:alpine + image: ghcr.io/inko-lang/${{ matrix.target.image }} steps: - uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: '${{ env.CARGO_HOME }}' - key: amd64-linux-musl-${{ hashFiles('Cargo.lock', 'rust-toolchain.toml') }} - - name: Run compiler tests - run: cargo test - - name: Run stdlib tests with default optimizations - run: 'cd std && cargo run -- test' - - name: Run stdlib tests without optimizations - run: 'cd std && cargo run -- test --opt=none' - - name: Run stdlib tests with aggressive optimizations - run: 'cd std && cargo run -- test --opt=aggressive' + key: ${{ matrix.target.name }}-${{ hashFiles('Cargo.lock', 'rust-toolchain.toml') }} + - name: Run tests + run: 'cd std && cargo run -- test --opt=${{ matrix.level }}' - amd64-mac-native: + compiler-mac: + strategy: + matrix: + include: + - runner: macos-13 + target: amd64-mac-native + - runner: macos-14 + target: arm64-mac-native + name: ${{ matrix.target }} compiler timeout-minutes: 15 - runs-on: macos-13 + runs-on: ${{ matrix.runner }} env: RUSTUP_HOME: ${{ github.workspace }}/.rustup-home steps: @@ -115,21 +131,27 @@ jobs: path: | ${{ env.CARGO_HOME }} ${{ env.RUSTUP_HOME }} - key: amd64-mac-native-${{ hashFiles('Cargo.lock', 'rust-toolchain.toml') }} + key: ${{ matrix.target }}-${{ hashFiles('Cargo.lock', 'rust-toolchain.toml') }} - name: Install dependencies run: ./ci/mac.sh - name: Run compiler tests run: cargo test - - name: Run stdlib tests with default optimizations - run: 'cd std && cargo run -- test' -
- name: Run stdlib tests without optimizations - run: 'cd std && cargo run -- test --opt=none' - - name: Run stdlib tests with aggressive optimizations - run: 'cd std && cargo run -- test --opt=aggressive' - arm64-mac-native: + std-mac: + strategy: + matrix: + level: + - none + - balanced + - aggressive + target:  # real axis, crossed with level; include entries would overwrite each other + - runner: macos-13 + name: amd64-mac-native + - runner: macos-14 + name: arm64-mac-native + name: ${{ matrix.target.name }} std --opt=${{ matrix.level }} timeout-minutes: 15 - runs-on: macos-14 + runs-on: ${{ matrix.target.runner }} env: RUSTUP_HOME: ${{ github.workspace }}/.rustup-home steps: @@ -139,19 +161,40 @@ jobs: path: | ${{ env.CARGO_HOME }} ${{ env.RUSTUP_HOME }} - key: arm64-mac-native-${{ hashFiles('Cargo.lock', 'rust-toolchain.toml') }} + key: ${{ matrix.target.name }}-${{ hashFiles('Cargo.lock', 'rust-toolchain.toml') }} - name: Install dependencies run: ./ci/mac.sh - - name: Run compiler tests - run: cargo test - - name: Run stdlib tests with default optimizations - run: 'cd std && cargo run -- test' - - name: Run stdlib tests without optimizations - run: 'cd std && cargo run -- test --opt=none' - - name: Run stdlib tests with aggressive optimizations - run: 'cd std && cargo run -- test --opt=aggressive' + - name: Run tests + run: 'cd std && cargo run -- test --opt=${{ matrix.level }}' + + compiler-freebsd: + name: amd64-freebsd-native compiler + timeout-minutes: 15 + runs-on: ubuntu-24.04 + env: + RUSTUP_HOME: ${{ github.workspace }}/.rustup-home + steps: + - uses: actions/checkout@v4 + - uses: actions/cache@v4 + with: + path: | + ${{ env.CARGO_HOME }} + ${{ env.RUSTUP_HOME }} + key: amd64-freebsd-native-${{ hashFiles('Cargo.lock', 'rust-toolchain.toml') }} + - name: Run tests + uses: cross-platform-actions/action@v0.24.0 + with: + operating_system: freebsd + version: '14.0' + memory: 8G + environment_variables: 'CARGO_HOME RUSTUP_HOME' + image_url: 'https://github.com/inko-lang/freebsd-builder/releases/download/v0.8.0/freebsd-14.0-x86-64.qcow2' +
run: ./ci/freebsd.sh compiler - amd64-freebsd-native: + # FreeBSD builds are a bit slow due to the use of a VM, so we only run the + # stdlib tests using the default optimization level. + std-freebsd: + name: amd64-freebsd-native std timeout-minutes: 15 runs-on: ubuntu-24.04 env: @@ -172,4 +215,4 @@ jobs: memory: 8G environment_variables: 'CARGO_HOME RUSTUP_HOME' image_url: 'https://github.com/inko-lang/freebsd-builder/releases/download/v0.8.0/freebsd-14.0-x86-64.qcow2' - run: ./ci/freebsd.sh + run: ./ci/freebsd.sh std diff --git a/ci/freebsd.sh b/ci/freebsd.sh index 7381548dc..80be66bd1 100755 --- a/ci/freebsd.sh +++ b/ci/freebsd.sh @@ -18,11 +18,14 @@ echo "::endgroup::" export PATH="${CARGO_HOME}/bin:${PATH}" -echo "::group::Run compiler tests" -cargo test -echo "::endgroup::" - -echo "::group::Run stdlib tests" -cd std -cargo run -- test -echo "::endgroup::" +if [ "$1" = "compiler" ] +then + echo "::group::Run compiler tests" + cargo test + echo "::endgroup::" +else + echo "::group::Run stdlib tests" + cd std + cargo run -- test + echo "::endgroup::" +fi diff --git a/compiler/src/llvm.rs b/compiler/src/llvm.rs index 4a005ff06..8d795be01 100644 --- a/compiler/src/llvm.rs +++ b/compiler/src/llvm.rs @@ -6,5 +6,6 @@ pub(crate) mod layouts; pub(crate) mod method_hasher; pub(crate) mod methods; pub(crate) mod module; +pub(crate) mod opt; pub(crate) mod passes; pub(crate) mod runtime_function; diff --git a/compiler/src/llvm/opt.rs b/compiler/src/llvm/opt.rs new file mode 100644 index 000000000..a52e89cfe --- /dev/null +++ b/compiler/src/llvm/opt.rs @@ -0,0 +1,215 @@ +//! LLVM optimization passes to run for different compiler optimization levels. +//! +//! The pipelines here are based on the output of `opt -passes='default<O2>' +//! -print-pipeline-passes`, with various redundant passes (e.g. those related +//! to OpenMP) removed. +//! +//! For more details, refer to https://github.com/inko-lang/inko/issues/595.
+pub(crate) const BALANCED: &str = "\ + inferattrs,\ + function(\ + lower-expect,\ + simplifycfg,\ + sroa,\ + early-cse\ + ),\ + ipsccp,\ + called-value-propagation,\ + globalopt,\ + function(\ + mem2reg,\ + instcombine,\ + simplifycfg\ + ),\ + require<globals-aa>,\ + function(invalidate<aa>),\ + cgscc(\ + inline<only-mandatory>,\ + inline,\ + function-attrs,\ + function(\ + sroa,\ + early-cse,\ + speculative-execution,\ + jump-threading,\ + correlated-propagation,\ + simplifycfg,\ + instcombine,\ + aggressive-instcombine,\ + constraint-elimination,\ + tailcallelim,\ + simplifycfg,\ + reassociate,\ + loop-mssa(\ + loop-instsimplify,\ + loop-simplifycfg,\ + licm,\ + loop-rotate,\ + licm,\ + simple-loop-unswitch\ + ),\ + simplifycfg,\ + instcombine,\ + loop(loop-idiom,indvars,loop-deletion,loop-unroll-full),\ + sroa,\ + vector-combine,\ + mldst-motion,\ + gvn,\ + sccp,\ + bdce,\ + instcombine,\ + jump-threading,\ + correlated-propagation,\ + adce,\ + memcpyopt,\ + dse,\ + move-auto-init,\ + loop-mssa(licm),\ + simplifycfg,\ + instcombine\ + ),\ + function-attrs,\ + function(require<should-not-run-function-passes>)\ + ),\ + deadargelim,\ + globalopt,\ + globaldce,\ + elim-avail-extern,\ + rpo-function-attrs,\ + recompute-globalsaa,\ + function(\ + float2int,\ + lower-constant-intrinsics,\ + loop(loop-rotate,loop-deletion),\ + loop-distribute,\ + inject-tli-mappings,\ + loop-vectorize,\ + loop-load-elim,\ + instcombine,\ + simplifycfg,\ + slp-vectorizer,\ + vector-combine,\ + instcombine,\ + loop-unroll,\ + sroa,\ + instcombine,\ + loop-mssa(licm),\ + alignment-from-assumptions,\ + loop-sink,\ + instsimplify,\ + div-rem-pairs,\ + tailcallelim,\ + simplifycfg\ + ),\ + globaldce,\ + constmerge,\ + rel-lookup-table-converter,\ + function(annotation-remarks),\ + verify\ +"; + +pub(crate) const AGGRESSIVE: &str = "\ + inferattrs,\ + function(\ + lower-expect,\ + simplifycfg,\ + sroa,\ + early-cse\ + ),\ + ipsccp,\ + called-value-propagation,\ + globalopt,\ + function(\ + mem2reg,\ + instcombine,\ + simplifycfg\ + ),\ + require<globals-aa>,\ +
function(invalidate<aa>),\ + cgscc(\ + devirt<4>(\ + inline<only-mandatory>,\ + inline,\ + function-attrs,\ + function(\ + sroa,\ + early-cse,\ + speculative-execution,\ + jump-threading,\ + correlated-propagation,\ + simplifycfg,\ + instcombine,\ + aggressive-instcombine,\ + constraint-elimination,\ + libcalls-shrinkwrap,\ + tailcallelim,\ + simplifycfg,\ + reassociate,\ + loop-mssa(\ + loop-instsimplify,\ + loop-simplifycfg,\ + licm,\ + loop-rotate,\ + licm,\ + simple-loop-unswitch\ + ),\ + simplifycfg,\ + instcombine,\ + loop(loop-idiom,indvars,loop-deletion,loop-unroll-full),\ + sroa,\ + vector-combine,\ + mldst-motion,\ + gvn,\ + sccp,\ + bdce,\ + instcombine,\ + jump-threading,\ + correlated-propagation,\ + adce,\ + memcpyopt,\ + dse,\ + move-auto-init,\ + loop-mssa(licm),\ + simplifycfg,\ + instcombine\ + ),\ + function-attrs,\ + function(require<should-not-run-function-passes>)\ + )\ + ),\ + deadargelim,\ + globalopt,\ + globaldce,\ + elim-avail-extern,\ + rpo-function-attrs,\ + recompute-globalsaa,\ + function(\ + float2int,\ + lower-constant-intrinsics,\ + loop(loop-rotate,loop-deletion),\ + loop-distribute,\ + inject-tli-mappings,\ + loop-vectorize,\ + loop-load-elim,\ + instcombine,\ + simplifycfg,\ + slp-vectorizer,\ + vector-combine,\ + instcombine,\ + loop-unroll,\ + sroa,\ + instcombine,\ + loop-mssa(licm),\ + alignment-from-assumptions,\ + loop-sink,\ + instsimplify,\ + div-rem-pairs,\ + tailcallelim,\ + simplifycfg\ + ),\ + globaldce,\ + constmerge,\ + rel-lookup-table-converter,\ + function(annotation-remarks),\ + verify\ +"; diff --git a/compiler/src/llvm/passes.rs b/compiler/src/llvm/passes.rs index 2e5ef92ef..9a3449725 100644 --- a/compiler/src/llvm/passes.rs +++ b/compiler/src/llvm/passes.rs @@ -14,6 +14,7 @@ use crate::llvm::layouts::{ }; use crate::llvm::methods::Methods; use crate::llvm::module::Module; +use crate::llvm::opt; use crate::llvm::runtime_function::RuntimeFunction; use crate::mir::{ CastType, Constant, Instruction, InstructionLocation, Method, Mir, @@ -300,22 +301,17 @@
pub(crate) fn lower_all( } } - // LLVM's optimisation level controls which passes to run, but some/many of - // those may not be relevant to Inko, while slowing down compile times. Thus - // instead of using this knob, we provide our own list of passes. Swift and - // Rust (and possibly others) take a similar approach. + // The code generation optimization level to use. This is separate from the + // optimization passes to run. // - // For the aggressive mode we simply enable the full suite of LLVM - // optimizations, likely greatly increasing the compilation times. + // It's unclear what the difference is between Default and Aggressive, and + // we've not been able to measure a difference in runtime performance. Swift + // also appears to just use Default when optimizations are enabled + // (https://github.com/swiftlang/swift/blob/09d122af7c08e1a6e7fe76f122ddab05b0bbda59/lib/IRGen/IRGen.cpp#L929-L931), + // so we'll assume this is good enough. let level = match state.config.opt { Opt::None => OptimizationLevel::None, - - // We have yet to figure out what optimizations we want to enable - // here, hence we don't apply any at all. - Opt::Balanced => OptimizationLevel::None, - - // This is the equivalent of -O3 for clang. - Opt::Aggressive => OptimizationLevel::Aggressive, + _ => OptimizationLevel::Default, }; // Our "queue" is just an atomic integer in the range 0..N where N is the @@ -547,19 +543,27 @@ impl<'a> Worker<'a> { fn run_passes(&self, module: &Module, layouts: &Layouts) { let layout = layouts.target_data.get_data_layout(); let opts = PassBuilderOptions::create(); - let passes = ["mem2reg"].join(","); + + // The LLVM pipeline to run, including passes that we must run + // regardless of the optimization level. 
+ // + // We need to scope pass names properly, otherwise we may run into + // issues similar to https://github.com/llvm/llvm-project/issues/81128. + let mut passes = ["function(mem2reg)"].join(","); + let extra = match self.shared.state.config.opt { + Opt::Balanced => Some(opt::BALANCED), + Opt::Aggressive => Some(opt::AGGRESSIVE), + _ => None, + }; + + if let Some(v) = extra { + passes.push(','); + passes.push_str(v); + } module.set_data_layout(&layout); module.set_triple(&self.machine.get_triple()); module.run_passes(passes.as_str(), &self.machine, opts).unwrap(); - - // The pass "aliases" such as "default" can't be combined together - // with other passes, so we have to handle them separately. - if let Opt::Aggressive = self.shared.state.config.opt { - let opts = PassBuilderOptions::create(); - - module.run_passes("default<O3>", &self.machine, opts).unwrap(); - } } fn write_object_file( diff --git a/docs/source/design/goals.md b/docs/source/design/goals.md index 2aa423242..fcb1beb0a 100644 --- a/docs/source/design/goals.md +++ b/docs/source/design/goals.md @@ -34,9 +34,8 @@ an explicit non-goal for Inko. ### A balance between compile-time and runtime performance Instead of favouring runtime performance over compile-time performance, Inko -tries to provide a good balance between the two. For improved runtime -performance the compiler supports the option to enable more aggressive -optimisations at the cost of compile times, should you truly need this. +tries to provide a good balance between the two, as much as this is possible +when using LLVM as a backend. ### Memory safety, without the mental overhead diff --git a/docs/source/references/cli.md b/docs/source/references/cli.md index 89aecff84..74070ab0b 100644 --- a/docs/source/references/cli.md +++ b/docs/source/references/cli.md @@ -36,8 +36,8 @@ inko build hello.inko ``` The resulting executable is located at `./build/hello`.
By default the compiler -enables a reasonable number of optimisations, without sacrificing compile times. -You can either disable optimisations entirely, or enable more aggressive +uses a set of optimizations similar to the optimizations enabled by using `clang +-O2`. You can either disable optimisations entirely, or enable more aggressive optimisations at the cost of compile times increasing: ```bash @@ -49,8 +49,9 @@ For `--opt none` the executable is placed in `./build/none/hello`, and `./build/aggressive/hello` for `--opt aggressive`. ::: tip -Only use `--opt aggressive` if you have determined a significant increase in -compile times is worth the increase in runtime performance. +In most cases `--opt=aggressive` won't make a difference in runtime performance, +as the differences in optimizations between `balanced` and `aggressive` are +minor. This may change in the future. ::: You can specify an alternative output path using the `-o` option: