From 0d7f1240175bb587c2935ff61bfff2d4129a0e1c Mon Sep 17 00:00:00 2001 From: Matt Brictson Date: Fri, 23 Feb 2024 17:00:09 -0800 Subject: [PATCH] Add `--shard` flag for splitting tests across parallel CI jobs (#14) * Add `--shard` flag for splitting tests across parallel CI jobs With this commit, mighty_test can now distribute test files evenly across parallel CI jobs, using the `--shard` option. The _shard_ nomenclature has been borrowed from similar features in [Jest](https://jestjs.io/docs/cli#--shard) and [Playwright](https://playwright.dev/docs/test-sharding). ```sh # Run the 1st group of tests out of 4 total groups bin/mt --shard 1/4 ``` Test files are shuffled before dividing into shards. To ensure the shuffle is consistent across CI nodes, the SHA of the git commit being tested is used as the random seed. If the SHA cannot be determined, a hard-coded seed is used. * Ensure CI environment doesn't pollute unit test --- README.md | 36 ++++++++++------ lib/mighty_test.rb | 1 + lib/mighty_test/cli.rb | 2 + lib/mighty_test/option_parser.rb | 1 + lib/mighty_test/sharder.rb | 35 ++++++++++++++++ test/mighty_test/cli_test.rb | 20 +++++++++ test/mighty_test/sharder_test.rb | 71 ++++++++++++++++++++++++++++++++ 7 files changed, 154 insertions(+), 12 deletions(-) create mode 100644 lib/mighty_test/sharder.rb create mode 100644 test/mighty_test/sharder_test.rb diff --git a/README.md b/README.md index bd903d7..0fd0903 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/mattbrictson/mighty_test/ci.yml)](https://github.com/mattbrictson/mighty_test/actions/workflows/ci.yml) [![Code Climate maintainability](https://img.shields.io/codeclimate/maintainability/mattbrictson/mighty_test)](https://codeclimate.com/github/mattbrictson/mighty_test) -mighty_test (`mt`) is a TDD-friendly Minitest runner for Ruby projects. 
It includes a Jest-inspired interactive watch mode, focus mode, CI parallelization, run by directory/file/line number, fail-fast, and color formatting. +mighty_test (`mt`) is a TDD-friendly Minitest runner for Ruby projects. It includes a Jest-inspired interactive watch mode, focus mode, CI sharding, run by directory/file/line number, fail-fast, and color formatting. --- @@ -48,7 +48,7 @@ bundle binstub mighty_test Now you can run mighty_test with `bin/mt`. > [!TIP] -> **When installing mighty_test in a Rails project, make sure to put the gem in the `:test` Gemfile group.** Although Rails has a built-in test runner (`bin/rails test`) that already provides a lot of what mighty_test offers, you can still use `bin/mt` with Rails projects for its unique `--watch` mode and CI `--split` feature. +> **When installing mighty_test in a Rails project, make sure to put the gem in the `:test` Gemfile group.** Although Rails has a built-in test runner (`bin/rails test`) that already provides a lot of what mighty_test offers, you can still use `bin/mt` with Rails projects for its unique `--watch` mode and CI `--shard` feature. ## Rake Integration (Non-Rails) @@ -110,35 +110,47 @@ bin/mt test/commands If the `CI` environment variable is set, mighty_test defaults to running _all_ tests, including slow tests. This is equivalent to passing `--all`. -mighty_test can also split test files across parallel CI jobs. +mighty_test can also distribute test files evenly across parallel CI jobs, using the `--shard` option. The _shard_ nomenclature has been borrowed from similar features in [Jest](https://jestjs.io/docs/cli#--shard) and [Playwright](https://playwright.dev/docs/test-sharding). ```sh # Run the 1st group of tests out of 4 total groups -bin/mt --split 1:4 +bin/mt --shard 1/4 ``` -In GitHub Actions, for example, you can use `--split` with a matrix strategy to easily divide tests across N jobs. 
+In GitHub Actions, for example, you can use `--shard` with a matrix strategy to easily divide tests across N jobs. ```yaml jobs: test: strategy: matrix: - split: - - "1:4" - - "2:4" - - "3:4" - - "4:4" + shard: + - "1/4" + - "2/4" + - "3/4" + - "4/4" steps: - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: bundler-cache: true - - run: bin/mt --split ${{ matrix.split }} + - run: bin/mt --shard ${{ matrix.shard }} +``` + +In CircleCI, you can use the `parallelism` setting, which automatically injects `$CIRCLE_NODE_INDEX` and `$CIRCLE_NODE_TOTAL` environment variables. Note that `$CIRCLE_NODE_INDEX` is zero-indexed, so it needs to be incremented by 1. + +```yaml +jobs: + test: + parallelism: 4 + steps: + - checkout + - ruby/install-deps + - run: SHARD="$((${CIRCLE_NODE_INDEX}+1))"; bin/mt --shard ${SHARD}/${CIRCLE_NODE_TOTAL} ``` > [!TIP] -> `--split` will shuffle tests and automatically distribute slow tests evenly across jobs. +> `--shard` shuffles test files before dividing them into shards, which tends to spread slow tests evenly across jobs. The shuffle is seeded with the git commit SHA (when available), so every CI job computes the same split. 
## 🧑‍🔬 Watch Mode diff --git a/lib/mighty_test.rb b/lib/mighty_test.rb index 68e995d..aa8d767 100644 --- a/lib/mighty_test.rb +++ b/lib/mighty_test.rb @@ -5,6 +5,7 @@ module MightyTest autoload :FileSystem, "mighty_test/file_system" autoload :MinitestRunner, "mighty_test/minitest_runner" autoload :OptionParser, "mighty_test/option_parser" + autoload :Sharder, "mighty_test/sharder" autoload :TestParser, "mighty_test/test_parser" autoload :Watcher, "mighty_test/watcher" end diff --git a/lib/mighty_test/cli.rb b/lib/mighty_test/cli.rb index 304e6ab..bd30680 100644 --- a/lib/mighty_test/cli.rb +++ b/lib/mighty_test/cli.rb @@ -53,6 +53,8 @@ def run_test_by_line_number def run_tests_by_path test_paths = find_test_paths + test_paths = Sharder.from_argv(options[:shard], env:).shard(test_paths) if options[:shard] + run_tests_and_exit!(*test_paths) end diff --git a/lib/mighty_test/option_parser.rb b/lib/mighty_test/option_parser.rb index e047462..68274bf 100644 --- a/lib/mighty_test/option_parser.rb +++ b/lib/mighty_test/option_parser.rb @@ -13,6 +13,7 @@ def initialize BANNER op.on("--watch") { options[:watch] = true } + op.on("--shard SHARD") { |value| options[:shard] = value } op.on("-h", "--help") { options[:help] = true } op.on("--version") { options[:version] = true } end diff --git a/lib/mighty_test/sharder.rb b/lib/mighty_test/sharder.rb new file mode 100644 index 0000000..2981fd2 --- /dev/null +++ b/lib/mighty_test/sharder.rb @@ -0,0 +1,35 @@ +module MightyTest + class Sharder + DEFAULT_SEED = 123_456_789 + + def self.from_argv(value, env: ENV) + index, total = value.to_s.match(%r{\A(\d+)/(\d+)\z})&.captures&.map(&:to_i) + raise ArgumentError, "shard: value must be in the form INDEX/TOTAL (e.g. 2/8)" if total.nil? + + git_sha = env.values_at("GITHUB_SHA", "CIRCLE_SHA1").find { |sha| !sha.to_s.strip.empty? 
} + seed = git_sha&.unpack1("l_") + + new(index:, total:, seed:) + end + + attr_reader :index, :total, :seed + + def initialize(index:, total:, seed: nil) + raise ArgumentError, "shard: total shards must be a number greater than 0" unless total > 0 + + valid_group = index > 0 && index <= total + raise ArgumentError, "shard: shard index must be > 0 and <= #{total}" unless valid_group + + @index = index + @total = total + @seed = seed || DEFAULT_SEED + end + + def shard(*test_paths) + random = Random.new(seed) + shuffled_paths = test_paths.flatten.shuffle(random:) + slices = shuffled_paths.each_slice(total) + slices.filter_map { |slice| slice[index - 1] } + end + end +end diff --git a/test/mighty_test/cli_test.rb b/test/mighty_test/cli_test.rb index 843fd3e..d7ad4d8 100644 --- a/test/mighty_test/cli_test.rb +++ b/test/mighty_test/cli_test.rb @@ -102,6 +102,26 @@ def test_with_non_existent_path_raises_an_error assert_includes(error.message, "test/models/non_existent_test.rb does not exist") end + def test_divides_tests_into_shards + all = with_fake_minitest_runner do |runner, executed_tests| + cli_run(argv: [], chdir: fixtures_path.join("rails_project"), runner:) + executed_tests + end + + shards = %w[1/2 2/2].map do |shard| + with_fake_minitest_runner do |runner, executed_tests| + cli_run(argv: ["--shard", shard], chdir: fixtures_path.join("rails_project"), runner:) + executed_tests + end + end + + shards.each do |shard| + refute_empty shard + end + + assert_equal all.length, shards.sum(&:length) + end + private def with_fake_minitest_runner diff --git a/test/mighty_test/sharder_test.rb b/test/mighty_test/sharder_test.rb new file mode 100644 index 0000000..a4d7bb5 --- /dev/null +++ b/test/mighty_test/sharder_test.rb @@ -0,0 +1,71 @@ +require "test_helper" + +module MightyTest + class SharderTest < Minitest::Test + def test_it_parses_the_shard_value + sharder = Sharder.from_argv("2/7") + + assert_equal(2, sharder.index) + assert_equal(7, sharder.total) + end + + def 
test_it_raises_an_exception_on_an_invalid_format + error = assert_raises(ArgumentError) do + Sharder.from_argv("a/9") + end + + assert_includes(error.message, "value must be in the form INDEX/TOTAL") + end + + def test_it_raises_an_exception_on_an_invalid_index_value + error = assert_raises(ArgumentError) do + Sharder.from_argv("9/5") + end + + assert_includes(error.message, "index must be > 0 and <= 5") + end + + def test_it_raises_an_exception_on_an_invalid_total_value + error = assert_raises(ArgumentError) do + Sharder.from_argv("1/0") + end + + assert_includes(error.message, "total shards must be a number greater than 0") + end + + def test_it_has_a_default_hardcoded_seed + sharder = Sharder.from_argv("1/2", env: {}) + assert_equal(123_456_789, sharder.seed) + end + + def test_it_derives_a_seed_value_from_the_github_actions_env_var + sharder = Sharder.from_argv("1/2", env: { "GITHUB_SHA" => "b94d6d86a2281d690eafd7bb3282c7032999e85f" }) + assert_equal(3_906_982_861_516_061_026, sharder.seed) + end + + def test_it_derives_a_seed_value_from_the_circle_ci_env_var + sharder = Sharder.from_argv("1/2", env: { "CIRCLE_SHA1" => "189733eff795bd1ea7c586a5234a717f82e58b64" }) + assert_equal(7_378_359_859_579_271_217, sharder.seed) + end + + def test_for_a_given_seed_it_generates_a_stable_shuffled_result + sharder = Sharder.new(index: 1, total: 2, seed: 678) + result = sharder.shard(%w[a b c d e f]) + + assert_equal(%w[f e c], result) + end + + def test_it_divdes_items_into_roughly_equally_sized_shards + all = %w[a b c d e f g h i j k l m n o p q r] + shards = (1..4).map do |index| + Sharder.new(index:, total: 4).shard(all) + end + + shards.each do |shard| + assert_includes [4, 5], shard.length + end + + assert_equal all, shards.flatten.sort + end + end +end