mila-iqia · satyaog · Nov 25, 2024
diff --git a/milabench/commands/executors.py b/milabench/commands/executors.py
@@ -139,6 +139,7 @@ async def execute_command(
                     # kill the underlying process which should force the coro to 
                     # return on next wait
                     pack = packs[timedout]
+                    await pack.send(event="stop", data=None)
                     await force_terminate_now(pack, max_delay)
 
                 # Grace period

diff --git a/tests/config/early_stop.yaml b/tests/config/early_stop.yaml
@@ -0,0 +1,20 @@
+_defaults:
+  max_duration: 1
+  voir:
+    options:
+      stop: 10  # same count as the 10 values in range(--start, --end) below
+      interval: "1s"
+
+benchio:
+  inherits: _defaults
+  definition: ../yoshua-benchio
+  plan:
+    method: njobs
+    n: 1
+  tags:
+    - monogpu
+
+  argv:
+    --sleep: 60  # NOTE(review): presumably meant to outlast the test's forced timeout - confirm
+    --start: 1
+    --end: 11
diff --git a/tests/test_mock.py b/tests/test_mock.py
@@ -2,11 +2,14 @@
 import os
 
 import milabench.alt_async
+from milabench.commands import Command
 import milabench.commands.executors
 from milabench.testing import resolved_config
 
 import pytest
 
+TEST_FOLDER = os.path.dirname(__file__)
+
 # benchmark that cannot be prepared because they are too big
 OVERSIZED_BENCHMARKS = {
     "llm-lora-single",
@@ -125,6 +128,23 @@ def test_milabench(monkeypatch, bench, module_tmp_dir, standard_config):
     # shutil.rmtree(module_tmp_dir)
 
 
+def test_early_stop(monkeypatch):
+    """Run the early_stop.yaml config with every timeout_delay forced to 1."""
+    original_execute = Command.execute
+
+    async def _forced_delay(self, *args, timeout_delay=None, **kwargs):
+        # Whatever timeout_delay the caller passed is ignored on purpose.
+        return await original_execute(self, *args, timeout_delay=1, **kwargs)
+
+    monkeypatch.setattr(Command, "execute", _forced_delay)
+
+    config_path = os.path.join(TEST_FOLDER, "config", "early_stop.yaml")
+    run_cli(
+        "run", "--base", "/tmp", "--config", config_path,
+        "--use-current-env", "--no-report",
+    )
+
+
 ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
 def cleanpath(out, tmppath):
     import subprocess

diff --git a/tests/yoshua-benchio/main.py b/tests/yoshua-benchio/main.py
@@ -30,16 +30,17 @@ def main():
 
     args = parser.parse_args()
 
-    if args.sleep is not None:
-        time.sleep(args.sleep)
-
     data = [[[i]] for i in range(args.start, args.end)]
 
     if args.bad:
         raise RuntimeError()
 
     for [[x]] in voir.iterate("train", data, True):
         give(loss=1 / x)
+    give(rate=args.end - args.start)
+
+    if args.sleep is not None:
+        time.sleep(args.sleep)
 
 
 if __name__ == "__main__":