fix runner

Squashed fixup commits: "to squash, fix arguments for testing runner" (three times); "for now, fix to train until we fix naming"; "train -> submission".
gpu-mode · Jan 15, 2025 · d50e0d3 · d50e0d3
1 parent f3d456d
commit d50e0d3
Show file tree

Hide file tree

Showing 11 changed files with 31 additions and 27 deletions.
diff --git a/.github/workflows/runner.py b/.github/workflows/runner.py
@@ -13,21 +13,18 @@
 
 if config["lang"] == "cu":
     comp, run = run_cuda_script(
-        {
-            "eval.cu": config.get("eval.cu", cu_eval),
-            "reference.cuh": config.get("reference.cuh", None),
-            "submission.cuh": config.get("submission.cuh", None),
-        },
+        {"eval.cu": cu_eval},
+        {key: config[key] for key in ["reference.cuh", "submission.cuh"] if key in config},
         arch=None,
     )
     result = {"compile": asdict(comp), "run": asdict(run)}
 else:
     run = run_pytorch_script(
         {
-            "eval.py": config.get("eval.py", py_eval),
-            "reference.py": config.get("reference.py", None),
-            "submission.py": config.get("submission.py", None),
+            "eval.py": py_eval,
+            **{key: config[key] for key in ["reference.py", "submission.py"] if key in config},
         },
+        main="eval.py",
         arch=None,
     )
     result = {"run": asdict(run)}

diff --git a/docs/docs/creating-a-leaderboard/cuda-creations.md b/docs/docs/creating-a-leaderboard/cuda-creations.md
@@ -56,7 +56,7 @@ Let's break down what's going on in this relatively short file:
 #include <iostream>
 
 #include "reference.cuh"
-#include "train.cuh"
+#include "submission.cuh"
 
 #define WARMUP_RUNS 10
 #define TIMED_RUNS 100
@@ -118,7 +118,7 @@ int main() {
     return 0;
 }
 ```
-You'll notice that we include from headers named `reference.cuh` and `train.cuh`. These are the reference
+You'll notice that we include from headers named `reference.cuh` and `submission.cuh`. These are the reference
 code and submission code respectively, just renamed to a fix module so we can include them. The
 general idea is that the evaluation code can treat the leaderboard as a basic abstraction, and only
 concern itself with three things:

diff --git a/docs/docs/creating-a-leaderboard/python-creations.md b/docs/docs/creating-a-leaderboard/python-creations.md
@@ -53,7 +53,7 @@ Let's break down what's going on in this relatively short file:
 import torch
 import time
 from reference import ref_kernel, generate_input, check_implementation
-from train import custom_kernel
+from submission import custom_kernel
 
 
 def correctness() -> bool:

diff --git a/scripts/ci_test_cuda.py b/scripts/ci_test_cuda.py
@@ -21,14 +21,14 @@ def test_does_not_compile():
     """
 
     comp, run = run_cuda_script(
-        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
+        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
     )
     assert comp.success is False
     assert run.success is False
     assert comp.nvcc_found is True
     assert comp.exit_code != ExitCode.SUCCESS
     assert comp.stdout == ""
-    assert 'train.cuh(2): error: identifier "input_tt" is undefined' in comp.stderr
+    assert 'submission.cuh(2): error: identifier "input_tt" is undefined' in comp.stderr
     assert '1 error detected in the compilation of "eval.cu".' in comp.stderr
     assert comp.command.startswith("/usr/local/cuda/bin/nvcc")
     assert "nvcc: NVIDIA (R) Cuda compiler driver" in comp.nvcc_version
@@ -55,7 +55,7 @@ def test_cuda_runtime_error():
 
     """
     comp, run = run_cuda_script(
-        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
+        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
     )
     assert comp.success is True
     assert run.success is False
@@ -85,7 +85,7 @@ def test_cuda_validation_fail():
 
         """
     comp, run = run_cuda_script(
-        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
+        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
     )
     assert comp.success is True
     assert run.success is True
@@ -102,7 +102,7 @@ def test_cuda_correct():
     sub = Path("examples/identity_cuda/submission.cuh").read_text()
 
     comp, run = run_cuda_script(
-        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
+        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
     )
     assert comp.success is True
     assert run.success is True

diff --git a/scripts/ci_test_python.py b/scripts/ci_test_python.py
@@ -21,7 +21,7 @@ def test_does_not_import():
     """
 
     run = run_pytorch_script(
-        {"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py"
+        {"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub}, "eval.py"
     )
     assert run.success is False
     assert run.exit_code != ExitCode.SUCCESS
@@ -37,7 +37,9 @@ def custom_kernel(input):
         """
 
     run = run_pytorch_script(
-        {"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py", arch=None
+        {"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub},
+        "eval.py",
+        arch=None,
     )
     assert run.success is True
     assert run.passed is False
@@ -53,7 +55,7 @@ def test_correct():
     sub = Path("examples/identity_py/submission.py").read_text()
 
     run = run_pytorch_script(
-        {"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py"
+        {"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub}, "eval.py"
     )
     assert run.success is True
     assert "warming up..." in run.stdout

diff --git a/scripts/local-test.py b/scripts/local-test.py
@@ -11,7 +11,7 @@
 
 cout, score = run_cuda_script(
     {"eval.cu": cu_eval},
-    {"reference.cuh": ref.read_text(), "train.cuh": sub.read_text()},
+    {"reference.cuh": ref.read_text(), "submission.cuh": sub.read_text()},
     arch=None,
 )
 print(cout)

diff --git a/src/discord-cluster-manager/cogs/modal_cog.py b/src/discord-cluster-manager/cogs/modal_cog.py
@@ -59,7 +59,7 @@ async def run_modal(
                 "**Running on Modal...**\n> ⏳ Waiting for available GPU..."
             )
 
-            filename = "train.py" if script.filename.endswith(".py") else "train.cu"
+            filename = "submission.py" if script.filename.endswith(".py") else "train.cu"
             reference_content = None
             if reference_script is not None or reference_code is not None:
                 reference_content = (

diff --git a/src/discord-cluster-manager/consts.py b/src/discord-cluster-manager/consts.py
@@ -67,7 +67,7 @@ def combine_enums(enums: list[Type[Enum]], combined_name: str) -> Enum:
 MODAL_PATH = "/tmp/dcs/"
 MODAL_EVAL_CODE_PATH = "/tmp/dcs/eval.py"
 MODAL_REFERENCE_CODE_PATH = "/tmp/dcs/reference.py"
-MODAL_SUBMISSION_CODE_PATH = "/tmp/dcs/train.py"
+MODAL_SUBMISSION_CODE_PATH = "/tmp/dcs/submission.py"
 
 
 # Compilation flags for Modal

diff --git a/src/discord-cluster-manager/eval.cu b/src/discord-cluster-manager/eval.cu
@@ -7,7 +7,7 @@
 #include <memory>
 
 #include "reference.cuh"
-#include "train.cuh"
+#include "submission.cuh"
 
 #define WARMUP_RUNS 10
 #define TIMED_RUNS 100

diff --git a/src/discord-cluster-manager/eval.py b/src/discord-cluster-manager/eval.py
@@ -5,7 +5,7 @@
 
 import torch
 from reference import check_implementation, generate_input, ref_kernel
-from train import custom_kernel
+from submission import custom_kernel
 
 
 class PopcornLogger:

diff --git a/src/discord-cluster-manager/modal_runner.py b/src/discord-cluster-manager/modal_runner.py
@@ -20,7 +20,12 @@
 
 # Move this to another file later:
 python_image = Image.debian_slim(python_version="3.10").pip_install(
-    ["torch", "triton", "jax[cuda12]", "jax2torch"]
+    [
+        "torch",
+        "triton",
+        "jax[cuda12]",
+        "jax2torch",
+    ]
 )
 
 cuda_image = (
@@ -83,7 +88,7 @@ def modal_run_pytorch_script(  # noqa: C901
                 {
                     "eval.py": script_content,
                     "reference.py": reference_content,
-                    "train.py": submission_content,
+                    "submission.py": submission_content,
                 },
                 "eval.py",
             )
@@ -109,7 +114,7 @@ def modal_run_cuda_script(  # # noqa: C901
         with timeout(timeout_seconds):
             comp, run = run_cuda_script(
                 {"eval.cu": script_content},
-                {"reference.cuh": reference_content, "train.cuh": submission_content},
+                {"reference.cuh": reference_content, "submission.cuh": submission_content},
                 arch=arch,
                 include_dirs=MODAL_CUDA_INCLUDE_DIRS,
             )