train -> submission

gpu-mode · Jan 15, 2025 · a32eb71 · a32eb71
1 parent 285dee1
commit a32eb71
Show file tree

Hide file tree

Showing 11 changed files with 28 additions and 21 deletions.
diff --git a/.github/workflows/runner.py b/.github/workflows/runner.py
@@ -14,15 +14,15 @@
 if config["lang"] == "cu":
     comp, run = run_cuda_script(
         {"eval.cu": cu_eval},
-        {key: config[key] for key in ["reference.cuh", "train.cuh"] if key in config},
+        {key: config[key] for key in ["reference.cuh", "submission.cuh"] if key in config},
         arch=None,
     )
     result = {"compile": asdict(comp), "run": asdict(run)}
 else:
     run = run_pytorch_script(
         {
             "eval.py": py_eval,
-            **{key: config[key] for key in ["reference.py", "train.py"] if key in config},
+            **{key: config[key] for key in ["reference.py", "submission.py"] if key in config},
         },
         main="eval.py",
         arch=None,

diff --git a/docs/docs/creating-a-leaderboard/cuda-creations.md b/docs/docs/creating-a-leaderboard/cuda-creations.md
@@ -56,7 +56,7 @@ Let's break down what's going on in this relatively short file:
 #include <iostream>
 
 #include "reference.cuh"
-#include "train.cuh"
+#include "submission.cuh"
 
 #define WARMUP_RUNS 10
 #define TIMED_RUNS 100
@@ -118,7 +118,7 @@ int main() {
     return 0;
 }
 ```
-You'll notice that we include from headers named `reference.cuh` and `train.cuh`. These are the reference
+You'll notice that we include from headers named `reference.cuh` and `submission.cuh`. These are the reference
 code and submission code respectively, just renamed to fixed file names so we can include them. The
 general idea is that the evaluation code can treat the leaderboard as a basic abstraction, and only
 concern itself with three things:

diff --git a/docs/docs/creating-a-leaderboard/python-creations.md b/docs/docs/creating-a-leaderboard/python-creations.md
@@ -53,7 +53,7 @@ Let's break down what's going on in this relatively short file:
 import torch
 import time
 from reference import ref_kernel, generate_input, check_implementation
-from train import custom_kernel
+from submission import custom_kernel
 
 
 def correctness() -> bool:

diff --git a/scripts/ci_test_cuda.py b/scripts/ci_test_cuda.py
@@ -21,14 +21,14 @@ def test_does_not_compile():
     """
 
     comp, run = run_cuda_script(
-        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
+        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
     )
     assert comp.success is False
     assert run.success is False
     assert comp.nvcc_found is True
     assert comp.exit_code != ExitCode.SUCCESS
     assert comp.stdout == ""
-    assert 'train.cuh(2): error: identifier "input_tt" is undefined' in comp.stderr
+    assert 'submission.cuh(2): error: identifier "input_tt" is undefined' in comp.stderr
     assert '1 error detected in the compilation of "eval.cu".' in comp.stderr
     assert comp.command.startswith("/usr/local/cuda/bin/nvcc")
     assert "nvcc: NVIDIA (R) Cuda compiler driver" in comp.nvcc_version
@@ -55,7 +55,7 @@ def test_cuda_runtime_error():
 
     """
     comp, run = run_cuda_script(
-        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
+        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
     )
     assert comp.success is True
     assert run.success is False
@@ -85,7 +85,7 @@ def test_cuda_validation_fail():
 
         """
     comp, run = run_cuda_script(
-        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
+        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
     )
     assert comp.success is True
     assert run.success is True
@@ -102,7 +102,7 @@ def test_cuda_correct():
     sub = Path("examples/identity_cuda/submission.cuh").read_text()
 
     comp, run = run_cuda_script(
-        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
+        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
     )
     assert comp.success is True
     assert run.success is True

diff --git a/scripts/ci_test_python.py b/scripts/ci_test_python.py
@@ -21,7 +21,7 @@ def test_does_not_import():
     """
 
     run = run_pytorch_script(
-        {"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py"
+        {"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub}, "eval.py"
     )
     assert run.success is False
     assert run.exit_code != ExitCode.SUCCESS
@@ -37,7 +37,9 @@ def custom_kernel(input):
         """
 
     run = run_pytorch_script(
-        {"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py", arch=None
+        {"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub},
+        "eval.py",
+        arch=None,
     )
     assert run.success is True
     assert run.passed is False
@@ -53,7 +55,7 @@ def test_correct():
     sub = Path("examples/identity_py/submission.py").read_text()
 
     run = run_pytorch_script(
-        {"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py"
+        {"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub}, "eval.py"
     )
     assert run.success is True
     assert "warming up..." in run.stdout

diff --git a/scripts/local-test.py b/scripts/local-test.py
@@ -11,7 +11,7 @@
 
 cout, score = run_cuda_script(
     {"eval.cu": cu_eval},
-    {"reference.cuh": ref.read_text(), "train.cuh": sub.read_text()},
+    {"reference.cuh": ref.read_text(), "submission.cuh": sub.read_text()},
     arch=None,
 )
 print(cout)

diff --git a/src/discord-cluster-manager/cogs/modal_cog.py b/src/discord-cluster-manager/cogs/modal_cog.py
@@ -59,7 +59,7 @@ async def run_modal(
                 "**Running on Modal...**\n> ⏳ Waiting for available GPU..."
             )
 
-            filename = "train.py" if script.filename.endswith(".py") else "train.cu"
+            filename = "submission.py" if script.filename.endswith(".py") else "train.cu"
             reference_content = None
             if reference_script is not None or reference_code is not None:
                 reference_content = (

diff --git a/src/discord-cluster-manager/consts.py b/src/discord-cluster-manager/consts.py
@@ -67,7 +67,7 @@ def combine_enums(enums: list[Type[Enum]], combined_name: str) -> Enum:
 MODAL_PATH = "/tmp/dcs/"
 MODAL_EVAL_CODE_PATH = "/tmp/dcs/eval.py"
 MODAL_REFERENCE_CODE_PATH = "/tmp/dcs/reference.py"
-MODAL_SUBMISSION_CODE_PATH = "/tmp/dcs/train.py"
+MODAL_SUBMISSION_CODE_PATH = "/tmp/dcs/submission.py"
 
 
 # Compilation flags for Modal

diff --git a/src/discord-cluster-manager/eval.cu b/src/discord-cluster-manager/eval.cu
@@ -7,7 +7,7 @@
 #include <memory>
 
 #include "reference.cuh"
-#include "train.cuh"
+#include "submission.cuh"
 
 #define WARMUP_RUNS 10
 #define TIMED_RUNS 100

diff --git a/src/discord-cluster-manager/eval.py b/src/discord-cluster-manager/eval.py
@@ -5,7 +5,7 @@
 
 import torch
 from reference import check_implementation, generate_input, ref_kernel
-from train import custom_kernel
+from submission import custom_kernel
 
 
 class PopcornLogger:

diff --git a/src/discord-cluster-manager/modal_runner.py b/src/discord-cluster-manager/modal_runner.py
@@ -20,7 +20,12 @@
 
 # Move this to another file later:
 python_image = Image.debian_slim(python_version="3.10").pip_install(
-    ["torch", "triton", "jax[cuda12]", "jax2torch"]
+    [
+        "torch",
+        "triton",
+        "jax[cuda12]",
+        "jax2torch",
+    ]
 )
 
 cuda_image = (
@@ -83,7 +88,7 @@ def modal_run_pytorch_script(  # noqa: C901
                 {
                     "eval.py": script_content,
                     "reference.py": reference_content,
-                    "train.py": submission_content,
+                    "submission.py": submission_content,
                 },
                 "eval.py",
             )
@@ -109,7 +114,7 @@ def modal_run_cuda_script(  # # noqa: C901
         with timeout(timeout_seconds):
             comp, run = run_cuda_script(
                 {"eval.cu": script_content},
-                {"reference.cuh": reference_content, "train.cuh": submission_content},
+                {"reference.cuh": reference_content, "submission.cuh": submission_content},
                 arch=arch,
                 include_dirs=MODAL_CUDA_INCLUDE_DIRS,
             )