Skip to content

Commit

Permalink
train -> submission
Browse files Browse the repository at this point in the history
  • Loading branch information
alexzhang13 committed Jan 15, 2025
1 parent 285dee1 commit a32eb71
Show file tree
Hide file tree
Showing 11 changed files with 28 additions and 21 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@
if config["lang"] == "cu":
comp, run = run_cuda_script(
{"eval.cu": cu_eval},
{key: config[key] for key in ["reference.cuh", "train.cuh"] if key in config},
{key: config[key] for key in ["reference.cuh", "submission.cuh"] if key in config},
arch=None,
)
result = {"compile": asdict(comp), "run": asdict(run)}
else:
run = run_pytorch_script(
{
"eval.py": py_eval,
**{key: config[key] for key in ["reference.py", "train.py"] if key in config},
**{key: config[key] for key in ["reference.py", "submission.py"] if key in config},
},
main="eval.py",
arch=None,
Expand Down
4 changes: 2 additions & 2 deletions docs/docs/creating-a-leaderboard/cuda-creations.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ Let's break down what's going on in this relatively short file:
#include <iostream>

#include "reference.cuh"
#include "train.cuh"
#include "submission.cuh"

#define WARMUP_RUNS 10
#define TIMED_RUNS 100
Expand Down Expand Up @@ -118,7 +118,7 @@ int main() {
return 0;
}
```
You'll notice that we include from headers named `reference.cuh` and `train.cuh`. These are the reference
You'll notice that we include from headers named `reference.cuh` and `submission.cuh`. These are the reference
code and submission code respectively, just renamed to fixed module names so we can include them. The
general idea is that the evaluation code can treat the leaderboard as a basic abstraction, and only
concern itself with three things:
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/creating-a-leaderboard/python-creations.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Let's break down what's going on in this relatively short file:
import torch
import time
from reference import ref_kernel, generate_input, check_implementation
from train import custom_kernel
from submission import custom_kernel


def correctness() -> bool:
Expand Down
10 changes: 5 additions & 5 deletions scripts/ci_test_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ def test_does_not_compile():
"""

comp, run = run_cuda_script(
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
)
assert comp.success is False
assert run.success is False
assert comp.nvcc_found is True
assert comp.exit_code != ExitCode.SUCCESS
assert comp.stdout == ""
assert 'train.cuh(2): error: identifier "input_tt" is undefined' in comp.stderr
assert 'submission.cuh(2): error: identifier "input_tt" is undefined' in comp.stderr
assert '1 error detected in the compilation of "eval.cu".' in comp.stderr
assert comp.command.startswith("/usr/local/cuda/bin/nvcc")
assert "nvcc: NVIDIA (R) Cuda compiler driver" in comp.nvcc_version
Expand All @@ -55,7 +55,7 @@ def test_cuda_runtime_error():
"""
comp, run = run_cuda_script(
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
)
assert comp.success is True
assert run.success is False
Expand Down Expand Up @@ -85,7 +85,7 @@ def test_cuda_validation_fail():
"""
comp, run = run_cuda_script(
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
)
assert comp.success is True
assert run.success is True
Expand All @@ -102,7 +102,7 @@ def test_cuda_correct():
sub = Path("examples/identity_cuda/submission.cuh").read_text()

comp, run = run_cuda_script(
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
)
assert comp.success is True
assert run.success is True
Expand Down
8 changes: 5 additions & 3 deletions scripts/ci_test_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test_does_not_import():
"""

run = run_pytorch_script(
{"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py"
{"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub}, "eval.py"
)
assert run.success is False
assert run.exit_code != ExitCode.SUCCESS
Expand All @@ -37,7 +37,9 @@ def custom_kernel(input):
"""

run = run_pytorch_script(
{"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py", arch=None
{"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub},
"eval.py",
arch=None,
)
assert run.success is True
assert run.passed is False
Expand All @@ -53,7 +55,7 @@ def test_correct():
sub = Path("examples/identity_py/submission.py").read_text()

run = run_pytorch_script(
{"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py"
{"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub}, "eval.py"
)
assert run.success is True
assert "warming up..." in run.stdout
Expand Down
2 changes: 1 addition & 1 deletion scripts/local-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

cout, score = run_cuda_script(
{"eval.cu": cu_eval},
{"reference.cuh": ref.read_text(), "train.cuh": sub.read_text()},
{"reference.cuh": ref.read_text(), "submission.cuh": sub.read_text()},
arch=None,
)
print(cout)
Expand Down
2 changes: 1 addition & 1 deletion src/discord-cluster-manager/cogs/modal_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ async def run_modal(
"**Running on Modal...**\n> ⏳ Waiting for available GPU..."
)

filename = "train.py" if script.filename.endswith(".py") else "train.cu"
filename = "submission.py" if script.filename.endswith(".py") else "train.cu"
reference_content = None
if reference_script is not None or reference_code is not None:
reference_content = (
Expand Down
2 changes: 1 addition & 1 deletion src/discord-cluster-manager/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def combine_enums(enums: list[Type[Enum]], combined_name: str) -> Enum:
MODAL_PATH = "/tmp/dcs/"
MODAL_EVAL_CODE_PATH = "/tmp/dcs/eval.py"
MODAL_REFERENCE_CODE_PATH = "/tmp/dcs/reference.py"
MODAL_SUBMISSION_CODE_PATH = "/tmp/dcs/train.py"
MODAL_SUBMISSION_CODE_PATH = "/tmp/dcs/submission.py"


# Compilation flags for Modal
Expand Down
2 changes: 1 addition & 1 deletion src/discord-cluster-manager/eval.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include <memory>

#include "reference.cuh"
#include "train.cuh"
#include "submission.cuh"

#define WARMUP_RUNS 10
#define TIMED_RUNS 100
Expand Down
2 changes: 1 addition & 1 deletion src/discord-cluster-manager/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import torch
from reference import check_implementation, generate_input, ref_kernel
from train import custom_kernel
from submission import custom_kernel


class PopcornLogger:
Expand Down
11 changes: 8 additions & 3 deletions src/discord-cluster-manager/modal_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@

# Move this to another file later:
python_image = Image.debian_slim(python_version="3.10").pip_install(
["torch", "triton", "jax[cuda12]", "jax2torch"]
[
"torch",
"triton",
"jax[cuda12]",
"jax2torch",
]
)

cuda_image = (
Expand Down Expand Up @@ -83,7 +88,7 @@ def modal_run_pytorch_script( # noqa: C901
{
"eval.py": script_content,
"reference.py": reference_content,
"train.py": submission_content,
"submission.py": submission_content,
},
"eval.py",
)
Expand All @@ -109,7 +114,7 @@ def modal_run_cuda_script( # # noqa: C901
with timeout(timeout_seconds):
comp, run = run_cuda_script(
{"eval.cu": script_content},
{"reference.cuh": reference_content, "train.cuh": submission_content},
{"reference.cuh": reference_content, "submission.cuh": submission_content},
arch=arch,
include_dirs=MODAL_CUDA_INCLUDE_DIRS,
)
Expand Down

0 comments on commit a32eb71

Please sign in to comment.