Skip to content

Commit

Permalink
fix runner
Browse files Browse the repository at this point in the history
to squash, fix arguments for testing runner

to squash, fix arguments for testing runner

to squash, fix arguments for testing runner

for now, fix to train until we fix naming

train -> submission
  • Loading branch information
alexzhang13 committed Jan 15, 2025
1 parent f3d456d commit d50e0d3
Show file tree
Hide file tree
Showing 11 changed files with 31 additions and 27 deletions.
13 changes: 5 additions & 8 deletions .github/workflows/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,18 @@

if config["lang"] == "cu":
comp, run = run_cuda_script(
{
"eval.cu": config.get("eval.cu", cu_eval),
"reference.cuh": config.get("reference.cuh", None),
"submission.cuh": config.get("submission.cuh", None),
},
{"eval.cu": cu_eval},
{key: config[key] for key in ["reference.cuh", "submission.cuh"] if key in config},
arch=None,
)
result = {"compile": asdict(comp), "run": asdict(run)}
else:
run = run_pytorch_script(
{
"eval.py": config.get("eval.py", py_eval),
"reference.py": config.get("reference.py", None),
"submission.py": config.get("submission.py", None),
"eval.py": py_eval,
**{key: config[key] for key in ["reference.py", "submission.py"] if key in config},
},
main="eval.py",
arch=None,
)
result = {"run": asdict(run)}
Expand Down
4 changes: 2 additions & 2 deletions docs/docs/creating-a-leaderboard/cuda-creations.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ Let's break down what's going on in this relatively short file:
#include <iostream>

#include "reference.cuh"
#include "train.cuh"
#include "submission.cuh"

#define WARMUP_RUNS 10
#define TIMED_RUNS 100
Expand Down Expand Up @@ -118,7 +118,7 @@ int main() {
return 0;
}
```
You'll notice that we include from headers named `reference.cuh` and `train.cuh`. These are the reference
You'll notice that we include from headers named `reference.cuh` and `submission.cuh`. These are the reference
code and submission code respectively, just renamed to a fixed module name so we can include them. The
general idea is that the evaluation code can treat the leaderboard as a basic abstraction, and only
concern itself with three things:
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/creating-a-leaderboard/python-creations.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Let's break down what's going on in this relatively short file:
import torch
import time
from reference import ref_kernel, generate_input, check_implementation
from train import custom_kernel
from submission import custom_kernel


def correctness() -> bool:
Expand Down
10 changes: 5 additions & 5 deletions scripts/ci_test_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ def test_does_not_compile():
"""

comp, run = run_cuda_script(
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
)
assert comp.success is False
assert run.success is False
assert comp.nvcc_found is True
assert comp.exit_code != ExitCode.SUCCESS
assert comp.stdout == ""
assert 'train.cuh(2): error: identifier "input_tt" is undefined' in comp.stderr
assert 'submission.cuh(2): error: identifier "input_tt" is undefined' in comp.stderr
assert '1 error detected in the compilation of "eval.cu".' in comp.stderr
assert comp.command.startswith("/usr/local/cuda/bin/nvcc")
assert "nvcc: NVIDIA (R) Cuda compiler driver" in comp.nvcc_version
Expand All @@ -55,7 +55,7 @@ def test_cuda_runtime_error():
"""
comp, run = run_cuda_script(
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
)
assert comp.success is True
assert run.success is False
Expand Down Expand Up @@ -85,7 +85,7 @@ def test_cuda_validation_fail():
"""
comp, run = run_cuda_script(
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
)
assert comp.success is True
assert run.success is True
Expand All @@ -102,7 +102,7 @@ def test_cuda_correct():
sub = Path("examples/identity_cuda/submission.cuh").read_text()

comp, run = run_cuda_script(
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
)
assert comp.success is True
assert run.success is True
Expand Down
8 changes: 5 additions & 3 deletions scripts/ci_test_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test_does_not_import():
"""

run = run_pytorch_script(
{"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py"
{"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub}, "eval.py"
)
assert run.success is False
assert run.exit_code != ExitCode.SUCCESS
Expand All @@ -37,7 +37,9 @@ def custom_kernel(input):
"""

run = run_pytorch_script(
{"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py", arch=None
{"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub},
"eval.py",
arch=None,
)
assert run.success is True
assert run.passed is False
Expand All @@ -53,7 +55,7 @@ def test_correct():
sub = Path("examples/identity_py/submission.py").read_text()

run = run_pytorch_script(
{"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py"
{"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub}, "eval.py"
)
assert run.success is True
assert "warming up..." in run.stdout
Expand Down
2 changes: 1 addition & 1 deletion scripts/local-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

cout, score = run_cuda_script(
{"eval.cu": cu_eval},
{"reference.cuh": ref.read_text(), "train.cuh": sub.read_text()},
{"reference.cuh": ref.read_text(), "submission.cuh": sub.read_text()},
arch=None,
)
print(cout)
Expand Down
2 changes: 1 addition & 1 deletion src/discord-cluster-manager/cogs/modal_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ async def run_modal(
"**Running on Modal...**\n> ⏳ Waiting for available GPU..."
)

filename = "train.py" if script.filename.endswith(".py") else "train.cu"
filename = "submission.py" if script.filename.endswith(".py") else "train.cu"
reference_content = None
if reference_script is not None or reference_code is not None:
reference_content = (
Expand Down
2 changes: 1 addition & 1 deletion src/discord-cluster-manager/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def combine_enums(enums: list[Type[Enum]], combined_name: str) -> Enum:
MODAL_PATH = "/tmp/dcs/"
MODAL_EVAL_CODE_PATH = "/tmp/dcs/eval.py"
MODAL_REFERENCE_CODE_PATH = "/tmp/dcs/reference.py"
MODAL_SUBMISSION_CODE_PATH = "/tmp/dcs/train.py"
MODAL_SUBMISSION_CODE_PATH = "/tmp/dcs/submission.py"


# Compilation flags for Modal
Expand Down
2 changes: 1 addition & 1 deletion src/discord-cluster-manager/eval.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include <memory>

#include "reference.cuh"
#include "train.cuh"
#include "submission.cuh"

#define WARMUP_RUNS 10
#define TIMED_RUNS 100
Expand Down
2 changes: 1 addition & 1 deletion src/discord-cluster-manager/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import torch
from reference import check_implementation, generate_input, ref_kernel
from train import custom_kernel
from submission import custom_kernel


class PopcornLogger:
Expand Down
11 changes: 8 additions & 3 deletions src/discord-cluster-manager/modal_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@

# Move this to another file later:
python_image = Image.debian_slim(python_version="3.10").pip_install(
["torch", "triton", "jax[cuda12]", "jax2torch"]
[
"torch",
"triton",
"jax[cuda12]",
"jax2torch",
]
)

cuda_image = (
Expand Down Expand Up @@ -83,7 +88,7 @@ def modal_run_pytorch_script( # noqa: C901
{
"eval.py": script_content,
"reference.py": reference_content,
"train.py": submission_content,
"submission.py": submission_content,
},
"eval.py",
)
Expand All @@ -109,7 +114,7 @@ def modal_run_cuda_script( # # noqa: C901
with timeout(timeout_seconds):
comp, run = run_cuda_script(
{"eval.cu": script_content},
{"reference.cuh": reference_content, "train.cuh": submission_content},
{"reference.cuh": reference_content, "submission.cuh": submission_content},
arch=arch,
include_dirs=MODAL_CUDA_INCLUDE_DIRS,
)
Expand Down

0 comments on commit d50e0d3

Please sign in to comment.