-
-
Notifications
You must be signed in to change notification settings - Fork 623
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fixed failing tests for mps device #3143
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,8 @@ def test_no_distrib(capsys): | |
assert idist.backend() is None | ||
if torch.cuda.is_available(): | ||
assert idist.device().type == "cuda" | ||
elif torch.backends.mps.is_available(): | ||
assert idist.device().type == "mps" | ||
else: | ||
assert idist.device().type == "cpu" | ||
assert idist.get_rank() == 0 | ||
|
@@ -43,6 +45,8 @@ def test_no_distrib(capsys): | |
assert "ignite.distributed.utils INFO: backend: None" in out[-1] | ||
if torch.cuda.is_available(): | ||
assert "ignite.distributed.utils INFO: device: cuda" in out[-1] | ||
elif torch.backends.mps.is_available(): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here |
||
assert "ignite.distributed.utils INFO: device: mps" in out[-1] | ||
else: | ||
assert "ignite.distributed.utils INFO: device: cpu" in out[-1] | ||
assert "ignite.distributed.utils INFO: rank: 0" in out[-1] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,13 +32,11 @@ def __init__(self, output_as_list=False): | |
self.output_as_list = output_as_list | ||
self.fc = torch.nn.Linear(1, 1, bias=False) | ||
|
||
def forward(self, x, bias=None): | ||
if bias is None: | ||
bias = 0.0 | ||
def forward(self, x): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's revert this change. |
||
if self.output_as_list: | ||
return self.fc(x) + bias, self.fc(x) + bias | ||
return self.fc(x), self.fc(x) | ||
|
||
return self.fc(x) + bias | ||
return self.fc(x) | ||
|
||
|
||
def _default_create_supervised_trainer( | ||
|
@@ -49,7 +47,6 @@ def _default_create_supervised_trainer( | |
amp_mode: str = None, | ||
scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, | ||
with_model_transform: bool = False, | ||
with_model_fn: bool = False, | ||
): | ||
if with_model_transform: | ||
|
||
|
@@ -69,8 +66,8 @@ def get_first_element(output): | |
optimizer = SGD(model.parameters(), 0.1) | ||
|
||
if trace: | ||
example_inputs = (torch.randn(1), torch.randn(1)) if with_model_fn else torch.randn(1) | ||
model = torch.jit.trace(model, example_inputs) | ||
example_input = torch.randn(1) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here, let's revert this. You probably need to merge the latest upstream changes into your branch. |
||
model = torch.jit.trace(model, example_input) | ||
|
||
if amp_mode == "apex" and model_device == trainer_device == "cuda": | ||
from apex import amp | ||
|
@@ -87,9 +84,6 @@ def get_first_element(output): | |
scaler=scaler, | ||
gradient_accumulation_steps=gradient_accumulation_steps, | ||
model_transform=model_transform if model_transform is not None else lambda x: x, | ||
model_fn=(lambda model, x: model(x, torch.tensor([0.01], device=model_device))) | ||
if with_model_fn | ||
else (lambda model, x: model(x)), | ||
) | ||
assert model.fc.weight.data[0, 0].item() == approx(0.0) | ||
return trainer, model | ||
|
@@ -103,7 +97,6 @@ def _test_create_supervised_trainer( | |
amp_mode: str = None, | ||
scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, | ||
with_model_transform: bool = False, | ||
with_model_fn: bool = False, | ||
): | ||
trainer, model = _default_create_supervised_trainer( | ||
gradient_accumulation_steps=gradient_accumulation_steps, | ||
|
@@ -113,13 +106,10 @@ def _test_create_supervised_trainer( | |
amp_mode=amp_mode, | ||
scaler=scaler, | ||
with_model_transform=with_model_transform, | ||
with_model_fn=with_model_fn, | ||
) | ||
|
||
x = torch.tensor([[0.01], [0.02], [0.03], [0.04], [0.05]]) | ||
y = torch.tensor([[0.015], [0.025], [0.035], [0.045], [0.055]]) | ||
if with_model_fn: | ||
y += 0.01 | ||
data = [(_x, _y) for _x, _y in zip(x, y)] | ||
|
||
theta = [0.0] | ||
|
@@ -131,14 +121,12 @@ def _(): | |
assert model.fc.weight.grad != 0 | ||
_x, _y = trainer.state.batch | ||
_x, _y = _x.to(model_device), _y.to(model_device) | ||
bias = 0.01 if with_model_fn else 0.0 | ||
accumulation[0] += 0.2 * _x.item() * (theta[0] * _x.item() - (_y.item() - bias)) | ||
accumulation[0] += 0.2 * _x.item() * (theta[0] * _x.item() - _y.item()) | ||
# value of loss should not be accumulated | ||
_y_pred = model(_x, torch.tensor([bias], device=model_device)) if with_model_fn else model(_x) | ||
if with_model_transform: | ||
_y_pred = _y_pred[0] | ||
|
||
loss[0] = mse_loss(_y_pred, _y).item() | ||
loss[0] = mse_loss(model(_x)[0], _y).item() | ||
else: | ||
loss[0] = mse_loss(model(_x), _y).item() | ||
|
||
@trainer.on(Events.ITERATION_COMPLETED(every=gradient_accumulation_steps)) | ||
def _(): | ||
|
@@ -232,7 +220,6 @@ def _default_create_supervised_evaluator( | |
trace: bool = False, | ||
amp_mode: str = None, | ||
with_model_transform: bool = False, | ||
with_model_fn: bool = False, | ||
): | ||
if with_model_transform: | ||
|
||
|
@@ -251,17 +238,14 @@ def get_first_element(output): | |
model.fc.weight.data.zero_() | ||
|
||
if trace: | ||
example_inputs = (torch.randn(1), torch.randn(1)) if with_model_fn else torch.randn(1) | ||
model = torch.jit.trace(model, example_inputs) | ||
example_input = torch.randn(1, 1) | ||
model = torch.jit.trace(model, example_input) | ||
|
||
evaluator = create_supervised_evaluator( | ||
model, | ||
device=evaluator_device, | ||
amp_mode=amp_mode, | ||
model_transform=model_transform if model_transform is not None else lambda x: x, | ||
model_fn=(lambda model, x: model(x, torch.tensor([0.01], device=model_device))) | ||
if with_model_fn | ||
else (lambda model, x: model(x)), | ||
) | ||
|
||
assert model.fc.weight.data[0, 0].item() == approx(0.0) | ||
|
@@ -275,29 +259,27 @@ def _test_create_supervised_evaluator( | |
trace: bool = False, | ||
amp_mode: str = None, | ||
with_model_transform: bool = False, | ||
with_model_fn: bool = False, | ||
): | ||
model, evaluator = _default_create_supervised_evaluator( | ||
model_device=model_device, | ||
evaluator_device=evaluator_device, | ||
trace=trace, | ||
amp_mode=amp_mode, | ||
with_model_transform=with_model_transform, | ||
with_model_fn=with_model_fn, | ||
) | ||
x = torch.tensor([[1.0], [2.0]]) | ||
y = torch.tensor([[3.0], [5.0]]) | ||
if with_model_fn: | ||
y += 0.01 | ||
x = torch.tensor([[1.0], [2.0]], device=model_device) | ||
y = torch.tensor([[3.0], [5.0]], device=evaluator_device) | ||
data = [(x, y)] | ||
|
||
if model_device == evaluator_device or ((model_device == "cpu") ^ (evaluator_device == "cpu")): | ||
if ( | ||
model_device == evaluator_device | ||
or ((model_device == "cpu") ^ (evaluator_device == "cpu")) | ||
or ((model_device == "mps") ^ (evaluator_device == "mps")) | ||
): | ||
state = evaluator.run(data) | ||
|
||
y_pred, y = state.output | ||
if with_model_fn: | ||
y_pred -= 0.01 | ||
y -= 0.01 | ||
|
||
assert y_pred[0, 0].item() == approx(0.0) | ||
assert y_pred[1, 0].item() == approx(0.0) | ||
assert y[0, 0].item() == approx(3.0) | ||
|
@@ -420,7 +402,6 @@ def test_create_supervised_trainer(trainer_device, trace): | |
_test_create_supervised_trainer(gradient_accumulation_steps=1, trainer_device=trainer_device, trace=trace) | ||
_test_create_supervised_trainer(gradient_accumulation_steps=3, trainer_device=trainer_device, trace=trace) | ||
_test_create_supervised_trainer(with_model_transform=True, trainer_device=trainer_device, trace=trace) | ||
_test_create_supervised_trainer(with_model_fn=True, trainer_device=trainer_device, trace=trace) | ||
_test_create_mocked_supervised_trainer(trainer_device=trainer_device, trace=trace) | ||
|
||
|
||
|
@@ -618,8 +599,6 @@ def test_create_supervised_trainer_on_cuda_with_model_on_cpu(): | |
|
||
def test_create_supervised_evaluator(): | ||
_test_create_supervised_evaluator() | ||
_test_create_supervised_evaluator(with_model_transform=True) | ||
_test_create_supervised_evaluator(with_model_fn=True) | ||
_test_mocked_supervised_evaluator() | ||
|
||
# older versions didn't have the autocast method so we skip the test for older builds | ||
|
@@ -669,10 +648,10 @@ def test_create_supervised_evaluator_on_mps(): | |
_test_mocked_supervised_evaluator(model_device=model_device, evaluator_device=evaluator_device) | ||
|
||
|
||
@pytest.mark.skipif(not (_torch_version_le_112 and torch.backends.mps.is_available()), reason="Skip if no MPS") | ||
@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="Skip if no MPS Backend") | ||
def test_create_supervised_evaluator_on_mps_with_model_on_cpu(): | ||
_test_create_supervised_evaluator(evaluator_device="mps") | ||
_test_mocked_supervised_evaluator(evaluator_device="mps") | ||
_test_create_supervised_evaluator(model_device="mps", evaluator_device="mps") | ||
_test_mocked_supervised_evaluator(model_device="mps", evaluator_device="mps") | ||
|
||
|
||
@pytest.mark.skipif(Version(torch.__version__) < Version("1.6.0"), reason="Skip if < 1.6.0") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We have to put a `_torch_version_le_112` guard here, as we also have tests for older PyTorch versions where the MPS backend does not exist.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, to run the test, you need to remove the `@pytest.mark.skipif` decorator.