use fused_adam in deepspeed (#335)
ys950902 authored Jan 24, 2024
1 parent 141bfbe commit b09c6a6
Showing 2 changed files with 6 additions and 0 deletions.

megatron/arguments.py (2 additions, 0 deletions)
@@ -919,6 +919,8 @@ def _add_training_args(parser):
                        help='Run optimizer on CPU')
     group.add_argument('--cpu_torch_adam', action='store_true',
                        help='Use Torch Adam as optimizer on CPU.')
+    group.add_argument('--ds_fused_adam', action='store_true',
+                       help='Use DeepSpeed FusedAdam as optimizer.')
     group.add_argument('--no-pipeline-parallel', action='store_true',
                        help='Disable pipeline parallelism')
     group.add_argument('--use-tutel', action='store_true',
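
For context (not part of the commit): the new switch is a plain argparse store_true flag, so it defaults to False and flips to True only when passed on the command line. A minimal sketch reproducing just the added argument, with the rest of Megatron's argument plumbing omitted:

# Hedged sketch, not repository code: only the added --ds_fused_adam flag is
# reproduced; the group title and the parse_args calls here are illustrative.
import argparse

parser = argparse.ArgumentParser()
group = parser.add_argument_group(title='training')
group.add_argument('--ds_fused_adam', action='store_true',
                   help='Use DeepSpeed FusedAdam as optimizer.')

assert parser.parse_args([]).ds_fused_adam is False
assert parser.parse_args(['--ds_fused_adam']).ds_fused_adam is True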

megatron/optimizer/__init__.py (4 additions, 0 deletions)
@@ -94,6 +94,10 @@ def get_megatron_optimizer(model,
                                        eps=args.adam_eps)
     else:
         if args.optimizer == 'adam':
+            if args.ds_fused_adam:
+                global Adam
+                from deepspeed.ops.adam import FusedAdam
+                Adam = FusedAdam
             optimizer = Adam(param_groups,
                              lr=args.lr,
                              weight_decay=args.weight_decay,
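
And a minimal, self-contained sketch (not the repository's code) of the pattern the hunk above uses: when the flag is set, the module-level name Adam is rebound to DeepSpeed's FusedAdam, so the existing Adam(param_groups, ...) call picks up the fused implementation with no other changes. Here torch.optim.Adam stands in for the module's default Adam import, and the helper get_optimizer plus the toy model are illustrative only.

# Minimal sketch, assuming torch.optim.Adam as the module-level default.
import argparse

import torch
from torch.optim import Adam  # stand-in for the module-level default Adam


def get_optimizer(param_groups, args):
    global Adam
    if args.ds_fused_adam:
        # Rebind the module-level name so the call below resolves to
        # DeepSpeed's fused CUDA Adam instead of the default.
        from deepspeed.ops.adam import FusedAdam
        Adam = FusedAdam
    return Adam(param_groups, lr=args.lr, weight_decay=args.weight_decay)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--ds_fused_adam', action='store_true',
                        help='Use DeepSpeed FusedAdam as optimizer.')
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--weight_decay', type=float, default=0.0)
    args = parser.parse_args()

    model = torch.nn.Linear(8, 8)
    optimizer = get_optimizer(model.parameters(), args)
    print(type(optimizer).__name__)  # 'Adam', or 'FusedAdam' with the flag

Running the sketch with --ds_fused_adam requires a DeepSpeed build that provides the fused Adam op; without the flag it falls back to the stand-in torch.optim.Adam.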
