Commit

working cog predict
daanelson committed Mar 16, 2023
1 parent 61a3b43 commit 33a8fe9
Showing 3 changed files with 75 additions and 5 deletions.
16 changes: 16 additions & 0 deletions cog.yaml
@@ -0,0 +1,16 @@
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

build:
  gpu: true
  python_version: "3.10"
  cuda: "11.6"
  python_packages:
    - "torch==1.13.1"
    - "sentencepiece==0.1.97"
    - "accelerate==0.16.0"

run:
  - "pip install git+https://github.com/huggingface/transformers.git@c3dc391da81e6ed7efce42be06413725943b3920"

predict: "predict.py:Predictor"
42 changes: 42 additions & 0 deletions predict.py
@@ -0,0 +1,42 @@
from typing import List, Optional
from cog import BasePredictor, Input
import os

class Predictor(BasePredictor):

    def predict(
        self,
        model_path: str = Input(description="path to model"),
        tokenizer_path: str = Input(description="path to tokenizer"),
        data_path: str = Input(description="path to data", default='alpaca_data.json'),
        output_path: str = Input(description="path to save model", default='alpaca_out')
    ) -> int:
        # Keep outputs under /src, the working directory inside the Cog container.
        if not output_path.startswith('/src'):
            output_path = os.path.join('/src', output_path)
        if not os.path.exists(output_path):
            os.makedirs(output_path)

        # Launch fine-tuning across 4 GPUs via torchrun, forwarding the Cog inputs
        # as Alpaca training arguments.
        command = f'''torchrun --nproc_per_node=4 --master_port=9292 train.py \
            --model_name_or_path {model_path} \
            --tokenizer_name_or_path {tokenizer_path} \
            --data_path {data_path} \
            --bf16 True \
            --output_dir {output_path} \
            --num_train_epochs 1 \
            --per_device_train_batch_size 4 \
            --per_device_eval_batch_size 4 \
            --gradient_accumulation_steps 1 \
            --evaluation_strategy "no" \
            --save_strategy "steps" \
            --save_steps 2000 \
            --learning_rate 2e-5 \
            --weight_decay 0. \
            --warmup_ratio 0.03 \
            --lr_scheduler_type "cosine" \
            --logging_steps 1 \
            --fsdp "full_shard auto_wrap" \
            --fsdp_transformer_layer_cls_to_wrap 'LLaMADecoderLayer' \
            --tf32 True '''
        # Return the shell exit status from the training run (0 means success).
        res = os.system(command)
        return res
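
Predictor.predict shells out to train.py, so its return value is the torchrun exit status rather than model output. A minimal local smoke test of the predictor, outside the Cog runtime, could look like the sketch below; the weight paths are placeholders and it assumes 4 GPUs plus the dependencies pinned in cog.yaml.

# Hypothetical local invocation of the predictor (paths are placeholders).
from predict import Predictor

predictor = Predictor()
exit_code = predictor.predict(
    model_path="/src/weights/llama-7b",        # assumed location of converted LLaMA weights
    tokenizer_path="/src/weights/tokenizer",   # assumed tokenizer location
    data_path="alpaca_data.json",
    output_path="alpaca_out",                  # resolved to /src/alpaca_out by predict()
)
print("training exit status:", exit_code)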

22 changes: 17 additions & 5 deletions train.py
@@ -14,6 +14,8 @@
 
 import copy
 import logging
+import io
+import json
 from dataclasses import dataclass, field
 from typing import Optional, Dict, Sequence
 
@@ -22,8 +24,6 @@
 from torch.utils.data import Dataset
 from transformers import Trainer
 
-import utils
-
 IGNORE_INDEX = -100
 DEFAULT_PAD_TOKEN = "[PAD]"
 DEFAULT_EOS_TOKEN = "</s>"
@@ -43,9 +43,22 @@
 }
 
 
+def _make_r_io_base(f, mode: str):
+    if not isinstance(f, io.IOBase):
+        f = open(f, mode=mode)
+    return f
+
+def jload(f, mode="r"):
+    """Load a .json file into a dictionary."""
+    f = _make_r_io_base(f, mode)
+    jdict = json.load(f)
+    f.close()
+    return jdict
+
 @dataclass
 class ModelArguments:
     model_name_or_path: Optional[str] = field(default="facebook/opt-125m")
+    tokenizer_name_or_path: Optional[str] = field(default="facebook/opt-125m")
 
 
 @dataclass
@@ -140,7 +153,7 @@ class SupervisedDataset(Dataset):
     def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer):
         super(SupervisedDataset, self).__init__()
         logging.warning("Loading data...")
-        list_data_dict = utils.jload(data_path)
+        list_data_dict = jload(data_path)
 
         logging.warning("Formatting inputs...")
         prompt_input, prompt_no_input = PROMPT_DICT["prompt_input"], PROMPT_DICT["prompt_no_input"]
@@ -199,7 +212,7 @@ def train():
     )
 
     tokenizer = transformers.AutoTokenizer.from_pretrained(
-        model_args.model_name_or_path,
+        model_args.tokenizer_name_or_path,
         cache_dir=training_args.cache_dir,
         model_max_length=training_args.model_max_length,
         padding_side="right",
@@ -223,7 +236,6 @@ def train():
     data_module = make_supervised_data_module(tokenizer=tokenizer, data_args=data_args)
     trainer = Trainer(model=model, tokenizer=tokenizer, args=training_args, **data_module)
     trainer.train()
-    trainer.evaluate()
     trainer.save_state()
     safe_save_model_for_hf_trainer(trainer=trainer, output_dir=training_args.output_dir)
 
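The train.py changes inline the jload helper from the upstream utils module, add a tokenizer_name_or_path argument so the tokenizer can be loaded from a different path than the model weights (matching the separate model_path and tokenizer_path inputs in predict.py), and drop the trainer.evaluate() call, consistent with --evaluation_strategy "no" in the training command. A rough sketch of how the new argument is parsed, assuming train.py reads its dataclasses with transformers.HfArgumentParser as in the upstream Alpaca script and the dependencies from cog.yaml are installed (paths are placeholders):

# Hypothetical check of the new ModelArguments field (paths are placeholders).
import transformers
from train import ModelArguments

parser = transformers.HfArgumentParser(ModelArguments)
(model_args,) = parser.parse_args_into_dataclasses(
    args=[
        "--model_name_or_path", "/src/weights/llama-7b",
        "--tokenizer_name_or_path", "/src/weights/tokenizer",
    ]
)
# This value is what AutoTokenizer.from_pretrained(...) receives in train().
print(model_args.tokenizer_name_or_path)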
