revised according to review comments, fix chatglm support
mikecovlee committed Jan 6, 2024
1 parent ef283cb commit c67b6d1
Showing 14 changed files with 170 additions and 166 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -121,7 +121,7 @@ Firstly, you should clone this repository and install dependencies:
git clone https://github.com/TUDB-Labs/multi-lora-fine-tune
cd multi-lora-fine-tune
# Optional but recommended
-conda create -n mlora_env python=3.9
+conda create -n mlora_env python=3.10
conda activate mlora_env
# Install requirements
pip install -r requirements.txt
@@ -151,7 +151,7 @@ You can run finetune through webui by following the instructions in the ‘webui
You can also install m-LoRA into your environment:
```bash
# Optional but recommended
-conda create -n mlora_env python=3.9
+conda create -n mlora_env python=3.10
conda activate mlora_env
# Install requirements
pip install mlora
15 changes: 6 additions & 9 deletions config/finetune_chatglm.json
@@ -8,7 +8,6 @@
"lora": [
{
"name": "lora_0",
"output": "lora_0",
"optim": "sgd",
"lr": 1e-2,
"momentum": 0.9,
@@ -17,8 +16,8 @@
"test_batch_size": 64,
"num_epochs": 3,
"r": 8,
"alpha": 16,
"dropout": 0.05,
"lora_alpha": 16,
"lora_dropout": 0.05,
"target_modules": {
"qkv": true,
"dense": true,
@@ -33,16 +32,15 @@
},
{
"name": "lora_1",
"output": "lora_1",
"optim": "adamw",
"lr": 3e-4,
"batch_size": 16,
"micro_batch_size": 4,
"test_batch_size": 64,
"num_epochs": 3,
"r": 8,
"alpha": 16,
"dropout": 0.05,
"lora_alpha": 16,
"lora_dropout": 0.05,
"target_modules": {
"qkv": true,
"dense": true,
@@ -57,16 +55,15 @@
},
{
"name": "lora_2",
"output": "lora_2",
"optim": "adamw",
"lr": 3e-4,
"batch_size": 16,
"micro_batch_size": 4,
"test_batch_size": 64,
"num_epochs": 3,
"r": 16,
"alpha": 16,
"dropout": 0.05,
"lora_alpha": 16,
"lora_dropout": 0.05,
"target_modules": {
"qkv": true,
"dense": true,
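The adapter entries above now use `lora_alpha` and `lora_dropout` instead of `alpha` and `dropout`, and the per-adapter `output` field is gone. The snippet below is a hypothetical stand-alone helper, not mLoRA code, that reads one adapter entry under the new key names; the fallback to the legacy names and the field selection are assumptions for illustration only.

```python
# Hypothetical helper (not part of mLoRA): load one adapter entry from the
# finetune config and normalize it to the renamed keys.
import json

def read_adapter(path: str, index: int = 0) -> dict:
    with open(path, "r") as fp:
        config = json.load(fp)
    adapter = config["lora"][index]
    return {
        "name": adapter["name"],
        "r": adapter["r"],
        # keys renamed by this commit; falling back to the old names is an assumption
        "lora_alpha": adapter.get("lora_alpha", adapter.get("alpha")),
        "lora_dropout": adapter.get("lora_dropout", adapter.get("dropout")),
        "target_modules": adapter["target_modules"],
    }

if __name__ == "__main__":
    print(read_adapter("config/finetune_chatglm.json"))
```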
3 changes: 1 addition & 2 deletions mlora/__init__.py
@@ -1,4 +1,4 @@
-from mlora.utils import Prompter, convert_hf_to_pth
+from mlora.prompter import Prompter
from mlora.tokenizer import Tokenizer
from mlora.model import LLMModel
from mlora.model_llama import LlamaModel
@@ -9,7 +9,6 @@
from mlora.train import TrainConfig, train

__all__ = [
"convert_hf_to_pth",
"lora_config_factory",
"Prompter",
"Tokenizer",
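With this change `Prompter` comes from `mlora.prompter` and `convert_hf_to_pth` is no longer exported. A short sketch of the imports the diff above shows remaining valid; anything else about the public API is not shown here and should not be assumed.

```python
# Package root still re-exports these names (per the diff above):
from mlora import Prompter, Tokenizer, LLMModel

# Prompter now lives in mlora.prompter rather than mlora.utils, and
# convert_hf_to_pth is no longer importable from the package root.
```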
48 changes: 26 additions & 22 deletions mlora/FeedForward.py → mlora/feed_forward.py
@@ -1,10 +1,10 @@
from mlora.modelargs import MixConfig, MultiLoraBatchData
-from mlora.LoraLiner import Linear
-from mlora.MixLoRA import moe_layer_factory
+from mlora.lora_liner import Linear
+from mlora.mix_lora import moe_layer_factory
from mlora.model import RMSNorm

+from typing import Optional
import torch
-from typing import Tuple, Optional


class FeedForward(torch.nn.Module):
@@ -20,17 +20,18 @@ def __init__(self, norm: RMSNorm, w1: Linear, w2: Linear, w3: Linear, device: st
# device
self.device_ = device
# mix of experts
-self.enable_moe_: bool = False
self.moes_: torch.ModuleDict = {}

-def init_moe_weight(self, in_features: int, config: MixConfig, gate: Optional[torch.Tensor] = None):
-self.moes_[config.adapter_name_] = moe_layer_factory(
-in_features, config)
-if gate is not None:
-with torch.no_grad():
-self.moes_[config.adapter_name_].gate_.weight.copy_(gate)
-self.enable_moe_ = True
+def forward(self, data: torch.Tensor, input_args: MultiLoraBatchData) -> torch.Tensor:
+if len(self.moes_) == 0:
+score_norm_data = self.norm_(data)
+w1 = self.w1_.forward(score_norm_data, input_args)
+w3 = self.w3_.forward(score_norm_data, input_args)
+return self.w2_.forward(self.act_(w1) * w3, input_args)
+else:
+return self._mixlora_forward(data, input_args)

+# LoRA
def _lora_forward(self, lora_name, act_fn, norm_data):
# Applying LoRA weights to FFN weights
if lora_name in self.w1_.loras_:
@@ -52,17 +53,19 @@ def _lora_forward(self, lora_name, act_fn, norm_data):
else:
return self.w2_.weight_.forward(act_result)

-def _expert_forward(self, moe_name, act_fn, expert_idx, norm_data):
+# MixLoRA
+def init_moe_weight(self, in_features: int, config: MixConfig, gate: Optional[torch.Tensor] = None):
+self.moes_[config.adapter_name_] = moe_layer_factory(
+in_features, config)
+if gate is not None:
+with torch.no_grad():
+self.moes_[config.adapter_name_].gate_.weight.copy_(gate)
+
+def _expert_forward_callback(self, moe_name, act_fn, expert_idx, norm_data):
lora_name = f"moe.{moe_name}.experts.{expert_idx}"
return self._lora_forward(lora_name, act_fn, norm_data)

-def forward(self, data: torch.Tensor, router_outputs: Tuple, input_args: MultiLoraBatchData) -> torch.Tensor:
-if not self.enable_moe_:
-score_norm_data = self.norm_(data)
-w1 = self.w1_.forward(score_norm_data, input_args)
-w3 = self.w3_.forward(score_norm_data, input_args)
-return self.w2_.forward(self.act_(w1) * w3, input_args)
-
+def _mixlora_forward(self, data: torch.Tensor, input_args: MultiLoraBatchData):
final_hidden_states = None
for idx, lora_config in enumerate(input_args.lora_batch_data_config_):
moe_name = lora_config.adapter_name_
@@ -71,10 +74,11 @@ def forward(self, data: torch.Tensor, router_outputs: Tuple, input_args: MultiLo

if moe_name in self.moes_:
current_hidden_states, current_router_outputs = self.moes_[
-moe_name].forward(self.norm_, self._expert_forward, data[start_idx:end_idx])
+moe_name].forward(self.norm_, self._expert_forward_callback, data[start_idx:end_idx])

-if router_outputs is not None and current_router_outputs is not None:
-router_outputs[idx].append(current_router_outputs)
+if input_args.router_logits_ is not None and current_router_outputs is not None:
+input_args.router_logits_[idx].append(
+current_router_outputs)
else:
score_norm_data = self.norm_(data[start_idx:end_idx])
current_hidden_states = self._lora_forward(
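The restructured FeedForward drops the `enable_moe_` flag: `forward` now checks whether any MixLoRA adapters are registered in `self.moes_` and either runs the plain FFN path or hands off to `_mixlora_forward`, with router logits collected on `input_args.router_logits_`. Below is a simplified, self-contained sketch of just that dispatch pattern; the layer names and shapes are illustrative, not mLoRA's actual classes (the real module wraps LoRA-aware Linear layers and applies RMSNorm before w1/w3).

```python
# Simplified sketch of the new dispatch: no registered MoE adapters means the
# plain SwiGLU-style FFN; otherwise route through a MixLoRA path.
import torch

class FeedForwardSketch(torch.nn.Module):
    def __init__(self, dim: int, hidden: int):
        super().__init__()
        self.w1_ = torch.nn.Linear(dim, hidden, bias=False)
        self.w2_ = torch.nn.Linear(hidden, dim, bias=False)
        self.w3_ = torch.nn.Linear(dim, hidden, bias=False)
        self.act_ = torch.nn.SiLU()
        self.moes_: dict = {}          # adapter name -> MoE layer; empty = no MixLoRA

    def forward(self, data: torch.Tensor) -> torch.Tensor:
        if len(self.moes_) == 0:       # same check the new code uses
            return self.w2_(self.act_(self.w1_(data)) * self.w3_(data))
        return self._mixlora_forward(data)

    def _mixlora_forward(self, data: torch.Tensor) -> torch.Tensor:
        # Placeholder: the real method slices the batch per adapter, calls the
        # MoE layer with an expert callback, and appends router logits to
        # input_args.router_logits_.
        raise NotImplementedError

ffn = FeedForwardSketch(dim=16, hidden=32)
print(ffn(torch.randn(2, 4, 16)).shape)   # torch.Size([2, 4, 16])
```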
9 changes: 5 additions & 4 deletions mlora/generate.py
@@ -1,7 +1,7 @@
-from mlora.modelargs import LoraBatchDataConfig, MultiLoraBatchData
+from mlora.modelargs import KVCache, LoraBatchDataConfig, MultiLoraBatchData
from mlora.tokenizer import Tokenizer, Tokens
-from mlora.model import LLMModel, KVCache
-from mlora.utils import Prompter
+from mlora.prompter import Prompter
+from mlora.model import LLMModel

from typing import List, Union, Tuple
from dataclasses import dataclass
@@ -121,9 +121,10 @@ def generate(llm_model: LLMModel,
lora_batch_data_config_=batch_data_config,
batch_seq_len_=(cur_pos - prev_pos),
batch_tokens_=tokens[:, prev_pos:cur_pos],
+kv_cache_=kv_cache,
inference_model_=True)
kv_cache.seq_pos = prev_pos
-logits, _ = llm_model.forward(input=input_data, kv_cache=kv_cache)
+logits, _ = llm_model.forward(input_data)
if temperature > 0:
probs = torch.softmax(logits[:, -1] / temperature, dim=-1)
next_token = sample_top_p(probs, top_p)
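The generation loop now hands the KV cache to the model by storing it on the batch-data object (`kv_cache_=kv_cache`) and calling `llm_model.forward(input_data)` with a single argument, rather than passing `kv_cache` as a separate keyword. A minimal stand-alone sketch of that pattern with stub classes; the real `MultiLoraBatchData` and `KVCache` live in `mlora.modelargs` and carry more fields than shown here.

```python
# Stand-in sketch: the cache travels inside the batch-data object, so the
# model's forward() needs only one argument. These stubs are illustrative,
# not mLoRA's real MultiLoraBatchData / KVCache.
from dataclasses import dataclass
from typing import Optional
import torch

@dataclass
class KVCacheStub:
    seq_pos: int = 0

@dataclass
class BatchDataStub:
    batch_tokens_: torch.Tensor
    kv_cache_: Optional[KVCacheStub] = None   # carried inside the batch data now
    inference_model_: bool = True

def model_forward(input_data: BatchDataStub) -> torch.Tensor:
    # A real model would read and update input_data.kv_cache_ internally.
    bsz, seq_len = input_data.batch_tokens_.shape
    return torch.zeros(bsz, seq_len, 8)

kv_cache = KVCacheStub()
kv_cache.seq_pos = 0                          # mirrors kv_cache.seq_pos = prev_pos
tokens = torch.zeros(2, 4, dtype=torch.long)
logits = model_forward(BatchDataStub(batch_tokens_=tokens, kv_cache_=kv_cache))
print(logits.shape)                           # torch.Size([2, 4, 8])
```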
File renamed without changes.
File renamed without changes.
25 changes: 1 addition & 24 deletions mlora/model.py
@@ -110,27 +110,6 @@ def forward(self, data: torch.Tensor) -> torch.Tensor:
return (self.weight_ * data).to(input_dtype)


-class KVCache:
-def __init__(self) -> None:
-self.cache_k: List[torch.Tensor] = []
-self.cache_v: List[torch.Tensor] = []
-self.seq_pos: int = 0
-
-def update(self, xk: torch.Tensor, xv: torch.Tensor, layer_idx: int,
-bsz: int, seq_len: int) -> Tuple[torch.Tensor, torch.Tensor]:
-if len(self.cache_k) <= layer_idx:
-self.cache_k.append(xk)
-self.cache_v.append(xv)
-else:
-self.cache_k[layer_idx][:bsz,
-self.seq_pos: self.seq_pos + seq_len] = xk
-self.cache_v[layer_idx][:bsz,
-self.seq_pos: self.seq_pos + seq_len] = xv
-
-return self.cache_k[layer_idx][:bsz, :self.seq_pos + seq_len], \
-self.cache_v[layer_idx][:bsz, :self.seq_pos + seq_len]


class LLMModel(metaclass=ABCMeta):
@abstractclassmethod
def init_lora_layer_weight(self, config: LoraConfig, weight: Optional[Dict[str, torch.Tensor]]):
@@ -161,7 +140,5 @@ def get_train_paramas(self) -> Dict[str, List[torch.Tensor]]:
pass

@abstractclassmethod
-def forward(self, input: MultiLoraBatchData,
-output_router_logits: bool = False,
-kv_cache: KVCache = None) -> torch.Tensor:
+def forward(self, input: MultiLoraBatchData) -> torch.Tensor:
pass
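With `KVCache` removed from this module (the generate.py hunk above now imports it from `mlora.modelargs`), the abstract `forward` takes only a `MultiLoraBatchData`; per the other hunks, the cache and router logits travel inside that object. A minimal sketch of a subclass under the slimmed-down interface; the body is a placeholder, not a real model.

```python
# Sketch of the simplified abstract interface: forward() receives only the
# batch-data object. The concrete body here is a placeholder.
from abc import ABCMeta, abstractmethod
import torch

class LLMModelSketch(metaclass=ABCMeta):
    @abstractmethod
    def forward(self, input) -> torch.Tensor:   # input: MultiLoraBatchData
        ...

class ToyModel(LLMModelSketch):
    def forward(self, input) -> torch.Tensor:
        # A real implementation would run embeddings, decoder layers, and the
        # output head, reading any KV cache from the input object itself.
        return torch.zeros(1, 1, 8)

print(ToyModel().forward(object()).shape)       # torch.Size([1, 1, 8])
```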
