Commit f8ebc84: merge develop

RichardWooSJTU committed Dec 13, 2023
2 parents: 057b6e1 + a73a7bf

Showing 168 changed files with 9,180 additions and 2,573 deletions.
14 changes: 10 additions & 4 deletions .github/codecov.yml
@@ -1,8 +1,14 @@
+codecov:
+  notify:
+    require_ci_to_pass: yes
+
coverage:
  status:
    project:
-      default:
-        informational: true
+      default:
+        target: 75% # overall project Coverage < 75% CI will fail
+        informational: true
    patch:
-      default:
-        informational: true
+      default:
+        target: 90% # lines adjusted Coverage < 90% CI will fail
+        informational: true
1 change: 0 additions & 1 deletion csrc/setup_cuda.py
@@ -41,7 +41,6 @@ def get_gencode_flags():
    "./generation/transpose_removing_padding.cu",
    "./generation/write_cache_kv.cu",
    "./generation/encode_rotary_qk.cu",
-    "./generation/top_p_sampling.cu",
    "./generation/set_alibi_mask_value.cu",
    "./generation/get_padding_offset_v2.cu",
    "./generation/rebuild_padding_v2.cu",
6 changes: 2 additions & 4 deletions examples/benchmark/glue/run_glue_trainer.py
@@ -214,10 +214,8 @@ def main():
    if model_args.qat:
        from paddle import nn
        from paddle.quantization import QAT, QuantConfig
-        from paddle.quantization.quanters import (
-            FakeQuanterChannelWiseAbsMaxObserver,
-            FakeQuanterWithAbsMaxObserver,
-        )
+        from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver
+        from paddleslim.quant.quanters import FakeQuanterChannelWiseAbsMaxObserver

        from paddlenlp.peft.lora import LoRALinear
        from paddlenlp.peft.lora.lora_quant_layers import QuantedLoRALinear
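For context, a rough sketch of how the two imported quanters can be wired into a QAT configuration. It follows the general paddle.quantization API rather than the script's exact settings; the no-argument constructors and the choice of nn.Linear as the target layer type are assumptions made for illustration.

import paddle
from paddle import nn
from paddle.quantization import QAT, QuantConfig
from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver
from paddleslim.quant.quanters import FakeQuanterChannelWiseAbsMaxObserver

# Per-channel abs-max fake quantization for weights, moving abs-max for activations.
q_config = QuantConfig(activation=None, weight=None)
q_config.add_type_config(
    nn.Linear,
    activation=FakeQuanterWithAbsMaxObserver(moving_rate=0.9),
    weight=FakeQuanterChannelWiseAbsMaxObserver(),
)
qat = QAT(q_config)
# model = qat.quantize(model, inplace=True)  # would insert fake-quant layers into an existing model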
14 changes: 13 additions & 1 deletion examples/language_model/moe/dygraph/run_moe_pretrain.py
@@ -626,8 +626,18 @@ def do_train(args):
                avg_loss -= bal_loss
            else:
                bal_loss = -1
+            max_mem_reserved_msg = ""
+            max_mem_allocated_msg = ""
+            if paddle.device.is_compiled_with_cuda():
+                max_mem_reserved_msg = (
+                    f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved() >> 20} MB,"
+                )
+                max_mem_allocated_msg = (
+                    f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated() >> 20} MB"
+                )
            logger.info(
-                "global step %d, epoch: %d, batch: %d, loss: %.9f, bal_loss: %.9f, speed: %.2f step/s, ips_total: %.0f tokens/s, ips: %.0f tokens/s, learning rate: %.5e"
+                "global step %d, epoch: %d, batch: %d, loss: %.9f, bal_loss: %.9f, speed: %.2f step/s, "
+                "ips_total: %.0f tokens/s, ips: %.0f tokens/s, learning rate: %.5e, %s %s"
                % (
                    global_step,
                    epoch,
@@ -638,6 +648,8 @@
                    speed * default_global_tokens_num,
                    speed * default_global_tokens_num / nranks,
                    learning_rate,
+                    max_mem_reserved_msg,
+                    max_mem_allocated_msg,
                )
            )
            log_writer.add_scalar("loss", float(loss), global_step)
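The same peak-memory reporting pattern is added to the two transformer training scripts below. As a standalone sketch (assuming a CUDA-enabled Paddle build; the right shift by 20, like the // (1024 ** 2) form used elsewhere, converts bytes to MB):

import paddle

max_mem_reserved_msg = ""
max_mem_allocated_msg = ""
if paddle.device.is_compiled_with_cuda():
    # Peak memory reserved by the allocator vs. peak memory actually allocated, in MB.
    max_mem_reserved_msg = f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved() >> 20} MB,"
    max_mem_allocated_msg = f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated() >> 20} MB"
print(max_mem_reserved_msg, max_mem_allocated_msg)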
13 changes: 12 additions & 1 deletion examples/machine_translation/transformer/static/train.py
@@ -317,11 +317,20 @@ def do_train(args):
                )
            else:
                train_avg_batch_cost = args.print_step / batch_cost_avg.get_total_time()
+            max_mem_reserved_msg = ""
+            max_mem_allocated_msg = ""
+            if paddle.device.is_compiled_with_cuda():
+                max_mem_reserved_msg = (
+                    f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
+                )
+                max_mem_allocated_msg = (
+                    f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
+                )
            logger.info(
                "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
                "normalized loss: %f, ppl: %f, avg_speed: %.2f step/s, "
                "batch_cost: %.5f sec, reader_cost: %.5f sec, tokens: %d, "
-                "ips: %.5f words/sec"
+                "ips: %.5f words/sec, %s %s"
                % (
                    step_idx,
                    pass_id,
@@ -334,6 +343,8 @@
                    reader_cost_avg.get_average(),
                    batch_ips_avg.get_total_cnt(),
                    batch_ips_avg.get_average_per_sec(),
+                    max_mem_reserved_msg,
+                    max_mem_allocated_msg,
                )
            )
            reader_cost_avg.reset()
13 changes: 12 additions & 1 deletion examples/machine_translation/transformer/train.py
@@ -316,11 +316,20 @@ def do_train(args):
                )
            else:
                train_avg_batch_cost = args.print_step / batch_cost_avg.get_total_time()
+            max_mem_reserved_msg = ""
+            max_mem_allocated_msg = ""
+            if paddle.device.is_compiled_with_cuda():
+                max_mem_reserved_msg = (
+                    f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
+                )
+                max_mem_allocated_msg = (
+                    f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
+                )
            logger.info(
                "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
                "normalized loss: %f, ppl: %f, avg_speed: %.2f step/sec, "
                "batch_cost: %.5f sec, reader_cost: %.5f sec, tokens: %d, "
-                "ips: %.5f words/sec"
+                "ips: %.5f words/sec, %s %s"
                % (
                    step_idx,
                    pass_id,
@@ -333,6 +342,8 @@
                    reader_cost_avg.get_average(),
                    batch_ips_avg.get_total_cnt(),
                    batch_ips_avg.get_average_per_sec(),
+                    max_mem_reserved_msg,
+                    max_mem_allocated_msg,
                )
            )
            reader_cost_avg.reset()
19 changes: 19 additions & 0 deletions llm/argument.py
@@ -58,6 +58,25 @@ class ModelArgument:
        default=None, metadata={"help": "Build-in pretrained model name or the path to local model."}
    )
    use_flash_attention: bool = field(default=False, metadata={"help": "Whether to use flash attention"})
+    weight_quantize_algo: str = field(
+        default=None,
+        metadata={
+            "help": "Model weight quantization algorithm including 'nf4', 'fp4','weight_only_int4', 'weight_only_int8'."
+        },
+    )
+    weight_blocksize: int = field(
+        default=64,
+        metadata={"help": "Block size for weight quantization(Only available for nf4 or fp4 quant_scale.)."},
+    )
+    weight_double_quant: bool = field(
+        default=False, metadata={"help": "Whether apply double quant(Only available for nf4 or fp4 quant_scale.)."}
+    )
+    weight_double_quant_block_size: int = field(
+        default=256,
+        metadata={
+            "help": "Block size for quant_scale of weight quant_scale(Only available for nf4 or fp4 quant_scale.)"
+        },
+    )

    # LoRA related parameters
    lora: bool = field(default=False, metadata={"help": "Whether to use LoRA technique"})
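These dataclass fields surface as command-line flags once the script parses its arguments. A minimal sketch of reading them; PdArgumentParser and the bare "argument" import path are assumptions about how the llm scripts consume this file, not something shown in this diff:

from paddlenlp.trainer import PdArgumentParser

from argument import ModelArgument  # llm/argument.py

parser = PdArgumentParser(ModelArgument)
# Parse an explicit argv list here instead of sys.argv, just for the example.
(model_args,) = parser.parse_args_into_dataclasses(["--weight_quantize_algo", "nf4", "--weight_blocksize", "64"])
print(model_args.weight_quantize_algo, model_args.weight_double_quant_block_size)  # nf4 256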
5 changes: 3 additions & 2 deletions llm/data.py
@@ -159,7 +159,7 @@ def tokenize_rounds_example(tokenizer, example, data_args):


def convert_example_common(example, tokenizer, data_args, is_test=True, intokens=False):
-    if data_args.chat_template is not None:
+    if tokenizer.chat_template is not None:
        return convert_rounds_example_common(example, tokenizer, data_args, is_test, intokens)

    tokenized_source, tokenized_target_input_ids = tokenize_example(tokenizer, example, data_args)
@@ -208,6 +208,7 @@ def convert_rounds_example_common(example, tokenizer, data_args, is_test=True, i
    input_ids = rounds_inputs.pop("input_ids")
    # shift input_ids and labels
    input_ids, labels = input_ids[:-1], labels[1:]
+
    seq_length = len(input_ids)
    features = {"input_ids": input_ids, "labels": labels}
    if intokens:
@@ -221,7 +222,7 @@


def convert_example_chatglm(example, tokenizer, data_args, is_test=True, intokens=False):
-    if data_args.chat_template is not None:
+    if tokenizer.chat_template is not None:
        # chatglm only support single-round finetune
        example = convert_multi_rounds_to_single_round(example, tokenizer)

