Skip to content

Commit

Permalink
Refine code
Browse files Browse the repository at this point in the history
  • Loading branch information
RichardWooSJTU committed Dec 12, 2023
1 parent 58108c6 commit f886192
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 25 deletions.
13 changes: 7 additions & 6 deletions llm/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,25 @@ export FLAGS_use_autotune=1
export FLAGS_cublaslt_exhaustive_search_times=10
export FLAGS_cache_inference_while_scope=1

model_dir=${1:-"checkpoints/llama_ptq_ckpts_smooth_all_shift_mp2"}
src_len=${2:-300}
dec_len=${3:-100}
model_dir=${1:-"checkpoints/llama65b_ptq_smooth_mp8"}
src_len=${2:-1100}
dec_len=${3:-330}

total_len=`expr ${src_len} + ${dec_len}`


python -m paddle.distributed.launch \
--gpus "6,7" \
--gpus "0,1,2,3,4,5,6,7" \
predictor.py \
--model_name_or_path ./inference_model/${model_dir} \
--dtype float16 \
--src_length ${total_len} \
--max_length ${dec_len} \
--output_file "infer.json" \
--mode "static" \
--batch_size 1 \
--batch_size 128 \
--benchmark \
--block_attn \
--block_size 64 \
--inference_model
--inference_model \
--use_cachekv_int8 static
7 changes: 3 additions & 4 deletions llm/export.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,11 @@
# limitations under the License.

export PYTHONPATH=$(dirname $(pwd)):$PYTHONPATH
export DISTRIBUTED_TRAINER_ENDPOINTS=10.174.140.213:60105,10.174.140.213:60122,10.174.140.213:60196,10.174.140.213:60232,10.174.140.213:60257,10.174.140.213:60317,10.174.140.213:60458,10.174.140.213:60800


model_dir=${1:-"checkpoints/llama65b_ptq"}
src_len=${2:-1024}
dec_len=${3:-1024}
model_dir=${1:-"checkpoints/llama65b_ptq_smooth"}
src_len=${2:-1100}
dec_len=${3:-330}
quant_type=${4:-"a8w8"}

total_len=`expr ${src_len} + ${dec_len}`
Expand Down
17 changes: 10 additions & 7 deletions llm/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,11 +1115,11 @@ def predict(self, input_texts: str | list[str]):

def _preprocess(self, source):
for i, text in enumerate(source):
print("text: ", text)
# print("text: ", text)
tokens = self.tokenizer(text, return_tensors="np", padding=False, max_length=(self.config.src_length - self.config.max_length))
input_ids = tokens["input_ids"][0]
length = len(input_ids)
print("input_ids: ", input_ids)
# print("input_ids: ", input_ids)
print("length: ", length)
self.inputs["input_ids"][i : i + 1, :length] = input_ids
self.inputs["penalty_score"][i : i + 1] = self.config.repetition_penalty
Expand All @@ -1135,7 +1135,7 @@ def _preprocess(self, source):
self.inputs["stop_flags"][i : i + 1] = False
reset_stop_value(self.inputs["not_need_stop"])
need_block_nums = (length + self.config.max_length + self.pre_cache_length + self.block_size - 1) // self.block_size
print("self.free_list", self.free_list)
# print("self.free_list", self.free_list)
for bi in range(need_block_nums):
bi_now = self.free_list.pop()
self.used_list[i].append(bi_now)
Expand Down Expand Up @@ -1423,10 +1423,13 @@ def predict():
source_texts = []

data_file = open("humaneval_solution.json", 'r')

dataset = []
for line in data_file.readlines():
dataset.append(json.loads(line))

for i in range(predictor_args.batch_size):
line = data_file.readline()
data = json.loads(line)
data = dataset[i % 164]
source_texts.append(data["prompt"])


Expand Down Expand Up @@ -1463,8 +1466,8 @@ def benchmark(predictor, predictor_args, model_args):
batch_benchmark_texts = batchfy_text(benchmark_texts, predictor_args.batch_size)
print("***********Start Benchmark**********")

warmup_time = 3
test_time = 20
warmup_time = 2
test_time = 10

print("***********Start Warmup**********")
for i in range(warmup_time):
Expand Down
4 changes: 1 addition & 3 deletions llm/run_dygraph.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ export FLAGS_new_executor_serial_run=1
export FLAGS_allocator_strategy=naive_best_fit
export FLAGS_fraction_of_gpu_memory_to_use=0.92

export DISTRIBUTED_TRAINER_ENDPOINTS=10.174.140.213:60105,10.174.140.213:60122,10.174.140.213:60196,10.174.140.213:60232,10.174.140.213:60257,10.174.140.213:60317,10.174.140.213:60458,10.174.140.213:60800

model_dir=${1:-"checkpoints/llama65b_ptq"}
model_dir=${1:-"checkpoints/llama65b_ptq_smooth"}
src_len=${2:-1024}
dec_len=${3:-1024}
quant_type=${4:-"a8w8"}
Expand Down
8 changes: 3 additions & 5 deletions llm/run_static.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,11 @@ export FLAGS_control_flow_use_new_executor=1
export FLAGS_new_executor_serial_run=1
export FLAGS_allocator_strategy=naive_best_fit
export FLAGS_fraction_of_gpu_memory_to_use=0.92
export DISTRIBUTED_TRAINER_ENDPOINTS=10.174.140.213:60105,10.174.140.213:60122,10.174.140.213:60196,10.174.140.213:60232,10.174.140.213:60257,10.174.140.213:60317,10.174.140.213:60458,10.174.140.213:60800



model_dir=${1:-"checkpoints/llama65b_ptq_mp8"}
src_len=${2:-1024}
dec_len=${3:-1024}
model_dir=${1:-"checkpoints/llama65b_ptq_smooth_mp8"}
src_len=${2:-1100}
dec_len=${3:-330}
quant_type=${4:-"a8w8"}

total_len=`expr ${src_len} + ${dec_len}`
Expand Down

0 comments on commit f886192

Please sign in to comment.