-
Notifications
You must be signed in to change notification settings - Fork 100
/
Copy pathupload_model.slurm
60 lines (52 loc) · 2.68 KB
/
upload_model.slurm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/bin/bash
#SBATCH --job-name=upload-hub-for-loop
#SBATCH --partition=compil
#SBATCH --cpus-per-task=10
#SBATCH --nodes=1
#SBATCH --output=output-update-hub-for.out
#SBATCH --account=six@cpu
#SBATCH --time=20:00:00
#SBATCH --ntasks=1
#SBATCH --hint=nomultithread

# Slurm batch job. For each checkpoint name in the loop below it copies the
# checkpoint tarball into a working directory, unpacks it, runs the BLOOM
# checkpoint conversion script, runs hub-sync.py on the target git repository,
# and finally tags and pushes that repository.
#
# Required environment: six_ALL_CCFRWORK (directory holding the start script).

# --- Environment -------------------------------------------------------------
# source younes-model-conversion
# echo $WORK
# Abort early when the environment script cannot be located; otherwise the job
# would keep going with whichever python happens to be on PATH.
: "${six_ALL_CCFRWORK:?six_ALL_CCFRWORK must be set to locate the start script}"
if [[ ! -r "$six_ALL_CCFRWORK/start-tr13f-6B3-ml-t0" ]]; then
  echo "error: cannot read $six_ALL_CCFRWORK/start-tr13f-6B3-ml-t0" >&2
  exit 1
fi
source "$six_ALL_CCFRWORK/start-tr13f-6B3-ml-t0"
#source $HOME/start-train
#conda activate younes-eval-opt
module load git-lfs

# --- Paths (commented lines are alternative settings kept for reference) -----
# Working directory that receives the copied tarballs and their contents.
#PATH_COPY=/gpfsscratch/rech/six/commun/uan68tv-model-conversion/intermediate-checkpoints
PATH_COPY=/gpfsscratch/rech/six/commun/experiments/muennighoff/intermedckpts/ckpts
# Git repository that receives the converted model and is tagged/pushed.
#PATH_REPO=/gpfsscratch/rech/six/commun/uan68tv-model-conversion/bloom-176-intermediate
PATH_REPO=/gpfsscratch/rech/six/commun/experiments/muennighoff/intermedckpts/bloom-2b5-intermediate
# Directory holding the <checkpoint-name>.tar archives.
#PATH_CKPTS=/gpfsdsstore/projects/rech/six/commun/checkpoints/tr11-176B-ml/checkpoints/
PATH_CKPTS=/gpfsdsstore/projects/rech/six/commun/checkpoints/tr11c-2B5-ml/checkpoints/main
# NOTE(review): not referenced by any command visible in this script.
PATH_CKPTS_NOTTAR=/gpfsscratch/rech/six/commun/checkpoints/tr11c-2B5-ml/checkpoints/main
# Directory containing hub-sync.py.
PATH_CODE_SYNC=/gpfswork/rech/six/commun/code/tr13f-6B3-ml-t0/bigscience/tools

# Everything that follows assumes it starts in the working directory; stop if
# it is unreachable rather than operating in the submission directory.
cd "$PATH_COPY" || { echo "error: cannot cd to $PATH_COPY" >&2; exit 1; }
# Convert and publish each listed checkpoint. VARIABLE is the checkpoint name
# (also used as the tarball basename and as the git tag).
#
# Every step aborts the job on failure so that a checkpoint whose copy,
# extraction or conversion failed is never tagged and pushed.
for VARIABLE in global_step100000; do
  # Stage the archive in the working directory and unpack it there.
  cp -r -- "$PATH_CKPTS/$VARIABLE.tar" "$PATH_COPY/" \
    || { echo "error: failed to copy $PATH_CKPTS/$VARIABLE.tar" >&2; exit 1; }
  tar xvf "$PATH_COPY/$VARIABLE.tar" -C "$PATH_COPY" \
    || { echo "error: failed to extract $PATH_COPY/$VARIABLE.tar" >&2; exit 1; }

  # Convert the unpacked checkpoint into $PATH_REPO.
  # NOTE(review): the checkpoint path assumes the archive unpacks to
  # tr11c-2B5-ml/checkpoints/main/<name> under $PATH_COPY - confirm.
  #python /gpfswork/rech/six/uan68tv/code/integration/transformers/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py --bloom_checkpoint_path $PATH_COPY/$VARIABLE --pytorch_dump_folder_path $PATH_REPO --pretraining_tp 4 --shard_model
  python /gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/transformers_clone/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py \
    --bloom_checkpoint_path "$PATH_COPY/tr11c-2B5-ml/checkpoints/main/$VARIABLE" \
    --pytorch_dump_folder_path "$PATH_REPO" \
    --pretraining_tp 4 \
    --bloom_config_file /gpfsscratch/rech/six/commun/experiments/muennighoff/intermedckpts/2b5config.json \
    || { echo "error: conversion failed for $VARIABLE" >&2; exit 1; }

  # Run hub-sync.py on the repository, once per file pattern. The patterns are
  # single-quoted so the shell passes them through without globbing.
  cd "$PATH_CODE_SYNC" || { echo "error: cannot cd to $PATH_CODE_SYNC" >&2; exit 1; }
  python "$PATH_CODE_SYNC/hub-sync.py" --repo-path "$PATH_REPO" --patterns '*json' \
    || { echo "error: hub-sync of '*json' failed for $VARIABLE" >&2; exit 1; }
  python "$PATH_CODE_SYNC/hub-sync.py" --repo-path "$PATH_REPO" --patterns '*bin' \
    || { echo "error: hub-sync of '*bin' failed for $VARIABLE" >&2; exit 1; }

  # Tag the repository with the checkpoint name and push that tag.
  cd "$PATH_REPO" || { echo "error: cannot cd to $PATH_REPO" >&2; exit 1; }
  git tag -a "$VARIABLE" -m "checkpoint from $VARIABLE" \
    || { echo "error: git tag failed for $VARIABLE" >&2; exit 1; }
  git push origin "$VARIABLE" \
    || { echo "error: git push failed for $VARIABLE" >&2; exit 1; }

  # Return to the working directory for the next checkpoint.
  cd "$PATH_COPY" || { echo "error: cannot cd to $PATH_COPY" >&2; exit 1; }
  # Cleanup of the staged archive and unpacked files is currently disabled:
  # pwd
  # eval "$(pwd)"
  # eval "rm -r $VARIABLE.tar"
  # eval "rm -r $VARIABLE"
done
# Disabled earlier steps, kept for reference:
# first update the repo
# cd $PATH_REPO
#eval $SYNC_CMD_JSON
# eval $SYNC_CMD_BIN
# CMD="git push --set-upstream origin gs47400"
# eval $CMD
# eval "git push"

# Final step: run hub-sync.py for '*bin' files on a second repository.
# NOTE(review): the doubled "commun/commun" segment differs from the other
# /gpfsscratch/rech/six/commun/... paths in this script and may be a typo -
# confirm the directory before relying on this step.
PATH_REPO=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/intermedckpts/bloom-6b3-optimizer-states
# --repo-path previously had no value, so PATH_REPO set above was never passed
# to hub-sync.py; pass it explicitly. The pattern stays single-quoted so the
# shell does not expand it.
python "$PATH_CODE_SYNC/hub-sync.py" --repo-path "$PATH_REPO" --patterns '*bin'