diff --git a/mergekit/_data/architectures/gpt2-sequence-classification.json b/mergekit/_data/architectures/gpt2-sequence-classification.json index 7e89ca65..54cf31f6 100644 --- a/mergekit/_data/architectures/gpt2-sequence-classification.json +++ b/mergekit/_data/architectures/gpt2-sequence-classification.json @@ -60,12 +60,6 @@ }, { "name": "transformer.h.${layer_index}.mlp.c_fc.bias" - }, - { - "name": "transformer.h.${layer_index}.mlp.c_proj.weight" - }, - { - "name": "transformer.h.${layer_index}.mlp.c_proj.bias" } ] } diff --git a/mergekit/_data/architectures/gpt2.json b/mergekit/_data/architectures/gpt2.json index 8d151df3..64a04e9d 100644 --- a/mergekit/_data/architectures/gpt2.json +++ b/mergekit/_data/architectures/gpt2.json @@ -58,12 +58,6 @@ }, { "name": "h.${layer_index}.mlp.c_fc.bias" - }, - { - "name": "h.${layer_index}.mlp.c_proj.weight" - }, - { - "name": "h.${layer_index}.mlp.c_proj.bias" } ] } diff --git a/mergekit/_data/architectures/gptbigcode.json b/mergekit/_data/architectures/gptbigcode.json new file mode 100644 index 00000000..4b086278 --- /dev/null +++ b/mergekit/_data/architectures/gptbigcode.json @@ -0,0 +1,70 @@ +{ + "model_type": "gpt_bigcode", + "architectures": [ + "GPTBigCodeForCausalLM" + ], + "pre_weights": [ + { + "name": "transformer.wte.weight", + "is_embed": true + }, + { + "name": "transformer.wpe.weight" + } + ], + "post_weights": [ + { + "name": "transformer.ln_f.weight" + }, + { + "name": "transformer.ln_f.bias" + }, + { + "name": "lm_head.weight", + "aliases": [ + "transformer.wte.weight" + ] + } + ], + "num_layers_config_key": "n_layer", + "layer_templates": { + "weights": [ + { + "name": "transformer.h.${layer_index}.attn.c_attn.weight" + }, + { + "name": "transformer.h.${layer_index}.attn.c_attn.bias" + }, + { + "name": "transformer.h.${layer_index}.attn.c_proj.weight" + }, + { + "name": "transformer.h.${layer_index}.attn.c_proj.bias" + }, + { + "name": "transformer.h.${layer_index}.ln_1.weight" + }, + { + "name": "transformer.h.${layer_index}.ln_1.bias" + }, + { + "name": "transformer.h.${layer_index}.ln_2.weight" + }, + { + "name": "transformer.h.${layer_index}.ln_2.bias" + }, + { + "name": "transformer.h.${layer_index}.mlp.c_proj.weight" + }, + { + "name": "transformer.h.${layer_index}.mlp.c_proj.bias" + }, + { + "name": "transformer.h.${layer_index}.mlp.c_fc.weight" + }, + { + "name": "transformer.h.${layer_index}.mlp.c_fc.bias" + } + ] + } +}