From 4e4148cb5ce7f4a9265f58b4eeb660c64bed0386 Mon Sep 17 00:00:00 2001
From: Victor Nogueira
Date: Wed, 15 Nov 2023 17:51:33 +0200
Subject: [PATCH] Add support for Grouped Query Attention on Llama Model
 (#393)

Resolves #388

---
 src/models.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/models.js b/src/models.js
index b0a82cee0..9fabe8bdd 100644
--- a/src/models.js
+++ b/src/models.js
@@ -3035,9 +3035,9 @@ export class LlamaPreTrainedModel extends PreTrainedModel {
         // config doesn't contain pad_token_id, so we assume it is the eos_token_id
         this.config.pad_token_id = this.config.eos_token_id
 
-        this.num_heads = this.config.num_attention_heads
+        this.num_heads = this.config.num_key_value_heads ?? this.config.num_attention_heads
         this.num_layers = this.config.num_hidden_layers
-        this.dim_kv = this.config.hidden_size / this.num_heads;
+        this.dim_kv = this.config.hidden_size / this.config.num_attention_heads
     }
 }
 /**
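
The sketch below (not part of the patch) illustrates what the two changed lines compute. With Grouped Query Attention, the key/value cache holds only num_key_value_heads heads, but each head's dimension is still hidden_size divided by the full num_attention_heads; the old code divided by the (now smaller) KV head count, producing an oversized head dimension. The config values and the [batch, heads, seq_len, head_dim] cache layout are illustrative assumptions, not taken from the patch itself:

// Illustrative GQA config in the style of a Llama model config
// (values are hypothetical, chosen only to show the arithmetic).
const config = {
    hidden_size: 4096,
    num_attention_heads: 32,  // query heads
    num_key_value_heads: 8,   // KV heads; absent on pre-GQA configs
};

// Patched logic: fall back to num_attention_heads for non-GQA models,
// where num_key_value_heads is undefined.
const num_heads = config.num_key_value_heads ?? config.num_attention_heads; // 8
const dim_kv = config.hidden_size / config.num_attention_heads;             // 128

// Assumed per-layer empty-cache shape: [batch, KV heads, seq_len, head dim].
// Old code would have yielded [1, 8, 0, 512] here, inflating each KV head.
const pastKeyValueDims = [1, num_heads, 0, dim_kv];
console.log(pastKeyValueDims); // [1, 8, 0, 128]

For a model without GQA (num_key_value_heads undefined), the ?? fallback keeps the previous behavior: num_heads equals num_attention_heads and the cache shape is unchanged.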