Commit 798d254

update the history figure

HRajoliN committed Dec 22, 2024
1 parent 41bc62e commit 798d254
Showing 3 changed files with 93 additions and 4 deletions.
Binary file modified Figures/LLM_history.png
Binary file modified Figures/LLM_history.pptx
Binary file not shown.
97 changes: 93 additions & 4 deletions src/auto_regression/auto_reg_concept.ipynb
@@ -25,7 +25,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -140,12 +140,101 @@
"predictions.size()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we want to go deep and explore to see if an LLM like GPT2 architecture is decoder only or not."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of layers: 12\n",
"Shape of attention matrix (layer 0): torch.Size([1, 12, 7, 7])\n",
"Causal mask (first layer, first head):\n",
"tensor([[1, 0, 0, 0, 0, 0, 0],\n",
" [1, 1, 0, 0, 0, 0, 0],\n",
" [1, 1, 1, 0, 0, 0, 0],\n",
" [1, 1, 1, 1, 0, 0, 0],\n",
" [1, 1, 1, 1, 1, 0, 0],\n",
" [1, 1, 1, 1, 1, 1, 0],\n",
" [1, 1, 1, 1, 1, 1, 1]], dtype=torch.int32)\n"
]
}
],
"source": [
"from transformers import GPT2Model, GPT2Tokenizer\n",
"import torch\n",
"\n",
"# Load GPT-2 tokenizer and model\n",
"tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
"model = GPT2Model.from_pretrained(\"gpt2\")\n",
"\n",
"# Encode a sample input\n",
"input_text = \"What is the capital of France?\"\n",
"inputs = tokenizer(input_text, return_tensors=\"pt\")\n",
"\n",
"# Get the attention mask from the model\n",
"with torch.no_grad():\n",
" outputs = model(**inputs, output_attentions=True)\n",
" attentions = outputs.attentions # List of attention tensors from each layer\n",
"\n",
"# Check the shape and behavior of the attention mask\n",
"print(f\"Number of layers: {len(attentions)}\")\n",
"print(f\"Shape of attention matrix (layer 0): {attentions[0].shape}\") # (batch_size, num_heads, seq_len, seq_len)\n",
"\n",
"# Verify causal masking in layer 0\n",
"causal_mask = attentions[0][0][0] # Extract attention matrix for first head in first layer\n",
"print(\"Causal mask (first layer, first head):\")\n",
"print((causal_mask > 0).int()) # Display binary causal mask\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Input prompt: 'What is the capital of France?'\n",
"Number of tokens: 7\n",
"Shape of hidden states (last layer): torch.Size([1, 7, 768])\n"
]
}
],
"source": [
"from transformers import GPT2Tokenizer, GPT2Model\n",
"import torch\n",
"\n",
"# Load GPT-2 tokenizer and model\n",
"tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
"model = GPT2Model.from_pretrained(\"gpt2\")\n",
"\n",
"# Define input prompt\n",
"input_prompt = \"What is the capital of France?\"\n",
"\n",
"# Tokenize input\n",
"inputs = tokenizer(input_prompt, return_tensors=\"pt\")\n",
"\n",
"# Forward pass to get hidden states (contextualized embeddings)\n",
"with torch.no_grad():\n",
" outputs = model(**inputs, output_hidden_states=True)\n",
" hidden_states = outputs.hidden_states # Hidden states for all layers\n",
"\n",
"# Display shape of hidden states\n",
"print(f\"Input prompt: '{input_prompt}'\")\n",
"print(f\"Number of tokens: {len(inputs['input_ids'][0])}\")\n",
"print(f\"Shape of hidden states (last layer): {hidden_states[-1].shape}\")\n"
]
}
],
"metadata": {
