Commit 798d254

update the history figure

HRajoliN committed Dec 22, 2024
1 parent 41bc62e commit 798d254
Showing 3 changed files with 93 additions and 4 deletions.
Binary file modified Figures/LLM_history.png
Binary file modified Figures/LLM_history.pptx
Binary file not shown.
97 changes: 93 additions & 4 deletions src/auto_regression/auto_reg_concept.ipynb
@@ -25,7 +25,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -140,12 +140,101 @@
"predictions.size()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we want to go deep and explore to see if an LLM like GPT2 architecture is decoder only or not."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of layers: 12\n",
"Shape of attention matrix (layer 0): torch.Size([1, 12, 7, 7])\n",
"Causal mask (first layer, first head):\n",
"tensor([[1, 0, 0, 0, 0, 0, 0],\n",
" [1, 1, 0, 0, 0, 0, 0],\n",
" [1, 1, 1, 0, 0, 0, 0],\n",
" [1, 1, 1, 1, 0, 0, 0],\n",
" [1, 1, 1, 1, 1, 0, 0],\n",
" [1, 1, 1, 1, 1, 1, 0],\n",
" [1, 1, 1, 1, 1, 1, 1]], dtype=torch.int32)\n"
]
}
],
"source": [
"from transformers import GPT2Model, GPT2Tokenizer\n",
"import torch\n",
"\n",
"# Load GPT-2 tokenizer and model\n",
"tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
"model = GPT2Model.from_pretrained(\"gpt2\")\n",
"\n",
"# Encode a sample input\n",
"input_text = \"What is the capital of France?\"\n",
"inputs = tokenizer(input_text, return_tensors=\"pt\")\n",
"\n",
"# Get the attention mask from the model\n",
"with torch.no_grad():\n",
" outputs = model(**inputs, output_attentions=True)\n",
" attentions = outputs.attentions # List of attention tensors from each layer\n",
"\n",
"# Check the shape and behavior of the attention mask\n",
"print(f\"Number of layers: {len(attentions)}\")\n",
"print(f\"Shape of attention matrix (layer 0): {attentions[0].shape}\") # (batch_size, num_heads, seq_len, seq_len)\n",
"\n",
"# Verify causal masking in layer 0\n",
"causal_mask = attentions[0][0][0] # Extract attention matrix for first head in first layer\n",
"print(\"Causal mask (first layer, first head):\")\n",
"print((causal_mask > 0).int()) # Display binary causal mask\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Input prompt: 'What is the capital of France?'\n",
"Number of tokens: 7\n",
"Shape of hidden states (last layer): torch.Size([1, 7, 768])\n"
]
}
],
"source": [
"from transformers import GPT2Tokenizer, GPT2Model\n",
"import torch\n",
"\n",
"# Load GPT-2 tokenizer and model\n",
"tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
"model = GPT2Model.from_pretrained(\"gpt2\")\n",
"\n",
"# Define input prompt\n",
"input_prompt = \"What is the capital of France?\"\n",
"\n",
"# Tokenize input\n",
"inputs = tokenizer(input_prompt, return_tensors=\"pt\")\n",
"\n",
"# Forward pass to get hidden states (contextualized embeddings)\n",
"with torch.no_grad():\n",
" outputs = model(**inputs, output_hidden_states=True)\n",
" hidden_states = outputs.hidden_states # Hidden states for all layers\n",
"\n",
"# Display shape of hidden states\n",
"print(f\"Input prompt: '{input_prompt}'\")\n",
"print(f\"Number of tokens: {len(inputs['input_ids'][0])}\")\n",
"print(f\"Shape of hidden states (last layer): {hidden_states[-1].shape}\")\n"
]
}
],
"metadata": {
