Skip to content

Commit

Permalink
Update README and Jupyter/Colab notebooks. Remove extraneous files.
Browse files Browse the repository at this point in the history
  • Loading branch information
jessevig committed Jan 6, 2022
1 parent 39d5398 commit 4841215
Show file tree
Hide file tree
Showing 22 changed files with 192 additions and 1,247 deletions.
256 changes: 146 additions & 110 deletions README.md

Large diffs are not rendered by default.

Binary file removed images/bertviz-logo.png
Binary file not shown.
77 changes: 0 additions & 77 deletions notebooks/head_view_albert.ipynb

This file was deleted.

64 changes: 0 additions & 64 deletions notebooks/head_view_bart.ipynb

This file was deleted.

86 changes: 0 additions & 86 deletions notebooks/head_view_bert.ipynb

This file was deleted.

107 changes: 0 additions & 107 deletions notebooks/head_view_bert_tensorflow.ipynb

This file was deleted.

40 changes: 11 additions & 29 deletions notebooks/head_view_distilbert.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
},
"outputs": [],
"source": [
"from transformers import AutoTokenizer, AutoModel, utils\n",
"from bertviz import head_view\n",
"from transformers import DistilBertModel, DistilBertTokenizer, utils"
"\n",
"utils.logging.set_verbosity_error() # Suppress standard warnings"
]
},
{
Expand All @@ -20,33 +22,13 @@
"metadata": {},
"outputs": [],
"source": [
"def show_head_view(model, tokenizer, text):\n",
" inputs = tokenizer.encode_plus(text, return_tensors='pt', add_special_tokens=True)\n",
" input_ids = inputs['input_ids']\n",
" attention = model(input_ids)[-1]\n",
" input_id_list = input_ids[0].tolist() # Batch index 0\n",
" tokens = tokenizer.convert_ids_to_tokens(input_id_list)\n",
" head_view(attention, tokens)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"is_executing": false
},
"scrolled": false
},
"outputs": [],
"source": [
"utils.logging.set_verbosity_error() # Remove line to see warnings\n",
"model_version = 'distilbert-base-uncased'\n",
"do_lower_case = True\n",
"model = DistilBertModel.from_pretrained(model_version, output_attentions=True)\n",
"tokenizer = DistilBertTokenizer.from_pretrained(model_version, do_lower_case=do_lower_case)\n",
"text = \"The cat sat on the mat\"\n",
"show_head_view(model, tokenizer, text)"
"tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased\")\n",
"model = AutoModel.from_pretrained(\"distilbert-base-uncased\", output_attentions=True)\n",
"inputs = tokenizer.encode(\"The cat sat on the mat\", return_tensors='pt')\n",
"outputs = model(inputs)\n",
"attention = outputs[-1] # Output includes attention weights when output_attentions=True\n",
"tokens = tokenizer.convert_ids_to_tokens(inputs[0]) \n",
"head_view(attention, tokens)"
]
}
],
Expand Down Expand Up @@ -80,4 +62,4 @@
},
"nbformat": 4,
"nbformat_minor": 1
}
}
Loading

0 comments on commit 4841215

Please sign in to comment.