diff --git a/pyvene/models/gpt2/modelings_intervenable_gpt2.py b/pyvene/models/gpt2/modelings_intervenable_gpt2.py
index c5442f20..1567d164 100644
--- a/pyvene/models/gpt2/modelings_intervenable_gpt2.py
+++ b/pyvene/models/gpt2/modelings_intervenable_gpt2.py
@@ -77,6 +77,8 @@ def create_gpt2(name="gpt2", cache_dir=None):
     from transformers import GPT2Model, GPT2Tokenizer, GPT2Config
 
     config = GPT2Config.from_pretrained(name)
+    if hasattr(config, '_attn_implementation'):
+        config._attn_implementation = "eager"
     tokenizer = GPT2Tokenizer.from_pretrained(name)
     gpt = GPT2Model.from_pretrained(name, config=config, cache_dir=cache_dir)
     print("loaded model")
@@ -90,8 +92,12 @@ def create_gpt2_lm(name="gpt2", config=None, cache_dir=None):
     tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
     if config is None:
         config = GPT2Config.from_pretrained(name)
+        if hasattr(config, '_attn_implementation'):
+            config._attn_implementation = "eager"
         gpt = GPT2LMHeadModel.from_pretrained(name, config=config, cache_dir=cache_dir)
     else:
+        if hasattr(config, '_attn_implementation'):
+            config._attn_implementation = "eager"
         gpt = GPT2LMHeadModel(config=config)
     print("loaded model")
     return config, tokenizer, gpt
@@ -103,8 +109,12 @@ def create_gpt2_classifier(name="gpt2", config=None, cache_dir=None):
     tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
     if config is None:
         config = GPT2Config.from_pretrained(name)
+        if hasattr(config, '_attn_implementation'):
+            config._attn_implementation = "eager"
         gpt = GPT2LMForSequenceClassification.from_pretrained(name, config=config, cache_dir=cache_dir)
     else:
+        if hasattr(config, '_attn_implementation'):
+            config._attn_implementation = "eager"
         gpt = GPT2LMForSequenceClassification(config=config)
     print("loaded model")
     return config, tokenizer, gpt
diff --git a/pyvene/models/qwen2/__init__.py b/pyvene/models/qwen2/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/pyvene_101.ipynb b/pyvene_101.ipynb
index bd13ba38..e2b20765 100644
--- a/pyvene_101.ipynb
+++ b/pyvene_101.ipynb
@@ -146,7 +146,8 @@
     "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
     "\n",
     "model_name = \"gpt2\"\n",
-    "gpt2 = AutoModelForCausalLM.from_pretrained(model_name)\n",
+    "# Do not use SDPA attention because we cannot hook to attn_dropout\n",
+    "gpt2 = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation=\"eager\")\n",
     "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
     "\n",
     "pv_gpt2 = pv.IntervenableModel({\n",
@@ -3032,7 +3033,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-  "version": "3.10.13"
+  "version": "3.11.9"
  },
 "toc-autonumbering": true,
 "toc-showcode": false,
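
Below is a minimal sketch, separate from the patch itself, of the loading pattern the notebook change demonstrates. The patch's rationale (per its own comment) is that the SDPA attention path does not execute the attn_dropout module, so pyvene's forward hooks on it never fire; forcing the eager implementation restores the hookable path. The version check and printed attributes are assumptions about a recent transformers release where attn_implementation is supported.

    # Sketch: load GPT-2 with eager attention so attn_dropout is actually executed
    # (assumes a transformers version that accepts the attn_implementation kwarg).
    from transformers import AutoModelForCausalLM

    gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", attn_implementation="eager")

    print(gpt2.config._attn_implementation)         # expected: "eager"
    print(gpt2.transformer.h[0].attn.attn_dropout)  # Dropout module; only run under eager attention

The wrapped model can then be passed to pv.IntervenableModel as in the pyvene_101 notebook cell above, and hooks on attn_dropout will fire during the forward pass.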