From 03ace8a749d9019ec578c8fd5eaf63fd0a0718d8 Mon Sep 17 00:00:00 2001 From: James Thewlis Date: Sat, 4 Jan 2025 19:16:06 +0100 Subject: [PATCH] Add ModernBERT config (#119) Adds a ModernBERT config for the original toxic comment classification challenge, using the ModernBERT-base model. ModernBERT is an architecture similar to BERT, leveraging more recent techniques like RoPE for long context and flash attention for faster inference. It is also trained on a variety of mainly English sources, including scientific articles and code. Training currently requires installing transformers from git. --- ...xic_comment_classification_ModernBERT.json | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 configs/Toxic_comment_classification_ModernBERT.json diff --git a/configs/Toxic_comment_classification_ModernBERT.json b/configs/Toxic_comment_classification_ModernBERT.json new file mode 100644 index 0000000..7e4b3f4 --- /dev/null +++ b/configs/Toxic_comment_classification_ModernBERT.json @@ -0,0 +1,40 @@ +{ + "name": "Jigsaw_ModernBERT", + "n_gpu": 1, + "batch_size": 10, + "accumulate_grad_batches": 3, + "loss": "binary_cross_entropy", + "arch": { + "type": "ModernBERT", + "args": { + "num_classes": 6, + "model_type": "answerdotai/ModernBERT-base", + "model_name": "ModernBertForSequenceClassification", + "tokenizer_name": "AutoTokenizer" + } + }, + "dataset": { + "type": "JigsawDataOriginal", + "args": { + "train_csv_file": "jigsaw_data/jigsaw-toxic-comment-classification-challenge/train.csv", + "test_csv_file": "jigsaw_data/jigsaw-toxic-comment-classification-challenge/val.csv", + "add_test_labels": false, + "classes": [ + "toxicity", + "severe_toxicity", + "obscene", + "threat", + "insult", + "identity_attack" + ] + } + }, + "optimizer": { + "type": "Adam", + "args": { + "lr": 3e-5, + "weight_decay": 3e-6, + "amsgrad": true + } + } +}