diff --git a/apologyTransformer/Apology_Transformer_Submit.ipynb b/apologyTransformer/Apology_Transformer_Submit.ipynb new file mode 100644 index 00000000..9fa2e91c --- /dev/null +++ b/apologyTransformer/Apology_Transformer_Submit.ipynb @@ -0,0 +1,662 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "cl0DWumoeTZG" + }, + "source": [ + "# Apology Transformer\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vfYRR9obXIwG", + "outputId": "cb12aec4-75d7-48fc-e515-f9de81ef6c47" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting convokit\n", + " Downloading convokit-3.0.0.tar.gz (183 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/183.2 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━\u001b[0m \u001b[32m163.8/183.2 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.2/183.2 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: matplotlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from convokit) (3.7.1)\n", + "Requirement already satisfied: pandas>=0.23.4 in /usr/local/lib/python3.10/dist-packages (from convokit) (1.5.3)\n", + "Collecting msgpack-numpy>=0.4.3.2 (from convokit)\n", + " Downloading msgpack_numpy-0.4.8-py2.py3-none-any.whl (6.9 kB)\n", + "Requirement already satisfied: spacy>=2.3.5 in /usr/local/lib/python3.10/dist-packages (from convokit) (3.6.1)\n", + "Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from convokit) (1.11.4)\n", + "Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.10/dist-packages (from convokit) (1.2.2)\n", + "Requirement already satisfied: nltk>=3.4 in /usr/local/lib/python3.10/dist-packages (from convokit) (3.8.1)\n", + "Collecting dill>=0.2.9 (from convokit)\n", + " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: joblib>=0.13.2 in /usr/local/lib/python3.10/dist-packages (from convokit) (1.3.2)\n", + "Collecting clean-text>=0.6.0 (from convokit)\n", + " Downloading clean_text-0.6.0-py3-none-any.whl (11 kB)\n", + "Collecting unidecode>=1.1.1 (from convokit)\n", + " Downloading Unidecode-1.3.7-py3-none-any.whl (235 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m235.5/235.5 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: tqdm>=4.64.0 in /usr/local/lib/python3.10/dist-packages (from convokit) (4.66.1)\n", + "Collecting pymongo>=4.0 (from convokit)\n", + " Downloading pymongo-4.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (677 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m677.1/677.1 kB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pyyaml>=5.4.1 in /usr/local/lib/python3.10/dist-packages (from convokit) (6.0.1)\n", + "Collecting dnspython>=1.16.0 (from convokit)\n", + " Downloading dnspython-2.4.2-py3-none-any.whl (300 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m300.4/300.4 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting emoji<2.0.0,>=1.0.0 (from clean-text>=0.6.0->convokit)\n", + " Downloading emoji-1.7.0.tar.gz (175 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m175.4/175.4 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting ftfy<7.0,>=6.0 (from clean-text>=0.6.0->convokit)\n", + " Downloading ftfy-6.1.3-py3-none-any.whl (53 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.4/53.4 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->convokit) (1.2.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->convokit) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->convokit) (4.46.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->convokit) (1.4.5)\n", + "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->convokit) (1.23.5)\n", + "Requirement already satisfied: 
packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->convokit) (23.2)\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->convokit) (9.4.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->convokit) (3.1.1)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->convokit) (2.8.2)\n", + "Requirement already satisfied: msgpack>=0.5.2 in /usr/local/lib/python3.10/dist-packages (from msgpack-numpy>=0.4.3.2->convokit) (1.0.7)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>=3.4->convokit) (8.1.7)\n", + "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>=3.4->convokit) (2023.6.3)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.23.4->convokit) (2023.3.post1)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->convokit) (3.2.0)\n", + "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (3.0.12)\n", + "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (1.0.5)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (1.0.10)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (2.0.8)\n", + "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (3.0.9)\n", + "Requirement already satisfied: thinc<8.2.0,>=8.1.8 in 
/usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (8.1.12)\n", + "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (1.1.2)\n", + "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (2.4.8)\n", + "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (2.0.10)\n", + "Requirement already satisfied: typer<0.10.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (0.9.0)\n", + "Requirement already satisfied: pathy>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (0.10.3)\n", + "Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (6.4.0)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (2.31.0)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (1.10.13)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (3.1.2)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (67.7.2)\n", + "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from spacy>=2.3.5->convokit) (3.3.0)\n", + "Requirement already satisfied: wcwidth<0.3.0,>=0.2.12 in /usr/local/lib/python3.10/dist-packages (from ftfy<7.0,>=6.0->clean-text>=0.6.0->convokit) (0.2.12)\n", + "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy>=2.3.5->convokit) (4.5.0)\n", + "Requirement already 
satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib>=3.0.0->convokit) (1.16.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.3.5->convokit) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.3.5->convokit) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.3.5->convokit) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.3.5->convokit) (2023.11.17)\n", + "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.10/dist-packages (from thinc<8.2.0,>=8.1.8->spacy>=2.3.5->convokit) (0.7.11)\n", + "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from thinc<8.2.0,>=8.1.8->spacy>=2.3.5->convokit) (0.1.4)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->spacy>=2.3.5->convokit) (2.1.3)\n", + "Building wheels for collected packages: convokit, emoji\n", + " Building wheel for convokit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for convokit: filename=convokit-3.0.0-py3-none-any.whl size=216707 sha256=08e38a1ca1f858fbdcddc1d6aa3e718dd8875a9723118520dc79a07c684967fc\n", + " Stored in directory: /root/.cache/pip/wheels/c4/89/8c/2677fdb888588b6f93cb6ac86bdfb020f1f1c33e0d5525b231\n", + " Building wheel for emoji (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for emoji: filename=emoji-1.7.0-py3-none-any.whl size=171033 sha256=645554a44a4023935e9e6c056d13747517ffb5baefab9508aa95bf2fc2a63b99\n", + " Stored in directory: /root/.cache/pip/wheels/31/8a/8c/315c9e5d7773f74b33d5ed33f075b49c6eaeb7cedbb86e2cf8\n", + "Successfully built convokit emoji\n", + "Installing collected packages: emoji, unidecode, msgpack-numpy, ftfy, dnspython, dill, pymongo, clean-text, convokit\n", + "Successfully installed clean-text-0.6.0 convokit-3.0.0 dill-0.3.7 dnspython-2.4.2 emoji-1.7.0 ftfy-6.1.3 msgpack-numpy-0.4.8 pymongo-4.6.1 unidecode-1.3.7\n" + ] + } + ], + "source": [ + "!pip install convokit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YCcTEDDlW0UP" + }, + "outputs": [], + "source": [ + "import convokit\n", + "from convokit import Corpus, download\n", + "from convokit.transformer import Transformer\n", + "from inspect import signature\n", + "import string\n", + "import re\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import random\n", + "from collections import Counter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "x6c1cZ1UdMhB", + "outputId": "9334f15f-44e8-49d7-c2b8-d9b71358f8a7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading conversations-gone-awry-cmv-corpus to /root/.convokit/downloads/conversations-gone-awry-cmv-corpus\n", + "Downloading conversations-gone-awry-cmv-corpus from http://zissou.infosci.cornell.edu/convokit/datasets/conversations-gone-awry-cmv-corpus/full.zip (88.6MB)... 
Done\n", + "No configuration file found at /root/.convokit/config.yml; writing with contents: \n", + "# Default Backend Parameters\n", + "db_host: localhost:27017\n", + "data_directory: ~/.convokit/saved-corpora\n", + "default_backend: mem\n" + ] + } + ], + "source": [ + "corpus = Corpus(filename=download(\"conversations-gone-awry-cmv-corpus\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZOYCg_mAWwxH" + }, + "outputs": [], + "source": [ + "def remove_quotes(comment):\n", + " quoted_pattern = r'>.*?$'\n", + " comment = re.sub(quoted_pattern, '', comment, flags=re.MULTILINE)\n", + " return comment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EtrxL_Q3J-oU" + }, + "outputs": [], + "source": [ + "apology_list = ['sorry', 'apologize', 'apologies', 'oops', 'whoops', 'woops', 'forgive me', 'forgive my', 'excuse me', 'excuse my', 'my mistake', 'my bad']\n", + "first_person = ['i', 'me', 'my', 'myself', 'mine']\n", + "second_person = ['you', 'your', 'u', 'ur', 'yours', 'yourself', 'urself']\n", + "clarification = ['mean', 'meant', 'clarify','clear','clarification','explain','understand','confused','confusing','what','context','worded','wording','are you','do you','talking about','referring','rephrase','reword','intend','intent','term']\n", + "contradictory = ['but','however','while','although']\n", + "disagreement = ['wrong','incorrect','inaccurate','false','mistaken','error','bad','nonsensical','stupid','disagree','dumb','bullshit','bs','insufficient','hypocritical','break it']\n", + "agreement = ['right','correct','sense','true','accurate','case','work','agree']\n", + "negatives = ['no','not','don\\'t','dont','doesn\\'t','doesnt', 'isn\\'t', 'isnt']\n", + "wrongdoing = ['regret','mistake','misunderstand','misunderstood','fault','offend','hurt','misread','misspoke','wrong','incorrect','accident','misconception','truly','genuine','sincere']\n", + "potential = 
['for','if','because','that','about']\n", + "requests = ['could','would','can']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "poOQxzmv695C" + }, + "outputs": [], + "source": [ + "apology_pattern = r\"\\b(\" + \"|\".join(re.escape(word) for word in apology_list) + r\")\\b\"\n", + "\n", + "clarify_pattern = r\"\\b(\" + \"|\".join(re.escape(word) for word in clarification) + r\")\\b\"\n", + "contradictory_pattern = fr\"{apology_pattern}(.{{0,20}}(?:but|however|while|although))\\b\"\n", + "disagree_pattern = r\"\\b(\" + \"|\".join(re.escape(word) for word in disagreement) + r\")\\b\"\n", + "negatives_pattern = r\"\\b(\" + \"|\".join(re.escape(word) for word in negatives) + r\")\\b\"\n", + "agreement_pattern = r\"\\b(\" + \"|\".join(re.escape(word) for word in agreement) + r\")\\b\"\n", + "not_agree_pattern = fr\"{negatives_pattern}.{{0,10}}{agreement_pattern}\"\n", + "potential_pattern = fr\"{apology_pattern}.{{0,3}}\\b(\" + \"|\".join(re.escape(word) for word in potential) + r\")\\b\"\n", + "first_person_pattern = r\"\\b(\" + \"|\".join(re.escape(word) for word in first_person) + r\")\\b\"\n", + "second_person_pattern = r\"\\b(\" + \"|\".join(re.escape(word) for word in second_person) + r\")\\b\"\n", + "wrong_pattern = r\"\\b(\" + \"|\".join(re.escape(word) for word in wrongdoing) + r\")\\b\"\n", + "wrongdoing_pattern = fr\"{first_person_pattern}.{{0,10}}{wrong_pattern}\"\n", + "ask_pattern = r\"\\b(\" + \"|\".join(re.escape(word) for word in requests) + r\")\\b\"\n", + "requests_pattern = fr\"({ask_pattern}.{{0,10}}{second_person_pattern})|please\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "imYAH7AWHNHI" + }, + "outputs": [], + "source": [ + "class ApologyLabeler(Transformer):\n", + " \"\"\"\n", + " A transformer to label the different types of apologies in the CMV corpus.\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " obj_type='utterance',\n", + " 
output_field='apology_type',\n", + " input_field=None,\n", + " input_filter=None,\n", + " verbosity=10000,\n", + " ):\n", + " if input_filter:\n", + " if len(signature(input_filter).parameters) == 1:\n", + " self.input_filter = lambda utt: input_filter(utt)\n", + " else:\n", + " self.input_filter = input_filter\n", + " else:\n", + " self.input_filter = lambda utt: True\n", + " self.obj_type = obj_type\n", + " self.input_field = input_field\n", + " self.output_field = output_field\n", + " self.verbosity = verbosity\n", + "\n", + " def _print_output(self, i):\n", + " return (self.verbosity > 0) and (i > 0) and (i % self.verbosity == 0)\n", + "\n", + " def transform(self, corpus: Corpus) -> Corpus:\n", + "\n", + " if self.obj_type == 'utterance':\n", + " total = len(list(corpus.iter_utterances()))\n", + "\n", + " for idx, utterance in enumerate(corpus.iter_utterances()):\n", + " if self._print_output(idx):\n", + " print(f\"%03d/%03d {self.obj_type} processed\" % (idx, total))\n", + "\n", + " text = remove_quotes(utterance.text)\n", + " text = text.lower()\n", + " sentences = re.split(r'(?<=[.!?])\\s+', text)\n", + "\n", + " apology = False\n", + " apology_loc = 0\n", + " for i, sentence in enumerate(sentences):\n", + " apology_match = re.search(apology_pattern, sentence) #start index of match\n", + " if apology_match:\n", + " apology_loc = apology_match.span()[0]\n", + " apology_sentence = sentence.strip()\n", + " next_sentence = \" \"\n", + " if (i != len(sentences)-1):\n", + " next_sentence = sentences[i+1].strip()\n", + "\n", + " apology_segment = apology_sentence + next_sentence\n", + " apology = True\n", + "\n", + " if apology:\n", + "\n", + " pattern_meta_mapping = [\n", + " (clarify_pattern, 'clarifying_apology'),\n", + " (potential_pattern, 'wrongdoing_apology'),\n", + " (wrongdoing_pattern, 'wrongdoing_apology'),\n", + " (contradictory_pattern, 'disagree_apology'),\n", + " (disagree_pattern, 'disagree_apology'),\n", + " (not_agree_pattern, 
'disagree_apology'),\n", + " (requests_pattern, 'request_apology')\n", + " ]\n", + "\n", + " closest_match = min(\n", + " [(re.search(pattern, apology_segment), meta) for pattern, meta in pattern_meta_mapping if re.search(pattern, apology_segment)],\n", + " key=lambda x: abs(x[0].start() - apology_loc),\n", + " default=None\n", + " )\n", + "\n", + " if closest_match:\n", + " _, meta = closest_match\n", + " utterance.add_meta(self.output_field, meta)\n", + " else:\n", + " utterance.add_meta(self.output_field, 'other_apology')\n", + "\n", + " else:\n", + " utterance.add_meta(self.output_field, 'no_apology')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "C-Icg-V6LWdd", + "outputId": "809cd5f2-2992-49a7-8a48-32a1202c0dfd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10000/42964 utterance processed\n", + "20000/42964 utterance processed\n", + "30000/42964 utterance processed\n", + "40000/42964 utterance processed\n" + ] + } + ], + "source": [ + "apologizer = ApologyLabeler()\n", + "apologizer.transform(corpus)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HrN-iDTQ-QCn" + }, + "source": [ + "Sorting Apologies by Types and storing IDs in lists" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7nhZHy4uPt9U" + }, + "outputs": [], + "source": [ + "apology_ids = []\n", + "\n", + "clarifying_ids = []\n", + "disagree_ids = []\n", + "wrongdoing_ids = []\n", + "request_ids = []\n", + "other_ids = []\n", + "\n", + "for utt_id in corpus.get_utterance_ids():\n", + " if corpus.get_utterance(utt_id).meta['apology_type'] != 'no_apology':\n", + "\n", + " apology_ids.append(utt_id)\n", + "\n", + " if corpus.get_utterance(utt_id).meta['apology_type'] == 'clarifying_apology':\n", + " clarifying_ids.append(utt_id)\n", + "\n", + " if corpus.get_utterance(utt_id).meta['apology_type'] == 
'disagree_apology':\n", + " disagree_ids.append(utt_id)\n", + "\n", + " if corpus.get_utterance(utt_id).meta['apology_type'] == 'wrongdoing_apology':\n", + " wrongdoing_ids.append(utt_id)\n", + "\n", + " if corpus.get_utterance(utt_id).meta['apology_type'] == 'request_apology':\n", + " request_ids.append(utt_id)\n", + "\n", + " if corpus.get_utterance(utt_id).meta['apology_type'] == 'other_apology':\n", + " other_ids.append(utt_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "98rXQruz-WzQ" + }, + "source": [ + "Total Number of identified apologies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FLS1P54fSEym", + "outputId": "0ac7961d-c69f-40b4-c5a5-35281bc2a227" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "822" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(apology_ids)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6Vyg-Cwy-cEs" + }, + "source": [ + "Number of apologies by type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "S57GyG0oGjUC", + "outputId": "c1cb5416-fc0b-4129-97c5-0ffeef002e31" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'clarifying': 102, 'disagree': 213, 'wrongdoing': 259, 'request': 26, 'other': 222}\n" + ] + } + ], + "source": [ + "apology_dict = {'clarifying': len(clarifying_ids), 'disagree': len(disagree_ids), 'wrongdoing': len(wrongdoing_ids), 'request': len(request_ids), 'other': len(other_ids)}\n", + "print(apology_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 449 + }, + "id": "xMShVwSwG3y2", + "outputId": "fd599c84-b39c-4195-e784-a0b46bc63023" + }, + "outputs": [ + { + "data": { + 
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGwCAYAAABPSaTdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA20klEQVR4nO3deXxOZ/7/8fctkgiyNMjWJnaRqL2WVG2lYqnhy7eWatFa2k5QVWoyrdrGUC06bZXOjCZ06DZaVaNRW0IJIq2dWKqlJXQQESqyXL8/+nO+vWuLCHdy+no+HufxcM513df9Oec+iXfOue77dhhjjAAAAGyqlKsLAAAAuJ0IOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNZKu7qA4iA/P1/Hjh2Tt7e3HA6Hq8sBAAAFYIzRuXPnFBISolKlrn39hrAj6dixYwoNDXV1GQAAoBCOHj2qe+6555rthB1J3t7ekn45WD4+Pi6uBgAAFERmZqZCQ0Ot/8evhbAjWbeufHx8CDsAAJQwN5qCwgRlAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga6VdXQCAkq/Fmy1cXUKJtWH4BleXANgeV3YAAICtEXYAAICtEXYAAICtEXYAAICtEXYAAICtEXYAAICtEXYAAICtuTTsTJ06VU2aNJG3t7cCAgLUvXt3paWlOfVp06aNHA6H0/L000879Tly5Ii6dOmismXLKiAgQGPGjFFubu6d3BUAAFBMufRDBZOSkhQTE6MmTZooNzdXf/7zn9WhQwft2bNH5cqVs/oNGTJEkyZNstbLli1r/TsvL09dunRRUFCQNm7cqOPHj6t///5yd3fXX//61zu6PwAAoPhxadhJSEhwWo+Pj1dAQIBSU1PVqlUra3vZsmUVFBR01TG+/PJL7dmzR6tWrVJgYKAaNGigyZMna+zYsZowYYI8PDxu6z4AAIDirVjN2Tl79qwkyd/f32n7woULVbFiRd17772KjY3VhQsXrLbk5GTVrVtXgYGB1rbo6GhlZmZq9+7dV32e7OxsZWZmOi0AAMCeis13Y+Xn52vkyJFq0aKF7r33Xmv7o48+qsqVKyskJEQ7duzQ2LFjlZaWpk8++USSlJ6e7hR0JFnr6enpV32uqVOnauLEibdpTwAAQHFSbMJOTEyMdu3apa+++spp+9ChQ61/161bV8HBwWrXrp0OHTqk6tWrF+q5YmNjNWrUKGs9MzNToaGhhSscAAAUa8XiNtawYcO0bNkyrV27Vvfcc891+zZr1kySdPDgQUlSUFCQTpw44dTn8vq15vl4enrKx8fHaQEAAPbk0rBjjNGwYcP06aefas2aNapateoNH7Nt2zZJUnBwsCQpKipKO3fu1MmTJ60+K1eulI+PjyIjI29L3QAAoORw6W2smJgYLVq0SJ999pm8vb2tOTa+vr7y8vLSoUOHtGjRInXu3FkVKlTQjh079Nxzz6lVq1aqV6+eJKlDhw6KjIzU448/runTpys9PV0vvfSSYmJi5Onp6crdAwAAxYBLr+zMmTNHZ8+eVZs2bRQcHGwtH374oSTJw8NDq1atUocOHVS7dm09//zz6tmzpz7//HNrDDc3Ny1btkxubm6KiorSY489pv79+zt9Lg8AAPj9cumVHWPMddtDQ0OVlJR0w3EqV66s5cuXF1VZAADARorFBGUAAIDbhbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjb
ADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsrbSrCwAAwI7eev5zV5dQog2b0bXIxuLKDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDW+Gwsl1pFJdV1dQokV9vJOV5cAAHcMV3YAAICtEXYAAICtEXYAAICtEXYAAICtEXYAAICtEXYAAICtEXYAAICtEXYAAICtEXYAAICtEXYAAICtEXYAAICtEXYAAICtEXYAAICtuTTsTJ06VU2aNJG3t7cCAgLUvXt3paWlOfW5ePGiYmJiVKFCBZUvX149e/bUiRMnnPocOXJEXbp0UdmyZRUQEKAxY8YoNzf3Tu4KAAAoplwadpKSkhQTE6NNmzZp5cqVysnJUYcOHXT+/Hmrz3PPPafPP/9cH3/8sZKSknTs2DH16NHDas/Ly1OXLl106dIlbdy4UfPnz1d8fLxefvllV+wSAAAoZkq78skTEhKc1uPj4xUQEKDU1FS1atVKZ8+e1bx587Ro0SI9+OCDkqS4uDhFRERo06ZNat68ub788kvt2bNHq1atUmBgoBo0aKDJkydr7NixmjBhgjw8PFyxawAAoJgoVnN2zp49K0ny9/eXJKWmpionJ0ft27e3+tSuXVthYWFKTk6WJCUnJ6tu3boKDAy0+kRHRyszM1O7d+++6vNkZ2crMzPTaQEAAPZUbMJOfn6+Ro4cqRYtWujee++VJKWnp8vDw0N+fn5OfQMDA5Wenm71+XXQudx+ue1qpk6dKl9fX2sJDQ0t4r0BAADFRbEJOzExMdq1a5c++OCD2/5csbGxOnv2rLUcPXr0tj8nAABwDZfO2bls2LBhWrZsmdatW6d77rnH2h4UFKRLly4pIyPD6erOiRMnFBQUZPXZsmWL03iX3611uc9veXp6ytPTs4j3AgAAFEcuvbJjjNGwYcP06aefas2aNapatapTe+PGjeXu7q7Vq1db29LS0nTkyBFFRUVJkqKiorRz506dPHnS6rNy5Ur5+PgoMjLyzuwIAAAotlx6ZScmJkaLFi3SZ599Jm9vb2uOja+vr7y8vOTr66tBgwZp1KhR8vf3l4+Pj4YPH66oqCg1b95cktShQwdFRkbq8ccf1/Tp05Wenq6XXnpJMTExXL0BAACuDTtz5syRJLVp08Zpe1xcnAYOHChJmjVrlkqVKqWePXsqOztb0dHRevvtt62+bm5uWrZsmZ555hlFRUWpXLlyGjBggCZNmnSndgMAABRjLg07xpgb9ilTpoxmz56t2bNnX7NP5cqVtXz58qIsDQAA2ESxeTcWAADA7UDYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtubSsLNu3Tp17dpVISEhcjgcWrJkiVP7wIED5XA4nJaOHTs69Tl9+rT69esnHx8f+fn5adCgQcrKyrqDewEAAIozl4ad8+fPq379+po9e/Y1+3Ts2FHHjx+3lvfff9+pvV+/ftq9e7dWrlypZcuWad26dRo6dOjtLh0AAJQQpV355J06dVKnTp2u28fT01NBQU
FXbdu7d68SEhKUkpKi++67T5L05ptvqnPnznrttdcUEhJS5DUDAICSpdjP2UlMTFRAQIDCw8P1zDPP6NSpU1ZbcnKy/Pz8rKAjSe3bt1epUqW0efPma46ZnZ2tzMxMpwUAANhTsQ47HTt21IIFC7R69Wq98sorSkpKUqdOnZSXlydJSk9PV0BAgNNjSpcuLX9/f6Wnp19z3KlTp8rX19daQkNDb+t+AAAA1ylU2KlWrZrTFZbLMjIyVK1atVsu6rI+ffroD3/4g+rWravu3btr2bJlSklJUWJi4i2NGxsbq7Nnz1rL0aNHi6ZgAABQ7BQq7Hz33XfW1ZVfy87O1o8//njLRV1LtWrVVLFiRR08eFCSFBQUpJMnTzr1yc3N1enTp685z0f6ZR6Qj4+P0wIAAOzppiYoL1261Pr3ihUr5Ovra63n5eVp9erVqlKlSpEV91s//PCDTp06peDgYElSVFSUMjIylJqaqsaNG0uS1qxZo/z8fDVr1uy21QEAAEqOmwo73bt3lyQ5HA4NGDDAqc3d3V1VqlTRjBkzCjxeVlaWdZVGkg4fPqxt27bJ399f/v7+mjhxonr27KmgoCAdOnRIL7zwgmrUqKHo6GhJUkREhDp27KghQ4Zo7ty5ysnJ0bBhw9SnTx/eiQUAACTdZNjJz8+XJFWtWlUpKSmqWLHiLT351q1b1bZtW2t91KhRkqQBAwZozpw52rFjh+bPn6+MjAyFhISoQ4cOmjx5sjw9Pa3HLFy4UMOGDVO7du1UqlQp9ezZU2+88cYt1QUAAOyjUJ+zc/jw4SJ58jZt2sgYc832FStW3HAMf39/LVq0qEjqAQAA9lPoDxVcvXq1Vq9erZMnT1pXfC579913b7kwAACAolCosDNx4kRNmjRJ9913n4KDg+VwOIq6LgAAgCJRqLAzd+5cxcfH6/HHHy/qegAAAIpUoT5n59KlS7r//vuLuhYAAIAiV6iwM3jwYCYFAwCAEqFQt7EuXryov//971q1apXq1asnd3d3p/aZM2cWSXEAAAC3qlBhZ8eOHWrQoIEkadeuXU5tTFYGAADFSaHCztq1a4u6DgAAgNuiUHN2AAAASopCXdlp27btdW9XrVmzptAFAQAAFKVChZ3L83Uuy8nJ0bZt27Rr164rviAUAADAlQoVdmbNmnXV7RMmTFBWVtYtFQQAAFCUinTOzmOPPcb3YgEAgGKlSMNOcnKyypQpU5RDAgAA3JJC3cbq0aOH07oxRsePH9fWrVs1bty4IikMAACgKBQq7Pj6+jqtlypVSuHh4Zo0aZI6dOhQJIUBAAAUhUKFnbi4uKKuAwAA4LYoVNi5LDU1VXv37pUk1alTRw0bNiySogAAAIpKocLOyZMn1adPHyUmJsrPz0+SlJGRobZt2+qDDz5QpUqVirJGAACAQivUu7GGDx+uc+fOaffu3Tp9+rROnz6tXbt2KTMzUyNGjCjqGgEAAAqtUFd2EhIStGrVKkVERFjbIiMjNXv2bCYoAwCAYqVQV3by8/Pl7u5+xXZ3d3fl5+ffclEAAABFpVBh58EHH9Szzz6rY8eOWdt+/PFHPffcc2rXrl2RFQcAAHCrChV23nrrLWVmZqpKlSqqXr26qlevrqpVqyozM1NvvvlmUdcIAABQaIWasxMaGqqvv/5aq1at0r59+yRJERERat++fZEWBwAAcKtu6srOmjVrFBkZqczMTDkcDj300EMaPny4hg8friZNmqhOnTpav3797aoVAADgpt1U2Hn99dc1ZMgQ+fj4XNHm6+urp556SjNnziyy4gAAAG7VTYWd7du3q2PHjtds79Chg1JTU2+5KAAAgKJyU2HnxIkTV33L+WWlS5fWTz/9dMtFAQAAFJWbCjt33323du3adc32HTt2KDg4+JaLAgAAKCo3FXY6d+6scePG6eLFi1e0/fzzzxo/frwefvjhIisOAADgVt3UW89feuklffLJJ6pVq5aGDRum8PBwSdK+ffs0e/
Zs5eXl6cUXX7wthQIAABTGTYWdwMBAbdy4Uc8884xiY2NljJEkORwORUdHa/bs2QoMDLwthQIAABTGTX+oYOXKlbV8+XKdOXNGBw8elDFGNWvW1F133XU76gMAALglhfoEZUm666671KRJk6KsBQAAoMgV6ruxAAAASgrCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDWXhp1169apa9euCgkJkcPh0JIlS5zajTF6+eWXFRwcLC8vL7Vv314HDhxw6nP69Gn169dPPj4+8vPz06BBg5SVlXUH9wIAABRnpV355OfPn1f9+vX15JNPqkePHle0T58+XW+88Ybmz5+vqlWraty4cYqOjtaePXtUpkwZSVK/fv10/PhxrVy5Ujk5OXriiSc0dOhQLVq06LbU3HjMgtsy7u9B6qv9XV0CAOB3yKVhp1OnTurUqdNV24wxev311/XSSy+pW7dukqQFCxYoMDBQS5YsUZ8+fbR3714lJCQoJSVF9913nyTpzTffVOfOnfXaa68pJCTkju0LAAAonortnJ3Dhw8rPT1d7du3t7b5+vqqWbNmSk5OliQlJyfLz8/PCjqS1L59e5UqVUqbN2++5tjZ2dnKzMx0WgAAgD0V27CTnp4uSQoMDHTaHhgYaLWlp6crICDAqb106dLy9/e3+lzN1KlT5evray2hoaFFXD0AACguim3YuZ1iY2N19uxZazl69KirSwIAALdJsQ07QUFBkqQTJ044bT9x4oTVFhQUpJMnTzq15+bm6vTp01afq/H09JSPj4/TAgAA7KnYhp2qVasqKChIq1evtrZlZmZq8+bNioqKkiRFRUUpIyNDqampVp81a9YoPz9fzZo1u+M1AwCA4sel78bKysrSwYMHrfXDhw9r27Zt8vf3V1hYmEaOHKm//OUvqlmzpvXW85CQEHXv3l2SFBERoY4dO2rIkCGaO3eucnJyNGzYMPXp04d3YgEAAEkuDjtbt25V27ZtrfVRo0ZJkgYMGKD4+Hi98MILOn/+vIYOHaqMjAw98MADSkhIsD5jR5IWLlyoYcOGqV27dipVqpR69uypN954447vCwAAKJ5cGnbatGkjY8w12x0OhyZNmqRJkyZds4+/v/9t+wBBAABQ8hXbOTsAAABFgbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsjbADAABsrViHnQkTJsjhcDgttWvXttovXryomJgYVahQQeXLl1fPnj114sQJF1YMAACKm2IddiSpTp06On78uLV89dVXVttzzz2nzz//XB9//LGSkpJ07Ngx9ejRw4XVAgCA4qa0qwu4kdKlSysoKOiK7WfPntW8efO0aNEiPfjgg5KkuLg4RUREaNOmTWrevPmdLhUAABRDxf7KzoEDBxQSEqJq1aqpX79+OnLkiCQpNTVVOTk5at++vdW3du3aCgsLU3Jy8nXHzM7OVmZmptMCAA
DsqViHnWbNmik+Pl4JCQmaM2eODh8+rJYtW+rcuXNKT0+Xh4eH/Pz8nB4TGBio9PT06447depU+fr6WktoaOht3AsAAOBKxfo2VqdOnax/16tXT82aNVPlypX10UcfycvLq9DjxsbGatSoUdZ6ZmYmgQcAAJsq1ld2fsvPz0+1atXSwYMHFRQUpEuXLikjI8Opz4kTJ646x+fXPD095ePj47QAAAB7KlFhJysrS4cOHVJwcLAaN24sd3d3rV692mpPS0vTkSNHFBUV5cIqAQBAcVKsb2ONHj1aXbt2VeXKlXXs2DGNHz9ebm5u6tu3r3x9fTVo0CCNGjVK/v7+8vHx0fDhwxUVFcU7sQAAgKVYh50ffvhBffv21alTp1SpUiU98MAD2rRpkypVqiRJmjVrlkqVKqWePXsqOztb0dHRevvtt11cNQAAKE6Kddj54IMPrttepkwZzZ49W7Nnz75DFQEAgJKmRM3ZAQAAuFmEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGuEHQAAYGulXV0AAKDoJLVq7eoSSrTW65JcXQJuA67sAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAWyPsAAAAW7NN2Jk9e7aqVKmiMmXKqFmzZtqyZYurSwIAAMWALcLOhx9+qFGjRmn8+PH6+uuvVb9+fUVHR+vkyZOuLg0AALiYLcLOzJkzNWTIED3xxBOKjIzU3LlzVbZsWb377ruuLg0AALhYaVcXcKsuXbqk1NRUxcbGWttKlSql9u3bKzk5+aqPyc7OVnZ2trV+9uxZSVJmZuYNny8v++dbrPj3qyDH92acu5hXpOP9nhT1a5H7c26Rjvd7UtSvxflcXotbUZSvx8/ZF4psrN+jgrwWl/sYY67f0ZRwP/74o5FkNm7c6LR9zJgxpmnTpld9zPjx440kFhYWFhYWFhssR48evW5WKPFXdgojNjZWo0aNstbz8/N1+vRpVahQQQ6Hw4WVFV5mZqZCQ0N19OhR+fj4uLqc3zVei+KF16P44LUoPuzyWhhjdO7cOYWEhFy3X4kPOxUrVpSbm5tOnDjhtP3EiRMKCgq66mM8PT3l6enptM3Pz+92lXhH+fj4lOgT1054LYoXXo/ig9ei+LDDa+Hr63vDPiV+grKHh4caN26s1atXW9vy8/O1evVqRUVFubAyAABQHJT4KzuSNGrUKA0YMED33XefmjZtqtdff13nz5/XE0884erSAACAi9ki7PTu3Vs//fSTXn75ZaWnp6tBgwZKSEhQYGCgq0u7Yzw9PTV+/Pgrbs/hzuO1KF54PYoPXovi4/f2WjiMudH7tQAAAEquEj9nBwAA4HoIOwAAwNYIOwAAwNYIO3fQd999J4fDoW3btt3yWFWqVNHrr79uraenp+uhhx5SuXLlCvyZQQ6HQ0uWLLnlWkqqNm3aaOTIkZKuPJ74/fj1eVAQ8fHxtvlcLvy+3Oy5bie2eDfW71FKSorKlStnrc+aNUvHjx/Xtm3bCvQBS5J0/Phx3XXXXberxBLlt8cTuJbevXurc+fOri4DvzJhwgQtWbKkSP6QtIPExES1bdtWZ86cIZj/f4SdEubSpUvy8PBQpUqVnLYfOnRIjRs3Vs2aNQs81rU+Yfr36LfH807Ly8uTw+
FQqVK/r4utJXG/vby85OXl5eoy7ojLv2+Aaykp50jJ+Q1TguTn52v69OmqUaOGPD09FRYWpilTplzRLy8vT4MGDVLVqlXl5eWl8PBw/e1vf3PqM3DgQHXv3l1TpkxRSEiIwsPDJTnfdqlSpYoWL16sBQsWyOFwaMCAAapRo4Zee+01p7G2bdsmh8OhgwcPSnK+jXX5Ftsnn3yitm3bqmzZsqpfv/4V3xz/j3/8Q6GhoSpbtqz+53/+RzNnziwRfzmcP39e/fv3V/ny5RUcHKwZM2Y4tf/6eBpjNGHCBIWFhcnT01MhISEaMWKE1fe9997TfffdJ29vbwUFBenRRx/VyZMnncZbunSpatasqTJlyqht27aaP3++HA6HMjIyJP3frZClS5cqMjJSnp6eOnLkiLKzszV69GjdfffdKleunJo1a6bExESnsb/66iu1bNlSXl5eCg0N1YgRI3T+/PkiP2YFtWzZMvn5+Skv75dvob98nv3pT3+y+gwePFiPPfbYNff7zJkz6t+/v+666y6VLVtWnTp10oEDB6zHX37cihUrFBERofLly6tjx446fvy41Sc3N1cjRoyQn5+fKlSooLFjx2rAgAHq3r271edG54GkAtdy2YQJE9SgQQO99957qlKlinx9fdWnTx+dO3fO6nPu3Dn169dP5cqVU3BwsGbNmlUsbym0adNGw4YN08iRI1WxYkVFR0dr165d6tSpk8qXL6/AwEA9/vjj+u9//2s95mrH9Lf7drVb5n5+foqPj7fWjx49ql69esnPz0/+/v7q1q2bvvvuO6s9MTFRTZs2tW7Vt2jRQt9//73i4+M1ceJEbd++XQ6HQw6Hw2lcu8rOztaIESMUEBCgMmXK6IEHHlBKSoq+++47tW3bVpJ01113yeFwaODAgdbj8vPz9cILL8jf319BQUGaMGGC07gZGRkaPHiwKlWqJB8fHz344IPavn271X75fP/nP/+pqlWrqkyZMndid28ZYec2iI2N1bRp0zRu3Djt2bNHixYtuuoHHObn5+uee+7Rxx9/rD179ujll1/Wn//8Z3300UdO/VavXq20tDStXLlSy5Ytu2KclJQUdezYUb169dLx48f1xhtv6Mknn1RcXJxTv7i4OLVq1Uo1atS4Zu0vvviiRo8erW3btqlWrVrq27evcnNzJUkbNmzQ008/rWeffVbbtm3TQw89dNUQVxyNGTNGSUlJ+uyzz/Tll18qMTFRX3/99VX7Ll68WLNmzdI777yjAwcOaMmSJapbt67VnpOTo8mTJ2v79u1asmSJvvvuO6dfJocPH9b//u//qnv37tq+fbueeuopvfjii1c8z4ULF/TKK6/on//8p3bv3q2AgAANGzZMycnJ+uCDD7Rjxw498sgj6tixo/Wf7aFDh9SxY0f17NlTO3bs0IcffqivvvpKw4YNK9oDdhNatmypc+fO6ZtvvpEkJSUlqWLFik4hLSkpSW3atJF09f0eOHCgtm7dqqVLlyo5OVnGGHXu3Fk5OTnWGBcuXNBrr72m9957T+vWrdORI0c0evRoq/2VV17RwoULFRcXpw0bNigzM/OK/2ALch4UpJbfOnTokJYsWaJly5Zp2bJlSkpK0rRp06z2UaNGacOGDVq6dKlWrlyp9evXX/P8c7X58+fLw8NDGzZs0LRp0/Tggw+qYcOG2rp1qxISEnTixAn16tXL6n8zP1vXkpOTo+joaHl7e2v9+vXasGGDFWgvXbqk3Nxcde/eXa1bt9aOHTuUnJysoUOHyuFwqHfv3nr++edVp04dHT9+XMePH1fv3r2L+rAUOy+88IIWL16s+fPn6+uvv1aNGjWsY7h48WJJUlpamo4fP+70R/T8+fNVrlw5bd68WdOnT9ekSZO0cuVKq/2RRx7RyZMn9cUXXyg1NVWNGjVSu3btdPr0aavPwYMHtXjxYn3yyScl59bhdb8THTctMzPTeHp6mn/84x9XtB0+fNhIMt988801Hx8TE2
N69uxprQ8YMMAEBgaa7Oxsp36VK1c2s2bNsta7detmBgwYYK3/+OOPxs3NzWzevNkYY8ylS5dMxYoVTXx8vNVHkvn000+davvnP/9pte/evdtIMnv37jXGGNO7d2/TpUsXpzr69etnfH19r7k/xcG5c+eMh4eH+eijj6xtp06dMl5eXubZZ581xjgfzxkzZphatWqZS5cuFWj8lJQUI8mcO3fOGGPM2LFjzb333uvU58UXXzSSzJkzZ4wxxsTFxRlJZtu2bVaf77//3ri5uZkff/zR6bHt2rUzsbGxxhhjBg0aZIYOHerUvn79elOqVCnz888/F6je26FRo0bm1VdfNcYY0717dzNlyhTj4eFhzp07Z3744Qcjyezfv/+q+71//34jyWzYsMHa9t///td4eXlZr9nlxx08eNDqM3v2bBMYGGitBwYGWjUYY0xubq4JCwsz3bp1M8YU7DwoaC2/PufHjx9vypYtazIzM61tY8aMMc2aNTPG/PI7wd3d3Xz88cdWe0ZGhilbtqz1vMVF69atTcOGDa31yZMnmw4dOjj1OXr0qJFk0tLSCnRMjXH+XXOZr6+viYuLM8YY895775nw8HCTn59vtWdnZxsvLy+zYsUKc+rUKSPJJCYmXrXu8ePHm/r16xdup0ugrKws4+7ubhYuXGhtu3TpkgkJCTHTp083a9eudfp9c1nr1q3NAw884LStSZMmZuzYscaYX36X+Pj4mIsXLzr1qV69unnnnXeMMb8ca3d3d3Py5MnbsGe3D1d2itjevXuVnZ2tdu3aFaj/7Nmz1bhxY1WqVEnly5fX3//+dx05csSpT926dW/6nmhISIi6dOmid999V5L0+eefKzs7W4888sh1H1evXj3r38HBwZJk3aJJS0tT06ZNnfr/dr04OnTokC5duqRmzZpZ2/z9/a1bgr/1yCOP6Oeff1a1atU0ZMgQffrpp9bVLUlKTU1V165dFRYWJm9vb7Vu3VqSrNctLS1NTZo0cRrzasfJw8PD6Xjv3LlTeXl5qlWrlsqXL28tSUlJOnTokCRp+/btio+Pd2qPjo5Wfn6+Dh8+XMgjdOtat26txMREGWO0fv169ejRQxEREfrqq6+UlJSkkJAQaz7Zb/d77969Kl26tNPrU6FCBYWHh2vv3r3WtrJly6p69erWenBwsHVunj17VidOnHA6zm5ubmrcuLG1XpDzoKC1/FaVKlXk7e191dq+/fZb5eTkONXm6+t7zfPP1X59zLZv3661a9c6nW+1a9eW9MvxvNmfrWvZvn27Dh48KG9vb+t5/P39dfHiRR06dEj+/v4aOHCgoqOj1bVrV/3tb39zuoX5e3Po0CHl5OSoRYsW1jZ3d3c1bdr0uuep5Pw7XnI+V7dv366srCxVqFDB6TU/fPiw9TtIkipXruzyeY43iwnKRexmJi5+8MEHGj16tGbMmKGoqCh5e3vr1Vdf1ebNm536FfZdQoMHD9bjjz+uWbNmKS4uTr1791bZsmWv+xh3d3fr3w6HQ9Ivt9t+T0JDQ5WWlqZVq1Zp5cqV+uMf/6hXX31VSUlJunTpkqKjoxUdHa2FCxeqUqVKOnLkiKKjo3Xp0qWbeh4vLy/rGEtSVlaW3NzclJqaKjc3N6e+5cuXt/o89dRTTnOILgsLCyvE3haNNm3a6N1339X27dvl7u6u2rVrq02bNkpMTNSZM2esQChdud8F9etzU/rl/DTF5NturlZbSf25+fXvm6ysLHXt2lWvvPLKFf2Cg4Ot+X83crXX6te3BbOystS4cWMtXLjwisde/k81Li5OI0aMUEJCgj788EO99NJLWrlypZo3b16gGvCL652rWVlZCg4OvmKeoCSneWol8Z2rXNkpYjVr1pSXl5dWr159w74bNmzQ/fffrz/+8Y9q2LChatSo4ZSeb1Xnzp1Vrlw5zZkzRwkJCXryySdvabzw8HClpKQ4bfvtenFUvXp1ubu7O4XIM2fOaP/+/dd8jJeXl7
p27ao33nhDiYmJSk5O1s6dO7Vv3z6dOnVK06ZNU8uWLVW7du0rJieHh4dr69atTtsKcpwaNmyovLw8nTx5UjVq1HBaLr9zrlGjRtqzZ88V7TVq1HDpOyIuz9uZNWuWFWwuh53ExERrvs7VREREKDc31+n1OXXqlNLS0hQZGVmg5/f19VVgYKDTcc7Ly3OaO1KQ86AoavmtatWqyd3d3am2s2fPXvf8Ky4aNWqk3bt3q0qVKlecb+XKlSvwz1alSpWcrsQcOHBAFy5ccHqeAwcOKCAg4Irn+fVHaTRs2FCxsbHauHGj7r33Xi1atEjSL1cLL0+Q/z2oXr26Na/qspycHKWkpCgyMtL6XXCzx6RRo0ZKT09X6dKlr3gdKlasWKT7cKcRdopYmTJlNHbsWL3wwgtasGCBDh06pE2bNmnevHlX9K1Zs6a2bt2qFStWaP/+/Ro3blyRhgc3NzcNHDhQsbGxqlmzpqKiom5pvOHDh2v58uWaOXOmDhw4oHfeeUdffPFFof5Kv5PKly+vQYMGacyYMVqzZo127dqlgQMHXvPtzvHx8Zo3b5527dqlb7/9Vv/617/k5eWlypUrKywsTB4eHnrzzTf17bffaunSpZo8ebLT45966int27dPY8eO1f79+/XRRx9Z7w653rGqVauW+vXrp/79++uTTz7R4cOHtWXLFk2dOlX/+c9/JEljx47Vxo0bNWzYMG3btk0HDhzQZ5995tIJytIv7/qoV6+eFi5caAWbVq1a6euvv9b+/fudruz8Vs2aNdWtWzcNGTJEX331lbZv367HHntMd999t7p161bgGoYPH66pU6fqs88+U1pamp599lmdOXPGOuYFOQ+KqpZf8/b21oABAzRmzBitXbtWu3fv1qBBg1SqVKli/7MTExOj06dPq2/fvkpJSdGhQ4e0YsUKPfHEE8rLyyvwz9aDDz6ot956S9988422bt2qp59+2ukKQ79+/VSxYkV169ZN69ev1+HDh5WYmKgRI0bohx9+0OHDhxUbG6vk5GR9//33+vLLL3XgwAFFRERI+uU24uHDh7Vt2zb997//VXZ29h09TndauXLl9Mwzz2jMmDFKSEjQnj17NGTIEF24cEGDBg1S5cqV5XA4tGzZMv3000/Kysoq0Ljt27dXVFSUunfvri+//FLfffedNm7cqBdffPGKP+BKGsLObTBu3Dg9//zzevnllxUREaHevXtf8de/9Mt/ij169FDv3r3VrFkznTp1Sn/84x+LtJZBgwbp0qVLeuKJJ255rBYtWmju3LmaOXOm6tevr4SEBD333HMl4q2Hr776qlq2bKmuXbuqffv2euCBB5zmJvyan5+f/vGPf6hFixaqV6+eVq1apc8//1wVKlRQpUqVFB8fr48//liRkZGaNm3aFW/xr1q1qv7973/rk08+Ub169TRnzhzr3Vienp7XrTMuLk79+/fX888/r/DwcHXv3l0pKSnWLap69eopKSlJ+/fvV8uWLdWwYUO9/PLLCgkJKYKjdGtat26tvLw8K+z4+/srMjJSQUFBN5zDERcXp8aNG+vhhx9WVFSUjDFavnz5FZfcr2fs2LHq27ev+vfvr6ioKGs+06/Pz4KcB0VRy2/NnDlTUVFRevjhh9W+fXu1aNFCERERxf5nJyQkRBs2bFBeXp46dOigunXrauTIkfLz87MCTUGO6YwZMxQaGqqWLVvq0Ucf1ejRo51uqZctW1br1q1TWFiYNd9r0KBBunjxonx8fFS2bFnt27dPPXv2VK1atTR06FDFxMToqaeekiT17NlTHTt2VNu2bVWpUiW9//77d+4guci0adPUs2dPPf7442rUqJEOHjyoFStW6K677tLdd9+tiRMn6k9/+pMCAwML/MeQw+HQ8uXL1apVKz3xxBOqVauW+vTpo++///6q7yguSRymuNz0xm2xfv16tWvXTkePHr0tJ+uQIUO0b98+rV+/vsjHtpMpU6Zo7ty5Onr0qKtL+d
3Iz89XRESEevXqdcXVN1c7f/687r77bs2YMUODBg1ydTlFrk2bNmrQoAFfwYJigwnKNpWdna2ffvpJEyZM0COPPFJkQee1116zvoPriy++0Pz58/X2228Xydh28vbbb6tJkyaqUKGCNmzYoFdffdXlt5rs7vLtjdatWys7O1tvvfWWDh8+rEcffdTVpembb77Rvn371LRpU509e1aTJk2SpELfGgNwcwg7NvX+++9r0KBBatCggRYsWFBk427ZskXTp0/XuXPnVK1aNb3xxhsaPHhwkY1vFwcOHNBf/vIXnT59WmFhYXr++ecVGxvr6rJsrVSpUoqPj9fo0aNljNG9996rVatWWfM6XO21115TWlqaPDw81LhxY61fv77ET/oESgpuYwEAAFtjgjIAALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg6AYqlKlSp8KB2AIkHYAXDTkpOT5ebmpi5duri6lNsmMTFRDofjusvVvh0aQPHDhwoCuGnz5s3T8OHDNW/ePB07dqxYfDdXUbv//vudvqn72WefVWZmpuLi4qxt/v7+rigNwE3iyg6Am5KVlaUPP/xQzzzzjLp06WJ9o/tll6+I/Oc//1G9evVUpkwZNW/eXLt27XLqt3jxYtWpU0eenp6qUqWKZsyYcd3nPXLkiLp166by5cvLx8dHvXr10okTJ5z6/OUvf1FAQIC8vb01ePBg/elPf1KDBg0kSevWrZO7u7vS09OdHjNy5Ei1bNnyiufz8PBQUFCQtXh5ecnT01NBQUHav3+/QkNDdfr06WuOFR8fLz8/Py1ZskQ1a9ZUmTJlFB0dfcX3o3322Wdq1KiRypQpo2rVqmnixInKzc297rEAcHMIOwBuykcffaTatWsrPDxcjz32mN59911d7YPYx4wZoxkzZiglJUWVKlVS165dlZOTI0lKTU1Vr1691KdPH+3cuVMTJkzQuHHjrghOl+Xn56tbt246ffq0kpKStHLlSn377bfq3bu31WfhwoWaMmWKXnnlFaWmpiosLExz5syx2lu1aqVq1arpvffes7bl5ORo4cKFevLJJ2/qGBR0rAsXLmjKlClasGCBNmzYoIyMDPXp08dqX79+vfr3769nn31We/bs0TvvvKP4+HhNmTLlpuoBcAMGAG7C/fffb15//XVjjDE5OTmmYsWKZu3atVb72rVrjSTzwQcfWNtOnTplvLy8zIcffmiMMebRRx81Dz30kNO4Y8aMMZGRkdZ65cqVzaxZs4wxxnz55ZfGzc3NHDlyxGrfvXu3kWS2bNlijDGmWbNmJiYmxmnMFi1amPr161vrr7zyiomIiLDWFy9ebMqXL2+ysrJuuN8DBgww3bp1K/BYcXFxRpLZtGmT1Wfv3r1Gktm8ebMxxph27dqZv/71r07P895775ng4OAb1gOg4LiyA6DA0tLStGXLFvXt21eSVLp0afXu3Vvz5s27om9UVJT1b39/f4WHh2vv3r2SpL1796pFixZO/Vu0aKEDBw4oLy/virH27t2r0NBQhYaGWtsiIyPl5+dnjZmWlqamTZs6Pe636wMHDtTBgwe1adMmSb/caurVq5fKlStX4GNwM2OVLl1aTZo0sdZr167tVPP27ds1adIklS9f3lqGDBmi48eP68KFCzddE4CrY4IygAKbN2+ecnNznSYkG2Pk6empt956S76+vi6s7sYCAgLUtWtXxcXFqWrVqvriiy8K/Y6qohgrKytLEydOVI8ePa5oK1OmTKHqAnAlwg6AAsnNzdWCBQs0Y8YMdejQwamte/fuev/99/X0009b2zZt2qSwsDBJ0pkzZ7R//35FRERIkiIiIrRhwwanMTZs2KBatWrJzc3tiueOiIjQ0aNHdfToUevqzp49e5SRkaHIyEhJUnh4uFJSUtS/f3/rcSkpKVeMNXjwYPXt21f33HOPqlevfsUVpptxo7Fyc3O1detW6wpTWlqaMjIyrOPQqFEjpaWlqUaNGoWuAU
ABuPo+GoCS4dNPPzUeHh4mIyPjirYXXnjB3HfffcaY/5uzU6dOHbNq1Sqzc+dO84c//MGEhYWZ7OxsY4wxqampplSpUmbSpEkmLS3NxMfHGy8vLxMXF2eN+es5O/n5+aZBgwamZcuWJjU11WzevNk0btzYtG7d2ur/r3/9y3h5eZn4+Hizf/9+M3nyZOPj42MaNGjgVGteXp4JDQ01Hh4eZtq0aQXe/9/O2bnRWHFxccbd3d00bdrUbNq0yWzdutU0b97cNG/e3OqTkJBgSpcubSZMmGB27dpl9uzZY95//33z4osvFrguADdG2AFQIA8//LDp3LnzVds2b95sJJnt27dbYefzzz83derUMR4eHqZp06Zm+/btTo/597//bSIjI427u7sJCwszr776qlP7r8OOMcZ8//335g9/+IMpV66c8fb2No888ohJT093esykSZNMxYoVTfny5c2TTz5pRowY4RQuLhs3bpxxc3Mzx44dK/D+Xy3sXG+suLg44+vraxYvXmyqVatmPD09Tfv27c3333/v1C8hIcHcf//9xsvLy/j4+JimTZuav//97wWuC8CNOYy5yntGAaCQEhMT1bZtW505c0Z+fn4ureWhhx5SUFCQ01vEJWnQoEH66aeftHTp0lt+jmuNFR8fr5EjRyojI+OWnwPArWHODgBbuHDhgubOnavo6Gi5ubnp/fff16pVq7Ry5Uqrz9mzZ7Vz504tWrToloNOUY4F4PYi7ACwBYfDoeXLl2vKlCm6ePGiwsPDtXjxYrVv397q061bN23ZskVPP/20HnrooVt6vqIcC8DtxW0sAABga3yoIAAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsDXCDgAAsLX/BwSwgIUDphgPAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = pd.DataFrame(list(apology_dict.items()), columns=['Apology Type', 'Count'])\n", + "\n", + "sns.barplot(x='Apology Type', y='Count', data=df)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M_ULJaiP_ZW9" + }, + "source": [ + "Printing out some examples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 87 + }, + "id": "ksEVIMfokOiI", + "outputId": "3cc3ae5f-d404-4da5-a82c-09deaeead90b" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'Sorry, I meant \"life-long\" monogamy in the case of a single partner, not switching them every couple of months.\\n\\n>Personally I think humans are just plain diverse when it comes to relationships.\\n\\nAre we though? I don\\'t want to be pretentious, but aren\\'t we all biologically wired the same? Values/traditions have an impact of course, but in the developed western nations those are usually disregarded when you see a nice piece of aaaaaaaaaaaaaasssshhh and have a shot of alcohol near you.\\n\\n>An answer to what? What, exactly, is the problem here?\\n\\nThe problem is that we live in societies that tell us that we have to get married, that people want to get married and that marriage is happiness, when everything points to the contrary.'" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# clarifying apology\n", + "corpus.get_utterance(clarifying_ids[0]).text\n", + "\n", + "# Sorry, I meant..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 87 + }, + "id": "tI7DqGKV_pPj", + "outputId": "ed892b87-304f-4f40-d15d-4e9fdfe65ca2" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "\">Deleting comments that go against the grain of latestagecapitalism, and banning those that challenge the moderators. It's hard to see how this isn't 'classic' communism, as in best traditions of most communist/socialist regimes that have risen in the past with repression against political dissidents.\\n\\nIf this is your definition of communism, then I guess you will have to throw /the_Donald into the commie bucket as well. They have long deleted comments, banned users, and supported bots the reproduce without comment (propaganda?). I'm sorry but moderating a sub to a include what you want it to include, isn't communism so much as just an echo chamber. Lots of people like echo chambers and it does nothing to push a communist agenda.\"" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# disagree apology\n", + "corpus.get_utterance(disagree_ids[56]).text\n", + "\n", + "# I'm sorry but ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 105 + }, + "id": "p6zcnT69ACKP", + "outputId": "ae1e342d-20c4-45e3-83b1-aaa1a1ce32f9" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "\"I truly apologize and don't mean to hurt your feelings. What I said was in the matter of discussion and debate and within the context I understand that it can be hurtful. I'm sorry that it came off in that manner and I hope you're feelings aren't too hurt. 
I'm speaking of things on a macro level and happened to use autism as an example and I understand how speaking in such generalities loses the personal touch of humanity. I would just like to say that if an argument like this came up in real life I would refrain from saying such things as everything has a time and place and context. This sub was just meant for what I suppose controversial opinions and I thought this would be a good platform to voice it. I knew what I stated would offend/hurt some people and I truly am sorry and I know you probably don't believe me but that's the best way I can explain it.\"" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# wrongdoing apology\n", + "corpus.get_utterance(wrongdoing_ids[23]).text\n", + "\n", + "# I truly apologize and don't mean to ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "Rk6aEENvAQrh", + "outputId": "752ce351-056b-4045-b634-ea5852dda88e" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "\"I'm sorry you feel that way, can you explain how you perceive it as racist? \"" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# request apology\n", + "corpus.get_utterance(request_ids[17]).text\n", + "\n", + "# I'm sorry you...can you explain..." 
# apologyTransformer/apologiesTransformer.py
"""Rule-based labelling of apology types in ConvoKit utterances.

The module defines keyword lexicons, the regular expressions built from them,
and :class:`ApologyLabeler`, a ConvoKit ``Transformer`` that writes one of the
labels ``clarifying_apology``, ``wrongdoing_apology``, ``disagree_apology``,
``request_apology``, ``other_apology`` or ``no_apology`` into utterance
metadata.
"""
import convokit
from convokit import Corpus, download
from convokit.transformer import Transformer
from inspect import signature
import string
import re

# --- Lexicons ---------------------------------------------------------------
# All entries are lowercase because the text is lowercased before matching.

apology_list = [
    "sorry",
    "apologize",
    "apologies",
    "oops",
    "whoops",
    "woops",
    "forgive me",
    "forgive my",
    "excuse me",
    "excuse my",
    "my mistake",
    "my bad",
]
first_person = ["i", "me", "my", "myself", "mine"]
second_person = ["you", "your", "u", "ur", "yours", "yourself", "urself"]
clarification = [
    "mean",
    "meant",
    "clarify",
    "clear",
    "clarification",
    "explain",
    "understand",
    "confused",
    "confusing",
    "what",
    "context",
    "worded",
    "wording",
    "are you",
    "do you",
    "talking about",
    "referring",
    "rephrase",
    "reword",
    "intend",
    "intent",
    "term",
]
contradictory = ["but", "however", "while", "although"]
disagreement = [
    "wrong",
    "incorrect",
    "inaccurate",
    "false",
    "mistaken",
    "error",
    "bad",
    "nonsensical",
    "stupid",
    "disagree",
    "dumb",
    "bullshit",
    "bs",
    "insufficient",
    "hypocritical",
    "break it",
]
agreement = ["right", "correct", "sense", "true", "accurate", "case", "work", "agree"]
negatives = ["no", "not", "don't", "dont", "doesn't", "doesnt", "isn't", "isnt"]
wrongdoing = [
    "regret",
    "mistake",
    "misunderstand",
    "misunderstood",
    "fault",
    "offend",
    "hurt",
    "misread",
    "misspoke",
    "wrong",
    "incorrect",
    "accident",
    "misconception",
    "truly",
    "genuine",
    "sincere",
]
potential = ["for", "if", "because", "that", "about"]
requests = ["could", "would", "can"]


# --- Patterns ---------------------------------------------------------------


def _word_alternation(words):
    """Return a regex matching any of *words* as a whole word (one capture group)."""
    return r"\b(" + "|".join(re.escape(word) for word in words) + r")\b"


apology_pattern = _word_alternation(apology_list)

clarify_pattern = _word_alternation(clarification)
# An apology cue followed within 20 characters by a contrastive conjunction.
_contradictory_alternatives = "|".join(re.escape(word) for word in contradictory)
contradictory_pattern = rf"{apology_pattern}(.{{0,20}}(?:{_contradictory_alternatives}))\b"
disagree_pattern = _word_alternation(disagreement)
negatives_pattern = _word_alternation(negatives)
agreement_pattern = _word_alternation(agreement)
# A negation followed within 10 characters by an agreement word ("not right").
not_agree_pattern = rf"{negatives_pattern}.{{0,10}}{agreement_pattern}"
# An apology cue followed within 3 characters by "for", "if", "because", ...
potential_pattern = rf"{apology_pattern}.{{0,3}}" + _word_alternation(potential)
first_person_pattern = _word_alternation(first_person)
second_person_pattern = _word_alternation(second_person)
wrong_pattern = _word_alternation(wrongdoing)
# A first-person word followed within 10 characters by a wrongdoing word.
wrongdoing_pattern = rf"{first_person_pattern}.{{0,10}}{wrong_pattern}"
ask_pattern = _word_alternation(requests)
# "could/would/can" followed within 10 characters by a second-person word, or "please".
requests_pattern = rf"({ask_pattern}.{{0,10}}{second_person_pattern})|please"

# (pattern, label) pairs. Order matters: when two patterns match at the same
# distance from the apology cue, the one listed first wins.
_PATTERN_LABELS = (
    (clarify_pattern, "clarifying_apology"),
    (potential_pattern, "wrongdoing_apology"),
    (wrongdoing_pattern, "wrongdoing_apology"),
    (contradictory_pattern, "disagree_apology"),
    (disagree_pattern, "disagree_apology"),
    (not_agree_pattern, "disagree_apology"),
    (requests_pattern, "request_apology"),
)

_SENTENCE_BOUNDARY = r"(?<=[.!?])\s+"


def remove_quotes(text):
    """Drop quoted lines (lines starting with ``>`` or ``&gt;``) from *text*.

    NOTE(review): ``transform`` called ``remove_quotes`` but this module neither
    defined nor imported it, so labelling failed with ``NameError``. This
    implementation assumes the intent is to strip Reddit-style block quotes so
    that only the author's own words are classified -- confirm against the
    helper used in the accompanying notebook.
    """
    return "\n".join(
        line
        for line in text.split("\n")
        if not line.lstrip().startswith((">", "&gt;"))
    )


def _classify_apology(text):
    """Return the apology label for one utterance text.

    The text is stripped of quotes, lowercased and split into sentences. The
    segment that gets classified is the apology sentence plus the sentence that
    follows it. If several sentences contain an apology cue, the last one is
    used (each match overwrites the previous one).
    """
    cleaned = remove_quotes(text or "").lower()
    sentences = re.split(_SENTENCE_BOUNDARY, cleaned)

    apology_loc = None
    apology_segment = None
    for i, sentence in enumerate(sentences):
        # Search the stripped sentence so that the cue offset refers to the
        # same string the type patterns are later matched against.
        apology_sentence = sentence.strip()
        apology_match = re.search(apology_pattern, apology_sentence)
        if apology_match is None:
            continue
        apology_loc = apology_match.start()
        next_sentence = sentences[i + 1].strip() if i + 1 < len(sentences) else " "
        apology_segment = apology_sentence + next_sentence

    if apology_segment is None:
        return "no_apology"

    # Pick the label whose pattern matches closest to the apology cue.
    best_label = "other_apology"
    best_distance = None
    for pattern, label in _PATTERN_LABELS:
        match = re.search(pattern, apology_segment)
        if match is None:
            continue
        distance = abs(match.start() - apology_loc)
        # Strict "<" keeps the earliest-listed pattern on ties.
        if best_distance is None or distance < best_distance:
            best_label, best_distance = label, distance
    return best_label


class ApologyLabeler(Transformer):
    """
    A transformer to label different types of apologies in a corpus.

    Each utterance receives one of ``clarifying_apology``,
    ``wrongdoing_apology``, ``disagree_apology``, ``request_apology``,
    ``other_apology`` (an apology cue with no recognised type) or
    ``no_apology`` under the metadata key ``output_field``.

    :param obj_type: type of corpus object to label; ``transform`` only acts
        when this is ``"utterance"``.
    :param output_field: metadata key the label is written to.
    :param input_field: stored on the instance; not read by ``transform``.
    :param input_filter: optional callable stored as ``self.input_filter``
        (defaults to a filter accepting everything).
        NOTE(review): ``transform`` does not currently apply this filter.
    :param verbosity: print a progress line every ``verbosity`` utterances;
        a value of 0 or less disables progress output.
    """

    def __init__(
        self,
        obj_type="utterance",
        output_field="apology_type",
        input_field=None,
        input_filter=None,
        verbosity=10000,
    ):
        if input_filter:
            if len(signature(input_filter).parameters) == 1:
                self.input_filter = lambda utt: input_filter(utt)
            else:
                self.input_filter = input_filter
        else:
            self.input_filter = lambda utt: True
        self.obj_type = obj_type
        self.input_field = input_field
        self.output_field = output_field
        self.verbosity = verbosity

    def _print_output(self, i):
        """Return True when a progress line is due for the object at index *i*."""
        return (self.verbosity > 0) and (i > 0) and (i % self.verbosity == 0)

    def transform(self, corpus: Corpus) -> Corpus:
        """
        Label every utterance in *corpus* with its apology type.

        :param corpus: the corpus whose utterances are annotated in place.
        :return: the same corpus, with ``output_field`` set on each utterance.
            The corpus is returned unchanged when ``obj_type`` is not
            ``"utterance"``.
        """
        if self.obj_type == "utterance":
            # Count without building a throwaway list of all utterances.
            total = sum(1 for _ in corpus.iter_utterances())

            for idx, utterance in enumerate(corpus.iter_utterances()):
                if self._print_output(idx):
                    print(f"{idx:03d}/{total:03d} {self.obj_type} processed")

                utterance.add_meta(self.output_field, _classify_apology(utterance.text))

        return corpus