From b8ca53c43e45a0d46197c64d4ac1c49bfe1da912 Mon Sep 17 00:00:00 2001
From: Barbara Vreede <b.vreede@esciencecenter.nl>
Date: Tue, 21 Nov 2023 17:05:29 +0100
Subject: [PATCH] calculate FTO

---
 sktalk/corpus/conversation.py | 67 ++++++++++++++++++++++++++++++-----
 sktalk/corpus/utterance.py    | 21 +++++++++++
 2 files changed, 80 insertions(+), 8 deletions(-)

diff --git a/sktalk/corpus/conversation.py b/sktalk/corpus/conversation.py
index 2af1580..ac0f904 100644
--- a/sktalk/corpus/conversation.py
+++ b/sktalk/corpus/conversation.py
@@ -132,20 +132,71 @@ def _count_participants(self) -> int:
         participants = [u.participant for u in self.utterances]
         return len(set(participants))
 
-    def apply(self, field, **kwargs):
+    def _update(self, field: str, values: list, **kwargs):
         """
-        Apply a function to each utterance in the conversation
+        Update all utterances in the conversation with calculated values
+
+        This function also stores relevant arguments in the Conversation metadata.
 
         Args:
-            func (function): function to apply to each utterance
-            field (str): field to update
+            field (str): field of the Utterance to update
+            values (list): list of values to update each utterance with
+            kwargs (dict): information about the calculation to store in the Conversation metadata
         """
-        func = self.CONVERSATION_FUNCTIONS[field]
+        if len(values) != len(self.utterances):
+            raise ValueError(
+                "The number of values must match the number of utterances")
+        # Record the arguments under the name held by `field` (not the literal
+        # key "field"), creating the "Calculations" section if it is missing
+        # without discarding any metadata that is already stored.
+        self._metadata.setdefault("Calculations", {})[field] = kwargs
+        for utterance, value in zip(self.utterances, values):
+            setattr(utterance, field, value)
+
+    def calculate_FTO(self, window: int = 10000, planning_buffer: int = 200, n_participants: int = 2):
+        """Calculate Floor Transfer Offset (FTO) per utterance
+
+        FTO is defined as the difference between the time that a turn starts and the
+        end of the most relevant prior turn by the other participant, which is not
+        necessarily the prior utterance.
 
+        An utterance does not receive an FTO if there are preceding utterances
+        within the window that do not have timing information, or if it lacks
+        timing information itself.
+
+        To be a relevant prior turn, the following conditions must be met, respective to utterance U:
+        - the utterance must be by another speaker than U
+        - the utterance by the other speaker must be the most recent utterance by that speaker
+        - the utterance must have started before utterance U, at least `planning_buffer` ms before.
+        - the utterance must be partly or entirely within the context window (`window` ms prior to the start of utterance U)
+        - within the context window, there must be at least 2 and at most `n_participants` speakers.
+
+        Args:
+            window (int, optional): size in ms of the context window preceding each utterance. Defaults to 10000.
+            planning_buffer (int, optional): minimum time in ms between the start of a relevant prior turn and the start of the utterance. Defaults to 200.
+            n_participants (int, optional): maximum number of speakers allowed in the context window. Defaults to 2.
+        """
+        values = []
         for index, utterance in enumerate(self.utterances):
-            sub = self.subconversation(index=index, **kwargs)
-            value = func(sub)
-            utterance.__setattr__(field, value)
+            # Context window of `window` ms before this utterance (see the
+            # docstring); nothing after the utterance is included.
+            sub = self._subconversation(
+                index=index, time_or_index="time", before=window, after=0)
+            # No FTO when the window holds fewer than 2 or too many speakers.
+            if not 2 <= sub._count_participants() <= n_participants:
+                values.append(None)
+                continue
+            potentials = [
+                u for u in sub.utterances if utterance._relevant_for_fto(u, planning_buffer)]
+            try:
+                # FTO = start of this turn minus end of the relevant prior turn.
+                values.append(potentials[-1].until(utterance))
+            except IndexError:
+                values.append(None)
+        self._update("FTO", values,
+                     window=window,
+                     planning_buffer=planning_buffer,
+                     n_participants=n_participants)
 
     @staticmethod
     def overlap(begin: int, end: int, time: list):
diff --git a/sktalk/corpus/utterance.py b/sktalk/corpus/utterance.py
index 6712674..0f7b082 100644
--- a/sktalk/corpus/utterance.py
+++ b/sktalk/corpus/utterance.py
@@ -59,6 +59,27 @@ def _clean_utterance(self):
     def until(self, next_utt):
         return next_utt.time[0] - self.time[1]
 
+    def _relevant_for_fto(self, prior_utt, planning_buffer: int):
+        """Check whether `prior_utt` could serve as the prior turn for this utterance's FTO
+
+        Two conditions must both hold:
+        - `prior_utt` was produced by a different participant than this utterance
+        - `prior_utt` started at least `planning_buffer` ms before this utterance started
+
+        The planning buffer is the minimum gap between the two start times.
+
+        Args:
+            prior_utt (Utterance): candidate preceding utterance
+            planning_buffer (int): buffer time (in ms)
+
+        Returns:
+            bool: True if `prior_utt` satisfies both conditions, False otherwise
+        """
+        if self.participant == prior_utt.participant:
+            return False
+        latest_start = self.time[0] - planning_buffer
+        return prior_utt.time[0] <= latest_start
+
     def _split_time(self):
         try:
             begin, end = self.time