
Lr ml #106

Merged
merged 31 commits
Apr 17, 2024

31 commits
56d8b6c
Auto stash before rebase of "lr_ml" onto "origin/main"
alexstihi Mar 12, 2024
95465ba
ported first version of ML-based L/R detection and added some basic t…
alexstihi Mar 12, 2024
433081f
fixed tests and cleaned up the code, following Arne's review
alexstihi Mar 14, 2024
725523b
removed extra models, fixed regression test & other minor fixes
alexstihi Mar 21, 2024
1ea8a9e
Autoformatting
AKuederle Apr 15, 2024
97de86c
Added selected models back after rebase
AKuederle Apr 15, 2024
34335fa
Formatting
AKuederle Apr 15, 2024
1a0f3b4
Removed remaining gailink references
AKuederle Apr 15, 2024
610ca7c
Fixed regression tests
AKuederle Apr 15, 2024
c6be281
Renamed models folder
AKuederle Apr 15, 2024
b4ec481
Refactored model loading and caching
AKuederle Apr 15, 2024
d8d8996
Some small refactoring and switching diff -> gradient to handle edge …
AKuederle Apr 15, 2024
353cbb9
Docstring and linting
AKuederle Apr 15, 2024
90bda72
Updated snapshots after switch to gradient over diff
AKuederle Apr 15, 2024
849d280
Rework of LR pipeline
AKuederle Apr 15, 2024
8ea77f4
Ignored types
AKuederle Apr 16, 2024
d7d1ae7
Further work on self-optimize for the pipeline
AKuederle Apr 16, 2024
6a45f25
Some typos
AKuederle Apr 16, 2024
31ee4cb
Added full evaluation example and fixed some remaining implementation…
AKuederle Apr 16, 2024
b2419f0
Unified naming for self-optimize methods
AKuederle Apr 16, 2024
f60fda3
The big renaming
AKuederle Apr 16, 2024
d69ffd8
Switched from individual scaler and model to pipeline to allow for de…
AKuederle Apr 16, 2024
c88e28d
Further renaming
AKuederle Apr 16, 2024
19a2d40
Updated some of the tests
AKuederle Apr 16, 2024
51ba1db
More docstrings, small naming improvements and tests for pipeline
AKuederle Apr 16, 2024
bc70134
Comments
AKuederle Apr 17, 2024
d6a4168
Added regression test for evaluation example
AKuederle Apr 17, 2024
9702b90
Added changelog'
AKuederle Apr 17, 2024
3afce5e
Added example for LrcUllrich and refactored other Lrc examples
AKuederle Apr 17, 2024
458ac29
Typo
AKuederle Apr 17, 2024
1655fca
Fixed import for Py3.9
AKuederle Apr 17, 2024
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -13,10 +13,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
This was added to move all data related config (i.e. which sensor to use in a pipeline) to the dataset class, making
it easier to implement dataset agnostic pipelines (https://github.com/mobilise-d/mobgap/pull/119)
- A evaluation pipeline for GSD (https://github.com/mobilise-d/mobgap/pull/124)
- ML-based L/R classification (https://github.com/mobilise-d/mobgap/pull/106)
- An evaluation/optimization pipeline for LRC (https://github.com/mobilise-d/mobgap/pull/106)

### Changed

- The loaded reference data now has stricter dtypes (https://github.com/mobilise-d/mobgap/pull/119)
- Renamed LRD (left-right-detection) to LRC (left-right-classification) (https://github.com/mobilise-d/mobgap/pull/106)

### Fixed

2 changes: 1 addition & 1 deletion docs/conf.py
@@ -201,7 +201,7 @@ def substitute(matchobj) -> str:
"../examples/data",
"../examples/gsd",
"../examples/icd",
"../examples/lrd",
"../examples/lrc",
"../examples/cad",
"../examples/wba",
"../examples/aggregation",
2 changes: 1 addition & 1 deletion docs/modules/index.rst
@@ -13,7 +13,7 @@ This is the API Reference for ``mobgap``.
pipeline
gsd
icd
lrd
lrc
cad
wba
aggregation
49 changes: 49 additions & 0 deletions docs/modules/lrc.rst
@@ -0,0 +1,49 @@
Left-Right Classification (LRC)
===============================

.. automodule:: mobgap.lrc
:no-members:
:no-inherited-members:


Algorithms
++++++++++
.. currentmodule:: mobgap.lrc

.. autosummary::
:toctree: generated/lrc
:template: class.rst

LrcMcCamley
LrcUllrich

Pipelines
+++++++++
.. automodule:: mobgap.lrc.pipeline
:no-members:
:no-inherited-members:

LrcEmulationPipeline

Base Classes
++++++++++++
.. automodule:: mobgap.lrc.base
:no-members:
:no-inherited-members:

.. currentmodule:: mobgap.lrc.base

.. autosummary::
:toctree: generated/lrc
:template: class.rst

BaseLRClassifier

Docu-helper
-----------

.. autosummary::
:toctree: generated/lrc
:template: func.rst

base_lrc_docfiller
40 changes: 0 additions & 40 deletions docs/modules/lrd.rst

This file was deleted.

8 changes: 4 additions & 4 deletions examples/gsd/_02_gsd_evaluation.py
@@ -167,7 +167,7 @@ def load_reference(single_test_data):
# often want to run a full evaluation on an entire dataset.
# This can be done using the :class:`~mobgap.gsd.evaluation.GsdEvaluationPipeline` class and some ``tpcp`` functions.
#
# But lets start with selecting some data.
# But let's start with selecting some data.
# We want to use all the simulated real-world walking data from the INDIP reference system (Test11).
simulated_real_world_walking = LabExampleDataset(reference_system="INDIP").get_subset(test="Test11")

@@ -180,12 +180,12 @@ def load_reference(single_test_data):

pipeline = GsdEvaluationPipeline(GsdIluz())

pipeline.run(simulated_real_world_walking[0]).gs_list_
pipeline.safe_run(simulated_real_world_walking[0]).gs_list_

# %%
# Note, that this did just "run" the pipeline on a single datapoint.
# If we want to run it on all datapoints and evaluate the performance of the algorithm, we can use the
# ``tpcp.validate.validate`` function.
# :func:`~tpcp.validate.validate` function.
#
# It uses the built-in ``score`` method of the pipeline to calculate the performance of the algorithm on each datapoint
# and then takes the mean of the results.
@@ -205,7 +205,7 @@ def load_reference(single_test_data):
evaluation_results["single_detected"][0][0]

# %%
# If you want to calculate additional metrics, you can either create a custom score function or sublcass the pipeline
# If you want to calculate additional metrics, you can either create a custom score function or subclass the pipeline
# and overwrite the score function.
#
# Parameter Optimization
4 changes: 4 additions & 0 deletions examples/lrc/README.rst
@@ -0,0 +1,4 @@
.. _examples-lrc:

Left/Right Classification
-------------------------
65 changes: 13 additions & 52 deletions examples/lrd/_01_mccamley.py → examples/lrc/_01_lrc_mccamley.py
@@ -1,17 +1,15 @@
"""
McCamley L/R detector
=====================
McCamley L/R Classifier
=======================

The McCamley L/R detector is a simple algorithm to detect the laterality of initial contacts based on the sign
The McCamley L/R classifier is a simple algorithm to detect the laterality of initial contacts based on the sign
of the angular velocity signal.
We use a modified version of the original McCamley algorithm, which includes a smoothing filter to reduce the
influence of noise on the detection.

This example shows how to use the algorithm and compares the output to the reference labels on some example data.
"""

import pandas as pd

from mobgap.data import LabExampleDataset

# %%
@@ -22,8 +20,7 @@
# We load example data from the lab dataset together with the INDIP reference system.
# We will use the INDIP "InitialContact_Event" output as ground truth.
#
# We only use the data from the "simulated daily living" activity test from a single particomand.

# We only use the data from the "simulated daily living" activity test from a single participant.
example_data = LabExampleDataset(reference_system="INDIP", reference_para_level="wb")
single_test = example_data.get_subset(cohort="MS", participant_id="001", test="Test11", trial="Trial1")

@@ -46,57 +43,21 @@
# We will use the `GsIterator` to iterate over the gait sequences and apply the algorithm to each wb.
# Note, that we use the ``ic_list`` result key, as the output of all L/R detectors is identical to the output of the
# IC-detectors, but with an additional ``lr_label`` column.
from mobgap.lrd import LrdMcCamley
from mobgap.lrc import LrcMcCamley
from mobgap.pipeline import GsIterator

iterator = GsIterator()
algo = LrcMcCamley()

for (gs, data), result in iterator.iterate(imu_data, reference_wbs):
result.ic_list = (
LrdMcCamley().detect(data, ic_list=ref_ics_rel_to_gs.loc[gs.id], sampling_rate_hz=sampling_rate_hz).ic_lr_list_
)
result.ic_list = algo.predict(
data, ic_list=ref_ics_rel_to_gs.loc[gs.id].drop("lr_label", axis=1), sampling_rate_hz=sampling_rate_hz
).ic_lr_list_

detected_ics = iterator.results_.ic_list
detected_ics

# %%
# Compare the results to the reference
# ------------------------------------
# We compare the detected initial contacts to the reference labels.
# One easy way to compare the results is to visualize them as colorful bars.

import matplotlib.pyplot as plt


def plot_lr(ref, detected):
fig, ax = plt.subplots(figsize=(15, 5))
# We plot one box either (red or blue depending on the laterality) for each detected IC ignoring the actual time
for (_, row), (_, ref_row) in zip(detected.iterrows(), ref.iterrows()):
ax.plot([row["ic"], row["ic"]], [0, 0.98], color="r" if row["lr_label"] == "left" else "b", linewidth=5)
ax.plot(
[ref_row["ic"], ref_row["ic"]], [1.02, 2], color="r" if ref_row["lr_label"] == "left" else "b", linewidth=5
)

ax.set_yticks([0.5, 1.5])
ax.set_yticklabels(["Detected", "Reference"])
return fig, ax


fig, _ = plot_lr(ref_ics, detected_ics)
fig.show()
detected_ics.assign(ref_lr_label=ref_ics.lr_label)

# %%
# If we zoom in on a longer WB, we can see that for some ICs the L/R label does not match.
# But, in particular for regular gait in the center of the WB, the labels match quite well.

fig, ax = plot_lr(ref_ics, detected_ics)
ax.set_xlim(12000, 15000)
fig.show()

# %%
# We can also quantify the agreement between the detected and the reference labels using typical classification metrics.
from sklearn.metrics import classification_report

pd.DataFrame(
classification_report(ref_ics.lr_label, detected_ics.lr_label, target_names=["left", "right"], output_dict=True)
).T
# We can see that for most ICs we correctly identify the laterality.
# If you want to learn more about evaluating the algorithm output, you can check the
# :ref:`evaluation example <lrc_evaluation>`.
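The agreement the example points out can be made concrete with standard classification metrics. Below is a minimal sketch using hypothetical label lists as stand-ins for ``ref_ics.lr_label`` and ``detected_ics.lr_label`` (in the real example you would pass those Series directly, e.g. to ``sklearn.metrics.classification_report``):

```python
# Hypothetical stand-ins for ref_ics.lr_label / detected_ics.lr_label
reference = ["left", "right", "left", "right", "left", "right"]
detected = ["left", "right", "left", "left", "left", "right"]

# Overall agreement: fraction of ICs whose laterality matches the reference
matches = sum(ref == det for ref, det in zip(reference, detected))
accuracy = matches / len(reference)

# Per-class recall: how many reference "left"/"right" ICs were recovered
recall = {}
for label in ("left", "right"):
    relevant = [det for ref, det in zip(reference, detected) if ref == label]
    recall[label] = sum(det == label for det in relevant) / len(relevant)

print(f"accuracy={accuracy:.2f}, recall={recall}")
```

A per-class view matters here because the McCamley sign heuristic can drift for one side during turns, which a single accuracy number would hide.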
123 changes: 123 additions & 0 deletions examples/lrc/_02_lrc_ullrich.py
Original file line number Diff line number Diff line change

@@ -0,0 +1,123 @@
"""
Ullrich L/R Classifier
======================

The Ullrich L/R classifier is a general approach to differentiating left from right foot contacts using signal features
extracted from the gyroscopic data of a single IMU sensor placed on the lower back.
The feature vectors at the timepoints of the pre-detected initial contacts are then used in a typical binary
classification pipeline to predict the left/right label of each initial contact.

This example shows how to use the algorithm and how to train your own classification model on custom data.

"""

from mobgap.data import LabExampleDataset

# %%
# Loading some example data
# -------------------------
# .. note :: More infos about data loading can be found in the :ref:`data loading example <data_loading_example>`.
#
# We load example data from the lab dataset together with the INDIP reference system.
# We will use the INDIP "InitialContact_Event" output as ground truth.
#
# We only use the data from the "simulated daily living" activity test from a single participant.

example_data = LabExampleDataset(reference_system="INDIP", reference_para_level="wb")
single_test = example_data.get_subset(cohort="MS", participant_id="001", test="Test11", trial="Trial1")

imu_data = single_test.data_ss
reference_wbs = single_test.reference_parameters_.wb_list

sampling_rate_hz = single_test.sampling_rate_hz
ref_ics = single_test.reference_parameters_.ic_list
ref_ics_rel_to_gs = single_test.reference_parameters_relative_to_wb_.ic_list

# %%
# Applying the algorithm using reference ICs
# ------------------------------------------
# We use the algorithm to classify the laterality of the initial contacts.
# For this we need the IMU data and the indices of the initial contacts per GS.
# To focus this example on the L/R detection, we use the reference ICs from the INDIP system as input.
# In a real application, we would use the output of the IC-detectors as input.
#
# First, we need to set up an instance of our algorithm.
# For ``LrcUllrich`` we provide a pre-trained model, which we can use to predict the L/R labels.
# They are all trained on the MS-Project (University of Sheffield) dataset, just on different sub-cohorts, and can
# be accessed using ``LrcUllrich.PredefinedParameters``.
# We will use the model trained on all participants of the MS-Project dataset.
from mobgap.lrc import LrcUllrich

algo = LrcUllrich(**LrcUllrich.PredefinedParameters.msproject_all)

# %%
# As we want to apply the algorithm to each gait sequence/WB individually, we use the ``GsIterator`` to iterate over the
# reference wbs and apply the algorithm to each wb.
# Note, that we use the ``ic_list`` result key, as the output of all L/R detectors is identical to the output of the
# IC-detectors, but with an additional ``lr_label`` column.
from mobgap.pipeline import GsIterator

iterator = GsIterator()

for (gs, data), result in iterator.iterate(imu_data, reference_wbs):
result.ic_list = algo.predict(
data, ic_list=ref_ics_rel_to_gs.loc[gs.id].drop("lr_label", axis=1), sampling_rate_hz=sampling_rate_hz
).ic_lr_list_

detected_ics = iterator.results_.ic_list
detected_ics.assign(ref_lr_label=ref_ics.lr_label)
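The assigned frame allows a quick agreement check. A sketch with a small hypothetical DataFrame standing in for the real ``ic_lr_list_`` output with the reference label attached:

```python
import pandas as pd

# Hypothetical stand-in for detected_ics.assign(ref_lr_label=ref_ics.lr_label);
# the "ic" sample indices and labels are made up for illustration
ic_lr = pd.DataFrame(
    {
        "ic": [100, 350, 610, 880],
        "lr_label": ["left", "right", "left", "right"],
        "ref_lr_label": ["left", "right", "right", "right"],
    }
)

# Fraction of ICs whose predicted laterality matches the reference
agreement = (ic_lr["lr_label"] == ic_lr["ref_lr_label"]).mean()
```

With three of four labels matching, ``agreement`` is 0.75 here; on the real output you would compute the same expression on the assigned columns.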

# %%
# The output that we get provides us with an `lr_label` for each initial contact.
#
# Training a custom model
# -----------------------
# As ``LrcUllrich`` is machine-learning based, we provide the option to train a custom model on your own data.
# We even allow you to completely customize the classifier pipeline to test out different approaches.
#
# Here we will show how to train a custom model on the example data using the "low level" training interface.
# However, for most use cases (e.g. training with hyperparameter tuning/evaluation with cross_validation/...) you will
# want to use the higher level Pipeline interface.
# A full example on how to use this is shown in the :ref:`evaluation example <lrc_evaluation>`.
#
# The low level interface involves directly calling the ``self_optimize`` method of ``LrcUllrich``.
# It takes a series of data sequences, their corresponding ICs and ground truth labels.
#
# But first, we need to define an untrained sklearn ML pipeline to provide to the algorithm for tuning.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

clf_pipe = Pipeline([("scaler", MinMaxScaler()), ("clf", SVC())])
algo = LrcUllrich(clf_pipe=clf_pipe)
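Because ``clf_pipe`` is just an sklearn ``Pipeline``, any compatible scaler/classifier combination can be plugged in the same way. A hypothetical alternative (a sketch, not one of the shipped pre-trained configurations):

```python
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Hypothetical alternative pipeline: standardization plus a random forest
alt_pipe = Pipeline(
    [
        ("scaler", StandardScaler()),
        ("clf", RandomForestClassifier(n_estimators=100, random_state=42)),
    ]
)
# algo = LrcUllrich(clf_pipe=alt_pipe)  # passed the same way as the SVC pipeline above
```

Swapping the estimator changes only the classification stage; the feature extraction from the gyroscope signal stays the same.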

# %%
# Then we need to prepare the data.
# We will extract the IMU data, the ICs, and the ground truth labels from the example data used above.
per_gs_data = []
per_gs_ic = []
per_gs_ic_lr = []

for (gs, data), _ in iterator.iterate(imu_data, reference_wbs):
per_gs_data.append(data)
ref_ics = ref_ics_rel_to_gs.loc[gs.id]
per_gs_ic.append(ref_ics.drop("lr_label", axis=1))
per_gs_ic_lr.append(ref_ics)

# %%
# We will use all sequences but the last as training data.
algo = algo.self_optimize(per_gs_data[:-1], per_gs_ic[:-1], per_gs_ic_lr[:-1], sampling_rate_hz=sampling_rate_hz)

# %%
# We can now use our trained model and make predictions on the sequence we did not train on.
# We will use the last sequence for this.
predictions = algo.predict(per_gs_data[-1], ic_list=per_gs_ic[-1], sampling_rate_hz=sampling_rate_hz).ic_lr_list_
predictions.assign(ref_lr_label=per_gs_ic_lr[-1]["lr_label"])

# %%
# Note, that we don't expect particularly good performance, as we trained on very little data.
# But, because the data is all from the same participant and was recorded in a controlled lab environment, we can see that
# most predictions are correct.
#
# If you want to learn about evaluating and optimizing the algorithm, please refer to the
# :ref:`evaluation example <lrc_evaluation>`.
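The train/held-out split above (all sequences but the last for training) generalizes to a simple leave-one-out scheme. A minimal sketch of just the splitting logic, with hypothetical sequence names standing in for the per-GS containers (``per_gs_data`` / ``per_gs_ic`` / ``per_gs_ic_lr`` share the same indexing; each fold would be passed to ``self_optimize``/``predict``):

```python
# Hypothetical stand-ins for the per-GS sequences built above
sequences = ["gs_0", "gs_1", "gs_2", "gs_3"]

# Hold each sequence out once, training on the rest
splits = []
for held_out_idx in range(len(sequences)):
    train = [s for i, s in enumerate(sequences) if i != held_out_idx]
    test = sequences[held_out_idx]
    splits.append((train, test))
```

For proper hyperparameter tuning and cross-validation, the higher-level pipeline interface with ``tpcp`` is the better fit, as the evaluation example referenced above discusses.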