Docstring and tests for mplot_dataframe_utils.py (#88, #89)

LSYS · Dec 15, 2023 · 1adcb7c
1 parent 2b5ce9c
commit 1adcb7c
Show file tree

Hide file tree

Showing 2 changed files with 285 additions and 7 deletions.
diff --git a/forestplot/mplot_dataframe_utils.py b/forestplot/mplot_dataframe_utils.py
@@ -2,16 +2,35 @@
 
 import pandas as pd
 
+from forestplot.dataframe_utils import insert_empty_row
+from forestplot.text_utils import _get_max_varlen
+
 
 def insert_group_model(
     dataframe: pd.core.frame.DataFrame, groupvar: str, varlabel: str, model_col: str
 ) -> pd.core.frame.DataFrame:
-    """Insert rows for group labels taking into account model groupings.
+    """
+    Inserts rows for group labels into a pandas DataFrame based on specified model groupings.
+
+    This function iterates over unique values in the 'model_col' and 'groupvar' columns of the input DataFrame.
+    For each unique combination of model and group, it inserts a new row with the group label.
+
+    Parameters
+    ----------
+    dataframe : pd.core.frame.DataFrame
+        The DataFrame into which the group labels will be inserted.
+    groupvar : str
+        The name of the column in 'dataframe' that contains the grouping variable.
+    varlabel : str
+        The name of the column that holds the group label in each inserted group label row.
+    model_col : str
+        The name of the column in 'dataframe' that contains the model variable.
 
     Returns
     -------
     pd.core.frame.DataFrame
-        Dataframe with additional columns for plotting.
+        A new DataFrame with additional rows inserted that contain the group labels for each group and model
+        combination.
     """
     models = dataframe[model_col].unique()
     groups = dataframe[groupvar].unique()
@@ -29,13 +48,32 @@ def insert_group_model(
     return df_groupmodel_asvar
 
 
-from forestplot.dataframe_utils import insert_empty_row
-from forestplot.text_utils import _get_max_varlen
-
-
 def _insert_headers_models(
     dataframe: pd.core.frame.DataFrame, model_col: str, models: Union[Sequence[str], None]
 ) -> pd.core.frame.DataFrame:
+    """
+    Inserts an empty row as a header for each unique model in a pandas DataFrame.
+
+    This function iterates over a specified list of models or, if not provided, over the unique values in the 'model_col' column of the input DataFrame. For each model, it filters the DataFrame to include only the rows corresponding to that model, inserts an empty row at the beginning, and then concatenates these modified DataFrames.
+
+    Parameters
+    ----------
+    dataframe : pd.core.frame.DataFrame
+        The DataFrame into which the headers (empty rows) will be inserted.
+    model_col : str
+        The name of the column in 'dataframe' that contains the model identifiers.
+    models : Union[Sequence[str], None], optional
+        A sequence of model identifiers for which headers are to be inserted. If None, headers are inserted for all unique values in the 'model_col' column.
+
+    Returns
+    -------
+    pd.core.frame.DataFrame
+        A new DataFrame with empty rows inserted as headers for each specified model.
+
+    Notes
+    -----
+    The function relies on an external function `insert_empty_row` to insert the empty rows. Ensure this function is defined and properly handles the insertion of empty rows into a DataFrame.
+    """
     if models is None:
         models = dataframe[model_col].unique()
 
@@ -61,10 +99,42 @@ def make_multimodel_tableheaders(
 ) -> pd.core.frame.DataFrame:
     """Make additional column for table headers taking in account models and groups.
 
+    This function is designed to prepare a pandas DataFrame for tabular display or plotting, especially
+    when the data is categorized by different models. It adds additional columns for table headers,
+    considering various models, groups, and annotations.
+
+    Parameters
+    ----------
+    dataframe : pd.core.frame.DataFrame
+        The DataFrame to be processed and enhanced.
+    varlabel : str
+        The label of a key column in the DataFrame to be used in header formatting.
+    model_col : str
+        The column name in the DataFrame that contains model identifiers.
+    models : Optional[Sequence[str]], optional
+        A sequence of model identifiers. If None, the function uses unique values from 'model_col'.
+    annote : Optional[Sequence[str]], optional
+        A sequence of columns in the DataFrame to be used for left-side annotations.
+    annoteheaders : Optional[Sequence[str]], optional
+        Headers corresponding to 'annote', for left-side annotations.
+    rightannote : Optional[Sequence[str]], optional
+        A sequence of columns in the DataFrame to be used for right-side annotations.
+    right_annoteheaders : Optional[Sequence[str]], optional
+        Headers corresponding to 'rightannote', for right-side annotations.
+    flush : bool, default True
+        Determines if headers should be left-aligned (flushed). If False, headers are aligned as per their natural alignment.
+    **kwargs : Any
+        Additional keyword arguments. Includes 'variable_header' for the main variable header and
+        'col_spacing' for spacing between columns.
+
     Returns
     -------
     pd.core.frame.DataFrame
-        Dataframe with additional columns for plotting.
+        A modified DataFrame with additional columns and headers, formatted for plotting or tabular display.
+
+    Notes
+    -----
+    This function relies on external functions '_insert_headers_models' and '_get_max_varlen'.
     """
     # No table headers
     variable_header = kwargs.get("variable_header", "")

diff --git a/tests/test_mplot_dataframe_utils.py b/tests/test_mplot_dataframe_utils.py
@@ -0,0 +1,208 @@
+import numpy as np
+import pandas as pd
+
+from forestplot.mplot_dataframe_utils import (
+    _insert_headers_models,
+    insert_group_model,
+    make_multimodel_tableheaders,
+)
+
+
+def test_insert_group_model():
+    # insert_group_model should add a group-label row ahead of each (model, group) row; input: 2 models x 2 groups
+    data = {
+        "model_col": ["Model1", "Model1", "Model2", "Model2"],
+        "groupvar": ["GroupA", "GroupB", "GroupA", "GroupB"],
+        "value": [10, 20, 30, 40],
+    }
+    df = pd.DataFrame(data)
+
+    # Expected: label rows carry the group name in "varlabel" and no "value"; data rows carry "value" and no "varlabel"
+    expected_data = {
+        "varlabel": ["GroupA", None, "GroupB", None, "GroupA", None, "GroupB", None],
+        "groupvar": [
+            "GroupA",
+            "GroupA",
+            "GroupB",
+            "GroupB",
+            "GroupA",
+            "GroupA",
+            "GroupB",
+            "GroupB",
+        ],
+        "model_col": [
+            "Model1",
+            "Model1",
+            "Model1",
+            "Model1",
+            "Model2",
+            "Model2",
+            "Model2",
+            "Model2",
+        ],
+        "value": [None, 10, None, 20, None, 30, None, 40],
+    }
+    expected_df = pd.DataFrame(expected_data)
+
+    # Apply the function; note that "varlabel" is not a column of the input frame
+    result_df = insert_group_model(df, "groupvar", "varlabel", "model_col")
+
+    # Assert: result must match the expected frame under assert_frame_equal's default checks
+    pd.testing.assert_frame_equal(result_df, expected_df)
+
+
+def test_insert_headers_models():
+    # _insert_headers_models should put one empty row ahead of each model's rows; input: two rows per model
+    df = pd.DataFrame(
+        {
+            "model_col": ["model1", "model1", "model2", "model2"],
+            "data1": [100, 200, 300, 400],
+        }
+    )
+
+    # Expected output: an all-missing row precedes the rows of "model1" and the rows of "model2"
+    expected_output = pd.DataFrame(
+        {
+            "model_col": [None, "model1", "model1", None, "model2", "model2"],
+            "data1": [None, 100, 200, None, 300, 400],
+        }
+    )
+
+    # Exercise: models=None, so the function falls back to the unique values of "model_col"
+    result = _insert_headers_models(df, "model_col", None)
+
+    # Verify: both indexes are reset first, so index labels do not take part in the comparison
+    pd.testing.assert_frame_equal(
+        result.reset_index(drop=True), expected_output.reset_index(drop=True)
+    )
+
+
+def test_make_multimodel_tableheaders():
+    # make_multimodel_tableheaders should add a header row per model plus two label columns; input: 4 vars x 2 models
+    df_input = pd.DataFrame(
+        {
+            "var": ["var0", "var1", "var2", "var3", "var0", "var1", "var2", "var3"],
+            "group": [
+                "Group 0",
+                "Group 0",
+                "Group 1",
+                "Group 1",
+                "Group 0",
+                "Group 0",
+                "Group 1",
+                "Group 1",
+            ],
+            "model": [
+                "Model 0",
+                "Model 0",
+                "Model 0",
+                "Model 0",
+                "Model 1",
+                "Model 1",
+                "Model 1",
+                "Model 1",
+            ],
+            "coef": [
+                "Coef 0",
+                "Coef 1",
+                "Coef 2",
+                "Coef 3",
+                "Coef 4",
+                "Coef 5",
+                "Coef 6",
+                "Coef 7",
+            ],
+        }
+    )
+
+    # Expected output: a header row ahead of each model's rows; only header rows have "yticklabel"/"yticklabel2" set
+    df_expected = pd.DataFrame(
+        {
+            "var": [
+                np.nan,
+                "var0",
+                "var1",
+                "var2",
+                "var3",
+                np.nan,
+                "var0",
+                "var1",
+                "var2",
+                "var3",
+            ],
+            "group": [
+                np.nan,
+                "Group 0",
+                "Group 0",
+                "Group 1",
+                "Group 1",
+                np.nan,
+                "Group 0",
+                "Group 0",
+                "Group 1",
+                "Group 1",
+            ],
+            "model": [
+                "Model 0",
+                "Model 0",
+                "Model 0",
+                "Model 0",
+                "Model 0",
+                "Model 1",
+                "Model 1",
+                "Model 1",
+                "Model 1",
+                "Model 1",
+            ],
+            "coef": [
+                np.nan,
+                "Coef 0",
+                "Coef 1",
+                "Coef 2",
+                "Coef 3",
+                np.nan,
+                "Coef 4",
+                "Coef 5",
+                "Coef 6",
+                "Coef 7",
+            ],
+            "yticklabel": [
+                "Variable  header",
+                np.nan,
+                np.nan,
+                np.nan,
+                np.nan,
+                "Variable  header",
+                np.nan,
+                np.nan,
+                np.nan,
+                np.nan,
+            ],
+            "yticklabel2": [
+                "",
+                np.nan,
+                np.nan,
+                np.nan,
+                np.nan,
+                "",
+                np.nan,
+                np.nan,
+                np.nan,
+                np.nan,
+            ],
+        }
+    )
+
+    # Exercise: "var" is both the variable label and the only annotation column, with header text "header"
+    df_result = make_multimodel_tableheaders(
+        df_input,
+        varlabel="var",
+        model_col="model",
+        models=None,
+        annote=["var"],
+        annoteheaders=["header"],
+        rightannote=None,
+        right_annoteheaders=None,
+    )
+    # Verify: frames are compared as returned, without resetting the index on either side
+    pd.testing.assert_frame_equal(df_result, df_expected)