Skip to content

Commit

Permalink
Docstring and tests for mplot_dataframe_utils.py (#88, #89)
Browse files Browse the repository at this point in the history
  • Loading branch information
LSYS committed Dec 15, 2023
1 parent 2b5ce9c commit 1adcb7c
Show file tree
Hide file tree
Showing 2 changed files with 285 additions and 7 deletions.
84 changes: 77 additions & 7 deletions forestplot/mplot_dataframe_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,35 @@

import pandas as pd

from forestplot.dataframe_utils import insert_empty_row
from forestplot.text_utils import _get_max_varlen


def insert_group_model(
dataframe: pd.core.frame.DataFrame, groupvar: str, varlabel: str, model_col: str
) -> pd.core.frame.DataFrame:
"""Insert rows for group labels taking into account model groupings.
"""
Inserts rows for group labels into a pandas DataFrame based on specified model groupings.
This function iterates over unique values in the 'model_col' and 'groupvar' columns of the input DataFrame.
For each unique combination of model and group, it inserts a new row with the group label.
Parameters
----------
dataframe : pd.core.frame.DataFrame
The DataFrame into which the group labels will be inserted.
groupvar : str
The name of the column in 'dataframe' that contains the grouping variable.
varlabel : str
The label to assign to the inserted group label rows.
model_col : str
The name of the column in 'dataframe' that contains the model variable.
Returns
-------
pd.core.frame.DataFrame
Dataframe with additional columns for plotting.
A new DataFrame with additional rows inserted that contain the group labels for each group and model
combination.
"""
models = dataframe[model_col].unique()
groups = dataframe[groupvar].unique()
Expand All @@ -29,13 +48,32 @@ def insert_group_model(
return df_groupmodel_asvar


from forestplot.dataframe_utils import insert_empty_row
from forestplot.text_utils import _get_max_varlen


def _insert_headers_models(
dataframe: pd.core.frame.DataFrame, model_col: str, models: Union[Sequence[str], None]
) -> pd.core.frame.DataFrame:
"""
Inserts an empty row as a header for each unique model in a pandas DataFrame.
This function iterates over a specified list of models or, if not provided, over the unique values in the 'model_col' column of the input DataFrame. For each model, it filters the DataFrame to include only the rows corresponding to that model, inserts an empty row at the beginning, and then concatenates these modified DataFrames.
Parameters
----------
dataframe : pd.core.frame.DataFrame
The DataFrame into which the headers (empty rows) will be inserted.
model_col : str
The name of the column in 'dataframe' that contains the model identifiers.
models : Union[Sequence[str], None], optional
A sequence of model identifiers for which headers are to be inserted. If None, headers are inserted for all unique values in the 'model_col' column.
Returns
-------
pd.core.frame.DataFrame
A new DataFrame with empty rows inserted as headers for each specified model.
Notes
-----
The function relies on an external function `insert_empty_row` to insert the empty rows. Ensure this function is defined and properly handles the insertion of empty rows into a DataFrame.
"""
if models is None:
models = dataframe[model_col].unique()

Expand All @@ -61,10 +99,42 @@ def make_multimodel_tableheaders(
) -> pd.core.frame.DataFrame:
"""Make additional column for table headers taking in account models and groups.
This function is designed to prepare a pandas DataFrame for tabular display or plotting, especially
when the data is categorized by different models. It adds additional columns for table headers,
considering various models, groups, and annotations.
Parameters
----------
dataframe : pd.core.frame.DataFrame
The DataFrame to be processed and enhanced.
varlabel : str
The label of a key column in the DataFrame to be used in header formatting.
model_col : str
The column name in the DataFrame that contains model identifiers.
models : Optional[Sequence[str]], optional
A sequence of model identifiers. If None, the function uses unique values from 'model_col'.
annote : Optional[Sequence[str]], optional
A sequence of columns in the DataFrame to be used for left-side annotations.
annoteheaders : Optional[Sequence[str]], optional
Headers corresponding to 'annote', for left-side annotations.
rightannote : Optional[Sequence[str]], optional
A sequence of columns in the DataFrame to be used for right-side annotations.
right_annoteheaders : Optional[Sequence[str]], optional
Headers corresponding to 'rightannote', for right-side annotations.
flush : bool, default True
Determines if headers should be left-aligned (flushed). If False, headers are aligned as per their natural alignment.
**kwargs : Any
Additional keyword arguments. Includes 'variable_header' for the main variable header and
'col_spacing' for spacing between columns.
Returns
-------
pd.core.frame.DataFrame
Dataframe with additional columns for plotting.
A modified DataFrame with additional columns and headers, formatted for plotting or tabular display.
Notes
-----
This function relies on external functions '_insert_headers_models' and '_get_max_varlen'.
"""
# No table headers
variable_header = kwargs.get("variable_header", "")
Expand Down
208 changes: 208 additions & 0 deletions tests/test_mplot_dataframe_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
import numpy as np
import pandas as pd

from forestplot.mplot_dataframe_utils import (
_insert_headers_models,
insert_group_model,
make_multimodel_tableheaders,
)


def test_insert_group_model():
    """Verify the frame produced by ``insert_group_model`` for a 2-model, 2-group input.

    The expected frame pairs every original row with a preceding row that
    carries the group name under ``varlabel`` and has no ``value``.
    """
    # Arrange: two models, each holding one row for GroupA and one for GroupB.
    source = pd.DataFrame(
        {
            "model_col": ["Model1", "Model1", "Model2", "Model2"],
            "groupvar": ["GroupA", "GroupB", "GroupA", "GroupB"],
            "value": [10, 20, 30, 40],
        }
    )

    # Act
    actual = insert_group_model(source, "groupvar", "varlabel", "model_col")

    # Assert: the (label row, data row) pattern repeats once per model, so the
    # per-model columns are written once and repeated.
    wanted = pd.DataFrame(
        {
            "varlabel": ["GroupA", None, "GroupB", None] * 2,
            "groupvar": ["GroupA", "GroupA", "GroupB", "GroupB"] * 2,
            "model_col": ["Model1"] * 4 + ["Model2"] * 4,
            "value": [None, 10, None, 20, None, 30, None, 40],
        }
    )
    pd.testing.assert_frame_equal(actual, wanted)


def test_insert_headers_models():
    """Verify ``_insert_headers_models`` output when ``models`` is passed as None.

    The expected frame has one all-missing row ahead of each model's rows.
    """
    # Arrange: two rows for each of two models.
    source = pd.DataFrame(
        {
            "model_col": ["model1", "model1", "model2", "model2"],
            "data1": [100, 200, 300, 400],
        }
    )

    # Act
    actual = _insert_headers_models(source, "model_col", None)

    # Assert: index labels are discarded on both sides before comparing, so
    # only row order and cell contents are checked.
    wanted = pd.DataFrame(
        {
            "model_col": [None, "model1", "model1", None, "model2", "model2"],
            "data1": [None, 100, 200, None, 300, 400],
        }
    )
    actual_positional = actual.reset_index(drop=True)
    wanted_positional = wanted.reset_index(drop=True)
    pd.testing.assert_frame_equal(actual_positional, wanted_positional)


def test_make_multimodel_tableheaders():
    """Verify ``make_multimodel_tableheaders`` output for a two-model input.

    The call uses a single left annotation (``annote=["var"]`` with
    ``annoteheaders=["header"]``) and no right annotations. The expected frame
    has one extra leading row per model plus the ``yticklabel`` and
    ``yticklabel2`` columns, which are filled only on those leading rows.
    """
    # Arrange: four variables in two groups, repeated for each of two models.
    source = pd.DataFrame(
        {
            "var": ["var0", "var1", "var2", "var3"] * 2,
            "group": ["Group 0", "Group 0", "Group 1", "Group 1"] * 2,
            "model": ["Model 0"] * 4 + ["Model 1"] * 4,
            "coef": [
                "Coef 0",
                "Coef 1",
                "Coef 2",
                "Coef 3",
                "Coef 4",
                "Coef 5",
                "Coef 6",
                "Coef 7",
            ],
        }
    )

    # Act
    actual = make_multimodel_tableheaders(
        source,
        varlabel="var",
        model_col="model",
        models=None,
        annote=["var"],
        annoteheaders=["header"],
        rightannote=None,
        right_annoteheaders=None,
    )

    # Assert: each model contributes five rows, the first being the extra
    # leading row and the remaining four the original data rows.
    no_label = [np.nan] * 4
    wanted = pd.DataFrame(
        {
            "var": [np.nan, "var0", "var1", "var2", "var3"] * 2,
            "group": [np.nan, "Group 0", "Group 0", "Group 1", "Group 1"] * 2,
            "model": ["Model 0"] * 5 + ["Model 1"] * 5,
            "coef": [
                np.nan,
                "Coef 0",
                "Coef 1",
                "Coef 2",
                "Coef 3",
                np.nan,
                "Coef 4",
                "Coef 5",
                "Coef 6",
                "Coef 7",
            ],
            "yticklabel": (["Variable header"] + no_label) * 2,
            "yticklabel2": ([""] + no_label) * 2,
        }
    )
    pd.testing.assert_frame_equal(actual, wanted)

0 comments on commit 1adcb7c

Please sign in to comment.