ADD: CLI runnable functions
adamovanja committed Mar 3, 2024
1 parent e28e9e8 commit 90f0d4d
Showing 10 changed files with 483 additions and 746 deletions.
4 changes: 2 additions & 2 deletions .gitignore
@@ -143,8 +143,8 @@ dmypy.json
# MLflow model logs
**/mlruns/**

# Best model checkpoints
**/best_models/**
# Model checkpoints
**/models/**

# Sequence and metadata
**/experiments/data/**
9 changes: 9 additions & 0 deletions README.md
@@ -13,8 +13,17 @@ make dev
```

## Model training
To train models with the configuration defined in `q2_ritme/run_config.json`, run:
```
python q2_ritme/run_n_eval_tune.py --config q2_ritme/run_config.json
```

Once you have trained some models, you can monitor their training progress by launching `mlflow ui --backend-store-uri experiments/mlruns`.

To evaluate the best trial (a trial is one configuration run within an experiment) across all launched experiments, run:
```
python q2_ritme/eval_best_trial_overall.py --model_path "experiments/models"
```

## Background
### Why ray tune?
922 changes: 221 additions & 701 deletions experiments/try_tune.ipynb

Large diffs are not rendered by default.

9 changes: 0 additions & 9 deletions q2_ritme/config.py

This file was deleted.

67 changes: 67 additions & 0 deletions q2_ritme/eval_best_trial_overall.py
@@ -0,0 +1,67 @@
import argparse
import os

from q2_ritme.evaluate_all_experiments import (
    best_trial_name,
    compare_trials,
    get_all_exp_analyses,
)


def parse_args():
    parser = argparse.ArgumentParser(
        description="Post-run evaluation over all experiments."
    )
    parser.add_argument(
        "--model_path",
        type=str,
        default="experiments/models",
        help="Path where the models are stored.",
    )
    parser.add_argument(
        "--overall_comparison_output",
        type=str,
        default=None,
        help="Output path for the overall comparison. If not provided, it defaults to "
        "a 'compare_all' directory inside the base path.",
    )
    parser.add_argument(
        "--ls_model_types",
        type=str,
        nargs="+",
        default=["nn", "xgb", "linreg", "rf"],
        help="List of model types to evaluate. Separate each model type with a space.",
    )
    return parser.parse_args()


def main():
    args = parse_args()

    # Use the provided arguments
    model_path = args.model_path
    overall_comparison_output = args.overall_comparison_output or os.path.join(
        model_path, "compare_all"
    )
    ls_model_types = args.ls_model_types

    # Ensure the overall comparison output directory exists
    os.makedirs(overall_comparison_output, exist_ok=True)

    # Find best trial over all experiments for each model type
    best_trials_overall = {}
    for model in ls_model_types:
        # read all ExperimentAnalysis objects from this directory
        experiment_dir = f"{model_path}/*/{model}"
        analyses_ls = get_all_exp_analyses(experiment_dir)

        # identify best trial from all analyses of this model type
        best_trials_overall[model] = best_trial_name(
            analyses_ls, "rmse_val", mode="min"
        )

    compare_trials(best_trials_overall, model_path, overall_comparison_output)


if __name__ == "__main__":
    main()
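
For context, the `experiment_dir` glob above implies a layout of one Ray Tune experiment directory per model type under each experiment tag. A small sketch of that assumption (whether `get_all_exp_analyses` resolves the pattern with `glob` internally is not shown in this diff; the pattern itself is taken from `main()` above):

```python
import glob

model_path = "experiments/models"
for model in ["nn", "xgb", "linreg", "rf"]:
    # matches e.g. experiments/models/test_synthetic/xgb; any other
    # experiment tags listed here would be hypothetical
    print(glob.glob(f"{model_path}/*/{model}"))
```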
10 changes: 5 additions & 5 deletions q2_ritme/evaluate_all_experiments.py
@@ -50,9 +50,9 @@ def get_all_exp_analyses(experiment_dir):
    return analyses_ls


def read_predictions_for_trial(trial_tag):
def read_predictions_for_trial(trial_tag, path_to_models):
    # read predictions for this trial
    base_path = Path("best_models")
    base_path = Path(path_to_models)
    target_path = [p for p in base_path.rglob(f"{trial_tag}*") if p.is_dir()]

    if len(target_path) > 1:
@@ -72,12 +72,12 @@ def verify_indices(models_dict, pred_value):
    return all(index.equals(indices[0]) for index in indices)


def compare_trials(dic_trials_to_check, path_to_save):
def compare_trials(dic_trials_to_check, path_to_models, path_to_save):
    # get predictions for each best trial
    pred_dic = {}
    config_dic = {}
    for v in dic_trials_to_check.values():
        pred_dic[v] = read_predictions_for_trial(v)
        pred_dic[v] = read_predictions_for_trial(v, path_to_models)
        config_dic[v] = v.config

    # Verify that IDs are identical for both splits
@@ -91,4 +91,4 @@ def compare_trials(dic_trials_to_check, path_to_save):

    # display config differences
    config_df = pd.DataFrame(config_dic)
    config_df.to_csv(os.path.join(path_to_save, "config.csv"), index=True)
    config_df.to_csv(os.path.join(path_to_save, "best_trial_config.csv"), index=True)
60 changes: 36 additions & 24 deletions q2_ritme/evaluate_models.py
@@ -13,6 +13,9 @@
from q2_ritme.feature_space.transform_features import transform_features
from q2_ritme.model_space._static_trainables import NeuralNet

# 30.437 is avg. number of days per month
DAYS_PER_MONTH = 30.437


def _get_checkpoint_path(result: Result) -> str:
"""
@@ -161,37 +164,41 @@ def plot_rmse_over_experiments(preds_dic, save_loc, dpi=400):
    plt.show()


def plot_rmse_over_time(preds_dic, ls_model_types, DAYS_PER_MONTH, save_loc, dpi=300):
def plot_rmse_over_time(
    preds_dic, ls_model_types, save_loc, days_per_month=DAYS_PER_MONTH, dpi=300
):
    """
    Plot RMSE over true time bins for each model type in ls_model_types.

    Parameters:
    preds_dic (dict): Dictionary containing predictions for each model type.
    ls_model_types (list): List of model types.
    save_loc (str): Directory in which the plots are saved.
    DAYS_PER_MONTH (float): Average number of days per month to use for binning.
    days_per_month (float): Average number of days per month to use for binning.
    dpi (int): Resolution of the plot.
    """
    model_type = ls_model_types[0]
    pred_df = preds_dic[model_type]
    split = None

    # Bin true columns by months
    pred_df["group"] = np.round(pred_df["true"] / DAYS_PER_MONTH, 0).astype(int)

    # Calculate RMSE for each group
    grouped_ser = pred_df.groupby(["group"]).apply(calculate_rmse)
    grouped_df = grouped_ser.apply(pd.Series)
    if split is not None:
        grouped_df = grouped_df[[split]].copy()

    # Plot
    plt.figure(dpi=dpi)
    grouped_df.plot(
        kind="bar", title=f"Model: {model_type}", ylabel="RMSE", figsize=(10, 5)
    )
    path_to_save = os.path.join(save_loc, "rmse_over_time_train_test.png")
    plt.savefig(path_to_save, dpi=dpi)
    plt.show()
    for model_type in ls_model_types:
        pred_df = preds_dic[model_type]
        split = None

        # Bin true columns by months
        pred_df["group"] = np.round(pred_df["true"] / days_per_month, 0).astype(int)

        # Calculate RMSE for each group
        grouped_ser = pred_df.groupby(["group"]).apply(calculate_rmse)
        grouped_df = grouped_ser.apply(pd.Series)
        if split is not None:
            grouped_df = grouped_df[[split]].copy()

        # Plot
        plt.figure(dpi=dpi)
        grouped_df.plot(
            kind="bar", title=f"Model: {model_type}", ylabel="RMSE", figsize=(10, 5)
        )
        path_to_save = os.path.join(
            save_loc, f"rmse_over_time_train_test_{model_type}.png"
        )
        plt.savefig(path_to_save, dpi=dpi)
        plt.show()
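
As a quick, self-contained illustration of the binning step above (toy values; only the "true" column is needed for the grouping itself):

```python
import numpy as np
import pandas as pd

# toy predictions with the target ("true") given in days
pred_df = pd.DataFrame({"true": [15.0, 45.0, 95.0], "pred": [20.0, 40.0, 80.0]})

# bin by the average month length of 30.437 days (DAYS_PER_MONTH above)
pred_df["group"] = np.round(pred_df["true"] / 30.437, 0).astype(int)
print(pred_df["group"].tolist())  # [0, 1, 3]
```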


def get_best_model_metrics_and_config(
@@ -261,7 +268,7 @@ def plot_best_models_comparison(
    plt.show()


def plot_model_training_over_iterations(model_type, result_dic, labels):
def plot_model_training_over_iterations(model_type, result_dic, labels, save_loc):
    ax = None
    for result in result_dic[model_type]:
        label_str = ""
@@ -278,3 +285,8 @@ def plot_model_training_over_iterations(model_type, result_dic, labels):
    ax.legend(bbox_to_anchor=(1.1, 1.05))
    ax.set_title(f"RMSE_val vs. training iteration for all trials of {model_type}")
    ax.set_ylabel("RMSE_val")
    plt.tight_layout()
    path_to_save = os.path.join(
        save_loc, f"rmse_best_{model_type}_over_training_iteration.png"
    )
    plt.savefig(path_to_save, dpi=400)
21 changes: 21 additions & 0 deletions q2_ritme/run_config.json
@@ -0,0 +1,21 @@
{
    "experiment_tag": "test_synthetic",
    "host_id": "host_id",
    "ls_model_types": [
        "nn",
        "xgb",
        "linreg",
        "rf"
    ],
    "mlflow_tracking_uri": "mlruns",
    "models_to_evaluate_separately": [
        "xgb",
        "nn"
    ],
    "path_to_ft": null,
    "path_to_md": null,
    "seed_data": 12,
    "seed_model": 12,
    "target": "age_days",
    "train_size": 0.8
}
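
For orientation, these fields are consumed by the training entry point added below; a brief sketch of reading the file (field summaries inferred from `run_n_eval_tune.py`):

```python
import json

with open("q2_ritme/run_config.json") as f:
    config = json.load(f)

# experiment_tag   -> name of the output directory under experiments/models
# ls_model_types   -> model types passed to run_all_trials for tuning
# path_to_md / ft  -> metadata and feature-table inputs for load_n_split_data
# seed_data/_model -> seeds for the data split and for model training
# target / host_id -> regression target and host identifier column
print(config["experiment_tag"], config["target"], config["train_size"])
```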
118 changes: 118 additions & 0 deletions q2_ritme/run_n_eval_tune.py
@@ -0,0 +1,118 @@
import argparse
import json
import os
import shutil

import pandas as pd

from q2_ritme.evaluate_models import (
    aggregate_best_models_metrics_and_configs,
    get_predictions,
    plot_best_models_comparison,
    plot_model_training_over_iterations,
    plot_rmse_over_experiments,
    plot_rmse_over_time,
    retrieve_best_models,
)
from q2_ritme.process_data import load_n_split_data
from q2_ritme.tune_models import run_all_trials


def parse_args():
    parser = argparse.ArgumentParser(description="Run configuration.")
    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="Path to the run configuration JSON file.",
    )
    return parser.parse_args()


def main(config_path):
    with open(config_path, "r") as f:
        config = json.load(f)

    # ! Define needed paths
    base_path = os.path.join("experiments", "models")

    exp_comparison_output = os.path.join(base_path, config["experiment_tag"])
    if os.path.exists(exp_comparison_output):
        raise ValueError(
            f"This experiment tag already exists: {config['experiment_tag']}. "
            "Please use another one."
        )

    # todo: flag mlflow runs also with experiment tag somehow
    path_mlflow = os.path.join("experiments", config["mlflow_tracking_uri"])
    path_exp = os.path.join(base_path, config["experiment_tag"])

    # ! Load and split data
    train_val, test = load_n_split_data(
        config["path_to_md"],
        config["path_to_ft"],
        config["host_id"],
        config["target"],
        config["train_size"],
        config["seed_data"],
    )

    # ! Run all experiments
    result_dic = run_all_trials(
        train_val,
        config["target"],
        config["host_id"],
        config["seed_data"],
        config["seed_model"],
        path_mlflow,
        path_exp,
        model_types=config["ls_model_types"],
        fully_reproducible=False,
    )

    # ! Save run config
    config_output_path = os.path.join(exp_comparison_output, "run_config.json")
    shutil.copy(config_path, config_output_path)

    # ! Evaluate best models of this experiment
    # Eval1: train_val vs. test -> performance
    best_model_dic = retrieve_best_models(result_dic)
    non_features = [config["target"], config["host_id"]]
    features = [x for x in train_val if x not in non_features]

    preds_dic = {}
    for model_type, tmodel in best_model_dic.items():
        train_pred = get_predictions(
            train_val, tmodel, config["target"], features, "train"
        )
        test_pred = get_predictions(test, tmodel, config["target"], features, "test")
        all_pred = pd.concat([train_pred, test_pred])

        # Save all predictions to model file
        path2save = os.path.join(tmodel.path, "predictions.csv")
        all_pred.to_csv(path2save, index=True)
        preds_dic[model_type] = all_pred

    plot_rmse_over_experiments(preds_dic, exp_comparison_output)

    plot_rmse_over_time(preds_dic, config["ls_model_types"], exp_comparison_output)

    # Eval2: train vs. val -> performance and config
    metrics_all, best_configs = aggregate_best_models_metrics_and_configs(result_dic)

    plot_best_models_comparison(metrics_all, exp_comparison_output)

    best_configs.to_csv(
        os.path.join(exp_comparison_output, "best_trial_config.csv"), index=True
    )

    # ! Evaluate one model over training iterations
    for m in config["models_to_evaluate_separately"]:
        plot_model_training_over_iterations(
            m, result_dic, labels=["data_transform"], save_loc=exp_comparison_output
        )


if __name__ == "__main__":
    args = parse_args()
    main(args.config)
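
After a run, the evaluation artefacts written by `main()` can be inspected on disk; a rough sketch (experiment tag taken from `run_config.json`; the exact contents depend on `run_all_trials`):

```python
import os

# experiments/models/<experiment_tag> collects the copied run_config.json,
# the comparison plots and best_trial_config.csv written above; a
# predictions.csv is additionally saved next to each best model
exp_dir = os.path.join("experiments", "models", "test_synthetic")
for root, _, files in os.walk(exp_dir):
    for name in files:
        print(os.path.join(root, name))
```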