ADD: CLI runnable functions
adamovanja committed Mar 3, 2024
1 parent e28e9e8 commit 90f0d4d
Showing 10 changed files with 483 additions and 746 deletions.
4 changes: 2 additions & 2 deletions .gitignore
@@ -143,8 +143,8 @@ dmypy.json
# MLflow model logs
**/mlruns/**

# Best model checkpoints
**/best_models/**
# Model checkpoints
**/models/**

# Sequence and metadata
**/experiments/data/**
9 changes: 9 additions & 0 deletions README.md
@@ -13,8 +13,17 @@ make dev
```

## Model training
To train models with the configuration defined in `q2_ritme/run_config.json`, run:
```
python q2_ritme/run_n_eval_tune.py --config q2_ritme/run_config.json
```

Once you have trained some models, you can monitor their training progress by launching `mlflow ui --backend-store-uri experiments/mlruns`.

To evaluate the best trial (a trial is one configuration run within an experiment) across all launched experiments, run:
```
python q2_ritme/eval_best_trial_overall.py --model_path "experiments/models"
```

## Background
### Why ray tune?
922 changes: 221 additions & 701 deletions experiments/try_tune.ipynb

Large diffs are not rendered by default.

9 changes: 0 additions & 9 deletions q2_ritme/config.py

This file was deleted.

67 changes: 67 additions & 0 deletions q2_ritme/eval_best_trial_overall.py
@@ -0,0 +1,67 @@
import argparse
import os

from q2_ritme.evaluate_all_experiments import (
    best_trial_name,
    compare_trials,
    get_all_exp_analyses,
)


def parse_args():
    parser = argparse.ArgumentParser(
        description="Post-run evaluation over all experiments."
    )
    parser.add_argument(
        "--model_path",
        type=str,
        default="experiments/models",
        help="Path where the models are stored.",
    )
    parser.add_argument(
        "--overall_comparison_output",
        type=str,
        default=None,
        help="Output path for the overall comparison. If not provided, it defaults to "
        "a 'compare_all' directory inside the base path.",
    )
    parser.add_argument(
        "--ls_model_types",
        type=str,
        nargs="+",
        default=["nn", "xgb", "linreg", "rf"],
        help="List of model types to evaluate. Separate each model type with a space.",
    )
    return parser.parse_args()


def main():
    args = parse_args()

    # Use the provided arguments
    model_path = args.model_path
    overall_comparison_output = args.overall_comparison_output or os.path.join(
        model_path, "compare_all"
    )
    ls_model_types = args.ls_model_types

    # Ensure the overall comparison output directory exists
    os.makedirs(overall_comparison_output, exist_ok=True)

    # Find best trial over all experiments for each model type
    best_trials_overall = {}
    for model in ls_model_types:
        # read all ExperimentAnalysis objects from this directory
        experiment_dir = f"{model_path}/*/{model}"
        analyses_ls = get_all_exp_analyses(experiment_dir)

        # identify best trial from all analyses of this model type
        best_trials_overall[model] = best_trial_name(
            analyses_ls, "rmse_val", mode="min"
        )

    compare_trials(best_trials_overall, model_path, overall_comparison_output)


if __name__ == "__main__":
    main()
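
For context, the `experiment_dir` glob above implies a layout of one Ray Tune experiment directory per model type under each experiment tag. A small sketch of that assumption (whether `get_all_exp_analyses` resolves the pattern with `glob` internally is not shown in this diff; the pattern itself is taken from `main()` above):

```python
import glob

model_path = "experiments/models"
for model in ["nn", "xgb", "linreg", "rf"]:
    # matches e.g. experiments/models/test_synthetic/xgb; any other
    # experiment tags listed here would be hypothetical
    print(glob.glob(f"{model_path}/*/{model}"))
```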
10 changes: 5 additions & 5 deletions q2_ritme/evaluate_all_experiments.py
@@ -50,9 +50,9 @@ def get_all_exp_analyses(experiment_dir):
    return analyses_ls


def read_predictions_for_trial(trial_tag):
def read_predictions_for_trial(trial_tag, path_to_models):
    # read predictions for this trial
    base_path = Path("best_models")
    base_path = Path(path_to_models)
    target_path = [p for p in base_path.rglob(f"{trial_tag}*") if p.is_dir()]

    if len(target_path) > 1:
@@ -72,12 +72,12 @@ def verify_indices(models_dict, pred_value):
    return all(index.equals(indices[0]) for index in indices)


def compare_trials(dic_trials_to_check, path_to_save):
def compare_trials(dic_trials_to_check, path_to_models, path_to_save):
    # get predictions for each best trial
    pred_dic = {}
    config_dic = {}
    for v in dic_trials_to_check.values():
        pred_dic[v] = read_predictions_for_trial(v)
        pred_dic[v] = read_predictions_for_trial(v, path_to_models)
        config_dic[v] = v.config

    # Verify that IDs are identical for both splits
@@ -91,4 +91,4 @@ def compare_trials(dic_trials_to_check, path_to_save):

    # display config differences
    config_df = pd.DataFrame(config_dic)
    config_df.to_csv(os.path.join(path_to_save, "config.csv"), index=True)
    config_df.to_csv(os.path.join(path_to_save, "best_trial_config.csv"), index=True)
60 changes: 36 additions & 24 deletions q2_ritme/evaluate_models.py
@@ -13,6 +13,9 @@
from q2_ritme.feature_space.transform_features import transform_features
from q2_ritme.model_space._static_trainables import NeuralNet

# 30.437 is avg. number of days per month
DAYS_PER_MONTH = 30.437


def _get_checkpoint_path(result: Result) -> str:
"""
@@ -161,37 +164,41 @@ def plot_rmse_over_experiments(preds_dic, save_loc, dpi=400):
    plt.show()


def plot_rmse_over_time(preds_dic, ls_model_types, DAYS_PER_MONTH, save_loc, dpi=300):
def plot_rmse_over_time(
    preds_dic, ls_model_types, save_loc, days_per_month=DAYS_PER_MONTH, dpi=300
):
    """
    Plot RMSE over true time bins for each model type in ls_model_types.

    Parameters:
    preds_dic (dict): Dictionary containing predictions for each model type.
    ls_model_types (list): List of model types.
    save_loc (str): Directory in which the plots are saved.
    DAYS_PER_MONTH (float): Average number of days per month to use for binning.
    days_per_month (float): Average number of days per month to use for binning.
    dpi (int): Resolution of the plot.
    """
    model_type = ls_model_types[0]
    pred_df = preds_dic[model_type]
    split = None

    # Bin true columns by months
    pred_df["group"] = np.round(pred_df["true"] / DAYS_PER_MONTH, 0).astype(int)

    # Calculate RMSE for each group
    grouped_ser = pred_df.groupby(["group"]).apply(calculate_rmse)
    grouped_df = grouped_ser.apply(pd.Series)
    if split is not None:
        grouped_df = grouped_df[[split]].copy()

    # Plot
    plt.figure(dpi=dpi)
    grouped_df.plot(
        kind="bar", title=f"Model: {model_type}", ylabel="RMSE", figsize=(10, 5)
    )
    path_to_save = os.path.join(save_loc, "rmse_over_time_train_test.png")
    plt.savefig(path_to_save, dpi=dpi)
    plt.show()
    for model_type in ls_model_types:
        pred_df = preds_dic[model_type]
        split = None

        # Bin true columns by months
        pred_df["group"] = np.round(pred_df["true"] / days_per_month, 0).astype(int)

        # Calculate RMSE for each group
        grouped_ser = pred_df.groupby(["group"]).apply(calculate_rmse)
        grouped_df = grouped_ser.apply(pd.Series)
        if split is not None:
            grouped_df = grouped_df[[split]].copy()

        # Plot
        plt.figure(dpi=dpi)
        grouped_df.plot(
            kind="bar", title=f"Model: {model_type}", ylabel="RMSE", figsize=(10, 5)
        )
        path_to_save = os.path.join(
            save_loc, f"rmse_over_time_train_test_{model_type}.png"
        )
        plt.savefig(path_to_save, dpi=dpi)
        plt.show()
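
As a quick, self-contained illustration of the binning step above (toy values; only the "true" column is needed for the grouping itself):

```python
import numpy as np
import pandas as pd

# toy predictions with the target ("true") given in days
pred_df = pd.DataFrame({"true": [15.0, 45.0, 95.0], "pred": [20.0, 40.0, 80.0]})

# bin by the average month length of 30.437 days (DAYS_PER_MONTH above)
pred_df["group"] = np.round(pred_df["true"] / 30.437, 0).astype(int)
print(pred_df["group"].tolist())  # [0, 1, 3]
```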


def get_best_model_metrics_and_config(
@@ -261,7 +268,7 @@ def plot_best_models_comparison(
    plt.show()


def plot_model_training_over_iterations(model_type, result_dic, labels):
def plot_model_training_over_iterations(model_type, result_dic, labels, save_loc):
    ax = None
    for result in result_dic[model_type]:
        label_str = ""
@@ -278,3 +285,8 @@ def plot_model_training_over_iterations(model_type, result_dic, labels):
    ax.legend(bbox_to_anchor=(1.1, 1.05))
    ax.set_title(f"RMSE_val vs. training iteration for all trials of {model_type}")
    ax.set_ylabel("RMSE_val")
    plt.tight_layout()
    path_to_save = os.path.join(
        save_loc, f"rmse_best_{model_type}_over_training_iteration.png"
    )
    plt.savefig(path_to_save, dpi=400)
21 changes: 21 additions & 0 deletions q2_ritme/run_config.json
@@ -0,0 +1,21 @@
{
    "experiment_tag": "test_synthetic",
    "host_id": "host_id",
    "ls_model_types": [
        "nn",
        "xgb",
        "linreg",
        "rf"
    ],
    "mlflow_tracking_uri": "mlruns",
    "models_to_evaluate_separately": [
        "xgb",
        "nn"
    ],
    "path_to_ft": null,
    "path_to_md": null,
    "seed_data": 12,
    "seed_model": 12,
    "target": "age_days",
    "train_size": 0.8
}
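
For orientation, these fields are consumed by the training entry point added below; a brief sketch of reading the file (field summaries inferred from `run_n_eval_tune.py`):

```python
import json

with open("q2_ritme/run_config.json") as f:
    config = json.load(f)

# experiment_tag   -> name of the output directory under experiments/models
# ls_model_types   -> model types passed to run_all_trials for tuning
# path_to_md / ft  -> metadata and feature-table inputs for load_n_split_data
# seed_data/_model -> seeds for the data split and for model training
# target / host_id -> regression target and host identifier column
print(config["experiment_tag"], config["target"], config["train_size"])
```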
118 changes: 118 additions & 0 deletions q2_ritme/run_n_eval_tune.py
@@ -0,0 +1,118 @@
import argparse
import json
import os
import shutil

import pandas as pd

from q2_ritme.evaluate_models import (
    aggregate_best_models_metrics_and_configs,
    get_predictions,
    plot_best_models_comparison,
    plot_model_training_over_iterations,
    plot_rmse_over_experiments,
    plot_rmse_over_time,
    retrieve_best_models,
)
from q2_ritme.process_data import load_n_split_data
from q2_ritme.tune_models import run_all_trials


def parse_args():
    parser = argparse.ArgumentParser(description="Run configuration.")
    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="Path to the run configuration JSON file.",
    )
    return parser.parse_args()


def main(config_path):
    with open(config_path, "r") as f:
        config = json.load(f)

    # ! Define needed paths
    base_path = os.path.join("experiments", "models")

    exp_comparison_output = os.path.join(base_path, config["experiment_tag"])
    if os.path.exists(exp_comparison_output):
        raise ValueError(
            f"This experiment tag already exists: {config['experiment_tag']}. "
            "Please use another one."
        )

    # todo: flag mlflow runs also with experiment tag somehow
    path_mlflow = os.path.join("experiments", config["mlflow_tracking_uri"])
    path_exp = os.path.join(base_path, config["experiment_tag"])

    # ! Load and split data
    train_val, test = load_n_split_data(
        config["path_to_md"],
        config["path_to_ft"],
        config["host_id"],
        config["target"],
        config["train_size"],
        config["seed_data"],
    )

    # ! Run all experiments
    result_dic = run_all_trials(
        train_val,
        config["target"],
        config["host_id"],
        config["seed_data"],
        config["seed_model"],
        path_mlflow,
        path_exp,
        model_types=config["ls_model_types"],
        fully_reproducible=False,
    )

    # ! Save run config
    config_output_path = os.path.join(exp_comparison_output, "run_config.json")
    shutil.copy(config_path, config_output_path)

    # ! Evaluate best models of this experiment
    # Eval1: train_val vs. test -> performance
    best_model_dic = retrieve_best_models(result_dic)
    non_features = [config["target"], config["host_id"]]
    features = [x for x in train_val if x not in non_features]

    preds_dic = {}
    for model_type, tmodel in best_model_dic.items():
        train_pred = get_predictions(
            train_val, tmodel, config["target"], features, "train"
        )
        test_pred = get_predictions(test, tmodel, config["target"], features, "test")
        all_pred = pd.concat([train_pred, test_pred])

        # Save all predictions to model file
        path2save = os.path.join(tmodel.path, "predictions.csv")
        all_pred.to_csv(path2save, index=True)
        preds_dic[model_type] = all_pred

    plot_rmse_over_experiments(preds_dic, exp_comparison_output)

    plot_rmse_over_time(preds_dic, config["ls_model_types"], exp_comparison_output)

    # Eval2: train vs. val -> performance and config
    metrics_all, best_configs = aggregate_best_models_metrics_and_configs(result_dic)

    plot_best_models_comparison(metrics_all, exp_comparison_output)

    best_configs.to_csv(
        os.path.join(exp_comparison_output, "best_trial_config.csv"), index=True
    )

    # ! Evaluate one model over training iterations
    for m in config["models_to_evaluate_separately"]:
        plot_model_training_over_iterations(
            m, result_dic, labels=["data_transform"], save_loc=exp_comparison_output
        )


if __name__ == "__main__":
    args = parse_args()
    main(args.config)
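
After a run, the evaluation artefacts written by `main()` can be inspected on disk; a rough sketch (experiment tag taken from `run_config.json`; the exact contents depend on `run_all_trials`):

```python
import os

# experiments/models/<experiment_tag> collects the copied run_config.json,
# the comparison plots and best_trial_config.csv written above; a
# predictions.csv is additionally saved next to each best model
exp_dir = os.path.join("experiments", "models", "test_synthetic")
for root, _, files in os.walk(exp_dir):
    for name in files:
        print(os.path.join(root, name))
```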