[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Oct 8, 2024
1 parent 596bf18 commit c14f99c
Showing 12 changed files with 165 additions and 151 deletions.
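All of the changes below are mechanical: black-style reformatting (double quotes, trailing commas, normalized spacing), isort-style import reordering, trailing-whitespace and end-of-file fixes, and insertion of `# -*- coding: utf-8 -*-` pragmas. The repository's actual `.pre-commit-config.yaml` is not part of this view; a minimal sketch that would produce fixes of this kind (the hook revisions are illustrative pins, not taken from the repo) might look like:

```yaml
# Hypothetical .pre-commit-config.yaml -- an assumption consistent with the
# fixes visible in this commit, not the repository's real config.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0  # illustrative pin
    hooks:
      - id: trailing-whitespace  # strips trailing spaces (most of the README changes)
      - id: end-of-file-fixer    # ensures files end with exactly one newline
      - id: fix-encoding-pragma  # adds the '# -*- coding: utf-8 -*-' header
  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2  # illustrative pin
    hooks:
      - id: isort  # groups and alphabetizes imports
  - repo: https://github.com/psf/black
    rev: 24.8.0  # illustrative pin
    hooks:
      - id: black  # double quotes, trailing commas, line wrapping
```

pre-commit.ci runs whatever hooks the repository configures on each push and commits the resulting fixes as a bot, which is what this commit contains.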
24 changes: 12 additions & 12 deletions paper/README.md
@@ -4,25 +4,25 @@
 - `20220310_plot_causalimpact.ipynb` used to plot the results of the causal impact analysis (e.g. generated via `causalimpact_xgboost.py`)
 - `20220310_plot_forecast_overview.ipynb` used to plot an overview of the historical forecasts (Figure 7 in the main text as well as the model evaluation)
 - `20220310_train_gbdt_on_all.ipynb` used to train the GBDT model on all data (subsequently used to compute the scenarios)
-- `20220306_predict_w_gbdt.ipynb` example for training a GBDT model
+- `20220306_predict_w_gbdt.ipynb` example for training a GBDT model
 
-## Scripts
-### Causal impact analysis
+## Scripts
+### Causal impact analysis
 - `causalimpact_sweep.py` run the hyperparameter sweep (assumes [Weights and Biases](https://wandb.ai/site) is set up)
-- `causalimpact_xgboost.py` run the causal impact analysis using GBDT models
+- `causalimpact_xgboost.py` run the causal impact analysis using GBDT models
 - `tcn_causalimpact.py` run the analysis using TCN models
-- `step_times.pkl` contains the timestamps for the step changes in our study
+- `step_times.pkl` contains the timestamps for the step changes in our study
 
-### Scenarios
-- `loop_over_maps_gbdt.py` / `loop_over_maps_scitas.py` used to create and submit Slurm scripts for the "scenario" analysis
-- `plot_effects_gbdt.py` / `plot_effects.py` used to convert the outputs of the scenario scripts into heatmaps
-- `run_gbdt_scenarios.py` / `run_scenarios.py` contain the logic for running the scenarios
+### Scenarios
+- `loop_over_maps_gbdt.py` / `loop_over_maps_scitas.py` used to create and submit Slurm scripts for the "scenario" analysis
+- `plot_effects_gbdt.py` / `plot_effects.py` used to convert the outputs of the scenario scripts into heatmaps
+- `run_gbdt_scenarios.py` / `run_scenarios.py` contain the logic for running the scenarios
 
 ### Models
 
-Model checkpoints are archived on Zenodo (DOI: [10.5281/zenodo.5153417](https://dx.doi.org/10.5281/zenodo.5153417)) but are also available in the `model` subdirectory.
+Model checkpoints are archived on Zenodo (DOI: [10.5281/zenodo.5153417](https://dx.doi.org/10.5281/zenodo.5153417)) but are also available in the `model` subdirectory.
 Unfortunately, we could only serialize the models as pickle files, so the same Python version and package versions are needed to reuse the models.
 
-### Results
+### Results
 
-The `results` subdirectory contains pre-computed results that are used in the notebooks that plot the results.
+The `results` subdirectory contains pre-computed results that are used in the notebooks that plot the results.
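Since the README above notes that the checkpoints are plain pickle files, reloading one is a single `pickle.load` call under a matching environment. A minimal sketch (the filename below is a hypothetical placeholder, not an actual file in the `model` subdirectory):

```python
# Hypothetical example of reloading a serialized checkpoint.
# "model/example_checkpoint.pkl" is a placeholder name, not a real file here.
import pickle

with open("model/example_checkpoint.pkl", "rb") as handle:
    model = pickle.load(handle)

# Unpickling can fail or silently misbehave if the installed Python or package
# versions differ from those used when the model was serialized.
```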
7 changes: 4 additions & 3 deletions paper/causalimpact_sweep.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 
 import logging
 import pickle
@@ -92,7 +93,7 @@ def inner_train_test(x, y, day, target):
     x = x[features]
 
     x_trains = []
-    y_trains = []
+    y_trains = []
 
     before, during, after, way_after = get_causalimpact_splits(x, y, day, times, DF)

@@ -115,9 +116,9 @@ def inner_train_test(x, y, day, target):
     x_trains[shorter] = xscaler.transform(x_trains[shorter])
     y_trains[shorter] = yscaler.transform(y_trains[shorter])
 
-    steps =len(during[0])
+    steps = len(during[0])
 
-    if steps > len(x_trains[shorter]):
+    if steps > len(x_trains[shorter]):
         ts = choose_index(x, 0.3)
         x_before, x_after = x_trains[longer].split_before(ts)
         y_before, y_after = y_trains[longer].split_before(ts)
102 changes: 53 additions & 49 deletions paper/causalimpact_xgboost.py
@@ -1,27 +1,32 @@
-from aeml.causalimpact.utils import get_timestep_tuples, get_causalimpact_splits
+# -*- coding: utf-8 -*-
+import math
 import pickle
-from aeml.causalimpact.utils import _select_unrelated_x
+import time
+from copy import deepcopy
+
+import click
+import numpy as np
+import pandas as pd
+from darts import TimeSeries
+from darts.dataprocessing.transformers import Scaler
+
+from aeml.causalimpact.utils import (
+    _select_unrelated_x,
+    get_causalimpact_splits,
+    get_timestep_tuples,
+)
 from aeml.models.gbdt.gbmquantile import LightGBMQuantileRegressor
 from aeml.models.gbdt.run import run_ci_model
 from aeml.models.gbdt.settings import *
 
-from darts.dataprocessing.transformers import Scaler
-from darts import TimeSeries
-import pandas as pd
-from copy import deepcopy
-import time
-import numpy as np
-import click
-import math
-
 settings = {
-    0: {0: ci_0_0, 1: ci_0_1},
-    1: {0: ci_1_0, 1: ci_1_1},
-    2: {0: ci_2_0, 1: ci_2_1},
-    3: {0: ci_3_0, 1: ci_3_1},
-    4: {0: ci_4_0, 1: ci_4_1},
-    5: {0: ci_5_0, 1: ci_5_1},
-    6: {0: ci_6_0, 1: ci_6_1}
+    0: {0: ci_0_0, 1: ci_0_1},
+    1: {0: ci_1_0, 1: ci_1_1},
+    2: {0: ci_2_0, 1: ci_2_1},
+    3: {0: ci_3_0, 1: ci_3_1},
+    4: {0: ci_4_0, 1: ci_4_1},
+    5: {0: ci_5_0, 1: ci_5_1},
+    6: {0: ci_6_0, 1: ci_6_1},
 }
 
 TIMESTR = time.strftime("%Y%m%d-%H%M%S")
@@ -87,28 +92,27 @@
     6: [],
 }
 
+
 def select_columns(day):
     feat_to_exclude = to_exclude[day]
     feats = [f for f in MEAS_COLUMNS if f not in feat_to_exclude]
     return feats
 
 
-@click.command('cli')
-@click.argument('day', type=click.INT)
-@click.argument('target', type=click.INT)
-def run_causalimpact_analysis(day, target):
+@click.command("cli")
+@click.argument("day", type=click.INT)
+@click.argument("target", type=click.INT)
+def run_causalimpact_analysis(day, target):
     cols = select_columns(day)
     y = TimeSeries.from_dataframe(DF)[TARGETS_clean[target]]
     x = TimeSeries.from_dataframe(DF[cols])
 
     x_trains = []
     y_trains = []
 
-    before, during, after, way_after = get_causalimpact_splits(
-        x, y, day, times, DF
-    )
+    before, during, after, way_after = get_causalimpact_splits(x, y, day, times, DF)
 
-    # We do multiseries training
+    # We do multiseries training
     x_trains.append(before[0])
     y_trains.append(before[1])
     x_trains.append(way_after[0])
@@ -126,7 +130,7 @@ def run_causalimpact_analysis(day, target):
     x_trains[shorter] = xscaler.transform(x_trains[shorter])
     y_trains[shorter] = yscaler.transform(y_trains[shorter])
 
-    if len(x_trains[shorter]) < 300:
+    if len(x_trains[shorter]) < 300:
         x_trains.pop(shorter)
         y_trains.pop(shorter)

@@ -160,39 +164,39 @@ def run_causalimpact_analysis(day, target):

     day_y_df = pd.concat([before_y_df, during_y_df, after_y_df], axis=0)
     day_y_ts = TimeSeries.from_dataframe(day_y_df)
-    steps = math.ceil(len(during[0])/2)# * 2
+
+    steps = math.ceil(len(during[0]) / 2)  # * 2
 
     model = run_ci_model(
         x_trains,
         y_trains,
         **settings[day][target],
         num_features=len(cols),
-        quantiles=(0.05, 0.5, 0.95),
-        output_chunk_length=steps
+        quantiles=(0.05, 0.5, 0.95),
+        output_chunk_length=steps,
     )
-    buffer = math.ceil(len(during[0])/3)
+    buffer = math.ceil(len(during[0]) / 3)
     b = before[1][:-buffer]
-    predictions = model.forecast(
-        n = len(during[0]) + 2* buffer,
-        series = b,
-        past_covariates = day_x_ts,
-
-    )
+    predictions = model.forecast(
+        n=len(during[0]) + 2 * buffer,
+        series=b,
+        past_covariates=day_x_ts,
+    )
 
     results = {
-        'predictions': predictions,
-        'x_all': day_x_ts,
-        'before': before,
-        'during': during,
-        'after': after
+        "predictions": predictions,
+        "x_all": day_x_ts,
+        "before": before,
+        "during": during,
+        "after": after,
     }
 
     with open(
-        f"{TIMESTR}-causalimpact_{day}_{target}",
-        "wb",
-    ) as handle:
-        pickle.dump(results, handle)
+        f"{TIMESTR}-causalimpact_{day}_{target}",
+        "wb",
+    ) as handle:
+        pickle.dump(results, handle)
 
 
-if __name__ == '__main__':
-    run_causalimpact_analysis()
+if __name__ == "__main__":
+    run_causalimpact_analysis()
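Judging from the `click` decorators above, the reformatted script takes two integer arguments, day and target, so a run would look like `python causalimpact_xgboost.py 3 0`; the results dictionary is then pickled to a timestamped file named like `{TIMESTR}-causalimpact_3_0`.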
4 changes: 2 additions & 2 deletions paper/loop_over_maps.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
-from os import fchdir
 import subprocess
 import time
+from os import fchdir
 
 import click

@@ -46,7 +46,7 @@
 #SBATCH --constraint=gpu
 #SBATCH --account=pr128
-module load daint-gpu
+module load daint-gpu
 source /home/kjablonk/anaconda3/bin/activate
 conda activate aeml
2 changes: 1 addition & 1 deletion paper/loop_over_maps_scitas.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
-from os import fchdir
 import subprocess
 import time
+from os import fchdir
 
 import click

12 changes: 7 additions & 5 deletions paper/plot_effects.py
@@ -1,13 +1,15 @@
-import matplotlib.pyplot as plt
-from glob import glob
+# -*- coding: utf-8 -*-
+import os
 import pickle
+import traceback
+from glob import glob
 from pathlib import Path
-import numpy as np
 
 import click
 import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
 from scipy.ndimage import gaussian_filter
-import os
-import traceback
 
+
 def load_pickle(filename):
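The same isort convention is visible in every file touched above: standard-library imports first, then a blank line, then third-party packages (with the first-party `aeml` imports in a third group in `causalimpact_xgboost.py`), each group alphabetized with plain `import` statements ahead of `from ... import` ones.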
(Diffs for the remaining six changed files are not shown here.)

