[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Oct 8, 2024
1 parent 596bf18 commit c14f99c
Showing 12 changed files with 165 additions and 151 deletions.
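All of the changes below are mechanical: black-style reformatting (double quotes, trailing commas, normalized spacing), isort-style import reordering, trailing-whitespace and end-of-file fixes, and insertion of `# -*- coding: utf-8 -*-` pragmas. The repository's actual `.pre-commit-config.yaml` is not part of this view; a minimal sketch that would produce fixes of this kind (the hook revisions are illustrative pins, not taken from the repo) might look like:

```yaml
# Hypothetical .pre-commit-config.yaml -- an assumption consistent with the
# fixes visible in this commit, not the repository's real config.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0  # illustrative pin
    hooks:
      - id: trailing-whitespace  # strips trailing spaces (most of the README changes)
      - id: end-of-file-fixer    # ensures files end with exactly one newline
      - id: fix-encoding-pragma  # adds the '# -*- coding: utf-8 -*-' header
  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2  # illustrative pin
    hooks:
      - id: isort  # groups and alphabetizes imports
  - repo: https://github.com/psf/black
    rev: 24.8.0  # illustrative pin
    hooks:
      - id: black  # double quotes, trailing commas, line wrapping
```

pre-commit.ci runs whatever hooks the repository configures on each push and commits the resulting fixes as a bot, which is what this commit contains.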
24 changes: 12 additions & 12 deletions paper/README.md
@@ -4,25 +4,25 @@
 - `20220310_plot_causalimpact.ipynb` used to plot the results of the causal impact analysis (e.g. generated via `causalimpact_xgboost.py`)
 - `20220310_plot_forecast_overview.ipynb` used to plot an overview of the historical forecasts (Figure 7 in the main text as well as the model evaluation)
 - `20220310_train_gbdt_on_all.ipynb` used to train the GBDT model on all data (subsequently used to compute the scenarios)
-- `20220306_predict_w_gbdt.ipynb` example for training a GBDT model
+- `20220306_predict_w_gbdt.ipynb` example for training a GBDT model
 
-## Scripts
-### Causal impact analysis
+## Scripts
+### Causal impact analysis
 - `causalimpact_sweep.py` run the hyperparameter sweep (assumes [Weights and Biases](https://wandb.ai/site) is set up)
-- `causalimpact_xgboost.py` run the causal impact analysis using GBDT models
+- `causalimpact_xgboost.py` run the causal impact analysis using GBDT models
 - `tcn_causalimpact.py` run the analysis using TCN models
-- `step_times.pkl` contains the timestamps for the step changes in our study
+- `step_times.pkl` contains the timestamps for the step changes in our study
 
-### Scenarios
-- `loop_over_maps_gbdt.py` / `loop_over_maps_scitas.py` used to create and submit Slurm scripts for the "scenario" analysis
-- `plot_effects_gbdt.py` / `plot_effects.py` used to convert the outputs of the scenario scripts into heatmaps
-- `run_gbdt_scenarios.py` / `run_scenarios.py` contain the logic for running the scenarios
+### Scenarios
+- `loop_over_maps_gbdt.py` / `loop_over_maps_scitas.py` used to create and submit Slurm scripts for the "scenario" analysis
+- `plot_effects_gbdt.py` / `plot_effects.py` used to convert the outputs of the scenario scripts into heatmaps
+- `run_gbdt_scenarios.py` / `run_scenarios.py` contain the logic for running the scenarios
 
 ### Models
 
-Model checkpoints are archived on Zenodo (DOI: [10.5281/zenodo.5153417](https://dx.doi.org/10.5281/zenodo.5153417)) but are also available in the `model` subdirectory.
+Model checkpoints are archived on Zenodo (DOI: [10.5281/zenodo.5153417](https://dx.doi.org/10.5281/zenodo.5153417)) but are also available in the `model` subdirectory.
 Unfortunately, we could only serialize the models as pickle files, so the same Python version and package versions are needed to reuse the models.
 
-### Results
+### Results
 
-The `results` subdirectory contains pre-computed results that are used in the notebooks that plot the results.
+The `results` subdirectory contains pre-computed results that are used in the notebooks that plot the results.
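Since the README above notes that the checkpoints are plain pickle files, reloading one is a single `pickle.load` call under a matching environment. A minimal sketch (the filename below is a hypothetical placeholder, not an actual file in the `model` subdirectory):

```python
# Hypothetical example of reloading a serialized checkpoint.
# "model/example_checkpoint.pkl" is a placeholder name, not a real file here.
import pickle

with open("model/example_checkpoint.pkl", "rb") as handle:
    model = pickle.load(handle)

# Unpickling can fail or silently misbehave if the installed Python or package
# versions differ from those used when the model was serialized.
```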
7 changes: 4 additions & 3 deletions paper/causalimpact_sweep.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 
 import logging
 import pickle
@@ -92,7 +93,7 @@ def inner_train_test(x, y, day, target):
     x = x[features]
 
     x_trains = []
-    y_trains = []
+    y_trains = []
 
     before, during, after, way_after = get_causalimpact_splits(x, y, day, times, DF)

@@ -115,9 +116,9 @@ def inner_train_test(x, y, day, target):
     x_trains[shorter] = xscaler.transform(x_trains[shorter])
     y_trains[shorter] = yscaler.transform(y_trains[shorter])
 
-    steps =len(during[0])
+    steps = len(during[0])
 
-    if steps > len(x_trains[shorter]):
+    if steps > len(x_trains[shorter]):
         ts = choose_index(x, 0.3)
         x_before, x_after = x_trains[longer].split_before(ts)
         y_before, y_after = y_trains[longer].split_before(ts)
102 changes: 53 additions & 49 deletions paper/causalimpact_xgboost.py
@@ -1,27 +1,32 @@
-from aeml.causalimpact.utils import get_timestep_tuples, get_causalimpact_splits
+# -*- coding: utf-8 -*-
+import math
 import pickle
-from aeml.causalimpact.utils import _select_unrelated_x
+import time
+from copy import deepcopy
+
+import click
+import numpy as np
+import pandas as pd
+from darts import TimeSeries
+from darts.dataprocessing.transformers import Scaler
+
+from aeml.causalimpact.utils import (
+    _select_unrelated_x,
+    get_causalimpact_splits,
+    get_timestep_tuples,
+)
 from aeml.models.gbdt.gbmquantile import LightGBMQuantileRegressor
 from aeml.models.gbdt.run import run_ci_model
 from aeml.models.gbdt.settings import *
 
-from darts.dataprocessing.transformers import Scaler
-from darts import TimeSeries
-import pandas as pd
-from copy import deepcopy
-import time
-import numpy as np
-import click
-import math
-
 settings = {
-    0: {0: ci_0_0, 1: ci_0_1},
-    1: {0: ci_1_0, 1: ci_1_1},
-    2: {0: ci_2_0, 1: ci_2_1},
-    3: {0: ci_3_0, 1: ci_3_1},
-    4: {0: ci_4_0, 1: ci_4_1},
-    5: {0: ci_5_0, 1: ci_5_1},
-    6: {0: ci_6_0, 1: ci_6_1}
+    0: {0: ci_0_0, 1: ci_0_1},
+    1: {0: ci_1_0, 1: ci_1_1},
+    2: {0: ci_2_0, 1: ci_2_1},
+    3: {0: ci_3_0, 1: ci_3_1},
+    4: {0: ci_4_0, 1: ci_4_1},
+    5: {0: ci_5_0, 1: ci_5_1},
+    6: {0: ci_6_0, 1: ci_6_1},
 }
 
 TIMESTR = time.strftime("%Y%m%d-%H%M%S")
@@ -87,28 +92,27 @@
     6: [],
 }
 
+
 def select_columns(day):
     feat_to_exclude = to_exclude[day]
     feats = [f for f in MEAS_COLUMNS if f not in feat_to_exclude]
     return feats
 
 
-@click.command('cli')
-@click.argument('day', type=click.INT)
-@click.argument('target', type=click.INT)
-def run_causalimpact_analysis(day, target):
+@click.command("cli")
+@click.argument("day", type=click.INT)
+@click.argument("target", type=click.INT)
+def run_causalimpact_analysis(day, target):
     cols = select_columns(day)
     y = TimeSeries.from_dataframe(DF)[TARGETS_clean[target]]
     x = TimeSeries.from_dataframe(DF[cols])
 
     x_trains = []
     y_trains = []
 
-    before, during, after, way_after = get_causalimpact_splits(
-        x, y, day, times, DF
-    )
+    before, during, after, way_after = get_causalimpact_splits(x, y, day, times, DF)
 
-    # We do multiseries training
+    # We do multiseries training
     x_trains.append(before[0])
     y_trains.append(before[1])
     x_trains.append(way_after[0])
@@ -126,7 +130,7 @@ def run_causalimpact_analysis(day, target):
     x_trains[shorter] = xscaler.transform(x_trains[shorter])
     y_trains[shorter] = yscaler.transform(y_trains[shorter])
 
-    if len(x_trains[shorter]) < 300:
+    if len(x_trains[shorter]) < 300:
         x_trains.pop(shorter)
         y_trains.pop(shorter)

@@ -160,39 +164,39 @@ def run_causalimpact_analysis(day, target):

     day_y_df = pd.concat([before_y_df, during_y_df, after_y_df], axis=0)
     day_y_ts = TimeSeries.from_dataframe(day_y_df)
-    steps = math.ceil(len(during[0])/2)# * 2
+
+    steps = math.ceil(len(during[0]) / 2)  # * 2
 
     model = run_ci_model(
         x_trains,
         y_trains,
         **settings[day][target],
         num_features=len(cols),
-        quantiles=(0.05, 0.5, 0.95),
-        output_chunk_length=steps
+        quantiles=(0.05, 0.5, 0.95),
+        output_chunk_length=steps,
     )
-    buffer = math.ceil(len(during[0])/3)
+    buffer = math.ceil(len(during[0]) / 3)
     b = before[1][:-buffer]
-    predictions = model.forecast(
-        n = len(during[0]) + 2* buffer,
-        series = b,
-        past_covariates = day_x_ts,
-
-    )
+    predictions = model.forecast(
+        n=len(during[0]) + 2 * buffer,
+        series=b,
+        past_covariates=day_x_ts,
+    )
 
     results = {
-        'predictions': predictions,
-        'x_all': day_x_ts,
-        'before': before,
-        'during': during,
-        'after': after
+        "predictions": predictions,
+        "x_all": day_x_ts,
+        "before": before,
+        "during": during,
+        "after": after,
     }
 
     with open(
-        f"{TIMESTR}-causalimpact_{day}_{target}",
-        "wb",
-    ) as handle:
-        pickle.dump(results, handle)
+        f"{TIMESTR}-causalimpact_{day}_{target}",
+        "wb",
+    ) as handle:
+        pickle.dump(results, handle)
 
 
-if __name__ == '__main__':
-    run_causalimpact_analysis()
+if __name__ == "__main__":
+    run_causalimpact_analysis()
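Judging from the `click` decorators above, the reformatted script takes two integer arguments, day and target, so a run would look like `python causalimpact_xgboost.py 3 0`; the results dictionary is then pickled to a timestamped file named like `{TIMESTR}-causalimpact_3_0`.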
4 changes: 2 additions & 2 deletions paper/loop_over_maps.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
-from os import fchdir
 import subprocess
 import time
+from os import fchdir
 
 import click

@@ -46,7 +46,7 @@
 #SBATCH --constraint=gpu
 #SBATCH --account=pr128
-module load daint-gpu
+module load daint-gpu
 source /home/kjablonk/anaconda3/bin/activate
 conda activate aeml
2 changes: 1 addition & 1 deletion paper/loop_over_maps_scitas.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
-from os import fchdir
 import subprocess
 import time
+from os import fchdir
 
 import click

12 changes: 7 additions & 5 deletions paper/plot_effects.py
@@ -1,13 +1,15 @@
-import matplotlib.pyplot as plt
-from glob import glob
+# -*- coding: utf-8 -*-
+import os
 import pickle
+import traceback
+from glob import glob
 from pathlib import Path
-import numpy as np
 
 import click
 import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
 from scipy.ndimage import gaussian_filter
-import os
-import traceback
 
+
 def load_pickle(filename):
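The same isort convention is visible in every file touched above: standard-library imports first, then a blank line, then third-party packages (with the first-party `aeml` imports in a third group in `causalimpact_xgboost.py`), each group alphabetized with plain `import` statements ahead of `from ... import` ones.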
(Diffs for the remaining six changed files are not shown here.)

