Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0.6.20 #255

Merged
merged 20 commits into from
Mar 3, 2025
6 changes: 4 additions & 2 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
* Forecasts are desired for the future immediately following the most recent data.
* trimmed_mean to AverageValueNaive

# 0.6.19 🇺🇦 🇺🇦 🇺🇦
* bug fix for transformer_list="all"
# 0.6.20 🇺🇦 🇺🇦 🇺🇦
* transformer bug fixes
* Prophet package adjustments
* linear model singular matrix handling

### Unstable Upstream Packages (those that are frequently broken by maintainers)
* Pytorch-Forecasting
Expand Down
2 changes: 1 addition & 1 deletion autots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from autots.models.cassandra import Cassandra


__version__ = '0.6.19'
__version__ = '0.6.20'

TransformTS = GeneralTransformer

Expand Down
117 changes: 72 additions & 45 deletions autots/evaluator/auto_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1481,6 +1481,7 @@ def model_forecast(
current_model_file=current_model_file,
model_count=model_count,
force_gc=force_gc,
internal_validation=False, # allow sub ensembles to have postprocessing
)
model_id = create_model_id(
df_forecast.model_name,
Expand Down Expand Up @@ -1527,6 +1528,7 @@ def model_forecast(
forecast_length=forecast_length,
)
transformer_object.fit(df_train)
# forecast inverse MUST come before upper and lower bounds inverse
ens_forecast.forecast = transformer_object.inverse_transform(
ens_forecast.forecast
)
Expand Down Expand Up @@ -1736,7 +1738,7 @@ def _eval_prediction_for_template(


horizontal_post_processors = [
{
{ # consistently used as best
"fillna": "fake_date",
"transformations": {"0": "AlignLastValue", "1": "AlignLastValue"},
"transformation_params": {
Expand Down Expand Up @@ -1785,7 +1787,7 @@ def _eval_prediction_for_template(
},
}, # best wasserstein on daily
# {"fillna": "linear", "transformations": {"0": "bkfilter", "1": "DifferencedTransformer", "2": "BKBandpassFilter"}, "transformation_params": {"0": {}, "1": {"lag": 1, "fill": "zero"}, "2": {"low": 12, "high": 32, "K": 6, "lanczos_factor": False, "return_diff": False, "on_transform": False, "on_inverse": True}}},
{
{ # observed used best on LRP 2025-02-20, neat
"fillna": "rolling_mean_24",
"transformations": {"0": "bkfilter", "1": "FIRFilter", "2": "AlignLastDiff"},
"transformation_params": {
Expand Down Expand Up @@ -1834,8 +1836,8 @@ def _eval_prediction_for_template(
"threshold_method": "mean",
},
},
}, # best mae on daily, a bit weird otherwise, 1x best mage daily
{
},
{ # best mae on daily, a bit weird otherwise, 1x best mage daily
"fillna": "median",
"transformations": {
"0": "DiffSmoother",
Expand Down Expand Up @@ -1867,7 +1869,6 @@ def _eval_prediction_for_template(
"fillna": "fake_date",
"transformations": {
"0": "AlignLastValue",
"1": "PositiveShift",
"2": "HistoricValues",
},
"transformation_params": {
Expand All @@ -1880,8 +1881,7 @@ def _eval_prediction_for_template(
"threshold": 10,
"threshold_method": "mean",
},
"1": {},
"2": {"window": 28},
"1": {"window": 28},
},
}, # best competition on VN1
{
Expand Down Expand Up @@ -2021,44 +2021,6 @@ def _eval_prediction_for_template(
},
},
},
{ # balanced on wiki daily
"fillna": "cubic",
"transformations": {"0": "AlignLastValue", "1": "DatepartRegression"},
"transformation_params": {
"0": {
"rows": 1,
"lag": 7,
"method": "multiplicative",
"strength": 0.9,
"first_value_only": False,
"threshold": 3,
"threshold_method": "max",
},
"1": {
"regression_model": {
"model": "ElasticNet",
"model_params": {
"l1_ratio": 0.5,
"fit_intercept": True,
"selection": "cyclic",
"max_iter": 1000,
},
},
"datepart_method": "common_fourier",
"polynomial_degree": None,
"transform_dict": {
"fillna": None,
"transformations": {"0": "ClipOutliers"},
"transformation_params": {
"0": {"method": "clip", "std_threshold": 4}
},
},
"holiday_countries_used": False,
"lags": None,
"forward_lags": None,
},
},
},
{ # best on VPV, 19.7 smape
"fillna": "quadratic",
"transformations": {"0": "AlignLastValue", "1": "ChangepointDetrend"},
Expand All @@ -2080,6 +2042,69 @@ def _eval_prediction_for_template(
},
},
},
{ # hand tuned, might be replaceable with better FIR combination
'fillna': 'fake_date',
'transformations': {
'0': 'FIRFilter',
"1": "AlignLastValue",
"2": "AlignLastValue",
},
'transformation_params': {
'0': {
'numtaps': 32,
'cutoff_hz': 0.1,
'window': "triang",
'sampling_frequency': 12,
'on_transform': False,
'on_inverse': True,
'bounds_only': True,
},
"1": {
"rows": 1,
"lag": 1,
"method": "multiplicative",
"strength": 1.0,
"first_value_only": False,
"threshold": None,
"threshold_method": "mean",
},
"2": {
"rows": 1,
"lag": 1,
"method": "multiplicative",
"strength": 1.0,
"first_value_only": True,
"threshold": 10,
"threshold_method": "max",
},
},
},
{ # on wiki daily horizontal, mainly smape
'fillna': 'ffill',
'transformations': {
'0': 'LevelShiftTransformer',
'1': 'Constraint',
'2': 'HistoricValues',
},
'transformation_params': {
'0': {
'window_size': 120,
'alpha': 3.5,
'grouping_forward_limit': 3,
'max_level_shifts': 5,
'alignment': 'rolling_diff',
},
'1': {
'constraint_method': 'dampening',
'constraint_direction': 'upper',
'constraint_regularization': 1.0,
'constraint_value': 0.99,
'bounds_only': False,
'fillna': None,
},
'2': {'window': None},
},
},
]


Expand Down Expand Up @@ -2197,6 +2222,7 @@ def virtual_memory():
if ensemble_input == 2 and transformation_dict:
# SKIP BECAUSE TRANSFORMERS (PRE DEFINED) ARE DONE BELOW TO REDUCE FORECASTS RERUNS
# ON INTERNAL VALIDATION ONLY ON TEMPLATES
# this does mean that "custom" postprocessing won't work with template wizard
if verbose >= 1:
print(
"skipping horizontal with transformation due to that being done on internal validation"
Expand Down Expand Up @@ -2299,6 +2325,7 @@ def virtual_memory():
forecast_length=forecast_length,
)
transformer_object.fit(df_train)
# forecast inverse MUST come before upper and lower bounds inverse
df_forecast2.forecast = transformer_object.inverse_transform(
df_forecast2.forecast
)
Expand Down
113 changes: 71 additions & 42 deletions autots/evaluator/auto_ts.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def __init__(
transformer_list: dict = "auto",
transformer_max_depth: int = 6,
models_mode: str = "random",
num_validations: str = "auto",
num_validations: int = "auto",
models_to_validate: float = 0.15,
max_per_model_class: int = None,
validation_method: str = 'backwards',
Expand Down Expand Up @@ -1449,18 +1449,21 @@ def fit(
ensemble=self.ensemble,
score_per_series=self.score_per_series,
)
self._run_template(
ensemble_templates,
df_train,
df_test,
future_regressor_train=future_regressor_train,
future_regressor_test=future_regressor_test,
current_weights=current_weights,
validation_round=0,
max_generations="Ensembles",
current_generation=(current_generation + 1),
result_file=result_file,
)
if not ensemble_templates.empty:
self._run_template(
self.ensemble_templates,
df_train,
df_test,
future_regressor_train=future_regressor_train,
future_regressor_test=future_regressor_test,
current_weights=current_weights,
validation_round=0,
max_generations="Ensembles",
current_generation=(current_generation + 1),
result_file=result_file,
)
elif "simple" in self.ensemble:
print("Simple ensemble missing, error unclear")
except Exception as e:
print(
f"Ensembling Error: {repr(e)}: {''.join(tb.format_exception(None, e, e.__traceback__))}"
Expand Down Expand Up @@ -1506,25 +1509,26 @@ def fit(
score_per_series=self.score_per_series,
)
self.ensemble_templates2 = ensemble_templates
self._run_template(
ensemble_templates,
df_train,
df_test,
future_regressor_train=future_regressor_train,
future_regressor_test=future_regressor_test,
current_weights=current_weights,
validation_round=0,
max_generations="Ensembles",
current_generation=(current_generation + 2),
result_file=result_file,
)
self._run_validations(
df_wide_numeric=self.df_wide_numeric,
num_validations=self.num_validations,
validation_template=ensemble_templates,
future_regressor=self.future_regressor_train,
first_validation=False,
)
if not ensemble_templates.empty:
self._run_template(
ensemble_templates,
df_train,
df_test,
future_regressor_train=future_regressor_train,
future_regressor_test=future_regressor_test,
current_weights=current_weights,
validation_round=0,
max_generations="Ensembles",
current_generation=(current_generation + 2),
result_file=result_file,
)
self._run_validations(
df_wide_numeric=self.df_wide_numeric,
num_validations=self.num_validations,
validation_template=ensemble_templates,
future_regressor=self.future_regressor_train,
first_validation=False,
)
except Exception as e:
print(
f"Post-Validation Ensembling Error: {repr(e)}: {''.join(tb.format_exception(None, e, e.__traceback__))}"
Expand Down Expand Up @@ -2032,12 +2036,23 @@ def _run_template(
# gather results of template run
if not return_template:
self.initial_results = self.initial_results.concat(template_result)
scores, score_dict = generate_score(
self.initial_results.model_results,
metric_weighting=self.metric_weighting,
prediction_interval=self.prediction_interval,
return_score_dict=True,
)
try:
scores, score_dict = generate_score(
self.initial_results.model_results,
metric_weighting=self.metric_weighting,
prediction_interval=self.prediction_interval,
return_score_dict=True,
)
except Exception as e:
mod_res = self.initial_results.model_results
print(mod_res.head())
print(self.metric_weighting)
print(mod_res.columns)
print(mod_res.index)
print(
f"Succeeded model count this template: {mod_res[mod_res['Exceptions'].isnull()].shape[0]}. If this is zero, try importing a different template or changing initial template. Check data too."
)
raise ValueError("unknown score generation error") from e
self.initial_results.model_results['Score'] = scores
self.score_breakdown = pd.DataFrame(score_dict).set_index("ID")
else:
Expand Down Expand Up @@ -2442,6 +2457,7 @@ def export_template(
min_metrics: list = ['smape', 'spl', 'wasserstein', 'mle', 'imle', 'ewmae'],
max_metrics: list = None,
focus_models: list = None,
include_ensemble: bool = True,
):
"""Export top results as a reusable template.

Expand All @@ -2457,6 +2473,7 @@ def export_template(
min_metrics (list): if not None and models=='best', include the lowest for this metric, a way to include even if not a major part of metric weighting as an addon
max_metrics (list): for metrics to take the max model for
focus_models (list): also pull the best score/min/max metrics as per just this model
include_ensemble (bool): if False, exclude Ensembles (ignored with "all" models)
"""
if models == 'all':
export_template = self.initial_results.model_results[self.template_cols_id]
Expand All @@ -2472,6 +2489,8 @@ def export_template(
(export_template['Runs'] >= (self.num_validations + 1))
| (export_template['Ensemble'] >= 2)
]
if not include_ensemble:
export_template = export_template[export_template["Ensemble"] == 0]
# clean up any bad data (hopefully there is none anyway...)
export_template = export_template[
(~export_template['ModelParameters'].isnull())
Expand Down Expand Up @@ -2557,11 +2576,12 @@ def export_template(
if not include_results:
export_template = export_template[self.template_cols_id]
elif models == "slowest":
export_template = self.initial_results.model_results
if not include_ensemble:
export_template = export_template[export_template["Ensemble"] == 0]
return self.save_template(
filename,
self.initial_results.model_results.nlargest(
n, columns=['TotalRuntime']
),
export_template.nlargest(n, columns=['TotalRuntime']),
)
else:
raise ValueError("`models` must be 'all' or 'best' or 'slowest'")
Expand Down Expand Up @@ -4351,8 +4371,17 @@ def plot_chosen_transformer(

# Create a second y-axis sharing the x-axis
ax2 = ax1.twinx()
col_here = (
col
if col in df2.columns
else [colz for colz in df2.columns if col in colz]
)
ax2.plot(
df2.index, df2[col], color=color2, linestyle='--', label='transformed'
df2.index,
df2[col_here],
color=color2,
linestyle='--',
label='transformed',
)
ax2.set_ylabel('transformed', color=color2, fontsize=12)
ax2.tick_params(axis='y', labelcolor=color2)
Expand Down
Loading
Loading