0.6.14 #245

Merged: 7 commits, May 17, 2024
TODO.md (9 changes: 5 additions & 4 deletions)
@@ -13,10 +13,11 @@
* Forecasts are desired for the future immediately following the most recent data.
* trimmed_mean to AverageValueNaive

-# 0.6.13 🇺🇦 🇺🇦 🇺🇦
-* trend_phi directly into Prophet
-* subset arg to make KalmanStateSpace more scalable to memory
-* bug fixes
+# 0.6.14 🇺🇦 🇺🇦 🇺🇦
+* prevent excessive use of 'CenterSplit' and other macro_micro style transformers
+* added ElasticNetwork as subsidiary regression model option
+* KalmanSmoothing, BKBandpassFilter added on_inverse option
+* add threshold arg to AlignLastValue

### Unstable Upstream Packages (those that are frequently broken by maintainers)
* Pytorch-Forecasting
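
To make the new 0.6.14 options above concrete, here is a minimal usage sketch. The names `threshold` and `AlignLastValue` come straight from the changelog entries; the surrounding parameter values and the exact semantics of `threshold` are assumptions, not confirmed documentation.

    # Hypothetical sketch: `threshold` is the new AlignLastValue arg named in
    # the changelog above; the other parameter values are illustrative guesses.
    from autots.tools.transform import GeneralTransformer

    transformer = GeneralTransformer(
        transformations={"0": "AlignLastValue"},
        transformation_params={
            "0": {"rows": 1, "lag": 1, "method": "additive", "strength": 1.0, "threshold": 10}
        },
    )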
autots/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -27,7 +27,7 @@
from autots.models.cassandra import Cassandra


-__version__ = '0.6.13'
+__version__ = '0.6.14'

TransformTS = GeneralTransformer

autots/evaluator/auto_ts.py (32 changes: 16 additions & 16 deletions)
@@ -418,9 +418,9 @@ def __init__(

full_params['transformations'] = transformations
full_params['transformation_params'] = transformation_params
-self.initial_template.loc[
-    index, 'TransformationParameters'
-] = json.dumps(full_params)
+self.initial_template.loc[index, 'TransformationParameters'] = (
+    json.dumps(full_params)
+)

self.regressor_used = False
self.grouping_ids = None
@@ -1093,9 +1093,9 @@ def fit(

Args:
df (pandas.DataFrame): Datetime Indexed dataframe of series, or dataframe of three columns as below.
-date_col (str): name of datetime column
-value_col (str): name of column containing the data of series.
-id_col (str): name of column identifying different series.
+date_col (str): name of datetime column if long style data
+value_col (str): name of column containing the data of series if using long style data. NOT for pointing out the most important column if several, that's `weights`
+id_col (str): name of column identifying different series if long style data.
future_regressor (numpy.Array): single external regressor matching train.index
weights (dict): {'colname1': 2, 'colname2': 5} - increase importance of a series in metric evaluation. Any left blank assumed to have weight of 1.
pass the alias 'mean' as a str ie `weights='mean'` to automatically use the mean value of a series as its weight
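
To illustrate the long-style usage these docstring clarifications describe (the column names below are placeholders for your own data):

    from autots import AutoTS

    model = AutoTS(forecast_length=14)
    model = model.fit(
        df_long,              # one row per (timestamp, series) pair
        date_col="datetime",  # timestamp column
        value_col="value",    # observed values
        id_col="series_id",   # series identifier
    )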
@@ -1827,10 +1827,10 @@ def _run_template(
self.model_count = template_result.model_count
# capture results from lower-level template run
if "TotalRuntime" in template_result.model_results.columns:
-template_result.model_results[
-    'TotalRuntime'
-] = template_result.model_results['TotalRuntime'].fillna(
-    pd.Timedelta(seconds=60)
+template_result.model_results['TotalRuntime'] = (
+    template_result.model_results['TotalRuntime'].fillna(
+        pd.Timedelta(seconds=60)
+    )
)
else:
# trying to catch a rare and sneaky bug (perhaps some variety of beetle?)
@@ -1930,9 +1930,9 @@ def _run_validations(
frac=0.8, random_state=self.random_seed
).reindex(idx)
nan_frac = val_df_train.shape[1] / num_validations
-val_df_train.iloc[
-    -2:, int(nan_frac * y) : int(nan_frac * (y + 1))
-] = np.nan
+val_df_train.iloc[-2:, int(nan_frac * y) : int(nan_frac * (y + 1))] = (
+    np.nan
+)
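# Worked illustration of the band arithmetic above (numbers assumed): with
# 30 columns and num_validations=3, nan_frac is 10.0, so pass y=0 masks the
# last two rows of columns 0:10, y=1 masks 10:20, and y=2 masks 20:30.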

# run validation template on current slice
result = self._run_template(
@@ -3851,9 +3851,9 @@ def diagnose_params(self, target='runtime', waterfall_plots=True):
)
y = pd.json_normalize(json.loads(row["ModelParameters"]))
y.index = [row['ID']]
-y[
-    'Model'
-] = x  # might need to remove this and do analysis independently for each
+y['Model'] = (
+    x  # might need to remove this and do analysis independently for each
+)
res.append(
pd.DataFrame(
{
autots/models/base.py (12 changes: 6 additions & 6 deletions)
@@ -690,18 +690,18 @@ def long_form_results(
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
-upload_upper[
-    interval_name
-] = f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
+upload_upper[interval_name] = (
+    f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
+)
upload_lower = pd.melt(
self.lower_forecast.rename_axis(index='datetime').reset_index(),
var_name=id_name,
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
-upload_lower[
-    interval_name
-] = f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"
+upload_lower[interval_name] = (
+    f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"
+)

upload = pd.concat([upload, upload_upper, upload_lower], axis=0)
if datetime_column is not None:
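
As a quick check of the interval labels in the hunk above, with AutoTS's default prediction_interval of 0.9 the two f-strings evaluate to '95.0%' and '5.0%':

    prediction_interval = 0.9  # default value
    upper = f"{round(100 - ((1 - prediction_interval) / 2) * 100, 0)}%"  # '95.0%'
    lower = f"{round(((1 - prediction_interval) / 2) * 100, 0)}%"        # '5.0%'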
autots/models/dnn.py (123 changes: 123 additions & 0 deletions)
@@ -1,5 +1,6 @@
"""Neural Nets."""

+import random
import pandas as pd
from autots.tools.shaping import wide_to_3d

@@ -421,3 +422,125 @@ def predict(self, X):
"""Predict on dataframe of X."""
test = pd.DataFrame(X).to_numpy().reshape((X.shape[0], X.shape[1], 1))
return pd.DataFrame(self.model.predict(test))


class ElasticNetwork(object):
def __init__(
self,
size: int = 256,
l1: float = 0.01,
l2: float = 0.02,
feature_subsample_rate: float = None,
optimizer: str = 'adam',
loss: str = 'mse',
epochs: int = 20,
batch_size: int = 32,
activation: str = "relu",
verbose: int = 1,
random_seed: int = 2024,
):
self.name = 'ElasticNetwork'
self.verbose = verbose
self.random_seed = random_seed
self.size = size
self.l1 = l1
self.l2 = l2
self.feature_subsample_rate = feature_subsample_rate
self.epochs = epochs
self.batch_size = batch_size
self.optimizer = optimizer
self.loss = loss
self.activation = activation

def fit(self, X, y):
        import tensorflow as tf  # tf.gather/tf.stack below need the top-level module bound here
        from tensorflow.keras.models import Sequential
        from tensorflow.keras.layers import Dense, Layer
        from tensorflow.keras.regularizers import L1L2

# hiding this here as TF is an optional import
class SubsetDense(Layer):
def __init__(self, units, input_dim, feature_subsample_rate=0.5, **kwargs):
super(SubsetDense, self).__init__(**kwargs)
self.units = units
self.input_dim = input_dim
self.feature_subsample_rate = feature_subsample_rate
self.selected_features_per_unit = []
self.kernels = []
self.biases = None

def build(self, input_shape):
# Select a subset of the input features for each unit
num_features = int(self.input_dim * self.feature_subsample_rate)
for _ in range(self.units):
selected_features = random.sample(
range(self.input_dim), num_features
)
self.selected_features_per_unit.append(selected_features)
kernel = self.add_weight(
shape=(num_features,),
initializer='glorot_uniform',
name=f'kernel_{len(self.kernels)}',
)
self.kernels.append(kernel)

self.biases = self.add_weight(
shape=(self.units,), initializer='zeros', name='biases'
)

def call(self, inputs):
outputs = []
for i in range(self.units):
selected_inputs = tf.gather(
inputs, self.selected_features_per_unit[i], axis=1
)
output = (
tf.reduce_sum(selected_inputs * self.kernels[i], axis=1)
+ self.biases[i]
)
outputs.append(output)
return tf.stack(outputs, axis=1)
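
        # Note: each unit draws its random feature subset once, at build time,
        # so SubsetDense behaves like a random-subspace ensemble of sparse
        # linear units (one weight per selected feature), not a full Dense.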

# Model configuration
input_dim = X.shape[1] # Number of input features
output_dim = y.shape[1] # Number of outputs

# Build the model
if self.feature_subsample_rate is None:
self.model = Sequential(
[
Dense(
self.size,
input_dim=input_dim,
activation=self.activation,
kernel_regularizer=L1L2(l1=self.l1, l2=self.l2),
), # Example layer
Dense(output_dim), # Output layer
]
)
else:
self.model = Sequential(
[
SubsetDense(
self.size,
input_dim=input_dim,
feature_subsample_rate=self.feature_subsample_rate,
),
tf.keras.layers.Activation(self.activation),
SubsetDense(
self.size // 2,
input_dim=input_dim,
feature_subsample_rate=self.feature_subsample_rate,
),
tf.keras.layers.Activation(self.activation),
Dense(output_dim), # Output layer
]
)

# Compile the model
self.model.compile(optimizer=self.optimizer, loss=self.loss)
        self.model.fit(
            X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=self.verbose
        )

return self

def predict(self, X):
return self.model.predict(X)
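
A minimal smoke test for the new class, assuming TensorFlow is installed; the data below is synthetic:

    import numpy as np

    X = np.random.rand(200, 12)    # 200 samples, 12 features
    y = np.random.rand(200, 3)     # 3 simultaneous output targets
    model = ElasticNetwork(size=64, epochs=5, verbose=0).fit(X, y)
    preds = model.predict(X[:10])  # array of shape (10, 3)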
autots/models/ensemble.py (18 changes: 9 additions & 9 deletions)
@@ -1838,15 +1838,15 @@ def MosaicEnsemble(
f"Mosaic Ensemble failed on model {row[3]} series {row[2]} and period {row[1]} due to missing model: {e} "
+ mi
)
-melted[
-    'forecast'
-] = fore  # [forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
-melted[
-    'upper_forecast'
-] = u_fore  # [upper_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
-melted[
-    'lower_forecast'
-] = l_fore  # [lower_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+melted['forecast'] = (
+    fore  # [forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+)
+melted['upper_forecast'] = (
+    u_fore  # [upper_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+)
+melted['lower_forecast'] = (
+    l_fore  # [lower_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+)

forecast_df = melted.pivot(
values="forecast", columns="series_id", index="forecast_period"
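
For readers skimming the hunk above: `melted` holds one row per (forecast_period, series_id) pair, and each pivot restores a wide DataFrame. A tiny standalone illustration:

    import pandas as pd

    melted = pd.DataFrame({
        "forecast_period": [0, 0, 1, 1],
        "series_id": ["a", "b", "a", "b"],
        "forecast": [1.0, 2.0, 1.5, 2.5],
    })
    wide = melted.pivot(values="forecast", columns="series_id", index="forecast_period")
    # wide: 2x2 frame indexed by forecast_period, one column per series_id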
autots/models/matrix_var.py (2 changes: 1 addition & 1 deletion)
@@ -237,7 +237,7 @@ def get_new_params(self, method: str = 'random'):
"""Return dict of new parameters for parameter tuning."""
return {
'method': random.choices(['als', 'dmd'], [0.7, 0.3])[0],
-'rank': random.choice([2, 4, 6, 0.1, 0.2, 0.5]),
+'rank': random.choice([2, 4, 8, 16, 32, 0.1, 0.2, 0.5]),
'maxiter': 200,
}
