0.6.14 #245

Merged: 7 commits, May 17, 2024
TODO.md (9 changes: 5 additions & 4 deletions)
@@ -13,10 +13,11 @@
* Forecasts are desired for the future immediately following the most recent data.
* trimmed_mean to AverageValueNaive

-# 0.6.13 🇺🇦 🇺🇦 🇺🇦
-* trend_phi directly into Prophet
-* subset arg to make KalmanStateSpace more scalable to memory
-* bug fixes
+# 0.6.14 🇺🇦 🇺🇦 🇺🇦
+* prevent excessive use of 'CenterSplit' and other macro_micro style transformers
+* added ElasticNetwork as subsidiary regression model option
+* KalmanSmoothing, BKBandpassFilter added on_inverse option
+* add threshold arg to AlignLastValue

### Unstable Upstream Packages (those that are frequently broken by maintainers)
* Pytorch-Forecasting
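
To make the new 0.6.14 options above concrete, here is a minimal usage sketch. The names `threshold` and `AlignLastValue` come straight from the changelog entries; the surrounding parameter values and the exact semantics of `threshold` are assumptions, not confirmed documentation.

    # Hypothetical sketch: `threshold` is the new AlignLastValue arg named in
    # the changelog above; the other parameter values are illustrative guesses.
    from autots.tools.transform import GeneralTransformer

    transformer = GeneralTransformer(
        transformations={"0": "AlignLastValue"},
        transformation_params={
            "0": {"rows": 1, "lag": 1, "method": "additive", "strength": 1.0, "threshold": 10}
        },
    )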
autots/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -27,7 +27,7 @@
from autots.models.cassandra import Cassandra


-__version__ = '0.6.13'
+__version__ = '0.6.14'

TransformTS = GeneralTransformer

autots/evaluator/auto_ts.py (32 changes: 16 additions & 16 deletions)
@@ -418,9 +418,9 @@ def __init__(

full_params['transformations'] = transformations
full_params['transformation_params'] = transformation_params
-self.initial_template.loc[
-    index, 'TransformationParameters'
-] = json.dumps(full_params)
+self.initial_template.loc[index, 'TransformationParameters'] = (
+    json.dumps(full_params)
+)

self.regressor_used = False
self.grouping_ids = None
@@ -1093,9 +1093,9 @@ def fit(

Args:
df (pandas.DataFrame): Datetime Indexed dataframe of series, or dataframe of three columns as below.
-date_col (str): name of datetime column
-value_col (str): name of column containing the data of series.
-id_col (str): name of column identifying different series.
+date_col (str): name of datetime column if long style data
+value_col (str): name of column containing the data of series if using long style data. NOT for pointing out the most important column if several, that's `weights`
+id_col (str): name of column identifying different series if long style data.
future_regressor (numpy.Array): single external regressor matching train.index
weights (dict): {'colname1': 2, 'colname2': 5} - increase importance of a series in metric evaluation. Any left blank assumed to have weight of 1.
pass the alias 'mean' as a str ie `weights='mean'` to automatically use the mean value of a series as its weight
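
To illustrate the long-style usage these docstring clarifications describe (the column names below are placeholders for your own data):

    from autots import AutoTS

    model = AutoTS(forecast_length=14)
    model = model.fit(
        df_long,              # one row per (timestamp, series) pair
        date_col="datetime",  # timestamp column
        value_col="value",    # observed values
        id_col="series_id",   # series identifier
    )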
@@ -1827,10 +1827,10 @@ def _run_template(
self.model_count = template_result.model_count
# capture results from lower-level template run
if "TotalRuntime" in template_result.model_results.columns:
-template_result.model_results[
-    'TotalRuntime'
-] = template_result.model_results['TotalRuntime'].fillna(
-    pd.Timedelta(seconds=60)
+template_result.model_results['TotalRuntime'] = (
+    template_result.model_results['TotalRuntime'].fillna(
+        pd.Timedelta(seconds=60)
+    )
)
else:
# trying to catch a rare and sneaky bug (perhaps some variety of beetle?)
@@ -1930,9 +1930,9 @@ def _run_validations(
frac=0.8, random_state=self.random_seed
).reindex(idx)
nan_frac = val_df_train.shape[1] / num_validations
-val_df_train.iloc[
-    -2:, int(nan_frac * y) : int(nan_frac * (y + 1))
-] = np.nan
+val_df_train.iloc[-2:, int(nan_frac * y) : int(nan_frac * (y + 1))] = (
+    np.nan
+)
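# Worked illustration of the band arithmetic above (numbers assumed): with
# 30 columns and num_validations=3, nan_frac is 10.0, so pass y=0 masks the
# last two rows of columns 0:10, y=1 masks 10:20, and y=2 masks 20:30.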

# run validation template on current slice
result = self._run_template(
@@ -3851,9 +3851,9 @@ def diagnose_params(self, target='runtime', waterfall_plots=True):
)
y = pd.json_normalize(json.loads(row["ModelParameters"]))
y.index = [row['ID']]
-y[
-    'Model'
-] = x  # might need to remove this and do analysis independently for each
+y['Model'] = (
+    x  # might need to remove this and do analysis independently for each
+)
res.append(
pd.DataFrame(
{
autots/models/base.py (12 changes: 6 additions & 6 deletions)
@@ -690,18 +690,18 @@ def long_form_results(
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
-upload_upper[
-    interval_name
-] = f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
+upload_upper[interval_name] = (
+    f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
+)
upload_lower = pd.melt(
self.lower_forecast.rename_axis(index='datetime').reset_index(),
var_name=id_name,
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
-upload_lower[
-    interval_name
-] = f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"
+upload_lower[interval_name] = (
+    f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"
+)

upload = pd.concat([upload, upload_upper, upload_lower], axis=0)
if datetime_column is not None:
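
As a quick check of the interval labels in the hunk above, with AutoTS's default prediction_interval of 0.9 the two f-strings evaluate to '95.0%' and '5.0%':

    prediction_interval = 0.9  # default value
    upper = f"{round(100 - ((1 - prediction_interval) / 2) * 100, 0)}%"  # '95.0%'
    lower = f"{round(((1 - prediction_interval) / 2) * 100, 0)}%"        # '5.0%'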
autots/models/dnn.py (123 changes: 123 additions & 0 deletions)
@@ -1,5 +1,6 @@
"""Neural Nets."""

+import random
import pandas as pd
from autots.tools.shaping import wide_to_3d

@@ -421,3 +422,125 @@ def predict(self, X):
"""Predict on dataframe of X."""
test = pd.DataFrame(X).to_numpy().reshape((X.shape[0], X.shape[1], 1))
return pd.DataFrame(self.model.predict(test))


class ElasticNetwork(object):
def __init__(
self,
size: int = 256,
l1: float = 0.01,
l2: float = 0.02,
feature_subsample_rate: float = None,
optimizer: str = 'adam',
loss: str = 'mse',
epochs: int = 20,
batch_size: int = 32,
activation: str = "relu",
verbose: int = 1,
random_seed: int = 2024,
):
self.name = 'ElasticNetwork'
self.verbose = verbose
self.random_seed = random_seed
self.size = size
self.l1 = l1
self.l2 = l2
self.feature_subsample_rate = feature_subsample_rate
self.epochs = epochs
self.batch_size = batch_size
self.optimizer = optimizer
self.loss = loss
self.activation = activation

def fit(self, X, y):
        import tensorflow as tf  # tf.gather/tf.stack below need the top-level module bound here
        from tensorflow.keras.models import Sequential
        from tensorflow.keras.layers import Dense, Layer
        from tensorflow.keras.regularizers import L1L2

# hiding this here as TF is an optional import
class SubsetDense(Layer):
def __init__(self, units, input_dim, feature_subsample_rate=0.5, **kwargs):
super(SubsetDense, self).__init__(**kwargs)
self.units = units
self.input_dim = input_dim
self.feature_subsample_rate = feature_subsample_rate
self.selected_features_per_unit = []
self.kernels = []
self.biases = None

def build(self, input_shape):
# Select a subset of the input features for each unit
num_features = int(self.input_dim * self.feature_subsample_rate)
for _ in range(self.units):
selected_features = random.sample(
range(self.input_dim), num_features
)
self.selected_features_per_unit.append(selected_features)
kernel = self.add_weight(
shape=(num_features,),
initializer='glorot_uniform',
name=f'kernel_{len(self.kernels)}',
)
self.kernels.append(kernel)

self.biases = self.add_weight(
shape=(self.units,), initializer='zeros', name='biases'
)

def call(self, inputs):
outputs = []
for i in range(self.units):
selected_inputs = tf.gather(
inputs, self.selected_features_per_unit[i], axis=1
)
output = (
tf.reduce_sum(selected_inputs * self.kernels[i], axis=1)
+ self.biases[i]
)
outputs.append(output)
return tf.stack(outputs, axis=1)
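
        # Note: each unit draws its random feature subset once, at build time,
        # so SubsetDense behaves like a random-subspace ensemble of sparse
        # linear units (one weight per selected feature), not a full Dense.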

# Model configuration
input_dim = X.shape[1] # Number of input features
output_dim = y.shape[1] # Number of outputs

# Build the model
if self.feature_subsample_rate is None:
self.model = Sequential(
[
Dense(
self.size,
input_dim=input_dim,
activation=self.activation,
kernel_regularizer=L1L2(l1=self.l1, l2=self.l2),
), # Example layer
Dense(output_dim), # Output layer
]
)
else:
self.model = Sequential(
[
SubsetDense(
self.size,
input_dim=input_dim,
feature_subsample_rate=self.feature_subsample_rate,
),
tf.keras.layers.Activation(self.activation),
SubsetDense(
self.size // 2,
input_dim=input_dim,
feature_subsample_rate=self.feature_subsample_rate,
),
tf.keras.layers.Activation(self.activation),
Dense(output_dim), # Output layer
]
)

# Compile the model
self.model.compile(optimizer=self.optimizer, loss=self.loss)
        self.model.fit(
            X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=self.verbose
        )

return self

def predict(self, X):
return self.model.predict(X)
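
A minimal smoke test for the new class, assuming TensorFlow is installed; the data below is synthetic:

    import numpy as np

    X = np.random.rand(200, 12)    # 200 samples, 12 features
    y = np.random.rand(200, 3)     # 3 simultaneous output targets
    model = ElasticNetwork(size=64, epochs=5, verbose=0).fit(X, y)
    preds = model.predict(X[:10])  # array of shape (10, 3)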
autots/models/ensemble.py (18 changes: 9 additions & 9 deletions)
@@ -1838,15 +1838,15 @@ def MosaicEnsemble(
f"Mosaic Ensemble failed on model {row[3]} series {row[2]} and period {row[1]} due to missing model: {e} "
+ mi
)
-melted[
-    'forecast'
-] = fore  # [forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
-melted[
-    'upper_forecast'
-] = u_fore  # [upper_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
-melted[
-    'lower_forecast'
-] = l_fore  # [lower_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+melted['forecast'] = (
+    fore  # [forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+)
+melted['upper_forecast'] = (
+    u_fore  # [upper_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+)
+melted['lower_forecast'] = (
+    l_fore  # [lower_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+)

forecast_df = melted.pivot(
values="forecast", columns="series_id", index="forecast_period"
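
For readers skimming the hunk above: `melted` holds one row per (forecast_period, series_id) pair, and each pivot restores a wide DataFrame. A tiny standalone illustration:

    import pandas as pd

    melted = pd.DataFrame({
        "forecast_period": [0, 0, 1, 1],
        "series_id": ["a", "b", "a", "b"],
        "forecast": [1.0, 2.0, 1.5, 2.5],
    })
    wide = melted.pivot(values="forecast", columns="series_id", index="forecast_period")
    # wide: 2x2 frame indexed by forecast_period, one column per series_id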
autots/models/matrix_var.py (2 changes: 1 addition & 1 deletion)
@@ -237,7 +237,7 @@ def get_new_params(self, method: str = 'random'):
"""Return dict of new parameters for parameter tuning."""
return {
'method': random.choices(['als', 'dmd'], [0.7, 0.3])[0],
-'rank': random.choice([2, 4, 6, 0.1, 0.2, 0.5]),
+'rank': random.choice([2, 4, 8, 16, 32, 0.1, 0.2, 0.5]),
'maxiter': 200,
}
