diff --git a/lightwood/data/timeseries_analyzer.py b/lightwood/data/timeseries_analyzer.py index a55e9ce59..081f3fa08 100644 --- a/lightwood/data/timeseries_analyzer.py +++ b/lightwood/data/timeseries_analyzer.py @@ -82,6 +82,7 @@ def get_delta(df: pd.DataFrame, ts_info: dict, group_combinations: list, order_c # get default delta for all data for col in order_cols: series = pd.Series([x[-1] for x in df[col]]) + series = series.drop_duplicates() # by this point df is ordered so duplicate timestamps are either because of non-handled groups or repeated data that, for mode delta estimation, should be ignored # noqa rolling_diff = series.rolling(window=2).apply(lambda x: x.iloc[1] - x.iloc[0]) delta = rolling_diff.value_counts(ascending=False).keys()[0] # pick most popular deltas["__default"][col] = delta diff --git a/lightwood/data/timeseries_transform.py b/lightwood/data/timeseries_transform.py index 3eec6d26e..e1390bc70 100644 --- a/lightwood/data/timeseries_transform.py +++ b/lightwood/data/timeseries_transform.py @@ -106,7 +106,7 @@ def transform_timeseries( df_arr.append(df.sort_values(by=ob_arr)) group_lengths.append(len(df)) else: - df_arr = [original_df] + df_arr = [original_df.sort_values(by=ob_arr)] group_lengths.append(len(original_df)) n_groups = len(df_arr) diff --git a/tests/integration/advanced/test_timeseries.py b/tests/integration/advanced/test_timeseries.py index 2ae985187..cd1cf9627 100644 --- a/tests/integration/advanced/test_timeseries.py +++ b/tests/integration/advanced/test_timeseries.py @@ -137,9 +137,16 @@ def test_1_time_series_regression(self): # test inferring mode test_df['__mdb_make_predictions'] = False + test_df = test_df.sample(frac=1) # shuffle to test internal ordering logic preds = pred.predict(test_df) self.check_ts_prediction_df(preds, nr_preds, [order_by]) + # Additionally, check timestamps are further into the future than test dates + latest_timestamp = pd.to_datetime(test_df[order_by]).max().timestamp() + for idx, row in preds.iterrows(): + for timestamp in row[f'order_{order_by}']: + assert timestamp > latest_timestamp + def test_2_time_series_classification(self): from lightwood.api.high_level import predictor_from_problem