From 2ad88a5aacb190f1f8760de7ed12da785b72e5cc Mon Sep 17 00:00:00 2001
From: Patricio Cerda Mardini <patricio.mardini@mindsdb.com>
Date: Tue, 21 Dec 2021 18:58:10 -0300
Subject: [PATCH 1/3] fix: sort non-grouped DF in TS transform method

---
 lightwood/data/timeseries_transform.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lightwood/data/timeseries_transform.py b/lightwood/data/timeseries_transform.py
index 3eec6d26e..e1390bc70 100644
--- a/lightwood/data/timeseries_transform.py
+++ b/lightwood/data/timeseries_transform.py
@@ -106,7 +106,7 @@ def transform_timeseries(
             df_arr.append(df.sort_values(by=ob_arr))
             group_lengths.append(len(df))
     else:
-        df_arr = [original_df]
+        df_arr = [original_df.sort_values(by=ob_arr)]
         group_lengths.append(len(original_df))
 
     n_groups = len(df_arr)

From e96cbf1f10670834c411c8c2890cf2326b0355ac Mon Sep 17 00:00:00 2001
From: Patricio Cerda Mardini <patricio.mardini@mindsdb.com>
Date: Tue, 21 Dec 2021 18:58:28 -0300
Subject: [PATCH 2/3] fix: drop duplicate timestamps for TS delta estimation

---
 lightwood/data/timeseries_analyzer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lightwood/data/timeseries_analyzer.py b/lightwood/data/timeseries_analyzer.py
index a55e9ce59..081f3fa08 100644
--- a/lightwood/data/timeseries_analyzer.py
+++ b/lightwood/data/timeseries_analyzer.py
@@ -82,6 +82,7 @@ def get_delta(df: pd.DataFrame, ts_info: dict, group_combinations: list, order_c
     # get default delta for all data
     for col in order_cols:
         series = pd.Series([x[-1] for x in df[col]])
+        series = series.drop_duplicates()  # by this point df is ordered so duplicate timestamps are either because of non-handled groups or repeated data that, for mode delta estimation, should be ignored  # noqa
         rolling_diff = series.rolling(window=2).apply(lambda x: x.iloc[1] - x.iloc[0])
         delta = rolling_diff.value_counts(ascending=False).keys()[0]  # pick most popular
         deltas["__default"][col] = delta

From 8e36a0cd291369c6e0b7cd0a87f811766da3fadd Mon Sep 17 00:00:00 2001
From: Patricio Cerda Mardini <patricio.mardini@mindsdb.com>
Date: Tue, 21 Dec 2021 18:59:08 -0300
Subject: [PATCH 3/3] tests: check that non-grouped DF with repeated values
 estimates the correct time delta

---
 tests/integration/advanced/test_timeseries.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/integration/advanced/test_timeseries.py b/tests/integration/advanced/test_timeseries.py
index 2ae985187..cd1cf9627 100644
--- a/tests/integration/advanced/test_timeseries.py
+++ b/tests/integration/advanced/test_timeseries.py
@@ -137,9 +137,16 @@ def test_1_time_series_regression(self):
 
         # test inferring mode
         test_df['__mdb_make_predictions'] = False
+        test_df = test_df.sample(frac=1)  # shuffle to test internal ordering logic
         preds = pred.predict(test_df)
         self.check_ts_prediction_df(preds, nr_preds, [order_by])
 
+        # Additionally, check timestamps are further into the future than test dates
+        latest_timestamp = pd.to_datetime(test_df[order_by]).max().timestamp()
+        for idx, row in preds.iterrows():
+            for timestamp in row[f'order_{order_by}']:
+                assert timestamp > latest_timestamp
+
     def test_2_time_series_classification(self):
         from lightwood.api.high_level import predictor_from_problem