Skip to content

Commit

Permalink
Merge pull request #27 from fredmontet/22-detailed-plot-of-anomalies
Browse files Browse the repository at this point in the history
22 detailed plot of anomalies
  • Loading branch information
fredmontet authored Dec 4, 2023
2 parents fd7241f + 5df7176 commit 5cbfdd4
Show file tree
Hide file tree
Showing 12 changed files with 1,143 additions and 701 deletions.
175 changes: 175 additions & 0 deletions notebooks/docs/0.8-anomaly-time-series-plot.ipynb

Large diffs are not rendered by default.

945 changes: 293 additions & 652 deletions notebooks/docs/0_core/0.5-plots.ipynb

Large diffs are not rendered by default.

42 changes: 9 additions & 33 deletions notebooks/docs/0_core/0.6-anomaly-frequency.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@
"cell_type": "markdown",
"id": "7c555cfe9131a67",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"source": [
"# Anomaly Frequency\n",
Expand Down Expand Up @@ -48,10 +45,7 @@
"end_time": "2023-10-25T12:33:05.716873700Z",
"start_time": "2023-10-25T12:33:05.709405600Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [],
"source": [
Expand All @@ -67,10 +61,7 @@
"end_time": "2023-10-25T12:33:06.068692300Z",
"start_time": "2023-10-25T12:33:05.716873700Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [
{
Expand Down Expand Up @@ -100,10 +91,7 @@
"end_time": "2023-10-25T12:33:06.554772400Z",
"start_time": "2023-10-25T12:33:06.074360Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [
{
Expand Down Expand Up @@ -132,10 +120,7 @@
"end_time": "2023-10-25T12:33:07.017948600Z",
"start_time": "2023-10-25T12:33:06.554772400Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [
{
Expand Down Expand Up @@ -170,14 +155,11 @@
"end_time": "2023-10-25T12:33:07.028332900Z",
"start_time": "2023-10-25T12:33:07.017948600Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [],
"source": [
"anomalies_frequencies = on.context.common.AnomaliesFrequencies(ts_detect)"
"anomalies_frequencies = on.context.common.AnomalyFrequency(ts_detect)"
]
},
{
Expand All @@ -189,10 +171,7 @@
"end_time": "2023-10-25T12:34:06.702046600Z",
"start_time": "2023-10-25T12:34:06.437698300Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [
{
Expand Down Expand Up @@ -231,10 +210,7 @@
"end_time": "2023-10-25T12:34:08.559774100Z",
"start_time": "2023-10-25T12:34:08.273890800Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [
{
Expand Down
210 changes: 210 additions & 0 deletions notebooks/docs/0_core/0.7-anomaly-time-series-plot.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion notebooks/getting-started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
"metadata": {},
"outputs": [],
"source": [
"import ontime as on"
"import ontime as on\n",
"import pandas as pd"
]
},
{
Expand Down
249 changes: 237 additions & 12 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@ vegafusion-python-embed = "^1.4.1"
vegafusion = {extras = ["embed"], version = "^1.4.1"}
protobuf = "^4.24.4"
vega-datasets = "^0.9.0"
ipywidgets = "^8.1.1"
altair-viewer = "^0.4.0"

[tool.poetry.group.dev.dependencies]
jupyterlab = "^4.0.5"
pre-commit = "^3.3.3"
black = "^23.7.0"
nbmake = "^1.4.6"
pytest-xdist = "^3.5.0"

[build-system]
requires = ["poetry-core"]
Expand Down
1 change: 1 addition & 0 deletions src/ontime/core/plot/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from . import plots
from . import anomaly_plot
212 changes: 212 additions & 0 deletions src/ontime/core/plot/anomaly_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
import pandas as pd

from ..time_series import TimeSeries, BinaryTimeSeries
import altair as alt


class AnomalyPlot:
    """
    Namespace of static helpers that draw a time series together with its
    anomalies as a single layered Altair chart.
    """

    # Name of the value column read from the plotted series' DataFrame.
    _DATA_COLUMN_NAME = "data"

    @staticmethod
    def plot_anomalies(
        data: TimeSeries,
        point_anomalies: BinaryTimeSeries | None = None,
        contextual_anomalies: BinaryTimeSeries | None = None,
        collective_anomalies: BinaryTimeSeries | None = None,
        seasonal_anomalies: BinaryTimeSeries | None = None,
        cyclical_anomalies: BinaryTimeSeries | None = None,
    ):
        """
        Plot the given time series and overlay its anomalies.
        Every kind of anomaly that is provided is added as an extra layer on the same chart.
        :param data: TimeSeries data. Its DataFrame is read through the column named "data".
        :param point_anomalies: BinaryTimeSeries of point anomalies. Will be plotted as points on the graph.
        :param contextual_anomalies: BinaryTimeSeries of contextual anomalies. Will be plotted as a line on the graph.
        :param collective_anomalies: BinaryTimeSeries of collective anomalies. Will be plotted as an area on the graph.
        :param seasonal_anomalies: BinaryTimeSeries of seasonal anomalies. Will be plotted as a line on the graph.
        :param cyclical_anomalies: BinaryTimeSeries of cyclical anomalies. Will be plotted as a line on the graph.
        :return: Return an altair chart with the time series and the anomalies drawn. The data line uses the default
        mark colour and the anomalies are drawn in red.
        """
        # Every layer encodes x as "time:T", so make sure the index really is
        # called "time" before reset_index() turns it into a column.
        data_df = data.pd_dataframe().rename_axis("time")

        chart = (
            alt.Chart(data_df.reset_index())
            .mark_line()
            .encode(
                x="time:T",
                y=alt.Y(
                    f"{AnomalyPlot._DATA_COLUMN_NAME}:Q",
                    axis=alt.Axis(title="Values"),
                ),
            )
            .properties(
                title="Chart representing the data and the anomalies over the time",
                width=600,
                height=400,
            )
        )

        if point_anomalies is not None:
            chart += AnomalyPlot._make_point_chart(data_df, point_anomalies)

        if contextual_anomalies is not None:
            chart = AnomalyPlot._make_line_chart(data_df, chart, contextual_anomalies)

        if collective_anomalies is not None:
            chart = AnomalyPlot._make_area_chart(data_df, chart, collective_anomalies)

        if seasonal_anomalies is not None:
            chart = AnomalyPlot._make_line_chart(data_df, chart, seasonal_anomalies)

        if cyclical_anomalies is not None:
            chart = AnomalyPlot._make_line_chart(data_df, chart, cyclical_anomalies)

        return chart

    @staticmethod
    def _make_point_chart(
        data_df: pd.DataFrame, anomalies: BinaryTimeSeries
    ) -> alt.Chart:
        """
        Make a chart of red circles located on the data values flagged as anomalies.
        :param data_df: DataFrame of the data that are plotted in the chart.
        :param anomalies: BinaryTimeSeries of anomalies that will be plotted as red points.
        :return: Return an altair chart containing only the anomaly points.
        """
        anomalies_df = anomalies.pd_dataframe().rename_axis("time")
        # Multiplying the flag by the value puts each flagged point at the
        # height of the data; unflagged rows are removed by the filter below.
        anomalies_df = anomalies_df.assign(
            anomalies_y=anomalies_df["anomalies"]
            * data_df[AnomalyPlot._DATA_COLUMN_NAME]
        )

        return (
            alt.Chart(anomalies_df.reset_index())
            .mark_circle(color="red", size=100)
            .encode(
                x="time:T",
                y="anomalies_y:Q",
            )
            .transform_filter(alt.datum.anomalies == 1)
        )

    @staticmethod
    def _make_line_chart(
        data: pd.DataFrame, actual_chart: alt.Chart, anomalies: BinaryTimeSeries
    ) -> alt.Chart:
        """
        Add one red line segment per continuous run of anomalies on top of the given chart.
        :param data: DataFrame of the data that are plotted in the chart.
        :param actual_chart: Chart on which the anomalies are layered.
        :param anomalies: BinaryTimeSeries of anomalies that will be plotted in red on the chart.
        :return: Return an altair chart with the current time series and the anomalies drawn.
        """
        chart_total = actual_chart
        for run_df in AnomalyPlot.split_continuous_series(anomalies):
            # assign() aligns on the index, so only the timestamps of this run
            # are kept and the frame returned by the splitter is not mutated.
            run_df = run_df.assign(
                anomalies_y=run_df["anomalies"] * data[AnomalyPlot._DATA_COLUMN_NAME]
            )
            chart_total += (
                alt.Chart(run_df.reset_index())
                .mark_line(color="red", strokeWidth=2.5)
                .encode(
                    x="time:T",
                    y="anomalies_y:Q",
                )
            )

        return chart_total

    @staticmethod
    def _make_area_chart(
        data_df: pd.DataFrame, actual_chart: alt.Chart, anomalies: BinaryTimeSeries
    ) -> alt.Chart:
        """
        Add a red background band behind the given chart for each continuous run of anomalies.
        :param data_df: DataFrame of the data that are plotted in the chart.
        :param actual_chart: Chart that will be drawn on top of the bands.
        :param anomalies: BinaryTimeSeries of anomalies that will be used to color the background in red on the chart.
        :return: Return an altair chart with the current time series and the anomalies drawn. When there is no
        anomaly, the given chart is returned as is.
        """
        values = data_df[AnomalyPlot._DATA_COLUMN_NAME]
        y_max = values.max()
        y_min = values.min()

        # Extend the bands by 10% of the value range above and below the data.
        delta = (y_max - y_min) * 0.1

        bands = None
        for run_df in AnomalyPlot.split_continuous_series(anomalies):
            run_df = run_df.assign(max=y_max + delta, min=y_min - delta)

            band = (
                alt.Chart(run_df.reset_index())
                .mark_area(color="red", opacity=0.3)
                .encode(
                    x="time:T",
                    y="min:Q",
                    y2="max:Q",
                )
            )
            bands = band if bands is None else bands + band

        if bands is None:
            return actual_chart
        # Bands come first so that they are rendered as background.
        return bands + actual_chart

    @staticmethod
    def split_continuous_series(anomalies: BinaryTimeSeries) -> list[pd.DataFrame]:
        """
        Split a series of anomalies into one DataFrame per continuous run of anomalies. The response covers all the
        anomalies (rows whose "anomalies" value equals 1) of the BinaryTimeSeries; rows that are not anomalies are
        not represented anymore.
        :param anomalies: BinaryTimeSeries of anomalies that will be split.
        :return: Return a list of DataFrame, in chronological order, each one holding the rows of one run. Their
        index is named "time". The list is empty when there is no anomaly.
        """
        anomalies_df = anomalies.pd_dataframe()

        is_anomaly = anomalies_df["anomalies"] == 1
        if not is_anomaly.any():
            return []

        # A new run id starts every time the flag flips, so consecutive rows
        # sharing the same flag also share the same id.
        run_ids = is_anomaly.ne(is_anomaly.shift(fill_value=False)).cumsum()

        # Group only the flagged rows. The key is passed as a plain array so
        # that grouping is positional and does not depend on index alignment.
        flagged_runs = anomalies_df[is_anomaly].groupby(run_ids[is_anomaly].to_numpy())
        return [run_df.rename_axis("time") for _, run_df in flagged_runs]
1 change: 0 additions & 1 deletion src/ontime/core/time_series/binary_time_series.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import xarray as xr
import numpy as np

from .resticted_time_series import RestrictedTimeSeries


Expand Down
1 change: 0 additions & 1 deletion src/ontime/core/time_series/probabilistic_time_series.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import xarray as xr
import numpy as np

from .resticted_time_series import RestrictedTimeSeries


Expand Down
1 change: 0 additions & 1 deletion src/ontime/core/time_series/time_series.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations
from typing import List

from darts import TimeSeries as DartsTimeSeries
import pandas as pd
import xarray as xr
Expand Down

0 comments on commit 5cbfdd4

Please sign in to comment.