-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathanomaly_pycaret.py
147 lines (133 loc) · 7.24 KB
/
anomaly_pycaret.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import streamlit as st
import pandas as pd
from pycaret.anomaly import setup, predict_model, pull, plot_model, create_model, models, assign_model, save_model
def _reset_anomaly_state() -> None:
    """Mark the anomaly workflow as started and clear its progress flags."""
    st.session_state.begin_ad = True
    st.session_state.button_clicked_ad = False
    st.session_state.form_submitted = False
    st.session_state.model_saved = False


def _split_train_holdout(dataset: pd.DataFrame, features: list, train_size: float) -> tuple:
    """Split the selected columns into a training sample and a disjoint holdout.

    Args:
        dataset: Full input table.
        features: Column labels to keep.
        train_size: Fraction of rows sampled for training.

    Returns:
        ``(train, holdout)``, both re-indexed from 0. ``holdout`` contains
        exactly the rows that were not sampled into ``train``.
    """
    # Normalise to positional labels first so the drop below is well defined
    # even when the incoming index has gaps or duplicate labels.
    selected = dataset[features].reset_index(drop=True)
    train = selected.sample(frac=train_size, random_state=786)
    # Drop by the *sampled* labels. The index must only be reset afterwards;
    # resetting before the drop would remove the first N rows instead and
    # leave training rows inside the holdout.
    holdout = selected.drop(train.index)
    return train.reset_index(drop=True), holdout.reset_index(drop=True)


def _read_uploaded_table(uploaded_file) -> pd.DataFrame:
    """Load an uploaded CSV or Excel file into a DataFrame."""
    # The uploader only accepts csv/xlsx, so anything that is not CSV is
    # treated as Excel. The comparison ignores case so "DATA.CSV" works too.
    if uploaded_file.name.lower().endswith('.csv'):
        return pd.read_csv(uploaded_file)
    return pd.read_excel(uploaded_file)


def anomalyPycaret() -> None:
    """Render the "AutoML - Anomaly detection" Streamlit page.

    Reads the working table from ``st.session_state.df``. When it has rows,
    the page shows a form to pick feature columns, the training fraction, a
    PyCaret anomaly model and an optional test file. On "AutoML" it creates
    the model, shows the assigned results, saves the model under the name
    ``anomaly_model``, plots t-SNE, scores the holdout rows and the optional
    test file, and finally offers ``anomaly_model.pkl`` for download.

    Session-state keys written: ``begin_ad``, ``button_clicked_ad``,
    ``form_submitted``, ``model_saved``, ``page`` and ``df`` (the last two
    only when missing).
    """
    st.subheader("AutoML - Anomaly detection")

    if 'begin_ad' not in st.session_state:
        st.session_state.begin_ad = False

    if not st.session_state.begin_ad:
        st.info("To get started, double-click on the Begin button.")
        if st.button(" Begin "):
            _reset_anomaly_state()
    else:
        st.info("To restart AutoML, click on the 'Reset' button.")
        if st.button("Reset"):
            _reset_anomaly_state()

    if 'page' not in st.session_state:
        st.session_state.page = "Home"
    if 'df' not in st.session_state:
        st.session_state.df = pd.DataFrame()
    dataset = st.session_state.df

    # Nothing to configure until a dataset with rows has been loaded.
    if len(dataset) == 0:
        return

    st.write("### Data Preview")
    st.write(dataset)

    if 'form_submitted' not in st.session_state:
        st.session_state.form_submitted = False
    if 'model_saved' not in st.session_state:
        st.session_state.model_saved = False

    # Bound up front so the post-form section can tell "not configured yet"
    # apart from a real failure instead of tripping over unbound names.
    anomaly_model = ''
    model_df = None
    data_unseen = None
    uploaded_file_test = None

    st.write("### Model Selection and Configuration")
    with st.form("model_form"):
        features = st.multiselect("Select features to include:", dataset.columns)
        train_size = st.slider("Set the training data size:", 0.1, 0.9, 0.8)
        st.markdown(
            '<p style="color:#3355FF">Click the button below to confirm '
            'your selection before filling out the other fields.</p>',
            unsafe_allow_html=True)
        if st.form_submit_button("Click here to confirm your selection"):
            st.session_state.form_submitted = False

        if features:
            data, data_unseen = _split_train_holdout(dataset, features, train_size)
            # Set up the PyCaret anomaly experiment on the training sample.
            try:
                setup(data, session_id=123)
            except Exception as e:
                st.error(str(e))
            else:
                # Only list models when setup succeeded; otherwise the list
                # would come from a missing or stale experiment.
                model_df = models()
                if not model_df.empty:
                    anomaly_model = st.selectbox("Choose the model name",
                                                 model_df['Name'].tolist())
            st.write(" ")
            uploaded_file_test = st.file_uploader(
                "If you want to upload a test dataset, upload CSV or Excel test (optional) "
                "file", type=["csv", "xlsx"], key='test')
            st.write(" ")

        if st.form_submit_button("AutoML"):
            st.session_state.form_submitted = True

    # Process form submission.
    if st.session_state.form_submitted and st.session_state.begin_ad:
        if model_df is None:
            # Either no feature was selected, or setup() failed and its error
            # message is already displayed inside the form.
            if not features:
                st.warning("Please select at least one feature before running AutoML.")
        else:
            st.markdown('<p style="color:#4FFF33">Setup Successfully Completed!</p>',
                        unsafe_allow_html=True)
            st.dataframe(pull())
            # `except Exception` (not a bare except) so interpreter-level
            # signals such as KeyboardInterrupt/SystemExit are not swallowed.
            try:
                model_id = model_df[model_df['Name'] == anomaly_model].index[0]
                created_model = create_model(model_id)

                st.write("#### Assign")
                st.dataframe(assign_model(created_model))

                # Persist the model; the download section below reads
                # 'anomaly_model.pkl'.
                save_model(created_model, 'anomaly_model')

                st.write("#### t-SNE (3d) Dimension Plot")
                with st.spinner("Running......"):
                    try:
                        plot_model(created_model, plot='tsne', display_format='streamlit')
                    except Exception:
                        st.write("The plot is unavailable; please consider using alternative model.")

                try:
                    # Predict labels on the holdout rows.
                    pred_holdout = predict_model(created_model, data_unseen)
                    st.write('#### Predictions from holdout set (validation set)')
                    st.dataframe(pred_holdout)
                except Exception:
                    st.error("Something went wrong, please try other models")

                if uploaded_file_test:
                    st.write('### Test data')
                    # Reading, column selection and scoring share one guard so
                    # a bad test file cannot hide the download of a model that
                    # was already saved successfully.
                    try:
                        test_dataset = _read_uploaded_table(uploaded_file_test)
                        st.write("Test Data Preview:")
                        st.dataframe(test_dataset)
                        test_pred = predict_model(created_model, test_dataset[features])
                        st.write("### Prediction")
                        st.dataframe(test_pred)
                    except Exception:
                        st.error("Something went wrong, please try other models")

                st.session_state.model_saved = True
            except Exception:
                st.error("Something went wrong, please try other models")

    # Display download button if model is saved.
    if st.session_state.model_saved:
        with open('anomaly_model.pkl', 'rb') as f:
            st.download_button(
                label="Download Model",
                data=f,
                file_name='anomaly_model.pkl',
                mime='application/octet-stream'
            )