import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from mlxtend.regressor import StackingCVRegressor
# Assumes a prepared 'merged_df' DataFrame (time-ordered rows) is already available
target_variable = 'INF_Value'
# Define features and target
features = merged_df.drop(target_variable, axis=1)
target = merged_df[target_variable]
# Split the data with a time-series-aware splitter; the loop keeps the last
# (most recent) fold as the train/validation split used below
tscv = TimeSeriesSplit(n_splits=5)
for train_index, val_index in tscv.split(features):
    X_train, X_val = features.iloc[train_index], features.iloc[val_index]
    y_train, y_val = target.iloc[train_index], target.iloc[val_index]
# Models
rf_model = RandomForestRegressor(random_state=42)
xgb_model = XGBRegressor(random_state=42)
catboost_model = CatBoostRegressor(random_state=42, silent=True)
# Train the models
rf_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
catboost_model.fit(X_train, y_train)
# Make predictions on the validation set
rf_val_preds = rf_model.predict(X_val)
xgb_val_preds = xgb_model.predict(X_val)
catboost_val_preds = catboost_model.predict(X_val)
# Create a DataFrame with validation set predictions as features
meta_features_val = pd.DataFrame({'RF': rf_val_preds, 'XGB': xgb_val_preds, 'CatBoost': catboost_val_preds})
# Initialize the meta-regressor (any regressor could be substituted here)
meta_regressor = CatBoostRegressor(random_state=42, silent=True)
# Fit the meta-regressor on the base models' validation-fold predictions
meta_regressor.fit(meta_features_val, y_val)
# Make stacked predictions on the validation fold. Note: the meta-regressor was
# fit on these same meta-features, so the stacking metrics below are optimistic;
# a held-out test set (or out-of-fold base-model predictions) would give a
# fairer comparison.
stacking_preds = meta_regressor.predict(meta_features_val)
# Evaluate the individual models on the validation fold
rf_mae = mean_absolute_error(y_val, rf_val_preds)
rf_rmse = mean_squared_error(y_val, rf_val_preds, squared=False)
rf_mape = mean_absolute_percentage_error(y_val, rf_val_preds)

xgb_mae = mean_absolute_error(y_val, xgb_val_preds)
xgb_rmse = mean_squared_error(y_val, xgb_val_preds, squared=False)
xgb_mape = mean_absolute_percentage_error(y_val, xgb_val_preds)

catboost_mae = mean_absolute_error(y_val, catboost_val_preds)
catboost_rmse = mean_squared_error(y_val, catboost_val_preds, squared=False)
catboost_mape = mean_absolute_percentage_error(y_val, catboost_val_preds)

# Evaluate the stacking model
stacking_mae = mean_absolute_error(y_val, stacking_preds)
stacking_rmse = mean_squared_error(y_val, stacking_preds, squared=False)
stacking_mape = mean_absolute_percentage_error(y_val, stacking_preds)
# Print metrics for the individual models (validation fold)
print("Random Forest Validation MAE:", rf_mae)
print("Random Forest Validation RMSE:", rf_rmse)
print("Random Forest Validation MAPE (%):", rf_mape * 100)

print("XGBoost Validation MAE:", xgb_mae)
print("XGBoost Validation RMSE:", xgb_rmse)
print("XGBoost Validation MAPE (%):", xgb_mape * 100)

print("CatBoost Validation MAE:", catboost_mae)
print("CatBoost Validation RMSE:", catboost_rmse)
print("CatBoost Validation MAPE (%):", catboost_mape * 100)

# Print metrics for the stacking model
print("Stacking Validation MAE:", stacking_mae)
print("Stacking Validation RMSE:", stacking_rmse)
print("Stacking Validation MAPE (%):", stacking_mape * 100)