train_and_predict_quality.py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
# Load your dataset
data = pd.read_csv('video_quality_data.csv')
# Features and target variables
X = data[['Advanced Motion Complexity', 'DCT Complexity', 'Temporal DCT Complexity',
          'Histogram Complexity', 'Edge Detection Complexity', 'ORB Feature Complexity',
          'Color Histogram Complexity', 'Bitrate (kbps)', 'Resolution (px)',
          'Frame Rate (fps)', 'CRF', 'average_framerate', 'min_framerate',
          'max_framerate', 'smoothed_frame_rate_variation']]
y_ssim = data['SSIM']
y_psnr = data['PSNR']
y_vmaf = data['VMAF']
# Split the dataset into training and testing sets
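# Reusing the same random_state keeps the row indices of all three target splits aligned.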
X_train, X_test, y_ssim_train, y_ssim_test = train_test_split(X, y_ssim, test_size=0.2, random_state=42)
_, _, y_psnr_train, y_psnr_test = train_test_split(X, y_psnr, test_size=0.2, random_state=42)
_, _, y_vmaf_train, y_vmaf_test = train_test_split(X, y_vmaf, test_size=0.2, random_state=42)
# Standardize the features
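# The scaler is fit on the training split only; the test split is merely transformed (no leakage).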
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Regularization Models (L1 - Lasso and L2 - Ridge)
lasso_ssim = Lasso(alpha=0.01)
ridge_ssim = Ridge(alpha=1.0)
ridge_psnr = Ridge(alpha=1.0)
ridge_vmaf = Ridge(alpha=1.0)
# Train models with L1 and L2 regularization
lasso_ssim.fit(X_train_scaled, y_ssim_train)
ridge_ssim.fit(X_train_scaled, y_ssim_train)
ridge_psnr.fit(X_train_scaled, y_psnr_train)
ridge_vmaf.fit(X_train_scaled, y_vmaf_train)
# Make predictions
ssim_lasso_predictions = lasso_ssim.predict(X_test_scaled)
ssim_ridge_predictions = ridge_ssim.predict(X_test_scaled)
psnr_ridge_predictions = ridge_psnr.predict(X_test_scaled)
vmaf_ridge_predictions = ridge_vmaf.predict(X_test_scaled)
# Evaluate models using Mean Squared Error
ssim_lasso_mse = mean_squared_error(y_ssim_test, ssim_lasso_predictions)
ssim_ridge_mse = mean_squared_error(y_ssim_test, ssim_ridge_predictions)
psnr_ridge_mse = mean_squared_error(y_psnr_test, psnr_ridge_predictions)
vmaf_ridge_mse = mean_squared_error(y_vmaf_test, vmaf_ridge_predictions)
# Print the MSE scores
print(f"SSIM Lasso MSE: {ssim_lasso_mse}")
print(f"SSIM Ridge MSE: {ssim_ridge_mse}")
print(f"PSNR Ridge MSE: {psnr_ridge_mse}")
print(f"VMAF Ridge MSE: {vmaf_ridge_mse}")
# XGBoost for feature importance evaluation
xgb_model_ssim = XGBRegressor(n_estimators=100, learning_rate=0.1)
xgb_model_psnr = XGBRegressor(n_estimators=100, learning_rate=0.1)
xgb_model_vmaf = XGBRegressor(n_estimators=100, learning_rate=0.1)
# Train the models
xgb_model_ssim.fit(X_train, y_ssim_train)
xgb_model_psnr.fit(X_train, y_psnr_train)
xgb_model_vmaf.fit(X_train, y_vmaf_train)
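# Optional sanity check (an addition, not part of the original flow): held-out MSE
# for the untuned XGBoost models, comparable to the Ridge/Lasso numbers above.
for name, model, y_true in [('SSIM', xgb_model_ssim, y_ssim_test),
                            ('PSNR', xgb_model_psnr, y_psnr_test),
                            ('VMAF', xgb_model_vmaf, y_vmaf_test)]:
    print(f"{name} XGBoost MSE: {mean_squared_error(y_true, model.predict(X_test))}")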
# Feature importance from XGBoost (SSIM model example)
feature_importance = xgb_model_ssim.feature_importances_
# Plot feature importance
def plot_feature_importance(importance, names, model_type):
    feature_importance = np.array(importance)
    feature_names = np.array(names)
    fi_data = {'feature_names': feature_names, 'feature_importance': feature_importance}
    fi_df = pd.DataFrame(fi_data)
    fi_df.sort_values(by=['feature_importance'], ascending=False, inplace=True)
    plt.figure(figsize=(10, 8))
    sns.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])
    plt.title(f'{model_type} Feature Importance')
    plt.xlabel('Feature Importance')
    plt.ylabel('Feature Names')
    plt.show()
# Call plot function for SSIM XGBoost model
plot_feature_importance(feature_importance, X.columns, 'XGBoost SSIM')
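# The same plot can be drawn for the other targets if useful:
# plot_feature_importance(xgb_model_psnr.feature_importances_, X.columns, 'XGBoost PSNR')
# plot_feature_importance(xgb_model_vmaf.feature_importances_, X.columns, 'XGBoost VMAF')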
# Hyperparameter tuning for XGBoost (example using cross-validation)
from sklearn.model_selection import GridSearchCV
param_grid = {
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'n_estimators': [50, 100, 200]
}
grid_search = GridSearchCV(XGBRegressor(), param_grid, cv=3, scoring='neg_mean_squared_error', verbose=1)
grid_search.fit(X_train, y_ssim_train)
# Best parameters from grid search
print("Best parameters found: ", grid_search.best_params_)
best_xgb_ssim_model = grid_search.best_estimator_
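# Optional (an addition): confirm the tuned model's gain on the held-out test set.
best_xgb_predictions = best_xgb_ssim_model.predict(X_test)
print(f"Tuned XGBoost SSIM MSE: {mean_squared_error(y_ssim_test, best_xgb_predictions)}")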
# Stacking models (example combining Ridge, Lasso, and XGBoost)
from sklearn.ensemble import StackingRegressor
estimators = [
    ('ridge', Ridge(alpha=1.0)),
    ('lasso', Lasso(alpha=0.01)),
    ('xgb', XGBRegressor(n_estimators=100, learning_rate=0.1))
]
stacking_model = StackingRegressor(estimators=estimators, final_estimator=RandomForestRegressor())
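# StackingRegressor trains the final estimator on out-of-fold predictions of the
# base estimators (5-fold CV by default), which guards against overfitting the meta-model.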
stacking_model.fit(X_train_scaled, y_ssim_train)
# Make predictions using the stacking model
stacking_predictions = stacking_model.predict(X_test_scaled)
stacking_mse = mean_squared_error(y_ssim_test, stacking_predictions)
print(f"Stacking Model MSE: {stacking_mse}")
# Advanced Feature Engineering (Example: Polynomial Features)
from sklearn.preprocessing import PolynomialFeatures
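# interaction_only=True generates pairwise products but no squared terms, limiting feature blow-up.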
poly = PolynomialFeatures(degree=2, interaction_only=True)
X_poly = poly.fit_transform(X)
X_train_poly, X_test_poly, y_train, y_test = train_test_split(X_poly, y_ssim, test_size=0.2, random_state=42)
ridge_model_poly = Ridge(alpha=1.0)
ridge_model_poly.fit(X_train_poly, y_train)
# Predictions with polynomial features
poly_predictions = ridge_model_poly.predict(X_test_poly)
poly_mse = mean_squared_error(y_test, poly_predictions)
print(f"Polynomial Feature Ridge MSE: {poly_mse}")
# Evaluate Model with Cross-Validation
cv_scores = cross_val_score(ridge_ssim, X_train_scaled, y_ssim_train, cv=5, scoring='neg_mean_squared_error')
print(f"Cross-validation MSE scores: {-cv_scores.mean()}")