-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathk-fold-random-forest
42 lines (33 loc) · 1.41 KB
/
k-fold-random-forest
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Evaluate a random-forest classifier with stratified k-fold cross-validation:
# load a CSV, split it into folds, fit/predict on each fold, and print the
# per-fold accuracy followed by the mean accuracy across folds.

# Load your dataset (replace the path below with the file path of your dataset).
# A raw string is required: in a normal literal, "\U" in "C:\Users" starts a
# unicode escape and the file fails to parse.
file_path = r'C:\Users\user\Downloads\success3.csv'
data = pd.read_csv(file_path)

# 'X' holds the feature columns and 'y' the target variable.
# Replace 'target_column' with the name of your target variable.
X = data.drop('target_column', axis=1)
y = data['target_column']

# Initialize the Random Forest model (library defaults).
model = RandomForestClassifier()

# Number of folds (k) used for cross-validation.
num_splits = 5

# Stratified k-fold splitter; shuffling with a fixed seed keeps the fold
# assignment reproducible between runs.
skf = StratifiedKFold(n_splits=num_splits, shuffle=True, random_state=42)

fold_scores = []
for fold_counter, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
    # The splitter yields positional indices, hence .iloc rather than .loc.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Fit on the training part of the fold and predict the held-out part.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Accuracy for this fold.
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy for Fold {fold_counter}: {accuracy}")
    fold_scores.append(accuracy)

# Mean accuracy across all folds.
mean_accuracy = sum(fold_scores) / len(fold_scores)
print(f"Average Cross-Validation Accuracy: {mean_accuracy}")