-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsel multi.py
74 lines (61 loc) · 2.25 KB
/
sel multi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import schedule
import time
import numpy as np
import pandas as pd
import itertools
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
# Path to a local ChromeDriver binary.
# NOTE(review): not referenced anywhere in the visible code; webdriver.Chrome()
# below is called without it. Kept so any external reader of the name still works.
DRIVER_PATH = 'chromedriver.exe'

# Start a Chrome session and open the YouTube home page; url_check() later
# reads the title and URL of whatever page this session is showing.
wd = webdriver.Chrome()
wd.get("https://www.youtube.com/")

# Load the pre-trained classifier used by predictor().
# SECURITY: unpickling executes arbitrary code embedded in the file, so this
# model file must come from a trusted source.
filename = "PAC_Multi_Data_model.sav"
with open(filename, 'rb') as model_file:  # close the handle once loaded
    pac = pickle.load(model_file)

# Read the title corpus and force the 'title' column to str so the vectorizer
# only ever sees text.
dataset = pd.read_csv("bye.csv")
convert_dict = {
    'title': str
}
dataset = dataset.astype(convert_dict)

# Fit the TF-IDF vocabulary on the corpus titles; predictor() reuses this
# fitted vectorizer to transform each incoming title.
# NOTE(review): presumably this must match the data/settings the pickled model
# was trained with -- verify against the training script.
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
tfidf_train = tfidf_vectorizer.fit_transform(dataset['title'])
# Display names for the integer class labels handled by predictor().
_CATEGORY_NAMES = {
    0: "Educational",
    1: "Music",
    2: "Sports",
    3: "Gaming",
    4: "Movies",
}

# Minimum probability of the predicted class required to report a category.
_MIN_CONFIDENCE = 0.4


def predictor(inp) -> None:
    """Classify a title and print the predicted category with its confidence.

    The title is vectorized with the module-level ``tfidf_vectorizer`` and
    scored with the module-level ``pac`` model. When the probability of the
    predicted class is at least 0.4 the category name and the confidence (as a
    percentage) are printed; otherwise "Unable to classify" is printed,
    followed by the raw prediction and its probability.

    Args:
        inp: The title to classify; converted with ``str()`` before use.
    """
    features = tfidf_vectorizer.transform([str(inp)])

    # Exactly one title is transformed, so both results hold a single row.
    probabilities = pac._predict_proba_lr(features)
    result = pac.predict(features)

    # Work with a plain int label rather than the one-element result array.
    # As in the original code, the label doubles as the column index into the
    # probability row.
    label = int(result[0])
    confidence = probabilities[0][label]

    if confidence >= _MIN_CONFIDENCE:
        category = _CATEGORY_NAMES.get(label)
        if category is None:
            # Labels outside the table used to be dropped without any output.
            print("Unknown category", label, confidence * 100, "% confidence")
        else:
            print(category, confidence * 100, "% confidence")
    else:
        print("Unable to classify")
        # NOTE(review): the file's indentation was lost; this raw dump is
        # assumed to belong to the low-confidence branch -- confirm.
        print(result, confidence)
# Pages for which no prediction is made: the YouTube home page and two URL
# forms of one specific video.
_SKIPPED_URLS = frozenset({
    "https://www.youtube.com/",
    "https://www.youtube.com/watch?v=dQw4w9WgXcQ&ab_channel=RickAstley",
    "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
})


def url_check() -> None:
    """Print the current page title and classify it when on a YouTube page.

    The title of the page shown by the module-level driver ``wd`` is always
    printed. It is passed to ``predictor`` only when it ends with "YouTube"
    and the current URL is not one of the skipped pages.
    """
    title = wd.title
    print(title)

    if not title.endswith("YouTube"):
        return

    # Read the URL a single time: every access asks the browser again, so
    # repeated reads cost extra driver calls and could disagree with each
    # other if the page navigates in between.
    if wd.current_url in _SKIPPED_URLS:
        return

    predictor(title)
# Register url_check to run every two seconds, then drive the scheduler
# forever, checking for due jobs once per second.
schedule.every(2).seconds.do(url_check)
try:
    while True:
        schedule.run_pending()
        time.sleep(1)
finally:
    # The loop only ends through an exception (e.g. Ctrl-C or a failing job);
    # end the browser session instead of leaving it running. The exception
    # itself still propagates as before.
    wd.quit()