-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathRecommender.py
110 lines (78 loc) · 3.29 KB
/
Recommender.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import sys
import pandas as pd
import csv
from ChefRequest import makeRequest
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Default suggestions handed back by getRecommendations() when it is called
# without a problem code. Keys are the list positions "0".."9" as strings.
starter_problems = {
    str(position): problem_code
    for position, problem_code in enumerate((
        "CHCHCL",
        "TEST",
        "INTEST",
        "TSORT",
        "FCTRL2",
        "ATM",
        "LADDU",
        "START01",
        "AMR15A",
        "RNDPAIR",
    ))
}
def create_soup(x):
    """Build the text "soup" for one problem row.

    Args:
        x: A mapping-like row (e.g. a pandas Series) with "tags" and
            "author" entries.

    Returns:
        The tags and the author joined by a single space. A missing value
        (None or NaN) contributes an empty string instead of raising.
    """
    def _as_text(value):
        # pandas reads an empty CSV cell as float NaN; NaN is the only float
        # that is not equal to itself.
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value)

    return _as_text(x["tags"]) + ' ' + _as_text(x["author"])
# Path of the CSV file holding the problem table. getRecommendations() also
# appends newly fetched problems to this file and re-reads it.
csv_file = "problem_data.csv"
# Load the problem table once, at import time.
metadata = pd.read_csv(csv_file, low_memory=False)
# Create a new "soup" feature: one text string per row, built by
# create_soup() from that row's tags and author.
metadata["soup"] = metadata.apply(create_soup, axis=1)
# Token-count matrix over the soup text, ignoring English stop words.
count = CountVectorizer(stop_words="english")
count_matrix = count.fit_transform(metadata["soup"])
# Pairwise cosine similarity of every row against every row, so
# cosine_sim[i][j] compares problem i with problem j.
cosine_sim = cosine_similarity(count_matrix, count_matrix)
# Install a fresh 0..n-1 index (the previous index is kept as an "index"
# column) so that index labels equal row positions in cosine_sim.
metadata = metadata.reset_index()
# Reverse lookup: problem code -> row position in metadata / cosine_sim.
indices = pd.Series(metadata.index, index=metadata["code"])
def _lookup_index(code):
    """Return the row position of ``code`` in the current table, or None."""
    position = indices.get(code)
    if position is None:
        return None
    if isinstance(position, pd.Series):
        # The code occurs more than once in the CSV; use its first row.
        if position.empty:
            return None
        position = position.iloc[0]
    return int(position)


def _reload_model():
    """Re-read the CSV and rebuild the similarity matrix and code lookup."""
    global metadata, count, count_matrix, cosine_sim, indices
    metadata = pd.read_csv(csv_file, low_memory=False)
    # Create a new soup feature
    metadata["soup"] = metadata.apply(create_soup, axis=1)
    count = CountVectorizer(stop_words="english")
    count_matrix = count.fit_transform(metadata["soup"])
    cosine_sim = cosine_similarity(count_matrix, count_matrix)
    metadata = metadata.reset_index()
    indices = pd.Series(metadata.index, index=metadata["code"])


def _fetch_and_store_problem(code, contestCode):
    """Fetch one problem from the CodeChef API and append it to the CSV.

    Returns the problem code reported by the API, or "" when the response
    carries no problem code (nothing is written in that case, so the CSV is
    not polluted with blank rows).
    """
    response = makeRequest(
        "GET",
        "https://api.codechef.com/contests/" + contestCode + "/problems/" + code,
    ).json()
    problem_dict = response.get("result", {}).get(
        "data", {}).get("content", {})
    data_dict = {
        "code": problem_dict.get("problemCode", ""),
        "tags": problem_dict.get("tags", []),
        "author": problem_dict.get("author", ""),
        "solved": problem_dict.get("successfulSubmissions", 0),
        "attempted": problem_dict.get("totalSubmissions", 0),
        "partiallySolved": problem_dict.get("partialSubmissions"),
    }
    if not data_dict["code"]:
        return ""
    # NOTE(review): assumes the CSV's columns are in exactly this order.
    csv_columns = ["code", "tags", "author",
                   "solved", "attempted", "partiallySolved"]
    try:
        # newline='' lets the csv module control line endings itself.
        with open(csv_file, 'a', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            writer.writerow(data_dict)
    except IOError:
        print("I/O error")
    return data_dict["code"]


def getRecommendations(code="", contestCode=""):
    """Return the problems most similar to the given problem.

    Args:
        code: Problem code to find neighbours for. When empty, the fixed
            ``starter_problems`` mapping is returned.
        contestCode: Contest the problem belongs to; only used to build the
            API URL when ``code`` is not yet in the local table.

    Returns:
        ``starter_problems`` when ``code`` is empty or the problem cannot be
        resolved (unknown to the API, or it could not be stored); otherwise
        a pandas Series with the codes of up to 10 most similar problems,
        never including the queried problem itself.

    Side effects:
        For an unknown ``code`` the problem is fetched via ``makeRequest``,
        appended to ``csv_file``, and the module-level model (``metadata``,
        ``count``, ``count_matrix``, ``cosine_sim``, ``indices``) is rebuilt.
    """
    if not code:
        return starter_problems

    # Get the index of the problem that matches the problem code
    idx = _lookup_index(code)
    if idx is None:
        fetched_code = _fetch_and_store_problem(code, contestCode)
        if not fetched_code:
            return starter_problems
        _reload_model()
        # Look the row up again instead of relying on a -1 index happening
        # to address the freshly appended last row.
        idx = _lookup_index(code)
        if idx is None:
            # The API may report the code spelled differently than requested.
            idx = _lookup_index(fetched_code)
        if idx is None:
            # The row never reached the CSV (e.g. the write failed).
            return starter_problems

    # Rank every problem by its similarity to the requested one
    ranked = sorted(enumerate(cosine_sim[idx]),
                    key=lambda pair: pair[1], reverse=True)
    # Drop the requested problem explicitly; with tied scores it is not
    # guaranteed to sort first. Then keep the 10 best matches.
    problem_indices = [row for row, _score in ranked if row != idx][:10]
    # Return the top 10 most similar problems
    return metadata["code"].iloc[problem_indices]
def recommendProblem(problem_code, contest_code):
    """Print the recommendations for a problem; returns None.

    Args:
        problem_code: Code of the problem to find similar problems for.
        contest_code: Code of the contest the problem belongs to.
    """
    print(getRecommendations(code=problem_code, contestCode=contest_code))
# Script entry point: print recommendations for a sample problem, then exit
# with whatever recommendProblem() returned as the status.
if __name__ == '__main__':
    exit_status = recommendProblem("JAGAM", "PRACTICE")
    sys.exit(exit_status)