-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathratings.py
138 lines (108 loc) · 5.11 KB
/
ratings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import time
import trueskill as ts
from itertools import groupby
from sqlalchemy import update
from scipy.stats import rankdata
import config
from database import db
from model import Races, Results, Racers
# Shared TrueSkill environment for this module. New ratings start from
# config.MU / config.SIGMA (reset_ratings() writes env.mu / env.sigma back as
# the defaults) and draws are given zero probability.
# NOTE(review): the 'mpmath' backend is presumably chosen for numerical
# precision when rating large fields - confirm.
env = ts.TrueSkill(mu=config.MU, sigma=config.SIGMA,
backend='mpmath', draw_probability=0)
def get_all_ratings(debug_limit=None):
    """Rate every result in the Results table, one race category at a time.

    Results are read ordered by race date, race id, category name and place,
    then grouped by (race_id, RaceCategoryName). For each group:

    1. the racers' current ratings are copied onto the result rows as both
       the prior and the current rating,
    2. a predicted place is computed for every racer (DNFs included),
    3. racers with a recorded Place are re-rated with TrueSkill and the new
       rating is written to both the result row and the racer row.

    Racers without a Place (DNFs) keep their prior rating, and groups with
    fewer than two placing racers are not rated at all. Everything is
    committed once, after the last group.

    Args:
        debug_limit: Passed straight to ``Query.limit()`` to cap the number
            of result rows processed. The default ``None`` is expected to
            mean "no limit".
    """
    time0 = time.time()
    print('Starting to rate!')
    ordered_results = (
        Results.query
        .join(Races, Races.race_id == Results.race_id)
        .order_by(Races.date,
                  Results.race_id,
                  Results.RaceCategoryName,
                  Results.Place)
    )
    # groupby only merges adjacent rows; the ordering above keeps rows that
    # share (race_id, RaceCategoryName) next to each other.
    groups = groupby(ordered_results.limit(debug_limit),
                     lambda x: (x.race_id, x.RaceCategoryName))
    print(f'Made groupby: {time.time() - time0}')  # 8s for full dataset

    # Warning... this will hog memory!
    # Entire dataset took nearly 3 hours
    for (race_id, category), results in groups:  # ~30s delay to fetch and start
        print(f'Rating race {race_id} category {category}')
        results = list(results)

        # Get Racers rows corresponding to the results
        racers = [Racers.query.get(result.RacerID) for result in results]

        # Store prior ratings in Results table in both prior mu/sigma and
        # current mu/sigma columns - current mu/sigma will change for placing
        # racers and not change for DNF racers
        for result, racer in zip(results, racers):
            result.prior_mu = racer.mu
            result.prior_sigma = racer.sigma
            result.mu = racer.mu
            result.sigma = racer.sigma

        # Predicted placing for ALL racers (including DNFs)
        get_predicted_places(results)

        # Filter out DNFs - keep only (result, racer) pairs with a valid
        # result.Place
        placing = [(result, racer)
                   for result, racer in zip(results, racers)
                   if result.Place is not None]

        # Don't rate empty or uncontested races
        if len(placing) <= 1:
            continue

        # Rate using trueskill
        new_ratings = run_trueskill([result for result, _ in placing])

        # Update results and racers rows
        for (result, racer), rating in zip(placing, new_ratings):
            result.mu = racer.mu = rating.mu
            result.sigma = racer.sigma = rating.sigma
            result.rated = True

    print(f'Elapsed time: {time.time() - time0}')

    # Committing took ~15 seconds when stopping entire dataset early, but
    # was instant when done after rating the whole dataset
    time0 = time.time()
    db.session.flush()
    db.session.commit()
    print(f'Committing took: {time.time() - time0}')
def get_predicted_places(results):
    """Assign a predicted finishing place to every row in *results*.

    Rows are ranked by decreasing ``prior_mu`` (highest mean rating is
    predicted 1st). Tied ratings share the lowest rank of the tie
    (``method='min'``). The rank is stored on each row as the plain ``int``
    attribute ``predicted_place``; nothing is returned.
    """
    # Negate the means so that the largest rating receives rank 1.
    negated_means = [-row.prior_mu for row in results]
    predicted = rankdata(negated_means, method='min')
    for idx, row in enumerate(results):
        # rankdata yields numpy scalars; store a builtin int instead
        row.predicted_place = int(predicted[idx])
def reset_ratings():
    """Restore every stored rating to the TrueSkill environment defaults.

    All Results rows get mu/sigma and prior_mu/prior_sigma set to
    ``env.mu`` / ``env.sigma`` and ``rated`` cleared; all Racers rows get
    mu/sigma set to the same defaults. Both bulk updates are issued without
    synchronizing the session and then committed.
    """
    results_defaults = {
        'mu': env.mu,
        'sigma': env.sigma,
        'rated': False,
        'prior_mu': env.mu,
        'prior_sigma': env.sigma,
    }
    print('Resetting ratings in Results...')
    Results.query.update(results_defaults, synchronize_session=False)

    racers_defaults = {'mu': env.mu, 'sigma': env.sigma}
    print('Resetting ratings in Racers...')
    Racers.query.update(racers_defaults, synchronize_session=False)

    db.session.flush()
    db.session.commit()
def run_trueskill(results):
    """Run TrueSkill on one race's results and return the updated ratings.

    Prior ratings are read from the ``prior_mu`` and ``prior_sigma``
    attributes of each row. No explicit ranks are passed to ``env.rate``, so
    the order of *results* is what TrueSkill uses as the finishing order.

    Args:
        results: Result rows for a single race, in finishing order.

    Returns:
        A list with one updated rating object (exposing ``mu`` and
        ``sigma``) per row, in the same order as *results*. Returns ``[]``
        when the race is uncontested (fewer than two rows) or when TrueSkill
        fails with a FloatingPointError; in both cases nothing is updated.
        On a FloatingPointError the prior (mu, sigma) pairs are dumped to
        ``error.pkl`` and the error is printed.
    """
    results = list(results)
    # An uncontested race carries no rating information, and TrueSkill
    # needs at least two rating groups to work with.
    if len(results) <= 1:
        return []

    priors = [(result.prior_mu, result.prior_sigma) for result in results]

    # TrueSkill requires each "team" as a list. Our teams are one person each
    # and consist of one rating. We then need to get the only element from the
    # returned list to access the updated ratings
    try:
        rated_teams = env.rate([[env.Rating(mu, sigma)] for mu, sigma in priors])
    except FloatingPointError as e:
        # Keep the offending inputs for later debugging, then report "no
        # update" rather than falling through to an unbound return value.
        import dill
        with open('error.pkl', 'wb') as dump_file:
            dill.dump(priors, dump_file)
        print(e)
        return []
    return [team[0] for team in rated_teams]