-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbandits.py
125 lines (106 loc) · 4.14 KB
/
bandits.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import numpy as np
from tests import test_pnl_increase
def simple_two_armed_bandit(X, C, alpha, pa, pb, n_iter=1000):
    """
    Simulate the two armed bandit algorithm using user defined probabilities
    pa and pb.

    At every step arm A is drawn with probability X (its current fraction)
    and arm B otherwise. The drawn arm passes its test with probability pa
    (resp. pb). On success it takes a share gamma of the other arm's
    fraction, with gamma = (C / (C + n + 1)) ** alpha at step n. A failed
    test leaves the fractions unchanged.

    X: initial fraction given to A (B gets 1 - X)
    C, alpha: parameters of the step size gamma
    pa, pb: probability that A (resp. B) passes its test when drawn
    n_iter: number of steps to simulate (defaults to 1000, the value that
        used to be hard-coded)

    Returns (fracA, fracB): two lists of length n_iter + 1 holding the
    fraction of each arm before the first step and after every step.
    """
    fracA = [X]  # history of A's fraction, starting with the initial value
    fracB = [1 - X]
    for n in range(n_iter):
        gamma = (C / (C + n + 1))**alpha
        if np.random.random_sample() < X:
            # test A: on success A takes gamma of B's share
            if np.random.sample() < pa:
                X = X + gamma * (1 - X)
        else:
            # test B: on success A gives gamma * X to B
            if np.random.sample() < pb:
                X = X - gamma * X
        fracA.append(X)
        fracB.append(1 - X)
    return fracA, fracB
def agent_two_armed_bandit(data, agentA, agentB, X, C, alpha, n_iter=None):
    """
    Run the two armed bandit algorithm between two agents.

    At every step both agents act on the current quote, then one of them is
    drawn (A with probability X, B otherwise). The drawn agent has its pnl
    computed with calc_pnl(bid) and is checked with test_pnl_increase. When
    the check passes, the agent takes a share gamma of the other agent's
    fraction, with gamma = (C / (C + n + 1)) ** alpha, and both agents'
    capital is rescaled by the ratio new fraction / old fraction.

    data: table indexed with .iloc whose rows have 'AskPrice' and 'BidPrice'
        (presumably a pandas DataFrame - confirm with callers)
    agentA, agentB: objects providing act(ask, bid), calc_pnl(bid) and
        rescale_capital(ratio)
    X: initial fraction given to A (B gets 1 - X)
    C, alpha: parameters of the step size gamma
    n_iter: number of rows of data to play; defaults to len(data)

    Returns (fracA, fracB): two lists of length n_iter + 1 with the fraction
    of each agent before the first step and after every step.
    """
    n_iter = n_iter if n_iter is not None else len(data)
    fracA = [X]  # history of A's fraction, starting with the initial value
    fracB = [1 - X]
    for n in range(n_iter):
        ask, bid = data.iloc[n][['AskPrice', 'BidPrice']]
        agentA.act(ask, bid)
        agentB.act(ask, bid)
        gamma = (C / (C + n + 1))**alpha

        # nX stays None when the drawn agent does not pass its test
        nX = None
        if np.random.random_sample() < X:
            # test A: on success A takes gamma of B's share
            agentA.calc_pnl(bid)
            if test_pnl_increase(agentA):
                nX = X + gamma * (1 - X)
        else:
            # test B: on success A gives gamma * X to B
            agentB.calc_pnl(bid)
            if test_pnl_increase(agentB):
                nX = X - gamma * X

        if nX is not None:
            # Multiply each agent's capital by new fraction / old fraction.
            # An agent whose old fraction is exactly 0 is skipped: the ratio
            # would be 0/0, which used to raise when X reached 0 or 1.
            if X != 0:
                agentA.rescale_capital(nX / X)
            if 1 - X != 0:
                agentB.rescale_capital((1 - nX) / (1 - X))
            X = nX

        fracA.append(X)
        fracB.append(1 - X)
    return fracA, fracB
def simple_multi_armed_bandit(X, C, alpha, ps, n_iter=973):
    """
    Simulate the multi armed bandit algorithm using user defined
    probabilities ps.

    At every step one agent is drawn with probability equal to its current
    fraction and passes its test with probability ps[i]. On success every
    other agent loses a share gamma of its fraction and the drawn agent
    moves from x to x + gamma * (1 - x), with
    gamma = (C / (C + n + 1)) ** alpha. A failed test leaves the fractions
    unchanged (no punishment).

    X: initial fractions, one per agent
    C, alpha: parameters of the step size gamma
    ps: probability of passing the test, one per agent
    ps and X must be of size #agent
    n_iter: number of steps to simulate (default 973)

    Returns an array of shape (n_iter + 1, #agent); row n holds the
    fractions after n steps.
    """
    n_agents = len(X)
    fracs = np.zeros((n_iter + 1, n_agents))
    fracs[0, :] = X
    if n_agents == 0:
        # nothing to draw from
        return fracs

    for n in range(n_iter):
        gamma = (C / (C + n + 1))**alpha
        current = fracs[n, :]
        cs = np.cumsum(current)
        s = np.random.sample()
        # First agent i with s <= cs[i]. When s is above the last cumulative
        # sum (fractions summing to slightly less than 1) the last agent is
        # used, so that row n + 1 is never left at zero.
        i = min(int(np.searchsorted(cs, s)), n_agents - 1)

        # we evaluate agent i
        if np.random.sample() < ps[i]:
            # we decrease each agent's capital proportionally ...
            updated = current - current * gamma
            # ... and give the freed share to agent i
            updated[i] = current[i] + gamma * (1 - current[i])
            fracs[n + 1, :] = updated
        else:
            # we do not punish when not passing the test
            fracs[n + 1, :] = current
    return fracs
def agent_multi_armed_bandit(data, agents, X, C, alpha, n_iter=None):
    """
    Run the multi armed bandit algorithm using agents.

    At every step all agents act on the current quote, then one agent is
    drawn with probability equal to its current fraction. Its pnl is computed
    with calc_pnl(bid) and checked with test_pnl_increase. When the check
    passes, every other agent loses a share gamma of its fraction, the drawn
    agent moves from x to x + gamma * (1 - x), with
    gamma = (C / (C + n + 1)) ** alpha, and every agent's capital is rescaled
    by its own ratio new fraction / old fraction. A failed check leaves the
    fractions unchanged.

    data: table indexed with .iloc whose rows have 'AskPrice' and 'BidPrice'
        (presumably a pandas DataFrame - confirm with callers)
    agents: sequence of objects providing act(ask, bid), calc_pnl(bid) and
        rescale_capital(ratio); agents[k] owns fraction X[k]
    X: initial fractions, one per agent
    C, alpha: parameters of the step size gamma
    n_iter: number of rows of data to play; defaults to len(data)

    Returns an array of shape (n_iter + 1, len(X)); row n holds the
    fractions after n steps.
    """
    n_iter = n_iter if n_iter is not None else len(data)
    n_agents = len(X)
    fracs = np.zeros((n_iter + 1, n_agents))
    fracs[0, :] = X
    for n in range(n_iter):
        # get the data and have each agent play
        ask, bid = data.iloc[n][['AskPrice', 'BidPrice']]
        for a in agents:
            a.act(ask, bid)
        if n_agents == 0:
            # nothing to draw from
            continue

        gamma = (C / (C + n + 1))**alpha
        current = fracs[n, :]
        cs = np.cumsum(current)
        s = np.random.sample()
        # First agent i with s <= cs[i]. When s is above the last cumulative
        # sum (fractions summing to slightly less than 1) the last agent is
        # used, so that row n + 1 is never left at zero.
        i = min(int(np.searchsorted(cs, s)), n_agents - 1)

        # we evaluate agent i
        agents[i].calc_pnl(bid)
        if test_pnl_increase(agents[i]):
            # we decrease each agent's capital proportionally ...
            updated = current - current * gamma
            # ... and give the freed share to agent i
            updated[i] = current[i] + gamma * (1 - current[i])
            fracs[n + 1, :] = updated
            # rescale capital for all agents according to the new
            # proportions: agent k is scaled by its own ratio. An agent with
            # a zero fraction is skipped, its ratio being undefined.
            for k in range(n_agents):
                if current[k] != 0:
                    agents[k].rescale_capital(updated[k] / current[k])
        else:
            # we do not punish when not passing the test
            fracs[n + 1, :] = current
    return fracs