agent.py
from mortgage import State
from scipy.linalg import hadamard
import math
import random
import numpy as np

delta = 0.01  # perturbation size used in the gradient estimate
L = 1         # unused in this module
alpha = 0.05  # weight on the estimated future value in the update
C = 2**(math.ceil(math.log(12, 2)))  # smallest power of two >= 12, so C = 16
H = hadamard(C)    # C x C Hadamard matrix of +/-1 entries
hbar = H[:, 1:12]  # drop the all-ones column; 11 columns, one per action
offset = random.randint(0, 15)  # random starting row into the Hadamard directions
class Agent():
    def __init__(self):
        self.policies = {}  # dictionary mapping states to policies (s -> pi_i)
        self.updates = {}   # dictionary mapping states to their update counts

    def sample_action(self, policy):
        # Draw one of the 11 variable-fraction actions according to the policy.
        return np.random.choice([0.0, 0.1, 0.2, 0.3, 0.4, 0.5,
                                 0.6, 0.7, 0.8, 0.9, 1.0], p=policy)

    def get_random_policy(self):
        # Initialize a state's policy at a random point on the 11-action simplex.
        policy = np.random.random_sample((11,))
        policy = Agent.normalize(policy)
        print(policy)
        return policy

    @staticmethod
    def normalize(v):
        # L1-normalize so the entries form a probability distribution.
        norm = np.linalg.norm(v, ord=1)
        if norm == 0:
            return v
        return v / norm
    def get_action(self, opportunities, state):
        policy = self.policies.get(state)
        if policy is None:  # first visit: create and store a random policy
            policy = self.get_random_policy()
            self.updates[state] = 0
            self.policies[state] = policy
        if not opportunities:
            print("No refinance because no opportunity, returning ", state.variable_fraction)
            return state.variable_fraction  # same fb implies no refinancing
        result = self.sample_action(policy)
        print("Sampled action with given policy is: ", result)
        return result
    def update(self, action, reward, state, time_passed, current_coupon_payment):
        print("updating state = {}".format(state))
        n = self.updates[state]
        policy = self.policies[state]
        print("previous policy = ", policy)
        print("n (iteration value) = ", n)
        a = Agent.a(n)
        inverse_del_pi_i = Agent.inverse(Agent.get_del_pi_i(n))
        print("inverse del pi i = ", inverse_del_pi_i)
        v_n_l = self.V(n, state, reward, time_passed, current_coupon_payment)
        print("Vnl = ", v_n_l)
        # Gradient-style step along the perturbation direction, then project
        # back onto the probability simplex.
        new_policy = Agent.project(policy + a*(v_n_l/delta)*inverse_del_pi_i)
        print("new policy = ", new_policy)
        self.policies[state] = new_policy
        self.updates[state] = n + 1
    @staticmethod
    def get_del_pi_i(n):
        # Cycle through the Hadamard rows as deterministic perturbation directions.
        return hbar[(n + offset) % C]

    @staticmethod
    def inverse(del_pi_i):
        # Entries are +/-1, so the elementwise inverse is the vector itself.
        return del_pi_i

    @staticmethod
    def a(n):
        # Diminishing step size for the policy update.
        return 1/(n+1)

    @staticmethod
    def b(n):
        # Diminishing averaging weight for the value estimates.
        return 1/(n+1)
    def V(self, n, state, reward, time_passed, current_coupon_payment):
        # Recursive running estimate of the perturbed policy's value.
        if n == 0:  # base case
            return 2
        b = Agent.b(n)
        del_pi_i = Agent.get_del_pi_i(n)
        # Perturb the stored policy by delta along the current direction and
        # sample the action it would take.
        pi_bar = Agent.project(self.policies[state] + delta*del_pi_i)
        new_fb = self.sample_action(pi_bar)
        c = current_coupon_payment
        fb = state.variable_fraction
        vt = state.variable_interest_rate
        rt = state.base_interest_rate  # should be mt!
        days = 30
        if fb == new_fb:  # no expected refinance: full month of interest on both parts
            e_reward = c + c*fb*vt + c*(1-fb)*rt
        else:  # expected refinance: interest accrues only for the elapsed fraction of the month
            e_reward = c + c*fb*vt*(time_passed/days) + c*(1-fb)*rt*(time_passed/days)
        val = Agent.eV(n-1, None, e_reward)
        return (1-b)*self.V(n-1, state, reward, time_passed, current_coupon_payment) + b*(reward + alpha*val)

    @staticmethod
    def eV(n, state, reward):
        # Running average of the expected one-step reward.
        if n == 0:  # base case
            return 2
        b = Agent.b(n)
        return (1-b)*Agent.eV(n-1, state, reward) + b*reward
    @staticmethod
    def project(policy):
        # Shift so every entry is nonnegative, then L1-normalize back onto the simplex.
        minimum = np.amin(policy)
        if minimum < 0:
            policy = policy + abs(minimum)
        return Agent.normalize(policy)
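
# A minimal usage sketch, assuming the interface of mortgage.State. The stub
# below is hypothetical: it only provides the three attributes this file reads
# (variable_fraction, variable_interest_rate, base_interest_rate) and is
# hashable so it can key the agent's dictionaries. The reward value is a
# placeholder for whatever the environment would return.
if __name__ == "__main__":
    class StubState:
        def __init__(self, fb, vt, rt):
            self.variable_fraction = fb
            self.variable_interest_rate = vt
            self.base_interest_rate = rt

        def _key(self):
            return (self.variable_fraction, self.variable_interest_rate,
                    self.base_interest_rate)

        def __hash__(self):
            return hash(self._key())

        def __eq__(self, other):
            return self._key() == other._key()

    agent = Agent()
    s = StubState(fb=0.5, vt=0.03, rt=0.05)
    action = agent.get_action(opportunities=True, state=s)  # sampled variable fraction
    agent.update(action, reward=1.0, state=s, time_passed=10,
                 current_coupon_payment=1.0)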