-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaction.py
132 lines (111 loc) · 4.43 KB
/
action.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import numpy as np
from utils import matrix_to_list
class Action():
    """Primitive one-step grid action expressed in the option format.

    An action is described by three square grids of side ``GRID_SIZE``:

    * ``I``    -- initiation set: 1 on the single cell where the action starts.
    * ``beta`` -- termination set: 1 on the cell reached after the move.
    * ``pi``   -- policy: the move code to apply on each cell (0 = stay still).

    Move codes are 1 = left, 2 = up, 3 = right, 4 = down. Cells are addressed
    as ``(row, column)``; "up" decreases the row and "left" decreases the
    column.
    """

    # Side length of the square grid world.
    GRID_SIZE = 8

    # Direction name -> move code stored in ``pi``.
    _DIRECTION_CODES = {"left": 1, "up": 2, "right": 3, "down": 4}

    # Move code -> direction name reported by ``pickAction``.
    _CODE_NAMES = {1: "left", 2: "up", 3: "right", 4: "down"}

    # Move code -> (row delta, column delta).
    _CODE_MOVES = {1: (0, -1), 2: (-1, 0), 3: (0, 1), 4: (1, 0)}

    def __init__(self, position, direction):
        """Build the action that moves one cell in *direction* from *position*.

        Args:
            position: ``(row, column)`` tuple of the start cell.
            direction: one of ``"left"``, ``"up"``, ``"right"``, ``"down"``.
        """
        self.position = position
        self.direction = direction
        self.name = str(position[0]) + "_" + str(position[1]) + "_" + direction

        # Set I (initiation set), beta (termination set), pi (policy)
        self._setIBetaPi(position, direction)
        self.initiation_as_list = matrix_to_list(self.I)
        self.termination_as_list = matrix_to_list(self.beta)

    def __copy__(self):
        """Return a fresh instance rebuilt from the same position and direction."""
        return type(self)(self.position, self.direction)

    def pickAction(self, state):
        """Look up the policy at *state*.

        Args:
            state: index into ``pi`` (e.g. a ``(row, column)`` tuple).

        Returns:
            ``(name, number)`` where *number* is the raw value stored in
            ``pi`` and *name* is its direction name, or ``"still"`` when the
            value is not one of the four move codes. The number is returned
            as well because it is used for intra-option model learning.
        """
        action_number = self.pi[state]
        for code, label in self._CODE_NAMES.items():
            if action_number == code:
                return label, action_number
        return "still", action_number

    def execute_policy_probabilistic(self, S):
        """Same as ``execute_policy``; the policy here is deterministic."""
        return self.execute_policy(S)

    def execute_policy(self, S):
        """Follow the policy from the state *S* until it terminates.

        Not strictly necessary for primitive actions; it exists so that they
        can be handled identically to options.

        Args:
            S: grid with a single cell set to 1 marking the current position.

        Returns:
            A new ``GRID_SIZE`` x ``GRID_SIZE`` grid with a 1 on the final
            cell. If *S* marks no cell, an all-zero grid is returned.

        Raises:
            ValueError: if *S* marks more than one cell.
        """
        size = self.GRID_SIZE
        rows, cols = np.where(S == 1)
        if rows.size == 0:
            return np.zeros((size, size))
        if rows.size > 1:
            raise ValueError("S must mark exactly one cell with 1")

        row, col = int(rows[0]), int(cols[0])
        while self.beta[row, col] == 0:
            step = self._CODE_MOVES.get(float(self.pi[row, col]))
            if step is None:
                # The policy says "stay still" on a non-terminal cell (for
                # example a start outside the initiation set). Stop here:
                # there is no move to apply and looping would never end.
                break
            row += step[0]
            col += step[1]

        final_state = np.zeros((size, size))
        final_state[row, col] = 1
        return final_state

    def list_initiation_states(self):
        """Split the initiation set into a list of one-hot state grids."""
        return self._one_hot_states(self.I)

    def list_termination_states(self):
        """Split the termination set into a list of one-hot state grids."""
        return self._one_hot_states(self.beta)

    @staticmethod
    def _one_hot_states(mask):
        """Return one one-hot grid per cell of *mask* equal to 1, row-major."""
        states = []
        for index in np.argwhere(mask == 1):
            grid = np.zeros(mask.shape)
            grid[tuple(index)] = 1
            states.append(grid)
        return states

    def _setIBetaPi(self, position, direction):
        """Populate ``I``, ``beta`` and ``pi`` for a one-step move.

        The target cell is clamped to the grid, so a move that would leave the
        grid terminates on the start cell itself and the policy there stays 0
        ("still"). ``pi`` is always created, even for an unrecognised
        *direction*; in that case ``beta`` is left all-zero.
        """
        size = self.GRID_SIZE
        self.I = np.zeros((size, size))
        self.I[position] = 1  # available at start position
        self.beta = np.zeros((size, size))
        self.pi = np.zeros((size, size))

        code = self._DIRECTION_CODES.get(direction)
        if code is None:
            return

        d_row, d_col = self._CODE_MOVES[code]
        row, col = position
        # Clamp only the coordinate that actually moves.
        if d_row:
            row = min(max(row + d_row, 0), size - 1)
        if d_col:
            col = min(max(col + d_col, 0), size - 1)
        adjusted = (row, col)

        self.beta[adjusted] = 1  # terminates after the move
        # Edge case: on the border the move is a no-op, so no move code is set.
        if adjusted != position:
            self.pi[position] = code

    def __str__(self):
        """Return the action name, ``"<row>_<column>_<direction>"``."""
        return self.name
if __name__ == "__main__":
    # Manual smoke demo: build the "down" action at cell (6, 0) and print
    # each of its grids under a heading, followed by the action name.
    demo = Action((6, 0), "down")
    sections = (
        ("Initiation Set", demo.I),
        ("Termination Set", demo.beta),
        ("Pi", demo.pi),
    )
    for heading, grid in sections:
        print(heading)
        print(grid)
    print(demo.name)