# TigerPOMDPs.jl
"""
    TigerPOMDP

Parameter container for the tiger problem. Within this file states and
observations are `Bool` values and actions are `Int64` values; the three type
parameters passed to `POMDP` presumably name the state, action and observation
types in that order (confirm against the `POMDP` definition).
"""
mutable struct TigerPOMDP <: POMDP{Bool, Int64, Bool}
    # Reward added by `reward` when the action is TIGER_LISTEN.
    r_listen::Float64
    # Reward added by `reward` when the opened door matches the tiger's side.
    r_findtiger::Float64
    # Reward added by `reward` when the opened door is the other side.
    r_escapetiger::Float64
    # Probability used by `observation` for a matching observation when listening.
    p_listen_correctly::Float64
    # Value returned by `discount`.
    discount_factor::Float64
end
"""
    TigerPOMDP()

Construct a `TigerPOMDP` with the default parameters: listening reward `-1.0`,
find-tiger reward `-100.0`, escape reward `10.0`, listening accuracy `0.85` and
discount factor `0.95`.
"""
function TigerPOMDP()
    return TigerPOMDP(-1.0, -100.0, 10.0, 0.85, 0.95)
end
"""
    states(::TigerPOMDP)

Return the two states of the problem as the tuple `(false, true)`.
"""
function states(::TigerPOMDP)
    return (false, true)
end
"""
    observations(::TigerPOMDP)

Return the two possible observations as the tuple `(false, true)`.
"""
function observations(::TigerPOMDP)
    return (false, true)
end
"""
    stateindex(::TigerPOMDP, s::Bool)

Return the 1-based index of state `s`: `1` for `false`, `2` for `true`.
The result is an `Int64`.
"""
function stateindex(::TigerPOMDP, s::Bool)
    return s ? Int64(2) : Int64(1)
end
"""
    actionindex(::TigerPOMDP, a::Int)

Return the 1-based index of action `a`, i.e. `a + 1` (actions are numbered from 0).
"""
function actionindex(::TigerPOMDP, a::Int)
    return 1 + a
end
"""
    obsindex(::TigerPOMDP, o::Bool)

Return the 1-based index of observation `o`: `1` for `false`, `2` for `true`.
The result is an `Int64`.
"""
function obsindex(::TigerPOMDP, o::Bool)
    return o ? Int64(2) : Int64(1)
end
"""
    initial_belief(::TigerPOMDP)

Return `DiscreteBelief(2)`.

NOTE(review): `DiscreteBelief` is defined outside this file; the argument `2`
presumably is the number of states. Confirm against its constructor.
"""
function initial_belief(::TigerPOMDP)
    return DiscreteBelief(2)
end
# Action encoding: listen = 0, open the left door = 1, open the right door = 2.
const TIGER_LISTEN, TIGER_OPEN_LEFT, TIGER_OPEN_RIGHT = 0, 1, 2

# State encoding: tiger on the left = false, tiger on the right = true.
const TIGER_LEFT, TIGER_RIGHT = false, true
"""
    transition(pomdp::TigerPOMDP, s::Bool, a::Int64)

Return the next-state distribution for taking action `a` in state `s`.

Opening either door resets the problem: the result is `BoolDistribution(0.5)`.
Any other action (in particular listening) leaves the state unchanged: the
result is `BoolDistribution(1.0)` when `s` is `true` and `BoolDistribution(0.0)`
when `s` is `false`.

NOTE(review): the argument of `BoolDistribution` is presumably the probability
of `true`. Confirm against its definition.
"""
function transition(pomdp::TigerPOMDP, s::Bool, a::Int64)
    # A door was opened: the tiger is re-placed with equal odds.
    if a == TIGER_OPEN_LEFT || a == TIGER_OPEN_RIGHT
        return BoolDistribution(0.5)
    end
    # Otherwise the state is carried over unchanged.
    return BoolDistribution(s ? 1.0 : 0.0)
end
"""
    observation(pomdp::TigerPOMDP, a::Int64, sp::Bool)

Return the observation distribution after taking action `a` and arriving in
state `sp`.

When `a` is `TIGER_LISTEN` the distribution parameter is
`pomdp.p_listen_correctly` if `sp` is `true` and `1.0 - pomdp.p_listen_correctly`
if `sp` is `false`. For every other action the parameter is `0.5`.

NOTE(review): the argument of `BoolDistribution` is presumably the probability
of observing `true`. Confirm against its definition.
"""
function observation(pomdp::TigerPOMDP, a::Int64, sp::Bool)
    # Anything other than listening gives an uninformative observation.
    if a != TIGER_LISTEN
        return BoolDistribution(0.5)
    end
    accuracy = pomdp.p_listen_correctly
    return BoolDistribution(sp ? accuracy : 1.0 - accuracy)
end
"""
    observation(pomdp::TigerPOMDP, s::Bool, a::Int64, sp::Bool)

Four-argument form of `observation`. The previous state `s` is ignored and the
call is forwarded to `observation(pomdp, a, sp)`.
"""
observation(pomdp::TigerPOMDP, s::Bool, a::Int64, sp::Bool) = observation(pomdp, a, sp)
"""
    reward(pomdp::TigerPOMDP, s::Bool, a::Int64)

Return the immediate reward for taking action `a` in state `s`.

- `TIGER_LISTEN`: `pomdp.r_listen`.
- `TIGER_OPEN_LEFT` / `TIGER_OPEN_RIGHT`: `pomdp.r_findtiger` when the opened
  door is the side the tiger is on (`s`), otherwise `pomdp.r_escapetiger`.
- Any other action value: `0.0`.
"""
function reward(pomdp::TigerPOMDP, s::Bool, a::Int64)
    total = 0.0
    # The three action codes are distinct, so at most one branch applies.
    if a == TIGER_LISTEN
        total += pomdp.r_listen
    elseif a == TIGER_OPEN_LEFT
        total += (s == TIGER_LEFT) ? pomdp.r_findtiger : pomdp.r_escapetiger
    elseif a == TIGER_OPEN_RIGHT
        total += (s == TIGER_RIGHT) ? pomdp.r_findtiger : pomdp.r_escapetiger
    end
    return total
end
"""
    reward(pomdp::TigerPOMDP, s::Bool, a::Int64, sp::Bool)

Four-argument form of `reward`. The next state `sp` is ignored and the call is
forwarded to `reward(pomdp, s, a)`.
"""
function reward(pomdp::TigerPOMDP, s::Bool, a::Int64, sp::Bool)
    return reward(pomdp, s, a)
end
"""
    initialstate(pomdp::TigerPOMDP)

Return the initial state distribution, `BoolDistribution(0.5)`.
"""
function initialstate(pomdp::TigerPOMDP)
    return BoolDistribution(0.5)
end
"""
    actions(::TigerPOMDP)

Return the action space as the range `0:2`, which covers `TIGER_LISTEN`,
`TIGER_OPEN_LEFT` and `TIGER_OPEN_RIGHT`.
"""
function actions(::TigerPOMDP)
    return 0:2
end
"""
    upperbound(pomdp::TigerPOMDP, s::Bool)

Return `pomdp.r_escapetiger` regardless of the state `s`.

NOTE(review): what quantity this bounds (single-step reward or discounted
value) is not visible in this file. Confirm with the solver that calls it.
"""
upperbound(pomdp::TigerPOMDP, s::Bool) = pomdp.r_escapetiger
"""
    discount(pomdp::TigerPOMDP)

Return the discount factor stored in `pomdp.discount_factor`.
"""
function discount(pomdp::TigerPOMDP)
    return pomdp.discount_factor
end
"""
    initialobs(p::TigerPOMDP, s::Bool)

Return the distribution of the initial observation for state `s`, defined as
the observation distribution of a listen action that ends in `s`.
"""
function initialobs(p::TigerPOMDP, s::Bool)
    # Use the named action constant rather than the literal 0 so this stays
    # correct if the action encoding is ever renumbered.
    return observation(p, TIGER_LISTEN, s)
end