-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathValueIteration.m
78 lines (63 loc) · 2.16 KB
/
ValueIteration.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
function [ J_opt, u_opt_ind ] = ValueIteration(P, G)
%VALUEITERATION Value iteration
%   Solve a stochastic shortest path problem by Value Iteration, using
%   Gauss-Seidel (in-place) sweeps over the non-terminal states.
%
%   [J_opt, u_opt_ind] = ValueIteration(P, G) computes the optimal cost and
%   the optimal control input for each state of the state space.
%
%   Input arguments:
%       P:
%           A (K x K x L)-matrix containing the transition probabilities
%           between all states in the state space for all control inputs.
%           The entry P(i, j, l) represents the transition probability
%           from state i to state j if control input l is applied.
%
%       G:
%           A (K x L)-matrix containing the stage costs of all states in
%           the state space for all control inputs. The entry G(i, l)
%           represents the cost if we are in state i and apply control
%           input l.
%
%   Output arguments:
%       J_opt:
%           A (K x 1)-matrix containing the optimal cost-to-go for each
%           element of the state space.
%
%       u_opt_ind:
%           A (K x 1)-matrix containing the index of the optimal control
%           input for each element of the state space. Mapping of the
%           terminal state is arbitrary (here: HOVER).
%
%   Globals read:
%       HOVER                 Control index reported for the terminal state.
%       TERMINAL_STATE_INDEX  Index of the terminal state (see main.m /
%                             ComputeTerminalStateIndex.m).

global HOVER TERMINAL_STATE_INDEX

% Problem dimensions are taken from G (documented as K x L), so the
% function does not depend on the global K being set consistently.
numStates = size(G, 1);
numInputs = size(G, 2);

%% Handle terminal state
% The terminal state is cost-free and absorbing: its cost-to-go is fixed at
% zero and it is excluded from the sweep, so the boundary condition holds
% regardless of what P and G contain in the terminal row.
isTerminal = false(1, numStates);
isTerminal(TERMINAL_STATE_INDEX) = true;
sweepStates = find(~isTerminal);

%% Value Iteration
fprintf("Value Iteration exploiting Gauss-Seidel Update\n");

tol = 1e-6;      % stop when the largest cost update of a sweep is below this
maxIter = 1e6;   % safety bound so a non-converging problem cannot hang

J = zeros(1, numStates);       % J(TERMINAL_STATE_INDEX) stays 0 throughout
u_ind = zeros(1, numStates);

% Rearrange P once so that P_by_state(:, :, i) is the (K x L) matrix of
% transition probabilities out of state i. Unlike squeeze(P(i, :, :)),
% this also has the right shape when there is only one control input.
P_by_state = reshape(permute(P, [2 3 1]), numStates, numInputs, numStates);

iter = 0;
converged = false;
while iter < maxIter
    iter = iter + 1;
    J_prev = J;

    % Gauss-Seidel: J is updated in place, so later states in the sweep
    % already see the new costs of earlier ones.
    for i = sweepStates
        [J(i), u_ind(i)] = min(G(i, :) + J * P_by_state(:, :, i));
    end

    delta = max(abs(J - J_prev));
    if delta < tol
        converged = true;
        break;
    end

    if mod(iter, 100) == 0
        % %e is what MATLAB falls back to when %d receives a non-integer
        % value, so the printed text is unchanged.
        fprintf("Currently at iteration n.%d with error update of %e > %e\n", iter, delta, tol);
    end
end

if ~converged
    warning('ValueIteration:notConverged', ...
        'Value iteration stopped after %d iterations without reaching the tolerance %e.', ...
        iter, tol);
end

fprintf('Finished after %d iterations\n\n', iter);

% The control at the terminal state is arbitrary; report HOVER.
u_ind(TERMINAL_STATE_INDEX) = HOVER;

J_opt = J';
u_opt_ind = u_ind';
end