Commit m
constant-inos committed Jun 1, 2021
1 parent d1dc11a commit c0ae26f
Showing 11 changed files with 825 additions and 182 deletions.
568 changes: 568 additions & 0 deletions _vizdoom.ini

Large diffs are not rendered by default.

24 changes: 22 additions & 2 deletions controllers/ddqn_webots/ddqn_webots.py
@@ -3,6 +3,8 @@
parent_dir = os.path.dirname(current_dir)
parent_dir = os.path.dirname(parent_dir)
sys.path.insert(0, parent_dir)
print(parent_dir)
exit()

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #cut annoying tf messages

@@ -30,6 +32,13 @@
import __main__
from datetime import datetime

def WithNoise(input_vector):
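# return the state vector with small zero-mean Gaussian noise (std 0.005) added;
# used by the commented-out experience-augmentation calls further down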
mean = 0
std = 0.005
n = len(input_vector)
noise = np.random.normal(mean,std,n)
return list(np.array(input_vector) + noise)


dir_path = os.path.dirname(os.path.realpath(__file__))
L = Logger()
@@ -60,6 +69,7 @@
filename = os.path.join(parent_dir,'history','checkpoint')

scores = deque(maxlen=100)
goals = deque(maxlen=100)
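# rolling 100-episode windows: average score and number of successful episodes ('goals/100' in the log line)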
i = 0

if os.path.exists(filename):
@@ -89,8 +99,13 @@
else:
state = np.expand_dims(observation,axis=0)
new_state = np.expand_dims(observation_,axis=0)


agent.store_experience(state,action_idx,reward,new_state,done)
# # Add exp from noise
# agent.store_experience(np.expand_dims(WithNoise(state[0]),axis=0),action_idx,reward,np.expand_dims(WithNoise(new_state[0]),axis=0),done)
# agent.store_experience(np.expand_dims(WithNoise(state[0]),axis=0),action_idx,reward,np.expand_dims(WithNoise(new_state[0]),axis=0),done)

observation = observation_
if training: agent.learn()
score += reward
@@ -105,7 +120,9 @@
training = True
agent.epsilon = epsilon_train
print('Training on')


goal = (reward == 100)  # +100 is only given on reaching the target, so this flags a successful episode

her_memory = env.her.in_done()
for m in her_memory:
state,action_idx,reward,new_state,done = m
@@ -115,11 +132,14 @@


L.add_log('score',score)
L.add_log('goals',goal)
L.save_game()

scores.append(score)
goals.append(goal)


print('EPISODE:',i,'STEPS:',ep_steps,'EPSILON',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores),'\n')
print('EPISODE:',i,'STEPS:',ep_steps,'EPSILON',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores),'goals/100:',sum(goals),'\n')
agent.save_model()

i += 1
162 changes: 162 additions & 0 deletions environments/TargetGame.py
@@ -0,0 +1,162 @@
import os,sys,inspect
current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parent_dir = os.path.dirname(current_dir)
sys.path.insert(0, parent_dir)

from extras import obstacles
import numpy as np
import random
import cv2


def WithNoise(input_vector):
mean = 0
std = 0.005
n = len(input_vector)
noise = np.random.normal(mean,std,n)
return list(np.array(input_vector) + noise)

def cart2pol(x, y):
rho = np.sqrt(x**2 + y**2)
phi = np.arctan2(y, x)
return(rho, phi)

def pol2cart(rho, phi):
x = rho * np.cos(phi)
y = rho * np.sin(phi)
return(x, y)

def D(A,B):
if len(A) == 3:
(x,y,z) = A
(a,b,c) = B
else:
(x,y) = A
(a,b) = B
return np.sqrt((x-a)**2 + (y-b)**2)

def reward_function(position_data,prev_shaping,collision=False):
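# position_data = [X, Y, X1, Y1]: displacement of the current and of the next cell from the goal.
# The reward is the change in the shaping term -100*(X1**2 + Y1**2), i.e. the squared distance of the
# next cell from the goal; landing within 3 cells of the goal instead gives +100 and ends the episode.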
X,Y,X1,Y1 = position_data

reward = 0
sh1 = -100*(X1**2+Y1**2)
shaping = sh1
if prev_shaping is not None:
reward = shaping - prev_shaping

done = False
if collision:
#reward -= 100
done = True

if np.sqrt(X1**2+Y1**2) < 3:
reward = 100
done = True

return reward,done,shaping



class Follower():
# Webots-to-environment-agnostic
def __init__(self, max_steps=50):
self.max_steps = max_steps

self.discrete_actions = [0,1,2]
self.action_size = len(self.discrete_actions)
self.stepCounter = 0
self.shaping = None

self.create_world()

def reset(self,reset_position=True):

self.create_world()

self.stepCounter = 0

self.shaping = None

self.path = [list(self.position)]  # start a fresh trajectory from the new start cell
state,_,_,_ = self.step(1)
return state


def step(self,action):
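# action 0 keeps the current heading; actions 1 and 2 also move one cell sideways and rotate the heading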

[xg,yg,] = self.GOAL
[x0,y0] = self.position

position_data = []

# default: stay on the same row/column; the active heading branch below always advances
# one cell along the heading, and a turn action also moves one cell sideways
x1, y1 = x0, y0
if self.direction == 0:    # up
    x1 = x0 - 1
    if action == 1:
        y1 = y0 - 1
        self.direction = 2
    if action == 2:
        y1 = y0 + 1
        self.direction = 3
elif self.direction == 1:  # down
    x1 = x0 + 1
    if action == 1:
        y1 = y0 + 1
        self.direction = 3
    if action == 2:
        y1 = y0 - 1
        self.direction = 2
elif self.direction == 2:  # left
    y1 = y0 - 1
    if action == 1:
        x1 = x0 + 1
        self.direction = 1
    if action == 2:
        x1 = x0 - 1
        self.direction = 0
elif self.direction == 3:  # right
    y1 = y0 + 1
    if action == 1:
        x1 = x0 - 1
        self.direction = 0
    if action == 2:
        x1 = x0 + 1
        self.direction = 1

try:
    self.map[x1, y1] = 1  # mark the visited cell
except IndexError:
    x1, y1 = x0, y0  # cancel moves that would leave the map

position_data = [x0-xg, y0-yg, x1-xg, y1-yg]  # current and next displacement from the goal


# rho0,phi0 = cart2pol(x-xg,y-yg)
# rho1,phi1 = cart2pol(x1-xg,y1-yg)
# state = [rho0,phi0,rho1,phi1]
state = position_data

# REWARD
reward,done,self.shaping = reward_function(position_data,self.shaping)

if reward == 100: print('goal')

if self.stepCounter >= self.max_steps:
done = True

self.position = [x1, y1]  # commit the move so the next step starts from the new cell
self.path.append([x1, y1])
self.stepCounter += 1
info = ''
return state,reward,done,info


def create_world(self):
    L = 100
    self.map = np.zeros((L, L))
    self.start = [int(random.random() * L), int(random.random() * L)]
    self.target = [int(random.random() * L), int(random.random() * L)]
    self.GOAL = self.target  # step() measures progress against self.GOAL

    self.direction = np.random.choice([0, 1, 2, 3])  # up, down, left, right, matching the checks in step()
    self.position = self.start



14 changes: 14 additions & 0 deletions environments/WebotsEnv.py
@@ -15,6 +15,13 @@

OF = OpticalFlow()

def WithNoise(input_vector):
mean = 0
std = 0.005
n = len(input_vector)
noise = np.random.normal(mean,std,n)
return list(np.array(input_vector) + noise)

def cart2pol(x, y):
rho = np.sqrt(x**2 + y**2)
phi = np.arctan2(y, x)
@@ -92,6 +99,8 @@ def in_done(self):
rho1,phi1 = cart2pol(x1-xg,y1-yg)
state = [rho0,phi0,rho1,phi1]



reward,done,prev_shaping = reward_function(position_data,prev_shaping)

done = (i==n-1)
@@ -101,6 +110,11 @@

if prev_state is not None:
memory.append([prev_state,prev_action,prev_reward,state,prev_done])

# # Add Gaussian Noise to increase data and regularize
# memory.append([WithNoise(prev_state),prev_action,prev_reward,WithNoise(state),prev_done])
# memory.append([WithNoise(prev_state),prev_action,prev_reward,WithNoise(state),prev_done])

prev_state,prev_action,prev_reward,prev_done = state,action,reward,done

return memory
Binary file modified environments/__pycache__/WebotsEnv.cpython-37.pyc
46 changes: 46 additions & 0 deletions mains/ddqn_target.py
@@ -0,0 +1,46 @@
import os,sys,inspect
current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parent_dir = os.path.dirname(current_dir)
sys.path.insert(0, parent_dir)
print(parent_dir)


import numpy as np
import random
import os
from tensorflow.keras.optimizers import Adam

from environments.TargetGame import *
from networks.networks import *
from extras.experience_memory import *
from agents.DDQN import Agent
from extras.statistics import *
dir_path = os.path.dirname(os.path.realpath(__file__))
L = Logger()

env = Follower()  # target-reaching grid environment defined in environments/TargetGame.py
agent = Agent(action_size=env.action_size,Network=SimpleDQN)

n_games = 2000
scores = []
avg_score = 0

for i in range(n_games):
observation = env.reset()
done = False
score = 0
while not done:
action = agent.choose_action(observation)
new_observation,reward,done,info = env.step(action)
score += reward
state = np.expand_dims(observation,axis=0)
new_state = np.expand_dims(new_observation,axis=0)
agent.store_experience(state,action,reward,new_state,done)
observation = new_observation

agent.learn()

scores.append(score)
print('GAME:',i,'epsilon',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:]))
L.add_log('score',score)

15 changes: 9 additions & 6 deletions mains/ddqn_vizdoom.py
@@ -1,24 +1,27 @@
import os,sys,inspect
current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parent_dir = os.path.dirname(current_dir)
#parent_dir = os.path.dirname(parent_dir)
sys.path.insert(0, parent_dir)
print(parent_dir)


import numpy as np
import random
import os
from tensorflow.keras.optimizers import Adam

from environments.VizDoomEnv import *
from networks.networks import *
from environments.VizDoomEnv import *
from extras.experience_memory import *
from agents.DDQN import Agent
from extras.statistics import *
dir_path = os.path.dirname(os.path.realpath(__file__))
L = Logger(dir=dir_path,fname='vizdoom_ddqn')
L = Logger()


env = VizDoomEnv(scenario='defend_the_center.cfg')
agent = Agent(action_size=env.action_size,conv=True)
agent = Agent(action_size=env.action_size,Network=ConvDQN,epsilon_step=1/50000)
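# assuming the Agent anneals epsilon by epsilon_step per learning update, 1/50000 spreads exploration over roughly 50k steps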


n_games = 2000
@@ -41,9 +44,9 @@
agent.learn()

scores.append(score)
print('GAME:',i,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:]))
print('GAME:',i,'epsilon',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:]))
L.add_log('score',score)
L.add_log('kills',kills)

if i % 10==0:
L.save_game()
# if i % 10==0:
# L.save_game()
Binary file modified networks/__pycache__/networks.cpython-37.pyc