Commit m
constant-inos committed Jun 1, 2021
1 parent d1dc11a commit c0ae26f
Showing 11 changed files with 825 additions and 182 deletions.
568 changes: 568 additions & 0 deletions _vizdoom.ini

Large diffs are not rendered by default.

24 changes: 22 additions & 2 deletions controllers/ddqn_webots/ddqn_webots.py
@@ -3,6 +3,8 @@
parent_dir = os.path.dirname(current_dir)
parent_dir = os.path.dirname(parent_dir)
sys.path.insert(0, parent_dir)
print(parent_dir)
exit()

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #cut annoying tf messages

@@ -30,6 +32,13 @@
import __main__
from datetime import datetime

def WithNoise(input_vector):
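# return the state vector with small zero-mean Gaussian noise (std 0.005) added;
# used by the commented-out experience-augmentation calls further down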
mean = 0
std = 0.005
n = len(input_vector)
noise = np.random.normal(mean,std,n)
return list(np.array(input_vector) + noise)


dir_path = os.path.dirname(os.path.realpath(__file__))
L = Logger()
@@ -60,6 +69,7 @@
filename = os.path.join(parent_dir,'history','checkpoint')

scores = deque(maxlen=100)
goals = deque(maxlen=100)
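# rolling 100-episode windows: average score and number of successful episodes ('goals/100' in the log line)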
i = 0

if os.path.exists(filename):
@@ -89,8 +99,13 @@
else:
state = np.expand_dims(observation,axis=0)
new_state = np.expand_dims(observation_,axis=0)


agent.store_experience(state,action_idx,reward,new_state,done)
# # Add exp from noise
# agent.store_experience(np.expand_dims(WithNoise(state[0]),axis=0),action_idx,reward,np.expand_dims(WithNoise(new_state[0]),axis=0),done)
# agent.store_experience(np.expand_dims(WithNoise(state[0]),axis=0),action_idx,reward,np.expand_dims(WithNoise(new_state[0]),axis=0),done)

observation = observation_
if training: agent.learn()
score += reward
@@ -105,7 +120,9 @@
training = True
agent.epsilon = epsilon_train
print('Training on')


goal = (reward == 100)  # +100 is only given on reaching the target, so this flags a successful episode

her_memory = env.her.in_done()
for m in her_memory:
state,action_idx,reward,new_state,done = m
@@ -115,11 +132,14 @@


L.add_log('score',score)
L.add_log('goals',goal)
L.save_game()

scores.append(score)
goals.append(goal)


print('EPISODE:',i,'STEPS:',ep_steps,'EPSILON',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores),'\n')
print('EPISODE:',i,'STEPS:',ep_steps,'EPSILON',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores),'goals/100:',sum(goals),'\n')
agent.save_model()

i += 1
162 changes: 162 additions & 0 deletions environments/TargetGame.py
@@ -0,0 +1,162 @@
import os,sys,inspect
current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parent_dir = os.path.dirname(current_dir)
sys.path.insert(0, parent_dir)

from extras import obstacles
import numpy as np
import random
import cv2


def WithNoise(input_vector):
mean = 0
std = 0.005
n = len(input_vector)
noise = np.random.normal(mean,std,n)
return list(np.array(input_vector) + noise)

def cart2pol(x, y):
rho = np.sqrt(x**2 + y**2)
phi = np.arctan2(y, x)
return(rho, phi)

def pol2cart(rho, phi):
x = rho * np.cos(phi)
y = rho * np.sin(phi)
return(x, y)

def D(A,B):
if len(A) == 3:
(x,y,z) = A
(a,b,c) = B
else:
(x,y) = A
(a,b) = B
return np.sqrt((x-a)**2 + (y-b)**2)

def reward_function(position_data,prev_shaping,collision=False):
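# position_data = [X, Y, X1, Y1]: displacement of the current and of the next cell from the goal.
# The reward is the change in the shaping term -100*(X1**2 + Y1**2), i.e. the squared distance of the
# next cell from the goal; landing within 3 cells of the goal instead gives +100 and ends the episode.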
X,Y,X1,Y1 = position_data

reward = 0
sh1 = -100*(X1**2+Y1**2)
shaping = sh1
if prev_shaping is not None:
reward = shaping - prev_shaping

done = False
if collision:
#reward -= 100
done = True

if np.sqrt(X1**2+Y1**2) < 3:
reward = 100
done = True

return reward,done,shaping



class Follower():
# Webots-to-environment-agnostic
def __init__(self, max_steps=50):
self.max_steps = max_steps

self.discrete_actions = [0,1,2]
self.action_size = len(self.discrete_actions)
self.stepCounter = 0
self.shaping = None

self.create_world()

def reset(self,reset_position=True):

self.create_world()

self.stepCounter = 0

self.shaping = None

self.path = [list(self.position)]  # start a fresh trajectory from the new start cell
state,_,_,_ = self.step(1)
return state


def step(self,action):
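# action 0 keeps the current heading; actions 1 and 2 also move one cell sideways and rotate the heading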

[xg,yg,] = self.GOAL
[x0,y0] = self.position

position_data = []

# default: stay on the same row/column; the active heading branch below always advances
# one cell along the heading, and a turn action also moves one cell sideways
x1, y1 = x0, y0
if self.direction == 0:    # up
    x1 = x0 - 1
    if action == 1:
        y1 = y0 - 1
        self.direction = 2
    if action == 2:
        y1 = y0 + 1
        self.direction = 3
elif self.direction == 1:  # down
    x1 = x0 + 1
    if action == 1:
        y1 = y0 + 1
        self.direction = 3
    if action == 2:
        y1 = y0 - 1
        self.direction = 2
elif self.direction == 2:  # left
    y1 = y0 - 1
    if action == 1:
        x1 = x0 + 1
        self.direction = 1
    if action == 2:
        x1 = x0 - 1
        self.direction = 0
elif self.direction == 3:  # right
    y1 = y0 + 1
    if action == 1:
        x1 = x0 - 1
        self.direction = 0
    if action == 2:
        x1 = x0 + 1
        self.direction = 1

try:
    self.map[x1, y1] = 1  # mark the visited cell
except IndexError:
    x1, y1 = x0, y0  # cancel moves that would leave the map

position_data = [x0-xg, y0-yg, x1-xg, y1-yg]  # current and next displacement from the goal


# rho0,phi0 = cart2pol(x-xg,y-yg)
# rho1,phi1 = cart2pol(x1-xg,y1-yg)
# state = [rho0,phi0,rho1,phi1]
state = position_data

# REWARD
reward,done,self.shaping = reward_function(position_data,self.shaping)

if reward == 100: print('goal')

if self.stepCounter >= self.max_steps:
done = True

self.position = [x1, y1]  # commit the move so the next step starts from the new cell
self.path.append([x1, y1])
self.stepCounter += 1
info = ''
return state,reward,done,info


def create_world(self):
    L = 100
    self.map = np.zeros((L, L))
    self.start = [int(random.random() * L), int(random.random() * L)]
    self.target = [int(random.random() * L), int(random.random() * L)]
    self.GOAL = self.target  # step() measures progress against self.GOAL

    self.direction = np.random.choice([0, 1, 2, 3])  # up, down, left, right, matching the checks in step()
    self.position = self.start



14 changes: 14 additions & 0 deletions environments/WebotsEnv.py
@@ -15,6 +15,13 @@

OF = OpticalFlow()

def WithNoise(input_vector):
mean = 0
std = 0.005
n = len(input_vector)
noise = np.random.normal(mean,std,n)
return list(np.array(input_vector) + noise)

def cart2pol(x, y):
rho = np.sqrt(x**2 + y**2)
phi = np.arctan2(y, x)
@@ -92,6 +99,8 @@ def in_done(self):
rho1,phi1 = cart2pol(x1-xg,y1-yg)
state = [rho0,phi0,rho1,phi1]



reward,done,prev_shaping = reward_function(position_data,prev_shaping)

done = (i==n-1)
@@ -101,6 +110,11 @@

if prev_state is not None:
memory.append([prev_state,prev_action,prev_reward,state,prev_done])

# # Add Gaussian Noise to increase data and regularize
# memory.append([WithNoise(prev_state),prev_action,prev_reward,WithNoise(state),prev_done])
# memory.append([WithNoise(prev_state),prev_action,prev_reward,WithNoise(state),prev_done])

prev_state,prev_action,prev_reward,prev_done = state,action,reward,done

return memory
Binary file modified environments/__pycache__/WebotsEnv.cpython-37.pyc
46 changes: 46 additions & 0 deletions mains/ddqn_target.py
@@ -0,0 +1,46 @@
import os,sys,inspect
current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parent_dir = os.path.dirname(current_dir)
sys.path.insert(0, parent_dir)
print(parent_dir)


import numpy as np
import random
import os
from tensorflow.keras.optimizers import Adam

from environments.TargetGame import *
from networks.networks import *
from extras.experience_memory import *
from agents.DDQN import Agent
from extras.statistics import *
dir_path = os.path.dirname(os.path.realpath(__file__))
L = Logger()

env = Follower()  # target-reaching grid environment defined in environments/TargetGame.py
agent = Agent(action_size=env.action_size,Network=SimpleDQN)

n_games = 2000
scores = []
avg_score = 0

for i in range(n_games):
observation = env.reset()
done = False
score = 0
while not done:
action = agent.choose_action(observation)
new_observation,reward,done,info = env.step(action)
score += reward
state = np.expand_dims(observation,axis=0)
new_state = np.expand_dims(new_observation,axis=0)
agent.store_experience(state,action,reward,new_state,done)
observation = new_observation

agent.learn()

scores.append(score)
print('GAME:',i,'epsilon',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:]))
L.add_log('score',score)

15 changes: 9 additions & 6 deletions mains/ddqn_vizdoom.py
@@ -1,24 +1,27 @@
import os,sys,inspect
current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parent_dir = os.path.dirname(current_dir)
#parent_dir = os.path.dirname(parent_dir)
sys.path.insert(0, parent_dir)
print(parent_dir)


import numpy as np
import random
import os
from tensorflow.keras.optimizers import Adam

from environments.VizDoomEnv import *
from networks.networks import *
from environments.VizDoomEnv import *
from extras.experience_memory import *
from agents.DDQN import Agent
from extras.statistics import *
dir_path = os.path.dirname(os.path.realpath(__file__))
L = Logger(dir=dir_path,fname='vizdoom_ddqn')
L = Logger()


env = VizDoomEnv(scenario='defend_the_center.cfg')
agent = Agent(action_size=env.action_size,conv=True)
agent = Agent(action_size=env.action_size,Network=ConvDQN,epsilon_step=1/50000)
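# assuming the Agent anneals epsilon by epsilon_step per learning update, 1/50000 spreads exploration over roughly 50k steps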


n_games = 2000
@@ -41,9 +44,9 @@
agent.learn()

scores.append(score)
print('GAME:',i,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:]))
print('GAME:',i,'epsilon',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:]))
L.add_log('score',score)
L.add_log('kills',kills)

if i % 10==0:
L.save_game()
# if i % 10==0:
# L.save_game()
Binary file modified networks/__pycache__/networks.cpython-37.pyc