Skip to content

Commit

Permalink
m
Browse files Browse the repository at this point in the history
  • Loading branch information
constant-inos committed May 29, 2021
1 parent 2627eea commit d1dc11a
Show file tree
Hide file tree
Showing 11 changed files with 35 additions and 13 deletions.
Binary file added agents/__pycache__/DDQN.cpython-37.pyc
Binary file not shown.
9 changes: 7 additions & 2 deletions controllers/ddqn_webots/ddqn_webots.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
keyboard.enable(env.timestep)

n_inputs = 1
agent = Agent(action_size=env.action_size, lr=0.001, mem_size=50000, epsilon_step=1/1500000 ,Network=SimpleDQN, Memory=Memory, n_inputs=n_inputs, update_target_freq=30, train_interval=10, batch_size=32)
agent = Agent(action_size=env.action_size, lr=0.001, mem_size=50000, epsilon_step=1/100000 ,Network=SimpleDQN, Memory=Memory, n_inputs=n_inputs, update_target_freq=30, train_interval=10, batch_size=32)

if n_inputs==2:
state = [tf.convert_to_tensor([state[0]]),tf.convert_to_tensor([state[1]])]
Expand All @@ -66,7 +66,7 @@
[agent.memory.memory,agent.memory.memCounter,agent.epsilon,env.task,i,scores,L.Variables,L.fname,L.time,L.t] = list(np.load(filename,allow_pickle=True))
env.total_steps = agent.memory.memCounter

while (i<n_games):
while (True):

done = False
score = 0
Expand Down Expand Up @@ -132,3 +132,8 @@
f.close()

env.robot.worldReload()

if agent.epsilon <= agent.epsilon_min:
break

print('End of Training!')
33 changes: 25 additions & 8 deletions environments/WebotsEnv.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@

OF = OpticalFlow()

def cart2pol(x, y):
    """Convert Cartesian coordinates (x, y) to polar (rho, phi).

    Returns the radial distance from the origin and the angle in
    radians (via np.arctan2, so the full [-pi, pi] range is covered).
    """
    radius = np.sqrt(x * x + y * y)
    angle = np.arctan2(y, x)
    return radius, angle

def pol2cart(rho, phi):
    """Convert polar coordinates (rho, phi in radians) to Cartesian (x, y)."""
    return rho * np.cos(phi), rho * np.sin(phi)

def D(A,B):
if len(A) == 3:
(x,y,z) = A
Expand All @@ -38,12 +48,11 @@ def reward_function(position_data,prev_shaping,collision=False):
#reward -= 100
done = True

c=1
c=5
if np.sqrt(X1**2+Y1**2) < c/100:
reward = 1000
reward = 100
done = True
print('OK')


return reward,done,shaping

class HER():
Expand Down Expand Up @@ -77,7 +86,11 @@ def in_done(self):
for i in range(n):
[x,y,x1,y1] = self.states[i]
position_data = [x-xg,y-yg,x1-xg,y1-yg]
state = position_data

#state = position_data
rho0,phi0 = cart2pol(x-xg,y-yg)
rho1,phi1 = cart2pol(x1-xg,y1-yg)
state = [rho0,phi0,rho1,phi1]

reward,done,prev_shaping = reward_function(position_data,prev_shaping)

Expand Down Expand Up @@ -178,17 +191,21 @@ def step(self,action_idx):
# sensor_data += list(sensors)
x1,y1,z1 = self.get_robot_position()

c = 10
x,y,x1,y1,xg,yg = c*x,c*y,c*x1,c*y1,c*xg,c*yg
x,y,x1,y1,xg,yg = x,y,x1,y1,xg,yg
position_data = [x-xg,y-yg,x1-xg,y1-yg]


#state = [camera_stack, sensor_data + position_data]
#state = sensor_data + position_data
state = position_data
#state = position_data
rho0,phi0 = cart2pol(x-xg,y-yg)
rho1,phi1 = cart2pol(x1-xg,y1-yg)
state = [rho0,phi0,rho1,phi1]

# REWARD
reward,done,self.shaping = reward_function(position_data,self.shaping)

if reward == 100: print('goal')

if self.stepCounter >= self.max_steps:
done = True
Expand Down
Binary file added environments/__pycache__/WebotsEnv.cpython-37.pyc
Binary file not shown.
Binary file added extras/__pycache__/dynamic_map.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file added extras/__pycache__/obstacles.cpython-37.pyc
Binary file not shown.
Binary file added extras/__pycache__/optical_flow.cpython-37.pyc
Binary file not shown.
Binary file added extras/__pycache__/statistics.cpython-37.pyc
Binary file not shown.
Binary file added networks/__pycache__/networks.cpython-37.pyc
Binary file not shown.
6 changes: 3 additions & 3 deletions worlds/.Eworld.wbproj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Webots Project File version R2021a
perspectives: 000000ff00000000fd0000000300000000000000690000027ffc0100000002fb0000001e00480074006d006c0052006f0062006f007400570069006e0064006f00770000000000000000690000000000000000fb0000001a0044006f00630075006d0065006e0074006100740069006f006e0000000000ffffffff0000006900ffffff0000000100000124000001ecfc0200000001fb0000001400540065007800740045006400690074006f00720100000016000001ec0000003f00ffffff000000030000050e000000a6fc0100000001fb0000001a0043006f006e0073006f006c00650041006c006c0041006c006c01000000000000050e0000006900ffffff000003e8000001ec00000001000000020000000100000008fc00000000
simulationViewPerspectives: 000000ff000000010000000200000118000000ac0100000002010000000100
sceneTreePerspectives: 000000ff0000000100000002000000c0000000fa0100000002010000000200
perspectives: 000000ff00000000fd0000000300000000000000690000027ffc0100000002fb0000001e00480074006d006c0052006f0062006f007400570069006e0064006f00770000000000000000690000000000000000fb0000001a0044006f00630075006d0065006e0074006100740069006f006e0000000000ffffffff0000005400ffffff000000010000012400000182fc0200000001fb0000001400540065007800740045006400690074006f00720100000014000001820000003c00ffffff00000003000002810000005efc0100000001fb0000001a0043006f006e0073006f006c00650041006c006c0041006c006c0100000000000002810000005400ffffff000001570000018200000001000000020000000100000008fc00000000
simulationViewPerspectives: 000000ff000000010000000200000118000000ac0100000006010000000100
sceneTreePerspectives: 000000ff0000000100000002000000c0000000fc0100000006010000000200
maximizedDockId: -1
centralWidgetVisible: 1
orthographicViewHeight: 1
Expand Down

0 comments on commit d1dc11a

Please sign in to comment.