From e5f77ffa7477ba4412d924b289e0001884de7958 Mon Sep 17 00:00:00 2001 From: con stant-inos Date: Sun, 27 Dec 2020 23:39:59 +0200 Subject: [PATCH] new file structure --- agents/ActorCritic.py | 102 +++ agents/DDQN.py | 123 ++++ agents/PPO.py | 176 +++++ agents/PolicyGradients.py | 129 ++++ agents/__pycache__/DDQN.cpython-38.pyc | Bin 0 -> 3968 bytes agents/__pycache__/PPO.cpython-38.pyc | Bin 0 -> 5182 bytes controllers/ddqn_webots/ddqn_webots.py | 203 ++++++ controllers/ppo_webots/ppo_webots.py | 166 +++++ depend.sh | 24 + environments/VizDoomEnv.py | 103 +++ environments/WebotsEnv.py | 239 +++++++ .../__pycache__/VizDoomEnv.cpython-38.pyc | Bin 0 -> 3052 bytes .../__pycache__/WebotsEnv.cpython-38.pyc | Bin 0 -> 7333 bytes environments/defend_the_center.cfg | 38 ++ environments/defend_the_center.wad | Bin 0 -> 6478 bytes extras/__pycache__/dynamic_map.cpython-38.pyc | Bin 0 -> 3411 bytes .../experience_memory.cpython-38.pyc | Bin 0 -> 2386 bytes .../__pycache__/optical_flow.cpython-38.pyc | Bin 0 -> 3355 bytes extras/__pycache__/statistics.cpython-38.pyc | Bin 0 -> 3329 bytes extras/curiosity.py | 109 ++++ extras/dynamic_map.py | 117 ++++ extras/experience_memory.py | 78 +++ extras/optical_flow.py | 138 ++++ extras/statistics.py | 90 +++ extras/utils.py | 29 + mains/ddqn_vizdoom.py | 49 ++ mains/ppo_vizdoom.py | 57 ++ networks/__pycache__/networks.cpython-38.pyc | Bin 0 -> 5001 bytes networks/networks.py | 186 ++++++ setupvizdoom.sh | 27 + setupwebots.sh | 13 + setupwebots2.sh | 53 ++ worlds/Cworld.wbt | 450 +++++++++++++ worlds/Dworld.wbt | 601 ++++++++++++++++++ 34 files changed, 3300 insertions(+) create mode 100644 agents/ActorCritic.py create mode 100644 agents/DDQN.py create mode 100644 agents/PPO.py create mode 100644 agents/PolicyGradients.py create mode 100644 agents/__pycache__/DDQN.cpython-38.pyc create mode 100644 agents/__pycache__/PPO.cpython-38.pyc create mode 100644 controllers/ddqn_webots/ddqn_webots.py create mode 100644 controllers/ppo_webots/ppo_webots.py create mode 100644 depend.sh create mode 100644 environments/VizDoomEnv.py create mode 100644 environments/WebotsEnv.py create mode 100644 environments/__pycache__/VizDoomEnv.cpython-38.pyc create mode 100644 environments/__pycache__/WebotsEnv.cpython-38.pyc create mode 100644 environments/defend_the_center.cfg create mode 100644 environments/defend_the_center.wad create mode 100644 extras/__pycache__/dynamic_map.cpython-38.pyc create mode 100644 extras/__pycache__/experience_memory.cpython-38.pyc create mode 100644 extras/__pycache__/optical_flow.cpython-38.pyc create mode 100644 extras/__pycache__/statistics.cpython-38.pyc create mode 100644 extras/curiosity.py create mode 100644 extras/dynamic_map.py create mode 100644 extras/experience_memory.py create mode 100644 extras/optical_flow.py create mode 100644 extras/statistics.py create mode 100644 extras/utils.py create mode 100644 mains/ddqn_vizdoom.py create mode 100644 mains/ppo_vizdoom.py create mode 100644 networks/__pycache__/networks.cpython-38.pyc create mode 100644 networks/networks.py create mode 100644 setupvizdoom.sh create mode 100644 setupwebots.sh create mode 100644 setupwebots2.sh create mode 100644 worlds/Cworld.wbt create mode 100644 worlds/Dworld.wbt diff --git a/agents/ActorCritic.py b/agents/ActorCritic.py new file mode 100644 index 0000000..0a60282 --- /dev/null +++ b/agents/ActorCritic.py @@ -0,0 +1,102 @@ +import os,sys,inspect +current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parent_dir = 
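# NOTE (illustrative sketch, not from the original patch): agents/ActorCritic.py
# below implements one-step (TD(0)) actor-critic. A shared network outputs a state
# value and action probabilities; the TD error
#     delta = r + gamma * V(s') * (1 - done) - V(s)
# scales both heads: actor loss = -log pi(a|s) * delta, critic loss = delta**2.
# Quick numeric check of the target with gamma = 0.99:
gamma, r, v, v_next, done = 0.99, 1.0, 0.5, 0.6, False
delta = r + gamma * v_next * (1 - int(done)) - v   # -> 1.094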
os.path.dirname(current_dir) +sys.path.insert(0, parent_dir) + +import os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #cut annoying tf messages +from tensorflow.keras.optimizers import Adam +import numpy as np +import tensorflow as tf +import tensorflow_probability as tfp + +from networks.networks import * + + +class Agent(object): + def __init__(self, n_actions=2,lr=0.01, gamma=0.99): + self.lr=lr + self.gamma=gamma + self.n_actions=n_actions + self.action_space = [i for i in range(n_actions)] + + self.actor_critic = ActorCriticNetwork(n_actions=n_actions) + self.actor_critic.compile(Adam(lr=lr)) + + self.action = None + + def choose_action(self,state): + state = tf.convert_to_tensor([state]) + _, probs = self.actor_critic(state) + action = np.random.choice(self.action_space,p=probs.numpy()[0]) + return action + + def save_model(self): + self.actor_critic.save_weights(self.actor_critic.model_name) + + def load_model(self): + self.actor_critic.load_weights(self.actor_critic.model_name) + + def learn(self,state,action,reward,state_,done): + + state = tf.convert_to_tensor([state]) + state_ = tf.convert_to_tensor([state_]) + #action = tf.convert_to_tensor([action]) + reward = tf.convert_to_tensor([reward]) + + with tf.GradientTape() as tape: + value, probs = self.actor_critic(state) + value_, probs_ = self.actor_critic(state_) + value = tf.squeeze(value) + value_ = tf.squeeze(value_) + + action_probs = tfp.distributions.Categorical(probs=probs) + log_prob = action_probs.log_prob(tf.convert_to_tensor(action)) + + """ + log_prob = -sparse_categorical_crossentropy_with_logits + what is the loss function exactly ??? calculate it + (how tf works, sess, graph, fast?) + """ + + delta = reward + self.gamma * value_ * (1-int(done)) - value + actor_loss = -log_prob * delta + critic_loss = delta**2 + + total_loss = actor_loss + critic_loss + + gradient = tape.gradient(total_loss, self.actor_critic.trainable_variables) + self.actor_critic.optimizer.apply_gradients(zip(gradient,self.actor_critic.trainable_variables)) + + + +if __name__ == '__main__': + import gym + + env = gym.make('CartPole-v0') + agent = Agent(lr= 0.9*1e-5,n_actions=env.action_space.n) + n_games = 2000 + + score_history = [] + max_score, max_avg = 0,0 + + + for i in range(n_games): + obs = env.reset() + done = False + score = 0 + steps = 0 + while not done: + action = agent.choose_action(obs) + obs_,reward,done,info = env.step(action) + score += reward + agent.learn(obs,action,reward,obs_,done) + obs = obs_ + steps += 1 + score_history.append(score) + avg_score = np.mean(score_history[-100:]) + + print('GAMES:',i,'SCORE:',score,'AVG SCORE:',avg_score) + if i % 100 == 0: print(max_score,max_avg) + if score > max_score: max_score = score + if avg_score > max_avg: max_avg = avg_score diff --git a/agents/DDQN.py b/agents/DDQN.py new file mode 100644 index 0000000..d9a1f35 --- /dev/null +++ b/agents/DDQN.py @@ -0,0 +1,123 @@ +import os,sys,inspect +current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parent_dir = os.path.dirname(current_dir) +sys.path.insert(0, parent_dir) + +from collections import deque +import numpy as np +import random +import tensorflow as tf +from tensorflow.keras.optimizers import Adam +import numpy as np +import os +from networks.networks import * +from extras.experience_memory import * + +class Agent(object): + + def __init__(self, action_size, lr=0.0001, conv=False, batch_size=32, \ + gamma=0.99, epsilon_max=1.0, epsilon_min=0.0001,\ + update_target_freq=3000, train_interval=100, \ 
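# NOTE (illustrative sketch, not from the original patch): the learn() method of
# agents/DDQN.py below builds a Double-DQN target -- the online network selects
# argmax_a Q(s', a) and the target network evaluates that action:
#     y = r + gamma * (1 - done) * Q_target(s', argmax_a Q_online(s', a))
# Minimal NumPy illustration with a hypothetical batch of 2 states x 3 actions:
import numpy as np
rewards       = np.array([1.0, 0.0])
notdones      = np.array([1.0, 0.0])
q_online_next = np.array([[0.1, 0.5, 0.2], [0.3, 0.1, 0.0]])   # online net on s'
q_target_next = np.array([[0.2, 0.4, 0.1], [0.6, 0.2, 0.3]])   # target net on s'
best = np.argmax(q_online_next, axis=1)                         # action selection
targets = rewards + 0.99 * notdones * q_target_next[np.arange(2), best]
# targets -> [1.396, 0.0]; learn() writes these into q_pred at the taken actions
# before calling model.fit(states, q_target).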
+ mem_size=50000, fname='mitsos_dqn.h5'): + + self.action_size = action_size + self.action_space = [i for i in range(action_size)] + self.lr = lr + self.epsilon_max = epsilon_max + self.epsilon_min = epsilon_min + self.epsilon = epsilon_max + self.batch_size = batch_size + self.gamma = gamma + self.update_target_freq = update_target_freq + self.train_interval = train_interval + self.model_file = fname + + self.memory = Memory(n_actions=action_size) + + self.model = DQNetwork(action_size,conv=conv) + self.model.compile(loss='mse',optimizer=Adam(lr)) + self.target_model = DQNetwork(action_size,conv=conv) + + def choose_action(self,state): + if np.random.random() < self.epsilon: + action_idx = np.random.choice(self.action_space) + else: + state = tf.convert_to_tensor([state]) + action = self.model(state).numpy()[0] + action_idx = np.argmax(action) + return action_idx + + def store_experience(self,state,action,reward,new_state,done): + self.memory.store_experience(state,action,reward,new_state,1-int(done)) + + def learn(self): + if self.epsilon > self.epsilon_min: + self.epsilon -= (self.epsilon_max - self.epsilon_min) / 50000 + if self.memory.memCounter % self.update_target_freq == 0: + self.update_target_model() + + if not (self.memory.memCounter % self.train_interval == 0): + return + + n_samples = min(self.batch_size*self.train_interval, self.memory.memCounter) + states,action_ind,rewards,new_states,notdones = self.memory.sample_memory(n_samples) + + q_pred = self.model.predict(states) + q_eval = self.model.predict(new_states) + q_next = self.target_model.predict(new_states) + q_target = q_pred + + sample_index = np.arange(n_samples) + #q_target[sample_index,np.argmax(q_target,axis=1)] = rewards[sample_index] + self.gamma*notdones[sample_index]*q_next[sample_index,np.argmax(q_eval,axis=1)] + q_target[sample_index,action_ind.astype(int)] = rewards[sample_index] + self.gamma*notdones[sample_index]*q_next[sample_index,np.argmax(q_eval,axis=1)] + + self.model.fit(states,q_target,batch_size=self.batch_size,verbose=0) + + return + + def update_target_model(self): + self.target_model.set_weights(self.model.get_weights()) + return + + def save_model(self): + self.model.save_weights(self.model_file) + + def load_model(self): + self.model.load_weights(self.model_file) + self.target_model.load_weights(self.model_file) + + + +if __name__ == '__main__': + import gym + from statistics import * + + env = gym.make('CartPole-v0') + agent = Agent(action_size=2) + + dir_path = os.path.dirname(os.path.realpath(__file__)) + L = Logger(dir=dir_path,fname='cartpole_ddqn') + + n_games = 2000 + scores = [] + avg_score = 0 + + for i in range(n_games): + state = env.reset() + done = False + score = 0 + while not done: + action = agent.choose_action(state) + new_state,reward,done,_ = env.step(action) + score += reward + agent.store_experience(state,action,reward,new_state,done) + state = new_state + + agent.learn() + + L.tick() + + L.add_log('score',score) + L.save_game() + scores.append(score) + print('GAME:',i,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:])) diff --git a/agents/PPO.py b/agents/PPO.py new file mode 100644 index 0000000..0e3b25e --- /dev/null +++ b/agents/PPO.py @@ -0,0 +1,176 @@ +import os,sys,inspect +current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parent_dir = os.path.dirname(current_dir) +sys.path.insert(0, parent_dir) + +import os +import numpy as np +import tensorflow as tf +from tensorflow.keras.optimizers import Adam +import random +import 
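# NOTE (illustrative sketch, not from the original patch): agents/PPO.py below
# estimates advantages with GAE(lambda), working backwards through the rollout:
#     delta_t = r_t + gamma * V(s_{t+1}) * mask_t - V(s_t)
#     A_t     = delta_t + gamma * lambda * A_{t+1}
# and the critic targets are returns_t = A_t + V(s_t). A stand-alone version that
# mirrors compute_gae() (which, like this sketch, applies the done-mask only
# inside delta):
import numpy as np
def gae_returns(rewards, values, next_value, dones, gamma=0.99, lam=0.95):
    values = list(values) + [next_value]
    gae, returns = 0.0, []
    for t in reversed(range(len(rewards))):
        mask = 1.0 - float(dones[t])
        delta = rewards[t] + gamma * values[t + 1] * mask - values[t]
        gae = delta + gamma * lam * gae
        returns.insert(0, gae + values[t])
    return np.array(returns)
# gae_returns([1, 1], [0.5, 0.5], 0.5, [False, False]) -> approx. [2.431, 1.495]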
tensorflow_probability as tfp + +from networks.networks import * +from extras.experience_memory import * + +def normalize(x): + mean = np.mean(x) + if len(x) == 0: + std = 1 + else: + std = np.std(x) + return (x - mean) / std + + +class Agent(object): + def __init__(self,n_actions,lr=0.005,gamma=0.99): + self.lr = lr + self.gamma = gamma + self.n_actions = n_actions + self.LAMBDA_GAE = 0.95 + self.PPO_EPOCHS = 10 + self.MINIBATCH_SIZE = 64 + self.PPO_EPSILON = 0.15 + + self.ppo_network = PPONetwork(n_actions) + self.ppo_network.compile(optimizer=Adam(lr=lr)) + + self.memory = Memory(n_actions=n_actions) + + def choose_action(self,state): + state = tf.convert_to_tensor([state]) + probs, value = self.ppo_network(state) + action_dist = tfp.distributions.Categorical(probs=probs, dtype=tf.float32) + action = action_dist.sample() + try: + log_prob = action_dist.log_prob(action) + except: + print(state) + print(action,probs,value) + print(action_dist) + + x = self.ppo_network.fc1(state) + print(x) + x = self.ppo_network.fc2(x) + print(x) + + exit() + + return int(action.numpy()[0]), log_prob.numpy()[0], value.numpy()[0][0] + + def read_memory(self): + memory = self.memory.read_memory() + self.memory.clear() + return memory + + def store_experience(self,state,action,reward,state_,done,log_prob,value): + #self.memory.store_experience(np.expand_dims(state,axis=0),action,reward,np.expand_dims(state_,axis=0),done,log_prob,value) + self.memory.store_experience(state,action,reward,state_,done,log_prob,value) + return + + def store_experience1(self,*args): + Args = [] + for a in args: + if isinstance(a,np.ndarray): a = np.expand_dims(a,axis=0) + Args.append(a) + self.memory.store_experience(tuple(Args)) + return + + def compute_gae(self,next_value, values, rewards, dones): + values = list(values) + [next_value] + gae = 0 + returns = [] + for i in reversed(range(len(rewards))): + mask = 1 - int(dones[i]) + delta = rewards[i] + self.gamma*values[i+1]*mask - values[i] + gae = delta + self.gamma * self.LAMBDA_GAE * gae + returns.insert(0,gae+values[i]) + return np.array(returns) + + def ppo_iter(self,states,actions,log_probs,returns,advantages): + # generates random mini-batches until we have covered the full batch + batch_size = len(states) + for _ in range(batch_size // self.MINIBATCH_SIZE): + indices = np.random.randint(0,batch_size,self.MINIBATCH_SIZE) + yield states[indices],actions[indices],log_probs[indices],returns[indices],advantages[indices] + + def ppo_update(self,states,actions,log_probs,returns,advantages): + e = self.PPO_EPSILON + # PPO EPOCHS is the number of times we will go through ALL the training data to make updates + for _ in range(self.PPO_EPOCHS): + for state, action, old_log_probs, return_, advantage in self.ppo_iter(states, actions, log_probs, returns, advantages): + # grabs random mini-batches several times until we have covered all data + with tf.GradientTape() as tape: + probs,value = self.ppo_network(state) + action_dist = tfp.distributions.Categorical(probs=probs, dtype=tf.float32) + entropy_loss = tf.math.reduce_mean(action_dist.entropy()) + + new_log_probs = action_dist.log_prob(action) + ratio = tf.math.exp(new_log_probs - old_log_probs) + surr1 = ratio * advantage + surr2 = tf.clip_by_value(ratio,1-e,1+e)*advantage + + actor_loss = -tf.math.minimum(surr1,surr2) + actor_loss = tf.math.reduce_mean(actor_loss) + critic_loss = tf.math.pow(return_ - value,2) + + c1 = 0.1 + c2 = -0.05 + loss = actor_loss + c1*critic_loss + c2*entropy_loss + gradients = 
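# NOTE (editorial comment, not from the original patch): the loss assembled just
# above is the clipped PPO surrogate,
#     ratio   = exp(new_log_prob - old_log_prob)
#     L_actor = -mean(min(ratio * A, clip(ratio, 1 - eps, 1 + eps) * A)),  eps = 0.15
# plus c1 * (return - V)^2 for the critic and an entropy bonus (c2 = -0.05, so the
# entropy term raises the objective being maximised). Example of the clip: with
# ratio = 1.4 and A = 1.0, the clipped branch caps the contribution at 1.15.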
tape.gradient(loss,self.ppo_network.trainable_variables) + self.ppo_network.optimizer.apply_gradients(zip(gradients,self.ppo_network.trainable_variables)) + + + +if __name__ == '__main__': + import gym + + + env = gym.make('CartPole-v0') + agent = Agent(n_actions=2) + + def test_agent(env): + total_reward = 0 + state = env.reset() + done = False + while not done: + action,_,_ = agent.choose_action(state) + state_, reward, done, info = env.step(action) + state = state_ + total_reward += reward + return total_reward + + TEST_EPOCHS = 5 + PPO_STEPS = 256 + TARGET_SCORE = 200 + + train_epochs = 0 + early_stop = False + while not early_stop: + observation = env.reset() + for _ in range(PPO_STEPS): + action, log_probs, value = agent.choose_action(observation) + observation_, reward, done, info = env.step(action) + agent.store_experience(np.expand_dims(observation,axis=0),action,reward,np.expand_dims(observation_,axis=0),done,log_probs,value) + if done: + observation = env.reset() + continue + observation = observation_ + + obs = tf.convert_to_tensor([observation_]) + _,next_value = agent.ppo_network(obs) + next_value = next_value.numpy()[0][0] + states,actions,rewards,states_,dones,log_probs,values = agent.read_memory() + returns = agent.compute_gae(next_value,values,rewards,dones) + advantages = returns - values + agent.ppo_update(states,actions,log_probs,returns,advantages) + + if train_epochs % TEST_EPOCHS == 0: + score = test_agent(env) + print(score) + if score >= TARGET_SCORE: + early_stop = True + + train_epochs += 1 + + diff --git a/agents/PolicyGradients.py b/agents/PolicyGradients.py new file mode 100644 index 0000000..0b3306c --- /dev/null +++ b/agents/PolicyGradients.py @@ -0,0 +1,129 @@ +import os,sys,inspect +current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parent_dir = os.path.dirname(current_dir) +sys.path.insert(0, parent_dir) + +import os +import numpy as np +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras.optimizers import Adam +import tensorflow.keras.backend as K +import tensorflow_probability as tfp + +from networks.networks import PolicyGradientNetwork + + +class Agent(object): + def __init__(self,input_dims,n_actions,lr=0.003,gamma=0.99,fname='reinforce.h5'): + print('PG4') + self.input_dims = input_dims + self.n_actions = n_actions + self.action_space = [i for i in range(n_actions)] + + self.lr = lr + self.alpha = 1e-4 + self.gamma = gamma + self.epCounter = 0 + self.training_interval = 5 + + self.G = [] # episodes rewards + self.state_memory = [] + self.action_memory = [] + self.reward_memory = [] + + self.policy = PolicyGradientNetwork(n_actions=n_actions) + self.policy.compile(optimizer=Adam(lr=lr)) + + + def choose_action(self,observation): + state = tf.convert_to_tensor([observation]) + probs = self.policy(state) + action_probs = tfp.distributions.Categorical(probs=probs) + action = action_probs.sample() + + return action.numpy()[0] + + + def store_experience(self,state,action,reward): + self.state_memory.append(state) + self.action_memory.append(action) + self.reward_memory.append(reward) + + def learn(self): + actions = tf.convert_to_tensor(self.action_memory) + rewards = tf.convert_to_tensor(self.reward_memory) + + returns = self.discount_rewards(rewards) + + with tf.GradientTape() as tape: + loss = 0 + for idx, (g,state) in enumerate(zip(returns,self.state_memory)): + state = tf.convert_to_tensor([state]) + probs = self.policy(state) + action_probs = tfp.distributions.Categorical(probs=probs) + 
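# NOTE (illustrative sketch, not from the original patch): learn() in
# agents/PolicyGradients.py is plain REINFORCE -- each log pi(a_t|s_t) is weighted
# by the discounted return G_t produced by discount_rewards() further below (which
# also computes a normalised copy of G but returns the raw values). Stand-alone
# version of the backward pass:
import numpy as np
def discounted_returns(rewards, gamma=0.99):
    G, running = np.zeros(len(rewards)), 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        G[t] = running
    return G
# discounted_returns([1.0, 1.0, 1.0]) -> [2.9701, 1.99, 1.0]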
log_prob = action_probs.log_prob(actions[idx]) + loss += -g * tf.squeeze(log_prob) + + gradient = tape.gradient(loss,self.policy.trainable_variables) + self.policy.optimizer.apply_gradients(zip(gradient,self.policy.trainable_variables)) + + self.reward_memory = [] + self.state_memory = [] + self.action_memory = [] + self.epCounter += 1 + return + + + def save_model(self): + self.policy.save(self.model_file) + + def load_model(self): + self.policy = load_model(self.model_file) + + + def discount_rewards(self,rewards): + G = np.zeros_like(rewards) + r = 0 + for i in reversed(range(len(rewards))): + r = self.gamma*r + rewards[i] + G[i] = r + + mean = np.mean(G) + std = np.std(G) if np.std(G)>0 else 1 + advantages = (G - mean) / std + + return G + + + +if __name__ == '__main__': + import gym + + env = gym.make('CartPole-v0') + agent = Agent(input_dims=4,n_actions=2,lr=0.001,gamma=0.99) + + score_history = [] + episodes = 2000 + avg = [] + + + for i in range(episodes): + done = False + score = 0 + observation = env.reset() + while not done: + action = agent.choose_action(observation) + observation_,reward,done,_ = env.step(action) + agent.store_experience(observation,action,reward) + observation = observation_ + score += reward + score_history.append(score) + agent.learn() + w = score_history[-100:] + print('GAME:',i,'SCORE:',score,'AVG SCORE:',np.mean(w)) + avg.append(np.mean(w)) + + plt.plot(avg) + plt.show() + diff --git a/agents/__pycache__/DDQN.cpython-38.pyc b/agents/__pycache__/DDQN.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..141840e2f5ec0d2f1021b14b9ac7d2d44bd2e2cf GIT binary patch literal 3968 zcmZ`*OLN@D5uU+ovDoGEX-d??re$09N+zlJZAG!Ik|G^!(+|l$pzK0HJ3|UfEU+Aa zda%F~l|v4yQtmk{bBZrM=a56H@&|IBTatsr${{(sD&>4VSW=>*05v^5GmSyd^w;0a zR}&Kf!}IIk{UaJoG4^+AJo&WI_z0!=6K6~?!BZC2H4nLITcIUc-yS(ey3!hXD2+X~ z$LS6I@u&~IDLE_y!DMsK;xvzywg&C6ZFHVagdH>IWH@Q+sc;&#oz4tq!`Y{7YCoKG z?k3ISq|e=C*O_pI_kamc&hOgcF?n1xHmpZn{n5;}5H1+Y6E~S$7@ZX4(J6UMG)3@` zho_%d#q;t7Xg{>VGh!lqX`O{H3l_d2&&ro}`BnDq@55K+snKhDEbNL- zcn)Ld<+(?^*p{b9ugfm}u!QktxwOZ>al)1GLU=CxnLIBqyi1m+M{mfkT#^^$iafXH zawb>8E_&bGW6;lx-lF;u-#iQbZ80g&)2=~($Be%`dKWz$J@1Jr`Kp+f=f#ZdjNX?o zi&?>cWi22EyPlb0M(`V~JGW1V}vT&!P6kyf}usDUQQ~6WYDJF0=B7 zr7NfY`L}mYe0Xht5wrjB{q?_Y&wc-|4}U-d&%Zz1|3izheF1&{^FK58 zuYAa6)r1VH7G$ezS6tW+X@-figV}B2JfxW>E7qUvPYJi2uB=@yyoZR-5sMkYug8z=>`j;*tbee3VQ)!$f~&JFe*Qyp}7Ep07d&^$3MBYs?3xt$N>N>(OW zzH(I-8)ZIR5y>#k2XUG7R$|)SV&&@9FIJa__jPb7O^ULY4~8Ecr@zrYdCX$lXG|ED zqbSLeGK!WjrFkz-iw~e2d;nB5vncMBNuCu1m0q6R(e|K_aLh^bqR`EJSSEvHTdMA? 
zZq=qyfwp#4oUKdUKIj|9J*ll!>6RQ8Nt$QTAinoRBgu4QJfeeJaoM{)o@YHC3}QXA zH573vqcT?OvW(W0+|->?#Yu(*lv3S^QymO)A=79rNu~Bam4jT}*WRG6o7fLo-p$p9 zc8!^C(5AptQ6zMGJh2(;wzX5pbWPEgsgoq;NW4H|k;Ewyr%Aj>;tWLS<5y#!*RpsZ z9|uiMV_f?Odp(0z@jL``JwC_1r(dgWb!q`QtfXL^eg zVbsAG7KykA3xlwyw`&4F$_qKhUl$)2Ch7SUpR(F~h6j9mw%(y5mOT%q2M_5@6thi> zP_Q9vgggTg*p(xAX;tnnKhB7JO{!X}`)293dIjcsswi_MBYAHqRU$KB*aqIZo>`50 z3nuCvvh$R@8!Lf?k$0ndjgH8(r$M0Jhw&RIg$X9#p08K%^k0t91uff0F^llw9W4Td zZ#e2~1?n>`@KGNN5&R^34+$QTh200Q_1$@#vm2KB2yyr5xGG*iy*@iprbebegCAz%Ttrm%$j z&>08ITUE3<2CV8mXv9+Sy`<<`CRud@$h-v8T+HDA10_Y~!Q;OHVX?6suq zw(Fz-%hq!i+B4WG!ovi~MA)dYRp{UlTt(<-c}V~(LT@vozf&zigH&hoUK#qEW3~+2 zV~>+e$a_!cnVZU3WpBf$ViAJzHa~&v@~s)}So3@yxkj&J1=dAt-r8=}h&5jwfhncU z;W0dq2Es+T20|LybRDdN_8ngZ8PGoIRskx%E0guxWl`^Ai^zT4^Bu2~j84MZX`)ee z8DbU19AyliZZFgxJtK7gb5qFwSMVCQ^D;cXZZUP#P2b^hi)J2gFB3kAa@*>jhDB9eL z(|Sa`P9MEXg2+#OOoD=_z92z&peMG}Y_5s3ivhYDnUm7DKwI>Xr1GEOnBVkmf5CTr zTahjKGNAiFwB5QAtMc=V&;xjD+ii5>=q7>ES!k+{X!B2nr*ERw%q(bu$F z^g#F9?)A%`{_^6scjL-uU!o2!e|>$i)|21<^wUq>i`vQyZ5Q_o-AJ-xD0`)LhH-gY zH!vZsTQ}lcMco5Pn=47Xx1|&=XKRYaTjN%Q4|Fh$X~gIrCd6?HgFH#5Hdo!m4?=tW{vZs9p>Rb^dWCid@di#riEE)7)5TN!O2(;KH^M>71{{1WF*1GL zC;2)kQS(o9Ba3jelf-zm<+w`5ch)1Ls|8vp#U8gv367}G*`t?12-`D%I zYPF={`THOK<^N||)BZ_~=O605j5i)LG>vIY?`U3L>YlFJhG%eX-RxV6+T7?ncnbq< zpwrKDhrKT9MXrOh#57(U82XFzmA#6ZujV3G}i1K zKxr}ik>*Wa*Omp`n)c0I?+yM+|0F*? z(C?UD!#m}@37)rj18vJY?ag{~-rL|f!z=4&`Ejg1_SoPJKC9ZNhwV9jioZE1=o+7U zqKmcNRi5Y@7C-GBS7oh#j-PIk4Bx?g@A7xBg5ka4o#*HKZgt$CO6eSyEn-^UK-`Q?F)Jsjn)O=2!3 zFN?p*5AW#S6@Hap+0oH^g};8!h9oUE4yhbv6Bi(%jUSl4tThfiBWqb|=+atXK^LWd zs;Ser;bWqt_bT4_EJ&jD^%TaD>4}kQJ37K4FFbUz z%Xs4@5U!C+cm|tbCbJ&tp2=+Hz`QJ0U@l6V6=g-QzH`C1A=`g#gc`vxOw zGWu>Y`ff7|)JY1dzC(R(s;#^0#iW!P%zmsh=P~r2lvAA*9#cO&6B}}uT5Ib0Vg4Bj z=hqa$w4*R?RK+;>rQHg;-9SuIz4YnAXIHK* z_}3Q}WeHmF7jNCX`pL4aeRgB%#+8MYtDpGGH@rnz&ij^ce0p=qBgQ43Jcz_SS?={B zKO8m+&8XXJcenslY0~+OcO$VSovtcjH&)C$YieP&6EOugWhXhQ&?wjU+hIHL{Ta04 z2@p+pp8vrVCXDTx(#-5GbCXuweOy$?5|u*!ii)y$og6uXF#AS|+8x+?W+vvkwQi?I zqOChje{5t{Y7um1_I3?5H?=Xw`BDqN&^7ucMN*h5F)B58OjWn&8&qiYOEh;@0AFG6 zTRAN}|1GKPV5YCN(D+JAs;S+t0ZNR6@Wj;Ft*7R0je6EhDBoOqYDl{$qSaX1EZORD z<&%vOX(VfMv>Ao>xk&sZ@)I7$kr0P*a%m=OJy~V#I1%mDO=SnNd^JdTD-!Ky(2<3; zP81~P&Ppc^y1fpUZYOH_v!_97QtKo_xw2B=km{J^5 z_4`3*lgn~$bv`XEoxCC50)rsSeOc?(=G`cY`Os8Pf+IcxqQUoT`k3zO21*BIMRyIn zd1-8q=VtK2=oiqB?1Elp3y80zCEy=43`;=G7(f>(fb^-p)G)cW#0xZ%oDJ8p+K3eGA>!m<#u zK!{*VmLY{8WH7OA3=`}1c*t@M(V3*3Y{H%!R&JtJAX>3!Er3crcu5Fy^yR~zQ3tDG z9!G)Wq}51IHL0{s>ZNCc-$fk9J{6Vo#r@^TVH-~`gy0j8+b zW;H7?BLN1b1;T@AEpuTHsFSQ<8|a-v(lMc>?u0f)fd+lxDyBun0Te@@O???`s9!+f zQR5Ec3nhFTCla?`Oq~HNUXTtOC24iYMP0#xA2Dar7D3qJf^1ZrA);WKd`CLa1VAR& zgRQjCDCP1kh5TXS=f3Z#B9?{R5(%$hg0W||0!|;3O%Xy1Z6pyvd=I_6ll-4b$`njC zVZa!46N|@VAR1u6!fyhqH~~7j=W5S!HICLmHU;n z-mlVSB^yPpmg0htjj5iIbSy1RQ_pxho>nHbYyxxF({UtH&NO0X>cFZ^)pJ0N7)>YA z0|TS1Ws}3XCowv&kEP{w1f$G>kxelxn`U-)Fr7@N(<$Xk2h&mIH#1*q?`ofGx3#ad zwAvp}XK;RJeFCEnsoH`5WHOcJ^@$xrVcE%zx-tT1NgRa;1!2K63OlXe|BjwN&JQb3 z|J;}shloWP$U&<8lE`roS-CC(22@X00>C>dM7%-dB#{P@Qy}Cu$z6(4Y_rJ`r;vvi zFi1qbEd}$^1Te~Kv(xVRt6M{#EOe1pbvL`x>_rcx+ZwJU#}g5>!(g?;!6@2Pi3O!y zas-Il+46TeV`*-;dyT`&aK+cO8!|BQb0TD`;tq(^x%dS&&Jm$_RlRF8J-DaqA75k?%|mz5++f{vfZ zk=&*}0+TYV+|DTT5*O9LcDNQjCmMB~C?z~jd^PpRwzOn$JOygI?`ymms zTtO}@P4MP;N`DjN&b5zVw}_>nvgOzppA!cq9J$l&A)6r2!%e4&{9Bx+{vYu|>0ZoT z`y-f<784>)10QgUK?tTF-CbO&8*Z`AO1f*}zF@dzvPMgxNq);A%RHrSJg`;VcxtfK PNrgOPMorCB0{{O2C_eXO literal 0 HcmV?d00001 diff --git a/controllers/ddqn_webots/ddqn_webots.py b/controllers/ddqn_webots/ddqn_webots.py new file mode 100644 index 0000000..243f563 --- /dev/null +++ 
b/controllers/ddqn_webots/ddqn_webots.py @@ -0,0 +1,203 @@ +import os,sys,inspect +current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parent_dir = os.path.dirname(current_dir) +parent_dir = os.path.dirname(parent_dir) +sys.path.insert(0, parent_dir) + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #cut annoying tf messages + +import numpy as np +import random +import importlib.util +import time +from collections import deque +import cv2 +import matplotlib.pyplot as plt +from controller import Keyboard +import pickle +import shelve +from tensorflow.keras.optimizers import Adam +import tensorflow as tf + +#from curiosity import ICM +import extras.optical_flow as of +from extras.statistics import * + +from environments.WebotsEnv import * +from agents.DDQN import Agent +from extras.experience_memory import Memory +from networks.networks import * + +class Memory(Memory): + def __init__(self,n_actions): + super(Memory,self).__init__(n_actions=n_actions) + + def sample_memory(self,n_samples): + samples = random.sample(self.memory,n_samples) + batch_size = self.memCounter + + num_lists = len(self.memory[0]) + lists = [[] for _ in range(num_lists+2)] + + for sample in samples: + image = sample[0][0] + image_ = sample[3][0] + sensors = sample[0][1] + sensors_ = sample[3][1] + sample = [image,sensors] + list(sample[1:3]) + [image_,sensors_] + list(sample[4:]) + for i in range(len(sample)): + lists[i].append(sample[i]) + + + lists = [np.vstack(l) if isinstance(l[0],np.ndarray) else np.vstack(l).reshape(-1) for l in lists ] + + lists = [[lists[0],lists[1]]] + lists[2:4] + [[lists[4],lists[5]]] + lists[6:] + + return tuple(lists) + +class DoubleInputAgent(Agent): + def __init__(self, action_size, lr=0.0001, conv=False, batch_size=32, \ + gamma=0.99, epsilon_max=1.0, epsilon_min=0.0001,\ + update_target_freq=3000, train_interval=100, \ + mem_size=50000, fname='dqn.h5'): + self.action_size = action_size + self.action_space = [i for i in range(action_size)] + self.lr = lr + self.epsilon_max = epsilon_max + self.epsilon_min = epsilon_min + self.epsilon = epsilon_max + self.batch_size = batch_size + self.gamma = gamma + self.update_target_freq = update_target_freq + self.train_interval = train_interval + self.model_file = fname + + self.memory = Memory(n_actions=action_size) + + self.model = MitsosDQNet(action_size) + self.model.compile(loss='mse',optimizer=Adam(lr)) + self.target_model = MitsosDQNet(action_size) + + if os.path.exists(self.model_file): + state = env.reset() + state = [tf.convert_to_tensor([state[0]]),tf.convert_to_tensor([state[1]])] + self.model(state) + self.target_model(state) + self.load_model() + + + def choose_action(self,state): + + if np.random.random() < self.epsilon: + action_idx = np.random.choice(self.action_space) + else: + state = [tf.convert_to_tensor([state[0]]),tf.convert_to_tensor([state[1]])] + action = self.model(state).numpy()[0] + action_idx = np.argmax(action) + return action_idx + + +dir_path = os.path.dirname(os.path.realpath(__file__)) +L = Logger(dir=dir_path,fname='WebotsRound_ddqn') + +env = Mitsos() + +keyboard = Keyboard() # to control training from keyboard input +keyboard.enable(env.timestep) + +agent = DoubleInputAgent(action_size=3,lr=0.0001,mem_size=40000) + + + +RESTORE_DAMAGE = 30 +n_games = 2000 +training = True +epsilon_train = agent.epsilon +k = -1 +filename = 'saveforreload.out' + + + +scores = deque(maxlen=100) +i = 0 + + + + +if os.path.exists(filename): + my_shelf = shelve.open(filename) + for key in my_shelf: + 
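# NOTE (illustrative sketch, not from the original patch): env.robot.worldReload()
# restarts the whole Webots controller process, so this script periodically shelves
# its training state (epsilon, replay memory, logger) to 'saveforreload.out' and,
# on the next start, restores every shelved key into globals() in the loop below.
# Minimal shelve round-trip with hypothetical variable names:
import shelve
with shelve.open('checkpoint_demo', 'n') as db:   # 'checkpoint_demo' is a made-up name
    db['epsilon'], db['memCounter'] = 0.37, 1200
with shelve.open('checkpoint_demo') as db:
    restored = {k: db[k] for k in db}             # {'epsilon': 0.37, 'memCounter': 1200}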
globals()[key]=my_shelf[key] + agent.epsilon = epsilon + agent.memory.memCounter = memCounter + agent.memory.memory = memory + L.Variables = Lvars + L.fname = fname + + my_shelf.close() + del my_shelf,epsilon,memCounter,memory,Lvars,fname + print('VARIABLES LOADED') + os.remove(filename) + +while (i60: action_idx = k-314 + + observation_, reward, done, info = env.step(action_idx) + + state = [np.expand_dims(observation[0],axis=0),np.expand_dims(observation[1],axis=0)] + new_state = [np.expand_dims(observation_[0],axis=0),np.expand_dims(observation_[1],axis=0)] + + agent.store_experience(state,action_idx,reward,new_state,done) + observation = observation_ + if training: agent.learn() + score += reward + ep_steps += 1 + L.add_log('reward',reward) + L.tick() + if k == 43: + training = False + epsilon_train = agent.epsilon + agent.epsilon = agent.epsilon_min + print('Training off') + if k == 45: + training = True + agent.epsilon = epsilon_train + print('Training on') + L.add_log('score',score) + L.save_game() + scores.append(score) + print('EPISODE:',i,'STEPS:',ep_steps,'EPSILON',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores)) + agent.save_model() + + i += 1 + + if i % RESTORE_DAMAGE == 0: + myshelve = shelve.open(filename,'n') + epsilon = agent.epsilon + memory = agent.memory.memory + memCounter = agent.memory.memCounter + Lvars = L.Variables + fname = L.fname + for key in dir(): + try: + myshelve[key] = globals()[key] + except TypeError: + pass + # + # __builtins__, my_shelf, and imported modules can not be shelved. + # + print('ERROR shelving: {0}'.format(key)) + myshelve.close() + del myshelve + env.robot.worldReload() diff --git a/controllers/ppo_webots/ppo_webots.py b/controllers/ppo_webots/ppo_webots.py new file mode 100644 index 0000000..e1cec0a --- /dev/null +++ b/controllers/ppo_webots/ppo_webots.py @@ -0,0 +1,166 @@ +import os,sys,inspect +current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parent_dir = os.path.dirname(current_dir) +parent_dir = os.path.dirname(parent_dir) +sys.path.insert(0, parent_dir) + +import gym +import tensorflow as tf +from collections import deque +import os +from tensorflow.keras.optimizers import Adam +import tensorflow_probability as tfp +import shelve + +from agents.PPO import Agent +from environments.WebotsEnv import Mitsos + +from networks.networks import * +from extras.experience_memory import * + +class Memory(Memory): + def __init__(self,n_actions): + super(Memory,self).__init__(n_actions=n_actions) + + def read_memory(self): + samples = self.memory + batch_size = self.memCounter + + num_lists = len(self.memory[0]) + lists = [[] for _ in range(num_lists+2)] + + for sample in samples: + image = sample[0][0] + image_ = sample[3][0] + sensors = sample[0][1] + sensors_ = sample[3][1] + sample = [image,sensors] + list(sample[1:3]) + [image_,sensors_] + list(sample[4:]) + for i in range(len(sample)): + lists[i].append(sample[i]) + + + lists = [np.vstack(l) if isinstance(l[0],np.ndarray) else np.vstack(l).reshape(-1) for l in lists ] + + lists = [[lists[0],lists[1]]] + lists[2:4] + [[lists[4],lists[5]]] + lists[6:] + + return tuple(lists) + +class DoubleInputAgent(Agent): + def __init__(self,n_actions,lr,gamma=0.99): + self.lr = lr + self.gamma = gamma + self.n_actions = n_actions + self.LAMBDA_GAE = 0.95 + self.PPO_EPOCHS = 10 + self.MINIBATCH_SIZE = 64 + self.PPO_EPSILON = 0.15 + + self.ppo_network = MitsosPPONet(n_actions) + self.ppo_network.compile(optimizer=Adam(lr=lr)) + + self.memory = 
Memory(n_actions=n_actions) + + def choose_action(self,state): + state = [ tf.convert_to_tensor([state[0]]) , tf.convert_to_tensor([state[1]]) ] + probs, value = self.ppo_network(state) + action_dist = tfp.distributions.Categorical(probs=probs, dtype=tf.float32) + action = action_dist.sample() + try: + log_prob = action_dist.log_prob(action) + except: + print('Nan Error!') + exit() + + return int(action.numpy()[0]), log_prob.numpy()[0], value.numpy()[0][0] + + +env = Mitsos() +agent = DoubleInputAgent(n_actions=env.action_size,lr=0.0005) + + +RESTORE_DAMAGE = 30 +n_games = 2000 +discrete_actions = [[-1,1],[1,1],[1,-1]] +training = True +k = -1 +filename = 'saveforreload.out' + + +scores = deque(maxlen=100) +i = 0 + + +if os.path.exists(filename): + my_shelf = shelve.open(filename) + for key in my_shelf: + globals()[key]=my_shelf[key] + my_shelf.close() + del my_shelf + print('VARIABLES LOADED') + os.remove(filename) + + + +def test_agent(env): + total_reward = 0 + state = env.reset() + done = False + while not done: + action,_,_ = agent.choose_action(state) + state_, reward, done, info = env.step(action) + state = state_ + total_reward += reward + return total_reward + +TEST_EPOCHS = 5 +PPO_STEPS = 256 +TARGET_SCORE = 200 + +train_epochs = 0 +early_stop = False + +while not early_stop: + observation = env.reset() + for _ in range(PPO_STEPS): + action, log_probs, value = agent.choose_action(observation) #observation = [frames_stack, sensors_stack] + observation_, reward, done, info = env.step(action) + + state = [np.expand_dims(observation[0],axis=0),np.expand_dims(observation[0],axis=0)] + state_ = [np.expand_dims(observation_[0],axis=0),np.expand_dims(observation_[0],axis=0)] + agent.store_experience(state,action,reward,state_,done,log_probs,value) + if done: + observation = env.reset() + continue + observation = observation_ + + obs = [ tf.convert_to_tensor([observation_[0]]) , tf.convert_to_tensor([observation_[1]]) ] + _,next_value = agent.ppo_network(obs) + next_value = next_value.numpy()[0][0] + states,actions,rewards,states_,dones,log_probs,values = agent.read_memory()#? + returns = agent.compute_gae(next_value,values,rewards,dones) + advantages = returns - values + agent.ppo_update(states,actions,log_probs,returns,advantages) + + if train_epochs % TEST_EPOCHS == 0: + score = test_agent(env) + print(score) + if score >= TARGET_SCORE: + early_stop = True + + train_epochs += 1 + + + if i>0 and i % RESTORE_DAMAGE == 0: + myshelve = shelve.open(filename,'n') + for key in dir(): + try: + myshelve[key] = globals()[key] + except TypeError: + pass + # + # __builtins__, my_shelf, and imported modules can not be shelved. 
+ # + print('ERROR shelving: {0}'.format(key)) + myshelve.close() + del myshelve + env.robot.worldReload() diff --git a/depend.sh b/depend.sh new file mode 100644 index 0000000..3a70297 --- /dev/null +++ b/depend.sh @@ -0,0 +1,24 @@ + +#!/bin/bash + +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root" + exit 1 +fi + +apt update +apt install --yes lsb-release g++ make libavcodec-extra libglu1-mesa libxkbcommon-x11-dev execstack libusb-dev libxcb-keysyms1 libxcb-image0 libxcb-icccm4 libxcb-randr0 libxcb-render-util0 libxcb-xinerama0 libxcomposite-dev libxtst6 libnss3 +if [[ -z "$DISPLAY" ]]; then + apt install --yes xvfb +fi + +UBUNTU_VERSION=$(lsb_release -rs) +if [[ $UBUNTU_VERSION == "16.04" ]]; then + apt install --yes libav-tools +elif [[ $UBUNTU_VERSION == "18.04" ]]; then + apt install --yes ffmpeg +elif [[ $UBUNTU_VERSION == "20.04" ]]; then + apt install --yes ffmpeg +else + echo "Unsupported Linux version." +fi \ No newline at end of file diff --git a/environments/VizDoomEnv.py b/environments/VizDoomEnv.py new file mode 100644 index 0000000..d1a3827 --- /dev/null +++ b/environments/VizDoomEnv.py @@ -0,0 +1,103 @@ +import os,inspect +current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) + +from vizdoom import DoomGame, ScreenResolution +from vizdoom import * + +import skimage +from skimage import transform,color,exposure +from skimage.viewer import ImageViewer + +import numpy as np +from collections import deque + + +def preprocessImg(img, size): + + img = np.rollaxis(img, 0, 3) # It becomes (640, 480, 3) + img = skimage.transform.resize(img,size) + img = skimage.color.rgb2gray(img) + + return img + + +class VizDoomEnv(object): + def __init__(self,state_size=(64,64,4),scenario='defend_the_center.cfg'): + game = DoomGame() + path_to_scenario = os.path.join(current_dir,scenario) + game.load_config(path_to_scenario) + game.set_sound_enabled(True) + game.set_screen_resolution(ScreenResolution.RES_640X480) + game.set_window_visible(False) + game.set_available_game_variables([GameVariable.KILLCOUNT,GameVariable.AMMO2,GameVariable.HEALTH]) + game.init() + self.game = game + + self.skiprate = 4 + + self.state = None + self.state_size = state_size + self.action_size = self.game.get_available_buttons_size() + + self.steps = 0 + self.life = deque(maxlen=30) + self.kills = deque(maxlen=30) + + def reset(self): + self.game.new_episode() + game_state = self.game.get_state() + frame = game_state.screen_buffer # initial resolution 480 x 640 + frame = preprocessImg(frame, size=(self.state_size[0], self.state_size[1])) # 64x64 + state = np.stack(([frame]*4),axis=2) # 64x64x4 (stack the same frame) + #self.state = np.expand_dims(state,axis=0) #1x64x64x4 + self.state = state + self.prev_misc = game_state.game_variables + #print('new episode') + return self.state + + + def step(self,action_idx): + # perform action + action = np.zeros([self.action_size]) + action[action_idx] = 1 + action = action.astype(int).tolist() + self.game.set_action(action) + self.game.advance_action(self.skiprate) + + # get state and reward + done = self.game.is_episode_finished() + if done: + self.kills.append(self.prev_misc[0]) + self.life.append(self.steps) + #print('LIFE:',self.steps,'KILLS:',self.kills[-1],'AVG-KILLS:',np.mean(self.kills)) + self.steps = 0 + self.game.new_episode() + game_state = self.game.get_state() + reward = self.game.get_last_reward() + + new_frame = game_state.screen_buffer + misc = game_state.game_variables + + (img_rows, img_cols) = 
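# NOTE (illustrative sketch, not from the original patch): VizDoomEnv keeps its
# observation as a rolling stack of the last four preprocessed 64x64 grayscale
# frames -- step() prepends the newest frame and drops the oldest channel:
import numpy as np
stack = np.zeros((64, 64, 4))                             # current 4-frame stack
new_frame = np.random.rand(64, 64, 1)                     # hypothetical new frame
stack = np.append(new_frame, stack[:, :, :3], axis=2)     # shape stays (64, 64, 4)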
(self.state_size[0], self.state_size[1]) + new_frame = preprocessImg(new_frame, size=(img_rows, img_cols)) + new_frame = np.reshape(new_frame, (img_rows, img_cols, 1)) + self.state = np.append(new_frame, self.state[ :, :, :3], axis=2) + + reward = self.shape_reward(reward,misc,self.prev_misc) + self.prev_misc = misc + self.steps += 1 + return self.state,reward,done,(self.kills[-1] if done else 0) + + def shape_reward(self, r_t, misc, prev_misc): + + # Check any kill count + if (misc[0] > prev_misc[0]): + r_t = r_t + 1 + + if (misc[1] < prev_misc[1]): # Use ammo + r_t = r_t - 0.1 + + if (misc[2] < prev_misc[2]): # Loss HEALTH + r_t = r_t - 0.1 + + return r_t diff --git a/environments/WebotsEnv.py b/environments/WebotsEnv.py new file mode 100644 index 0000000..f9d2d41 --- /dev/null +++ b/environments/WebotsEnv.py @@ -0,0 +1,239 @@ + +import os,sys,inspect +current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parent_dir = os.path.dirname(current_dir) +sys.path.insert(0, parent_dir) + +from extras.dynamic_map import * +from extras.optical_flow import * + +from controller import Robot,Supervisor,Node,Field +from controller import Camera,DistanceSensor,LED,Motor +import numpy as np +import random +import cv2 + +OF = OpticalFlow() + +def D(A,B): + (x,y) = A + (a,b) = B + return np.sqrt((x-a)**2 + (y-b)**2) + +def target_reward(P0,P1,TARGET): + + d0 = D(P0,TARGET) + d1 = D(P1,TARGET) + + # version 1: distance form target + c = 0.5 + R1 = 1 / (d1+0.1) + + #version 2: difference of distances + dD = d1-d0 + R2 = -dD*5 + + flag = False + if R2 < 0.0005 and R2 > -0.0005: + if np.sign(R1)==-1: R1 = -1 + else: + R1 = 0 + + return R1 + +class Mitsos(): + # Webots-to-environment-agnostic + def __init__(self,max_steps=500): + self.name = "Mitsos" + self.max_steps = max_steps + + self.robot = Supervisor() # create the Robot instance + self.timestep = int(self.robot.getBasicTimeStep()) # get the time step of the current world. 
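# NOTE (illustrative sketch, not from the original patch): the Mitsos environment's
# discrete actions [[1,-1],[1,0],[1,1]] are (forward speed u, turn rate w) pairs;
# set_wheels_speed() below maps them to differential-drive wheel velocities:
def wheel_speeds(u, w):                 # illustrative helper, mirrors set_wheels_speed
    return u + w, u - w
# wheel_speeds(1, 0)  -> (1, 1)   drive straight
# wheel_speeds(1, -1) -> (0, 2)   arc to one side
# wheel_speeds(1, 1)  -> (2, 0)   arc to the other side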
+ # crash sensor + self.bumper = self.robot.getDeviceByIndex(36) #### <--------- + self.bumper.enable(self.timestep) + + # camera sensor + self.camera = self.robot.getCamera("camera") + self.camera.enable(self.timestep) + # ir sensors + IR_names = ["ps0", "ps1", "ps2", "ps3", "ps4", "ps5", "ps6", "ps7"] + self.InfraredSensors = [self.robot.getDistanceSensor(s) for s in IR_names] + for ir in self.InfraredSensors: ir.enable(self.timestep) + + # wheels motors + motors = ["left wheel motor","right wheel motor"] + self.wheels = [] + for i in range(len(motors)): + self.wheels.append(self.robot.getMotor(motors[i])) + self.wheels[i].setPosition(float('inf')) + self.wheels[i].setVelocity(0) + + self.robot.step(self.timestep) + + self.cam_shape = (self.camera.getWidth(),self.camera.getHeight()) + self.sensors_shape = (14,) + + self.x_start = -0.71 + self.y_start = -0.83 + self.path = [] + self.map = DynamicMap(self.x_start,self.y_start,map_unit=0.02) + self.first_step = True + self.x_target,self.y_target = 0,0 + self.set_target() + self.misc = [0,0] + + #self.discrete_actions = [[0,-1],[1,0],[0,1]] # normal mode + self.discrete_actions = [[1,-1],[1,0],[1,1]] # WebotsRound + self.action_size = len(self.discrete_actions) + self.stepCounter = 0 + + + def read_ir(self): + ir_sensors = np.array([ i.getValue() for i in self.InfraredSensors]) + max_ = 2500.0 + for i in range(len(ir_sensors)): + if ir_sensors[i] < 0: + ir_sensors[i] = 0.0 + elif ir_sensors[i] > max_: + ir_sensors[i] = 1.0 + else: + ir_sensors[i] = ir_sensors[i] / max_ + return ir_sensors + + def read_camera(self): + image = np.uint8(self.camera.getImageArray()) + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + grayN = gray / 255.0 + return gray + + def collision(self): + return bool(self.bumper.getValue()) + + def set_position(self,x,y,z): + #return + object = self.robot.getFromDef(self.name) + positionField = object.getField("translation") + Field.setSFVec3f(positionField,[y,z,x]) + for _ in range(5): self.robot.step(self.timestep) # if not nan values in first iteration + + def set_rotation(self,a): + object = self.robot.getFromDef(self.name) + rotationField = object.getField("rotation") #object.getPosition() + Field.setSFRotation(rotationField,[0,1,0,a]) + self.robot.step(self.timestep) # if not nan values in first iteration + + def set_wheels_speed(self,u,w): + # u: velocity + # w: angular velocity + u1 = u + w + u2 = u - w + + self.wheels[0].setVelocity(u1) + self.wheels[1].setVelocity(u2) + + def get_robot_position(self): + object = self.robot.getFromDef(self.name) + y,z,x = object.getPosition() + return [x,y,z] + + def wait(self,timesteps): + for _ in range(timesteps): + self.robot.step(self.timestep) + return + + def get_reward(self,version='sparse'): + reward = 1 + if version == 'sparse': + if (self.collision()): return reward-100 + #return (not WasVisited) + 1 + return reward + 1 + + def place_target(self,x,y,z): + root = self.robot.getRoot() + node = root.getField('children') + if not self.first_step: node.removeMF(-1) + else: self.first_step = False + translation = 'translation '+str(y)+' '+str(z)+' '+str(x) + shape = 'Cylinder { height 0.1 radius 0.02}' + nodeString = " Solid { "+translation+" children [Shape {appearance PBRAppearance {baseColor "+"1 0 0"+" roughness 1 metalness 0} geometry "+shape+" } ] boundingObject "+shape+" physics Physics { }} " + node.importMFNodeFromString(-1,"DEF My_Solid_"+'TARGET'+nodeString) + + def set_target(self): + # in analog coordinates + x = (random.random() - 0.5)*2 + z = 0 + y = 
(random.random() - 0.5)*2 + #x,y,z = 0,0,0.9 + #self.place_target(x,y,z) + self.x_target = x + self.y_target = y + + def reset(self,reset_position=True): + self.stepCounter = 0 + xs,ys = self.x_start,self.y_start + self.path = [] + self.map.path = [] + OF.reset() + self.set_target() + if (reset_position): + self.set_position(xs,ys,0.005) + self.set_rotation(3.14) + state,_,_,_ = self.step(1) + return state + + + def step(self,action_idx): + action = self.discrete_actions[action_idx] + stacked_frames = 4 + xt,yt = self.x_target,self.y_target + x0,y0,z = self.get_robot_position() + was_visited = self.map.visit(x0,y0) + self.path.append((x0,y0)) + + u1,u2 = action + self.set_wheels_speed(u1,u2) + + xp,yp = x0,y0 + cam4 = np.zeros(shape=self.cam_shape+(4,)) + sensors4 = np.zeros(shape=self.sensors_shape+(4,)) + for i in range(stacked_frames): + [cam,sensors] = [self.read_camera(),self.read_ir()] + self.robot.step(self.timestep) + xn,yn,z = self.get_robot_position() + + pos = [xp,yp,xn,yn,xt,yt] + xp,yp=xn,yn + sensors = np.array(list(sensors) + pos) + + cam4[:,:,i] = cam + sensors4[:,i] = sensors + + sensors4 = sensors4.reshape(-1) + state = [cam4, sensors4] + + xn,yn,z = self.get_robot_position() + + if action==[0,0]: + return state,0,0,'' + + explore = self.map.spatial_std_reward() + collision = self.collision() + #r_optic_flow = OF.optical_flow(cam4[:,:,0],cam4[:,:,3],action) + #r_reach_target = target_reward((x0,y0),(xn,yn),(xt,yt)) + + c1 = 0.1 + c2 = -1 + c3 = 10 + c4 = 0 + external_reward = c1*explore + c2*collision + c1*int(explore > self.misc[0]) + self.misc = [explore,collision] + + done = collision or (self.stepCounter >= self.max_steps) + self.stepCounter += 1 + info = '' + return state,external_reward,done,info + + + def render(): + return \ No newline at end of file diff --git a/environments/__pycache__/VizDoomEnv.cpython-38.pyc b/environments/__pycache__/VizDoomEnv.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5e6be0c802785a18dbfefad342404c984c5070e GIT binary patch literal 3052 zcmZuz-ESPX5hr)|_Vzw?`j#9cNYl?tTgi3Pq-lfJhU3IW9R-LTqu2sntd6|Xp7wSx z;drxs51N60hC{9IRjQ$1rSNPhO>R-r9^Be9-k&E)cx#4igC1-v!!_PXMCPVwz zKmQXY9mf7mgX=B8;10U}JBVO{r!1`gdB`OjI}6uRPx3_#-9KmNoPJ>)+D;lw8ewD7 z44a%*xoPmETHOj;7<*}J(hl2`PS}}r!>-k-r7M$O*t0xOSHm^v`RV#(Biw+jE`k>< z-28}%hG@QEqA9n&aKmlEAG3bz0%v3gea8eJIcp^>j@jywLeJXXH&Wa?@`rPoh z`mSjdD$evUR})hkLViAz1BdosHsWjO_t5pv zKnk|tB@85-b9c@PXY4M#l9l}2v7_3;FJYQFD;bXJ9w?4A7cNM@Zk%jt0+pv}{4CL? 
[GIT binary patch data omitted: compiled Python bytecode for the committed environments/__pycache__/ and extras/__pycache__/ *.cpython-38.pyc files -- not human-readable]
zUj=AARa14ezG|o@+CVL+7TTIx#F0y;ad%QAy=Zi6lNgmnlKV zDEEmhS;#TsDZdb#4ywGtRG?~oN7bJ=kh-b?>J`3fs)Z*Ue4v`3HN|jntCM^Qy$$~K zM=F1N``E<^$393~cI)Hl(R!xSSm)j5)^;>XioHAWPCV*1#z}fJ$%`oM#ogM2QPvyY zOZMZoIEG=Eru!a~mX66{gQr)1^Un{T{^jkqWBlDDrJ3CTdslk}J)IP>PNG}KWWmq4 z@gN(;X}fNuN_vH9_2a16)^RwAbTrP5)7yEoyL|thjrYU#jd#($dp&&TgL~cc&kn8M z{it)_)JDVEIzP>d+dA6QbT#AQDk|I6wdzor$D{sdOn(F2e}8Z7VK$D}(jrN-wVQE1 zEV9X(N+zRh9KlR$@uNaV`C3-lEbPN*S0{TW2*V^ziZG--^A!*#Bp3AkC-8_{`fngT z(d7FV=IniSwyZ}Nn-x-qM74$|moC^LFZmIl!ibwfUn!hJS@Oc!bdSW8TCe1N4(DJA z&G7#qKIKzU3Yb}Rj2uV#upJnYPK>*qq{R>QGB(v@Fcb84iuG)iY13N2d*|*(c;oiQ zo3}Tvf80Gk$TD@S(tmzGyRW0(P`9wA?dS`%)J?|GAU0z1hE-ifULeWhB*QTw^cwey zBug)1Ft;Jh>%!wLz9RQ8&8y+e{2sl2R>&p44udbyIn{>c1oR@TUcP@;FVK=W(Gs=L zQYlJDu|9%l)Bluz;!Ne#oq9!3$^!(`W^L%`$3# zk#Hw@wVb|0-OC`|3qX(FoH{?)(|6v}OAr}Z>`h|hrz+BLf8$Ox3}a;cC@GAu zDk>4`h>H=BVOXH#oD)Gj2dG8=%^t_!k(9g<=2`k{h7H#77h)ZXpX7 z3Ka+9fZt@l{Pibd>Y@76p!5&DBL==Ut(Uh3;$Girism19wCX!8^{RB9mBcOQ&0dkZ8=q)6WPjwegfnekI4de9^-5X(D zq?D-v{nC7r<+1USasDvd?fSb>QoI-M;Q|B;`gJ1YF}e+6+#aEeCP*|qF%F!>IN3-U z9vS|?@SfqSTeE%^>cIoOMiZT6JTQEC*3FtqN4wz;Kqb@pbxh2^3BsJZSaL4HpPKw4 zybJImR)j#W4!msva6GZUG8Z3L=eOw*+Rh4Tma28Kzk^YH>YxH`4_V1Kfw8MqOOZ>+ z14MyDoJgphNkk!LE36xCz%dE8Md<=3>^?JY1-Nk@yVyQZ1UN-=z*UkL8BX%x(%ui) zlpM}ME4O|Mjt`~&D=_V(E#)Qr+?7P7pS;^S^%(0V`YImPcJ;T=Xo^zugme&Bz9aK3 zU7X;+oC!tNH$2t!C(_r5(4TS+?!9Pd5N7>fVKgWFXI$dDi0>SK3n35l@IdUJn>RtV zhj1zR0GU0;t@C_(!A1x zT7=%1o9`fh5(9x<`j9%87-nI&L%&4qAqoTP9dIRwO#g1SZtA7y3UOGY{t^9P>vo-| zTqyr@g&#=iHJEZS>X$y$?scr5t;Pp<2+9lBH2nZIJ+j@INQjJ+Z;x%h(hjO9dyVE%s9~P{$DZ0t-@-6Lim+Y5E@#cv%;u7p3FhW> zJdj$nSY4b$%ml2C=m|vAY4IjT0wy7SHiqUmc`j|Gn6hT1Hx)1w#fMEnk&Z(_qN5v- zAd{fH$3Coj{(0)2aW5DDskrzpB)QF!`2O;|Cs}C+DI;>Q6lfTRrXGglOl^;dZ-yZy zu$4r=Nc&smTSe0yt+w_1#C3?Ao^CmEXie`S%jJ~+1Uy&{n!%;urC=#owX$~2xB%G6 zUS&?3SuG=BMA}%4k_X5Ti8J0w>r6(4X#f~c@}s;`1D*2!jyJ)NtLMB$=|D~w!5s+i GE^OSkrp8#?m=O6IEFR52v$Y0${&-mIqUArLQ%v!c^|&2HPhb?O7&<+N)0vaHL$Buz{hn{vJPn4jSF7grUfsU8 z)N5dL)8??*YgT8iUh7k)JmtS+%0CyqWvl`vv65J=#P?L~Na!D8F<715xBXiD;DTT9 z-g>+eudBLR`k&~v=obZI>sGE!`L`!c4%DE0vft`oE~V27hf^m!0^jocAjU$ zcxO~3qip9{oF5h0ct<7UG#f@m(%*@{Ep(LcpuPK{Gqvv_Ay$%jV>XNpv^cuBrXJ2qHv~cAkGm&)>Ez`-+AP( zuGqW$6;IbUHCQl-_R}~wGLH*uI^!K!S9)Wej0#OTW_Zzc^b*Y+1fQl@xNSQ!>V34_ zCMDd{;&tn!62QVqmmETO!i*qUI2}x;KsP|JJx25qtO1_^(!gd00nh=2pwI?}Qa?W$ zkK>W*3aey!K#nYxB;Qg{_KzN7mXqli)SE0-s?o{sYqSdK0@|FzfCCu#3HV~JgRmi@ zDX*&kHr^>&@0N8;BT_0nt&II9#BGQ%KDl4%aQ*y_vlrHRiZ>|<2ka=&Yo)`9C1_{L zH{tM^g8;a`<6?&mT+1a3y*;b4CPg%;lj+x}BhOWS)^A`AEKz!L2TMS@rrF^ZL~_b* z#_O!kSNWuM4GaWs*p#eJjBLG3I0{z4rln<%M7~jgk|%Qr#iH@ruTLeXkbe~*dcz+)5j zf)J)>?$n(L1)rUHK<=G$<)B`)rk-*)QCF(DzsV}e;F_eoI#ofNmG88zora^-CkyKp%0x$n=YnME*bm{J@BV z@RrH-1021BW-AZlt|gQWE{uY%gw`h#7<6(A+ zlAM8!j6VjiP=Qt_!?BGwd*9*TQpV6qtiG%1ckhZ(~3-6K-?g_D^;v5 zgl?o5xrUkzw!dORfCobV5rM@mI8nGWj|c*_z@Lg)K+s0AkjhpH3)0y$LhnzSds!zx z=|At#C1a~wyTYvhhEMCq)cqZuanmePWj?)6o0MsD8lg%7=lM(0hxoVvpjJS24-& z=v*3k7q93Ksk7Am5oYt^Zqve z*Qv9mqC+!cK%1QUA2j=bx@Xi`m9E0s&g`{{DkwF3|CC$?byqsllfG=qK-Q$E>6ET+ z!zdZib?r2ns4N?XN*#}YZIeDvp)^^&eiFV*Gz9MHXFBdaG#Xwc5vfPulrKKzd1mHA# zW0E?KQu;4~LTLji5{IGP<6o4v`aLrl`J}wBJ|z;%$>6XRytQIIMl 0.3: + shaped_std = shaped_std*0.5 + + return shaped_std + + def reset(self,x_start,y_start,map_unit): + self.__init__(x_start,y_start,map_unit) + + def visit(self,xa,ya): + [xd,yd] = self.discretize(xa,ya) + (x,y) = (xd - self.O[0],yd - self.O[1]) + + wasVisited = False + if (x>=0 and x=0 and y=0: + L = np.maximum(x+1,self.map.shape[0]) + else: + L = -x + self.map.shape[0] + self.O[0] = xd + if y>=0: + W = np.maximum(y+1,self.map.shape[1]) + else: + W = -y + self.map.shape[1] + self.O[1] 
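# NOTE (editorial comment, not from the original patch): this DynamicMap class keeps
# a growing occupancy grid of visited cells -- visit() discretizes the robot position
# and, when it falls outside the current array, re-allocates the map with a shifted
# origin (self.O) and copies the old contents in; spatial_std_reward() and
# expanding_map_reward() turn the coverage statistics into exploration rewards.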
= yd + + new_map = np.zeros((L,W)) + x_prev = x_temp-self.O[0] + y_prev = y_temp-self.O[1] + new_map[x_prev:x_prev + self.map.shape[0], y_prev:y_prev+self.map.shape[1]] = self.map + #time.sleep(5) + + self.map = new_map + self.visit(xa,ya) + + if time.time() - self.t0 < 0: + self.plot_map() + self.t0 = time.time() + + return + + def add_obstacle(self,x,y): + #self.path.append([int(x*1000),int(y*1000),'r']) + return + + def get_covered_area(self): + # points = convex_hull(self.map) + # area = calculate_area(points) + return # area + + def plot_map(self): + plot = self.map.T + plt.imshow(plot) + plt.pause(1) + plt.close() + return + + def expanding_map_reward(self): + forward_step_distance = 0.00256 + steps_to_unit = np.round(self.map_unit / forward_step_distance) # forward steps to cover map unit + m = 50 + last_steps = self.path[-m:-1] + pos = self.path[-1] + r = 0 + c = last_steps.count(pos) + if c < steps_to_unit*0.3: + r = 2 + 15* (pos not in self.path) + if c > steps_to_unit*5: + r = -2 + + return r + + diff --git a/extras/experience_memory.py b/extras/experience_memory.py new file mode 100644 index 0000000..dd8b306 --- /dev/null +++ b/extras/experience_memory.py @@ -0,0 +1,78 @@ +import numpy as np +import random +from collections import deque + +class Memory: + def __init__(self,n_actions): + self.memory = [] + self.memCounter = 0 + self.n_actions = n_actions + + + def store_experience0(self,state,action,reward,state_,done,log_prob,value): + self.memory.append((state,action,reward,state_,done,log_prob,value)) + self.memCounter += 1 + + def store_experience(self,*experience): + self.memory.append(experience) + self.memCounter += 1 + + def read_memory(self): + samples = self.memory + batch_size = self.memCounter + + num_lists = len(self.memory[0]) + lists = [[] for _ in range(num_lists)] + + for sample in samples: + for i in range(len(sample)): + lists[i].append(sample[i]) + + + lists = [np.vstack(l) if isinstance(l[0],np.ndarray) else np.vstack(l).reshape(-1) for l in lists ] + + return tuple(lists) + + def sample_memory(self,n_samples): + samples = random.sample(self.memory,n_samples) + batch_size = self.memCounter + + num_lists = len(self.memory[0]) + lists = [[] for _ in range(num_lists)] + + for sample in samples: + for i in range(len(sample)): + lists[i].append(sample[i]) + + + lists = [np.vstack(l) if isinstance(l[0],np.ndarray) else np.vstack(l).reshape(-1) for l in lists ] + + return tuple(lists) + + def clear(self): + self.__init__(self.n_actions) + + + + # def read_memory0(self): + # samples = self.memory + # batch_size = self.memCounter + + # states = np.zeros((batch_size,)+self.state_shape) + # actions = np.zeros((batch_size,)) + # rewards = np.zeros((batch_size,)) + # states_ = np.zeros((batch_size,)+self.state_shape) + # dones = np.zeros((batch_size,)) + # log_probs = np.zeros((batch_size,)) + # values = np.zeros((batch_size,)) + + # for i in range(batch_size): + # states[i,:] = samples[i][0] + # actions[i] = samples[i][1] + # rewards[i] = samples[i][2] + # states_[i,:] = samples[i][3] + # dones[i] = samples[i][4] + # log_probs[i] = samples[i][5] + # values[i] = samples[i][6] + + # return states,actions,rewards,states_,dones,log_probs,values diff --git a/extras/optical_flow.py b/extras/optical_flow.py new file mode 100644 index 0000000..e0efca1 --- /dev/null +++ b/extras/optical_flow.py @@ -0,0 +1,138 @@ +import numpy as np +import cv2 +import matplotlib.pyplot as plt +from collections import deque + + + +class OpticalFlow(): + def __init__(self): + # params for 
ShiTomasi corner detection + self.feature_params = dict( maxCorners = 1000, + qualityLevel = 0.1, + minDistance = 5, + blockSize = 7 ) + + # Parameters for lucas kanade optical flow + self.lk_params = dict( winSize = (15,15), + maxLevel = 2, + criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)) + + self.notGray = False + + self.window = deque(maxlen=3) + self.winSize = 3 + + def point_selection(self,image): + image = np.uint8(image) + # Take first frame and find corners in it + if self.notGray: + image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + p0 = cv2.goodFeaturesToTrack(image, mask = None, **self.feature_params) + + + + return p0 + + + def find_new_points(self,prev_state,state,p0): + prev_state = np.uint8(prev_state) + state = np.uint8(state) + + if self.notGray: + prev_state = cv2.cvtColor(prev_state, cv2.COLOR_BGR2GRAY) + state = cv2.cvtColor(state, cv2.COLOR_BGR2GRAY) + # calculate optical flow + p1, st, err = cv2.calcOpticalFlowPyrLK(prev_state, state, p0, None, **self.lk_params) + #p1: new positions of input pixels (given through p0) + #st: boolean: was the optical flow calculated for the given points? + + if type(p1)!=np.ndarray: + print("aaaaa") + print(type(p1),p1) + exit() + return p0,p0 + + + # Select good points + p1 = p1[st==1] + p1 = p1.reshape((p1.shape[0],1,p1.shape[1])) + p0 = p0[st==1] + p0 = p0.reshape((p0.shape[0],1,p0.shape[1])) + + return p0,p1 + + def draw_vectors(self,state,p0,p1): + + # Create some random colors + color = [255,0,0] + # Create a mask image for drawing purposes + mask = np.zeros_like(np.uint8(prev_state)) + + # draw the tracks + for i,(new,old) in enumerate(zip(p1,p0)): + a,b = new.ravel() + c,d = old.ravel() + mask = cv2.line(mask, (a,b),(c,d), color, 2) + if self.notGray: + state_rgb = cv2.cvtColor(state, cv2.COLOR_BGR2RGB) + frame = cv2.circle(state_rgb,(a,b),2,color,-1) + else: + frame = cv2.circle(state,(a,b),2,color,-1) + + try: + img = cv2.add(frame,mask) + img = cv2.transpose(img) + + cv2.imshow('frame',img) + k = cv2.waitKey(1) + + except: + pass + + return k + + + def avg_of(self,p0,p1): + of = 0 + n = 0 + p0 = p0.reshape((p0.shape[0],p0.shape[2])) + p1 = p1.reshape((p1.shape[0],p1.shape[2])) + for i in range(p0.shape[0]): + d = np.sqrt((p1[i][0] - p0[i][0])**2 + (p1[i][1] - p0[i][1])**2) + of += d + n += 1 + + if n>0 and of>0: + return of / n + else: + return -1 + + + def optical_flow(self,prev_state,state,action): + p0 = self.point_selection(prev_state) + + if type(p0) != np.ndarray: return self.window[-1] # if no points to calculate, return last valid value + + p0,p1 = self.find_new_points(prev_state,state,p0) + + of = self.avg_of(p0,p1) + if of==-1: return self.window[-1] + if action != [1,1]: + of = of/5.5 + + self.window.append(of) + + + if len(self.window) < 3: + of = self.window[-1] + else: + of = np.sum(self.window) / self.winSize + + if of > 10: of = 10 + shaped_of = - of / 10 + + return shaped_of + + def reset(self): + self.window = deque(maxlen=self.winSize) \ No newline at end of file diff --git a/extras/statistics.py b/extras/statistics.py new file mode 100644 index 0000000..99bf0e0 --- /dev/null +++ b/extras/statistics.py @@ -0,0 +1,90 @@ +import numpy as np +import os +from os import listdir +from os.path import isfile, join +import matplotlib.pyplot as plt +import time +import pandas as pd +from datetime import datetime +import pickle + + +class VarLog: + def __init__(self,name): + self.name = name + self.log = [] + self.time = [] + +class Logger: + def __init__(self,dir,fname): + self.Variables = {} 
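+        # self.Variables maps a variable name to its VarLog; entries are
+        # created lazily by add_log via add_variable, so callers never have
+        # to declare a variable before logging it.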
+ self.fname = self.set_name(dir,fname) + print(self.fname) + self.time = [] + self.t = -1 + + def tick(self,t=1): + self.t += t + self.time.append(self.t) + + def add_variable(self,vname): + self.Variables[vname] = VarLog(vname) + + def add_log(self,vname,value): + if not vname in self.Variables: self.add_variable(vname) + self.Variables[vname].log.append(value) + self.Variables[vname].time.append(self.time) + + def add_logs(self,vars): + if not len(a) == len(vars): print('Error! Wrong number of variables!') + + for i,vname in enumerate(self.Variables): + add_log(vname,vars[i]) + + def set_name(self,directory,fname): + i = -1 + for f in os.listdir(directory): + f_ = f.split('.') + if len(f_)==2 and f_[1] == 'pkl': + f = f_[0] + f_ = f.split('_') + if len(f_) == 3: + f = f_[0] +'_'+ f_[1] + if f == fname: + i = int(f_[2]) + return os.path.join(directory,fname+'_'+str(i+1)+'.pkl') + + def save_game(self): + if os.path.exists(self.fname): + os.remove(self.fname) + f = open(self.fname,"wb") + pickle.dump(self.Variables,f) + f.close() + + def load_game(self,fname): + if os.path.exists: + f1 = open(fname,"rb") + self.Variables = pickle.load(f1) + f1.close() + else: + print('No such file!') + + + def plot_game(self,game,vars): + for vname in vars: + plt.plot(game[vname].time,game[vname].log) + plt.show() + + def plot(self,var): + var = self.Variables[var] + plt.plot(var.log) + plt.show() + return + +if __name__ == '__main__': + dir_path = os.path.dirname(os.path.realpath(__file__)) + L = Logger(dir=dir_path,fname='vizdoom_ddqn') + + L.load_game(dir_path+'vizdoom_ddqn_0.pkl') + + L.plot('score') \ No newline at end of file diff --git a/extras/utils.py b/extras/utils.py new file mode 100644 index 0000000..9f625a9 --- /dev/null +++ b/extras/utils.py @@ -0,0 +1,29 @@ +import numpy as np + + +class cyclic(): + def __init__(self,x,min,max): + self.min = min + self.max = max + self.cycle = max - min + self.x = self.make_cyclic(x) + + def __add__(self,y): + r = self.x + y + return self.make_cyclic(r) + + def __sub__(self,y): + r = self.x - y + return self.make_cyclic(r) + + def __mul__(self,y): + r = self.x * y + return self.make_cyclic(r) + + def make_cyclic(self,r): + r = r % self.cycle + if r > self.max: + r = self.min + (r - self.max) + elif r < self.min: + r = self.max + (r - self.min) + return r diff --git a/mains/ddqn_vizdoom.py b/mains/ddqn_vizdoom.py new file mode 100644 index 0000000..e5dbfed --- /dev/null +++ b/mains/ddqn_vizdoom.py @@ -0,0 +1,49 @@ +import os,sys,inspect +current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parent_dir = os.path.dirname(current_dir) +sys.path.insert(0, parent_dir) + +import numpy as np +import random +import os +from tensorflow.keras.optimizers import Adam + +from environments.VizDoomEnv import * +from networks.networks import * +from extras.experience_memory import * +from agents.DDQN import Agent +from extras.statistics import * +dir_path = os.path.dirname(os.path.realpath(__file__)) +L = Logger(dir=dir_path,fname='vizdoom_ddqn') + + +env = VizDoomEnv(scenario='defend_the_center.cfg') +agent = Agent(action_size=env.action_size,conv=True) + + +n_games = 2000 +scores = [] +avg_score = 0 + +for i in range(n_games): + observation = env.reset() + done = False + score = 0 + while not done: + action = agent.choose_action(observation) + new_observation,reward,done,kills = env.step(action) + score += reward + state = np.expand_dims(observation,axis=0) + new_state = np.expand_dims(new_observation,axis=0) + 
agent.store_experience(state,action,reward,new_state,done) + observation = new_observation + + agent.learn() + + scores.append(score) + print('GAME:',i,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:])) + L.add_log('score',score) + L.add_log('kills',kills) + + if i % 10==0: + L.save_game() \ No newline at end of file diff --git a/mains/ppo_vizdoom.py b/mains/ppo_vizdoom.py new file mode 100644 index 0000000..a4e2cbb --- /dev/null +++ b/mains/ppo_vizdoom.py @@ -0,0 +1,57 @@ +import os,sys,inspect +current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parent_dir = os.path.dirname(current_dir) +sys.path.insert(0, parent_dir) + +import gym +import tensorflow as tf + +from agents.PPO import Agent +from environments.VizDoomEnv import VizDoomEnv + +env = VizDoomEnv(scenario='defend_the_center.cfg') +agent = Agent(state_shape=env.state_size,n_actions=env.action_size) + +def test_agent(env): + total_reward = 0 + state = env.reset() + done = False + while not done: + action,_,_ = agent.choose_action(state) + state_, reward, done, info = env.step(action) + state = state_ + total_reward += reward + return total_reward + +TEST_EPOCHS = 5 +PPO_STEPS = 256 +TARGET_SCORE = 200 + +train_epochs = 0 +early_stop = False +while not early_stop: + observation = env.reset() + for _ in range(PPO_STEPS): + action, log_probs, value = agent.choose_action(observation) + observation_, reward, done, info = env.step(action) + agent.store_experience(observation,action,reward,observation_,done,log_probs,value) + if done: + observation = env.reset() + continue + observation = observation_ + + obs = tf.convert_to_tensor([observation_]) + _,next_value = agent.ppo_network(obs) + next_value = next_value.numpy()[0][0] + states,actions,rewards,states_,dones,log_probs,values = agent.read_memory()#? 
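+    # compute_gae is assumed to implement standard Generalized Advantage
+    # Estimation over the rolled-out batch, bootstrapping from next_value:
+    #   delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_t) - V(s_t)
+    #   A_t     = delta_t + gamma * lambda * (1 - done_t) * A_{t+1}
+    # and to return returns_t = A_t + V(s_t), which is why the advantages
+    # below are recovered as returns - values.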
+    returns = agent.compute_gae(next_value,values,rewards,dones)
+    advantages = returns - values
+    agent.ppo_update(states,actions,log_probs,returns,advantages)
+
+    if train_epochs % TEST_EPOCHS == 0:
+        score = test_agent(env)
+        print(score)
+        if score >= TARGET_SCORE:
+            early_stop = True
+
+    train_epochs += 1
WXtEguPy4ddywcoiHk+55ulxstDZt(U literal 0 HcmV?d00001 diff --git a/networks/networks.py b/networks/networks.py new file mode 100644 index 0000000..f3b20b2 --- /dev/null +++ b/networks/networks.py @@ -0,0 +1,186 @@ +import tensorflow as tf +import tensorflow.keras as keras +from tensorflow.keras.layers import Dense,Conv2D,Flatten,Concatenate + +class PPONetwork(keras.Model): + def __init__(self,n_actions,conv=False): + super(PPONetworkConv,self).__init__() + + self.HiddenLayers = [] + + if conv: + self.HiddenLayers.append( Conv2D(32,kernel_size=8,strides=(4,4),activation='relu') ) + self.HiddenLayers.append( Conv2D(64,kernel_size=4,strides=(2,2),activation='relu') ) + self.HiddenLayers.append( Conv2D(64,kernel_size=3,activation='relu') ) + self.HiddenLayers.append( Flatten() ) + + self.HiddenLayers.append( Dense(256,activation='relu') ) + self.HiddenLayers.append( Dense(256,activation='relu') ) + + self.v = Dense(1,activation='linear') + self.pi = Dense(n_actions,activation='softmax') + + def call(self,state): + x = state + + for layer in self.HiddenLayers: + x = layer(x) + + policy = self.pi(x) + value = self.v(x) + + return policy, value + + +# class PPONetwork(keras.Model): +# def __init__(self,n_actions): +# super(PPONetwork,self).__init__() + +# self.fc1 = Dense(256,activation='relu') +# self.fc2 = Dense(256,activation='relu') + +# self.v = Dense(1,activation='linear') +# self.pi = Dense(n_actions,activation='softmax') + +# def call(self,state): +# x = self.fc1(state) +# x = self.fc2(x) + +# policy = self.pi(x) +# value = self.v(x) + +# return policy, value + +class ActorCriticNetwork(keras.Model): + def __init__(self, n_actions, name='actor_critic'): + super(ActorCriticNetwork, self).__init__() + self.n_actions = n_actions + self.model_name = name + + self.layer1 = Dense(1024, activation='relu') + self.layer2 = Dense(512, activation='relu') + self.v = Dense(1, activation='linear') + self.pi = Dense(n_actions,activation='softmax') + + def call(self,state): + value = self.layer1(state) + value = self.layer2(value) + + pi = self.pi(value) + v = self.v(value) + + return v,pi + +class PolicyGradientNetwork(keras.Model): + def __init__(self,n_actions): + super(PolicyGradientNetwork, self).__init__() + self.n_actions = n_actions + + self.fc1 = Dense(256,activation='relu') + self.fc2 = Dense(256,activation='relu') + self.pi = Dense(n_actions,activation='softmax') + + def call(self,state): + value = self.fc1(state) + value = self.fc2(value) + + pi = self.pi(value) + + return pi + + +class DQNetwork(keras.Model): + def __init__(self,action_size,conv=False): + super(DQNetwork, self).__init__() + self.HiddenLayers = [] + + if conv: + self.HiddenLayers.append( Conv2D(32,kernel_size=8,strides=(4,4),activation='relu') ) + self.HiddenLayers.append( Conv2D(64,kernel_size=4,strides=(2,2),activation='relu') ) + self.HiddenLayers.append( Conv2D(64,kernel_size=3,activation='relu') ) + self.HiddenLayers.append( Flatten() ) + self.HiddenLayers.append( Dense(units=512, activation='relu') ) + + self.value = Dense(units=action_size, activation='linear') + + def call(self,state): + x = state + + for layer in self.HiddenLayers: + x = layer(x) + + value = self.value(x) + + return value + +class MitsosPPONet(keras.Model): + def __init__(self,n_actions): + super(MitsosPPONet, self).__init__() + self.ConvLayers = [] + self.ConvLayers.append( Conv2D(64,kernel_size=9,activation='relu') ) + self.ConvLayers.append( Conv2D(64,kernel_size=5,activation='relu') ) + self.ConvLayers.append( 
Conv2D(64,kernel_size=3,activation='relu') ) + + self.flatten = Flatten() + self.concat = Concatenate(axis=-1) + + self.DenseLayers = [] + self.DenseLayers.append( Dense(512,activation='relu') ) + self.DenseLayers.append( Dense(256,activation='relu') ) + + self.policy = Dense(n_actions,activation='softmax') + self.value = Dense(1,activation='linear') + + def call(self,state): + x1 = state[0] #stacked frames + x2 = state[1] #stacked sensor values + + for layer in self.ConvLayers: + x1 = layer(x1) + + x1 = self.flatten(x1) + x2 = self.flatten(x2) + x = self.concat([x1,x2]) + + for layer in self.DenseLayers: + x = layer(x) + + pi = self.policy(x) + v = self.value(x) + + return pi,v + + +class MitsosDQNet(keras.Model): + def __init__(self,action_size): + super(MitsosDQNet, self).__init__() + self.ConvLayers = [] + self.ConvLayers.append( Conv2D(64,kernel_size=9,activation='relu') ) + self.ConvLayers.append( Conv2D(64,kernel_size=5,activation='relu') ) + self.ConvLayers.append( Conv2D(64,kernel_size=3,activation='relu') ) + + self.flatten = Flatten() + self.concat = Concatenate(axis=-1) + + self.DenseLayers = [] + self.DenseLayers.append( Dense(units=512, activation='relu') ) + + self.value = Dense(units=action_size, activation='linear') + + def call(self,state): + x1 = state[0] #stacked frames + x2 = state[1] #stacked sensor values + + for layer in self.ConvLayers: + x1 = layer(x1) + + x1 = self.flatten(x1) + x2 = self.flatten(x2) + x = self.concat([x1,x2]) + + for layer in self.DenseLayers: + x = layer(x) + + v = self.value(x) + + return v diff --git a/setupvizdoom.sh b/setupvizdoom.sh new file mode 100644 index 0000000..b166ad1 --- /dev/null +++ b/setupvizdoom.sh @@ -0,0 +1,27 @@ +apt-get update + +apt-get install build-essential zlib1g-dev libsdl2-dev libjpeg-dev \ +nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \ +libopenal-dev timidity libwildmidi-dev unzip +it +sudo apt-get install libboost-all-dev +sudo apt-get install python3-dev python3-pip +sudo apt-get install liblua5.1-dev + +# go to your user folder +cd ~ +# get julia +wget https://julialang-s3.julialang.org/bin/linux/x64/1.3/julia-1.3.0-linux-x86_64.tar.gz +# extract the file (eXtract File as options) +tar xf julia-1.3.0-linux-x86_64.tar.gz +# Create a shortcut (a soft link) that's places in a globally accessible folder +sudo ln -s ~/julia-1.3.0/bin/julia /usr/local/bin/julia + +pip install vizdoom +pip install varname + + +julia + +using Pkg +Pkg.add("CxxWrap") \ No newline at end of file diff --git a/setupwebots.sh b/setupwebots.sh new file mode 100644 index 0000000..4df55b6 --- /dev/null +++ b/setupwebots.sh @@ -0,0 +1,13 @@ +sudo rm -rf /var/lib/apt/lists/* +sudo apt-get clean + +wget -qO- https://cyberbotics.com/Cyberbotics.asc | sudo apt-key add - +sudo apt-add-repository 'deb https://cyberbotics.com/debian/ binary-amd64/' +sudo apt-get update +sudo bash '/content/gdrive/MyDrive/Colab Notebooks/thesis_code/depend.sh' +sudo apt-get install webots +export WEBOTS_HOME=/snap/webots/current/usr/share/webots +export LD_LIBRARY_PATH=$WEBOTS_HOME/lib/controller +pip install varname +cd ./webots +sudo xvfb-run --auto-servernum webots --mode=fast --stdout --stderr --minimize --batch --no-sandbox ./worlds/Dworld.wbt \ No newline at end of file diff --git a/setupwebots2.sh b/setupwebots2.sh new file mode 100644 index 0000000..8ba5d4b --- /dev/null +++ b/setupwebots2.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +curl -L -O https://github.com/cyberbotics/webots/releases/download/R2021a/webots_2021a_amd64.deb + 
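+# Everything below prepares the machine for the .deb fetched above: a root
+# check, the apt dependencies (xvfb only when no display is available), a few
+# extra runtime libraries, the dpkg -i install, the WEBOTS_HOME and
+# LD_LIBRARY_PATH exports, and finally a headless xvfb-run launch of
+# worlds/Dworld.wbt.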
+#####################################3 + +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root" + exit 1 +fi + +apt update +apt install --yes lsb-release g++ make libavcodec-extra libglu1-mesa libxkbcommon-x11-dev execstack libusb-dev libxcb-keysyms1 libxcb-image0 libxcb-icccm4 libxcb-randr0 libxcb-render-util0 libxcb-xinerama0 libxcomposite-dev libxtst6 libnss3 +if [[ -z "$DISPLAY" ]]; then + apt install --yes xvfb +fi + +UBUNTU_VERSION=$(lsb_release -rs) +if [[ $UBUNTU_VERSION == "16.04" ]]; then + apt install --yes libav-tools +elif [[ $UBUNTU_VERSION == "18.04" ]]; then + apt install --yes ffmpeg +elif [[ $UBUNTU_VERSION == "20.04" ]]; then + apt install --yes ffmpeg +else + echo "Unsupported Linux version." +fi + +###################################### + +sudo apt-get install libjxr0 +sudo apt-get install libraw16 +sudo apt-get install libfreeimage3 +sudo apt-get install libzzip-0-13 +sudo apt-get install libssh-4 +sudo apt-get install libssh-dev +sudo apt-get install libzip4 +sudo apt-get install libzip-dev +apt install python3.6-gdbm + +###################################### + +sudo dpkg -i webots_2021a_amd64.deb + +###################################### + +export WEBOTS_HOME=/snap/webots/current/usr/share/webots +export LD_LIBRARY_PATH=$WEBOTS_HOME/lib/controller + +######################################### + +cd '/content/gdrive/MyDrive/Colab Notebooks/thesis_code' +sudo xvfb-run --auto-servernum webots --mode=fast --stdout --stderr --minimize --batch --no-sandbox ./worlds/Dworld.wbt \ No newline at end of file diff --git a/worlds/Cworld.wbt b/worlds/Cworld.wbt new file mode 100644 index 0000000..2d5afef --- /dev/null +++ b/worlds/Cworld.wbt @@ -0,0 +1,450 @@ +#VRML_SIM R2020b utf8 +WorldInfo { + coordinateSystem "NUE" +} +Viewpoint { + fieldOfView 1.3 + orientation 0.010043673917034802 0.8423932874717313 0.5387695925317402 3.1616948297772707 + position -0.011961103750660083 1.4647237534142843 -0.9396959046165965 +} +TexturedBackground { +} +TexturedBackgroundLight { +} +DEF MyFloor Floor { + size 2 2 +} +SolidBox { + translation -0.52424 0.05 0.0584396 + name "box(37)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.651715 0.05 0.117673 + name "box(38)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.684383 0.05 -0.579344 + name "box(39)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.739455 0.05 -0.0994432 + name "box(40)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.630067 0.05 0.0451448 + name "box(41)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.0934386 0.05 -0.893598 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(26)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.049222 0.05 -0.434281 + rotation 0.7071067811865163 -2.973929079720631e-07 0.7071067811865163 -3.1415853071795863 + name "box(28)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.0749994 0.05 -0.92742 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(29)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.510085 0.05 0.00454399 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(27)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/desktop_wood.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation -0.466685 0.05 -0.0444477 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(30)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.73406 
0.05 -0.197511 + rotation -0.13052598533525914 1.120639874094853e-06 -0.9914448886100546 3.14159 + name "box(31)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.656055 0.05 -0.203993 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(32)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.778692 0.05 -0.145827 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(33)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.0494207 0.05 0.151692 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(34)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.703332 0.05 -0.323405 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(35)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.722747 0.05 -0.468955 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(36)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.314899 0.05 0.268988 + name "box(17)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.278181 0.05 0.365483 + name "box(18)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.173404 0.05 -0.375836 + name "box(19)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/fabric_with_motif.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation 0.320716 0.05 0.494056 + name "box(20)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.316694 0.05 0.477297 + name "box(21)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.162788 0.05 -0.0985685 + name "box(22)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.891698 0.05 -0.0547925 + name "box(23)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.326988 0.05 0.159103 + name "box(8)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.522779 0.05 -0.700135 + name "box(9)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.418508 0.05 -0.177731 + name "box(3)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.0446427 0.05 -0.190829 + name "box(4)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/interlaced_parquetry.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation -0.051497 0.05 0.380365 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(5)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.327948 0.05 -0.777995 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(24)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.172366 0.05 -0.679149 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(25)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/gray_brick_wall.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation 0.876423 0.05 -0.359378 + name "box(6)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.348956 0.05 -0.686402 + name "box(7)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/dry_grass.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation 0.570365 0.05 -0.696242 + name "box(10)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/grass.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation -0.28719 0.05 -0.214558 + name "box(11)" + size 0.1 0.1 0.1 +} +SolidBox { + 
translation 0.610416 0.05 -0.358784 + name "box(12)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/rock.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation -0.419413 0.05 -0.441466 + name "box(13)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.565655 0.05 -0.808799 + name "box(14)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.73034 0.05 -0.361945 + name "box(15)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.307226 0.05 -0.788899 + name "box(16)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/lightwood.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation -0.299792 0.05 -0.427535 + name "box(2)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/interlaced_parquetry.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation 0.302002 0.05 -0.918502 + name "box(1)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/asphalt.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation 0.151636 0.05 -0.6219 + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/chessboard.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +DEF MyWall1 Solid { + translation 0 0.05 1 + children [ + DEF wall1 Shape { + appearance PBRAppearance { + } + geometry Box { + size 2 0.1 0.01 + } + } + ] + name "solid(2)" + boundingObject USE wall1 +} +DEF MyWall2 Solid { + translation 0 0.05 -1 + children [ + DEF wall2 Shape { + appearance PBRAppearance { + } + geometry Box { + size 2 0.1 0.01 + } + } + ] + name "solid(3)" + boundingObject USE wall2 +} +DEF MyWall3 Solid { + translation 1 0.05 0 + rotation 0 1 0 1.5707996938995747 + children [ + DEF wall3 Shape { + appearance PBRAppearance { + } + geometry Box { + size 2 0.1 0.01 + } + } + ] + name "solid(4)" + boundingObject USE wall3 +} +DEF MyWall4 Solid { + translation -1 0.05 0 + rotation 0 1 0 1.5707996938995747 + children [ + DEF wall22 Shape { + appearance PBRAppearance { + } + geometry Box { + size 2 0.1 0.01 + } + } + ] + name "solid(5)" + boundingObject USE wall22 +} +DEF letter_box Solid { + translation 0.334294 3.33067e-15 -0.384701 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(1)" +} +RubberDuck { + translation -0.862172 -1.16018e-14 0.480687 + rotation 0 1 0 3.14159 + scale 2 +} +RubberDuck { + translation -0.693439 0 -0.722354 + name "rubber duck(2)" + scale 1.5 +} +RubberDuck { + translation -0.18786 0 -0.804563 + name "rubber duck(3)" +} +RubberDuck { + translation -0.443423 -5.10703e-15 0.32712 + name "rubber duck(4)" +} +RubberDuck { + translation 0.41311 -4.4409e-16 0.0645548 + name "rubber duck(5)" +} +RubberDuck { + translation 0.842931 0 -0.772584 + name "rubber duck(6)" +} +DEF Mitsos E-puck { + translation 0.00733535 0 -0.748403 + rotation 0 1 0 3.14159 + controller "ppo_webots" + supervisor TRUE + emitter_channel 13 + receiver_channel 12 + groundSensorsSlot [ + DEF Bumper TouchSensor { + translation -0.03 0 0.03 + rotation 0 0 1 1.57075 + boundingObject Cylinder { + height 0.05 + radius 0.04 + } + } + ] +} diff --git a/worlds/Dworld.wbt b/worlds/Dworld.wbt new file mode 100644 index 0000000..3646232 --- /dev/null +++ b/worlds/Dworld.wbt @@ -0,0 +1,601 @@ +#VRML_SIM R2020b utf8 +WorldInfo { + coordinateSystem "NUE" +} +Viewpoint { + fieldOfView 1.3 + orientation 
0.010043673917034802 0.8423932874717313 0.5387695925317402 3.1616948297772707 + position -0.011961103750660083 1.4647237534142843 -0.9396959046165965 +} +TexturedBackground { +} +TexturedBackgroundLight { +} +DEF MyFloor Floor { + size 2 2 +} +DEF letter_box Solid { + translation -0.565576 3.33067e-15 -0.794708 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(20)" +} +SolidBox { + translation -0.618803 0.05 0.463736 + name "box(37)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.625617 0.05 0.578247 + name "box(38)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.642827 0.05 -0.404475 + name "box(39)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.614491 0.05 0.216461 + name "box(40)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.602943 0.05 0.010313 + name "box(41)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.00300413 0.05 -0.705049 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(26)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.324908 0.05 0.517289 + rotation 0.7071067811865163 -2.973929079720631e-07 0.7071067811865163 -3.1415853071795863 + name "box(28)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.116254 0.05 -0.725146 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(29)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.616903 0.05 0.127591 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(27)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/desktop_wood.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation -0.607375 0.05 0.346382 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(30)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.625828 0.05 0.118225 + rotation -0.13052598533525914 1.120639874094853e-06 -0.9914448886100546 3.14159 + name "box(31)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.623492 0.05 -0.239298 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(32)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.586752 0.05 0.240398 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(33)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.202897 0.05 0.554026 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(34)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.631141 0.05 -0.13868 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(35)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.622588 0.05 -0.271265 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(36)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.575948 0.05 0.486566 + name "box(17)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.467608 0.05 0.580531 + name "box(18)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.11035 0.05 0.0177173 + name "box(19)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/fabric_with_motif.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation 0.589959 0.05 0.585199 + name "box(20)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.0695238 0.05 0.5921 + name "box(21)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.349746 0.05 0.568924 + name "box(22)" + size 0.1 0.1 0.1 +} 
+SolidBox { + translation 0.602641 0.05 -0.11369 + name "box(23)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.59282 0.05 0.36748 + name "box(8)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.639982 0.05 -0.644952 + name "box(9)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.627204 0.05 0.00824829 + name "box(3)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.442466 0.05 0.521656 + name "box(4)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/interlaced_parquetry.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation 0.0677715 0.05 0.567313 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(5)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.254087 0.05 -0.703318 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(24)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.274087 0.05 -0.690153 + rotation -0.5000001748437317 6.633972319816142e-07 -0.8660253028380855 3.14159 + name "box(25)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/gray_brick_wall.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation 0.596802 0.05 -0.372769 + name "box(6)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.417327 0.05 -0.701469 + name "box(7)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/dry_grass.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation 0.489265 0.05 -0.720756 + name "box(10)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/grass.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation -0.887799 0.05 -0.891526 + name "box(11)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.58383 0.05 -0.508023 + rotation 0 1 0 0.261799 + name "box(12)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/rock.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation -0.910765 0.05 -0.13708 + name "box(13)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.381394 0.05 -0.707274 + name "box(14)" + size 0.1 0.1 0.1 +} +SolidBox { + translation 0.575439 0.05 -0.627842 + name "box(15)" + size 0.1 0.1 0.1 +} +SolidBox { + translation -0.519345 0.05 -0.69095 + name "box(16)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/lightwood.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation -0.78499 0.05 -0.542529 + name "box(2)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/interlaced_parquetry.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation 0.611365 0.05 -0.733326 + name "box(1)" + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/asphalt.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +SolidBox { + translation 0.125138 0.05 -0.703332 + size 0.1 0.1 0.1 + appearance PBRAppearance { + baseColorMap ImageTexture { + url [ + "textures/chessboard.jpg" + ] + } + roughness 0.5 + metalness 0 + } +} +DEF MyWall1 Solid { + translation 0 0.05 1 + children [ + DEF wall1 Shape { + appearance PBRAppearance { + } + geometry Box { + size 2 0.1 0.01 + } + } + ] + name "solid(2)" + boundingObject USE wall1 +} +DEF MyWall2 Solid { + translation 0 0.05 -1 + children [ + DEF wall2 Shape { + 
appearance PBRAppearance { + } + geometry Box { + size 2 0.1 0.01 + } + } + ] + name "solid(3)" + boundingObject USE wall2 +} +DEF MyWall3 Solid { + translation 1 0.05 0 + rotation 0 1 0 1.5707996938995747 + children [ + DEF wall3 Shape { + appearance PBRAppearance { + } + geometry Box { + size 2 0.1 0.01 + } + } + ] + name "solid(4)" + boundingObject USE wall3 +} +DEF MyWall4 Solid { + translation -1 0.05 0 + rotation 0 1 0 1.5707996938995747 + children [ + DEF wall22 Shape { + appearance PBRAppearance { + } + geometry Box { + size 2 0.1 0.01 + } + } + ] + name "solid(5)" + boundingObject USE wall22 +} +DEF letter_box Solid { + translation 0.354257 3.33067e-15 -0.838301 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(1)" +} +DEF letter_box Solid { + translation 0.878747 3.33067e-15 -0.923174 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(6)" +} +DEF letter_box Solid { + translation 0.87935 3.33067e-15 -0.345449 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(7)" +} +DEF letter_box Solid { + translation 0.6008 3.9968e-15 0.712204 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(10)" +} +DEF letter_box Solid { + translation -0.0740333 3.77476e-15 0.707197 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(14)" +} +DEF letter_box Solid { + translation -0.908079 2.88658e-15 0.899124 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(15)" +} +DEF letter_box Solid { + translation -0.722202 3.33067e-15 0.502841 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(16)" +} +DEF letter_box Solid { + translation -0.789943 4.21885e-15 0.195154 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(17)" +} +DEF letter_box Solid { + translation -0.416995 3.77476e-15 0.928224 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(18)" +} +DEF letter_box Solid { + translation 0.270842 2.66454e-15 0.921813 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(19)" +} +DEF letter_box Solid { + translation 0.887998 3.9968e-15 0.860353 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(11)" +} +DEF letter_box Solid { + translation 0.793262 2.22045e-15 0.245146 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(12)" +} +DEF letter_box Solid { + translation 0.721582 3.33067e-15 -0.0238581 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(13)" +} +DEF letter_box Solid { + translation 0.724771 3.33067e-15 -0.598562 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(8)" +} +DEF letter_box Solid { + translation -0.0767346 3.33067e-15 -0.933214 + children [ + SolidBox { + translation 0 0.05 0 + size 0.1 0.1 0.1 + } + ] + name "solid(9)" +} +RubberDuck { + translation -0.207799 -1.18238e-14 0.554874 + rotation 1.27237e-14 -1 -9.58979e-09 -3.1415853071795863 + scale 2 +} +RubberDuck { + translation -0.682842 0 -0.51083 + name "rubber duck(2)" + scale 1.5 +} +RubberDuck { + translation -0.202103 0 -0.6385 + name "rubber duck(3)" +} +RubberDuck { + translation -0.552272 -5.10703e-15 0.582049 + name "rubber duck(4)" +} +RubberDuck { + translation 0.41311 -4.4409e-16 0.0645548 + name 
"rubber duck(5)" +} +RubberDuck { + translation 0.729913 0 -0.723984 + name "rubber duck(6)" +} +DEF Mitsos E-puck { + translation 0.717845 0 -0.835527 + rotation 0 1 0 1.570796 + controller "alpha" + supervisor TRUE + emitter_channel 13 + receiver_channel 12 + groundSensorsSlot [ + DEF Bumper TouchSensor { + translation -0.03 0 0.03 + rotation 0 0 1 1.57075 + boundingObject Cylinder { + height 0.05 + radius 0.04 + } + } + ] +}