diff --git a/_vizdoom.ini b/_vizdoom.ini new file mode 100644 index 0000000..8fade4d --- /dev/null +++ b/_vizdoom.ini @@ -0,0 +1,568 @@ +# This file was generated by ViZDoom 1.1.8 (ZDOOM 2.8.1) on Mon May 31 14:58:54 2021 + +# These are the directories to automatically search for IWADs. +# Each directory should be on a separate line, preceded by Path= +[IWADSearch.Directories] +Path=. +Path=$DOOMWADDIR +Path=./_vizdoom +Path=/usr/local/share/doom +Path=/usr/local/share/games/doom +Path=/usr/share/doom +Path=/usr/share/games/doom + +# These are the directories to search for wads added with the -file +# command line parameter, if they cannot be found with the path +# as-is. Layout is the same as for IWADSearch.Directories +[FileSearch.Directories] +Path=./_vizdoom +Path=/usr/local/share/ +Path=$DOOMWADDIR + +# Files to automatically execute when running the corresponding game. +# Each file should be on its own line, preceded by Path= + +[Doom.AutoExec] +Path=./_vizdoom/autoexec.cfg + +[Heretic.AutoExec] +Path=./_vizdoom/autoexec.cfg + +[Hexen.AutoExec] +Path=./_vizdoom/autoexec.cfg + +[Strife.AutoExec] +Path=./_vizdoom/autoexec.cfg + +[Chex.AutoExec] +Path=./_vizdoom/autoexec.cfg + +# WAD files to always load. These are loaded after the IWAD but before +# any files added with -file. Place each file on its own line, preceded +# by Path= +[Global.Autoload] + +# Wad files to automatically load depending on the game and IWAD you are +# playing. You may have have files that are loaded for all similar IWADs +# (the game) and files that are only loaded for particular IWADs. For example, +# any files listed under 'doom.Autoload' will be loaded for any version of Doom, +# but files listed under 'doom.doom2.Autoload' will only load when you are +# playing a Doom 2 based game (doom2.wad, tnt.wad or plutonia.wad), and files listed under +# 'doom.doom2.commercial.Autoload' only when playing doom2.wad. 
+ +[doom.Autoload] + +[doom.doom2.Autoload] + +[doom.doom2.commercial.Autoload] + +[doom.doom2.bfg.Autoload] + +[doom.doom2.plutonia.Autoload] + +[doom.doom2.tnt.Autoload] + +[doom.doom1.Autoload] + +[doom.doom1.registered.Autoload] + +[doom.doom1.ultimate.Autoload] + +[doom.doom1.bfg.Autoload] + +[doom.freedoom.Autoload] + +[doom.freedoom.demo.Autoload] + +[doom.freedoom.phase1.Autoload] + +[doom.freedoom.phase2.Autoload] + +[doom.freedoom.freedm.Autoload] + +[heretic.Autoload] + +[heretic.heretic.Autoload] + +[heretic.shadow.Autoload] + +[blasphemer.Autoload] + +[hexen.Autoload] + +[hexen.deathkings.Autoload] + +[hexen.hexen.Autoload] + +[strife.Autoload] + +[chex.Autoload] + +[chex.chex1.Autoload] + +[chex.chex3.Autoload] + +[urbanbrawl.Autoload] + +[hacx.Autoload] + +[hacx.hacx1.Autoload] + +[hacx.hacx2.Autoload] + +[harmony.Autoload] + +[square.Autoload] + +[square.squareware.Autoload] + +[square.square.Autoload] + +[LastRun] +Version=211 + +[GlobalSettings] +gus_memsize=0 +midi_dmxgus=true +gus_patchdir= +midi_voices=32 +midi_config=timidity.cfg +snd_efx=true +snd_aldevice=Default +wildmidi_enhanced_resampling=true +wildmidi_reverb=false +wildmidi_frequency=0 +wildmidi_config= +fluid_chorus_type=0 +fluid_chorus_depth=8 +fluid_chorus_speed=0.3 +fluid_chorus_level=1 +fluid_chorus_voices=3 +fluid_reverb_level=0.57 +fluid_reverb_width=0.76 +fluid_reverb_damping=0.23 +fluid_reverb_roomsize=0.61 +fluid_threads=1 +fluid_samplerate=0 +fluid_interp=1 +fluid_voices=128 +fluid_chorus=true +fluid_reverb=true +fluid_gain=0.5 +fluid_patchset= +opl_core=0 +opl_numchips=2 +timidity_frequency=44100 +timidity_pipe=90 +timidity_mastervolume=1 +timidity_byteswap=false +timidity_8bit=false +timidity_stereo=true +timidity_reverb=0 +timidity_chorus=0 +timidity_extargs= +timidity_exe=timidity +snd_mididevice=-1 +spc_amp=1.875 +mod_dumb_mastervolume=1 +mod_autochip_scan_threshold=12 +mod_autochip_size_scan=500 +mod_autochip_size_force=100 +mod_autochip=false +mod_interp=2 +mod_volramp=2 +mod_samplerate=0 +mod_dumb=true +snd_sfxvolume=1 +snd_backend=openal +snd_output=default +snd_buffersize=0 +snd_samplerate=0 +snd_musicvolume=0.5 +snd_waterlp=250 +snd_midipatchset= +snd_output_format=PCM-16 +snd_speakermode=Auto +snd_resampler=Linear +snd_waterreverb=true +snd_hrtf=false +snd_buffercount=0 +snd_driver=0 +opl_fullpan=true +vid_tft=true +m_showinputgrid=false +m_show_backbutton=0 +m_use_mouse=1 +show_messages=true +mouse_sensitivity=1 +map_point_coordinates=true +vid_aspect=3 +vid_nowidescreen=false +vid_refreshrate=0 +vid_vsync=false +vid_defbits=8 +vid_defheight=480 +vid_defwidth=640 +Gamma=1 +statfile=zdoomstat.txt +savestatistics=0 +snd_flipstereo=false +snd_channels=32 +r_columnmethod=1 +r_quakeintensity=1 +cl_predict_lerpthreshold=2 +cl_predict_lerpscale=0.05 +cl_predict_specials=true +cl_noprediction=false +telezoom=true +r_fakecontrast=1 +chase_dist=90 +chase_height=-8 +gl_cachetime=0.6 +gl_cachenodes=true +nomonsterinterpolation=false +png_gamma=0 +png_level=5 +screenshot_dir= +screenshot_type=png +screenshot_quiet=false +use_joystick=false +autosavecount=4 +disableautosave=0 +autosavenum=0 +smooth_mouse=false +m_side=2 +m_forward=1 +m_yaw=1 +m_pitch=1 +lookstrafe=false +freelook=false +invertmouse=false +cl_run=false +demo_compress=true +cl_waitforsave=true +save_dir= +longsavemessages=true +storesavepic=true +nofilecompression=false +cl_capfps=true +defaultiwad= +queryiwad=true +con_ctrl_d= +con_buffersize=-1 +showendoom=0 +bgamma=1 +ggamma=1 +rgamma=1 +vid_forcesurface=false +vid_displaybits=32 
+vid_adapter=0 +mouse_capturemode=1 +m_filter=false +m_noprescale=false +use_mouse=false +vid_winscale=1 +fullscreen=false +vid_maxfps=200 + +[GlobalSettings.Unknown] + +[Doom.Player] +wi_noautostartmap=false +playerclass=Fighter +stillbob=0 +movebob=0.25 +neverswitchonpickup=false +gender=male +team=255 +skin=base +colorset=0 +color=40 cf 00 +name=Player +autoaim=35 + +[Doom.ConsoleVariables] +r_drawfuzz=1 +vid_nopalsubstitutions=false +snd_pitched=false +menu_screenratios=-1 +snd_menuvolume=0.6 +show_obituaries=true +am_showmaplabel=2 +crosshairgrow=false +crosshairscale=false +crosshairhealth=true +crosshaircolor=ff 00 00 +crosshairforce=false +crosshair=0 +st_scale=true +paletteflash=0 +hudcolor_stats=3 +hudcolor_statnames=6 +hudcolor_xyco=3 +hudcolor_ttim=5 +hudcolor_ltim=8 +hudcolor_time=6 +hudcolor_titl=10 +hud_berserk_health=true +hud_armor_green=100 +hud_armor_yellow=50 +hud_armor_red=25 +hud_health_green=100 +hud_health_yellow=50 +hud_health_red=25 +hud_ammo_yellow=50 +hud_ammo_red=25 +hud_showlag=0 +hud_timecolor=5 +hud_showtime=0 +hud_showammo=2 +hud_showweapons=true +hud_showscore=false +hud_showstats=false +hud_showitems=false +hud_showmonsters=true +hud_showsecrets=true +hud_althud=false +hud_althudscale=2 +st_oldouch=false +cl_maxdecals=0 +cl_spreaddecals=false +transsouls=0.75 +wi_showtotaltime=true +wi_percents=true +dimcolor=ff d7 00 +dimamount=-1 +hud_scale=true +allcheats=false +r_stretchsky=true +r_shadercolormaps=true +screenblocks=10 +r_deathcamera=false +cl_showsecretmessage=true +cl_bloodtype=1 +cl_pufftype=0 +addrocketexplosion=false +cl_missiledecals=false +cl_doautoaim=false +cl_bloodsplats=false +cl_showmultikills=false +cl_showsprees=false +r_maxparticles=4000 +r_rail_trailsparsity=1 +r_rail_spiralsparsity=1 +r_rail_smartspiral=false +cl_rockettrails=3 +dlg_musicvolume=1 +sb_teamdeathmatch_headingcolor=6 +sb_teamdeathmatch_enable=true +sb_deathmatch_otherplayercolor=2 +sb_deathmatch_yourplayercolor=3 +sb_deathmatch_headingcolor=6 +sb_deathmatch_enable=true +sb_cooperative_otherplayercolor=2 +sb_cooperative_yourplayercolor=3 +sb_cooperative_headingcolor=6 +sb_cooperative_enable=true +nametagcolor=5 +displaynametags=0 +language=auto +compatmode=0 +vid_cursor=None +wipetype=0 +dehload=0 +chat_substitution=false +chatmacro0=No +chatmacro9=Yes +chatmacro8=I'll take care of it. +chatmacro7=Come here! +chatmacro6=Next time, scumbag... +chatmacro5=You suck! +chatmacro4=Help! +chatmacro3=I'm not looking too good! +chatmacro2=I'm OK. +chatmacro1=I'm ready to kick butt! 
+lookspring=true +con_midtime=0 +msgmidcolor2=4 +msgmidcolor=5 +msg4color=3 +msg3color=3 +msg2color=2 +msg1color=5 +msg0color=6 +msg=0 +con_alpha=0.75 +con_scaletext=0 +con_centernotify=false +con_notifytime=0 +con_notablist=false +cl_bbannounce=false +am_followplayer=true +am_textured=true +am_ovthingcolor_citem=e8 88 00 +am_ovthingcolor_item=e8 88 00 +am_ovthingcolor_ncmonster=e8 88 00 +am_ovthingcolor_monster=e8 88 00 +am_ovthingcolor_friend=e8 88 00 +am_ovthingcolor=e8 88 00 +am_ovsecretsectorcolor=00 ff ff +am_ovinterlevelcolor=ff ff 00 +am_ovtelecolor=ff ff 00 +am_ovunseencolor=00 22 6e +am_ovcdwallcolor=00 88 44 +am_ovfdwallcolor=00 88 44 +am_ovefwallcolor=00 88 44 +am_ovlockedcolor=00 88 44 +am_ovotherwallscolor=00 88 44 +am_ovspecialwallcolor=ff ff ff +am_ovsecretwallcolor=00 88 44 +am_ovwallcolor=00 ff 00 +am_ovyourcolor=fc e8 d8 +am_thingcolor_citem=fc fc fc +am_thingcolor_item=fc fc fc +am_thingcolor_ncmonster=fc fc fc +am_thingcolor_monster=fc fc fc +am_thingcolor_friend=fc fc fc +am_secretsectorcolor=ff 00 ff +am_interlevelcolor=ff 00 00 +am_intralevelcolor=00 00 ff +am_lockedcolor=00 78 00 +am_notseencolor=6c 6c 6c +am_xhaircolor=80 80 80 +am_gridcolor=8b 5a 2b +am_thingcolor=fc fc fc +am_efwallcolor=66 55 55 +am_cdwallcolor=4c 38 20 +am_fdwallcolor=88 70 58 +am_tswallcolor=88 88 88 +am_specialwallcolor=ff ff ff +am_secretwallcolor=00 00 00 +am_wallcolor=2c 18 08 +am_yourcolor=fc e8 d8 +am_backcolor=6c 54 40 +am_showthingsprites=0 +am_showtriggerlines=true +am_showkeys=true +am_drawmapback=0 +am_map_secrets=1 +am_customcolors=true +am_colorset=0 +am_showtotaltime=false +am_showtime=false +am_showitems=false +am_showmonsters=false +am_showsecrets=false +am_overlay=0 +am_rotate=0 + +[Doom.LocalServerInfo] +sv_corpsequeuesize=64 +forcewater=false +sv_smartaim=0 +sv_disableautohealth=false +sv_dropstyle=0 +compatflags2=0 +compatflags=0 + +[Doom.UnknownConsoleVariables] + +[Doom.ConsoleAliases] + +[Doom.Bindings] +1=slot 1 +2=slot 2 +3=slot 3 +4=slot 4 +5=slot 5 +6=slot 6 +7=slot 7 +8=slot 8 +9=slot 9 +0=slot 0 +-=sizedown +Equals=sizeup +tab=togglemap +t=messagemode +LeftBracket=invprev +RightBracket=invnext +enter=invuse +ctrl=+attack +`=toggleconsole +shift=+speed +\=+showscores +,=+moveleft +.=+moveright +alt=+strafe +space=+use +capslock=toggle cl_run +f1=menu_help +f2=menu_save +f3=menu_load +f4=menu_options +f5=menu_display +f6=quicksave +f7=menu_endgame +f8=togglemessages +f9=quickload +f10=menu_quit +f11=bumpgamma +f12=spynext +sysrq=screenshot +pause=pause +home=land +uparrow=+forward +pgup=+moveup +leftarrow=+left +rightarrow=+right +end=centerview +downarrow=+back +pgdn=+lookup +ins=+movedown +del=+lookdown +mouse1=+attack +mouse2=+strafe +mouse3=+forward +mouse4=+speed +joy1=+attack +joy2=+strafe +joy3=+speed +joy4=+use +mwheelup=weapprev +mwheeldown=weapnext +mwheelright=invnext +mwheelleft=invprev +dpadup=togglemap +dpaddown=invuse +dpadleft=invprev +dpadright=invnext +pad_start=pause +pad_back=menu_main +lthumb=crouch +lshoulder=weapprev +rshoulder=weapnext +ltrigger=+altattack +rtrigger=+attack +pad_a=+use +pad_y=+jump + +[Doom.DoubleBindings] + +[Doom.AutomapBindings] +0=am_gobig +-=+am_zoomout +Equals=+am_zoomin +p=am_toggletexture +f=am_togglefollow +g=am_togglegrid +c=am_clearmarks +m=am_setmark +kp-=+am_zoomout +kp+=+am_zoomin +uparrow=+am_panup +leftarrow=+am_panleft +rightarrow=+am_panright +downarrow=+am_pandown +mwheelup=am_zoom 1.2 +mwheeldown=am_zoom -1.2 + diff --git a/controllers/ddqn_webots/ddqn_webots.py b/controllers/ddqn_webots/ddqn_webots.py 
index d433edb..987ba50 100644
--- a/controllers/ddqn_webots/ddqn_webots.py
+++ b/controllers/ddqn_webots/ddqn_webots.py
@@ -3,6 +3,7 @@
 parent_dir = os.path.dirname(current_dir)
 parent_dir = os.path.dirname(parent_dir)
 sys.path.insert(0, parent_dir)
+print(parent_dir)
 
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #cut annoying tf messages
@@ -30,6 +31,13 @@ import __main__
 from datetime import datetime
 
+def WithNoise(input_vector):
+    mean = 0
+    std = 0.005
+    n = len(input_vector)
+    noise = np.random.normal(mean,std,n)
+    return list(np.array(input_vector) + noise)
+
 dir_path = os.path.dirname(os.path.realpath(__file__))
 L = Logger()
@@ -60,6 +68,7 @@ filename = os.path.join(parent_dir,'history','checkpoint')
 scores = deque(maxlen=100)
+goals = deque(maxlen=100)
 i = 0
 
 if os.path.exists(filename):
@@ -89,8 +98,13 @@ else:
     state = np.expand_dims(observation,axis=0)
     new_state = np.expand_dims(observation_,axis=0)
+
     agent.store_experience(state,action_idx,reward,new_state,done)
+
+    # # Add exp from noise
+    # agent.store_experience(np.expand_dims(WithNoise(state[0]),axis=0),action_idx,reward,np.expand_dims(WithNoise(new_state[0]),axis=0),done)
+    # agent.store_experience(np.expand_dims(WithNoise(state[0]),axis=0),action_idx,reward,np.expand_dims(WithNoise(new_state[0]),axis=0),done)
+
     observation = observation_
     if training: agent.learn()
     score += reward
@@ -105,7 +119,9 @@
         training = True
         agent.epsilon = epsilon_train
         print('Training on')
-
+
+    goal = (reward == 100)
+
     her_memory = env.her.in_done()
     for m in her_memory:
         state,action_idx,reward,new_state,done = m
@@ -115,11 +131,14 @@
     L.add_log('score',score)
+    L.add_log('goals',goal)
     L.save_game()
+
     scores.append(score)
+    goals.append(goal)
 
-    print('EPISODE:',i,'STEPS:',ep_steps,'EPSILON',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores),'\n')
+    print('EPISODE:',i,'STEPS:',ep_steps,'EPSILON',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores),'goals/100:',sum(goals),'\n')
 
     agent.save_model()
     i += 1
diff --git a/environments/TargetGame.py b/environments/TargetGame.py
new file mode 100644
index 0000000..8ed72d1
--- /dev/null
+++ b/environments/TargetGame.py
@@ -0,0 +1,162 @@
+import os,sys,inspect
+current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parent_dir = os.path.dirname(current_dir)
+sys.path.insert(0, parent_dir)
+
+from extras import obstacles
+import numpy as np
+import random
+import cv2
+
+
+def WithNoise(input_vector):
+    mean = 0
+    std = 0.005
+    n = len(input_vector)
+    noise = np.random.normal(mean,std,n)
+    return list(np.array(input_vector) + noise)
+
+def cart2pol(x, y):
+    rho = np.sqrt(x**2 + y**2)
+    phi = np.arctan2(y, x)
+    return(rho, phi)
+
+def pol2cart(rho, phi):
+    x = rho * np.cos(phi)
+    y = rho * np.sin(phi)
+    return(x, y)
+
+def D(A,B):
+    if len(A) == 3:
+        (x,y,z) = A
+        (a,b,c) = B
+    else:
+        (x,y) = A
+        (a,b) = B
+    return np.sqrt((x-a)**2 + (y-b)**2)
+
+def reward_function(position_data,prev_shaping,collision=False):
+    X,Y,X1,Y1 = position_data
+
+    reward = 0
+    sh1 = -100*(X1**2+Y1**2)
+    shaping = sh1
+    if prev_shaping is not None:
+        reward = shaping - prev_shaping
+
+    done = False
+    if collision:
+        #reward -= 100
+        done = True
+
+    if np.sqrt(X1**2+Y1**2) < 3:
+        reward = 100
+        done = True
+
+    return reward,done,shaping
+
+
+
+class Follower():
+    # Webots-to-environment-agnostic
+    def __init__(self,max_steps=50):
+        self.max_steps = max_steps
+
+        self.discrete_actions = [0,1,2]
+        self.action_size = len(self.discrete_actions)
+        self.stepCounter = 0
+        self.shaping = None
+
+        self.create_world()
+
+    def reset(self,reset_position=True):
+
+        self.create_world()
+
+        self.stepCounter = 0
+
+        self.shaping = None
+
+        state,_,_,_ = self.step(1)
+        return state
+
+
+    def step(self,action):
+
+        [xg,yg] = self.GOAL
+        [x0,y0] = self.position
+
+        # action 0 means "keep going straight", so default both coordinates
+        # to the current cell; the branches below only override what changes
+        x1,y1 = x0,y0
+
+        if self.direction == 0: #up
+            x1 = x0-1
+            if action==1:
+                y1 = y0-1
+                self.direction=2
+            if action==2:
+                y1 = y0+1
+                self.direction=3
+        elif self.direction == 1: #down
+            x1 = x0+1
+            if action==1:
+                y1 = y0+1
+                self.direction=3
+            if action==2:
+                y1 = y0-1
+                self.direction=2
+        elif self.direction == 2: #left
+            y1 = y0-1
+            if action==1:
+                x1 = x0+1
+                self.direction=1
+            if action==2:
+                x1 = x0-1
+                self.direction=0
+        elif self.direction == 3: #right
+            y1 = y0+1
+            if action==1:
+                x1 = x0-1
+                self.direction=0
+            if action==2:
+                x1 = x0+1
+                self.direction=1
+
+        # stay in place if the move would leave the map; numpy accepts
+        # negative indices, so an explicit bounds check is needed here
+        if 0 <= x1 < self.map.shape[0] and 0 <= y1 < self.map.shape[1]:
+            self.map[x1,y1] = 1
+        else:
+            x1,y1 = x0,y0
+
+        position_data = [x0-xg,y0-yg,x1-xg,y1-yg]
+
+
+        # rho0,phi0 = cart2pol(x-xg,y-yg)
+        # rho1,phi1 = cart2pol(x1-xg,y1-yg)
+        # state = [rho0,phi0,rho1,phi1]
+        state = position_data
+
+        # REWARD
+        reward,done,self.shaping = reward_function(position_data,self.shaping)
+
+        if reward == 100: print('goal')
+
+        if self.stepCounter >= self.max_steps:
+            done = True
+
+        self.position = [x1,y1]
+        self.path.append([x1,y1])
+        self.stepCounter += 1
+        info = ''
+        return state,reward,done,info
+
+
+    def create_world(self):
+        L = 100
+        self.map = np.zeros((L,L))
+        self.start = [int(random.random()*L),int(random.random()*L)]
+        self.target = [int(random.random()*L),int(random.random()*L)]
+        self.GOAL = self.target # step() reads the goal from self.GOAL
+
+        self.direction = np.random.choice([0,1,2,3]) # 0=up, 1=down, 2=left, 3=right
+        self.position = self.start
+        self.path = [self.position] # trajectory of visited cells
+
+
diff --git a/environments/WebotsEnv.py b/environments/WebotsEnv.py
index 07c49b0..bf47e4a 100644
--- a/environments/WebotsEnv.py
+++ b/environments/WebotsEnv.py
@@ -15,6 +15,13 @@
 OF = OpticalFlow()
 
+def WithNoise(input_vector):
+    mean = 0
+    std = 0.005
+    n = len(input_vector)
+    noise = np.random.normal(mean,std,n)
+    return list(np.array(input_vector) + noise)
+
 def cart2pol(x, y):
     rho = np.sqrt(x**2 + y**2)
     phi = np.arctan2(y, x)
@@ -92,6 +99,8 @@ def in_done(self):
         rho1,phi1 = cart2pol(x1-xg,y1-yg)
         state = [rho0,phi0,rho1,phi1]
+
+
         reward,done,prev_shaping = reward_function(position_data,prev_shaping)
         done = (i==n-1)
@@ -101,6 +110,11 @@ def in_done(self):
         if prev_state is not None:
             memory.append([prev_state,prev_action,prev_reward,state,prev_done])
+
+        # # Add Gaussian Noise to increase data and regularize
+        # memory.append([WithNoise(prev_state),prev_action,prev_reward,WithNoise(state),prev_done])
+        # memory.append([WithNoise(prev_state),prev_action,prev_reward,WithNoise(state),prev_done])
+
         prev_state,prev_action,prev_reward,prev_done = state,action,reward,done
 
     return memory
diff --git a/environments/__pycache__/VizDoomEnv.cpython-37.pyc b/environments/__pycache__/VizDoomEnv.cpython-37.pyc
new file mode 100644
index 0000000..a327aad
Binary files /dev/null and b/environments/__pycache__/VizDoomEnv.cpython-37.pyc differ
diff --git a/environments/__pycache__/WebotsEnv.cpython-37.pyc b/environments/__pycache__/WebotsEnv.cpython-37.pyc
index b0e10cf..b742010 100644
Binary files a/environments/__pycache__/WebotsEnv.cpython-37.pyc and b/environments/__pycache__/WebotsEnv.cpython-37.pyc differ
diff --git a/mains/ddqn_target.py b/mains/ddqn_target.py
new file mode 100644
index 0000000..b1c9db0
--- /dev/null
+++ b/mains/ddqn_target.py
@@ -0,0 +1,46 @@
+import os,sys,inspect
+current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parent_dir = os.path.dirname(current_dir)
+sys.path.insert(0, parent_dir)
+print(parent_dir)
+
+
+import numpy as np
+import random
+import os
+from tensorflow.keras.optimizers import Adam
+
+from environments.TargetGame import *
+from networks.networks import *
+from extras.experience_memory import *
+from agents.DDQN import Agent
+from extras.statistics import *
+dir_path = os.path.dirname(os.path.realpath(__file__))
+L = Logger()
+
+env = Follower() # the TargetGame environment imported above
+agent = Agent(action_size=env.action_size,Network=SimpleDQN)
+
+n_games = 2000
+scores = []
+avg_score = 0
+
+for i in range(n_games):
+    observation = env.reset()
+    done = False
+    score = 0
+    while not done:
+        action = agent.choose_action(observation)
+        new_observation,reward,done,info = env.step(action)
+        score += reward
+        state = np.expand_dims(observation,axis=0)
+        new_state = np.expand_dims(new_observation,axis=0)
+        agent.store_experience(state,action,reward,new_state,done)
+        observation = new_observation
+
+    agent.learn()
+
+    scores.append(score)
+    print('GAME:',i,'epsilon',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:]))
+    L.add_log('score',score)
+
diff --git a/mains/ddqn_vizdoom.py b/mains/ddqn_vizdoom.py
index e5dbfed..06d401f 100644
--- a/mains/ddqn_vizdoom.py
+++ b/mains/ddqn_vizdoom.py
@@ -1,24 +1,27 @@
 import os,sys,inspect
 current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
 parent_dir = os.path.dirname(current_dir)
+#parent_dir = os.path.dirname(parent_dir)
 sys.path.insert(0, parent_dir)
+print(parent_dir)
+
 
 import numpy as np
 import random
 import os
 from tensorflow.keras.optimizers import Adam
 
-from environments.VizDoomEnv import *
 from networks.networks import *
+from environments.VizDoomEnv import *
 from extras.experience_memory import *
 from agents.DDQN import Agent
 from extras.statistics import *
 dir_path = os.path.dirname(os.path.realpath(__file__))
-L = Logger(dir=dir_path,fname='vizdoom_ddqn')
+L = Logger()
 
 env = VizDoomEnv(scenario='defend_the_center.cfg')
-agent = Agent(action_size=env.action_size,conv=True)
+agent = Agent(action_size=env.action_size,Network=ConvDQN,epsilon_step=1/50000)
 
 n_games = 2000
 
@@ -41,9 +44,9 @@
     agent.learn()
 
     scores.append(score)
-    print('GAME:',i,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:]))
+    print('GAME:',i,'epsilon',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:]))
     L.add_log('score',score)
     L.add_log('kills',kills)
 
-    if i % 10==0:
-        L.save_game()
\ No newline at end of file
+    # if i % 10==0:
+    #     L.save_game()
\ No newline at end of file
diff --git a/networks/__pycache__/networks.cpython-37.pyc b/networks/__pycache__/networks.cpython-37.pyc
index 6c79338..beb13da 100644
Binary files a/networks/__pycache__/networks.cpython-37.pyc and b/networks/__pycache__/networks.cpython-37.pyc differ
diff --git a/networks/networks.py b/networks/networks.py
index 706fa7f..d0090fa 100644
--- a/networks/networks.py
+++ b/networks/networks.py
@@ -2,175 +2,6 @@
 import tensorflow.keras as keras
 from tensorflow.keras.layers import Dense,Conv2D,Flatten,Concatenate,MaxPooling2D
 
-class PPONetwork(keras.Model):
-    def __init__(self,n_actions,conv=False):
-        super(PPONetwork,self).__init__()
-
-        self.HiddenLayers = []
-
-        if conv:
-            self.HiddenLayers.append( Conv2D(32,kernel_size=8,strides=(4,4),activation='relu') )
-            self.HiddenLayers.append( Conv2D(64,kernel_size=4,strides=(2,2),activation='relu') )
-            self.HiddenLayers.append(
Conv2D(64,kernel_size=3,activation='relu') ) - self.HiddenLayers.append( Flatten() ) - - self.HiddenLayers.append( Dense(256,activation='relu') ) - self.HiddenLayers.append( Dense(512,activation='relu') ) - - self.v = Dense(1,activation='linear') - self.pi = Dense(n_actions,activation='softmax') - - def call(self,state): - x = state - - for layer in self.HiddenLayers: - x = layer(x) - - policy = self.pi(x) - value = self.v(x) - - return policy, value - - - -class ActorCriticNetwork(keras.Model): - def __init__(self, n_actions, name='actor_critic'): - super(ActorCriticNetwork, self).__init__() - self.n_actions = n_actions - self.model_name = name - - self.layer1 = Dense(1024, activation='relu') - self.layer2 = Dense(512, activation='relu') - self.v = Dense(1, activation='linear') - self.pi = Dense(n_actions,activation='softmax') - - def call(self,state): - value = self.layer1(state) - value = self.layer2(value) - - pi = self.pi(value) - v = self.v(value) - - return v,pi - -class PolicyGradientNetwork(keras.Model): - def __init__(self,n_actions): - super(PolicyGradientNetwork, self).__init__() - self.n_actions = n_actions - - self.fc1 = Dense(256,activation='relu') - self.fc2 = Dense(256,activation='relu') - self.pi = Dense(n_actions,activation='softmax') - - def call(self,state): - value = self.fc1(state) - value = self.fc2(value) - - pi = self.pi(value) - - return pi - - -class DQNetwork(keras.Model): - def __init__(self,action_size,conv=False): - super(DQNetwork, self).__init__() - self.HiddenLayers = [] - - if conv: - self.HiddenLayers.append( Conv2D(32,kernel_size=8,strides=(4,4),activation='relu') ) - self.HiddenLayers.append( Conv2D(64,kernel_size=4,strides=(2,2),activation='relu') ) - self.HiddenLayers.append( Conv2D(64,kernel_size=3,activation='relu') ) - self.HiddenLayers.append( Flatten() ) - self.HiddenLayers.append( Dense(units=512, activation='relu') ) - - self.value = Dense(units=action_size, activation='linear') - - def call(self,state): - x = state - - for layer in self.HiddenLayers: - x = layer(x) - - value = self.value(x) - - return value - -class MitsosPPONet(keras.Model): - def __init__(self,n_actions): - super(MitsosPPONet, self).__init__() - self.ConvLayers = [] - self.ConvLayers.append( Conv2D(64,kernel_size=9,activation='relu') ) - self.ConvLayers.append( Conv2D(64,kernel_size=5,activation='relu') ) - self.ConvLayers.append( Conv2D(64,kernel_size=3,activation='relu') ) - - self.flatten = Flatten() - self.concat = Concatenate(axis=-1) - - self.DenseLayers = [] - self.DenseLayers.append( Dense(512,activation='relu') ) - self.DenseLayers.append( Dense(256,activation='relu') ) - - self.policy = Dense(n_actions,activation='softmax') - self.value = Dense(1,activation='linear') - - def call(self,state): - x1 = state[0] #stacked frames - x2 = state[1] #stacked sensor values - - for layer in self.ConvLayers: - x1 = layer(x1) - - x1 = self.flatten(x1) - x2 = self.flatten(x2) - x = self.concat([x1,x2]) - - for layer in self.DenseLayers: - x = layer(x) - - pi = self.policy(x) - v = self.value(x) - - return pi,v - - -class MitsosDQNet(keras.Model): - def __init__(self,action_size): - super(MitsosDQNet, self).__init__() - self.ConvLayers = [] - self.ConvLayers.append( Conv2D(64,kernel_size=9,activation='relu') ) - self.ConvLayers.append( Conv2D(64,kernel_size=5,activation='relu') ) - #self.ConvLayers.append( Conv2D(64,kernel_size=3,activation='relu') ) - - self.flatten = Flatten() - self.concat = Concatenate(axis=-1) - - self.DenseLayers = [] - self.DenseLayers.append( 
Dense(units=512, activation='relu') ) - self.DenseLayers.append( Dense(units=512, activation='relu') ) - self.DenseLayers.append( Dense(units=512, activation='relu') ) - - self.value = Dense(units=action_size, activation='linear') - - def call(self,state): - x1 = state[0] #stacked frames - x2 = state[1] #stacked sensor values - - for layer in self.ConvLayers: - x1 = layer(x1) - - x1 = self.flatten(x1) - x2 = self.flatten(x2) - x = self.concat([x1,x2]) - - for layer in self.DenseLayers: - x = layer(x) - - v = self.value(x) - - return v - - -################################################################################################################################ class DenseNet(keras.Model): def __init__(self,units=[64]): @@ -223,7 +54,7 @@ def call(self,INPUT): class ConvDQN(keras.Model): def __init__(self,output_size): - super(Net1,self).__init__() + super(ConvDQN,self).__init__() self.conv = ConvNet(filters=[64,64]) self.main = DenseNet(units=[128,128]) diff --git a/setupvizdoom.sh b/setupvizdoom.sh index b166ad1..2c5d4f5 100644 --- a/setupvizdoom.sh +++ b/setupvizdoom.sh @@ -18,10 +18,9 @@ tar xf julia-1.3.0-linux-x86_64.tar.gz sudo ln -s ~/julia-1.3.0/bin/julia /usr/local/bin/julia pip install vizdoom -pip install varname -julia +# julia -using Pkg -Pkg.add("CxxWrap") \ No newline at end of file +# using Pkg +# Pkg.add("CxxWrap") \ No newline at end of file
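Note on the recurring WithNoise helper this patch introduces (its call sites are left commented out in ddqn_webots.py and WebotsEnv.py): the idea is to store extra copies of each transition with small zero-mean Gaussian noise added to the observations, as cheap data augmentation and regularization for the replay buffer. Below is a minimal, self-contained sketch of that pattern, assuming only NumPy; the plain-list buffer and the store_augmented helper are hypothetical stand-ins for the repo's agent.store_experience, not part of the diff.

import numpy as np

def with_noise(vec, std=0.005):
    """Return the vector with zero-mean Gaussian noise added (same std as WithNoise)."""
    vec = np.asarray(vec, dtype=float)
    return vec + np.random.normal(0.0, std, size=vec.shape)

def store_augmented(buffer, state, action, reward, new_state, done, copies=2):
    """Store the real transition plus `copies` noisy duplicates.

    Only the observations are perturbed; the action, reward, and done flag
    are reused unchanged, so the noise regularizes the value function
    without altering the underlying returns.
    """
    buffer.append((state, action, reward, new_state, done))
    for _ in range(copies):
        buffer.append((with_noise(state), action, reward, with_noise(new_state), done))

if __name__ == "__main__":
    buf = []
    store_augmented(buf, [0.1, 0.2], 0, 1.0, [0.15, 0.25], False)
    print(len(buf))  # 3: the real transition plus two noisy copies

The two commented-out store_experience calls in the diff follow the same pattern: each noisy copy is stored with the original action_idx, reward, and done flag, and only the state vectors are perturbed.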