From c0ae26f7e866276557e39d7626a6a519cad0600c Mon Sep 17 00:00:00 2001 From: constant-inos Date: Tue, 1 Jun 2021 06:33:27 +0000 Subject: [PATCH] m --- _vizdoom.ini | 568 ++++++++++++++++++ controllers/ddqn_webots/ddqn_webots.py | 24 +- environments/TargetGame.py | 162 +++++ environments/WebotsEnv.py | 14 + .../__pycache__/VizDoomEnv.cpython-37.pyc | Bin 0 -> 3050 bytes .../__pycache__/WebotsEnv.cpython-37.pyc | Bin 10114 -> 10346 bytes mains/ddqn_target.py | 46 ++ mains/ddqn_vizdoom.py | 15 +- networks/__pycache__/networks.cpython-37.pyc | Bin 9393 -> 4625 bytes networks/networks.py | 171 +----- setupvizdoom.sh | 7 +- 11 files changed, 825 insertions(+), 182 deletions(-) create mode 100644 _vizdoom.ini create mode 100644 environments/TargetGame.py create mode 100644 environments/__pycache__/VizDoomEnv.cpython-37.pyc create mode 100644 mains/ddqn_target.py diff --git a/_vizdoom.ini b/_vizdoom.ini new file mode 100644 index 0000000..8fade4d --- /dev/null +++ b/_vizdoom.ini @@ -0,0 +1,568 @@ +# This file was generated by ViZDoom 1.1.8 (ZDOOM 2.8.1) on Mon May 31 14:58:54 2021 + +# These are the directories to automatically search for IWADs. +# Each directory should be on a separate line, preceded by Path= +[IWADSearch.Directories] +Path=. +Path=$DOOMWADDIR +Path=./_vizdoom +Path=/usr/local/share/doom +Path=/usr/local/share/games/doom +Path=/usr/share/doom +Path=/usr/share/games/doom + +# These are the directories to search for wads added with the -file +# command line parameter, if they cannot be found with the path +# as-is. Layout is the same as for IWADSearch.Directories +[FileSearch.Directories] +Path=./_vizdoom +Path=/usr/local/share/ +Path=$DOOMWADDIR + +# Files to automatically execute when running the corresponding game. +# Each file should be on its own line, preceded by Path= + +[Doom.AutoExec] +Path=./_vizdoom/autoexec.cfg + +[Heretic.AutoExec] +Path=./_vizdoom/autoexec.cfg + +[Hexen.AutoExec] +Path=./_vizdoom/autoexec.cfg + +[Strife.AutoExec] +Path=./_vizdoom/autoexec.cfg + +[Chex.AutoExec] +Path=./_vizdoom/autoexec.cfg + +# WAD files to always load. These are loaded after the IWAD but before +# any files added with -file. Place each file on its own line, preceded +# by Path= +[Global.Autoload] + +# Wad files to automatically load depending on the game and IWAD you are +# playing. You may have have files that are loaded for all similar IWADs +# (the game) and files that are only loaded for particular IWADs. For example, +# any files listed under 'doom.Autoload' will be loaded for any version of Doom, +# but files listed under 'doom.doom2.Autoload' will only load when you are +# playing a Doom 2 based game (doom2.wad, tnt.wad or plutonia.wad), and files listed under +# 'doom.doom2.commercial.Autoload' only when playing doom2.wad. + +[doom.Autoload] + +[doom.doom2.Autoload] + +[doom.doom2.commercial.Autoload] + +[doom.doom2.bfg.Autoload] + +[doom.doom2.plutonia.Autoload] + +[doom.doom2.tnt.Autoload] + +[doom.doom1.Autoload] + +[doom.doom1.registered.Autoload] + +[doom.doom1.ultimate.Autoload] + +[doom.doom1.bfg.Autoload] + +[doom.freedoom.Autoload] + +[doom.freedoom.demo.Autoload] + +[doom.freedoom.phase1.Autoload] + +[doom.freedoom.phase2.Autoload] + +[doom.freedoom.freedm.Autoload] + +[heretic.Autoload] + +[heretic.heretic.Autoload] + +[heretic.shadow.Autoload] + +[blasphemer.Autoload] + +[hexen.Autoload] + +[hexen.deathkings.Autoload] + +[hexen.hexen.Autoload] + +[strife.Autoload] + +[chex.Autoload] + +[chex.chex1.Autoload] + +[chex.chex3.Autoload] + +[urbanbrawl.Autoload] + +[hacx.Autoload] + +[hacx.hacx1.Autoload] + +[hacx.hacx2.Autoload] + +[harmony.Autoload] + +[square.Autoload] + +[square.squareware.Autoload] + +[square.square.Autoload] + +[LastRun] +Version=211 + +[GlobalSettings] +gus_memsize=0 +midi_dmxgus=true +gus_patchdir= +midi_voices=32 +midi_config=timidity.cfg +snd_efx=true +snd_aldevice=Default +wildmidi_enhanced_resampling=true +wildmidi_reverb=false +wildmidi_frequency=0 +wildmidi_config= +fluid_chorus_type=0 +fluid_chorus_depth=8 +fluid_chorus_speed=0.3 +fluid_chorus_level=1 +fluid_chorus_voices=3 +fluid_reverb_level=0.57 +fluid_reverb_width=0.76 +fluid_reverb_damping=0.23 +fluid_reverb_roomsize=0.61 +fluid_threads=1 +fluid_samplerate=0 +fluid_interp=1 +fluid_voices=128 +fluid_chorus=true +fluid_reverb=true +fluid_gain=0.5 +fluid_patchset= +opl_core=0 +opl_numchips=2 +timidity_frequency=44100 +timidity_pipe=90 +timidity_mastervolume=1 +timidity_byteswap=false +timidity_8bit=false +timidity_stereo=true +timidity_reverb=0 +timidity_chorus=0 +timidity_extargs= +timidity_exe=timidity +snd_mididevice=-1 +spc_amp=1.875 +mod_dumb_mastervolume=1 +mod_autochip_scan_threshold=12 +mod_autochip_size_scan=500 +mod_autochip_size_force=100 +mod_autochip=false +mod_interp=2 +mod_volramp=2 +mod_samplerate=0 +mod_dumb=true +snd_sfxvolume=1 +snd_backend=openal +snd_output=default +snd_buffersize=0 +snd_samplerate=0 +snd_musicvolume=0.5 +snd_waterlp=250 +snd_midipatchset= +snd_output_format=PCM-16 +snd_speakermode=Auto +snd_resampler=Linear +snd_waterreverb=true +snd_hrtf=false +snd_buffercount=0 +snd_driver=0 +opl_fullpan=true +vid_tft=true +m_showinputgrid=false +m_show_backbutton=0 +m_use_mouse=1 +show_messages=true +mouse_sensitivity=1 +map_point_coordinates=true +vid_aspect=3 +vid_nowidescreen=false +vid_refreshrate=0 +vid_vsync=false +vid_defbits=8 +vid_defheight=480 +vid_defwidth=640 +Gamma=1 +statfile=zdoomstat.txt +savestatistics=0 +snd_flipstereo=false +snd_channels=32 +r_columnmethod=1 +r_quakeintensity=1 +cl_predict_lerpthreshold=2 +cl_predict_lerpscale=0.05 +cl_predict_specials=true +cl_noprediction=false +telezoom=true +r_fakecontrast=1 +chase_dist=90 +chase_height=-8 +gl_cachetime=0.6 +gl_cachenodes=true +nomonsterinterpolation=false +png_gamma=0 +png_level=5 +screenshot_dir= +screenshot_type=png +screenshot_quiet=false +use_joystick=false +autosavecount=4 +disableautosave=0 +autosavenum=0 +smooth_mouse=false +m_side=2 +m_forward=1 +m_yaw=1 +m_pitch=1 +lookstrafe=false +freelook=false +invertmouse=false +cl_run=false +demo_compress=true +cl_waitforsave=true +save_dir= +longsavemessages=true +storesavepic=true +nofilecompression=false +cl_capfps=true +defaultiwad= +queryiwad=true +con_ctrl_d= +con_buffersize=-1 +showendoom=0 +bgamma=1 +ggamma=1 +rgamma=1 +vid_forcesurface=false +vid_displaybits=32 +vid_adapter=0 +mouse_capturemode=1 +m_filter=false +m_noprescale=false +use_mouse=false +vid_winscale=1 +fullscreen=false +vid_maxfps=200 + +[GlobalSettings.Unknown] + +[Doom.Player] +wi_noautostartmap=false +playerclass=Fighter +stillbob=0 +movebob=0.25 +neverswitchonpickup=false +gender=male +team=255 +skin=base +colorset=0 +color=40 cf 00 +name=Player +autoaim=35 + +[Doom.ConsoleVariables] +r_drawfuzz=1 +vid_nopalsubstitutions=false +snd_pitched=false +menu_screenratios=-1 +snd_menuvolume=0.6 +show_obituaries=true +am_showmaplabel=2 +crosshairgrow=false +crosshairscale=false +crosshairhealth=true +crosshaircolor=ff 00 00 +crosshairforce=false +crosshair=0 +st_scale=true +paletteflash=0 +hudcolor_stats=3 +hudcolor_statnames=6 +hudcolor_xyco=3 +hudcolor_ttim=5 +hudcolor_ltim=8 +hudcolor_time=6 +hudcolor_titl=10 +hud_berserk_health=true +hud_armor_green=100 +hud_armor_yellow=50 +hud_armor_red=25 +hud_health_green=100 +hud_health_yellow=50 +hud_health_red=25 +hud_ammo_yellow=50 +hud_ammo_red=25 +hud_showlag=0 +hud_timecolor=5 +hud_showtime=0 +hud_showammo=2 +hud_showweapons=true +hud_showscore=false +hud_showstats=false +hud_showitems=false +hud_showmonsters=true +hud_showsecrets=true +hud_althud=false +hud_althudscale=2 +st_oldouch=false +cl_maxdecals=0 +cl_spreaddecals=false +transsouls=0.75 +wi_showtotaltime=true +wi_percents=true +dimcolor=ff d7 00 +dimamount=-1 +hud_scale=true +allcheats=false +r_stretchsky=true +r_shadercolormaps=true +screenblocks=10 +r_deathcamera=false +cl_showsecretmessage=true +cl_bloodtype=1 +cl_pufftype=0 +addrocketexplosion=false +cl_missiledecals=false +cl_doautoaim=false +cl_bloodsplats=false +cl_showmultikills=false +cl_showsprees=false +r_maxparticles=4000 +r_rail_trailsparsity=1 +r_rail_spiralsparsity=1 +r_rail_smartspiral=false +cl_rockettrails=3 +dlg_musicvolume=1 +sb_teamdeathmatch_headingcolor=6 +sb_teamdeathmatch_enable=true +sb_deathmatch_otherplayercolor=2 +sb_deathmatch_yourplayercolor=3 +sb_deathmatch_headingcolor=6 +sb_deathmatch_enable=true +sb_cooperative_otherplayercolor=2 +sb_cooperative_yourplayercolor=3 +sb_cooperative_headingcolor=6 +sb_cooperative_enable=true +nametagcolor=5 +displaynametags=0 +language=auto +compatmode=0 +vid_cursor=None +wipetype=0 +dehload=0 +chat_substitution=false +chatmacro0=No +chatmacro9=Yes +chatmacro8=I'll take care of it. +chatmacro7=Come here! +chatmacro6=Next time, scumbag... +chatmacro5=You suck! +chatmacro4=Help! +chatmacro3=I'm not looking too good! +chatmacro2=I'm OK. +chatmacro1=I'm ready to kick butt! +lookspring=true +con_midtime=0 +msgmidcolor2=4 +msgmidcolor=5 +msg4color=3 +msg3color=3 +msg2color=2 +msg1color=5 +msg0color=6 +msg=0 +con_alpha=0.75 +con_scaletext=0 +con_centernotify=false +con_notifytime=0 +con_notablist=false +cl_bbannounce=false +am_followplayer=true +am_textured=true +am_ovthingcolor_citem=e8 88 00 +am_ovthingcolor_item=e8 88 00 +am_ovthingcolor_ncmonster=e8 88 00 +am_ovthingcolor_monster=e8 88 00 +am_ovthingcolor_friend=e8 88 00 +am_ovthingcolor=e8 88 00 +am_ovsecretsectorcolor=00 ff ff +am_ovinterlevelcolor=ff ff 00 +am_ovtelecolor=ff ff 00 +am_ovunseencolor=00 22 6e +am_ovcdwallcolor=00 88 44 +am_ovfdwallcolor=00 88 44 +am_ovefwallcolor=00 88 44 +am_ovlockedcolor=00 88 44 +am_ovotherwallscolor=00 88 44 +am_ovspecialwallcolor=ff ff ff +am_ovsecretwallcolor=00 88 44 +am_ovwallcolor=00 ff 00 +am_ovyourcolor=fc e8 d8 +am_thingcolor_citem=fc fc fc +am_thingcolor_item=fc fc fc +am_thingcolor_ncmonster=fc fc fc +am_thingcolor_monster=fc fc fc +am_thingcolor_friend=fc fc fc +am_secretsectorcolor=ff 00 ff +am_interlevelcolor=ff 00 00 +am_intralevelcolor=00 00 ff +am_lockedcolor=00 78 00 +am_notseencolor=6c 6c 6c +am_xhaircolor=80 80 80 +am_gridcolor=8b 5a 2b +am_thingcolor=fc fc fc +am_efwallcolor=66 55 55 +am_cdwallcolor=4c 38 20 +am_fdwallcolor=88 70 58 +am_tswallcolor=88 88 88 +am_specialwallcolor=ff ff ff +am_secretwallcolor=00 00 00 +am_wallcolor=2c 18 08 +am_yourcolor=fc e8 d8 +am_backcolor=6c 54 40 +am_showthingsprites=0 +am_showtriggerlines=true +am_showkeys=true +am_drawmapback=0 +am_map_secrets=1 +am_customcolors=true +am_colorset=0 +am_showtotaltime=false +am_showtime=false +am_showitems=false +am_showmonsters=false +am_showsecrets=false +am_overlay=0 +am_rotate=0 + +[Doom.LocalServerInfo] +sv_corpsequeuesize=64 +forcewater=false +sv_smartaim=0 +sv_disableautohealth=false +sv_dropstyle=0 +compatflags2=0 +compatflags=0 + +[Doom.UnknownConsoleVariables] + +[Doom.ConsoleAliases] + +[Doom.Bindings] +1=slot 1 +2=slot 2 +3=slot 3 +4=slot 4 +5=slot 5 +6=slot 6 +7=slot 7 +8=slot 8 +9=slot 9 +0=slot 0 +-=sizedown +Equals=sizeup +tab=togglemap +t=messagemode +LeftBracket=invprev +RightBracket=invnext +enter=invuse +ctrl=+attack +`=toggleconsole +shift=+speed +\=+showscores +,=+moveleft +.=+moveright +alt=+strafe +space=+use +capslock=toggle cl_run +f1=menu_help +f2=menu_save +f3=menu_load +f4=menu_options +f5=menu_display +f6=quicksave +f7=menu_endgame +f8=togglemessages +f9=quickload +f10=menu_quit +f11=bumpgamma +f12=spynext +sysrq=screenshot +pause=pause +home=land +uparrow=+forward +pgup=+moveup +leftarrow=+left +rightarrow=+right +end=centerview +downarrow=+back +pgdn=+lookup +ins=+movedown +del=+lookdown +mouse1=+attack +mouse2=+strafe +mouse3=+forward +mouse4=+speed +joy1=+attack +joy2=+strafe +joy3=+speed +joy4=+use +mwheelup=weapprev +mwheeldown=weapnext +mwheelright=invnext +mwheelleft=invprev +dpadup=togglemap +dpaddown=invuse +dpadleft=invprev +dpadright=invnext +pad_start=pause +pad_back=menu_main +lthumb=crouch +lshoulder=weapprev +rshoulder=weapnext +ltrigger=+altattack +rtrigger=+attack +pad_a=+use +pad_y=+jump + +[Doom.DoubleBindings] + +[Doom.AutomapBindings] +0=am_gobig +-=+am_zoomout +Equals=+am_zoomin +p=am_toggletexture +f=am_togglefollow +g=am_togglegrid +c=am_clearmarks +m=am_setmark +kp-=+am_zoomout +kp+=+am_zoomin +uparrow=+am_panup +leftarrow=+am_panleft +rightarrow=+am_panright +downarrow=+am_pandown +mwheelup=am_zoom 1.2 +mwheeldown=am_zoom -1.2 + diff --git a/controllers/ddqn_webots/ddqn_webots.py b/controllers/ddqn_webots/ddqn_webots.py index d433edb..987ba50 100644 --- a/controllers/ddqn_webots/ddqn_webots.py +++ b/controllers/ddqn_webots/ddqn_webots.py @@ -3,6 +3,8 @@ parent_dir = os.path.dirname(current_dir) parent_dir = os.path.dirname(parent_dir) sys.path.insert(0, parent_dir) +print(parent_dir) +exit() os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #cut annoying tf messages @@ -30,6 +32,13 @@ import __main__ from datetime import datetime +def WithNoise(input_vector): + mean = 0 + std = 0.005 + n = len(input_vector) + noise = np.random.normal(mean,std,n) + return list(np.array(input_vector) + noise) + dir_path = os.path.dirname(os.path.realpath(__file__)) L = Logger() @@ -60,6 +69,7 @@ filename = os.path.join(parent_dir,'history','checkpoint') scores = deque(maxlen=100) +goals = deque(maxlen=100) i = 0 if os.path.exists(filename): @@ -89,8 +99,13 @@ else: state = np.expand_dims(observation,axis=0) new_state = np.expand_dims(observation_,axis=0) + agent.store_experience(state,action_idx,reward,new_state,done) + # # Add exp from noise + # agent.store_experience(np.expand_dims(WithNoise(state[0]),axis=0),action_idx,reward,np.expand_dims(WithNoise(new_state[0]),axis=0),done) + # agent.store_experience(np.expand_dims(WithNoise(state[0]),axis=0),action_idx,reward,np.expand_dims(WithNoise(new_state[0]),axis=0),done) + observation = observation_ if training: agent.learn() score += reward @@ -105,7 +120,9 @@ training = True agent.epsilon = epsilon_train print('Training on') - + + goal = (reward == 100) + her_memory = env.her.in_done() for m in her_memory: state,action_idx,reward,new_state,done = m @@ -115,11 +132,14 @@ L.add_log('score',score) + L.add_log('goals',goal) L.save_game() + scores.append(score) + goals.append(goal) - print('EPISODE:',i,'STEPS:',ep_steps,'EPSILON',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores),'\n') + print('EPISODE:',i,'STEPS:',ep_steps,'EPSILON',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores),'goals/100:',sum(goals),'\n') agent.save_model() i += 1 diff --git a/environments/TargetGame.py b/environments/TargetGame.py new file mode 100644 index 0000000..8ed72d1 --- /dev/null +++ b/environments/TargetGame.py @@ -0,0 +1,162 @@ +import os,sys,inspect +current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parent_dir = os.path.dirname(current_dir) +sys.path.insert(0, parent_dir) + +from extras import obstacles +import numpy as np +import random +import cv2 + + +def WithNoise(input_vector): + mean = 0 + std = 0.005 + n = len(input_vector) + noise = np.random.normal(mean,std,n) + return list(np.array(input_vector) + noise) + +def cart2pol(x, y): + rho = np.sqrt(x**2 + y**2) + phi = np.arctan2(y, x) + return(rho, phi) + +def pol2cart(rho, phi): + x = rho * np.cos(phi) + y = rho * np.sin(phi) + return(x, y) + +def D(A,B): + if len(A) == 3: + (x,y,z) = A + (a,b,c) = B + else: + (x,y) = A + (a,b) = B + return np.sqrt((x-a)**2 + (y-b)**2) + +def reward_function(position_data,prev_shaping,collision=False): + X,Y,X1,Y1 = position_data + + reward = 0 + sh1 = -100*(X1**2+Y1**2) + shaping = sh1 + if prev_shaping is not None: + reward = shaping - prev_shaping + + done = False + if collision: + #reward -= 100 + done = True + + if np.sqrt(X1**2+Y1**2) < 3: + reward = 100 + done = True + + return reward,done,shaping + + + +class Follower(): + # Webots-to-environment-agnostic + def __init__(self,max_steps=50: + self.max_steps = max_steps + + self.discrete_actions = [0,1,2] + self.action_size = len(self.discrete_actions) + self.stepCounter = 0 + self.shaping = None + + self.create_world() + + def reset(self,reset_position=True): + + self.create_world() + + self.stepCounter = 0 + + self.shaping = None + + self.path.append(position) + state,_,_,_ = self.step(1) + return state + + + def step(self,action): + + [xg,yg,] = self.GOAL + [x0,y0] = self.position + + position_data = [] + + if self.direction == 0: #up + x1 = x0-1 + if action==1: + y1 = y0-1 + self.direction=2 + if action==2: + y1 = y0+1 + self.direction=3 + if self.direction == 1: #down + x1 = x0+1 + if action==1: + y1 = y0+1 + self.direction=3 + if action==2: + y1 = y0-1 + self.direction=2 + if self.direction == 2: #left + y1 = y0-1 + if action==1: + x1 = x0+1 + self.direction=1 + if action==2: + x1=x0-1 + self.direction=0 + if self.direction == 3: #right + y1 = y0+1 + if action==1: + x1 = x0-1 + self.direction=0 + if action==2: + x1=x0+1 + self.direction=1 + + try: + self.map[x1,y1] = 1 + except: + x1,y1 = x1,y0 + + position_data = [x0-xg,y0-yg,x1-xg,y1-yg] + + + # rho0,phi0 = cart2pol(x-xg,y-yg) + # rho1,phi1 = cart2pol(x1-xg,y1-yg) + # state = [rho0,phi0,rho1,phi1] + state = position_data + + # REWARD + reward,done,self.shaping = reward_function(position_data,self.shaping) + + if reward == 100: print('goal') + + if self.stepCounter >= self.max_steps: + done = True + + self.path.append([x1,y1]) + self.stepCounter += 1 + info = '' + return state,reward,done,info + + + def create_world(self): + L = 100 + self.map = np.zeros((L,L)) + self.start = [int(random.random()*L),int(random.random()*L)] + self.target = [int(random.random()*L),int(random.random()*L)] + + self.direction = np.random.choice([1,2,3,4]) # up, down, left, right + self.position = self.start + + + diff --git a/environments/WebotsEnv.py b/environments/WebotsEnv.py index 07c49b0..bf47e4a 100644 --- a/environments/WebotsEnv.py +++ b/environments/WebotsEnv.py @@ -15,6 +15,13 @@ OF = OpticalFlow() +def WithNoise(input_vector): + mean = 0 + std = 0.005 + n = len(input_vector) + noise = np.random.normal(mean,std,n) + return list(np.array(input_vector) + noise) + def cart2pol(x, y): rho = np.sqrt(x**2 + y**2) phi = np.arctan2(y, x) @@ -92,6 +99,8 @@ def in_done(self): rho1,phi1 = cart2pol(x1-xg,y1-yg) state = [rho0,phi0,rho1,phi1] + + reward,done,prev_shaping = reward_function(position_data,prev_shaping) done = (i==n-1) @@ -101,6 +110,11 @@ def in_done(self): if prev_state is not None: memory.append([prev_state,prev_action,prev_reward,state,prev_done]) + + # # Add Gaussian Noise to increase data and regularize + # memory.append([WithNoise(prev_state),prev_action,prev_reward,WithNoise(state),prev_done]) + # memory.append([WithNoise(prev_state),prev_action,prev_reward,WithNoise(state),prev_done]) + prev_state,prev_action,prev_reward,prev_done = state,action,reward,done return memory diff --git a/environments/__pycache__/VizDoomEnv.cpython-37.pyc b/environments/__pycache__/VizDoomEnv.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a327aad9dc0a3c8a76fd7ca9c43d1a6f0c82d20d GIT binary patch literal 3050 zcmZuzOK%*<5$>Luot=Gh`6gw-Q0%GTiQ6&2n~T z-91Z+#O%rP(Lk=rAz0?-Yi{`~ea(qKA-*JE^(<*BNDsQEySn;S^;K2>y3=Vglz;Po z{3+=$_AlyOmH>^rsQPapf(f3ou=?|mOEz{EuBD#jiyEqb&dxb~VI9&=8cZ5tW6}(p zoMyRc@VuJc3R~!VX=~CB+mlY%nRLUhm8qpGlU~@fZJw@%YmoEP^~pxK0bX4MuUWYH z2@?&`e9c5tZhh{C+k!u1{niD}$PW6B3GV0l;d{5+bcflXxUwB=STmbZbI>Hj@K~)<>a#4fP(X{vk-g z7QBRVgmdoBS>cS`g;%nYpF6f!Tlgi^GG`^jG2H{jG3UYs>DP^uO--QkG>u;++SK(? zLR%|Z-}p-EBAbmV=WVV)o9{o*AS)zKgqa$jc0iORDH=IQ+x$rh=%izs-|YUCdqL}{EszTj1vq6}MHi#~suz>dMGna(m~D=9Da2 z6+{6|(Df3wk2Xp+UR`XS6K6}n`q6f=LwEj?Mt85ZZwNv`{owg;)Hcpqf#Gem05pFR zze;7+e@hX@jFW5QP2=LF@y2th_yw5T0IZsr5m+!)D5CxQT=np?*t#GT0ahNxmVV1> z!?ZGa63J z#!|X#IV;o?SSebut5qR^X0Kv4+#anpv=C~>>vJD;V_pY$lUA~S*x6z8VBRRUO1})s z#?sn8Zm?Z(1RuHxS>TM@;IQ!yav=M{xuV_|&d}YhFhPN6r(}!WvR2Yg9i$-JiaI5L zI9?I_1MFzey8{{A@XuWR0+KCJ-({I2oS_E{1qX^!nFZ1%hDgimM<8I3l`j5+(!a4p z1QJuO)px)*eyoersYG7N3gZ`fn&_hPy`~K$)`P=tERN%BAfw8o)jOCPt|J#M(HITk zh5Drw>igBA(uF)VilVeDR~BzK6%$|a*wa^E`Ux^rb)5I+2TF$-gA03`kbv?gFme^ptwSX z{uIY8`X8fF0`b6?OZ3Q7J0+eX+Z1D9UCReOg4V!ycPhST`JmuGyyDY4s#ZNvEF4CE z{R@?Ueq>=_Z@2XE&{a{P=meF4-VCYgBMkfjRd+#{+k-n_*0b&F_afUY?SZG8+Jv;) z0m0KKh@y-VPh^@=G|9ybnJ6;tDEe#`r`3r10m*+rc@LXFzM04-}DF#&IG z2A$xR<@R^fTNp6)B-2wlDAY~cLyDy>Og%o-HU-y#wqcS=(_UubDy~uH4(rFsnZVo0 zrfr*I6<*TJzT)ibO;NB4yUW+z-m!gIDtb_vS~i9pNRFDu4;eiSj&bi~? zrMvRybI(2Z+;h%7KYMrlZ&%VosZ=t8pNDV$VD{|sm(q=j%3eD1QE!czT93@Mn+MEV zGqcWSIz}VrK6C$i#N5B8%yioe^O+@s1euNpTjFjO=+x)wT|NWB)CR<^q zvZ4f$<;WUai3aQ`<&EevTUM5%Z$?+tKn>U%+H&MgwyZXw1Zs?q0jVoKoqc!oPj`X= zJtm?h%Mps>i5M7;>6S&zaiMIKgj%xwKxoGe0AoqlM2+ou3qk&tRR~-VYS}Uz5%mL8 zFh^*PYx~yQ5oqB5WfL8V$L(N#j9B~0SQ!qc;y2DR44RO~pJQVhq=*u%{U|gb6+$CY zB3>{cIO(}1lJ#Q<5mwJ?7_{?}GW}2+uHhg-D?yH+QUe}7Nm^)gQ$w1i8FXCrwm5p-qq9{!>_-F+W&D)rTZb=v?lKeyl%0^4!wQQ@_E~_ zgM5C7H^rPY{iN+7g7X9e1Q!r=27^RboYuhdt>6;>Aoj#*IwfL;=UI*^Ph(K%j9@VXq{G|_qV42hgYKs#k%D*dUloxJHf)+B#9(Os)%6nqRtjl`8C z%f|RG5)TXvkqW9C7$KM<*a?cHDDB`J0(tMp5%4jq@Tjbg9Hl* zx*9%SS|(85bA6i<6((^{u>7u&N;@dv-`i049al2XkqSmYxdQ!ssIDtX)`^JT26wnu zmy4~@&M<$uuA^^}NEwM0qGk!m04Ndcq!n2vG!JYikdA=O|5=y24*Q57b8kF_y-KJq zCbv+1`AHW%SBy5lmz7YV*bu$`N5gqO32A2>lw8T~P$ z#u0>arw6Xk>MFrCf(ZhO4una}f0b>`?j)X!68ZI$jsIlNGB(Q3)F1AgLk7OiH|f}2 z1W_9{=AC#S0$H?Lm~;n-yDJp5tnlVL$lJ%K8XxQ2otArraMED6v7m*2 z*!WRTC@11YE0{7$3l=uY8>DzUx5F!BtO~f%#(&>5)j%%{9>I3OPY zGR(XUIUY2Bon7ajHlI2zL#Nqgqi6}Wh(m5hBOI&nNmR?T1uS1Mj`Cv5bhkqnzmKq0 zcY7U#9^AU0|E1-X)z6bq0%q}_NTQ1);FgE2IhaQVe4l`Zn@D0qo*0?33cYioL)+Po zeUEDmOhTr;t!QTiOIaBi$9oq@*#| z+`y34y<^8hc z?AHk@G`)ztLH^5}dwG_2w~ZFn7}6`7ZD>;$kY(Xdf+Ix9e67&;0&aX^W4*0~^;1*C znU^BbpCjOqfkI$7dTrPl*eIMLwlaNkS5pY9p8xUS>Gr*cK4E;xD>+Toc#P_K=xEj!Zb2ByK`lx z>{SO;;~syfLub!#{GlVm*z^3aodfCLl4+#+6Lt`q`Khi;tebze>)Mw|vxpVuUAtgG zh8F5gYssfyez29BGUBrDRE0g)hYv_n^9x1^o34j3aVy{SyC|{9Pjp{geG)01P3l>A z94VM4c!FSvK#px0G&!%R!N7Y28w5Wk=pmp)mnjF7EkI8U9}@hAfZPVUa+eu0GL4Zq zGG}n-Y5wo-`*K9-Y((z{jug6yzTyuI-@{P?YkcTXAA5zb9J*^zVBkk2Q$6klHhMZz za&ONl<0JA1ZaAg7NJw_nKB8!h%e(&H3e*$t$2PuDvd&}8MEpbi$-}wos+ZC zzH{&VcJTa2Y)@%v)Q7)EBA-t7ZF(_Qt^DnKas9U&R~e;7>`6A!G~_eNjQEp2BmRUk z(Tt;VqXI`2IBGH25ucji9Zcs}lmh{w2)bL;Yr;nAOR`m;i#>_z} z?TnEkELgu_dzP&VHZPbi*py(Y!V}m)J((s;K6EY0N|Z1QGic`50`IUAemq#kVtghz zo@~Mug}ckZoYle5re|^~*Rpd-elwV49lSd9O-*4anr7uJSJQU#e?qytyU1EM!CeGf z2<}Eu8EhrFV0REq$8`7bw~HR!Lc2sr&*#mYA$O~OxkoSu8t<^zuN%hjLPvO#4O5s< zOVEm-7C{}!(qItuT-t=yr05`!e(j)T)t>oK*38;4Cs+>F@gE|6tCq?plrx<|nd^ik zZ;!TY*hJemBcPWuErrn(Svk$HbLIhlHkvF`;tD2S3HZWFlqDGdIJ$NBHZrlDpoic9 z!E#EZL=1vs^i)4BRuvHhd&+i2ByG9_=4mTsqE2b= zfSokGGU;UZuV_!ka@>@i&G$aW2e1`(@$0cBc9j1f8w!o%3K-?v%GMP3k|>k7gHM)S z-q?;~5y)CjN|dC))Lm1XwIO3jM}_}{32MkI@FZ`F*VYZte(B+&3ojz zDSbBa(a=xx?rxy+r5i8&e&||5q)#n|)3n<`5Wyp({f9?IAg{Y8V3yR|`Ni_aK@q~T z8g>@QF~}gO0k4`d$tW#vI~Fk)_K_K;t;n);g=+B1!ms7+O8jvYfe{dSpo_0dw6>Fj z!lyRE!z2WTM*8=Q3e2%4)4gndp0@1ZXA;flY!YSST#}9wkY|u0SSFy%WiSs`&XqyJ zB+d)Pc0}N$ZRJEXuY;L$HNyfZrm3I~pwEerJ$2Gdxgua=hlqfqpEgA>m9ZUDW@ekD zl)A=CL)2raW*KK-Kb^maU@yTrf+dba10Shu+nOY$tlB*!4I&6-z7vLMbT7d$!F>dj zQrJYWnSWThCP8)f@+(tIF~7g?W94>+?5eJ6YdMYztm;KtHisa}yy7iqv6)X*bt4Ne zS9Mm&yqruZ<)74i+v%+nVWh>Fo|(bUg9pj*68TpoWdbM4 z`PJI7`<|yIX@aFDTjcIU)B?>hbB+zjg{*!?yOSwVg#Bk|7R32j-7{>I->AE5i%eS3 z%IaxT1k%Wsi6G(q1wo2pxi_U}P0;K4aQ%4FBv0N$xO2qNhEvV_%lb>_T4+&3@>)bO z=0UKt1LibnWZ^9WdJ07pFZ3gWV`i#*+KX_h2k>4dKMU*d0_rA8Z#C?oW#Y9WIeL46 zLU0xrCi#suudSu-RHO236iVLmdhIi^?K#6=YpCvBKG(Y%yhp@44QyAZ>%lwi_JiON zG_;A|YuLT))0U>&O^f)gOlYpHrENXa&sQ}+(I=@XonOx4;%gjnt*q&|X5P6|l|*1xx7_3W zyJp*al_`bkY5DtOx4dSNXpd49i&gpmu6!L=PVwhkRCZzET1%X<3;cTP?%3C;1~UB@ zTMH$8b#j>1^2y|3jVy~$>V%Cip$O1pCjzI;IY;;%7j6+yru)ZR4qT;C&`IeT zyj1g;6uYpfZ{muRJic!4xx*-_Y*bCa{V2f*!9jxM*Ii~!K4WyPz`Fz=5L_Xko|0^q zR0E<5P>ta$g6jknH_(wQ+>oWK^o3D*2xqcftlQW?lFA0vR(KXA3=?C;?+_=CPX)~L zmbM-2IsQ=F`D2tB_?XrdkK|noBOT1xvlGh5p!YV_(7e1tJ4oszAmjga08NB@@PeMn Xn7#CFI<35aeM2M|4u&Jq%2@Zm#RQ99 diff --git a/mains/ddqn_target.py b/mains/ddqn_target.py new file mode 100644 index 0000000..b1c9db0 --- /dev/null +++ b/mains/ddqn_target.py @@ -0,0 +1,46 @@ +import os,sys,inspect +current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parent_dir = os.path.dirname(current_dir) +sys.path.insert(0, parent_dir) +print(parent_dir) + + +import numpy as np +import random +import os +from tensorflow.keras.optimizers import Adam + +from environments.TargetGame import * +from networks.networks import * +from extras.experience_memory import * +from agents.DDQN import Agent +from extras.statistics import * +dir_path = os.path.dirname(os.path.realpath(__file__)) +L = Logger() + +env = VizDoomEnv(scenario='defend_the_center.cfg') +agent = Agent(action_size=env.action_size,Network=SimpleDQN) + +n_games = 2000 +scores = [] +avg_score = 0 + +for i in range(n_games): + observation = env.reset() + done = False + score = 0 + while not done: + action = agent.choose_action(observation) + new_observation,reward,done,info = env.step(action) + score += reward + state = np.expand_dims(observation,axis=0) + new_state = np.expand_dims(new_observation,axis=0) + agent.store_experience(state,action,reward,new_state,done) + observation = new_observation + + agent.learn() + + scores.append(score) + print('GAME:',i,'epsilon',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:])) + L.add_log('score',score) + diff --git a/mains/ddqn_vizdoom.py b/mains/ddqn_vizdoom.py index e5dbfed..06d401f 100644 --- a/mains/ddqn_vizdoom.py +++ b/mains/ddqn_vizdoom.py @@ -1,24 +1,27 @@ import os,sys,inspect current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) parent_dir = os.path.dirname(current_dir) +#parent_dir = os.path.dirname(parent_dir) sys.path.insert(0, parent_dir) +print(parent_dir) + import numpy as np import random import os from tensorflow.keras.optimizers import Adam -from environments.VizDoomEnv import * from networks.networks import * +from environments.VizDoomEnv import * from extras.experience_memory import * from agents.DDQN import Agent from extras.statistics import * dir_path = os.path.dirname(os.path.realpath(__file__)) -L = Logger(dir=dir_path,fname='vizdoom_ddqn') +L = Logger() env = VizDoomEnv(scenario='defend_the_center.cfg') -agent = Agent(action_size=env.action_size,conv=True) +agent = Agent(action_size=env.action_size,Network=ConvDQN,epsilon_step=1/50000) n_games = 2000 @@ -41,9 +44,9 @@ agent.learn() scores.append(score) - print('GAME:',i,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:])) + print('GAME:',i,'epsilon',agent.epsilon,'SCORE:',score,'AVG SCORE:',np.mean(scores[-100:])) L.add_log('score',score) L.add_log('kills',kills) - if i % 10==0: - L.save_game() \ No newline at end of file + # if i % 10==0: + # L.save_game() \ No newline at end of file diff --git a/networks/__pycache__/networks.cpython-37.pyc b/networks/__pycache__/networks.cpython-37.pyc index 6c79338e686139f81ad141d427b638f187f942ad..beb13da10258935849e5856cf9c45f4e33423c84 100644 GIT binary patch literal 4625 zcmcJSTW{mW6~||I)5WqZYd4!snxdOFX``gI69efDy=-eEm!d0duqgrvC>Tbh9VL|L zouM~gLkkq}0(si+u>0880{tLGUkbkVNnar^?f*YRN|vonurWe`L+X&6%Q-*(&*2}} z*P8;{zyAHtkN?vW;=lY;Z5~c;V%Psb6N^};Vj#`Y8M^~l(&nVz*dO@gU=WOJgW9-0 zsE-?ih7|9L*p0o1BK8vZv*xKgScAon16YF6(t@QH*I}ubmUUPfaTAtiX=%f<7Pnw& zm6i=y*5fuT?b6bLWh3su(kU&QvAiRCn={N?^m~5c?JWnRY z8Vth-XV{C@t?2l6mZhV~!Oq^0k5(HU!A-x8CNWMJII%pC`|?Y%FJmWmA2tS9ZtOo4 z1Mf@WiGhy^1V!Bp+)wh^O*}eu&~S6H*UF>*HBNGo%c(eZ9y?Ej6bE7|LihN_RPKqt zUVq?B#b@qgFZZVyG^_;>#nv8N?G^1fYx)={g!A+fE>x@?^7e)>bvmxx*L%I&BxbzJX>cA2`R?;1~fHwH~-rcj}zN zcDMDBm^yHa?DwQ<I$s3xytL_FiKNCP)^EoTC(Mw ztj{OK>kMfaPNH!FA2!2qoW(~e&s$-5_b5v36|+8doxQG?4jd+F8itE8F}W_s=g=lL z0Y~OC>#|!jmemOU6t^r81cc5vLRcgAU{KN9bC=ScH1MbgXjU?~V;Fq>JcF_O$R&j( znL+HG%8JU;Ry!567)Jg6jKin~8cicX+j0vaO;F;--E>m<_&ov=dKcHcghSIwx#Ky&Zo&nM9vLf!9yedyRrFXsy=+tu< zOkHF1Jey_Y3?W?UGI6bHLKZmn0-t=5&G*W1Q#yqS;&WwJaku$$&+!>7z(`4_1{v0EQt9m>24*o1-l)I2D{0>|Q z7bGsEj7fI=2(lIi6&`=XlURXKo&XY@L<)Zm5;cp#4{?WGypb|*9E~PMwhA{p%6pD| z%hzyzc$6EWDvvBv@CN$PM28e~%J{beMavNTeB-*fJxlzJk~}r~ zH__Xw=pQ(X*dy{T{(FXnAg_T16a|Kbx?!Q#H!_9TnYZ~z=(FTP5!4EbhK1ram@E_| z9vTj%GvaCEKsaqMEFS77whJS@%UJvpSLW%cBRi!qV6oqT1!MhGEW~4H84y?XIHzZ$ zIKZ9uS|e`)AyknDp*4e0V+o;>p4tDfp`!7kg}*Mc#_<@QT|W9JG+3^D zK|qEe(>S{4u~{(c+w@8$ zE1cuEuE`%knlQoo?>2DxVYS=K(@PUYX!(hRC2lquS5jL{suu)O5C=kE7#C(aLIvDbxU1 zG$tVn`D~)kDO%wij>>$eI4Sh7LeL^ysRT_1}!!Fn;S_n#}!l4@Fqv4Pq0S z&hSYnMgqx9ZKLP2x~1NRq1Uv@@K?NO%0p67H1TnQ+M1sv=K+%UlnGl|H=El%-nbg2 zLv&ZkmJ9$>8`4$bWV~__C+ZfRvdd@rhCU@nQon4DgfdOt{j!j9t^62>ev79ePZSl! zG<}9IURnj&eb}pvY3eFu%n~y@UxS%c{Pykz->Sr>w=)i7X=^tWExm5h#3!=O)P9Q`Oo}9QJKej$*}H__r8R8%-8Grm8bZ+lZht n;$bjYDD%9`DeAwmpR2#4-fKz*a=v`Y?QV9jbl1^d?{54bG+I4q literal 9393 zcmcgyOOP8^745%1qxs3>@z`+^C3X@Nh#i|h1h6xMoj^Qs5<)0dQ9@B`1|=G4{JO^( zs}zbVyDG5oh9#?H2P>!)Rj`CDTUhl9iVd@bWr`{|=f2mimL=P9rbtiqRrl?FuV3GL z?!D)|p6BQ1%NDke><@qR;bWHd51yo(hl2~)qepDZ3a!xYSq)o$ogKH~+SEBcZ^v)= zJ3%AZDKrW@#YScn^1AG{v|uvcn~FRthb=-16o@4P!sxo79~H+D8$yiKjZfy}Fys zz1>m$POlkthn=Jl#i|>2qNLnv$KC6#xI5?{Tu9rXy~E=rhy$9X?x+eC3sk@+R^684 zu7&#!aBd|5oYZM4^gP&#cUt@Pa^gpOyB(zpyj*NHyZvt5Y$lZ#x?$MqztpxY$h-4U>K6S`}aH-Mb??Gb$bu7@K>j{K1~^0yp#)ZVPSY7IT8hbT@!)B|-AKd@}f z@UZt2zehVJ!7hTNeIxO&w|aXWRbvAYNLGMAyVdLQ%_4fyvOT9_SDdn4c7}_&ci~-i z`%*oixB9J}4!l)vHg^W$UXRC>X7k!!tEaDsrPNdGe%`pE-Rbq3&GEwll1Fh}aSVb2 z9|sYgT(lRKMbpMfwtcdgu3l`%1GTQYakp(^JE;II1J!Jcaw5?A47w-!KE!08j}Y_> zbjU2oCENmpyvM8}X9b)gL7TH;>?0Hl=1hlTakGn*W2rejA|EK~2o!3C;wUeb6+xyx zM?vqXhar+O?2Ry`m6gOoATB6`SY4?Caq_ zPA${;_|h0d`Yi}E_7F)GwuxLp@39$gpi!)z!R-i_&#>P*#nTjGX7;8&OHoTjYB;eO z9da6b3e~c>XacIV2)Bs$rR7J84{F1$B8rbx^<44slcQH9MlY*Y*hQ)`Sj`d#eVw86 zd7R8bXYA?XBS`co&Vtwj61^0qQWX46RYgyEBx>jtHT2H&Vg@}8H+QRjE`d0&G3Jah zmKv$XRvEg!iZkOEpi8hdd}QXUPJ=6b-*xsu9NY(7ZkiT9&mnw)g1NEo%|ud-*G0%+ z>Gq4*BbG=@gUA`4m_1ZM(Ir_vZFs?4`8v%Du_UFVN*AYCe7P zBGhC)B`Si#g-u11s}cm51Y=kWNsz9l7W33HNvXy3I_@O{(P9}#c^V?rqwXaB9%x0R zTQarQOB&i=WJ`gzl+fcmWz>#IhU6TRLz6jF4DbtKN9qywV~SUeZ8ETb7iUowLQ?6N z8h{uKtC@SIfj*H%nTb9oEATYn9nAxnB2&JHv(X zK(mtni4rAk{2e}kct%a1SJX@(ohIk@Ac?Ya1s)wlnwd=G#{zm{kI^3>W+FYM;)bG6 zP;L@;pNV-&i)x-~V-RyNfI?g(nZHd=P?9JrgTj?T|K~9EazW-(s;+f&V_iIlJY3-& zXj{Skn1-3Nd5MR`O~Lp9<3Xq9JiR7V6w!Y0Uu69^d5K5l_!z?JxQa~#?ON7fvAEytlXAHf_899#Iv z$lWFr@uj6J?3*opuy6m`tI`r@xFf**z8n=s!Kfgt#obgkMj;jkkN5_Uze({eiVGBD z^AyMRCvo=AVLz|K@)YmO==+#&jA>1bJB;Cqb}=R9m~3PeeR;xghbTT| zdCgRGs;^P|bqLgl=kR07kopq!Qof`J&Fg3+m~{~n{7iqY|@vO-DCG^HVGa}0wF6HfWh_IrQf5vBJVMR?Fd$#ckw}_=boi2p-w!LNF zbHxfXaY@RceNr;A@N&^lRt$;BS>d%`8`;QuYn-X+6{?ZL>4OfDYbV~o%B1##5HV*A zVkA77nr@(HT}^^~B_T-D$tQuI*!|gb>9@FO=r&eTK)Hv@>HAD0l1Act(Rxu0;+`DI ztdQ91+qlzA022Sj&8x3}Ps8Fg^s`{`d+5!WG$R_$XJa}93TUd|uxq|X(XJL0HM3E` z3Fa6Dz=3mL928j1XTpGmlV*z=5Hn$LUSRO_oiM;mabiI-^loBWZ(9V00Zc(JQVL8k zmn19EDbWRo==Nf);Efjs^K`v#CYUdvH zAZ5DrT6br+*Fl-ivIygt4-xoqk~#^W*hRL$-?odKK_ZN0F;odsWtPjpz~bPiXbG=L z36$!mX==TolL%)B8Uq_RO1zy`w{Mmj27573BTJfdINsfhWdb%E0)Iv8h`8ahY}FoC zvlp6nKi`P^I`)YDGndHX0vREX^tvlntibGA-LvFlwieOQUphL2$Z~x9{+|t zMM*(Xb7+)c%E%P4e^;PPml&NRCV&E3gVJJPf(1!mjT9LAj9kJgHgn0G zK&UhUp%gtiwylUNSwY7|vh8|Ba*V*~*;#N(f=JSyOm+3#?Lh(dZyLG+!MwACh4TvJ z@W|L!(>NokX8=q3VF2GruJ1@i)00+y5OPNzk`<3IZIvS~2i-xNC2(HBBXTb%_%!Q8 zlEp6ay3poRJgQ|P%V+wobfCb5+jHHU`moX)ye zFC(AKu4^nylgUkg=+TSk86N#OfRl(;;;|rj3^otAq;c>pM2bc(yQJVVB7t961H^R4f7+?IpN8J=!XjFZv{o{F1t zpZC-`XaTz!_3t>XXwp;mbuUGam|+?{%uh7OI(C_p-lYzjg*2Ue#-HeBEQx%CSRz-s=Jzl|;eV9*pwDZ~+UJE5fO?@pXk_ZL2m(tz_DQGuXmzDpt*%v{tUgs;t{$n* JL#|a9{|E6da$o=e diff --git a/networks/networks.py b/networks/networks.py index 706fa7f..d0090fa 100644 --- a/networks/networks.py +++ b/networks/networks.py @@ -2,175 +2,6 @@ import tensorflow.keras as keras from tensorflow.keras.layers import Dense,Conv2D,Flatten,Concatenate,MaxPooling2D -class PPONetwork(keras.Model): - def __init__(self,n_actions,conv=False): - super(PPONetwork,self).__init__() - - self.HiddenLayers = [] - - if conv: - self.HiddenLayers.append( Conv2D(32,kernel_size=8,strides=(4,4),activation='relu') ) - self.HiddenLayers.append( Conv2D(64,kernel_size=4,strides=(2,2),activation='relu') ) - self.HiddenLayers.append( Conv2D(64,kernel_size=3,activation='relu') ) - self.HiddenLayers.append( Flatten() ) - - self.HiddenLayers.append( Dense(256,activation='relu') ) - self.HiddenLayers.append( Dense(512,activation='relu') ) - - self.v = Dense(1,activation='linear') - self.pi = Dense(n_actions,activation='softmax') - - def call(self,state): - x = state - - for layer in self.HiddenLayers: - x = layer(x) - - policy = self.pi(x) - value = self.v(x) - - return policy, value - - - -class ActorCriticNetwork(keras.Model): - def __init__(self, n_actions, name='actor_critic'): - super(ActorCriticNetwork, self).__init__() - self.n_actions = n_actions - self.model_name = name - - self.layer1 = Dense(1024, activation='relu') - self.layer2 = Dense(512, activation='relu') - self.v = Dense(1, activation='linear') - self.pi = Dense(n_actions,activation='softmax') - - def call(self,state): - value = self.layer1(state) - value = self.layer2(value) - - pi = self.pi(value) - v = self.v(value) - - return v,pi - -class PolicyGradientNetwork(keras.Model): - def __init__(self,n_actions): - super(PolicyGradientNetwork, self).__init__() - self.n_actions = n_actions - - self.fc1 = Dense(256,activation='relu') - self.fc2 = Dense(256,activation='relu') - self.pi = Dense(n_actions,activation='softmax') - - def call(self,state): - value = self.fc1(state) - value = self.fc2(value) - - pi = self.pi(value) - - return pi - - -class DQNetwork(keras.Model): - def __init__(self,action_size,conv=False): - super(DQNetwork, self).__init__() - self.HiddenLayers = [] - - if conv: - self.HiddenLayers.append( Conv2D(32,kernel_size=8,strides=(4,4),activation='relu') ) - self.HiddenLayers.append( Conv2D(64,kernel_size=4,strides=(2,2),activation='relu') ) - self.HiddenLayers.append( Conv2D(64,kernel_size=3,activation='relu') ) - self.HiddenLayers.append( Flatten() ) - self.HiddenLayers.append( Dense(units=512, activation='relu') ) - - self.value = Dense(units=action_size, activation='linear') - - def call(self,state): - x = state - - for layer in self.HiddenLayers: - x = layer(x) - - value = self.value(x) - - return value - -class MitsosPPONet(keras.Model): - def __init__(self,n_actions): - super(MitsosPPONet, self).__init__() - self.ConvLayers = [] - self.ConvLayers.append( Conv2D(64,kernel_size=9,activation='relu') ) - self.ConvLayers.append( Conv2D(64,kernel_size=5,activation='relu') ) - self.ConvLayers.append( Conv2D(64,kernel_size=3,activation='relu') ) - - self.flatten = Flatten() - self.concat = Concatenate(axis=-1) - - self.DenseLayers = [] - self.DenseLayers.append( Dense(512,activation='relu') ) - self.DenseLayers.append( Dense(256,activation='relu') ) - - self.policy = Dense(n_actions,activation='softmax') - self.value = Dense(1,activation='linear') - - def call(self,state): - x1 = state[0] #stacked frames - x2 = state[1] #stacked sensor values - - for layer in self.ConvLayers: - x1 = layer(x1) - - x1 = self.flatten(x1) - x2 = self.flatten(x2) - x = self.concat([x1,x2]) - - for layer in self.DenseLayers: - x = layer(x) - - pi = self.policy(x) - v = self.value(x) - - return pi,v - - -class MitsosDQNet(keras.Model): - def __init__(self,action_size): - super(MitsosDQNet, self).__init__() - self.ConvLayers = [] - self.ConvLayers.append( Conv2D(64,kernel_size=9,activation='relu') ) - self.ConvLayers.append( Conv2D(64,kernel_size=5,activation='relu') ) - #self.ConvLayers.append( Conv2D(64,kernel_size=3,activation='relu') ) - - self.flatten = Flatten() - self.concat = Concatenate(axis=-1) - - self.DenseLayers = [] - self.DenseLayers.append( Dense(units=512, activation='relu') ) - self.DenseLayers.append( Dense(units=512, activation='relu') ) - self.DenseLayers.append( Dense(units=512, activation='relu') ) - - self.value = Dense(units=action_size, activation='linear') - - def call(self,state): - x1 = state[0] #stacked frames - x2 = state[1] #stacked sensor values - - for layer in self.ConvLayers: - x1 = layer(x1) - - x1 = self.flatten(x1) - x2 = self.flatten(x2) - x = self.concat([x1,x2]) - - for layer in self.DenseLayers: - x = layer(x) - - v = self.value(x) - - return v - - -################################################################################################################################ class DenseNet(keras.Model): def __init__(self,units=[64]): @@ -223,7 +54,7 @@ def call(self,INPUT): class ConvDQN(keras.Model): def __init__(self,output_size): - super(Net1,self).__init__() + super(ConvDQN,self).__init__() self.conv = ConvNet(filters=[64,64]) self.main = DenseNet(units=[128,128]) diff --git a/setupvizdoom.sh b/setupvizdoom.sh index b166ad1..2c5d4f5 100644 --- a/setupvizdoom.sh +++ b/setupvizdoom.sh @@ -18,10 +18,9 @@ tar xf julia-1.3.0-linux-x86_64.tar.gz sudo ln -s ~/julia-1.3.0/bin/julia /usr/local/bin/julia pip install vizdoom -pip install varname -julia +# julia -using Pkg -Pkg.add("CxxWrap") \ No newline at end of file +# using Pkg +# Pkg.add("CxxWrap") \ No newline at end of file