Skip to content

Commit

Permalink
Fix idle runner being memory leaked
Browse files Browse the repository at this point in the history
when its allocation is restarted by Nomad.

Fix logic created in 354c16c.
  • Loading branch information
mpass99 committed Jun 6, 2024
1 parent 1ddccb3 commit cbcd5f2
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 28 deletions.
2 changes: 1 addition & 1 deletion internal/environment/aws_environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,6 @@ func (a *AWSEnvironment) AddRunner(_ runner.Runner) {
panic("not supported")
}

func (a *AWSEnvironment) DeleteRunner(_ string) (ok bool) {
func (a *AWSEnvironment) DeleteRunner(_ string) (r runner.Runner, ok bool) {
panic("not supported")
}
6 changes: 3 additions & 3 deletions internal/environment/nomad_environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,10 +271,10 @@ func (n *NomadEnvironment) AddRunner(r runner.Runner) {
n.idleRunners.Add(r.ID(), r)
}

func (n *NomadEnvironment) DeleteRunner(id string) (ok bool) {
_, ok = n.idleRunners.Get(id)
func (n *NomadEnvironment) DeleteRunner(id string) (r runner.Runner, ok bool) {
r, ok = n.idleRunners.Get(id)
n.idleRunners.Delete(id)
return ok
return r, ok
}

func (n *NomadEnvironment) IdleRunnerCount() uint {
Expand Down
2 changes: 1 addition & 1 deletion internal/runner/aws_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ func createBasicEnvironmentMock(id dto.EnvironmentID) *ExecutionEnvironmentMock
environment.On("CPULimit").Return(uint(0))
environment.On("MemoryLimit").Return(uint(0))
environment.On("NetworkAccess").Return(false, nil)
environment.On("DeleteRunner", mock.AnythingOfType("string")).Return(false)
environment.On("DeleteRunner", mock.AnythingOfType("string")).Return(nil, false)
environment.On("ApplyPrewarmingPoolSize").Return(nil)
environment.On("IdleRunnerCount").Return(uint(1)).Maybe()
environment.On("PrewarmingPoolSize").Return(uint(1)).Maybe()
Expand Down
5 changes: 3 additions & 2 deletions internal/runner/execution_environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,10 @@ type ExecutionEnvironment interface {
Sample() (r Runner, ok bool)
// AddRunner adds an existing runner to the idle runners of the environment.
AddRunner(r Runner)
// DeleteRunner removes an idle runner from the environment.
// DeleteRunner removes an idle runner from the environment and returns it.
// This function handles only the environment. The runner has to be destroyed separately.
// ok is true iff the runner was found (and deleted).
DeleteRunner(id string) (ok bool)
DeleteRunner(id string) (r Runner, ok bool)
// IdleRunnerCount returns the number of idle runners of the environment.
IdleRunnerCount() uint
}
Expand Down
76 changes: 70 additions & 6 deletions internal/runner/execution_environment_mock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 13 additions & 6 deletions internal/runner/nomad_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,21 +312,28 @@ func (m *NomadRunnerManager) onAllocationStopped(runnerID string, reason error)
return false
}

r, stillActive := m.usedRunners.Get(runnerID)
if stillActive {
r, stillUsed := m.usedRunners.Get(runnerID)
if stillUsed {
m.usedRunners.Delete(runnerID)
if err := r.Destroy(reason); err != nil {
log.WithError(err).Warn("Runner of stopped allocation cannot be destroyed")
}
}

environment, ok := m.GetEnvironment(environmentID)
if ok {
stillActive = stillActive || environment.DeleteRunner(runnerID)
go m.checkPrewarmingPoolAlert(environment, false)
if !ok {
return !stillUsed
}

r, stillIdle := environment.DeleteRunner(runnerID)
if stillIdle {
if err := r.Destroy(reason); err != nil {
log.WithError(err).Warn("Runner of stopped allocation cannot be destroyed")
}
}
go m.checkPrewarmingPoolAlert(environment, false)

return !stillActive
return !(stillUsed || stillIdle)
}

// onRunnerDestroyed is the callback when the runner destroys itself.
Expand Down
18 changes: 9 additions & 9 deletions internal/runner/nomad_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ func mockIdleRunners(environmentMock *ExecutionEnvironmentMock) {
if !ok {
log.Fatal("Cannot parse ID")
}
_, ok = idleRunner.Get(id)
deleteCall.ReturnArguments = mock.Arguments{ok}
r, ok := idleRunner.Get(id)
deleteCall.ReturnArguments = mock.Arguments{r, ok}
if !ok {
return
}
Expand Down Expand Up @@ -167,7 +167,7 @@ func (s *ManagerTestSuite) TestClaimAddsRunnerToUsedRunners() {

func (s *ManagerTestSuite) TestClaimRemovesRunnerWhenMarkAsUsedFails() {
s.exerciseEnvironment.On("Sample", mock.Anything).Return(s.exerciseRunner, true)
s.exerciseEnvironment.On("DeleteRunner", mock.AnythingOfType("string")).Return(false)
s.exerciseEnvironment.On("DeleteRunner", mock.AnythingOfType("string")).Return(nil, false)
s.apiMock.On("DeleteJob", mock.AnythingOfType("string")).Return(nil)
util.MaxConnectionRetriesExponential = 1
modifyMockedCall(s.apiMock, "MarkRunnerAsUsed", func(call *mock.Call) {
Expand Down Expand Up @@ -199,7 +199,7 @@ func (s *ManagerTestSuite) TestGetReturnsErrorIfRunnerNotFound() {

func (s *ManagerTestSuite) TestReturnRemovesRunnerFromUsedRunners() {
s.apiMock.On("DeleteJob", mock.AnythingOfType("string")).Return(nil)
s.exerciseEnvironment.On("DeleteRunner", mock.AnythingOfType("string")).Return(false)
s.exerciseEnvironment.On("DeleteRunner", mock.AnythingOfType("string")).Return(nil, false)
s.nomadRunnerManager.usedRunners.Add(s.exerciseRunner.ID(), s.exerciseRunner)
err := s.nomadRunnerManager.Return(s.exerciseRunner)
s.Nil(err)
Expand All @@ -209,7 +209,7 @@ func (s *ManagerTestSuite) TestReturnRemovesRunnerFromUsedRunners() {

func (s *ManagerTestSuite) TestReturnCallsDeleteRunnerApiMethod() {
s.apiMock.On("DeleteJob", mock.AnythingOfType("string")).Return(nil)
s.exerciseEnvironment.On("DeleteRunner", mock.AnythingOfType("string")).Return(false)
s.exerciseEnvironment.On("DeleteRunner", mock.AnythingOfType("string")).Return(nil, false)
err := s.nomadRunnerManager.Return(s.exerciseRunner)
s.Nil(err)
s.apiMock.AssertCalled(s.T(), "DeleteJob", s.exerciseRunner.ID())
Expand All @@ -220,7 +220,7 @@ func (s *ManagerTestSuite) TestReturnReturnsErrorWhenApiCallFailed() {
s.apiMock.On("DeleteJob", mock.AnythingOfType("string")).Return(tests.ErrDefault)
defer s.apiMock.On("DeleteJob", mock.AnythingOfType("string")).Return(nil)
defer tests.RemoveMethodFromMock(&s.apiMock.Mock, "DeleteJob")
s.exerciseEnvironment.On("DeleteRunner", mock.AnythingOfType("string")).Return(false)
s.exerciseEnvironment.On("DeleteRunner", mock.AnythingOfType("string")).Return(nil, false)

util.MaxConnectionRetriesExponential = 1
util.InitialWaitingDuration = 2 * tests.ShortTimeout
Expand Down Expand Up @@ -442,7 +442,7 @@ func (s *ManagerTestSuite) TestOnAllocationStopped() {
environment.AddRunner(r)
alreadyRemoved := s.nomadRunnerManager.onAllocationStopped(tests.DefaultRunnerID, nil)
s.False(alreadyRemoved)
s.NoError(r.Destroy(nil))
s.Error(r.ctx.Err(), "The runner should be destroyed and its context canceled")
})
s.Run("returns false and stops inactivity timer", func() {
runner, runnerDestroyed := testStoppedInactivityTimer(s)
Expand Down Expand Up @@ -556,7 +556,7 @@ func (s *MainTestSuite) TestNomadRunnerManager_Load() {
runnerManager.usedRunners.Purge()
s.Run("Restart timeout of used runner", func() {
apiMock.On("DeleteJob", mock.AnythingOfType("string")).Return(nil)
environmentMock.On("DeleteRunner", mock.AnythingOfType("string")).Once().Return(false)
environmentMock.On("DeleteRunner", mock.AnythingOfType("string")).Once().Return(nil, false)
timeout := 1

_, job := helpers.CreateTemplateJob()
Expand Down Expand Up @@ -674,7 +674,7 @@ func (s *MainTestSuite) TestNomadRunnerManager_checkPrewarmingPoolAlert_reloadsR

environment.On("PrewarmingPoolSize").Return(uint(1)).Twice()
environment.On("IdleRunnerCount").Return(uint(0)).Twice()
environment.On("DeleteRunner", mock.Anything).Return(false).Once()
environment.On("DeleteRunner", mock.Anything).Return(nil, false).Once()

s.Require().Empty(m.usedRunners.Length())
_, usedJob := helpers.CreateTemplateJob()
Expand Down

0 comments on commit cbcd5f2

Please sign in to comment.