Commit

adding kill model and delay to start training, seems to do better (wip)
Julio Jerez committed Dec 18, 2023
1 parent d883735 commit e95d3df
Showing 10 changed files with 103 additions and 63 deletions.
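
The gist of the change: the quadruped trainer now force-terminates an episode after a sustained run of low rewards (the "kill model"), and it skips optimization for the first frames after every reset (the delayed training start). The following standalone sketch distills those two mechanisms. TrainerSketch is a hypothetical stand-in written for this note, not a Newton SDK class, though the member names (m_killCounter, m_startTraining) and the thresholds (0.4, 256, 100) are taken from the diff below.

    #include <cstdio>

    // Minimal sketch (not Newton SDK code): a "kill counter" terminates an
    // episode after a sustained run of low rewards, and a "start training"
    // counter delays optimization for the first frames after each reset.
    class TrainerSketch
    {
        public:
        TrainerSketch()
            :m_killCounter(0)
            ,m_startTraining(0)
        {
        }

        // Any reward above the threshold resets the kill counter, so only an
        // unbroken run of poor rewards can terminate the episode (0.4f is the
        // value used in ndQuadrupedTest_1.cpp below).
        float GetReward(float rawReward)
        {
            if (rawReward > 0.4f)
            {
                m_killCounter = 0;
            }
            return IsTerminal() ? -1.0f : rawReward;
        }

        // The episode is killed after 256 consecutive low-reward steps.
        bool IsTerminal() const
        {
            return m_killCounter > 256;
        }

        // Optimization only runs once the model has settled for 100 frames,
        // so the transient right after a reset never reaches the optimizer.
        void OptimizeStep()
        {
            if (m_startTraining > 100)
            {
                // ... one optimizer step would run here ...
                m_killCounter++;
            }
            m_startTraining++;
        }

        // Both counters are cleared whenever the model is reset.
        void ResetModel()
        {
            m_killCounter = 0;
            m_startTraining = 0;
        }

        private:
        int m_killCounter;
        int m_startTraining;
    };

    int main()
    {
        TrainerSketch trainer;
        for (int i = 0; i < 400; i++)
        {
            trainer.OptimizeStep();
            float reward = trainer.GetReward(0.1f); // persistently poor reward
            if (trainer.IsTerminal())
            {
                printf("killed at step %d (reward %f)\n", i, reward);
                trainer.ResetModel();
                break;
            }
        }
        return 0;
    }

One detail the sketch glosses over: in the actual patch m_killCounter is declared mutable because GetReward() is a const member function; the sketch simply makes GetReward() non-const instead.
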
@@ -171,7 +171,7 @@ namespace ndCarpole_1
return m_model->IsTerminal();
}

void ResetModel() const
void ResetModel()
{
m_model->ResetModel();
}
@@ -316,7 +316,7 @@ namespace ndCarpole_1
m_pole->SetVelocity(poleVeloc);
}

void ResetModel() const
void ResetModel()
{
m_cart->SetMatrix(m_cartMatrix);
m_pole->SetMatrix(m_poleMatrix);
@@ -164,7 +164,7 @@ namespace ndCarpole_0
return m_model->IsTerminal();
}

void ResetModel() const
void ResetModel()
{
m_model->ResetModel();
}
@@ -317,7 +317,7 @@ namespace ndCarpole_0
m_pole->SetVelocity(poleVeloc);
}

void ResetModel() const
void ResetModel()
{
m_cart->SetMatrix(m_cartMatrix);
m_pole->SetMatrix(m_poleMatrix);
131 changes: 85 additions & 46 deletions newton-4.00/applications/ndSandbox/demos/ndQuadrupedTest_1.cpp
@@ -466,7 +466,7 @@ namespace ndQuadruped_1
return m_model->IsTerminal();
}

void ResetModel() const
void ResetModel()
{
m_model->m_control->Reset();
for (ndInt32 i = 0; i < m_basePose.GetCount(); i++)
@@ -1738,12 +1738,17 @@ namespace ndQuadruped_1
ndControllerAgent_trainer(const HyperParameters& hyperParameters)
:ndBrainAgentContinueVPG_Trainer<ND_AGENT_INPUTSIZE, m_actionsSize>(hyperParameters)
,m_bestActor(m_actor)
,m_basePose()
,m_bodies()
,m_outFile(nullptr)
,m_model(nullptr)
,m_timer(ndGetTimeInMicroseconds())
,m_maxGain(-1.0e10f)
,m_maxFrames(3000)
,m_killCounter(0)
,m_startTraining(0)
//,m_stopTraining(5000000)
,m_stopTraining(500000)
,m_timer(ndGetTimeInMicroseconds())
,m_modelIsTrained(false)
{
SetName(CONTROLLER_NAME);
@@ -1772,12 +1777,18 @@ namespace ndQuadruped_1

ndBrainFloat GetReward() const
{
ndBrainFloat reward = m_model->CalculateReward();
if (reward > ndBrainFloat(0.4f))
{
m_killCounter = 0;
}

if (IsTerminal())
{
return ndBrainFloat(-1.0f);
}

return m_model->CalculateReward();
return reward;
}

virtual void ApplyActions(ndBrainFloat* const actions) const
@@ -1812,78 +1823,106 @@ namespace ndQuadruped_1
////state = state && (m_startTraning >= 64);
//
//return m_model->IsTerminal();
if (m_killCounter > 256)
{
return true;
}
return false;
}

void ResetModel() const
void ResetModel()
{
m_killCounter = 0;
m_startTraining = 0;
m_model->m_control->Reset();
for (ndInt32 i = 0; i < m_basePose.GetCount(); i++)
{
m_basePose[i].SetPose();
}
}

void Step()
{
if (m_startTraining > 100)
{
ndBrainAgentContinueVPG_Trainer::Step();
}
else
{
ndBrainFixSizeVector<m_actionsSize> actions;
actions.Set(ndBrainFloat(0.0f));
m_model->ApplyActions(&actions[0]);
}
}

void OptimizeStep()
{
ndInt32 stopTraining = GetFramesCount();
if (stopTraining <= m_stopTraining)
if (m_startTraining > 100)
{
ndInt32 episodeCount = GetEposideCount();
ndBrainAgentContinueVPG_Trainer::OptimizeStep();

episodeCount -= GetEposideCount();
if (m_averageFramesPerEpisodes.GetAverage() >= ndFloat32(m_maxFrames))
ndInt32 stopTraining = GetFramesCount();
if (stopTraining <= m_stopTraining)
{
if (m_averageQvalue.GetAverage() > m_maxGain)
ndInt32 episodeCount = GetEposideCount();
ndBrainAgentContinueVPG_Trainer::OptimizeStep();

episodeCount -= GetEposideCount();
if (m_averageFramesPerEpisodes.GetAverage() >= ndFloat32(m_maxFrames))
{
m_bestActor.CopyFrom(m_actor);
m_maxGain = m_averageQvalue.GetAverage();
ndExpandTraceMessage("best actor episode: %d\taverageFrames: %f\taverageValue %f\n", GetEposideCount(), m_averageFramesPerEpisodes.GetAverage(), m_averageQvalue.GetAverage());
if (m_averageQvalue.GetAverage() > m_maxGain)
{
m_bestActor.CopyFrom(m_actor);
m_maxGain = m_averageQvalue.GetAverage();
ndExpandTraceMessage("best actor episode: %d\taverageFrames: %f\taverageValue %f\n", GetEposideCount(), m_averageFramesPerEpisodes.GetAverage(), m_averageQvalue.GetAverage());
}
}
}

if (episodeCount && !IsSampling())
{
ndExpandTraceMessage("step: %d\treward: %g\tframes: %g\n", GetFramesCount(), m_averageQvalue.GetAverage(), m_averageFramesPerEpisodes.GetAverage());
if (m_outFile)

if (episodeCount && !IsSampling())
{
fprintf(m_outFile, "%g\n", m_averageQvalue.GetAverage());
fflush(m_outFile);
ndExpandTraceMessage("step: %d\treward: %g\tframes: %g\n", GetFramesCount(), m_averageQvalue.GetAverage(), m_averageFramesPerEpisodes.GetAverage());
if (m_outFile)
{
fprintf(m_outFile, "%g\n", m_averageQvalue.GetAverage());
fflush(m_outFile);
}
}

if (stopTraining == m_stopTraining)
{
char fileName[1024];
m_modelIsTrained = true;
m_actor.CopyFrom(m_bestActor);
ndGetWorkingFileName(GetName().GetStr(), fileName);
SaveToFile(fileName);
ndExpandTraceMessage("saving to file: %s\n", fileName);
ndExpandTraceMessage("training complete\n");
ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
ndExpandTraceMessage("training time: %g\n seconds", ndFloat32(ndFloat64(timer) * ndFloat32(1.0e-6f)));
}
}

if (stopTraining == m_stopTraining)
{
char fileName[1024];
m_modelIsTrained = true;
m_actor.CopyFrom(m_bestActor);
ndGetWorkingFileName(GetName().GetStr(), fileName);
SaveToFile(fileName);
ndExpandTraceMessage("saving to file: %s\n", fileName);
ndExpandTraceMessage("training complete\n");
ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
ndExpandTraceMessage("training time: %g\n seconds", ndFloat32(ndFloat64(timer) * ndFloat32(1.0e-6f)));
}

//if (m_model->IsOutOfBounds())
//{
// m_model->TelePort();
//}

m_killCounter++;
}

//if (m_model->IsOutOfBounds())
//{
// m_model->TelePort();
//}
//m_startTraning++;

m_startTraining++;
}

FILE* m_outFile;
ndBrain m_bestActor;
ndFixSizeArray<ndBasePose, 32> m_basePose;
ndFixSizeArray<ndBodyDynamic*, 32> m_bodies;
FILE* m_outFile;
ndRobot* m_model;
ndUnsigned64 m_timer;
ndFloat32 m_maxGain;
ndInt32 m_maxFrames;
mutable ndInt32 m_killCounter;
ndInt32 m_startTraining;
ndInt32 m_stopTraining;
ndUnsigned64 m_timer;
bool m_modelIsTrained;
ndFixSizeArray<ndBasePose, 32> m_basePose;
ndFixSizeArray<ndBodyDynamic*, 32> m_bodies;
};

ndRobot(ndSharedPtr<ndBrainAgent>& agent)
4 changes: 2 additions & 2 deletions newton-4.00/applications/ndSandbox/demos/ndUnicycle.cpp
@@ -221,7 +221,7 @@ namespace ndUnicycle
return m_model->IsTerminal();
}

void ResetModel() const
void ResetModel()
{
m_model->ResetModel();
}
@@ -345,7 +345,7 @@ namespace ndUnicycle
return ndReal(reward);
}

void ResetModel() const
void ResetModel()
{
for (ndInt32 i = 0; i < m_basePose.GetCount(); i++)
{
2 changes: 1 addition & 1 deletion newton-4.00/sdk/dBrain/ndBrainAgent.h
@@ -44,7 +44,7 @@ class ndBrainAgent: public ndClassAlloc
virtual void InitWeights(ndBrainFloat weighVariance, ndBrainFloat biasVariance) = 0;

protected:
virtual void ResetModel() const = 0;
virtual void ResetModel() = 0;
virtual bool IsTerminal() const = 0;
virtual ndBrainFloat GetReward() const = 0;
virtual ndInt32 GetEpisodeFrames() const = 0;
4 changes: 2 additions & 2 deletions newton-4.00/sdk/dBrain/ndBrainAgentContinueVPG.h
@@ -38,9 +38,9 @@ class ndBrainAgentContinueVPG: public ndBrainAgent
void Step();

protected:
void ResetModel();
void OptimizeStep();
bool IsTrainer() const;
void ResetModel() const;
bool IsTerminal() const;
ndBrainFloat GetReward() const;
ndInt32 GetEpisodeFrames() const;
@@ -97,7 +97,7 @@ ndBrainFloat ndBrainAgentContinueVPG<statesDim, actionDim>::GetReward() const
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentContinueVPG<statesDim, actionDim>::ResetModel() const
void ndBrainAgentContinueVPG<statesDim, actionDim>::ResetModel()
{
ndAssert(0);
}
4 changes: 2 additions & 2 deletions newton-4.00/sdk/dBrain/ndBrainAgentDDPG.h
@@ -36,9 +36,9 @@ class ndBrainAgentDDPG: public ndBrainAgent
void Step();

protected:
void ResetModel();
void OptimizeStep();
bool IsTrainer() const;
void ResetModel() const;
bool IsTerminal() const;
ndBrainFloat GetReward() const;
ndInt32 GetEpisodeFrames() const;
@@ -78,7 +78,7 @@ ndBrainFloat ndBrainAgentDDPG<statesDim, actionDim>::GetReward() const
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentDDPG<statesDim, actionDim>::ResetModel() const
void ndBrainAgentDDPG<statesDim, actionDim>::ResetModel()
{
ndAssert(0);
}
4 changes: 2 additions & 2 deletions newton-4.00/sdk/dBrain/ndBrainAgentDQN.h
@@ -37,9 +37,9 @@ class ndBrainAgentDQN: public ndBrainAgent
void Step();

protected:
void ResetModel();
void OptimizeStep();
bool IsTrainer() const;
void ResetModel() const;
bool IsTerminal() const;
ndBrainFloat GetReward() const;
ndInt32 GetEpisodeFrames() const;
@@ -93,7 +93,7 @@ ndBrainFloat ndBrainAgentDQN<statesDim, actionDim>::GetReward() const
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentDQN<statesDim, actionDim>::ResetModel() const
void ndBrainAgentDQN<statesDim, actionDim>::ResetModel()
{
ndAssert(0);
}
5 changes: 3 additions & 2 deletions newton-4.00/sdk/dBrain/ndBrainAgentDiscreteVPG.h
@@ -38,9 +38,10 @@ class ndBrainAgentDiscreteVPG: public ndBrainAgent
void Step();

protected:
void ResetModel();
void OptimizeStep();
bool IsTrainer() const;
void ResetModel() const;

bool IsTerminal() const;
ndBrainFloat GetReward() const;
ndInt32 GetEpisodeFrames() const;
@@ -98,7 +99,7 @@ ndBrainFloat ndBrainAgentDiscreteVPG<statesDim, actionDim>::GetReward() const
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentDiscreteVPG<statesDim, actionDim>::ResetModel() const
void ndBrainAgentDiscreteVPG<statesDim, actionDim>::ResetModel()
{
ndAssert(0);
}
4 changes: 2 additions & 2 deletions newton-4.00/sdk/dBrain/ndBrainAgentTD3.h
@@ -36,9 +36,9 @@ class ndBrainAgentTD3: public ndBrainAgent
void Step();

protected:
void ResetModel();
void OptimizeStep();
bool IsTrainer() const;
void ResetModel() const;
bool IsTerminal() const;
ndBrainFloat GetReward() const;
ndInt32 GetEpisodeFrames() const;
@@ -78,7 +78,7 @@ ndBrainFloat ndBrainAgentTD3<statesDim, actionDim>::GetReward() const
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentTD3<statesDim, actionDim>::ResetModel() const
void ndBrainAgentTD3<statesDim, actionDim>::ResetModel()
{
ndAssert(0);
}