Skip to content

Commit

Permalink
Make sure scheduler threads don't ACK the quiescence protocol CNF mes…
Browse files Browse the repository at this point in the history
…sages if they have an actor waiting to be run (#4583)
  • Loading branch information
dipinhora authored Jan 9, 2025
1 parent 9b1e66d commit 6db1561
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 7 deletions.
3 changes: 3 additions & 0 deletions .release-notes/4583.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## Make sure scheduler threads don't ACK the quiescence protocol CNF messages if they have an actor waiting to be run

Prior to this change, the pinned actor thread could cause early termination/quiescence of a Pony program if only pinned actors were active. This change ensures that Pony programs in which only pinned actors are active no longer terminate too early.
17 changes: 10 additions & 7 deletions src/libponyrt/sched/scheduler.c
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ static void handle_sched_unblock(scheduler_t* sched)
pony_assert(sched->block_count <= scheduler_count);
}

static bool read_msg(scheduler_t* sched)
static bool read_msg(scheduler_t* sched, pony_actor_t* actor)
{
#ifdef USE_RUNTIMESTATS
uint64_t used_cpu = ponyint_sched_cpu_used(&sched->ctx);
Expand Down Expand Up @@ -506,8 +506,11 @@ static bool read_msg(scheduler_t* sched)
{
pony_assert(PONY_UNKNOWN_SCHEDULER_INDEX != sched->index);

// Echo the token back as ACK(token).
send_msg(sched->index, 0, SCHED_ACK, m->i);
if(NULL == actor)
{
// Echo the token back as ACK(token) only if we don't have an actor to run.
send_msg(sched->index, 0, SCHED_ACK, m->i);
}
break;
}

Expand Down Expand Up @@ -758,7 +761,7 @@ static pony_actor_t* suspend_scheduler(scheduler_t* sched,
if(actor != NULL)
break;

if(read_msg(sched))
if(read_msg(sched, actor))
{
// An actor was unmuted and added to our run queue. Pop it and return.
// Effectively, we are "stealing" from ourselves. We need to verify that
Expand Down Expand Up @@ -925,7 +928,7 @@ static pony_actor_t* steal(scheduler_t* sched)

uint64_t tsc2 = ponyint_cpu_tick();

if(read_msg(sched))
if(read_msg(sched, actor))
{
// An actor was unmuted and added to our run queue. Pop it and return.
// Effectively, we are "stealing" from ourselves. We need to verify that
Expand Down Expand Up @@ -1154,7 +1157,7 @@ static void run(scheduler_t* sched)
// In response to reading a message, we might have unmuted an actor and
// added it back to our queue. if we don't have an actor to run, we want
// to pop from our queue to check for a recently unmuted actor
if(read_msg(sched) && actor == NULL)
if(read_msg(sched, actor) && actor == NULL)
{
actor = pop_global(sched);
}
Expand Down Expand Up @@ -1419,7 +1422,7 @@ static void run_pinned_actors()
// scheduler should be handled by the pinned actor scheduler but for the moment
// that is how things work and the actor will eventually come back to this thread
// to be run anyways.
read_msg(sched);
read_msg(sched, actor);

// Termination. all the normal scheduler threads have decided there is no
// more work to do so we can shutdown
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
100
40 changes: 40 additions & 0 deletions test/full-program-tests/regression-4582/main.pony
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
use @pony_exitcode[None](code: I32)
use @usleep[I32](micros: U32) if not windows
use @Sleep[None](millis: U32) if windows
use "actor_pinning"

// Regression test program for issue 4582: early quiescence/termination when
// only pinned actors remain active.
//
// Main requests to be pinned, polls until the pin is reported as successful,
// then runs a long chain of self-sent `do_stuff` behaviors, each of which
// sleeps for about 10 ms. Only the last behavior in the chain sets the
// process exit code to 100.
// NOTE(review): presumably the test harness compares the exit code against
// 100 to detect a run that terminated before the chain finished - confirm.
actor Main
// Environment handed to the constructor; stored here but not read by any
// other method of this actor.
let _env: Env
// Authority token passed to every ActorPinning call below.
let _auth: PinUnpinActorAuth

// Stores the environment, derives the pin/unpin authority from `env.root`,
// requests that this actor be pinned and starts polling for the result.
new create(env: Env) =>
_env = env
_auth = PinUnpinActorAuth(env.root)
ActorPinning.request_pin(_auth)
check_pinned()

// Polls the pin status. Once the pin is reported as successful, kicks off the
// `do_stuff` chain with a counter of 100; otherwise sends itself another
// `check_pinned` message and tries again.
be check_pinned() =>
if ActorPinning.is_successfully_pinned(_auth) then
do_stuff(100)
else
check_pinned()
end

// One link of the countdown chain. Sleeps, then either re-sends itself with
// `i - 1` (while `i >= 0`) or, once `i` has dropped below zero, sets the exit
// code. Started with 100, this behavior therefore runs 102 times.
be do_stuff(i: I32) =>
// sleep for a while so that the quiescence CNF/ACK protocol can happen
// (10 milliseconds on both branches: Sleep takes millis, usleep takes micros)
ifdef windows then
@Sleep(10)
else
@usleep(10000)
end
if i < 0 then
// set the exit code if this behavior has been run enough times
// issue 4582 identified early quiescence/termination if only pinned
// actors remained active
@pony_exitcode(100)
else
do_stuff(i - 1)
end

// Requests that this actor be unpinned.
// NOTE(review): nothing in this file sends `done`, so the actor is never
// unpinned by this program - confirm that is intentional for the test.
be done() =>
ActorPinning.request_unpin(_auth)

0 comments on commit 6db1561

Please sign in to comment.