Skip to content

Commit

Permalink
Fix rare termination logic failures that could result in early shutdown (#4556)
Browse files Browse the repository at this point in the history

Prior to this commit, there was a very rare edge case in the
termination logic that could cause an early shutdown, which in
turn resulted in a segfault.

This commit simplifies and reworks the shutdown/termination logic
in order to make it more robust with fewer edge cases.

The logic now:

* does not un-noisy an actor from the ASIO thread until the
  relevant ASIO event is destroyed instead of when it is
  unsubscribed. This is important because the ASIO subsystem
  still has a reference to the actor and can send a message to it
  until the ASIO event is destroyed even if it has been
  unsubscribed
* always runs the CNF/ACK protocol to all schedulers instead of
  only the active ones
* disables scheduler scaling to ensure all schedulers are active
  for the duration of the termination CNF/ACK protocol, to avoid
  or minimize complexity from schedulers suspending during the
  termination process
* ensures the local scheduler tracking of ASIO noisiness is more
  accurate and robust to messages being received out of order
  • Loading branch information
dipinhora authored Dec 4, 2024
1 parent a8de799 commit 60722ad
Show file tree
Hide file tree
Showing 8 changed files with 115 additions and 166 deletions.
5 changes: 5 additions & 0 deletions .release-notes/4556.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
## Fix rare termination logic failures that could result in early shutdown

There was a very rare edge case in the termination logic that could cause an early shutdown, which in turn resulted in a segfault.

The edge cases have been addressed and the shutdown/termination logic has been overhauled to make it simpler and more robust.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ test-cross-ci: test-core test-stdlib-debug test-stdlib-release test-examples
test-check-version: all
$(SILENT)cd '$(outDir)' && ./ponyc --version

test-core: all test-libponyrt test-libponyc test-full-programs-release test-full-programs-debug
test-core: all test-libponyrt test-libponyc test-full-programs-debug test-full-programs-release

test-libponyrt: all
$(SILENT)cd '$(outDir)' && $(debuggercmd) ./libponyrt.tests --gtest_shuffle $(testextras)
Expand Down
16 changes: 0 additions & 16 deletions src/libponyrt/asio/epoll.c
Original file line number Diff line number Diff line change
Expand Up @@ -466,22 +466,6 @@ PONY_API void pony_asio_event_unsubscribe(asio_event_t* ev)
asio_backend_t* b = ponyint_asio_get_backend();
pony_assert(b != NULL);

if(ev->noisy)
{
uint64_t old_count = ponyint_asio_noisy_remove();
// tell scheduler threads that asio has no noisy actors
// if the old_count was 1
if (old_count == 1)
{
ponyint_sched_unnoisy_asio(SPECIAL_THREADID_EPOLL);

// maybe wake up a scheduler thread if they've all fallen asleep
ponyint_sched_maybe_wakeup_if_all_asleep(PONY_UNKNOWN_SCHEDULER_INDEX);
}

ev->noisy = false;
}

epoll_ctl(b->epfd, EPOLL_CTL_DEL, ev->fd, NULL);

if(ev->flags & ASIO_TIMER)
Expand Down
11 changes: 11 additions & 0 deletions src/libponyrt/asio/event.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ PONY_API void pony_asio_event_destroy(asio_event_t* ev)
return;
}

if(ev->noisy)
{
uint64_t old_count = ponyint_asio_noisy_remove();
// tell scheduler threads that asio has no noisy actors
// if the old_count was 1
if (old_count == 1)
ponyint_sched_unnoisy_asio(PONY_UNKNOWN_SCHEDULER_INDEX);

ev->noisy = false;
}

ev->flags = ASIO_DESTROYED;

// When we let go of an event, we treat it as if we had received it back from
Expand Down
16 changes: 0 additions & 16 deletions src/libponyrt/asio/iocp.c
Original file line number Diff line number Diff line change
Expand Up @@ -340,22 +340,6 @@ PONY_API void pony_asio_event_unsubscribe(asio_event_t* ev)
asio_backend_t* b = ponyint_asio_get_backend();
pony_assert(b != NULL);

if(ev->noisy)
{
uint64_t old_count = ponyint_asio_noisy_remove();
// tell scheduler threads that asio has no noisy actors
// if the old_count was 1
if (old_count == 1)
{
ponyint_sched_unnoisy_asio(SPECIAL_THREADID_IOCP);

// maybe wake up a scheduler thread if they've all fallen asleep
ponyint_sched_maybe_wakeup_if_all_asleep(PONY_UNKNOWN_SCHEDULER_INDEX);
}

ev->noisy = false;
}

if((ev->flags & ASIO_TIMER) != 0)
{
// Need to cancel a timer.
Expand Down
16 changes: 0 additions & 16 deletions src/libponyrt/asio/kqueue.c
Original file line number Diff line number Diff line change
Expand Up @@ -407,22 +407,6 @@ PONY_API void pony_asio_event_unsubscribe(asio_event_t* ev)
asio_backend_t* b = ponyint_asio_get_backend();
pony_assert(b != NULL);

if(ev->noisy)
{
uint64_t old_count = ponyint_asio_noisy_remove();
// tell scheduler threads that asio has no noisy actors
// if the old_count was 1
if (old_count == 1)
{
ponyint_sched_unnoisy_asio(SPECIAL_THREADID_KQUEUE);

// maybe wake up a scheduler thread if they've all fallen asleep
ponyint_sched_maybe_wakeup_if_all_asleep(PONY_UNKNOWN_SCHEDULER_INDEX);
}

ev->noisy = false;
}

struct kevent event[4];
int i = 0;

Expand Down
Loading

0 comments on commit 60722ad

Please sign in to comment.