Skip to content

Commit

Permalink
Paranoid RunQueue errors. (#27948)
Browse files Browse the repository at this point in the history
* Verbose RunQueue errors.

* Looser tracking

* Add line numbers.

* Point fingers at subsystems.

* SSinput debugging, useless debugging cleanup
  • Loading branch information
FunnyMan3595 authored Jan 27, 2025
1 parent 9bf8b92 commit 6eeea2b
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 15 deletions.
28 changes: 13 additions & 15 deletions code/controllers/master.dm
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ GLOBAL_REAL(Master, /datum/controller/master) = new
/// Did inits finish with no one logged in
var/initializations_finished_with_no_players_logged_in

/// The type of the last subsystem to be fire()'d.
var/last_type_processed
/// The last subsystem to be fire()'d.
var/datum/controller/subsystem/last_processed

/// Cache for the loading screen - cleared after
var/list/ss_in_init_order = list()
Expand Down Expand Up @@ -172,7 +172,7 @@ GLOBAL_REAL(Master, /datum/controller/master) = new
msg += "\t [varname] = [varval]\n"
log_world(msg)

var/datum/controller/subsystem/BadBoy = Master.last_type_processed
var/datum/controller/subsystem/BadBoy = Master.last_processed
var/FireHim = FALSE
if(istype(BadBoy))
msg = null
Expand Down Expand Up @@ -431,8 +431,8 @@ GLOBAL_REAL(Master, /datum/controller/master) = new
continue

if(queue_head)
if(RunQueue() <= 0) //error running queue
stack_trace("MC: RunQueue failed. Current error_level is [round(error_level, 0.25)]")
if(!RunQueue())
stack_trace("MC: RunQueue returned early during [last_processed.name] ([last_processed.last_task()]). Current error_level is [round(error_level, 0.25)].")
if(error_level > 1) //skip the first error,
if(!SoftReset(tickersubsystems, runlevel_sorted_subsystems))
error_level++
Expand Down Expand Up @@ -501,7 +501,6 @@ GLOBAL_REAL(Master, /datum/controller/master) = new
/// RunQueue - Run through the queue of subsystems to run, running them while balancing out their allocated tick percentage
/// Returns TRUE if the queue was processed to completion; a runtime inside this proc ends it early with a falsy return, which the caller treats as a failure
/datum/controller/master/proc/RunQueue()
. = 0
var/datum/controller/subsystem/queue_node
var/queue_node_flags
var/queue_node_priority
Expand Down Expand Up @@ -537,9 +536,9 @@ GLOBAL_REAL(Master, /datum/controller/master) = new
bg_calc = TRUE
else if(bg_calc)
//error state, do sane fallback behavior
if(. == 0)
log_world("MC: Queue logic failure, non-background subsystem queued to run after a background subsystem: [queue_node] queue_prev:[queue_node.queue_prev]")
. = -1
var/message = "MC: Queue logic failure, non-background subsystem queued to run after a background subsystem: [queue_node] queue_prev:[queue_node.queue_prev]"
log_world(message)
stack_trace(message)
current_tick_budget = queue_priority_count //this won't even be right, but is the best we have.
bg_calc = FALSE

Expand All @@ -551,9 +550,9 @@ GLOBAL_REAL(Master, /datum/controller/master) = new
tick_precentage = tick_remaining * (queue_node_priority / current_tick_budget)
else
//error state
if(. == 0)
log_world("MC: tick_budget sync error. [json_encode(list(current_tick_budget, queue_priority_count, queue_priority_count_bg, bg_calc, queue_node, queue_node_priority))]")
. = -1
var/message = "MC: tick_budget sync error. [json_encode(list(current_tick_budget, queue_priority_count, queue_priority_count_bg, bg_calc, queue_node, queue_node_priority))]"
log_world(message)
stack_trace(message)
tick_precentage = tick_remaining //just because we lost track of priority calculations doesn't mean we can't try to finish off the run, if the error state persists, we don't want to stop ticks from happening

tick_precentage = max(tick_precentage*0.5, tick_precentage-queue_node.tick_overrun)
Expand All @@ -563,7 +562,7 @@ GLOBAL_REAL(Master, /datum/controller/master) = new
ran = TRUE

queue_node_paused = (queue_node.state == SS_PAUSED || queue_node.state == SS_PAUSING)
last_type_processed = queue_node
last_processed = queue_node

queue_node.state = SS_RUNNING

Expand Down Expand Up @@ -613,8 +612,7 @@ GLOBAL_REAL(Master, /datum/controller/master) = new

queue_node = queue_node.queue_next

if(. == 0)
. = 1
return TRUE

//resets the queue, and all subsystems, while filtering out the subsystem lists
// called if any mc's queue procs runtime or exit improperly.
Expand Down
4 changes: 4 additions & 0 deletions code/controllers/subsystem.dm
Original file line number Diff line number Diff line change
Expand Up @@ -353,3 +353,7 @@
out["sleep_count"] = fire_sleep_count
out["custom"] = list() // Override as needed on child
return out

/// Allows a subsystem to report what it was doing in case of a silent crash.
/// Base implementation returns a generic placeholder string; subsystems that
/// can describe their current work item should override this (the Master
/// Controller interpolates the result into its RunQueue failure stack trace).
/datum/controller/subsystem/proc/last_task()
	return "No task specified."
12 changes: 12 additions & 0 deletions code/controllers/subsystem/SSinput.dm
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ SUBSYSTEM_DEF(input)
/// List of clients whose input to process in loop.
var/list/client/processing = list()

var/last_ckey
var/last_x
var/last_y
var/last_z

/datum/controller/subsystem/input/Initialize()
refresh_client_macro_sets()

Expand All @@ -26,6 +31,10 @@ SUBSYSTEM_DEF(input)
set waitfor = FALSE
var/list/to_cull
for(var/client/C in processing)
last_ckey = C.ckey
last_x = C.mob?.x
last_y = C.mob?.y
last_z = C.mob?.z
if(processing[C] + AUTO_CULL_TIME < world.time)
if(!length(C.input_data.keys_held))
LAZYADD(to_cull, C)
Expand All @@ -42,4 +51,7 @@ SUBSYSTEM_DEF(input)
var/client/user = clients[i]
user.set_macros()

/// Reports the last client whose input was being processed, plus their mob's
/// coordinates, so an MC crash log can point at the offending player.
/// NOTE(review): last_ckey/last_x/last_y/last_z are only written in the fire()
/// cull loop, so this reads "null at null, null, null" before the first pass
/// (or with no clients processed) — TODO confirm that is acceptable in logs.
/datum/controller/subsystem/input/last_task()
	return "[last_ckey] at [last_x], [last_y], [last_z]"

#undef AUTO_CULL_TIME

0 comments on commit 6eeea2b

Please sign in to comment.