Skip to content

Commit

Permalink
lightningd: add option to crash itself after some time.
Browse files Browse the repository at this point in the history
We have CI runs which time out (after 2 hours).  It's not clear why,
but we can at least eliminate CLN lockups as the cause.

Since pytest disabled the --timeout option on test shutdown, we could be
seeing an issue where stopping takes a long time?

Signed-off-by: Rusty Russell <[email protected]>
  • Loading branch information
rustyrussell committed Jan 27, 2025
1 parent 9565b3a commit 4e887c7
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 0 deletions.
4 changes: 4 additions & 0 deletions contrib/msggen/msggen/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -20552,6 +20552,10 @@
"value_int": 3,
"source": "cmdline"
},
"dev-crash-after": {
"value_str": "3600",
"source": "cmdline"
},
"dev-fail-on-subdaemon-fail": {
"set": true,
"source": "cmdline"
Expand Down
4 changes: 4 additions & 0 deletions doc/schemas/lightning-listconfigs.json
Original file line number Diff line number Diff line change
Expand Up @@ -2678,6 +2678,10 @@
"value_int": 3,
"source": "cmdline"
},
"dev-crash-after": {
"value_str": "3600",
"source": "cmdline"
},
"dev-fail-on-subdaemon-fail": {
"set": true,
"source": "cmdline"
Expand Down
30 changes: 30 additions & 0 deletions lightningd/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,31 @@ static char *opt_ignore(void *unused)
return NULL;
}

/* Signal handler installed for SIGALRM by opt_set_crash_timeout():
 * when the crash timer fires, terminate the process with abort(). */
static void handle_alarm(int sig)
{
	/* The signal number is irrelevant: we always crash. */
	(void)sig;
	abort();
}

/* Option callback for --dev-crash-after: arm a timer that crashes us.
 *
 * @arg: the option value, parsed with opt_set_u32() as a number of
 *       seconds.
 * @ld:  unused here.
 *
 * Installs handle_alarm() as the SIGALRM handler and then calls alarm()
 * with the parsed value.
 *
 * Returns NULL on success, or the error string from opt_set_u32() if
 * @arg could not be parsed.  Exits via err() if the handler cannot be
 * installed.
 */
static char *opt_set_crash_timeout(const char *arg, struct lightningd *ld)
{
	struct sigaction act;
	/* Not named "time": that would shadow libc's time(). */
	u32 seconds;
	char *errstr = opt_set_u32(arg, &seconds);

	if (errstr)
		return errstr;

	/* In case we're *REALLY* stuck, use alarm() */
	memset(&act, 0, sizeof(act));
	act.sa_handler = handle_alarm;

	if (sigaction(SIGALRM, &act, NULL) != 0)
		err(1, "Setting up SIGALRM handler");

	/* Note: per POSIX, alarm(0) cancels any pending alarm instead of
	 * firing, so a value of 0 effectively disables the crash timer. */
	alarm(seconds);
	return NULL;
}

static void dev_register_opts(struct lightningd *ld)
{
/* We might want to debug plugins, which are started before normal
Expand Down Expand Up @@ -978,6 +1003,10 @@ static void dev_register_opts(struct lightningd *ld)
opt_set_u32, opt_show_u32,
&ld->dev_low_prio_anchor_blocks,
"How many blocks to aim for low-priority anchor closes (default: 2016)");
clnopt_witharg("--dev-crash-after", OPT_DEV,
opt_set_crash_timeout, NULL,
ld,
"Crash if we are still going after this long.");
/* This is handled directly in daemon_developer_mode(), so we ignore it here */
clnopt_noarg("--dev-debug-self", OPT_DEV,
opt_ignore,
Expand Down Expand Up @@ -2219,6 +2248,7 @@ bool is_known_opt_cb_arg(char *(*cb_arg)(const char *, void *))
|| cb_arg == (void *)opt_add_accept_htlc_tlv
|| cb_arg == (void *)opt_set_codex32_or_hex
|| cb_arg == (void *)opt_subd_dev_disconnect
|| cb_arg == (void *)opt_set_crash_timeout
|| cb_arg == (void *)opt_add_api_beg
|| cb_arg == (void *)opt_force_featureset
|| cb_arg == (void *)opt_force_privkey
Expand Down
3 changes: 3 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ class LightningNode(utils.LightningNode):
def __init__(self, *args, **kwargs):
utils.LightningNode.__init__(self, *args, **kwargs)

# This is a recent innovation, and we don't want to nail pyln-testing to this version.
self.daemon.opts['dev-crash-after'] = 3600

# We have some valgrind suppressions in the `tests/`
# directory, so we can add these to the valgrind configuration
# (not generally true when running pyln-testing, hence why
Expand Down

0 comments on commit 4e887c7

Please sign in to comment.