Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: custom log file behaviour #159

Merged
merged 39 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
f50794c
feat: added flags for intended logfile behaviour - no code, yet
cmeesters Oct 20, 2024
b64b3a9
fix: syntax errors and formatting
cmeesters Oct 21, 2024
a7d9df7
fix: working interface
cmeesters Oct 31, 2024
5133877
Merge branch 'main' into feat/logfeatures
cmeesters Nov 7, 2024
9c68936
fix: log prefix instead of logdir. - not working
cmeesters Nov 7, 2024
099292e
Merge branch 'main' into feat/logfeatures
cmeesters Nov 11, 2024
db4172a
feat: implementing all features as described
cmeesters Nov 18, 2024
0d6a200
fix: removed unnecessary pathlib import
cmeesters Nov 18, 2024
34adb4e
fix: linting issues
cmeesters Nov 18, 2024
730433f
fix: using atexit to decouple the function from __del__, moved all co…
cmeesters Nov 19, 2024
490a7c3
fix: deleted unused import
cmeesters Nov 19, 2024
a9ff35c
fix: deleted unused import
cmeesters Nov 19, 2024
e567349
fix: linting issues
cmeesters Nov 19, 2024
4770c8b
fix: linting issues II
cmeesters Nov 19, 2024
9cc019b
feat: not relying on '/home/$USER' any more, this is dangerous. Inst…
cmeesters Nov 19, 2024
bfd9cd6
fix: removed trailing whitespace
cmeesters Nov 19, 2024
d7e0e93
fix: using os.path.join for path concatenation, like it should be
cmeesters Nov 19, 2024
39cf201
Update snakemake_executor_plugin_slurm/__init__.py
cmeesters Nov 19, 2024
568080a
Merge branch 'feat/logfeatures' of github.com:snakemake/snakemake-exe…
cmeesters Nov 19, 2024
e727989
fix: formatting and linting
cmeesters Nov 19, 2024
51ae157
fix: moved cleanup code before __post_init__
cmeesters Nov 19, 2024
e43f108
fix: removed one more trailing whitespace
cmeesters Nov 19, 2024
88b6705
fix: those who want to keep all logs should be pleased
cmeesters Nov 19, 2024
009e216
docs: documenting the new feature
cmeesters Nov 19, 2024
8260f6b
fix: removed table of command line flags special to the executor - it…
cmeesters Dec 5, 2024
f750600
feat: same code - based on the pathlib library
cmeesters Dec 9, 2024
b661dd8
Update snakemake_executor_plugin_slurm/utils.py
cmeesters Jan 2, 2025
aaad25d
fix: no multiline warnings
cmeesters Jan 2, 2025
6f74d18
fix: removed outcommented code
cmeesters Jan 2, 2025
63b4f59
fix: reordered such that functions follow 'post_init'
cmeesters Jan 2, 2025
829a889
fix: converted help strings to single line strings
cmeesters Jan 2, 2025
c9c0eed
fix: reverted to previous default of logging in workdir
cmeesters Jan 6, 2025
8a18089
fix: back to default SLURM logdir NOT being in HOME, all code now bas…
cmeesters Jan 7, 2025
1ef9e98
fix: removed (once more) the additional flags section
cmeesters Jan 7, 2025
9764842
feat: documentation on the new features
cmeesters Jan 7, 2025
59cf40e
Update snakemake_executor_plugin_slurm/utils.py
cmeesters Jan 7, 2025
c9c7b8e
fix: recursively deleting log subdirs
cmeesters Jan 8, 2025
739f878
Update snakemake_executor_plugin_slurm/__init__.py
johanneskoester Jan 8, 2025
d6f5567
Update snakemake_executor_plugin_slurm/__init__.py
johanneskoester Jan 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 97 additions & 9 deletions snakemake_executor_plugin_slurm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
__email__ = "[email protected]"
__license__ = "MIT"

import atexit
import csv
from io import StringIO
import os
Expand Down Expand Up @@ -31,13 +32,51 @@

@dataclass
class ExecutorSettings(ExecutorSettingsBase):
logdir: Optional[str] = field(
johanneskoester marked this conversation as resolved.
Show resolved Hide resolved
default=f"/home/{os.environ['USER']}/.snakemake/slurm_logs",
metadata={
"help": """
Per default the SLURM log directory (writing output is
required by SLURM) is '~/.snakemake/slurm_logs'.
This flag allows to set an alternative directory.
""",
"env_var": False,
"required": False,
},
)
cmeesters marked this conversation as resolved.
Show resolved Hide resolved
keep_successful_logs: bool = field(
default=False,
metadata={
"help": """
Per default SLURM log files will be deleted upon sucessful
completion of a job. Whenever a SLURM job fails, its log
file will be preserved.
This flag allows to keep all SLURM log files, even those
of successful jobs.
""",
"env_var": False,
"required": False,
},
)
cmeesters marked this conversation as resolved.
Show resolved Hide resolved
delete_logfiles_older_than: Optional[int] = field(
default=10,
metadata={
"help": """
Per default SLURM log files in the SLURM log directory
of a workflow will be deleted after 10 days. For this,
best leave the default log directory unaltered.
Setting this flag allows to change this behaviour.
If set to <=0, no old files will be deleted.
"""
},
)
cmeesters marked this conversation as resolved.
Show resolved Hide resolved
init_seconds_before_status_checks: Optional[int] = field(
default=40,
metadata={
"help": """
Defines the time in seconds before the first status
check is performed after job submission.
""",
Defines the time in seconds before the first status
check is performed after job submission.
""",
"env_var": False,
"required": False,
},
Expand All @@ -47,7 +86,8 @@ class ExecutorSettings(ExecutorSettingsBase):
metadata={
"help": """
Allow requeuing preempted of failed jobs,
if no cluster default. Results in `sbatch ... --requeue ...`
if no cluster default. Results in
`sbatch ... --requeue ...`
This flag has no effect, if not set.
""",
cmeesters marked this conversation as resolved.
Show resolved Hide resolved
"env_var": False,
Expand Down Expand Up @@ -92,6 +132,36 @@ def __post_init__(self):
self._fallback_partition = None
self._preemption_warning = False # no preemption warning has been issued

def clean_old_logs(logdir, age_cutoff):
    """
    Delete files older than 'age_cutoff' days below the SLURM 'logdir'
    and remove directories that are empty afterwards.

    Args:
        logdir: directory tree to scan for log files.
        age_cutoff: maximum file age in days; ``None`` or a value <= 0
            disables the cleanup entirely.

    The function reports through ``self.logger`` of the enclosing scope.
    It never raises on file system errors: failures are logged as
    warnings, because this runs as an exit hook.
    """
    # the corresponding setting is Optional[int]: comparing None with an
    # int would raise a TypeError, so treat None like "cleanup disabled"
    if age_cutoff is None or age_cutoff <= 0:
        return
    cutoff_secs = age_cutoff * 86400  # days -> seconds
    current_time = time.time()
    self.logger.info(f"Cleaning up log files older than {age_cutoff} day(s)")

    # bottom-up walk: sub-directories are visited (and possibly removed)
    # before their parents, so emptied parents can be removed as well
    for root, _, files in os.walk(logdir, topdown=False):
        for fname in files:
            file_path = os.path.join(root, fname)
            try:
                file_age = current_time - os.stat(file_path).st_mtime
                if file_age > cutoff_secs:
                    os.remove(file_path)
            # FileNotFoundError is a subclass of OSError
            except OSError as e:
                self.logger.warning(f"Could not delete file {file_path}: {e}")
        # remove empty rule top dir, if empty; the directory may have
        # vanished or be non-removable - do not fail at exit because of that
        try:
            if not os.listdir(root):
                os.rmdir(root)
        except OSError as e:
            self.logger.warning(f"Could not remove directory {root}: {e}")

atexit.register(
clean_old_logs,
self.workflow.executor_settings.logdir,
self.workflow.executor_settings.delete_logfiles_older_than,
)
cmeesters marked this conversation as resolved.
Show resolved Hide resolved

def warn_on_jobcontext(self, done=None):
if not done:
if "SLURM_JOB_ID" in os.environ:
Expand All @@ -104,6 +174,9 @@ def warn_on_jobcontext(self, done=None):
delete_slurm_environment()
done = True

# def delete_old_logs(self):
# self.workflow.executor_settings.delete_logfiles_older_than

cmeesters marked this conversation as resolved.
Show resolved Hide resolved
cmeesters marked this conversation as resolved.
Show resolved Hide resolved
def additional_general_args(self):
    """Return the fixed extra argument string for this executor."""
    # constant value; nothing is read from the instance
    extra_args = "--executor slurm-jobstep --jobs 1"
    return extra_args

Expand All @@ -123,18 +196,21 @@ def run_job(self, job: JobExecutorInterface):
except AttributeError:
wildcard_str = ""

slurm_logfile = os.path.abspath(
f".snakemake/slurm_logs/{group_or_rule}/{wildcard_str}/%j.log"
slurm_logfile = (
self.workflow.executor_settings.logdir
+ os.path.sep
+ f"{group_or_rule}/{wildcard_str}/%j.log"
)
logdir = os.path.dirname(slurm_logfile)

cmeesters marked this conversation as resolved.
Show resolved Hide resolved
slurm_logdir = os.path.dirname(slurm_logfile)
# this behavior has been fixed in slurm 23.02, but there might be plenty of
# older versions around, hence we should rather be conservative here.
assert "%j" not in logdir, (
assert "%j" not in slurm_logdir, (
"bug: jobid placeholder in parent dir of logfile. This does not work as "
"we have to create that dir before submission in order to make sbatch "
"happy. Otherwise we get silent fails without logfiles being created."
)
os.makedirs(logdir, exist_ok=True)
os.makedirs(slurm_logdir, exist_ok=True)

# generic part of a submission string:
# we use a run_uuid as the job-name, to allow `--name`-based
Expand Down Expand Up @@ -380,6 +456,18 @@ async def check_active_jobs(
self.report_job_success(j)
any_finished = True
active_jobs_seen_by_sacct.remove(j.external_jobid)
if not self.workflow.executor_settings.keep_successful_logs:
self.logger.debug(
f"""removing log for successful job
with SLURM ID '{j.external_jobid}'"""
)
try:
if os.path.exists(j.aux["slurm_logfile"]):
os.remove(j.aux["slurm_logfile"])
except (OSError, FileNotFoundError) as e:
self.logger.warning(
f"Could not remove log file {j.aux['slurm_logfile']}: {e}"
)
elif status == "PREEMPTED" and not self._preemption_warning:
self._preemption_warning = True
self.logger.warning(
Expand Down
3 changes: 3 additions & 0 deletions snakemake_executor_plugin_slurm/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# utility functions for the SLURM executor plugin

import os
import logging

logger = logging.getLogger(__name__)
cmeesters marked this conversation as resolved.
Show resolved Hide resolved


def delete_slurm_environment():
Expand Down
Loading