From ccd5074d873c24e0408c88cbb50e55a2feb97281 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Tue, 7 May 2024 11:27:19 +0100 Subject: [PATCH] Archive Illumina samplesheet if present --- Changes | 3 +++ lib/npg_pipeline/function/run_data_to_irods_archiver.pm | 8 +++++++- t/20-function-run_data_to_irods_archiver.t | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Changes b/Changes index 8dcf4327..cfcb2ad2 100644 --- a/Changes +++ b/Changes @@ -2,6 +2,9 @@ LIST OF CHANGES --------------- - Removing Tidyp dependency from CI + - Added 'SampleSheet.csv' file from the top level of the run folder to + the list of archived run-level Illumina data. This file is only present + in MiSeq run folders. release 68.2.0 - Added '--process_separately_lanes' to the pipeline to explicitly exclude diff --git a/lib/npg_pipeline/function/run_data_to_irods_archiver.pm b/lib/npg_pipeline/function/run_data_to_irods_archiver.pm index 221b3e44..1c07679e 100644 --- a/lib/npg_pipeline/function/run_data_to_irods_archiver.pm +++ b/lib/npg_pipeline/function/run_data_to_irods_archiver.pm @@ -19,6 +19,10 @@ override 'create' => sub { my $job_name_prefix = join q{_}, q{publish_run_data2irods}, $self->id_run(); $self->assign_common_definition_attrs($ref, $job_name_prefix); + # Exclude directories that might contain copies of the top-level files + # being archived. SampleSheet.csv is expected to be present + # only for MiSeq runs. 
+ my $command = join q[ ], $PUBLISH_SCRIPT_NAME, q{--restart_file}, $self->restart_file_path($job_name_prefix), @@ -27,8 +31,10 @@ override 'create' => sub { q{--source_directory}, $self->runfolder_path(), q{--include}, q['RunInfo.xml'], q{--include}, q['[Rr]unParameters.xml'], + q{--include}, q['SampleSheet.csv'], q{--include}, q[InterOp], q{--exclude}, q[Analysis], + q{--exclude}, q[Data], q{--id_run}, $self->id_run, q{--logconf}, $self->conf_file_path('log4perl_publish_illumina.conf'); @@ -95,7 +101,7 @@ Marina Gourtovaia =head1 LICENSE AND COPYRIGHT -Copyright (C) 2019 Genome Research Ltd. +Copyright (C) 2018,2019,2021,2022,2023,2024 Genome Research Ltd. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/t/20-function-run_data_to_irods_archiver.t b/t/20-function-run_data_to_irods_archiver.t index 96b3d91c..a4b145a9 100644 --- a/t/20-function-run_data_to_irods_archiver.t +++ b/t/20-function-run_data_to_irods_archiver.t @@ -13,7 +13,7 @@ my $util = t::util->new(); my $tmp_dir = $util->temp_directory(); my $script = q{npg_publish_illumina_run.pl}; -my $inexcludes = qr/--include 'RunInfo\.xml' --include '\[Rr\]unParameters\.xml' --include InterOp --exclude Analysis/; +my $inexcludes = qr/--include 'RunInfo\.xml' --include '\[Rr\]unParameters\.xml' --include 'SampleSheet\.csv' --include InterOp --exclude Analysis --exclude Data/; my $defaults = { default => {