Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework update submissions to allow re-submissions #256

Open
wants to merge 14 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
305 changes: 195 additions & 110 deletions bin/submission_new.py

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion conf/test_params.config
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,11 @@ params {
submission_mode = 'ftp' // 'ftp' or 'sftp'
submission_output_dir = "submission_outputs"
submission_prod_or_test = "test" // "prod" if submitting
submission_config = "${projectDir}/bin/config_files/mpxv_config.yaml"
submission_wait_time = 380
send_submission_email = false
submission_config = "${projectDir}/bin/config_files/mpxv_config.yaml"
update_submission = false
fetch_reports_only = false // only try to fetch reports
// batch_name = "batch1"

// for update_submission:
Expand Down
49 changes: 49 additions & 0 deletions modules/local/fetch_submission/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
FETCH SUBMISSION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
process FETCH_SUBMISSION {

publishDir "$params.output_dir/$params.submission_output_dir", mode: 'copy', overwrite: params.overwrite_output

conda (params.enable_conda ? params.env_yml : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'staphb/tostadas:latest' : 'staphb/tostadas:latest' }"

input:
val wait_time
tuple val(meta), path(validated_meta_path), path(fasta_path), path(fastq_1), path(fastq_2), path(annotations_path)
path submission_config

// define the command line arguments based on the value of params.submission_test_or_prod, params.send_submission_email
def test_flag = params.submission_prod_or_test == 'test' ? '--test' : ''
def send_submission_email = params.send_submission_email == true ? '--send_email' : ''
def biosample = params.biosample == true ? '--biosample' : ''
def sra = params.sra == true ? '--sra' : ''
def genbank = params.genbank == true ? '--genbank' : ''

script:
"""
submission_new.py \
--fetch \
--submission_name $meta.id \
--config_file $submission_config \
--metadata_file $validated_meta_path \
--species $params.species \
--output_dir . \
${fasta_path ? "--fasta_file $fasta_path" : ""} \
${annotations_path ? "--annotation_file $annotations_path" : ""} \
${fastq_1 ? "--fastq1 $fastq_1" : ""} \
${fastq_2 ? "--fastq2 $fastq_2" : ""} \
--custom_metadata_file $params.custom_fields_file \
--submission_mode $params.submission_mode \
$test_flag \
$send_submission_email \
$genbank $sra $biosample

"""
output:
//path "${validated_meta_path.getBaseName()}", emit: submission_files
path "${validated_meta_path.getBaseName()}/*.csv", emit: submission_report
}
2 changes: 1 addition & 1 deletion modules/local/general_util/wait/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ process WAIT {


input:
val submission_signal
//val submission_signal
val wait_time

script:
Expand Down
2 changes: 1 addition & 1 deletion modules/local/initial_submission/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,5 @@ process SUBMISSION {
"""
output:
path "${validated_meta_path.getBaseName()}", emit: submission_files
//path "*.csv", emit: submission_log
//path ""${validated_meta_path.getBaseName()}/*.csv", emit: submission_report
}
9 changes: 6 additions & 3 deletions modules/local/update_submission/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ process UPDATE_SUBMISSION {
'staphb/tostadas:latest' : 'staphb/tostadas:latest' }"

input:
val wait_time
tuple val(meta), path(validated_meta_path), path(fasta_path), path(fastq_1), path(fastq_2), path(annotations_path)
path submission_config

Expand All @@ -22,13 +21,17 @@ process UPDATE_SUBMISSION {
def biosample = params.biosample == true ? '--biosample' : ''
def sra = params.sra == true ? '--sra' : ''
def genbank = params.genbank == true ? '--genbank' : ''
// get absolute path if relative dir passed
def resolved_output_dir = params.output_dir.startsWith('/') ? params.output_dir : "${baseDir}/${params.output_dir}"


script:
"""
submission_new.py \
--update \
--submission_name $meta.id \
--config_file $submission_config \
--submission_report ${resolved_output_dir}/${params.submission_output_dir}/${meta.id}/submission_report.csv \
--config_file $submission_config \
--metadata_file $validated_meta_path \
--species $params.species \
--output_dir . \
Expand All @@ -45,5 +48,5 @@ process UPDATE_SUBMISSION {
"""
output:
path "${validated_meta_path.getBaseName()}", emit: submission_files
path "*.csv", emit: submission_log
//path ""${validated_meta_path.getBaseName()}/*.csv", emit: submission_report
}
5 changes: 3 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,12 @@ params {
biosample = true
submission_mode = 'ftp' // 'ftp' or 'sftp'
submission_output_dir = "submission_outputs"
submission_wait_time = 380 // time in seconds
submission_config = "${projectDir}/bin/config_files/<your-ncbi-config>.yaml"
submission_prod_or_test = "test" // "prod" if submitting
submission_config = "${projectDir}/bin/config_files/<your-ncbi-config>.yaml"
submission_wait_time = 380 // time in seconds
send_submission_email = false
update_submission = false
fetch_reports_only = false // only try to fetch reports

// general params
help = false
Expand Down
60 changes: 51 additions & 9 deletions subworkflows/local/submission.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/

include { SUBMISSION } from '../../modules/local/initial_submission/main'
include { FETCH_SUBMISSION } from '../../modules/local/fetch_submission/main'
include { UPDATE_SUBMISSION } from '../../modules/local/update_submission/main'
include { WAIT } from '../../modules/local/general_util/wait/main'
include { MERGE_UPLOAD_LOG } from "../../modules/local/general_util/merge_upload_log/main"
Expand All @@ -16,20 +17,61 @@ workflow INITIAL_SUBMISSION {
submission_ch // meta.id, tsv, fasta, fastq1, fastq2, gff
submission_config
wait_time

main:
// submit the files to database of choice (after fixing config and getting wait time)
SUBMISSION ( submission_ch, submission_config )

// Declare channels to dynamically handle conditional process outputs
Channel.empty().set { submission_files } // Default for SUBMISSION output
Channel.empty().set { update_files } // Default for UPDATE_SUBMISSION output
Channel.empty().set { fetched_reports } // Default for FETCH_SUBMISSION output
// actual process to initiate wait
WAIT ( SUBMISSION.out.submission_files.collect(), wait_time )
//WAIT ( SUBMISSION.out.submission_files.collect(), wait_time )
WAIT ( wait_time )

if ( params.fetch_reports_only == true ) {
// Check if submission folder exists and run report fetching module
submission_ch
.map { meta, validated_meta_path, fasta_path, fastq_1, fastq_2, annotations_path ->
def folder_path = file("${params.output_dir}/${params.submission_output_dir}/${meta.id}")
if (!folder_path.exists()) {
throw new IllegalStateException("Submission folder does not exist for ID: ${meta.id}")
}
// Return the tuple unchanged if the folder exists
return tuple(meta, validated_meta_path, fasta_path, fastq_1, fastq_2, annotations_path)
}
.set { valid_submission_ch } // Create a new validated channel

FETCH_SUBMISSION ( WAIT.out, valid_submission_ch, submission_config )
.set { fetched_reports }
}

else if ( params.update_submission == false ) {
// submit the files to database of choice
SUBMISSION ( submission_ch, submission_config )
.set { submission_files }

// try to fetch & parse the report.xml
// todo: this maybe doesn't need to take all the inputs from submission (or maybe doesn't need to be a separate module)
FETCH_SUBMISSION ( WAIT.out, submission_ch, submission_config )
.set { fetched_reports }
}

// process for updating the submitted samples
UPDATE_SUBMISSION ( WAIT.out, submission_ch, submission_config )
// if params.update_submission is true, update an existing submission
else if ( params.update_submission == true ) {
// process for updating the submitted samples
UPDATE_SUBMISSION ( submission_ch, submission_config )
.set { update_files }

// try to fetch & parse the report.xml
FETCH_SUBMISSION ( WAIT.out, submission_ch, submission_config )
.set { fetched_reports }
}

emit:
submission_files = UPDATE_SUBMISSION.out.submission_files
//submission_log = UPDATE_SUBMISSION.out.submission_log
submission_files = submission_files
update_files = update_files
fetched_reports = fetched_reports
//submission_files = SUBMISSION.out.submission_files
//submission_log = SUBMISSION.out.submission_log

//to do: add GISAID module
}