From b3ad797790a726560035c9ae660ca98b251618b3 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Tue, 21 Nov 2023 14:41:54 +0000 Subject: [PATCH 1/9] fix inheritance of undef parameter values bug add test for this --- bin/vtfp.pl | 7 +- t/10-vtfp-vtfile_v2.t | 187 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 186 insertions(+), 8 deletions(-) diff --git a/bin/vtfp.pl b/bin/vtfp.pl index d7c7ea44d..16aeb7c49 100755 --- a/bin/vtfp.pl +++ b/bin/vtfp.pl @@ -678,18 +678,13 @@ sub fetch_param_entry { } } - my $candidate; if(exists $param_entry->{_value}) { - $candidate = $param_entry; # already evaluated, return cached value (allowing undef) + return $param_entry; # already evaluated, return cached value (allowing undef) } push @{$aux->{irp}}, $param_name; $retval = resolve_subst_constructor($param_name, $param_entry->{subst_constructor}, $params, $ewi, $aux); - if(not $retval and $candidate) { - $retval = $candidate->{_value}; - } - if(defined $retval) { $param_entry->{_value} = $retval; } diff --git a/t/10-vtfp-vtfile_v2.t b/t/10-vtfp-vtfile_v2.t index d4cdedd47..008200bf7 100644 --- a/t/10-vtfp-vtfile_v2.t +++ b/t/10-vtfp-vtfile_v2.t @@ -1,7 +1,7 @@ use strict; use warnings; use Carp; -use Test::More tests => 6; +use Test::More tests => 7; use Test::Cmd; use File::Slurp; use Perl6::Slurp; @@ -10,6 +10,7 @@ use File::Temp qw(tempdir); use Cwd; my $tdir = tempdir(CLEANUP => 1); +warn q[tdir: ], $tdir; my $odir = getcwd(); my $test = Test::Cmd->new( prog => $odir.'/bin/vtfp.pl', workdir => q()); @@ -568,7 +569,7 @@ subtest 'multilevel_vtf_required_param' => sub { { id => 'vfile', type => 'OUTFILE', - name => { subst_constructor => { vals => [ 'tmp.', {subst => 'ext', 'required' => 'true'} ], postproc => { op => 'concat', pad => ''} }, } + name => { subst_constructor => { vals => [ 'tmp.', {subst => 'ext', required => JSON::true} ], postproc => { op => 'concat', pad => ''} }, } }, ] }; @@ -628,4 +629,186 @@ subtest 'multilevel_vtf_required_param' => sub { 
is_deeply ($vtfp_results, $expected_result, 'multilevel local param reeval'); }; +subtest 'multilevel_vtf_forced_undef' => sub { + plan tests => 4; + + my $top_container = { + version => '2.0', + description => 'outermost of a nest of test VTFILEs', + subst_params => [ + { id => 'top_sp_contents', required => 'false', default => 'top SP' }, + { + id => 'top_box', + required => 'false', + default => 'TB_DFLT', + subst_constructor => { + vals => [ 'TB [', {subst => 'top_sp_contents'}, '] TB' ], + postproc => {op => 'concat', pad => ''} + } + } + ], + nodes => [ + { + id => 'middle', + type => 'VTFILE', + node_prefix => 'mid_', + name => 'middle.json' + } + ], + edges => [] + }; + + my $middle = { + version => '2.0', + description => 'middle of a nest of test VTFILEs', + subst_params => [ + {id => 'mid_sp_contents', required => 'false', default => 'mid SP'}, + { + id => 'mid_box', + required => 'false', + default => 'MID_DFLT', + subst_constructor => { + vals => [ 'MIDB [', {subst => 'mid_sp_contents'}, '] MIDB' ], + postproc => {op => 'concat', pad => ''} + } + } + ], + nodes => [ + { + id => 'bottom', + type => 'VTFILE', + node_prefix => 'bot_', + name => 'bottom.json' + }, + { + id => 'blather', + type => 'EXEC', + use_STDIN => 'false', + use_STDOUT => 'true', + cmd => [ + 'echo', + {subst => 'top_box'}, {subst => 'mid_box'}, {subst => 'bot_box'} + ] + } + ], + edges => [] + }; + + my $bottom = { + version => '2.0', + description => 'innermost of a nest of test VTFILEs', + subst_params => [ + {id => 'bot_sp_contents', required => 'false', default => 'bot SP'}, + { + id => 'bot_box', + required => 'false', + default => 'BOT_DFLT', + subst_constructor => { + vals => [ 'BOTB [', {subst => 'bot_sp_contents'}, '] BOTB' ], + postproc => {op => 'concat', pad => ''} + } + } + ], + nodes => [ + { + id => 'haver', + type => 'EXEC', + use_STDIN => 'false', + use_STDOUT => 'true', + cmd => [ + 'echo', + {subst => 'top_box'}, {subst => 'mid_box'}, {subst => 'bot_box'} + ] + } + 
], + edges => [] + }; + + my ($template, $fn); + $fn = $template = $tdir.q[/10-vtfp-multilevel_vtf_forced_undef.json]; + my $contents = to_json($top_container); + write_file($fn, $contents); + + $fn = $tdir.q[/middle.json]; + $contents = to_json($middle); + write_file($fn, $contents); + + $fn = $tdir.q[/bottom.json]; + $contents = to_json($bottom); + write_file($fn, $contents); + + my $exit_status = $test->run(chdir => $test->curdir, args => qq[-no-absolute_program_paths -verbosity_level 0 -template_path $tdir $template]); + ok($exit_status>>8 == 0, "non-zero exit: $exit_status"); + my $vtfp_results = from_json($test->stdout); + my $vtfp_err = $test->stderr; + + my $expected_result = { + 'version' => '2.0', + 'edges' => [], + 'nodes' => [ + { + 'id' => 'mid_blather', + 'type' => 'EXEC', + 'use_STDIN' => 'false', + 'use_STDOUT' => 'true', + 'cmd' => [ + 'echo', + 'TB [top SP] TB', + 'MIDB [mid SP] MIDB' + ] + }, + { + 'id' => 'mid_bot_haver', + 'type' => 'EXEC', + 'use_STDIN' => 'false', + 'use_STDOUT' => 'true', + 'cmd' => [ + 'echo', + 'TB [top SP] TB', + 'MIDB [mid SP] MIDB', + 'BOTB [bot SP] BOTB' + ], + } + ] + }; + is_deeply ($vtfp_results, $expected_result, 'multilevel vtf forced undef (no nullkeys)'); + + $exit_status = $test->run(chdir => $test->curdir, args => qq[-no-absolute_program_paths -verbosity_level 0 -template_path $tdir -nullkeys mid_sp_contents $template]); + ok($exit_status>>8 == 0, "non-zero exit: $exit_status"); + $vtfp_results = from_json($test->stdout); + $vtfp_err = $test->stderr; + + $expected_result = { + 'version' => '2.0', + 'edges' => [], + 'nodes' => [ + { + 'id' => 'mid_blather', + 'type' => 'EXEC', + 'use_STDIN' => 'false', + 'use_STDOUT' => 'true', + 'cmd' => [ + 'echo', + 'TB [top SP] TB', + 'MID_DFLT' + ] + }, + { + 'id' => 'mid_bot_haver', + 'type' => 'EXEC', + 'use_STDIN' => 'false', + 'use_STDOUT' => 'true', + 'cmd' => [ + 'echo', + 'TB [top SP] TB', + 'MID_DFLT', + 'BOTB [bot SP] BOTB' + ], + } + ] + }; + + is_deeply 
($vtfp_results, $expected_result, 'multilevel vtf forced undef (nullkeys: mid_sp_contents)'); +}; + 1; From e26eb644204fd0f1fe9ee481cfaf03b9c4e45dd9 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Tue, 21 Nov 2023 16:00:58 +0000 Subject: [PATCH 2/9] allow initial input bam to be produced by "bambi i2b" or some alternate method; select between i2b or reanalysis from product crams using i2b_switch parameter, or select an arbitrary VTFILE template using the s1_produce_init_bam_method parameter incorporates required vtfp fix for parameter value inheritance bug --- data/vtlib/bambi_i2b.json | 119 +++++++++++++++++ ...2bam_phix_deplex_wtsi_stage1_template.json | 122 +++--------------- data/vtlib/pib_reanalysis.json | 74 +++++++++++ 3 files changed, 208 insertions(+), 107 deletions(-) create mode 100644 data/vtlib/bambi_i2b.json create mode 100644 data/vtlib/pib_reanalysis.json diff --git a/data/vtlib/bambi_i2b.json b/data/vtlib/bambi_i2b.json new file mode 100644 index 000000000..80d52de3d --- /dev/null +++ b/data/vtlib/bambi_i2b.json @@ -0,0 +1,119 @@ +{ +"version":"2.0", +"description":"This pipeline starts with Illumina2Bam, and ends by running SplitBamByReadGroup to create separate BAM files for each sample.", +"subgraph_io":{ + "ports":{ + "inputs":{}, + "outputs":{ + "_stdout_":"i2b" + } + } +}, +"subst_params":[ + {"id":"rpt","description":"Run/Position/TagIndex, though here it is generally only Run/Position. Used in defaults/constructors for i2b_rg,decoder_metrics,md5_file,seqchksum_file,spatial_filter_file and filtered_bam"}, + {"id":"i2b_run_path","description":"path to runfolder. Provides a base on which i2b_runfolder_path can be constructed"}, + {"id":"i2b_runfolder","description":"runfolder directory. Provides a base on which platform unit and i2b_runfolder_path can be constructed"}, + {"id":"i2b_runfolder_path","description":"full path to runfolder. 
Provides a base on which platform unit, intensities directory and (indirectly) basecalls directories can be constructed", + "subst_constructor":{ + "vals":[ {"subst":"i2b_run_path","required":"yes"}, {"subst":"i2b_runfolder","required":"yes"} ], + "postproc":{"op":"concat","pad":"/"} + } + + }, + { + "id":"i2b_intensity_dir", + "required":"yes", + "description":"Illumina intensities directory including config xml file, and clocs, locs or pos files under lane directory, using Data/Intensities directory under runfolder if not given", + "subst_constructor":{ + "vals":[ {"subst":"i2b_runfolder_path","required":"yes"}, "/", {"subst":"i2b_data_intensities_dir_suffix","ifnull":"Data/Intensities"} ], + "postproc":{"op":"concat","pad":""} + } + }, + { + "id":"i2b_basecalls_dir", + "required":"no", + "description":"Illumina basecalls directory including config xml file, and filter files, bcl, maybe scl files under lane cycle directory, using BaseCalls directory under intensities if not given.", + "subst_constructor":{ + "vals":[ {"subst":"i2b_intensity_dir","required":"yes"}, "/", {"subst":"i2b_basecalls_dir_suffix","ifnull":"BaseCalls"} ], + "postproc":{"op":"concat","pad":""} + } + }, + { + "id":"i2b_bam_basecalls_dir", + "required":"no", + "description":"full path to the BAM_basecalls directory; a default parameter value for the tag_metrics qc check", + "subst_constructor":{ + "vals":[ {"subst":"i2b_intensity_dir","required":"yes"}, "/", {"subst":"i2b_bam_basecalls_dir_suffix","required":"yes"} ], + "postproc":{"op":"concat","pad":""} + } + }, + {"id":"i2b_lane","required":"yes","comment":"Lane number"}, + { + "id":"i2b_pu", + "description":"The platform unit, using runfolder name plus lane number if not given", + "subst_constructor":{ + "vals":[ {"subst":"i2b_runfolder"}, "_", {"subst":"i2b_lane"} ], + "postproc":{"op":"concat","pad":""} + } + }, + {"id":"i2b_library_name","description":"The name of the sequenced library"}, + {"id":"i2b_rg", "description":"ID used to 
link RG header record with RG tag in SAM record", "default":{"subst":"rpt"}}, + {"id":"i2b_bc_seq_val","required":"no","description":"Tag name for barcode sequence. Illumina2bam default is BC"}, + {"id":"i2b_bc_qual_val","required":"no","description":"Tag name for barcode quality. Illumina2bam default is QT"}, + {"id":"i2b_sec_bc_seq_val","required":"no","description":"Tag name for second barcode sequence. Illumina2bam default is null"}, + {"id":"i2b_sec_bc_qual_val","required":"no","description":"Tag name for second barcode quality. Illumina2bam default is null"}, + {"id":"i2b_study_name","description":"The name of the study"}, + { + "id":"i2b_sample_aliases", + "default":"UNSPECIFIED", + "comment":"produces a comma-separated string from array of i2b_sample_alias values", + "subst_constructor":{ + "vals":{"subst":"i2b_sample_alias", "ifnull":["UNSPEC"]}, + "postproc":{"op":"concat","pad":","} + } + } +], +"nodes":[ + { + "id":"i2b", + "type":"EXEC", + "description":"Create the initial BAM file from the data generated by the Illumina machine using bambi i2b", + "use_STDIN":false, + "use_STDOUT":true, + "cmd":[ + "bambi", + "i2b", + {"subst":"i2b_intensity_flag","required":true,"ifnull":{"subst_constructor":{"vals":[ "--intensity-dir", {"subst":"i2b_intensity_dir","required":true} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_basecalls_flag","required":true,"ifnull":{"subst_constructor":{"vals":[ "--basecalls-dir", {"subst":"i2b_basecalls_dir","required":true} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_lane_flag","required":true,"ifnull":{"subst_constructor":{"vals":[ "--lane", {"subst":"i2b_lane","required":true} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_pu_flag","ifnull":{"subst_constructor":{"vals":[ "--platform-unit", {"subst":"i2b_pu"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_rg_flag","ifnull":{"subst_constructor":{"vals":[ "--read-group-id", {"subst":"i2b_rg"} 
],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_bc_seq_flag","ifnull":{"subst_constructor":{"vals":[ "--barcode-tag", {"subst":"i2b_bc_seq_val"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_bc_qual_flag","ifnull":{"subst_constructor":{"vals":[ "--quality-tag", {"subst":"i2b_bc_qual_val"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_sec_bc_seq_flag","ifnull":{"subst_constructor":{"vals":[ "--sec-barcode-tag", {"subst":"i2b_sec_bc_seq_val"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_sec_bc_qual_flag","ifnull":{"subst_constructor":{"vals":[ "--sec-quality-tag", {"subst":"i2b_sec_bc_qual_val"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_first_tile_flag","ifnull":{"subst_constructor":{"vals":[ "--first-tile", {"subst":"i2b_first_tile"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_tile_limit_flag","ifnull":{"subst_constructor":{"vals":[ "--tile-limit", {"subst":"i2b_tile_limit"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_library_flag","ifnull":{"subst_constructor":{"vals":[ "--library-name", {"subst":"i2b_library_name"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_study_name_flag","ifnull":{"subst_constructor":{"vals":[ "--study-name", {"subst":"i2b_study_name"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_sample_alias_flag","ifnull":{"subst_constructor":{"vals":[ "--sample-alias", {"subst":"i2b_sample_aliases"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_bc_read_flag","ifnull":{"subst_constructor":{"vals":[ "--bc-read", {"subst":"i2b_bc_read"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_first_index_0_flag","ifnull":{"subst_constructor":{"vals":[ "--first-index-cycle", {"subst":"i2b_first_index_0"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_final_index_0_flag","ifnull":{"subst_constructor":{"vals":[ "--final-index-cycle", {"subst":"i2b_final_index_0"} ],"postproc":{"op":"concat","pad":"="}}}}, + 
{"subst":"i2b_first_index_1_flag","ifnull":{"subst_constructor":{"vals":[ "--first-index-cycle", {"subst":"i2b_first_index_1"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_final_index_1_flag","ifnull":{"subst_constructor":{"vals":[ "--final-index-cycle", {"subst":"i2b_final_index_1"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_first_0_flag","ifnull":{"subst_constructor":{"vals":[ "--first-cycle", {"subst":"i2b_first_0"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_final_0_flag","ifnull":{"subst_constructor":{"vals":[ "--final-cycle", {"subst":"i2b_final_0"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_first_1_flag","ifnull":{"subst_constructor":{"vals":[ "--first-cycle", {"subst":"i2b_first_1"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_final_1_flag","ifnull":{"subst_constructor":{"vals":[ "--final-cycle", {"subst":"i2b_final_1"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_thread_count_flag","ifnull":{"subst_constructor":{"vals":[ "--threads", {"subst":"i2b_thread_count"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_tqlen_flag","ifnull":{"subst_constructor":{"vals":[ "--queue-len", {"subst":"i2b_tqlen"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"select":"i2b_nocall_qual_switch", "required":true, "select_range":[1], "default":"off", "cases":{ "on": "--nocall-quality", "off":[]}}, + {"subst":"i2b_arbitrary_flags", "comment":"this allows arbitrary sets of flag strings to be inserted in the command" }, + "--output-file=-", + "--compression-level=0" + ] + } +], +"edges":[] +} diff --git a/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json b/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json index 52aa889cc..dd6c56255 100644 --- a/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json +++ b/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json @@ -2,117 +2,25 @@ "description":"This pipeline starts with Illumina2Bam, and ends by running 
SplitBamByReadGroup to create separate BAM files for each sample.", "version":"2.0", "subst_params":[ - {"id":"rpt","description":"Run/Position/TagIndex, though here it is generally only Run/Position. Used in defaults/constructors for i2b_rg,decoder_metrics,md5_file,seqchksum_file,spatial_filter_file and filtered_bam"}, - {"id":"i2b_run_path","description":"path to runfolder. Provides a base on which i2b_runfolder_path can be constructed"}, - {"id":"i2b_runfolder","description":"runfolder directory. Provides a base on which platform unit and i2b_runfolder_path can be constructed"}, - {"id":"i2b_runfolder_path","description":"full path to runfolder. Provides a base on which platform unit, intensities directory and (indirectly) basecalls directories can be constructed", - "subst_constructor":{ - "vals":[ {"subst":"i2b_run_path","required":"yes"}, {"subst":"i2b_runfolder","required":"yes"} ], - "postproc":{"op":"concat","pad":"/"} - } - - }, - { - "id":"i2b_intensity_dir", - "required":"yes", - "description":"Illumina intensities directory including config xml file, and clocs, locs or pos files under lane directory, using Data/Intensities directory under runfolder if not given", - "subst_constructor":{ - "vals":[ {"subst":"i2b_runfolder_path","required":"yes"}, "/", {"subst":"i2b_data_intensities_dir_suffix","ifnull":"Data/Intensities"} ], - "postproc":{"op":"concat","pad":""} - } - }, - { - "id":"i2b_basecalls_dir", - "required":"no", - "description":"Illumina basecalls directory including config xml file, and filter files, bcl, maybe scl files under lane cycle directory, using BaseCalls directory under intensities if not given.", - "subst_constructor":{ - "vals":[ {"subst":"i2b_intensity_dir","required":"yes"}, "/", {"subst":"i2b_basecalls_dir_suffix","ifnull":"BaseCalls"} ], - "postproc":{"op":"concat","pad":""} - } - }, - { - "id":"i2b_bam_basecalls_dir", - "required":"no", - "description":"full path to the BAM_basecalls directory; a default parameter value for 
the tag_metrics qc check", - "subst_constructor":{ - "vals":[ {"subst":"i2b_intensity_dir","required":"yes"}, "/", {"subst":"i2b_bam_basecalls_dir_suffix","required":"yes"} ], - "postproc":{"op":"concat","pad":""} - } - }, - {"id":"i2b_lane","required":"yes","comment":"Lane number"}, - { - "id":"i2b_pu", - "description":"The platform unit, using runfolder name plus lane number if not given", - "subst_constructor":{ - "vals":[ {"subst":"i2b_runfolder"}, "_", {"subst":"i2b_lane"} ], - "postproc":{"op":"concat","pad":""} - } - }, - {"id":"i2b_library_name","description":"The name of the sequenced library"}, - {"id":"i2b_rg", "description":"ID used to link RG header record with RG tag in SAM record", "default":{"subst":"rpt"}}, - {"id":"i2b_bc_seq_val","required":"no","description":"Tag name for barcode sequence. Illumina2bam default is BC"}, - {"id":"i2b_bc_qual_val","required":"no","description":"Tag name for barcode quality. Illumina2bam default is QT"}, - {"id":"i2b_sec_bc_seq_val","required":"no","description":"Tag name for second barcode sequence. Illumina2bam default is null"}, - {"id":"i2b_sec_bc_qual_val","required":"no","description":"Tag name for second barcode quality. 
Illumina2bam default is null"}, - {"id":"i2b_study_name","description":"The name of the study"}, - { - "id":"i2b_sample_aliases", - "default":"UNSPECIFIED", - "comment":"produces a comma-separated string from array of i2b_sample_alias values", - "subst_constructor":{ - "vals":{"subst":"i2b_sample_alias", "ifnull":["UNSPEC"]}, - "postproc":{"op":"concat","pad":","} - } - }, + {"id":"s1_runfolder_path","required":"yes","default":{"subst":"i2b_runfolder_path"}, "comment":"Stage1 runfolder path"}, + {"id":"s1_basecalls_dir","required":"yes","default":{"subst":"i2b_basecalls_dir"}, "comment":"Stage1 BAM_basecalls directory"}, + {"id":"s1_lane","required":"yes","default":{"subst":"i2b_lane"}, "comment":"Stage1 lane number"}, {"id":"qc_check_id_run","required":"yes"}, - {"id":"qc_check_position","required":"yes","default":{"subst":"i2b_lane"}}, - {"id":"qc_check_qc_in_dir","required":"yes","default":{"subst_constructor":{"vals":[{"subst":"i2b_runfolder_path"}, "Data/Intensities", {"subst":"i2b_bam_basecalls_dir"}],"postproc":{"op":"concat","pad":"/"}}}}, + {"id":"qc_check_position","required":"yes","default":{"subst":"s1_lane"}}, + {"id":"qc_check_qc_in_dir","required":"yes","default":{"subst_constructor":{"vals":[{"subst":"s1_runfolder_path"}, "Data/Intensities", {"subst":"s1_bam_basecalls_dir"}],"postproc":{"op":"concat","pad":"/"}}}}, {"id":"qc_check_qc_out_dir","required":"yes","default":{"subst_constructor":{"vals":[{"subst":"qc_check_qc_in_dir"}, "no_cal/archive/qc"],"postproc":{"op":"concat","pad":"/"}}}}, - {"id":"run_lane_label", "description":"label constructed from id_run and position", "default":{"subst_constructor":{"vals":[{"subst":"qc_check_id_run"}, "_", {"subst":"i2b_lane"}],"postproc":{"op":"concat","pad":""}}}}, + {"id":"run_lane_label", "description":"label constructed from id_run and position", "default":{"subst_constructor":{"vals":[{"subst":"qc_check_id_run"}, "_", {"subst":"s1_lane"}],"postproc":{"op":"concat","pad":""}}}}, 
{"id":"s1_output_format", "description":"output format for deplexed reads (bam/cram/etc)", "default":"cram"} ], "nodes":[ - { - "id":"illumina2bam", - "type":"EXEC", - "use_STDIN":false, - "use_STDOUT":true, - "comment":"Actual executable used depends on the value of the 12b_implementation parameter: java - use illumina2bam (default); bambi - use new bambi i2b", - "cmd":[ - "bambi", - "i2b", - {"subst":"i2b_intensity_flag","required":true,"ifnull":{"subst_constructor":{"vals":[ "--intensity-dir", {"subst":"i2b_intensity_dir","required":true} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_basecalls_flag","required":true,"ifnull":{"subst_constructor":{"vals":[ "--basecalls-dir", {"subst":"i2b_basecalls_dir","required":true} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_lane_flag","required":true,"ifnull":{"subst_constructor":{"vals":[ "--lane", {"subst":"i2b_lane","required":true} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_pu_flag","ifnull":{"subst_constructor":{"vals":[ "--platform-unit", {"subst":"i2b_pu"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_rg_flag","ifnull":{"subst_constructor":{"vals":[ "--read-group-id", {"subst":"i2b_rg"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_bc_seq_flag","ifnull":{"subst_constructor":{"vals":[ "--barcode-tag", {"subst":"i2b_bc_seq_val"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_bc_qual_flag","ifnull":{"subst_constructor":{"vals":[ "--quality-tag", {"subst":"i2b_bc_qual_val"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_sec_bc_seq_flag","ifnull":{"subst_constructor":{"vals":[ "--sec-barcode-tag", {"subst":"i2b_sec_bc_seq_val"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_sec_bc_qual_flag","ifnull":{"subst_constructor":{"vals":[ "--sec-quality-tag", {"subst":"i2b_sec_bc_qual_val"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_first_tile_flag","ifnull":{"subst_constructor":{"vals":[ "--first-tile", 
{"subst":"i2b_first_tile"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_tile_limit_flag","ifnull":{"subst_constructor":{"vals":[ "--tile-limit", {"subst":"i2b_tile_limit"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_library_flag","ifnull":{"subst_constructor":{"vals":[ "--library-name", {"subst":"i2b_library_name"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_study_name_flag","ifnull":{"subst_constructor":{"vals":[ "--study-name", {"subst":"i2b_study_name"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_sample_alias_flag","ifnull":{"subst_constructor":{"vals":[ "--sample-alias", {"subst":"i2b_sample_aliases"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_bc_read_flag","ifnull":{"subst_constructor":{"vals":[ "--bc-read", {"subst":"i2b_bc_read"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_first_index_0_flag","ifnull":{"subst_constructor":{"vals":[ "--first-index-cycle", {"subst":"i2b_first_index_0"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_final_index_0_flag","ifnull":{"subst_constructor":{"vals":[ "--final-index-cycle", {"subst":"i2b_final_index_0"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_first_index_1_flag","ifnull":{"subst_constructor":{"vals":[ "--first-index-cycle", {"subst":"i2b_first_index_1"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_final_index_1_flag","ifnull":{"subst_constructor":{"vals":[ "--final-index-cycle", {"subst":"i2b_final_index_1"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_first_0_flag","ifnull":{"subst_constructor":{"vals":[ "--first-cycle", {"subst":"i2b_first_0"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_final_0_flag","ifnull":{"subst_constructor":{"vals":[ "--final-cycle", {"subst":"i2b_final_0"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_first_1_flag","ifnull":{"subst_constructor":{"vals":[ "--first-cycle", {"subst":"i2b_first_1"} 
],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_final_1_flag","ifnull":{"subst_constructor":{"vals":[ "--final-cycle", {"subst":"i2b_final_1"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_thread_count_flag","ifnull":{"subst_constructor":{"vals":[ "--threads", {"subst":"i2b_thread_count"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_tqlen_flag","ifnull":{"subst_constructor":{"vals":[ "--queue-len", {"subst":"i2b_tqlen"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"select":"i2b_nocall_qual_switch", "required":true, "select_range":[1], "default":"off", "cases":{ "on": "--nocall-quality", "off":[]}}, - {"subst":"i2b_arbitrary_flags", "comment":"this allows arbitrary sets of flag strings to be inserted in the command" }, - "--output-file=-", - "--compression-level=0" - ], - "description":"Create the initial BAM file from the data generated by the Illumina machine" - }, + { + "id":"produce_init_bam", + "type":"VTFILE", + "comment":"inputs: _stdin_ (bam), reference; outputs: _stdout_ (bam)", + "node_prefix":"pib_", + "name":{"subst":"s1_produce_init_bam_method", "required":true, "ifnull":{"select":"i2b_switch", "default":"i2b", "select_range":[1], "cases":{"i2b":"bambi_i2b.json", "reanalysis":"pib_reanalysis.json"}}}, + "description":"subgraph containing i2b or reanalysis initialisation" + }, { "id":"tee_i2b", "type":"EXEC", @@ -435,7 +343,7 @@ } ], "edges":[ - { "id":"illumina2bam_to_ti2b", "from":"illumina2bam", "to":"tee_i2b" }, + { "id":"illumina2bam_to_ti2b", "from":"produce_init_bam", "to":"tee_i2b" }, { "id":"ti2b_to_bamadapterfind", "from":"tee_i2b:baf", "to":"bamindexdecoder" }, { "id":"decoder_to_metrics", "from":"bamindexdecoder:metrics_file", "to":"decoder_metrics" }, { "id":"decoder_metrics_to_qc_tag_metrics", "from":"decoder_metrics", "to":"qc_tag_metrics_check" }, diff --git a/data/vtlib/pib_reanalysis.json b/data/vtlib/pib_reanalysis.json new file mode 100644 index 000000000..d8c7f157a --- /dev/null +++ 
b/data/vtlib/pib_reanalysis.json @@ -0,0 +1,74 @@ +{ +"version":"2.0", +"description":"merge input cram (or bam) files into a single bam stream, reset it with samtools reset and collate reads by name to produce the initial bam for reanalysis", +"subgraph_io":{ + "ports":{ + "inputs":{}, + "outputs":{ + "_stdout_":"name_collate" + } + } +}, +"subst_params":[], +"nodes":[ + { + "id":"crammerge", + "type":"EXEC", + "use_STDIN": false, + "use_STDOUT": true, + "cmd": [ + "samtools", + "merge", + "-c", + "-O", "BAM", + "-l", "0", + {"select":"s1_input_format", "default":"cram", "select_range":[1], "cases":{ + "cram":["--input-fmt-option", "no_ref=1"], + "bam":["--input-fmt", "bam"] + }}, + "-", + {"subst":"incrams"} + ], + "description":"merge individual cram files from a sample into one bam file" + }, + { + "id":"reset", + "type":"EXEC", + "comment": "WIP", + "use_STDIN": true, + "use_STDOUT": true, + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "reset", + {"subst":"reset_keep_tag_flag","ifnull":{ + "subst_constructor":{ + "vals":[ + "--keep-tag", + {"subst":"reset_keep_tags","required":false, "ifnull":{"subst_constructor":{"vals":["RG","BC"], "postproc":{"op":"concat","pad":","}}}} + ] + }}}, + {"subst":"reset_output_format_flag","ifnull":{"subst_constructor":{"vals":["--output-fmt", {"subst":"reset_output_format", "ifnull":"BAM", "required":false}]}}}, + "--threads", {"subst":"reset_threads","required":true,"ifnull":4}, + {"subst":"reset_extra_flags", "required":false}, + "-", + "-" + ] + }, + { + "id":"name_collate", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "collate", "-O", + "-l", {"subst":"pib_reanalysis_collate_compression","required":true,"ifnull":["0"]}, + "--threads", {"subst":"pib_reanalysis_collate_threads","required":true,"ifnull":4}, + {"subst":"pib_reanalysis_collate_extra_flags", "required":false}, + "-" + ] + } +], +"edges":[ + { "id":"bamtofastq_to_int_fq", "from":"crammerge", "to":"reset" }, + {
"id":"bwa_mem_to_s2b", "from":"reset", "to":"name_collate" } +] +} From 53b340a35249eb3af230fb592102c5dd814d8d4b Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Tue, 5 Dec 2023 19:25:57 +0000 Subject: [PATCH 3/9] add reheader step add --reject-PG flag to reset step --- data/vtlib/pib_reanalysis.json | 38 +++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/data/vtlib/pib_reanalysis.json b/data/vtlib/pib_reanalysis.json index d8c7f157a..f2602a600 100644 --- a/data/vtlib/pib_reanalysis.json +++ b/data/vtlib/pib_reanalysis.json @@ -12,7 +12,7 @@ "subst_params":[], "nodes":[ { - "id":"crammerge", + "id":"merge", "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, @@ -20,6 +20,7 @@ "samtools", "merge", "-c", + "-r", "-O", "BAM", "-l", "0", {"select":"s1_input_format", "default":"cram", "select_range":[1], "cases":{ @@ -31,10 +32,30 @@ ], "description":"merge individual cram files from a sample into one bam file" }, + { + "id":"reheader", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "comment": "remove unwanted RG header lines", + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "reheader", + "--no-PG", + "-c", + {"subst":"reheader_script","required":true, + "ifnull":{"subst_constructor":{"vals":[ + "perl -ne \"(!/^\\@RG/ || /\\tID:", + {"subst":"reheader_rg_id", "required":true}, + "\\t/) && print;\"" + ], "postproc":{"op":"concat","pad":""}}}}, + {"subst":"pib_reanalysis_reheader_extra_flags", "required":false}, + "-" + ] + }, { "id":"reset", "type":"EXEC", - "comment": "WIP", + "comment": "reset bam stream", "use_STDIN": true, "use_STDOUT": true, "cmd": [ @@ -46,10 +67,16 @@ {"subst":"reset_keep_tags","required":false, "ifnull":{"subst_constructor":{"vals":["RG","BC"], "postproc":{"op":"concat","pad":","}}}} ] }}}, + {"subst":"reset_reject_PG_flag","ifnull":{ + "subst_constructor":{ + "vals":[ + "--reject-PG", + {"subst":"reset_reject_PG_id","required":false, 
"ifnull":"SCS"} + ] + }}}, {"subst":"reset_output_format_flag","ifnull":{"subst_constructor":{"vals":["--output-fmt", {"subst":"reset_output_format", "ifnull":"BAM", "required":false}]}}}, "--threads", {"subst":"reset_threads","required":true,"ifnull":4}, {"subst":"reset_extra_flags", "required":false}, - "-", "-" ] }, @@ -68,7 +95,8 @@ } ], "edges":[ - { "id":"bamtofastq_to_int_fq", "from":"crammerge", "to":"reset" }, - { "id":"bwa_mem_to_s2b", "from":"reset", "to":"name_collate" } + { "id":"merge_to_reheader", "from":"merge", "to":"reheader" }, + { "id":"reheader_to_reset", "from":"reheader", "to":"reset" }, + { "id":"reset_to_collate", "from":"reset", "to":"name_collate" } ] } From 07a046e23648411b4207c681f8ff5a5f9466225a Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 6 Dec 2023 15:11:13 +0000 Subject: [PATCH 4/9] add -h flag (header SAM file) to samtools merge step --- data/vtlib/pib_reanalysis.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/data/vtlib/pib_reanalysis.json b/data/vtlib/pib_reanalysis.json index f2602a600..6af36ea2a 100644 --- a/data/vtlib/pib_reanalysis.json +++ b/data/vtlib/pib_reanalysis.json @@ -27,6 +27,13 @@ "cram":["--input-fmt-option", "no_ref=1"], "bam":["--input-fmt", "bam"] }}, + {"subst":"merge_hdr_file_flag","ifnull":{ + "subst_constructor":{ + "vals":[ + "-h", + {"subst":"merge_hdr_file","required":true, "ifnull":{"subst_constructor":{"vals":[{"subst":"reanalysis_root"}, "/auxdata/", {"subst":"qc_check_id_run"}, "/", {"subst":"qc_check_id_run"},"_", {"subst":"s1_lane"}, ".rg_hdr.sam"], "postproc":{"op":"concat","pad":""}}}} + ] + }}}, "-", {"subst":"incrams"} ], From 98d8da2c1d89c2175ec9010a465a2e9ddc821e26 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 6 Dec 2023 16:37:21 +0000 Subject: [PATCH 5/9] give the reheader_rg_id parameter a default value (value of i2b_rg parameter) --- data/vtlib/pib_reanalysis.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/vtlib/pib_reanalysis.json 
b/data/vtlib/pib_reanalysis.json index 6af36ea2a..1dcefe6f5 100644 --- a/data/vtlib/pib_reanalysis.json +++ b/data/vtlib/pib_reanalysis.json @@ -52,7 +52,7 @@ {"subst":"reheader_script","required":true, "ifnull":{"subst_constructor":{"vals":[ "perl -ne \"(!/^\\@RG/ || /\\tID:", - {"subst":"reheader_rg_id", "required":true}, + {"subst":"reheader_rg_id", "required":true, "ifnull":{"subst":"i2b_rg"}}, "\\t/) && print;\"" ], "postproc":{"op":"concat","pad":""}}}}, {"subst":"pib_reanalysis_reheader_extra_flags", "required":false}, From b20b1566036720e646063c6a71a2c1b98d965283 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 6 Dec 2023 19:20:15 +0000 Subject: [PATCH 6/9] remove spurious --input-fmt flag from samtools merge command with BAM input --- data/vtlib/pib_reanalysis.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/vtlib/pib_reanalysis.json b/data/vtlib/pib_reanalysis.json index 1dcefe6f5..e06e39758 100644 --- a/data/vtlib/pib_reanalysis.json +++ b/data/vtlib/pib_reanalysis.json @@ -25,7 +25,7 @@ "-l", "0", {"select":"s1_input_format", "default":"cram", "select_range":[1], "cases":{ "cram":["--input-fmt-option", "no_ref=1"], - "bam":["--input-fmt", "bam"] + "bam":[] }}, {"subst":"merge_hdr_file_flag","ifnull":{ "subst_constructor":{ From 289d636eb8ca970eaf2449661ea46d1039eef94a Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 13 Dec 2023 16:09:58 +0000 Subject: [PATCH 7/9] add parameters to allow specification of sort order of input reads in reanalysis --- data/vtlib/pib_reanalysis.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/data/vtlib/pib_reanalysis.json b/data/vtlib/pib_reanalysis.json index e06e39758..4b00a6a2b 100644 --- a/data/vtlib/pib_reanalysis.json +++ b/data/vtlib/pib_reanalysis.json @@ -19,6 +19,12 @@ "cmd": [ "samtools", "merge", + {"select":"pib_merge_input_order", "default":"coord", "select_range":[1], "cases":{ + "coord":[], + "qname_alpha_numeric":"-n", + "qname_lexicographical":"-N" + }}, + 
{"subst_constructor":{"vals":["-t", {"subst":"pib_sort_tag", "required":false}]}}, "-c", "-r", "-O", "BAM", @@ -34,6 +40,7 @@ {"subst":"merge_hdr_file","required":true, "ifnull":{"subst_constructor":{"vals":[{"subst":"reanalysis_root"}, "/auxdata/", {"subst":"qc_check_id_run"}, "/", {"subst":"qc_check_id_run"},"_", {"subst":"s1_lane"}, ".rg_hdr.sam"], "postproc":{"op":"concat","pad":""}}}} ] }}}, + {"subst":"pib_merge_arbitrary_flags", "required":false}, "-", {"subst":"incrams"} ], From 5b443013ce5443110dbae733a9a942283c251500 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Fri, 15 Dec 2023 18:35:04 +0000 Subject: [PATCH 8/9] add -T (temporary file prefix) flag to collate step remove unneeded warn from test --- data/vtlib/pib_reanalysis.json | 7 +++++++ t/10-vtfp-vtfile_v2.t | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/data/vtlib/pib_reanalysis.json b/data/vtlib/pib_reanalysis.json index 4b00a6a2b..5bef5d2c2 100644 --- a/data/vtlib/pib_reanalysis.json +++ b/data/vtlib/pib_reanalysis.json @@ -104,6 +104,13 @@ "-l", {"subst":"pib_reanalysis_collate_compression","required":true,"ifnull":["0"]}, "--threads", {"subst":"pib_reanalysis_collate_threads","required":true,"ifnull":4}, {"subst":"pib_reanalysis_collate_extra_flags", "required":false}, + {"subst":"pib_collate_tempfile_flag","ifnull":{ + "subst_constructor":{ + "vals":[ + "-T", + {"subst":"pib_collate_tempfile_prefix","required":false, "ifnull":{"subst_constructor":{"vals":[ {"subst":"qc_check_id_run"}, {"subst":"s1_lane"}, "collate_tmp"], "postproc":{"op":"concat","pad":"_"}}}} + ] + }}}, "-" ] } diff --git a/t/10-vtfp-vtfile_v2.t b/t/10-vtfp-vtfile_v2.t index 008200bf7..f276a7af4 100644 --- a/t/10-vtfp-vtfile_v2.t +++ b/t/10-vtfp-vtfile_v2.t @@ -10,7 +10,6 @@ use File::Temp qw(tempdir); use Cwd; my $tdir = tempdir(CLEANUP => 1); -warn q[tdir: ], $tdir; my $odir = getcwd(); my $test = Test::Cmd->new( prog => $odir.'/bin/vtfp.pl', workdir => q()); From 
8a50dc931fc6c0c9acf915b10335af14dcb4d620 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Tue, 19 Dec 2023 15:24:15 +0000 Subject: [PATCH 9/9] extend default list of preserved aux tags in reset command (now BC,QT,FI,RT,TC) --- data/vtlib/pib_reanalysis.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/vtlib/pib_reanalysis.json b/data/vtlib/pib_reanalysis.json index 5bef5d2c2..26e3723fb 100644 --- a/data/vtlib/pib_reanalysis.json +++ b/data/vtlib/pib_reanalysis.json @@ -78,7 +78,7 @@ "subst_constructor":{ "vals":[ "--keep-tag", - {"subst":"reset_keep_tags","required":false, "ifnull":{"subst_constructor":{"vals":["RG","BC"], "postproc":{"op":"concat","pad":","}}}} + {"subst":"reset_keep_tags","required":false, "ifnull":{"subst_constructor":{"vals":["RG","BC", "QT", "FI", "RT", "TC"], "postproc":{"op":"concat","pad":","}}}} ] }}}, {"subst":"reset_reject_PG_flag","ifnull":{