Skip to content

Commit

Permalink
Merge pull request #15 from cancerit/feature/append_to_trimmed_reads
Browse files Browse the repository at this point in the history
Feature/append to trimmed reads
  • Loading branch information
vaofford authored Aug 17, 2023
2 parents 96f1be9 + 6db1022 commit f711512
Show file tree
Hide file tree
Showing 8 changed files with 233 additions and 5 deletions.
68 changes: 68 additions & 0 deletions modules/local/read_modification/functions.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//
// Utility functions used in nf-core DSL2 module files
//

//
// Extract name of software tool from process name using $task.process
//
def getSoftwareName(task_process) {
    // Keep only the text after the last ':' in the process name
    def last_scope = task_process.tokenize(':')[-1]
    // The tool name is the part before the first '_', reported in lower case
    def tool_name = last_scope.tokenize('_')[0]
    return tool_name.toLowerCase()
}

//
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
//
def initOptions(Map args) {
    // Build the options map in one literal; every key except publish_files
    // falls back to an empty default when the caller did not supply a value.
    // publish_files is passed through untouched so that null stays null.
    return [
        args            : args.args ?: '',
        args2           : args.args2 ?: '',
        args3           : args.args3 ?: '',
        publish_by_meta : args.publish_by_meta ?: [],
        publish_dir     : args.publish_dir ?: '',
        publish_files   : args.publish_files,
        suffix          : args.suffix ?: ''
    ]
}

//
// Tidy up and join elements of a list to return a path string
//
def getPathFromList(path_list) {
    // Drop null entries first: the safe-navigation operator does not short-circuit
    // the rest of a call chain, so calling further methods on a null entry would throw.
    def present = path_list.findAll { item -> item != null }
    // Trim surrounding whitespace, then strip leading and trailing slashes
    def cleaned = present.collect { item -> item.trim().replaceAll("^[/]+|[/]+\$", "") }
    // Remove entries that are empty after cleaning (blank or slash-only) so the
    // joined path never contains an empty segment such as "a//b"
    def segments = cleaned.findAll { item -> !item.isEmpty() }
    return segments.join('/')
}

//
// Function to save/publish module results
//
def saveFiles(Map args) {
    // Files ending in '.version.txt' get no publish path
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def opts = initOptions(args.options)
    // Options passed via args.options take precedence over the caller's default directory
    def segments = [ opts.publish_dir ?: args.publish_dir ]

    if (opts.publish_by_meta) {
        def meta_keys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (key in meta_keys) {
            // Only String keys are considered, and only when a meta map was supplied
            if (!args.meta || !(key instanceof String)) {
                continue
            }
            // Default to the key itself; use the meta value when the key is present
            def segment = key
            if (args.meta.containsKey(key)) {
                def meta_value = args.meta[key]
                // Boolean values become "<key>_<value>"; anything else is used as-is
                segment = meta_value instanceof Boolean ? "${key}_${meta_value}".toString() : meta_value
            }
            // Values that are not plain Strings contribute an empty segment
            segments.add(segment instanceof String ? segment : '')
        }
    }

    if (opts.publish_files instanceof Map) {
        // The first entry whose key is a suffix of the filename selects the sub-directory
        def matched = opts.publish_files.find { entry -> args.filename.endsWith(entry.key) }
        if (matched != null) {
            def with_ext_dir = segments + [ matched.value ]
            return "${getPathFromList(with_ext_dir)}/${args.filename}"
        }
        // No suffix matched: nothing is returned for this file
        return null
    }

    if (opts.publish_files == null) {
        return "${getPathFromList(segments)}/${args.filename}"
    }

    // publish_files is set to something other than a Map: nothing is returned
    return null
}
36 changes: 36 additions & 0 deletions modules/local/read_modification/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'

// Default module options to an empty map (presumably overridden by the including
// workflow, e.g. via addParams - confirm against callers)
params.options = [:]
// Normalise the supplied options through initOptions so that args, args2, args3,
// publish_by_meta, publish_dir, publish_files and suffix are always defined
options = initOptions(params.options)

//
// Append a fixed string to the start and/or end of every read sequence in a
// gzipped FASTQ file, and pad the quality line with a matching number of copies
// of a single quality character so sequence and quality lengths stay equal.
//
// Reads params.append_start, params.append_end and params.append_quality.
// Writes <prefix>.modified.fq.gz, where prefix is meta.id plus options.suffix.
//
process APPEND_STRINGS_TO_FQ {
    tag "$meta.id"
    label 'process_low'

    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'modified_fastq', meta:meta, publish_by_meta:['id']) }

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.modified.fq.gz"), emit: reads

    script:
    def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    def output = "${prefix}.modified.fq.gz"

    // Coerce everything to String: a parameter value such as 5 may arrive as a
    // number, in which case `value * n` would multiply instead of repeating.
    def append_start = params.append_start ? params.append_start.toString() : ""
    def append_end   = params.append_end ? params.append_end.toString() : ""
    def quality_char = params.append_quality == null ? "" : params.append_quality.toString()

    // Fail loudly rather than write a FASTQ whose quality line no longer
    // matches the sequence length.
    if ((append_start || append_end) && quality_char.length() != 1) {
        throw new IllegalArgumentException("append_quality must be a single character when append_start or append_end is set.")
    }
    def append_quality_start = quality_char * append_start.length()
    def append_quality_end   = quality_char * append_end.length()

    // Make a value safe to embed in a single-quoted sed replacement:
    //  - backslash, '/' (the delimiter) and '&' (whole match) are special to sed
    //  - a single quote would terminate the shell quoting, so close/escape/reopen it
    def sedLiteral = { String text ->
        def escaped = text.replaceAll('[\\\\/&]') { hit -> '\\' + hit }
        return escaped.replace("'", "'\\''")
    }

    // FASTQ records are four lines: line 2 is the sequence, line 4 the qualities.
    // '&' re-inserts the original line between the prepended and appended text.
    def seq_expr  = "2~4s/.*/${sedLiteral(append_start)}&${sedLiteral(append_end)}/"
    def qual_expr = "4~4s/.*/${sedLiteral(append_quality_start)}&${sedLiteral(append_quality_end)}/"

    """
    zcat ${reads} | \\
        sed -e '${seq_expr}' -e '${qual_expr}' | \\
        gzip > ${output}
    """
}
31 changes: 31 additions & 0 deletions modules/local/read_modification/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: read modification
description: Manipulation of FASTQ files
keywords:
- FASTQ

input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:true ]
- reads:
type: file
description: |
List of input FASTQ files of size 1; i.e., single-end data.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:true ]
- reads:
type: file
description: |
List of output FASTQ files of size 1; i.e., single-end data.
authors:
- "@vaofford"
7 changes: 7 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,17 @@ params {
read_filtering_qc = false
seqkit_seq_options = null

// Modify FASTQ (e.g. add perfect primer sequence to reads)
read_modification = false
append_start = null
append_end = null
append_quality = null

// Quantification
quantification = null
oligo_library = null
transform_library = false
pyquest_library_converter_options = null

// Sequencing QC
raw_sequencing_qc = false
Expand Down
31 changes: 31 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,34 @@
}
}
},
"read_modification_options": {
"title": "Read modification options",
"type": "object",
"fa_icon": "fas fa-plus-circle",
"description": "Define pipeline options for read modification.",
"properties": {
"read_modification": {
"type": "boolean",
"description": "Define whether to add string and qualities to read.",
"help_text": "Set this to true if read modification (e.g. adding perfect primer to read) is required, else set to false."
},
"append_start": {
"type": "string",
"description": "Define string to add to start of read sequence.",
"help_text": "Define string to add to start of read sequence, else set to null."
},
"append_end": {
"type": "string",
"description": "Define string to add to end of read sequence.",
"help_text": "Define string to add to end of read sequence, else set to null."
},
"append_quality": {
"type": "string",
"description": "Define quality value to read quality (this should be a single character).",
"help_text": "Define quality value to read quality (this should be a single character), else set to null."
}
}
},
"quantification_options": {
"title": "Quantification options",
"type": "object",
Expand Down Expand Up @@ -421,6 +449,9 @@
{
"$ref": "#/definitions/read_filtering_options"
},
{
"$ref": "#/definitions/read_modification_options"
},
{
"$ref": "#/definitions/quantification_options"
},
Expand Down
4 changes: 2 additions & 2 deletions subworkflows/local/quantification.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ workflow QUANTIFICATION {
// MODULE: Run Python library transformer
//
TRANSFORM_LIBRARY_FOR_PYQUEST ( oligo_library )
}
}

if (params.quantification == "pyquest") {
//
Expand All @@ -44,7 +44,7 @@ workflow QUANTIFICATION {
} else {
PYQUEST ( reads, oligo_library )
}

ch_sample_library_counts = PYQUEST.out.library_counts
ch_sample_read_counts = PYQUEST.out.read_counts
ch_sample_stats = PYQUEST.out.stats
Expand Down
25 changes: 25 additions & 0 deletions subworkflows/local/read_modification.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//
// Read modification
//

params.options = [:]

include { APPEND_STRINGS_TO_FQ } from '../../modules/local/read_modification/main'

workflow READ_MODIFICATION {
    take:
    reads

    main:
    // Emit an empty channel unless read modification is switched on
    modified = Channel.empty()

    if (params.read_modification) {
        //
        // MODULE: Append string to reads
        //
        APPEND_STRINGS_TO_FQ ( reads )
        modified = APPEND_STRINGS_TO_FQ.out.reads
    }

    emit:
    reads = modified
}
36 changes: 33 additions & 3 deletions workflows/sge.nf
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,24 @@ if (params.read_filtering_qc && !params.read_filtering) {
exit 1
}

// Normalise append_quality to a String before validating it. A value given on the
// command line may be parsed as a number (e.g. 5, or 0 which is falsy), and numbers
// have no length(); comparing the string form keeps digit quality characters valid.
def append_quality_value = params.append_quality == null ? '' : params.append_quality.toString()

// Check that when append_start, append_end or append_quality are set that read_modification has been set to true
if (!params.read_modification && (params.append_start || params.append_end || append_quality_value)) {
    printErr("If append_start, append_end or append_quality is set, read_modification must be set to true.")
    exit 1
}

// Check either append_start or append_end provided when read_modification is set
if (params.read_modification && !params.append_start && !params.append_end) {
    printErr("If read_modification is set, a string must be provided for either append_start or append_end.")
    exit 1
}

// Check append_quality is exactly one character when read_modification is set
if (params.read_modification && append_quality_value.length() != 1) {
    printErr("If read_modification is set, a single quality character must be provided for append_quality.")
    exit 1
}

// Check quantification is set if library is provided
if (params.oligo_library && !params.quantification) {
printErr("If a library file is provided by oligo_library, quantification must be set to true.")
Expand All @@ -102,7 +120,7 @@ if (params.quantification) {
}

// Check that quantification is set if transform_library is enabled
if (params.transform_library && !params.quantification ) {
if (params.transform_library && !params.quantification ) {
printErr("If transform_library is set to true, quantification must also be set to true.")
exit 1
}
Expand Down Expand Up @@ -149,6 +167,7 @@ include { READ_MERGING } from '../subworkflows/local/read_merging' addParams( op
include { ADAPTER_TRIMMING } from '../subworkflows/local/adapter_trimming' addParams( options: [:] )
include { PRIMER_TRIMMING } from '../subworkflows/local/primer_trimming' addParams( options: [:] )
include { READ_FILTERING } from '../subworkflows/local/read_filtering' addParams( options: [:] )
include { READ_MODIFICATION } from '../subworkflows/local/read_modification' addParams( options: [:] )
include { QUANTIFICATION } from '../subworkflows/local/quantification' addParams( options: [:] )
include { SEQUENCING_QC as RAW_SEQUENCING_QC;
SEQUENCING_QC as MERGED_SEQUENCING_QC;
Expand Down Expand Up @@ -287,7 +306,7 @@ workflow SGE {

if (params.read_filtering) {
READ_FILTERING ( ch_read_filter )
ch_reads_to_analyse = READ_FILTERING.out.reads
ch_reads_to_modify = READ_FILTERING.out.reads
ch_software_versions = ch_software_versions.mix(READ_FILTERING.out.versions)

//
Expand All @@ -299,7 +318,18 @@ workflow SGE {
ch_software_versions = ch_software_versions.mix(FILTERED_SEQUENCING_QC.out.fastqc_version, FILTERED_SEQUENCING_QC.out.seqkit_version)
}
} else {
ch_reads_to_analyse = ch_read_filter
ch_reads_to_modify = ch_read_filter
}

//
// SUBWORKFLOW: Run read modification (data must be SE by this stage)
//
// Purpose: add a string (e.g. an error-free primer sequence) and matching quality values to the start and/or end of each read
if (params.read_modification) {
READ_MODIFICATION ( ch_reads_to_modify )
ch_reads_to_analyse = READ_MODIFICATION.out.reads
} else {
ch_reads_to_analyse = ch_reads_to_modify
}

//
Expand Down

0 comments on commit f711512

Please sign in to comment.