Skip to content

Commit

Permalink
Update to v3.0.0.2
Browse files Browse the repository at this point in the history
* Collation of cutadapt JSON results into single JSON file
* Collation of SeqKit statistics results into a single TSV file
* Update version of pyQUEST to version 1.1.0
    * Improved handling of 0-length reads
    * Ability to extract top 50 library-independent counts as FASTA
  • Loading branch information
vaofford committed Oct 11, 2023
2 parents 44e79f7 + 5079736 commit a018409
Show file tree
Hide file tree
Showing 13 changed files with 169 additions and 10 deletions.
1 change: 1 addition & 0 deletions .github/workflows/linting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ jobs:
allow-repeats: false

nf-core:
if: false
runs-on: ubuntu-latest
steps:

Expand Down
15 changes: 12 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,20 @@ Initial release of QUANTS, created with the [nf-core](https://nf-co.re/) templat
## 3.0.0.0 - [21st August 2023]

* Split read trimming into two stages
* Adapter trimming - removes user-defined adapter sequences and takes forward both trimmed and untrimmed reads
* Primer trimming - removes user-defined primer sequences and takes forward only trimmed reads
* Adapter trimming - removes user-defined adapter sequences and takes forward both trimmed and untrimmed reads
* Primer trimming - removes user-defined primer sequences and takes forward only trimmed reads
* Add a read modification process which can append user-defined sequences to trimmed reads
* Add library transformer to allow users to provide libraries in a different format (e.g. the meta CSV from VaLiAnT) and convert it for use with pyQUEST

## 3.0.0.1 - [12th September 2023]

* Primer trimming - bugfix to ensure cutadapt splits reads into trimmed and untrimmed files
* Primer trimming - bugfix to ensure cutadapt splits reads into trimmed and untrimmed files

## 3.0.0.2 - [11th October 2023]

* Collation of cutadapt JSON results into single JSON file
* Collation of SeqKit statistics results into a single TSV file
* Update version of pyQUEST to version 1.1.0
* Improved handling of 0-length reads
* Ability to extract top 50 library-independent counts as FASTA

4 changes: 4 additions & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,8 @@ process {
errorStrategy = 'retry'
maxRetries = 2
}

withName:COLLATE_CUTADAPT_JSONS {
executor = 'local'
}
}
45 changes: 45 additions & 0 deletions functions/functions.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//
// Appends one named output of a (sub)workflow to an accumulating channel.
//   channel     - channel that results are accumulated into
//   workflow    - workflow object whose `out` holds the output channels
//   out_channel - name of the output to look up on `workflow.out`
// The selected output is combined with the last ':'-separated component of
// `workflow.name` and the result is mixed into `channel`.
// Returns the mixed channel.
//
def add_stats_with_stage(channel, workflow, String out_channel) {
    def selected_output = workflow.out.getProperty(out_channel)
    def stage_name = workflow.name.split(':').last()
    def output_with_stage = selected_output.combine([stage_name])

    return channel.mix(output_with_stage)
}

//
// Strips known stage suffixes from the end of a sample name.
// The patterns are applied one after another in the order listed, each to the
// result of the previous one, so more than one suffix can be removed in one call.
// Names that end in none of the suffixes are returned unchanged.
//
def trim_sample_name(sample_name) {
    def stage_suffixes = [
        /_raw$/,
        /_primer_trimmed$/,
        /_adapter_trimmed$/,
        /_merged$/,
        /_merged_filtered$/
    ]

    return stage_suffixes.inject(sample_name) { name, suffix ->
        name.replaceFirst(suffix, "")
    }
}

//
// Prepends two columns, sample and stage, to every line of a seqkit stats file.
//   meta  - sample metadata; `meta.id` is the sample name (stage suffix is trimmed)
//   path  - location of the seqkit stats file to read
//   stage - value written into the "stage" column of every non-header line
// The first line is treated as the header and receives the literal column names
// "sample" and "stage"; every other line receives the trimmed sample name and `stage`.
// Returns the rewritten content as a single newline-terminated string.
//
def modify_seqkit_stats(meta, path, stage) {
    // TODO should be removed in the future once sample name handling in the pipeline is consistent
    def sample_name = trim_sample_name(meta.id)

    // Everything below is declared with `def` so it stays local to this call;
    // undeclared names would be stored in the script binding and shared between invocations.
    def newLines = file(path).readLines().withIndex().collect { text, i ->
        def prefix = (i == 0) ? "sample" + "\t" + "stage" : sample_name + "\t" + stage
        prefix + "\t" + text
    }

    return newLines.join("\n") + "\n"
}
6 changes: 3 additions & 3 deletions modules/local/cutadapt/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ process CUTADAPT {
output:
tuple val(meta), path('*_trimmed{,_1,_2}.fastq.gz') , emit: reads
tuple val(meta), path('*_untrimmed{,_1,_2}.fastq.gz'), emit: untrimmed_reads, optional: true
tuple val(meta), path('*.log') , emit: log
tuple val(meta), path('*.json') , emit: json
path '*.version.txt' , emit: version
tuple val(meta), path('*.log') , emit: log
tuple val(meta), path('*.json') , emit: json
path '*.version.txt' , emit: version

script:
def software = getSoftwareName(task.process)
Expand Down
25 changes: 25 additions & 0 deletions modules/local/cutadapt_json_collation/functions.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import groovy.json.JsonSlurper

//
// Builds the collated cutadapt record for one sample.
//   meta      - sample metadata; `meta.id` becomes the single top-level key
//   pathList  - cutadapt JSON reports for the sample
//   stageList - stage name for each report, paired with `pathList` by position
// Each report is parsed and two derived fields are added to its "read_counts":
// "read1_with_adapter_percent" and "read2_with_adapter_percent", both expressed as a
// percentage of "input". A percentage is null when its count is absent (null) or when
// "input" is missing or 0, because the ratio is undefined in those cases.
// Returns a map of the form [sample id: [stage: parsed report]].
//
def compose_cutadapt_jsons(meta, pathList, stageList) {
    def jsonSlurper = new JsonSlurper()
    def stages = [:]

    [pathList, stageList].transpose().each { path, stage ->
        def report = jsonSlurper.parse(path)
        def counts = report["read_counts"]
        def total = counts["input"]

        // Explicit null check on the count: a count of 0 is a valid value and must
        // yield 0 percent rather than being treated as "absent" by Groovy truth.
        // `!total` guards against dividing by a missing or zero input read count.
        def percent_of_input = { count ->
            (count == null || !total) ? null : 100 * count / total
        }

        counts["read1_with_adapter_percent"] = percent_of_input(counts["read1_with_adapter"])
        counts["read2_with_adapter_percent"] = percent_of_input(counts["read2_with_adapter"])

        stages[stage] = report
    }

    return [(meta.id): stages]
}
33 changes: 33 additions & 0 deletions modules/local/cutadapt_json_collation/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import groovy.json.JsonOutput

// Import generic module functions
include { compose_cutadapt_jsons } from './functions'

process COLLATE_CUTADAPT_JSONS {
    label 'process_low'
    publishDir "${params.outdir}/cutadapt", mode: params.publish_dir_mode

    input:
    val inputList // list of tuples [meta, [list of jsons], [list of stages]]

    output:
    path 'cutadapt.json', emit: json

    exec:
    // Writes one JSON object to cutadapt.json in the task work directory, one line per
    // sample: each per-sample record is serialised on its own, its outer braces are
    // dropped, and the lines are wrapped in a single pair of braces.
    String collatedPath = "${task.workDir}${File.separator}cutadapt.json"
    int lastIndex = inputList.size() - 1

    new File(collatedPath).withWriter { sink ->
        sink.writeLine('{')

        inputList.eachWithIndex { entry, position ->
            def (meta, jsonPaths, stageNames) = entry
            String sampleJson = JsonOutput.toJson(compose_cutadapt_jsons(meta, jsonPaths, stageNames))
            // every line except the last one ends with a comma
            String separator = position < lastIndex ? ',' : ''
            // [1..-2] removes the enclosing '{' and '}' of the per-sample object
            sink.writeLine(' ' + sampleJson[1..-2] + separator)
        }

        sink.writeLine('}')
    }
}
17 changes: 17 additions & 0 deletions modules/local/cutadapt_json_collation/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: cutadapt_json_collation
description: Collate all cutadapt output jsons into one file
keywords:
- cutadapt
input:
- inputList:
type: list
description: |
Groovy list containing tuples of three objects:
meta, list of cutadapt jsons, list of stages
output:
- json:
type: file
description: collated cutadapt json file for all samples
pattern: "cutadapt.json"
authors:
- "@y-popov"
2 changes: 1 addition & 1 deletion modules/local/pyquest/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ process PYQUEST {
container "quay.io/biocontainers/flash2:2.2.00--h5bf99c6_3"
}
*/
container "quay.io/wtsicgp/pyquest:1.0.0"
container "quay.io/wtsicgp/pyquest:1.1.0"

input:
tuple val(meta), path(reads)
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ manifest {
description = 'Analysis pipeline for saturation genome editing screens'
mainScript = 'main.nf'
nextflowVersion = '!>=21.10.6'
version = '3.0.0.1'
version = '3.0.0.2'
}

// Function to ensure that resource requirements don't go beyond
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/adapter_trimming.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ workflow ADAPTER_TRIMMING {

CUTADAPT_ADAPTER ( reads )
ch_trimmed_reads = CUTADAPT_ADAPTER.out.reads
ch_trimmed_stats = CUTADAPT_ADAPTER.out.log
ch_trimmed_stats = CUTADAPT_ADAPTER.out.json
}
emit:
reads = ch_trimmed_reads
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/primer_trimming.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ workflow PRIMER_TRIMMING {

CUTADAPT_PRIMER ( reads )
ch_trimmed_reads = CUTADAPT_PRIMER.out.reads
ch_trimmed_stats = CUTADAPT_PRIMER.out.log
ch_trimmed_stats = CUTADAPT_PRIMER.out.json
}
emit:
reads = ch_trimmed_reads
Expand Down
25 changes: 25 additions & 0 deletions workflows/sge.nf
Original file line number Diff line number Diff line change
Expand Up @@ -175,13 +175,20 @@ include { SEQUENCING_QC as RAW_SEQUENCING_QC;
SEQUENCING_QC as PRIMER_TRIMMED_SEQUENCING_QC;
SEQUENCING_QC as FILTERED_SEQUENCING_QC
} from '../subworkflows/local/sequencing_qc' addParams( options: [:] )
include { COLLATE_CUTADAPT_JSONS } from '../modules/local/cutadapt_json_collation/main.nf' addParams( options: [:] )
// editorconfig-checker-disable

//
// MODULE: Installed directly from nf-core/modules
//
include { MULTIQC } from '../modules/nf-core/multiqc/main' addParams( options: multiqc_options )

//
// FUNCTIONS: collection of custom functions
//
include { modify_seqkit_stats } from '../functions/functions.nf'
include { add_stats_with_stage } from '../functions/functions.nf'

/*
========================================================================================
RUN MAIN WORKFLOW
Expand All @@ -194,6 +201,8 @@ def multiqc_report = []
workflow SGE {
// Set up empty channels
ch_software_versions = Channel.empty()
seqkit_stat_ch = Channel.empty()
cutadapt_jsons_ch = Channel.empty()

if (params.input_type == 'cram') {
//
Expand Down Expand Up @@ -224,6 +233,7 @@ workflow SGE {
ch_raw_read_qc = ch_raw_reads.map{it -> [[id: it[0].id + '_raw', single_end: it[0].single_end], it[1]]}
RAW_SEQUENCING_QC ( ch_raw_read_qc )
ch_software_versions = ch_software_versions.mix(RAW_SEQUENCING_QC.out.fastqc_version, RAW_SEQUENCING_QC.out.seqkit_version)
seqkit_stat_ch = add_stats_with_stage(seqkit_stat_ch, RAW_SEQUENCING_QC, 'seqkit_stats')
}

//
Expand All @@ -233,13 +243,15 @@ workflow SGE {
// Run adapter trimming
ADAPTER_TRIMMING ( ch_adapter_trim )
ch_software_versions = ch_software_versions.mix(ADAPTER_TRIMMING.out.versions)
cutadapt_jsons_ch = add_stats_with_stage(cutadapt_jsons_ch, ADAPTER_TRIMMING, 'stats')
//
//SUBWORKFLOW: Run FASTQC on adapter trimmed reads
//
if (params.adapter_trimming_qc) {
ch_adapter_trimming_qc = ADAPTER_TRIMMING.out.reads.map{it -> [[id: it[0].id + '_adapter_trimmed', single_end: it[0].single_end], it[1]]}
ADAPTER_TRIMMED_SEQUENCING_QC ( ch_adapter_trimming_qc )
ch_software_versions = ch_software_versions.mix(ADAPTER_TRIMMED_SEQUENCING_QC.out.fastqc_version, ADAPTER_TRIMMED_SEQUENCING_QC.out.seqkit_version)
seqkit_stat_ch = add_stats_with_stage(seqkit_stat_ch, ADAPTER_TRIMMED_SEQUENCING_QC, 'seqkit_stats')
}
// Send to next stage
ch_primer_trim = ADAPTER_TRIMMING.out.reads
Expand All @@ -254,13 +266,15 @@ workflow SGE {
// Run primer trimming
PRIMER_TRIMMING ( ch_primer_trim )
ch_software_versions = ch_software_versions.mix(PRIMER_TRIMMING.out.versions)
cutadapt_jsons_ch = add_stats_with_stage(cutadapt_jsons_ch, PRIMER_TRIMMING, 'stats')
//
//SUBWORKFLOW: Run FASTQC on primer trimmed reads
//
if (params.primer_trimming_qc) {
ch_primer_trimming_qc = PRIMER_TRIMMING.out.reads.map{it -> [[id: it[0].id + '_primer_trimmed', single_end: it[0].single_end], it[1]]}
PRIMER_TRIMMED_SEQUENCING_QC ( ch_primer_trimming_qc )
ch_software_versions = ch_software_versions.mix(PRIMER_TRIMMED_SEQUENCING_QC.out.fastqc_version, PRIMER_TRIMMED_SEQUENCING_QC.out.seqkit_version)
seqkit_stat_ch = add_stats_with_stage(seqkit_stat_ch, PRIMER_TRIMMED_SEQUENCING_QC, 'seqkit_stats')
}
// Send to next stage
ch_read_merge = PRIMER_TRIMMING.out.reads
Expand All @@ -283,6 +297,7 @@ workflow SGE {
ch_merged_read_qc = ch_read_transform
MERGED_SEQUENCING_QC ( ch_merged_read_qc )
ch_software_versions = ch_software_versions.mix(MERGED_SEQUENCING_QC.out.fastqc_version, MERGED_SEQUENCING_QC.out.seqkit_version)
seqkit_stat_ch = add_stats_with_stage(seqkit_stat_ch, MERGED_SEQUENCING_QC, 'seqkit_stats')
}
} else {
ch_read_transform = ch_read_merge
Expand Down Expand Up @@ -316,6 +331,7 @@ workflow SGE {
ch_filtered_read_qc = READ_FILTERING.out.reads.map{it -> [[id: it[0].id + '_filtered', single_end: true], it[1]]}
FILTERED_SEQUENCING_QC ( ch_filtered_read_qc )
ch_software_versions = ch_software_versions.mix(FILTERED_SEQUENCING_QC.out.fastqc_version, FILTERED_SEQUENCING_QC.out.seqkit_version)
seqkit_stat_ch = add_stats_with_stage(seqkit_stat_ch, FILTERED_SEQUENCING_QC, 'seqkit_stats')
}
} else {
ch_reads_to_modify = ch_read_filter
Expand Down Expand Up @@ -361,6 +377,15 @@ workflow SGE {
ch_software_versions.map { it }.collect()
)

seqkit_stat_ch
.map { meta, file, stage -> modify_seqkit_stats(meta, file, stage) }
.collectFile(keepHeader: true, name: 'seqkit_stats.tsv', storeDir: "${params.outdir}/seqkit_stats")

cutadapt_jsons_ch
.groupTuple()
.toList()
| COLLATE_CUTADAPT_JSONS

//
// MODULE: MultiQC
//
Expand Down

0 comments on commit a018409

Please sign in to comment.