-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
155 lines (129 loc) · 7.77 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Config for analysis
# ----------------------------------------------------------------------------
# Relative paths from the directory containing this configuration (and where
# you will run the pipeline) to the subdirectory where the pipeline submodule
# is cloned, and to where you build the docs. Typically your top-level
# Snakefile will be in the root directory and the paths will be
# `dms-vep-pipeline-3` and `docs`, but in this example they are upstream from
# this subdirectory.
# ----------------------------------------------------------------------------
pipeline_path: ../ # typically will be `dms-vep-pipeline-3` for real pipelines
docs: ../docs # typically will be `docs` for real pipelines
# ----------------------------------------------------------------------------
# Details on repo, used for docs. Change these to the details for your project.
# ----------------------------------------------------------------------------
# URL of your main GitHub repo, e.g., https://github.com/<my_organization>/<my_repo>
github_repo_url: https://github.com/dms-vep/dms-vep-pipeline-3
# GitHub blob path to where results files are stored. Typically "{repo}/blob/{branch}";
# the trailing "test_example" below is specific to this test example being within the
# pipeline and should not be needed for other pipelines.
github_blob_url: https://github.com/dms-vep/dms-vep-pipeline-3/blob/main/test_example
# Some descriptions and metadata about the analysis.
description: Deep mutational scanning (DMS) pipeline for a viral entry protein (VEP)
year: 2023
authors: Jesse Bloom
# ----------------------------------------------------------------------------
# Site numbering, mutation classification, and neut standards
# ----------------------------------------------------------------------------
# Map sequential 1, 2, ... numbering of the protein to the desired
# final reference numbering scheme. Required to have columns named
# "sequential_site" and "reference_site". If you just want to use
# sequential numbering for everything, just make both entries sequential.
# Should also have a column called "region" that assigns each site to a
# region of the protein (e.g., a domain like RBD or NTD). Any additional
# columns ending in "site" are retained as tooltips.
site_numbering_map: data/site_numbering_map.csv
# Optional file (you can also omit it) that specifies per-mutation annotations,
# such as "nt changes to codon" for how many nucleotide changes are needed
# to the codon to access a mutation. Should have columns named "site" (the
# reference site number) and "mutant" (the amino acid in question). All
# other columns represent the annotations. If you provide this, annotations in
# this file can be shown in the summary and average escape / functional effects
# plots by specifying the relevant column as an additional slider stat in the
# configuration for those plots.
mutation_annotations: data/mutation_annotations.csv
# Classify mutations into different categories, such as which ones are
# designed to be in the library. If you don't have different categories of
# designed mutations, just include all of the intended mutations with
# mutation type as "designed". The CSV specified below must have columns
# named "mutation_type", "amino_acid" or "mutant_aa", and either
# "reference_site" or "sequential_site" as specified by the `site_col` key.
mutation_design_classification:
  csv: data/mutation_design_classification.csv  # CSV with data
  site_col: sequential_site  # site column, should be reference_site or sequential_site
# Neutralization standard barcodes. Should have columns "barcode" and "name"
# (giving the name of this neutralization standard set). Can be an empty CSV
# with just those columns if there are no neutralization standards.
neut_standard_barcodes: data/neutralization_standard_barcodes.csv
# ----------------------------------------------------------------------------
# Parameters related to building barcode-variant lookup table
# ----------------------------------------------------------------------------
# There are two ways you can get the codon variants: download a pre-built
# codon-variant table, or build them from PacBio CCSs yourself.
# If using pre-built variants, specify the URL (beginning with "http" or "ftp")
# or just the path to the file for both the pre-built codon-variant table and
# the gene (codon) sequence.
# If these next two variables are "null" instead, then the variants are built
# from scratch using the parameters below.
prebuilt_variants: null
prebuilt_geneseq: null
# Parameters for building variants from PacBio sequencing, only needed if
# not using pre-built variants.
pacbio_runs: data/PacBio_runs.csv # PacBio sequencing data
pacbio_amplicon: data/PacBio_amplicon.gb # Genbank file with PacBio amplicon
pacbio_amplicon_specs: data/PacBio_feature_parse_specs.yaml # alignparse feature parsing
# Variant tags in the PacBio amplicon, or "null" if there are no tags.
# Each tag gets its own nested mapping so that the repeated `variant_1`,
# `variant_2`, and `wildtype` keys do not collide with each other.
variant_tags:
  variant_tag5:
    variant_1: G
    variant_2: C
    wildtype: A
  variant_tag3:
    variant_1: G
    variant_2: C
    wildtype: A
max_ccs_error_rate: 1.0e-4  # only keep CCS if gene/barcode error rate <= this
consensus_params:  # parameters for building PacBio consensus sequences
  max_sub_diffs: null
  max_indel_diffs: null
  max_minor_sub_frac: 0.2
  max_minor_indel_frac: 0.2
  min_support: 2
# Created files with the codon and protein sequences of the parental gene.
gene_sequence_codon: results/gene_sequence/codon.fasta
gene_sequence_protein: results/gene_sequence/protein.fasta
# Created file with the barcode-variant lookup table.
codon_variants: results/variants/codon_variants.csv
# ----------------------------------------------------------------------------
# Parameters related to counting the variants from barcode sequencing
# ----------------------------------------------------------------------------
barcode_runs: data/barcode_runs.csv # Illumina barcode runs, set to null if no runs
# `duplicate_fastq_R1` specifies what to do if the same FASTQ is specified for multiple
# samples in `barcode_runs`. Options are "error", "warn", or "ignore". If you
# do not specify `duplicate_fastq_R1` in this configuration, it defaults to "error".
duplicate_fastq_R1: error
# If the repo already includes the tracked barcode counts, and you just want to use
# those and **not** recompute them by processing the FASTQ files in `barcode_runs`,
# set this to `true`.
use_precomputed_barcode_counts: false
# Keyword parameters for `dms_variants.illuminabarcodeparser.IlluminaBarcodeParser`;
# every key nested below is passed as one of those keyword parameters. See
# https://jbloomlab.github.io/dms_variants/dms_variants.illuminabarcodeparser.html#dms_variants.illuminabarcodeparser.IlluminaBarcodeParser
illumina_barcode_parser_params:
  upstream: AACTCCACTAGGAACATTTCTCTCTCGAATCTAGA
  downstream: ""
  minq: 20
  upstream_mismatch: 2
# ----------------------------------------------------------------------------
# Configuration related to other analyses
# ----------------------------------------------------------------------------
# For each key, set it to "null" or just don't provide it if you aren't doing that
# type of analysis. Otherwise provide the path to the configuration for that analysis.
func_effects_config: data/func_effects_config.yml # Functional effects of mutations
antibody_escape_config: data/antibody_escape_config.yml # escape assays (eg, antibodies)
summaries_config: data/summaries_config.yml # Summaries across assays
# ----------------------------------------------------------------------------
# Configuration if you're going to build a custom VitePress homepage
# ----------------------------------------------------------------------------
# Everything in this section can be omitted unless you want to build a VitePress
# homepage that is nicely styled for GitHub Pages.
# For details on building a VitePress homepage, see:
# https://github.com/dms-vep/dms-vep-pipeline-3/blob/add-vitepress-homepage/homepage/README.md
build_vitepress_homepage: true
homepage: ../homepage/public # typically will be `homepage/public` for real pipelines