-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
executable file
·121 lines (102 loc) · 6.07 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Config for analysis

# ----------------------------------------------------------------------------
# Locations of the pipeline submodule and of the docs build, given as paths
# relative to the directory that contains this configuration (which is also
# the directory the pipeline is run from). For a real pipeline with the
# top-level Snakefile in the root directory, these are typically
# `dms-vep-pipeline` and `docs`.
# ----------------------------------------------------------------------------

# subdirectory where the pipeline submodule is cloned
pipeline_path: dms-vep-pipeline-3
# subdirectory where the docs are built
docs: docs/public/
# ----------------------------------------------------------------------------
# Repository details used for the docs. Change these to match your project.
# ----------------------------------------------------------------------------

# URL of the main GitHub repo, eg https://github.com/<my_organization>/<my_repo>
github_repo_url: https://github.com/dms-vep/Nipah_Malaysia_RBP_DMS

# GitHub blob URL under which results files are stored, typically
# "{repo}/blob/{branch}".
github_blob_url: https://github.com/dms-vep/Nipah_Malaysia_RBP_DMS/blob/main

# Description and metadata about the analysis.
description: >-
  Deep mutational scanning of the Nipah Malaysian strain Receptor Binding
  Protein using a barcoded lentiviral vector
year: 2023
authors: Brendan Larsen, Teagan McMahon, and Jesse Bloom
# ----------------------------------------------------------------------------
# Site numbering, mutation classification, and neut standards
# ----------------------------------------------------------------------------

# Map sequential (1, 2, ...) numbering of the protein to the desired final
# reference numbering scheme. The CSV is required to have columns named
# "sequential_site" and "reference_site". If you just want sequential
# numbering for everything, make both columns sequential. It should also
# have a column called "region" that assigns each site to a region of the
# protein (eg, a domain like RBD or NTD).
site_numbering_map: data/site_numbering_map.csv

# Classify mutations into different categories, such as which ones are
# designed to be in the library. If you don't have different categories of
# designed mutations, just include all of the intended mutations with
# mutation type as "designed". The CSV specified below must have columns
# named "mutation_type", "amino_acid" or "mutant_aa", and either
# "reference_site" or "sequential_site" as specified by the `site_col` key.
mutation_design_classification:
  csv: data/designed_mutations.csv  # CSV with data
  site_col: sequential_site  # should be reference_site or sequential_site

# Neutralization standard barcodes. Should have columns "barcode" and "name"
# (giving name of this neutralization standard set). Can be an empty CSV with
# those columns if there are no neutralization standards.
neut_standard_barcodes: data/neutralization_standard_barcodes.csv
# ----------------------------------------------------------------------------
# Parameters related to building barcode-variant lookup table
# ----------------------------------------------------------------------------

# The codon variants can come from one of two sources: a pre-built
# codon-variant table that is downloaded, or a table built from PacBio CCSs.
#
# To use pre-built variants, give the codon-variant table and the gene
# (codon) sequence, each as a URL (beginning with "http" or "ftp") or as a
# path to a file. If both of these keys are "null", the variants are built
# from scratch using the parameters below.
prebuilt_variants: null
prebuilt_geneseq: null

# Inputs for building variants from PacBio sequencing; only needed when not
# using pre-built variants.

# PacBio sequencing data
pacbio_runs: data/PacBio_runs.csv
# Genbank file with PacBio amplicon
pacbio_amplicon: data/PacBio_amplicon.gb
# alignparse feature parsing
pacbio_amplicon_specs: data/PacBio_feature_parse_specs.yaml
# Variant tags in the PacBio amplicon, or "null" if there are no tags.
# Each tag (`variant_tag5`, `variant_tag3`) is its own mapping from a variant
# name to a single nucleotide.
variant_tags:
  variant_tag5:
    variant_1: G
    variant_2: C
    wildtype: A
  variant_tag3:
    variant_1: G
    variant_2: C
    wildtype: A
# Only keep a CCS if its gene/barcode error rate is <= this value.
max_ccs_error_rate: 1.0e-4

# Parameters for building PacBio consensus sequences.
consensus_params:
  max_sub_diffs: null
  max_indel_diffs: null
  max_minor_sub_frac: 0.2
  max_minor_indel_frac: 0.2
  min_support: 3

# Created files with sequences of the parental protein.
gene_sequence_codon: results/gene_sequence/codon.fasta
gene_sequence_protein: results/gene_sequence/protein.fasta

# Created file with the barcode-variant lookup table.
codon_variants: results/variants/codon_variants.csv
# ----------------------------------------------------------------------------
# Parameters related to counting the variants from barcode sequencing
# ----------------------------------------------------------------------------

# Illumina barcode runs; set to null if there are no runs.
barcode_runs: data/barcode_runs.csv

# Keyword parameters for `dms_variants.illuminabarcodeparser.IlluminaBarcodeParser`
# https://jbloomlab.github.io/dms_variants/dms_variants.illuminabarcodeparser.html#dms_variants.illuminabarcodeparser.IlluminaBarcodeParser
illumina_barcode_parser_params:
  upstream: ACTCCACTAGGAACATTTCTCTCTCGAATCTAGA
  downstream: ''
  minq: 20
  upstream_mismatch: 2
# ----------------------------------------------------------------------------
# Configuration related to other analyses
# ----------------------------------------------------------------------------
# Each key below gives the path to the configuration for one type of
# analysis. Set a key to "null", or leave it out, if you aren't doing that
# type of analysis.

# Functional effects of mutations
func_effects_config: data/func_effects_config.yml
# Antibody/serum escape
antibody_escape_config: data/antibody_escape_config.yml
# Summaries across assays
summaries_config: data/summaries_config.yml