-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfunc_effects_config.yml
executable file
·245 lines (222 loc) · 11.8 KB
/
func_effects_config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# Configurations for determining functional effects of mutations
# --------------------------------------------------------------------------------------
# Configuration for computing functional scores and fitting global epistasis models
# to each individual selection experiment
# --------------------------------------------------------------------------------------
# Define default parameters for computing functional scores.
# Individual selections reference this mapping with the alias operator (*).
func_scores_params_default: &func_scores_params_default
  pseudocount: 0.5
  # `min_wt_count` and `min_wt_frac` are designed to raise errors if a selection
  # experiment has excessively low wildtype counts (which will lead to inaccurate
  # estimates).
  min_wt_count: 10000  # error if not at least this many wildtype counts
  min_wt_frac: 0.001  # error if not at least this fraction of counts for wildtype
  # `min_pre_selection_count` and `min_pre_selection_frac` drop variants with low counts
  # or frequencies that may not have sufficient statistics for good estimates.
  min_pre_selection_count: 25  # drop variants < this many pre-selection counts
  # !!! Make min_pre_selection_frac LARGER for larger libraries, such as 0.1 / (library size) !!!
  # NOTE(review): 0.1 / (library size) gets SMALLER as the library grows, which
  # conflicts with "LARGER" above -- confirm which direction is intended.
  min_pre_selection_frac: 0.000002  # drop variants that are < this fraction of all counts
# Define default parameters for fitting global epistasis models to each
# individual selection to get mutation effects.
global_epistasis_params: &global_epistasis_params
  # How to clip functional scores at upper / lower bounds. Allowable values:
  #  - median_stop: median func score of all variants with stop codons
  #  - null: no clipping
  #  - a number: clip at this number
  clip_lower: median_stop
  clip_upper: null
  # Do we collapse barcodes with same variant?
  collapse_identical_variants: false  # {false, mean, median}
# Define how to compute functional scores. Each key is a selection experiment.
# A global epistasis model is used to get mutational effects for each selection.
# Recommended naming of selection experiments is:
# <library>-<post-selection sample date as YYMMDD>-<description>-<replicate>
# Each selection experiment should in turn provide the following keys:
# - `post_selection_sample`: sample after selection (eg, infection of cells)
# - `pre_selection_sample`: sample pre selection (eg, initial variant frequencies)
# - `func_score_params`: parameters for computing functional scores
# - `global_epistasis_params`: parameters for fitting global epistasis model
func_scores:
  # One selection per library and ACE2 level. Within a library, the high- and
  # medium-ACE2 selections use the same pre-selection sample.
  Lib1-230614_high_ACE2:
    pre_selection_sample: Lib1-230614-VSVG_control_293T
    post_selection_sample: Lib1-230614-no-antibody_control_highACE2
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib2-230614_high_ACE2:
    pre_selection_sample: Lib2-230614-VSVG_control_293T
    post_selection_sample: Lib2-230614-no-antibody_control_highACE2
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib1-230614_medium_ACE2:
    pre_selection_sample: Lib1-230614-VSVG_control_293T
    post_selection_sample: Lib1-230614-no-antibody_control_mediumACE2
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib2-230614_medium_ACE2:
    pre_selection_sample: Lib2-230614-VSVG_control_293T
    post_selection_sample: Lib2-230614-no-antibody_control_mediumACE2
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
# --------------------------------------------------------------------------------------
# Configuration for averaging mutation functional effects across selections and plotting
# them.
# --------------------------------------------------------------------------------------
# Average/plot mutation functional effects from different selections with `avg_func_effects`.
# Each key is a condition which has the following values:
# - `selections`: list of selections for which we average mutation functional effects
# - `avg_method`: how to average across the selections, options are "median" and "mean"
# - `floor_for_effect_std`: floor values at this before computing standard deviation.
# - `per_selection_tooltips`: whether to show per-selection effects via tooltips
# - `plot_kwargs`: keyword arguments passed to `polyclonal.plot.lineplot_and_heatmap`
# - `title`: title of plot. Will be suffixed with "(latent phenotype)" or "(functional score)"
# - `legend`: legend added to plot
# Define some defaults for each condition, used via the merge (<<) operator.
# The merge is shallow: a condition that defines its own `plot_kwargs` replaces
# this entire `plot_kwargs` mapping rather than extending it.
avg_func_effects_default: &avg_func_effects_default
  avg_method: median
  floor_for_effect_std: -2.5
  per_selection_tooltips: true
  plot_kwargs:
    addtl_slider_stats:
      times_seen: 3
      effect_std: 1.6
    addtl_slider_stats_as_max: [effect_std]
    heatmap_max_at_least: 1
    heatmap_min_at_least: -1
    heatmap_min_fixed: -2.5
    init_floor_at_zero: false
    init_site_statistic: mean
    site_zoom_bar_color_col: region  # supplied in the `site_numbering_map`
    slider_binding_range_kwargs:
      times_seen:
        step: 1
        min: 1
        max: 25
      n_selections:
        step: 1
    # NOTE(review): this key had no value; written as an explicit null (same
    # parsed value). Its nesting under `plot_kwargs` is presumed because it is
    # not among the documented per-condition keys -- confirm.
    sites_to_show: null
  legend: |
    Interactive plot of the effects of mutations. Negative values indicated deleterious mutations,
    positive values indicate beneficial mutations for the measured phenotype.
    Use the site zoom bar at the top to zoom in on specific sites. The line plot shows a summary
    statistic indicating the effects of mutations at each site. The heat map shows the effects of
    individual mutations, with parental amino-acid identities indicated by x and gray
    indicating non-measured mutations.
    You can mouse over points to get details about individual measurements, include measurements
    in individual selection experiments.
    The options at the bottom of the plot let you modify the display, such as by selecting how
    many different variants a mutation must be seen in to be shown (*minimum times_seen*),
    how many different experimental selections the mutation was measured in
    (*minimum n_selections*), what site summary statistic to show, etc.
    The *minimum max of effect* at site is useful to select the sites where mutations have
    the most positive functional effects.
# Define the functional effect conditions to average. Each condition merges in
# the shared defaults and adds its own `title` and list of `selections`
# (names must match keys under `func_scores`).
avg_func_effects:
  293T_high_ACE2_entry:
    <<: *avg_func_effects_default
    title: Mutation effects on entry into 293T cells expressing high ACE2
    selections:
      - Lib1-230614_high_ACE2
      - Lib2-230614_high_ACE2
  293T_medium_ACE2_entry:
    <<: *avg_func_effects_default
    title: Mutation effects on entry into 293T cells expressing medium ACE2
    selections:
      - Lib1-230614_medium_ACE2
      - Lib2-230614_medium_ACE2
# --------------------------------------------------------------------------------------
# Configuration for func effect shifts from `multidms` models comparing conditions.
# --------------------------------------------------------------------------------------
# Define `func_effect_shifts` comparisons of different conditions. Each key is a comparison.
# Each comparison should have the following keys:
# - `conditions` : dict keyed by condition names with values func scores from above.
# - `reference` : name of the reference condition, must be in `conditions`
# - `clip_lower` : how to clip functional scores at lower bound
# - `clip_upper` : how to clip functional scores at upper bound
# - `collapse_identical_variants` : do we collapse barcodes with the same variant?
# - `latent_offset` : is there a condition specific offset in latent effects
# - `lasso_shifts` : list of strength of lasso regularization on shifts
# If you are not doing comparisons, just set `func_effect_shifts` to `null` or
# leave it out altogether.
# Default settings for `func_effect_shifts` comparisons, pulled into each
# comparison via the merge (<<) operator.
func_effect_shifts_default: &func_effect_shifts_default
  # How to clip functional scores at upper / lower bounds. Allowable values:
  #  - median_stop: median func score of all variants with stop codons
  #  - null: no clipping
  #  - a number: clip at this number
  clip_lower: median_stop
  clip_upper: null
  # Do we collapse barcodes with same variant?
  collapse_identical_variants: false  # {false, mean, median}
  # Do we have offset in latent effects for different conditions? `alpha_d` in `multidms`
  latent_offset: true
  # Strength of lasso regularization on shifts in `multidms`. You can try a range of
  # values and then pick a best one at the averaging step.
  lasso_shifts: [0.00001, 0.00005, 0.0001, 0.0002, 0.001]
func_effect_shifts:
  # One comparison per library. `conditions` maps a condition name to a key
  # under `func_scores`; `reference` names one of those conditions.
  Lib1-230614_ACE2_expression:
    <<: *func_effect_shifts_default
    reference: high_ACE2
    conditions:
      high_ACE2: Lib1-230614_high_ACE2
      medium_ACE2: Lib1-230614_medium_ACE2
  Lib2-230614_ACE2_expression:
    <<: *func_effect_shifts_default
    reference: high_ACE2
    conditions:
      high_ACE2: Lib2-230614_high_ACE2
      medium_ACE2: Lib2-230614_medium_ACE2
# --------------------------------------------------------------------------------------
# Configuration for averaging func effect shifts from multiple comparisons.
# --------------------------------------------------------------------------------------
# Define `func_effect_shifts` comparisons to average. `avg_func_effect_shifts` is keyed
# by name of averaged comparisons. Keys within that are:
# - `title`: title of comparisons being averaged
# - `comparisons`: list of comparisons from `func_effect_shifts`, must have same
# condition names, lasso shifts, and reference.
# - `lasso_shift`: the single lasso shift to use for the final averaged values.
# - `avg_method`: how to average across the comparisons, options are "median" and "mean"
# - `plot_kwargs`: keyword arguments passed to `polyclonal.plot.lineplot_and_heatmap`
# - `title`: title of plot. Will be suffixed with "(latent phenotype)" or "(functional score)"
# - `legend`: legend added to plot
# Define some defaults for each averaged comparison, used via the merge (<<) operator.
# The merge is shallow: an entry that defines its own `plot_kwargs` replaces
# this entire `plot_kwargs` mapping rather than extending it.
avg_func_effect_shifts_default: &avg_func_effect_shifts_default
  avg_method: median
  per_comparison_tooltips: true
  plot_kwargs:
    addtl_slider_stats:
      times_seen: 3
    heatmap_max_at_least: 0.5
    heatmap_min_at_least: -0.5
    init_floor_at_zero: false
    init_site_statistic: mean
    site_zoom_bar_color_col: region  # supplied in the `site_numbering_map`
    slider_binding_range_kwargs:
      times_seen:
        step: 1
        min: 1
        max: 25
      n_comparisons:
        step: 1
  legend: |
    Interactive plot of shifts in effects of mutations. Negative values indicate mutations that
    have shifted to become more deleterious; positive values indicate mutations shifted to
    be more beneficial.
    Use the site zoom bar at the top to zoom in on specific sites. The line plot shows a summary
    statistic indicating the effects of mutations at each site. The heat map shows the effects of
    individual mutations, with parental amino-acid identities indicated by x and gray
    indicating non-measured mutations.
    You can mouse over points to get details about individual measurements, including measurements
    in individual comparisons.
    The options at the bottom of the plot let you modify the display, such as by selecting how
    many different variants a mutation must be seen in to be shown (*minimum times_seen*),
    how many different experimental comparisons the mutation was measured in
    (*minimum n_comparisons*), what site summary statistic to show, etc.
# Define the sets of `func_effect_shifts` comparisons to average.
avg_func_effect_shifts:
  target_cell_ACE2_expression_comparison:
    <<: *avg_func_effect_shifts_default
    title: Comparison of functional effects measured on 293T cells expressing different levels of ACE2
    comparisons:
      - Lib1-230614_ACE2_expression
      - Lib2-230614_ACE2_expression
    # Must be one of the `lasso_shifts` values fit for the listed comparisons.
    lasso_shift: 0.0001