# func_effects_config.yml
# Configurations for determining functional effects of mutations
# --------------------------------------------------------------------------------------
# Configuration for computing functional scores and fitting global epistasis models
# to each individual selection experiment
# --------------------------------------------------------------------------------------
# Define default parameters for computing functional scores.
# Can be provided to individual selections using the alias operator (*)
func_scores_params_default: &func_scores_params_default
  # Pseudocount used when computing functional scores.
  pseudocount: 0.5
  # `min_wt_count` and `min_wt_frac` are designed to raise errors if a selection
  # experiment has excessively low wildtype counts (which will lead to inaccurate
  # estimates).
  min_wt_count: 1000  # error if not at least this many wildtype counts
  min_wt_frac: 0.001  # error if not at least this fraction of counts for wildtype
  # `min_pre_selection_count` and `min_pre_selection_frac` drop variants with low
  # counts or frequencies that may not have sufficient statistics for good estimates.
  min_pre_selection_count: 10  # drop variants < this many pre-selection counts
  # !!! Make min_pre_selection_frac LARGER for larger libraries, such as
  # 0.1 / (library size) !!!
  # NOTE(review): 0.1 / (library size) gets *smaller* as the library grows, which
  # conflicts with "LARGER for larger libraries" -- confirm the intended direction.
  min_pre_selection_frac: 0.00005  # drop variants that are < this fraction of all counts
# Define default parameters for fitting global epistasis models to each
# individual selection to get mutation effects.
global_epistasis_params: &global_epistasis_params
  # How to clip functional scores at upper / lower bounds. Allowable values:
  #  - median_stop: median func score of all variants with stop codons
  #  - null: no clipping
  #  - a number: clip at this number
  clip_lower: median_stop
  clip_upper: null
  # Do we collapse barcodes with same variant?
  collapse_identical_variants: false  # {false, mean, median}
# Define how to compute functional scores. Each key is a selection experiment.
# A global epistasis model is used to get mutational effects for each selection.
# Recommended naming of selection experiments is:
# <library>-<post-selection sample date as YYMMDD>-<description>-<replicate>
# Each selection experiment should in turn provide the following keys:
# - `post_selection_sample`: sample after selection (eg, infection of cells)
# - `pre_selection_sample`: sample pre selection (eg, initial variant frequencies)
# - `func_score_params`: parameters for computing functional scores
# - `global_epistasis_params`: parameters for fitting global epistasis model
func_scores:
  # Each entry below is one selection experiment; every entry reuses the same
  # anchored defaults for `func_score_params` and `global_epistasis_params`.
  LibA-220210-293T_ACE2-1:
    post_selection_sample: LibA-220210-no_antibody-1
    pre_selection_sample: LibA-211028-VSVG-1
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  LibA-220210-293T_ACE2-2:
    post_selection_sample: LibA-220210-no_antibody-2
    pre_selection_sample: LibA-211028-VSVG-2
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  LibA-220302-293T_ACE2-1:
    post_selection_sample: LibA-220302-no_antibody-1
    pre_selection_sample: LibA-211028-VSVG-1
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  LibA-220302-293T_ACE2-2:
    post_selection_sample: LibA-220302-no_antibody-2
    pre_selection_sample: LibA-211028-VSVG-2
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  LibB-220302-293T_ACE2-1:
    post_selection_sample: LibB-220302-no_antibody-1
    pre_selection_sample: LibB-211028-VSVG-1
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
# --------------------------------------------------------------------------------------
# Configuration for averaging mutation functional effects across selections and plotting
# them.
# --------------------------------------------------------------------------------------
# Average/plot mutation functional effects from different selections w `avg_func_effects`
# Each key is a condition which has the following values:
# - `selections`: list of selections for which we average mutation functional effects
# or if you only want to average some sites for a given selection then
# a dict keyed by the selection name and with the value being the sites
# to include from this selection. Sites should be specified as *sequential*
# sites as a list of site numbers or lists of inclusive ranges (so
# [1, [3, 5]] means [1, 3, 4, 5])
# - `avg_method`: how to average across the selections, options are "median" and "mean"
# - `per_selection_tooltips`: whether to show per-selection effects via tooltips
# - `floor_for_effect_std`: before computing effect std, floor effects at this value.
# - `plot_kwargs`: keyword arguments passed to `polyclonal.plot.lineplot_and_heatmap`
# Define some defaults for each condition, used via the merge (<<) operator.
avg_func_effects_default: &avg_func_effects_default
  avg_method: median
  per_selection_tooltips: true
  floor_for_effect_std: -3
  # Everything under `plot_kwargs` is passed as keyword arguments to
  # `polyclonal.plot.lineplot_and_heatmap`.
  plot_kwargs:
    # Slider statistics and their initial values.
    addtl_slider_stats:
      times_seen: 3
      effect_std: 2
      nt changes to codon: 3
      n_selections: 1
    addtl_slider_stats_as_max: [effect_std, nt changes to codon]
    addtl_slider_stats_hide_not_filter: [nt changes to codon]
    heatmap_max_at_least: 1
    heatmap_min_at_least: -1
    init_floor_at_zero: false
    init_site_statistic: mean
    site_zoom_bar_color_col: region  # supplied in the `site_numbering_map`
    slider_binding_range_kwargs:
      times_seen:
        step: 1
        min: 1
        max: 25
      n_selections:
        step: 1
      nt changes to codon:
        step: 1
        min: 1
        max: 3
# Define the functional effect conditions to average
avg_func_effects:
  # Average over every selection, using all sites.
  293T_ACE2_entry:
    # `<<` is a shallow merge of the defaults; keys written here take precedence.
    <<: *avg_func_effects_default
    selections:
      - LibA-220210-293T_ACE2-1
      - LibA-220210-293T_ACE2-2
      - LibA-220302-293T_ACE2-1
      - LibA-220302-293T_ACE2-2
      - LibB-220302-293T_ACE2-1
  # Average over the same selections, but restrict each one to a site region.
  293T_ACE2_entry_by_region:
    <<: *avg_func_effects_default
    selections:  # keep different regions for libA and libB
      LibA-220210-293T_ACE2-1: [[1, 539]]
      LibA-220210-293T_ACE2-2: [[1, 539]]
      LibA-220302-293T_ACE2-1: [[1, 539]]
      LibA-220302-293T_ACE2-2: [[1, 539]]
      # specified like this rather than [[540, 1251]] just for testing
      LibB-220302-293T_ACE2-1: [540, 541, [542, 1251]]
# --------------------------------------------------------------------------------------
# Configuration for the simple difference of func effects from different conditions
# --------------------------------------------------------------------------------------
# Compute simple difference between average functional effects for different conditions.
# The difference is condition_1 minus condition_2. If you do not want to do this comparison,
# set `func_effect_diffs` to `null` or just leave it out altogether.
#
# Each key is a comparison. Under that name, you should have the following keys:
# - `condition_1`: first condition, keys are `name` and `selections` (`selections` is a list of
#   selections, or if you want to take specific regions for a selection then a dict with values
#   giving the sequential-site regions as for `avg_func_effects`)
# - `condition_2`: second condition, same keys and format as `condition_1`
# - `avg_method`: how to average across selections for a condition, "median" or "mean"
# - `plot_kwargs`: keyword arguments passed to `polyclonal.plot.lineplot_and_heatmap`
func_effect_diffs_default: &func_effect_diffs_default
  avg_method: median
  # Everything under `plot_kwargs` is passed as keyword arguments to
  # `polyclonal.plot.lineplot_and_heatmap`.
  plot_kwargs:
    addtl_slider_stats:
      times_seen: 3
      difference_std: 2  # standard deviation difference across pairwise comparisons for mutation
      fraction_pairs_w_mutation: 1  # fraction of all pairs between conditions w mutation
      best_effect: -2  # effect must be >= this for at least one condition
      220210 effect: null  # slider on effect in this condition, but no initial value
      220302 effect: null  # slider on effect in this condition, but no initial value
      nt changes to codon: 3
    addtl_slider_stats_hide_not_filter:
      - best_effect
      - 220210 effect
      - 220302 effect
      - nt changes to codon
    addtl_slider_stats_as_max: [difference_std, nt changes to codon]
    heatmap_max_at_least: 1
    heatmap_min_at_least: -1
    init_floor_at_zero: false
    init_site_statistic: mean_abs
    site_zoom_bar_color_col: region  # supplied in the `site_numbering_map`
    slider_binding_range_kwargs:
      times_seen:
        step: 1
        min: 1
        max: 25
      nt changes to codon:
        step: 1
        min: 1
        max: 3
func_effect_diffs:
  # NOTE(review): the unquoted condition names `220210` / `220302` load as integers
  # under standard YAML resolvers -- confirm the consumer expects ints rather than
  # the strings "220210" / "220302".
  220210_vs_220302_comparison:
    # `<<` is a shallow merge of the defaults; keys written here take precedence.
    <<: *func_effect_diffs_default
    condition_1:
      name: 220210
      selections:
        - LibA-220210-293T_ACE2-1
        - LibA-220210-293T_ACE2-2
    condition_2:
      name: 220302
      selections:
        - LibA-220302-293T_ACE2-1
        - LibA-220302-293T_ACE2-2
  # only consider specific regions of each condition comparison
  220210_vs_220302_comparison_by_region:
    <<: *func_effect_diffs_default
    condition_1:
      name: 220210
      selections:
        LibA-220210-293T_ACE2-1: [[1, 539]]
        # specified like this rather than [[540, 1251]] just for testing
        LibA-220210-293T_ACE2-2: [540, 541, [542, 1251]]
    condition_2:
      name: 220302
      selections:
        LibA-220302-293T_ACE2-1: [[1, 539]]
        # specified like this rather than [[540, 1251]] just for testing
        LibA-220302-293T_ACE2-2: [540, 541, [542, 1251]]
# --------------------------------------------------------------------------------------
# Configuration for func effect shifts from `multidms` models comparing conditions.
# --------------------------------------------------------------------------------------
# Define `func_effect_shifts` comparisons of different conditions. Each key is a comparison.
# Each comparison should have the following keys:
# - `conditions` : dict keyed by condition names with values func scores from above.
# - `reference` : name of the reference condition, must be in `conditions`
# - `clip_lower` : how to clip functional scores at lower bound
# - `clip_upper` : how to clip functional scores at upper bound
# - `collapse_identical_variants` : do we collapse barcodes with the same variant?
# - `latent_offset` : is there a condition specific offset in latent effects
# - `lasso_shifts` : list of strength of lasso regularization on shifts
# If you are not doing comparisons, just set `func_effect_shifts` to `null` or
# leave it out altogether.
# default settings for `func_effect_shifts` comparisons
func_effect_shifts_default: &func_effect_shifts_default
  # How to clip functional scores at upper / lower bounds. Allowable values:
  #  - median_stop: median func score of all variants with stop codons
  #  - null: no clipping
  #  - a number: clip at this number
  clip_lower: median_stop
  clip_upper: null
  # Do we collapse barcodes with same variant?
  collapse_identical_variants: false  # {false, mean, median}
  # Do we have offset in latent effects for different conditions? `alpha_d` in `multidms`
  latent_offset: true
  # Strength of lasso regularization on shifts in `multidms`. You can try a range of
  # values and then pick a best one at the averaging step.
  lasso_shifts: [0.00001, 0.00005, 0.0001, 0.0002, 0.001]
func_effect_shifts:
  # NOTE(review): the unquoted condition names (`220210`, `220302`) and the
  # `reference` value load as integers under standard YAML resolvers -- confirm the
  # consumer expects ints rather than strings.
  LibA-date_comparison-1:
    # `<<` is a shallow merge of the defaults; keys written here take precedence.
    <<: *func_effect_shifts_default
    reference: 220210
    conditions:
      220210: LibA-220210-293T_ACE2-1
      220302: LibA-220302-293T_ACE2-1
  LibA-date_comparison-2:
    <<: *func_effect_shifts_default
    reference: 220210
    conditions:
      220210: LibA-220210-293T_ACE2-2
      220302: LibA-220302-293T_ACE2-2
# --------------------------------------------------------------------------------------
# Configuration for averaging func effect shifts from multiple comparisons.
# --------------------------------------------------------------------------------------
# Define `func_effect_shifts` comparisons to average. `avg_func_effect_shifts` is keyed
# by name of averaged comparisons. Keys within that are:
# - `comparisons`: list of comparisons from `func_effect_shifts`, must have same
# condition names, lasso shifts, and reference. If you want to take
# only specific regions from a comparison in the averages, then
# make this a dict with values the sequential site regions as
# for `avg_func_effects`.
# - `lasso_shift`: the single lasso shift to use for the final averaged values.
# - `avg_method`: how to average across the selections, options are "median" and "mean"
# - `plot_kwargs`: keyword arguments passed to `polyclonal.plot.lineplot_and_heatmap`
# Define some defaults for each condition, used via the merge (<<) operator.
avg_func_effect_shifts_default: &avg_func_effect_shifts_default
  avg_method: median
  per_comparison_tooltips: true
  # Everything under `plot_kwargs` is passed as keyword arguments to
  # `polyclonal.plot.lineplot_and_heatmap`.
  plot_kwargs:
    addtl_slider_stats:
      times_seen: 3
      nt changes to codon: 3
    addtl_slider_stats_as_max: [nt changes to codon]
    addtl_slider_stats_hide_not_filter: [nt changes to codon]
    heatmap_max_at_least: 0.5
    heatmap_min_at_least: -0.5
    init_floor_at_zero: false
    init_site_statistic: mean
    site_zoom_bar_color_col: region  # supplied in the `site_numbering_map`
    slider_binding_range_kwargs:
      times_seen:
        step: 1
        min: 1
        max: 25
      # NOTE(review): `n_comparisons` has a binding range here but no entry in
      # `addtl_slider_stats` above; presumably the consumer adds that slider
      # itself -- confirm.
      n_comparisons:
        step: 1
      nt changes to codon:
        step: 1
        min: 1
        max: 3
# Define the sets of `func_effect_shifts` comparisons to average
avg_func_effect_shifts:
  # Average both comparisons over all sites.
  date_comparison:
    # `<<` is a shallow merge of the defaults; keys written here take precedence.
    <<: *avg_func_effect_shifts_default
    comparisons:
      - LibA-date_comparison-1
      - LibA-date_comparison-2
    lasso_shift: 0.0001
  # Average the same comparisons, but restrict each one to a site region.
  date_comparison_by_region:
    <<: *avg_func_effect_shifts_default
    comparisons:  # keep different regions for each comparison
      LibA-date_comparison-1: [[1, 539]]
      # specified like this rather than [[540, 1251]] just for testing
      LibA-date_comparison-2: [540, 541, [542, 1251]]
    lasso_shift: 0.0001