# data/func_effects_config.yml

# Configurations for determining functional effects of mutations

# --------------------------------------------------------------------------------------
# Configuration for computing functional scores and fitting global epistasis models
# to each individual selection experiment
# --------------------------------------------------------------------------------------

# Define default parameters for computing functional scores.
# Can be provided to individual selections using the alias operator (*)
func_scores_params_default: &func_scores_params_default
  # NOTE(review): presumably added to variant counts before scores are computed so that
  # zero counts do not break the calculation -- confirm against the pipeline code.
  pseudocount: 0.5
  # `min_wt_count` and `min_wt_frac` are designed to raise errors if a selection experiment
  # has excessively low wildtype counts (which will lead to inaccurate estimates).
  min_wt_count: 10000  # error if not at least this many wildtype counts
  min_wt_frac: 0.001  # error if not at least this fraction of counts for wildtype
  # `min_pre_selection_count` and `min_pre_selection_frac` drop variants with low counts
  # or frequencies that may not have sufficient statistics for good estimates.
  min_pre_selection_count: 20  # drop variants < this many pre-selection counts
  # !!! Make min_pre_selection_frac LARGER for larger libraries, such as 0.1 / (library size) !!!
  # NOTE(review): 0.1 / (library size) gets SMALLER as the library grows, which seems to
  # contradict "LARGER for larger libraries" -- confirm which direction is intended.
  min_pre_selection_frac: 0.000002  # drop variants that are < this fraction of all counts

# Define default parameters for fitting global epistasis models to each
# individual selection to get mutation effects.
global_epistasis_params: &global_epistasis_params
  # How to clip functional scores at upper / lower bounds. Allowable values:
  #  - median_stop: median func score of all variants with stop codons
  #  - null: no clipping
  #  - a number: clip at this number
  clip_lower: -6  # a number, so scores are clipped at -6 on the low end
  clip_upper: null  # null, so no clipping on the high end
  # Do we collapse barcodes with same variant? `false` means no collapsing.
  # NOTE(review): `mean` / `median` presumably name how collapsed barcodes are combined -- confirm.
  collapse_identical_variants: false  # {false, mean, median}

# Define how to compute functional scores. Each key is a selection experiment.
# A global epistasis model is used to get mutational effects for each selection.
# Recommended naming of selection experiments is:
#   <library>-<post-selection sample date as YYMMDD>-<description>-<replicate>
# Each selection experiment should in turn provide the following keys:
#  - `post_selection_sample`: sample after selection (eg, infection of cells)
#  - `pre_selection_sample`: sample pre selection (eg, initial variant frequencies)
#  - `func_score_params`: parameters for computing functional scores
#  - `global_epistasis_params`: parameters for fitting global epistasis model
# Every selection below reuses the shared defaults through the `*func_scores_params_default`
# and `*global_epistasis_params` aliases, and every pre-selection sample is a `VSVG-control`.
# The selection names (keys) are referenced by `avg_func_effects` and `func_effect_diffs`,
# so renaming a key here requires updating those sections too.
func_scores:
  # --- Date field 230613: post-selection samples are the `no-antibody-control` samples ---
  Lib1-230613-293T:
    pre_selection_sample: Lib1-230613-VSVG-control
    post_selection_sample: Lib1-230613-no-antibody-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib2-230613-293T:
    pre_selection_sample: Lib2-230613-VSVG-control
    post_selection_sample: Lib2-230613-no-antibody-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  # --- Date field 240125: 293-SA23 and 293-SA26; post-selection samples are `H5-control` ---
  Lib1-240125-293-SA23:
    pre_selection_sample: Lib1-240125-293-SA23-VSVG-control
    post_selection_sample: Lib1-240125-293-SA23-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib2-240125-293-SA23:
    pre_selection_sample: Lib2-240125-293-SA23-VSVG-control
    post_selection_sample: Lib2-240125-293-SA23-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib1-240125-293-SA26:
    pre_selection_sample: Lib1-240125-293-SA26-VSVG-control
    post_selection_sample: Lib1-240125-293-SA26-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib2-240125-293-SA26:
    pre_selection_sample: Lib2-240125-293-SA26-VSVG-control
    post_selection_sample: Lib2-240125-293-SA26-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  # --- Date field 240704: 293 and CMAH-293; post-selection samples are `H5-control` ---
  Lib1-240704-293:
    pre_selection_sample: Lib1-240704-293-VSVG-control
    post_selection_sample: Lib1-240704-293-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib2-240704-293:
    pre_selection_sample: Lib2-240704-293-VSVG-control
    post_selection_sample: Lib2-240704-293-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib1-240704-CMAH-293:
    pre_selection_sample: Lib1-240704-CMAH-293-VSVG-control
    post_selection_sample: Lib1-240704-CMAH-293-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib2-240704-CMAH-293:
    pre_selection_sample: Lib2-240704-CMAH-293-VSVG-control
    post_selection_sample: Lib2-240704-CMAH-293-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params


  # --- Date field 241216: the `IGM` selections; post-selection samples are `H5-control` ---
  # NOTE(review): the CMAH / SA23 / SA26 selection keys in this group omit the `293` token
  # that appears in their sample names (e.g. key `Lib1-241216-CMAH-IGM` vs sample
  # `Lib1-241216-CMAH-293-IGM-...`) -- confirm the shortened keys are intentional.
  Lib1-241216-293-IGM:
    pre_selection_sample: Lib1-241216-293-IGM-VSVG-control
    post_selection_sample: Lib1-241216-293-IGM-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib2-241216-293-IGM:
    pre_selection_sample: Lib2-241216-293-IGM-VSVG-control
    post_selection_sample: Lib2-241216-293-IGM-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib1-241216-CMAH-IGM:
    pre_selection_sample: Lib1-241216-CMAH-293-IGM-VSVG-control
    post_selection_sample: Lib1-241216-CMAH-293-IGM-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib2-241216-CMAH-IGM:
    pre_selection_sample: Lib2-241216-CMAH-293-IGM-VSVG-control
    post_selection_sample: Lib2-241216-CMAH-293-IGM-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib1-241216-SA23-IGM:
    pre_selection_sample: Lib1-241216-293-SA23-IGM-VSVG-control
    post_selection_sample: Lib1-241216-293-SA23-IGM-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib2-241216-SA23-IGM:
    pre_selection_sample: Lib2-241216-293-SA23-IGM-VSVG-control
    post_selection_sample: Lib2-241216-293-SA23-IGM-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib1-241216-SA26-IGM:
    pre_selection_sample: Lib1-241216-293-SA26-IGM-VSVG-control
    post_selection_sample: Lib1-241216-293-SA26-IGM-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params
  Lib2-241216-SA26-IGM:
    pre_selection_sample: Lib2-241216-293-SA26-IGM-VSVG-control
    post_selection_sample: Lib2-241216-293-SA26-IGM-H5-control
    func_score_params: *func_scores_params_default
    global_epistasis_params: *global_epistasis_params


# --------------------------------------------------------------------------------------
# Configuration for averaging mutation functional effects across selections and plotting
# them.
# --------------------------------------------------------------------------------------

# Average/plot mutation functional effects from different selections with `avg_func_effects`.
# Each key is a condition which has the following values:
#  - `selections`: list of selections for which we average mutation functional effects
#  - `avg_method`: how to average across the selections, options are "median" and "mean"
#  - `per_selection_tooltips`: whether to show per-selection effects via tooltips
#  - `plot_kwargs`: keyword arguments passed to `polyclonal.plot.lineplot_and_heatmap`

# Define some defaults for each condition, used via the merge (<<) operator.
avg_func_effects_default: &avg_func_effects_default
  avg_method: median  # "median" or "mean"
  # NOTE(review): this key is not described in the key list above; presumably effects are
  # floored at this value before `effect_std` is computed -- confirm against the pipeline.
  floor_for_effect_std: -3
  per_selection_tooltips: true
  # Keyword arguments passed to `polyclonal.plot.lineplot_and_heatmap`.
  plot_kwargs:
    # `N` and `Y` are quoted on purpose: under the YAML 1.1 bool type, bare y/Y/n/N are
    # booleans, so a strict 1.1 parser would turn these two amino acids into false/true.
    # NOTE(review): the last entry is the three-character string '*' (single quotes
    # included), unlike the one-letter entries -- confirm the consumer expects the quotes.
    alphabet: [R, K, H, D, E, Q, 'N', S, T, 'Y', W, F, A, I, L, M, V, G, P, C, "'*'"]
    # Extra statistics that get a slider; values are presumably the initial slider
    # settings -- confirm against the polyclonal documentation.
    addtl_slider_stats:
      times_seen: 2
      effect_std: 3
      nt changes to codon: 3
    addtl_slider_stats_as_max: [effect_std, nt changes to codon]
    addtl_slider_stats_hide_not_filter: [nt changes to codon]
    heatmap_max_at_least: 2
    heatmap_min_at_least: -2
    init_floor_at_zero: false
    init_site_statistic: mean
    site_zoom_bar_color_col: region  # supplied in the `site_numbering_map`
    # Per-slider step / range settings, keyed by statistic name.
    slider_binding_range_kwargs:
      times_seen:
        step: 1
        min: 1
        max: 20
      n_selections:
        step: 1
      nt changes to codon:
        step: 1
        min: 1
        max: 3

# Define the functional effect conditions to average
# Each condition pulls in every key of `avg_func_effects_default` through the merge key
# (`<<`) and then supplies its own `selections`. The merge is shallow: a key written
# explicitly in a condition replaces the whole merged-in value for that key.
# Every selection listed below matches a key defined under `func_scores`.
avg_func_effects:
  # --- Conditions built from the 230613, 240125 and 240704 selections ---
  293T_entry:
    <<: *avg_func_effects_default
    selections:
      - Lib1-230613-293T
      - Lib2-230613-293T
  293_SA23_entry:
    <<: *avg_func_effects_default
    selections:
      - Lib1-240125-293-SA23
      - Lib2-240125-293-SA23
  293_SA26_entry:
    <<: *avg_func_effects_default
    selections:
      - Lib1-240125-293-SA26
      - Lib2-240125-293-SA26
  293_entry:
    <<: *avg_func_effects_default
    selections:
      - Lib1-240704-293
      - Lib2-240704-293
  293_CMAH_entry:
    <<: *avg_func_effects_default
    selections:
      - Lib1-240704-CMAH-293
      - Lib2-240704-CMAH-293

  # --- `low-sera` conditions, built from the 241216 `IGM` selections ---
  293_low-sera_entry:
    <<: *avg_func_effects_default
    selections:
      - Lib1-241216-293-IGM
      - Lib2-241216-293-IGM
  293_low-sera_CMAH_entry:
    <<: *avg_func_effects_default
    selections:
      - Lib1-241216-CMAH-IGM
      - Lib2-241216-CMAH-IGM
  293_SA23_low-sera_entry:
    <<: *avg_func_effects_default
    selections:
      - Lib1-241216-SA23-IGM
      - Lib2-241216-SA23-IGM
  293_SA26_low-sera_entry:
    <<: *avg_func_effects_default
    selections:
      - Lib1-241216-SA26-IGM
      - Lib2-241216-SA26-IGM
# --------------------------------------------------------------------------------------
# Configuration for simple difference in functional effects across conditions
# --------------------------------------------------------------------------------------

# Compute simple difference between average functional effects for different conditions.
# The difference is condition_1 minus condition_2. If you do not want to do this comparison,
# set `func_effect_diffs` to `null` or just leave it out altogether.
#
# Each key is a comparison. Under that name, you should have the following keys:
#  - `condition_1`: first condition, keys are `name` and `selections` (the list of selections)
#  - `condition_2`: second condition, keys are `name` and `selections` (the list of selections)
#  - `avg_method`: how to average across selections for a condition, "median" or "mean"
#  - `per_selection_tooltips`: whether to show per-selection tooltips
#  - `plot_kwargs`: keyword arguments passed to `polyclonal.plot.lineplot_and_heatmap`
# Four comparisons follow; each is condition_1 minus condition_2 and shares the same
# plot layout. In every `alphabet`, `N` and `Y` are quoted on purpose: under the YAML 1.1
# bool type, bare y/Y/n/N are booleans, so a strict 1.1 parser would turn these two amino
# acids into false/true. The per-condition slider keys are spelled `<condition name> effect`,
# so they must be kept in sync with the `name` values of the two conditions.
func_effect_diffs:
  SA26_vs_SA23_entry:
    condition_1:
      name: SA26 entry
      selections:
        - Lib1-240125-293-SA26
        - Lib2-240125-293-SA26
    condition_2:
      name: SA23 entry
      selections:
        - Lib1-240125-293-SA23
        - Lib2-240125-293-SA23
    avg_method: median
    per_selection_tooltips: true
    plot_kwargs:
      alphabet: [R, K, H, D, E, Q, 'N', S, T, 'Y', W, F, A, I, L, M, V, G, P, C]
      addtl_slider_stats:
        times_seen: 2
        difference_std: 2  # standard deviation difference across pairwise comparisons for mutation
        fraction_pairs_w_mutation: 1  # fraction of all pairs between conditions w mutation
        best_effect: -2  # effect must be >= for at least one condition
        SA23 entry effect: -8  # slider on effect in this condition
        # NOTE(review): initial value is 0 here but -8 for the SA23 condition -- confirm intended.
        SA26 entry effect: 0  # slider on effect in this condition
        nt changes to codon: 3
      addtl_slider_stats_hide_not_filter: [best_effect, SA23 entry effect, SA26 entry effect, nt changes to codon]
      addtl_slider_stats_as_max: [difference_std, nt changes to codon]
      heatmap_max_at_least: 1
      heatmap_min_at_least: -1
      init_floor_at_zero: true
      init_site_statistic: sum
      site_zoom_bar_color_col: region  # supplied in `site_numbering_map`
      slider_binding_range_kwargs:
        times_seen:
          step: 1
          min: 1
          max: 10
        nt changes to codon:
          step: 1
          min: 1
          max: 3
  SA26_vs_SA23_low-sera_entry:
    condition_1:
      name: SA26-IGM entry
      selections:
        - Lib1-241216-SA26-IGM
        - Lib2-241216-SA26-IGM
    condition_2:
      name: SA23-IGM entry
      selections:
        - Lib1-241216-SA23-IGM
        - Lib2-241216-SA23-IGM
    avg_method: median
    per_selection_tooltips: true
    plot_kwargs:
      alphabet: [R, K, H, D, E, Q, 'N', S, T, 'Y', W, F, A, I, L, M, V, G, P, C]
      addtl_slider_stats:
        times_seen: 2
        difference_std: 2  # standard deviation difference across pairwise comparisons for mutation
        fraction_pairs_w_mutation: 1  # fraction of all pairs between conditions w mutation
        best_effect: -2  # effect must be >= for at least one condition
        SA23-IGM entry effect: -8  # slider on effect in this condition
        # NOTE(review): initial value is 0 here but -8 for the SA23-IGM condition -- confirm intended.
        SA26-IGM entry effect: 0  # slider on effect in this condition
        nt changes to codon: 3
      addtl_slider_stats_hide_not_filter: [best_effect, SA23-IGM entry effect, SA26-IGM entry effect, nt changes to codon]
      addtl_slider_stats_as_max: [difference_std, nt changes to codon]
      heatmap_max_at_least: 1
      heatmap_min_at_least: -1
      init_floor_at_zero: true
      init_site_statistic: sum
      site_zoom_bar_color_col: region  # supplied in `site_numbering_map`
      slider_binding_range_kwargs:
        times_seen:
          step: 1
          min: 1
          max: 10
        nt changes to codon:
          step: 1
          min: 1
          max: 3
  CMAH_vs_293_entry:
    condition_1:
      name: CMAH entry
      selections:
        - Lib1-240704-CMAH-293
        - Lib2-240704-CMAH-293
    condition_2:
      name: 293 entry
      selections:
        - Lib1-240704-293
        - Lib2-240704-293
    avg_method: median
    per_selection_tooltips: true
    plot_kwargs:
      alphabet: [R, K, H, D, E, Q, 'N', S, T, 'Y', W, F, A, I, L, M, V, G, P, C]
      addtl_slider_stats:
        times_seen: 2
        difference_std: 2  # standard deviation difference across pairwise comparisons for mutation
        fraction_pairs_w_mutation: 1  # fraction of all pairs between conditions w mutation
        best_effect: -2  # effect must be >= for at least one condition
        293 entry effect: -8  # slider on effect in this condition
        CMAH entry effect: -8  # slider on effect in this condition
        nt changes to codon: 3
      addtl_slider_stats_hide_not_filter: [best_effect, 293 entry effect, CMAH entry effect, nt changes to codon]
      addtl_slider_stats_as_max: [difference_std, nt changes to codon]
      heatmap_max_at_least: 1
      heatmap_min_at_least: -1
      init_floor_at_zero: true
      init_site_statistic: sum
      site_zoom_bar_color_col: region  # supplied in `site_numbering_map`
      slider_binding_range_kwargs:
        times_seen:
          step: 1
          min: 1
          max: 10
        nt changes to codon:
          step: 1
          min: 1
          max: 3
  CMAH_vs_293_low-sera_entry:
    condition_1:
      name: CMAH-IGM entry
      selections:
        - Lib1-241216-CMAH-IGM
        - Lib2-241216-CMAH-IGM
    condition_2:
      name: 293-IGM entry
      selections:
        - Lib1-241216-293-IGM
        - Lib2-241216-293-IGM
    avg_method: median
    per_selection_tooltips: true
    plot_kwargs:
      alphabet: [R, K, H, D, E, Q, 'N', S, T, 'Y', W, F, A, I, L, M, V, G, P, C]
      addtl_slider_stats:
        times_seen: 2
        difference_std: 2  # standard deviation difference across pairwise comparisons for mutation
        fraction_pairs_w_mutation: 1  # fraction of all pairs between conditions w mutation
        best_effect: -2  # effect must be >= for at least one condition
        293-IGM entry effect: -8  # slider on effect in this condition
        CMAH-IGM entry effect: -8  # slider on effect in this condition
        nt changes to codon: 3
      addtl_slider_stats_hide_not_filter: [best_effect, 293-IGM entry effect, CMAH-IGM entry effect, nt changes to codon]
      addtl_slider_stats_as_max: [difference_std, nt changes to codon]
      heatmap_max_at_least: 1
      heatmap_min_at_least: -1
      init_floor_at_zero: true
      init_site_statistic: sum
      site_zoom_bar_color_col: region  # supplied in `site_numbering_map`
      slider_binding_range_kwargs:
        times_seen:
          step: 1
          min: 1
          max: 10
        nt changes to codon:
          step: 1
          min: 1
          max: 3