From 251d33e79fe298ea698e62f9402e2899fd65f912 Mon Sep 17 00:00:00 2001 From: veghp Date: Mon, 27 Jan 2025 17:50:36 +0000 Subject: [PATCH] Fix #86 --- README.rst | 2 +- .../EnforceTranslation.py | 30 +++++++++---------- .../codon_optimization/AvoidRareCodons.py | 18 +++++------ .../codon_optimization/CodonOptimize.py | 12 ++++++-- .../codon_optimization/HarmonizeRCA.py | 3 +- .../MatchTargetCodonUsage.py | 22 +++++++------- .../codon_optimization/MaximizeCAI.py | 18 +++++------ .../codon_optimization/README.md | 3 ++ 8 files changed, 56 insertions(+), 52 deletions(-) diff --git a/README.rst b/README.rst index a39efeb..937c662 100644 --- a/README.rst +++ b/README.rst @@ -64,7 +64,7 @@ The example below will generate a random sequence and optimize it so that: EnforceTranslation(location=(500, 1400)) ], objectives=[CodonOptimize(species='e_coli', location=(500, 1400))] - ) + ) # Note: always use a codon optimisation specification with EnforceTranslation # SOLVE THE CONSTRAINTS, OPTIMIZE WITH RESPECT TO THE OBJECTIVE diff --git a/dnachisel/builtin_specifications/EnforceTranslation.py b/dnachisel/builtin_specifications/EnforceTranslation.py index 2baf543..677066a 100644 --- a/dnachisel/builtin_specifications/EnforceTranslation.py +++ b/dnachisel/builtin_specifications/EnforceTranslation.py @@ -20,6 +20,9 @@ class EnforceTranslation(CodonSpecification): Shorthand for annotations: "cds". + Note: always use a codon optimisation specification with EnforceTranslation. + + Parameters ----------- @@ -98,10 +101,7 @@ def set_location(self, location): len(location) != 3 * len(self.translation) ): raise ValueError( - ( - "Window size (%d bp) incompatible with translation " - "(%d aa)" - ) + ("Window size (%d bp) incompatible with translation " "(%d aa)") % (len(location), len(self.translation)) ) self.location = location @@ -119,10 +119,7 @@ def initialized_on_problem(self, problem, role): result = result.copy_with_changes(translation=translation) if len(result.location) != 3 * len(result.translation): raise ValueError( - ( - "Window size (%d bp) incompatible with translation " - "(%d aa)" - ) + ("Window size (%d bp) incompatible with translation " "(%d aa)") % (len(result.location), len(result.translation)) ) if (result.start_codon is not None) and result.translation[0] != "M": @@ -163,9 +160,11 @@ def evaluate(self, problem): problem, score=-len(errors_locations), locations=errors_locations, - message="All OK." - if len(errors_locations) == 0 - else "Wrong translation at locations %s" % errors_locations, + message=( + "All OK." + if len(errors_locations) == 0 + else "Wrong translation at locations %s" % errors_locations + ), ) def localized_on_window(self, new_location, start_codon, end_codon): @@ -179,7 +178,7 @@ def localized_on_window(self, new_location, start_codon, end_codon): translation=new_translation, boost=self.boost, genetic_table=self.genetic_table, - start_codon=self.start_codon if location_is_at_start else None + start_codon=self.start_codon if location_is_at_start else None, # has_start_codon=self.has_start_codon and location_is_at_start, ) @@ -196,11 +195,10 @@ def get_first_codon_choices(first_codon): return [first_codon] else: return [self.start_codon] # "ATG" + first_codon_location = self.codon_index_to_location(0) first_codon = first_codon_location.extract_sequence(sequence) - choices = [ - (first_codon_location, get_first_codon_choices(first_codon)) - ] + [ + choices = [(first_codon_location, get_first_codon_choices(first_codon))] + [ (self.codon_index_to_location(i), self.backtranslation_table[aa]) for i, aa in list(enumerate(self.translation))[1:] ] @@ -221,6 +219,6 @@ def __str__(self): def short_label(self): return "cds" - + def breach_label(self): return "protein sequence changed" diff --git a/dnachisel/builtin_specifications/codon_optimization/AvoidRareCodons.py b/dnachisel/builtin_specifications/codon_optimization/AvoidRareCodons.py index ba83e41..bdeea4e 100644 --- a/dnachisel/builtin_specifications/codon_optimization/AvoidRareCodons.py +++ b/dnachisel/builtin_specifications/codon_optimization/AvoidRareCodons.py @@ -11,8 +11,8 @@ class AvoidRareCodons(BaseCodonOptimizationClass): This can be seen as a "mild" form of codon optimization where only rare codons (which slow down protein synthesis) are considered. - WARNING: Make sure to always use this specification with EnforceTranslation - to preserve the amino-acid sequence. + Warning: always use this specification with an EnforceTranslation constraint + defined over the same location, to preserve the amino acid sequence. Shorthand for annotations: "no_rare_codons". @@ -25,7 +25,7 @@ class AvoidRareCodons(BaseCodonOptimizationClass): Name or TaxID of the species for which to optimize the sequence. A custom codon_usage_table can be provided instead (or in addition, for species names whose codon usage table cannot be imported). - + codon_usage_table Optional codon usage table of the species for which the sequence will be codon-optimized, which can be provided instead of ``species``. A dict of @@ -111,17 +111,17 @@ def evaluate(self, problem): problem, score=score, locations=locations, - message="All OK." - if len(locations) == 0 - else "Rare codons at locations %s" % locations, + message=( + "All OK." + if len(locations) == 0 + else "Rare codons at locations %s" % locations + ), ) def restrict_nucleotides(self, sequence, location=None): nonrare_codons = list(self.nonrare_codons) if self.location.strand == -1: - nonrare_codons = sorted( - [reverse_complement(c) for c in nonrare_codons] - ) + nonrare_codons = sorted([reverse_complement(c) for c in nonrare_codons]) return [ ((i, i + 3), nonrare_codons) for i in range(self.location.start, self.location.end, 3) diff --git a/dnachisel/builtin_specifications/codon_optimization/CodonOptimize.py b/dnachisel/builtin_specifications/codon_optimization/CodonOptimize.py index 3bd4d9e..b1909c4 100644 --- a/dnachisel/builtin_specifications/codon_optimization/CodonOptimize.py +++ b/dnachisel/builtin_specifications/codon_optimization/CodonOptimize.py @@ -10,7 +10,7 @@ def CodonOptimize( codon_usage_table=None, original_species=None, original_codon_usage_table=None, - boost=1.0 + boost=1.0, ): """Codon-optimize a coding sequence using a user-selected method. @@ -28,6 +28,10 @@ def CodonOptimize( codon whose usage in the target organism matches the usage of the original codon in its host organism (as per Claassens 2017). + Warning: always use this specification with an EnforceTranslation constraint + defined over the same location, to preserve the amino acid sequence. + + Parameters ========== species @@ -105,5 +109,7 @@ def CodonOptimize( original_codon_usage_table=original_codon_usage_table, boost=boost, ) - raise ValueError("`method` must be 'use_best_codon', 'match_codon_usage' " - f"or 'harmonize_rca', not {method!r}") + raise ValueError( + "`method` must be 'use_best_codon', 'match_codon_usage' " + f"or 'harmonize_rca', not {method!r}" + ) diff --git a/dnachisel/builtin_specifications/codon_optimization/HarmonizeRCA.py b/dnachisel/builtin_specifications/codon_optimization/HarmonizeRCA.py index 4aeb639..fed9efb 100644 --- a/dnachisel/builtin_specifications/codon_optimization/HarmonizeRCA.py +++ b/dnachisel/builtin_specifications/codon_optimization/HarmonizeRCA.py @@ -27,7 +27,8 @@ class HarmonizeRCA(BaseCodonOptimizationClass): algorithm (Angov 2008), which was much more complicated as it involved predicting "ribosome pausing" sites in the sequence. - Warning: always use with an EnforceTranslation constraint. + Warning: always use this specification with an EnforceTranslation constraint + defined over the same location, to preserve the amino acid sequence. Parameters diff --git a/dnachisel/builtin_specifications/codon_optimization/MatchTargetCodonUsage.py b/dnachisel/builtin_specifications/codon_optimization/MatchTargetCodonUsage.py index c02d569..a933727 100644 --- a/dnachisel/builtin_specifications/codon_optimization/MatchTargetCodonUsage.py +++ b/dnachisel/builtin_specifications/codon_optimization/MatchTargetCodonUsage.py @@ -20,6 +20,10 @@ class MatchTargetCodonUsage(BaseCodonOptimizationClass): host-to-target codon harmonization. See DnaChisel's HarmonizeRCA class for Codon Harmonization. + Warning: always use this specification with an EnforceTranslation constraint + defined over the same location, to preserve the amino acid sequence. + + Parameters ---------- @@ -71,9 +75,7 @@ class MatchTargetCodonUsage(BaseCodonOptimizationClass): shorthand_name = "match_codon_usage" - def __init__( - self, species=None, location=None, codon_usage_table=None, boost=1.0 - ): + def __init__(self, species=None, location=None, codon_usage_table=None, boost=1.0): BaseCodonOptimizationClass.__init__( self, species=species, @@ -130,8 +132,7 @@ def evaluate(self, problem): problem, score=score, locations=locations, - message="Codon opt. on window %s scored %.02E" - % (self.location, score), + message="Codon opt. on window %s scored %.02E" % (self.location, score), ) def localized_on_window(self, new_location, start_codon, end_codon): @@ -177,9 +178,7 @@ def compare_frequencies(self, codons, text_mode=False): for i, codon in enumerate(codons): codons_positions[codon].append(i) # aa: amino-acid - codons_frequencies = { - aa: {"total": 0} for aa in self.codon_usage_table - } + codons_frequencies = {aa: {"total": 0} for aa in self.codon_usage_table} for codon, positions in codons_positions.items(): count = len(positions) aa = self.codons_translations[codon] @@ -191,9 +190,7 @@ def compare_frequencies(self, codons, text_mode=False): if codon != "total": data[codon] = 1.0 * value / total codons_frequencies = { - aa: data - for aa, data in codons_frequencies.items() - if data["total"] + aa: data for aa, data in codons_frequencies.items() if data["total"] } comparisons = { aa: { @@ -209,8 +206,9 @@ def compare_frequencies(self, codons, text_mode=False): return dict_to_pretty_string(comparisons) else: return codons_positions, comparisons + def short_label(self): result = "match-codon-usage" if self.species is not None: result += " (%s)" % self.species - return result \ No newline at end of file + return result diff --git a/dnachisel/builtin_specifications/codon_optimization/MaximizeCAI.py b/dnachisel/builtin_specifications/codon_optimization/MaximizeCAI.py index ff59856..c080a7a 100644 --- a/dnachisel/builtin_specifications/codon_optimization/MaximizeCAI.py +++ b/dnachisel/builtin_specifications/codon_optimization/MaximizeCAI.py @@ -20,6 +20,10 @@ class MaximizeCAI(BaseCodonOptimizationClass): This score is between -inf. and 0 (0 meaning a perfectly optimal sequence). + Warning: always use this specification with an EnforceTranslation constraint + defined over the same location, to preserve the amino acid sequence. + + Parameters ---------- @@ -65,9 +69,7 @@ class MaximizeCAI(BaseCodonOptimizationClass): shorthand_name = "use_best_codon" - def __init__( - self, species=None, location=None, codon_usage_table=None, boost=1.0 - ): + def __init__(self, species=None, location=None, codon_usage_table=None, boost=1.0): BaseCodonOptimizationClass.__init__( self, species=species, @@ -105,12 +107,10 @@ def evaluate(self, problem): problem, score=freq - optimal, locations=[] if (freq == optimal) else [self.location], - message="Codon opt. on window %s scored %.02E" - % (self.location, score), + message="Codon opt. on window %s scored %.02E" % (self.location, score), ) current_usage = [ - self.codon_usage_table["log_codons_frequencies"][codon] - for codon in codons + self.codon_usage_table["log_codons_frequencies"][codon] for codon in codons ] optimal_usage = [ self.codon_usage_table["log_best_frequencies"][ct[codon]] @@ -125,8 +125,7 @@ def evaluate(self, problem): problem, score=score, locations=locations, - message="Codon opt. on window %s scored %.02E" - % (self.location, score), + message="Codon opt. on window %s scored %.02E" % (self.location, score), ) def label_parameters(self): @@ -137,4 +136,3 @@ def short_label(self): if self.species is not None: result += " (%s)" % self.species return result - diff --git a/dnachisel/builtin_specifications/codon_optimization/README.md b/dnachisel/builtin_specifications/codon_optimization/README.md index ca5ed15..4bc2e19 100644 --- a/dnachisel/builtin_specifications/codon_optimization/README.md +++ b/dnachisel/builtin_specifications/codon_optimization/README.md @@ -12,3 +12,6 @@ optimization that one can find in the literature. Finally, ``CodonOptimize`` is a generic pseudo-specification-class which uses a "mode" parameter to return a specification of one of the above classes. + +Warning: always use this specification with an EnforceTranslation constraint defined +over the same location, to preserve the amino acid sequence.