diff --git a/src/gpsea/model/_base.py b/src/gpsea/model/_base.py index bb8f27b5c..0a33fa060 100644 --- a/src/gpsea/model/_base.py +++ b/src/gpsea/model/_base.py @@ -1,7 +1,6 @@ import enum import typing -import hpotk class Sex(enum.Enum): diff --git a/src/gpsea/model/_cohort.py b/src/gpsea/model/_cohort.py index a8559fd05..f671bf48a 100644 --- a/src/gpsea/model/_cohort.py +++ b/src/gpsea/model/_cohort.py @@ -13,7 +13,7 @@ from ._variant import Variant, VariantInfo -I = typing.TypeVar('I', bound=hpotk.model.Identified) +IDENTIFIED = typing.TypeVar('IDENTIFIED', bound=hpotk.model.Identified) """ Anything that extends `Identified` (e.g. `Disease`, `Phenotype`, `Measurement`). """ @@ -289,8 +289,8 @@ def _check_id( @staticmethod def _find_first_by_id( term_id: hpotk.TermId, - items: typing.Iterable[I], - ) -> typing.Optional[I]: + items: typing.Iterable[IDENTIFIED], + ) -> typing.Optional[IDENTIFIED]: for m in items: if m.identifier == term_id: return m @@ -299,13 +299,13 @@ def _find_first_by_id( @staticmethod def _unique_identifiers_of_identified( - items: typing.Iterable[I], + items: typing.Iterable[IDENTIFIED], ) -> typing.Collection[hpotk.TermId]: return set(item.identifier for item in items) @staticmethod def _count_unique_identifiers( - items: typing.Iterable[I], + items: typing.Iterable[IDENTIFIED], ) -> int: return len(Patient._unique_identifiers_of_identified(items)) @@ -668,8 +668,8 @@ def _count_individuals_with_condition( def _iterate_through_items( self, - extract_items: typing.Callable[[Patient,], typing.Iterable[I]], - ) -> typing.Iterator[I]: + extract_items: typing.Callable[[Patient,], typing.Iterable[IDENTIFIED]], + ) -> typing.Iterator[IDENTIFIED]: return itertools.chain(item for individual in self._members for item in extract_items(individual)) def _get_most_common( @@ -689,7 +689,7 @@ def _get_most_common( @staticmethod def _count_distinct_items( - items: typing.Iterable[I], + items: typing.Iterable[IDENTIFIED], ) -> int: return len(set(item.identifier for item in items)) diff --git a/src/gpsea/model/_protein.py b/src/gpsea/model/_protein.py index 5c69e9527..4df41657a 100644 --- a/src/gpsea/model/_protein.py +++ b/src/gpsea/model/_protein.py @@ -83,7 +83,7 @@ def __repr__(self) -> str: def _deprecation_warning(): warnings.warn( - f"`FeatureType` was deprecated and will be removed prior `v1.0.0`. Use a `str` instead!", + "`FeatureType` was deprecated and will be removed prior `v1.0.0`. Use a `str` instead!", DeprecationWarning, ) diff --git a/src/gpsea/model/_test_gt.py b/src/gpsea/model/_test_gt.py index 1a033e75b..6d581406f 100644 --- a/src/gpsea/model/_test_gt.py +++ b/src/gpsea/model/_test_gt.py @@ -30,7 +30,7 @@ def test_iteration(self): assert len(labels) == len(gts) == len(genotypes) - assert all(l.label in ('A', 'C', 'D') for l in labels) + assert all(sample_labels.label in ('A', 'C', 'D') for sample_labels in labels) assert all( gt in (Genotype.HETEROZYGOUS, Genotype.HEMIZYGOUS, Genotype.HOMOZYGOUS_REFERENCE) for gt in gts diff --git a/src/gpsea/preprocessing/_config.py b/src/gpsea/preprocessing/_config.py index 82e204f05..3de08f8e8 100644 --- a/src/gpsea/preprocessing/_config.py +++ b/src/gpsea/preprocessing/_config.py @@ -401,7 +401,7 @@ def _configure_imprecise_sv_annotator( ): # Setup cache for SVs if cache_dir is not None: - sv_cache_dir = os.path.join(cache_dir, "sv_cache") + _sv_cache_dir = os.path.join(cache_dir, "sv_cache") # TODO: implement the cache. # os.makedirs(sv_cache_dir, exist_ok=True) # var_cache = VariantAnnotationCache(sv_cache_dir) diff --git a/src/gpsea/preprocessing/_generic.py b/src/gpsea/preprocessing/_generic.py index 4364573b6..2cfe3d376 100644 --- a/src/gpsea/preprocessing/_generic.py +++ b/src/gpsea/preprocessing/_generic.py @@ -41,7 +41,7 @@ def annotate(self, item: ImpreciseSvInfo) -> typing.Sequence[TranscriptAnnotatio def _map_to_variant_effects( self, - variant_class: str, + variant_class: VariantClass, ) -> typing.Sequence[VariantEffect]: if variant_class == VariantClass.DEL: return (VariantEffect.TRANSCRIPT_ABLATION,) diff --git a/src/gpsea/preprocessing/_vep.py b/src/gpsea/preprocessing/_vep.py index 7c9596027..4ec2cb0bb 100644 --- a/src/gpsea/preprocessing/_vep.py +++ b/src/gpsea/preprocessing/_vep.py @@ -186,7 +186,7 @@ def format_coordinates_for_vep_query(vc: VariantCoordinates) -> str: # TODO: Verify are working correctly else: if len(vc.ref) == 0 or len(vc.alt) == 0: - raise ValueError(f'Trimmed alleles are not yet supported!') + raise ValueError('Trimmed alleles are not yet supported!') if len(vc.ref) == 1 and len(vc.alt) != 1: # INS/DUP start = start + 1 # we must "trim" diff --git a/src/gpsea/view/_draw_variants.py b/src/gpsea/view/_draw_variants.py index a98ea9847..a8cd27ba1 100644 --- a/src/gpsea/view/_draw_variants.py +++ b/src/gpsea/view/_draw_variants.py @@ -36,7 +36,7 @@ def _calc_aa_based_pos(pos_bases, tx_coordinates): :param exons: exon positions """ print(f'{pos_bases=}') - exons, cds_start, cds_end = tx_coordinates.exons, tx_coordinates.cds_start, tx_coordinates.cds_end + exons, _cds_start, _cds_end = tx_coordinates.exons, tx_coordinates.cds_start, tx_coordinates.cds_end num_nt = 0 @@ -325,7 +325,7 @@ def draw_fig(self, tx_coordinates: TranscriptCoordinates, protein_meta: ProteinM # get minimum position on chromosome for all transcripts min_exon_limit = np.min(exon_limits) feature_limits = np.array([(feature.info.start, feature.info.end) for feature in protein_meta.protein_features]) - feature_types = [pf.feature_type for pf in protein_meta.protein_features] + _feature_types = [pf.feature_type for pf in protein_meta.protein_features] feature_limits = (feature_limits * 3) - 2 + min_exon_limit # to convert from codons to bases variant_locations = list() for ann in tx_anns: @@ -334,11 +334,11 @@ def draw_fig(self, tx_coordinates: TranscriptCoordinates, protein_meta: ProteinM if prot_eff_loc is not None: variant_locations.append([prot_eff_loc.start, prot_eff_loc.end]) variant_locations = np.array(variant_locations) - variant_effects = np.array([(ann.variant_effects[0]) for ann in tx_anns]) + _variant_effects = np.array([(ann.variant_effects[0]) for ann in tx_anns]) exon_labels = [f'{i + 1}' for i in range(len(exon_limits))] protein_track_x_min, protein_track_x_max = 0.15, 0.85 - protein_track_y_min, protein_track_y_max = 0.492, 0.508 + protein_track_y_min, _protein_track_y_max = 0.492, 0.508 exon_y_min, exon_y_max = 0.39, 0.43 font_size = 12 text_padding = 0.004 @@ -364,7 +364,7 @@ def preprocess(x_absolute): # x_axis x_axis_y = protein_track_y_min - 0.02 x_axis_min_x, x_axis_max_x = protein_track_x_min, protein_track_x_max - big_tick_length, small_tick_length = 0.01, 0.005 + big_tick_length, _small_tick_length = 0.01, 0.005 draw_line(x_axis_min_x, x_axis_y, x_axis_max_x, x_axis_y, line_color=self.axis_color, line_width=1.0) # main line draw_line(x_axis_min_x, x_axis_y - big_tick_length, x_axis_min_x, x_axis_y, line_color=self.axis_color, diff --git a/src/gpsea/view/_protein_visualizable.py b/src/gpsea/view/_protein_visualizable.py index 40ed0ebfe..babac8bed 100644 --- a/src/gpsea/view/_protein_visualizable.py +++ b/src/gpsea/view/_protein_visualizable.py @@ -1,16 +1,24 @@ import typing -from gpsea.model import * +from gpsea.model import ( + Cohort, + ProteinMetadata, + TranscriptAnnotation, + TranscriptCoordinates, + Variant, + VariantEffect, +) import numpy as np +from gpsea.model.genome._genome import Region -class ProteinVisualizable: +class ProteinVisualizable: def __init__( - self, - tx_coordinates: TranscriptCoordinates, - protein_meta: ProteinMetadata, - cohort: Cohort, + self, + tx_coordinates: TranscriptCoordinates, + protein_meta: ProteinMetadata, + cohort: Cohort, ) -> None: self._tx_coordinates = tx_coordinates self._protein_meta = protein_meta @@ -19,7 +27,7 @@ def __init__( transcript_annotations = ProteinVisualizable._get_tx_anns( cohort.all_variants(), self._tx_coordinates.identifier ) - self._variant_regions_on_protein = list() + variant_regions_on_protein: typing.List[Region] = list() self._variant_effect = list() for tx_ann in transcript_annotations: variant_effects = tx_ann.variant_effects @@ -27,7 +35,7 @@ def __init__( continue prot_eff_loc = tx_ann.protein_effect_location if prot_eff_loc is not None: - self._variant_regions_on_protein.append(prot_eff_loc) + variant_regions_on_protein.append(prot_eff_loc) self._variant_effect.append(variant_effects[0]) self._protein_feature_names = list() @@ -40,13 +48,17 @@ def __init__( self._protein_feature_starts.append(feature.info.start) self._protein_feature_ends.append(feature.info.end) - self._variant_locations = np.array([item.start for item in self._variant_regions_on_protein]) + self._variant_locations = np.array( + [item.start for item in variant_regions_on_protein] + ) - #variant_locations = (variant_locations * 3) - 2 + min_exon_limit # to convert from codons to bases - #variant_effects = np.array([(ann.variant_effects[0]) for ann in tx_anns]) + # variant_locations = (variant_locations * 3) - 2 + min_exon_limit # to convert from codons to bases + # variant_effects = np.array([(ann.variant_effects[0]) for ann in tx_anns]) # count marker occurrences and remove duplicates self._variant_locations_counted_absolute, self._marker_counts = np.unique( - self._variant_locations, axis=0, return_counts=True, + self._variant_locations, + axis=0, + return_counts=True, ) if protein_meta.protein_length > 0: @@ -59,8 +71,8 @@ def __init__( @staticmethod def _get_tx_anns( - variants: typing.Iterable[Variant], - tx_id: str, + variants: typing.Iterable[Variant], + tx_id: str, ) -> typing.Sequence[TranscriptAnnotation]: """ By default, the API returns transcript annotations for many transcripts. @@ -74,7 +86,9 @@ def _get_tx_anns( tx_ann = ann break if tx_ann is None: - raise ValueError(f'The transcript annotation for {tx_id} was not found!') + raise ValueError( + f"The transcript annotation for {tx_id} was not found!" + ) else: tx_anns.append(tx_ann) @@ -103,7 +117,7 @@ def protein_feature_starts(self) -> typing.Sequence[int]: @property def protein_feature_ends(self) -> typing.Sequence[int]: return self._protein_feature_ends - + @property def protein_feature_types(self) -> typing.Sequence[str]: return self._protein_feature_types @@ -129,7 +143,7 @@ def protein_length(self) -> int: @property def protein_feature_names(self) -> typing.Sequence[str]: return self._protein_feature_names - + @property def variant_effects(self) -> typing.Sequence[VariantEffect]: return self._variant_effect diff --git a/src/gpsea/view/_txp.py b/src/gpsea/view/_txp.py index de9bb351d..eda97bc01 100644 --- a/src/gpsea/view/_txp.py +++ b/src/gpsea/view/_txp.py @@ -2,9 +2,7 @@ from collections import defaultdict from matplotlib import pyplot as plt from matplotlib.patches import Rectangle -from matplotlib.collections import PatchCollection from matplotlib.lines import Line2D -import typing from gpsea.model import Variant, TranscriptCoordinates, ProteinMetadata @@ -30,15 +28,15 @@ def draw_variants(self, variants: typing.Iterable[Variant], tx: TranscriptCoordinates, protein: ProteinMetadata): title = f"{protein.protein_id} ({protein.label})" - fig, ax = plt.subplots(1, figsize=(10, 10)) + _, ax = plt.subplots(1, figsize=(10, 10)) protein_domains = set() - THRESHOLD = 2 + _THRESHOLD = 2 BOTTOM_MARGIN = 20 amino_acid_len = tx.get_codon_count() # draw a box that is ten aax tall, where aax is the dimension of one amino acid prot_start = get_interpolated_location_in_protein(1, amino_acid_len) prot_end = get_interpolated_location_in_protein(amino_acid_len, amino_acid_len) - box_height = 10/amino_acid_len + _box_height = 10/amino_acid_len prot_width = prot_end - prot_start + 1 protein_height = prot_width/20 #rect = Rectangle((prot_start, BOTTOM_MARGIN), prot_width, protein_height) @@ -59,11 +57,11 @@ def draw_variants(self, variants: typing.Iterable[Variant], hgvs_cdna = hgvs variant_effects = tx_annot.variant_effects if len(variant_effects) > 1: - var_effect = "MULTIPLE" + _var_effect = "MULTIPLE" elif len(variant_effects) == 0: - var_effect = "UNKNOWN" + _var_effect = "UNKNOWN" else: - var_effect = variant_effects[0].name + _var_effect = variant_effects[0].name for p in tx_annot.protein_affected: for f in p.domains(): protein_domains.add(f.info) @@ -92,7 +90,7 @@ def draw_variants(self, variants: typing.Iterable[Variant], start = feature.start end = feature.end #print(name, start, end, box_color) - box_height = 10/amino_acid_len + _box_height = 10/amino_acid_len prot_width = prot_end - prot_start + 1 protein_height = prot_width/20 #rect = Rectangle((prot_start, BOTTOM_MARGIN), prot_width, protein_height) diff --git a/tests/analysis/pscore/test_de_vries_scorer.py b/tests/analysis/pscore/test_de_vries_scorer.py index a96384c7f..e62d5edbf 100644 --- a/tests/analysis/pscore/test_de_vries_scorer.py +++ b/tests/analysis/pscore/test_de_vries_scorer.py @@ -4,7 +4,7 @@ import pytest from gpsea.analysis.pscore import DeVriesPhenotypeScorer -from gpsea.model import Patient, SampleLabels, Phenotype, Sex +from gpsea.model import Patient, Phenotype intrauterine_growth_retardation = 'HP:0001511' small_for_gestational_age = 'HP:0001518'