Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML Table generator #66

Merged
merged 3 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions docs/user-guide/validation.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,28 @@ Create a [ContentValidator](../api/validation/content_validator.md) for the phen
checks that the generated phenopackets have a minimum number of HPO terms, alleles, and variants.

```python title="Generating GA4GH phenopackets from a pyphetools individual list"
validator = ContentValidator(min_var=1, min_hpo=3)
errors = validator.validate_phenopacket_list(ppkt_list)
```
cohort = [individual1, individual2, individual3]
cvalidator = CohortValidator(cohort=cohort, ontology=hpo_ontology, min_hpo=1, allelic_requirement=AllelicRequirement.MONO_ALLELIC)
validated_individuals = cvalidator.get_validated_individual_list()
qc = QcVisualizer(ontology=hpo_ontology)
display(HTML(qc.to_html(validated_individuals)))
```

This will either print a message that no errors were found or show a table with a summary of the errors. If errors were found
with incorrect HPO ids or labels, they need to be corrected in the previous part of the script. If redundancies or ontology conflicts are found, these can be corrected automatically by the following command:


```python title="Getting an individual list with corrected ontology errors (clean terms)"
cl_individuals = [vi.get_individual_with_clean_terms() for vi in validated_individuals]
```

Then the above analysis can be repeated to check the results.

```python title="Note the 'cohort' argument is pointing to the corrected individual objects"
cvalidator = CohortValidator(cohort=cl_individuals, ontology=hpo_ontology, min_hpo=1, allelic_requirement=AllelicRequirement.MONO_ALLELIC)
qc = QcVisualizer(ontology=hpo_ontology)
display(HTML(qc.to_html(cvalidator.get_validated_individual_list())))
```


If this analysis shows no error, then the script can proceed to [visualize](visualization.md) and output the phenopackets.
2 changes: 1 addition & 1 deletion src/pyphetools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from . import validation


__version__ = "0.8.9"
__version__ = "0.8.12"


__all__ = [
Expand Down
2 changes: 2 additions & 0 deletions src/pyphetools/creation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .age_column_mapper import AgeColumnMapper
from .age_isoformater import AgeIsoFormater
from .allelic_requirement import AllelicRequirement
from .case_encoder import CaseEncoder
from .cohort_encoder import CohortEncoder
from .column_mapper import ColumnMapper
Expand Down Expand Up @@ -27,6 +28,7 @@
__all__ = [
"AgeColumnMapper",
"AgeIsoFormater",
"AllelicRequirement",
"CaseEncoder" ,
"CohortEncoder",
"ColumnMapper",
Expand Down
5 changes: 5 additions & 0 deletions src/pyphetools/creation/allelic_requirement.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from enum import Enum

class AllelicRequirement(Enum):
    """
    Allelic requirement used to check the number of alleles and variants of an individual.

    The content validation treats the two members as follows:

    - ``MONO_ALLELIC``: exactly one variant and exactly one allele are expected.
    - ``BI_ALLELIC``: one or two variants and exactly two alleles are expected.
    """

    # a single variant allele is expected
    MONO_ALLELIC = "monoallelic"
    # two alleles are expected (carried by one or two variants)
    BI_ALLELIC = "biallelic"
1 change: 1 addition & 0 deletions src/pyphetools/creation/case_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ def get_individual(self)-> Individual:
def get_phenopacket(self):
"""
:return: the GA4GH phenopacket corresponding to the current case report
:rtype: PPKt.Phenopacket
"""
individual = self.get_individual()
pmid = individual.pmid
Expand Down
3 changes: 2 additions & 1 deletion src/pyphetools/creation/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ class Constants:
FEMALE_SYMBOL = 'FEMALE'
OTHER_SEX_SYMBOL = 'OTHER'
UNKOWN_SEX_SYMBOL = 'UNKNOWN'
NOT_PROVIDED = 'NOT_PROVIDED'
NOT_PROVIDED = 'NOT_PROVIDED'

27 changes: 17 additions & 10 deletions src/pyphetools/creation/individual.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ class Individual:

def __init__(self,
individual_id:str,
hpo_terms:List[HpTerm]=[],
hpo_terms:List[HpTerm]=None,
pmid:str=None,
title:str=None,
sex:str=Constants.NOT_PROVIDED,
age:str=Constants.NOT_PROVIDED,
interpretation_list:List[PPKt.VariantInterpretation]=[],
interpretation_list:List[PPKt.VariantInterpretation]=None,
disease:Disease=None):
"""Constructor
"""
Expand All @@ -50,8 +50,14 @@ def __init__(self,
else:
self._sex = sex
self._age = age
self._hpo_terms = hpo_terms
self._interpretation_list = interpretation_list
if hpo_terms is None:
self._hpo_terms = list()
else:
self._hpo_terms = hpo_terms
if interpretation_list is None:
self._interpretation_list = list()
else:
self._interpretation_list = interpretation_list
self._disease = disease
self._pmid = pmid
self._title = title
Expand Down Expand Up @@ -245,15 +251,15 @@ def to_ga4gh_phenopacket(self, metadata, phenopacket_id=None):
return php

@staticmethod
def output_individuals_as_phenopackets(individual_list, metadata, pmid=None, outdir="phenopackets"):
"""write a list of Individial objects to file in GA4GH Phenopacket format
def output_individuals_as_phenopackets(individual_list, metadata:MetaData, outdir="phenopackets"):
"""write a list of Individual objects to file in GA4GH Phenopacket format

This method depends on the MetaData object having a PMID and will fail otherwise

:param individual_list: List of individuals to be written to file as phenopackets
:type individual_list: List[Individual]
:param metadata: GA4GH Phenopacket Schema MetaData object
:type metadata: PPKt.MetaData
:param pmid: A string such as PMID:3415687. Defaults to None.
:type pmid: str
:param metadata: pyphetools MetaData object
:type metadata: MetaData
:param outdir: Path to output directory. Defaults to "phenopackets". Created if not exists.
:type outdir: str
"""
Expand All @@ -262,6 +268,7 @@ def output_individuals_as_phenopackets(individual_list, metadata, pmid=None, out
if not os.path.isdir(outdir):
os.makedirs(outdir)
written = 0
pmid = metadata.get_pmid()
for individual in individual_list:
phenopckt = individual.to_ga4gh_phenopacket(metadata=metadata)
json_string = MessageToJson(phenopckt)
Expand Down
48 changes: 31 additions & 17 deletions src/pyphetools/creation/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,11 @@ def hpo(self, version):
:type version: str
"""
self._resource_d["hp"] = Resource(resource_id="hp",
name="human phenotype ontology",
namespace_prefix="HP",
iriprefix="http://purl.obolibrary.org/obo/HP_",
url="http://purl.obolibrary.org/obo/hp.owl",
version=version)
name="human phenotype ontology",
namespace_prefix="HP",
iriprefix="http://purl.obolibrary.org/obo/HP_",
url="http://purl.obolibrary.org/obo/hp.owl",
version=version)

def geno(self, version=default_versions.get('geno')):
"""_summary_
Expand Down Expand Up @@ -150,19 +150,19 @@ def mondo(self, version=default_versions.get('mondo')):
:param version: the Mondo version
"""
self._resource_d["mondo"] = Resource(resource_id="mondo",
name="Mondo Disease Ontology",
namespace_prefix="MONDO",
iriprefix="http://purl.obolibrary.org/obo/MONDO_",
url="http://purl.obolibrary.org/obo/mondo.obo",
version=version)
name="Mondo Disease Ontology",
namespace_prefix="MONDO",
iriprefix="http://purl.obolibrary.org/obo/MONDO_",
url="http://purl.obolibrary.org/obo/mondo.obo",
version=version)

def sequence_ontology(self, version=default_versions.get("so")):
self._resource_d["so"] = Resource(resource_id="so",
name="Sequence types and features ontology",
namespace_prefix="SO",
iriprefix="http://purl.obolibrary.org/obo/SO_",
url="http://purl.obolibrary.org/obo/so.obo",
version=version)
name="Sequence types and features ontology",
namespace_prefix="SO",
iriprefix="http://purl.obolibrary.org/obo/SO_",
url="http://purl.obolibrary.org/obo/so.obo",
version=version)

def set_external_reference(self, pmid, pubmed_title) -> None:
"""
Expand All @@ -178,7 +178,21 @@ def set_external_reference(self, pmid, pubmed_title) -> None:
pm = pmid.replace("PMID:", "")
self._extref.reference = f"https://pubmed.ncbi.nlm.nih.gov/{pm}"
self._extref.description = pubmed_title


def get_pmid(self)->str:
    """
    Return the identifier of the external reference, which must be a PubMed identifier.

    :returns: The PubMed identifier
    :rtype: str
    :raises ValueError: if no external reference is set, or if its id does not start with "PMID"
    """
    ext_ref = self._extref
    # guard clauses: fail early when there is nothing usable to return
    if ext_ref is None:
        raise ValueError("Could not get PMID because MetaData._extref was None")
    if not ext_ref.id.startswith("PMID"):
        raise ValueError(f"Malformed PMID in external reference: {ext_ref.id}")
    return ext_ref.id


def to_ga4gh(self):
"""
Expand Down
5 changes: 3 additions & 2 deletions src/pyphetools/validation/cohort_validator.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from typing import List
from ..creation.allelic_requirement import AllelicRequirement
from ..creation.individual import Individual
from .validated_individual import ValidatedIndividual
import hpotk

class CohortValidator:

def __init__(self, cohort:List[Individual], ontology:hpotk.MinimalOntology, min_var:int, min_hpo:int, min_allele:int=None) -> None:
def __init__(self, cohort:List[Individual], ontology:hpotk.MinimalOntology, min_hpo:int, allelic_requirement:AllelicRequirement=None) -> None:
self._cohort = cohort
self._validated_individual_list = []
for indi in cohort:
vindi = ValidatedIndividual(individual=indi)
vindi.validate(ontology=ontology, min_hpo=min_hpo, min_allele=min_allele, min_var=min_var)
vindi.validate(ontology=ontology, min_hpo=min_hpo, allelic_requirement=allelic_requirement)
self._validated_individual_list.append(vindi)

def get_validated_individual_list(self):
Expand Down
40 changes: 26 additions & 14 deletions src/pyphetools/validation/content_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import os
import phenopackets
from .phenopacket_validator import PhenopacketValidator
from .validation_result import ValidationResult
from .validation_result import ValidationResult, ValidationResultBuilder
from ..creation.allelic_requirement import AllelicRequirement
from ..creation.individual import Individual
from typing import List, Union

Expand All @@ -26,18 +27,15 @@ class ContentValidator(PhenopacketValidator):
Note that this class does not test for all errors. Use phenopacket-tools to check for redundant or conflicting
annotations.

:param min_var: minimum number of variants for this phenopacket to be considered valid
:type min_var: int
:param min_hpo: minimum number of phenotypic features (HP terms) for this phenopacket to be considered valid
:type min_hpo: int
:param min_allele: minimum number of alleles for this phenopacket to be considered valid
:type min_allele: int
:param allelic_requirement: used to check number of alleles and variants
:type allelic_requirement: AllelicRequirement

"""
def __init__(self, min_var:int, min_hpo:int, min_allele:int=None) -> None:
self._min_var = min_var
def __init__(self, min_hpo:int, allelic_requirement:AllelicRequirement=None) -> None:
self._min_hpo = min_hpo
self._min_allele = min_allele
self._allelic_requirement = allelic_requirement


def validate_individual(self, individual:Individual) -> List[ValidationResult]:
Expand Down Expand Up @@ -66,12 +64,26 @@ def validate_individual(self, individual:Individual) -> List[ValidationResult]:
if n_pf < self._min_hpo:
msg = f"Minimum HPO terms required {self._min_hpo} but only {n_pf} found"
validation_results.append(ValidationResult.error(phenopacket_id=pp_id, message=msg))
if n_var < self._min_var:
msg = f"Minimum variants required {self._min_var} but only {n_var} found"
validation_results.append(ValidationResult.error(phenopacket_id=pp_id, message=msg))
if self._min_allele is not None and n_alleles < self._min_allele:
msg = f"Minimum alleles required {self._min_allele} but only {n_alleles} found"
validation_results.append(ValidationResult.error(phenopacket_id=pp_id, message=msg))
if self._allelic_requirement is None:
return validation_results
if self._allelic_requirement == AllelicRequirement.MONO_ALLELIC:
if n_var != 1:
msg = f"Expected one variant for monoallelic but got {n_var} variants"
val_result = ValidationResultBuilder(phenopacket_id=pp_id).error().incorrect_variant_count().set_message(msg=msg).build()
validation_results.append(val_result)
if n_alleles != 1:
msg = f"Expected one allele for monoallelic but got {n_alleles} alleles"
val_result = ValidationResultBuilder(phenopacket_id=pp_id).error().incorrect_allele_count().set_message(msg=msg).build()
validation_results.append(val_result)
elif self._allelic_requirement == AllelicRequirement.BI_ALLELIC:
if n_var < 1 or n_var > 2:
msg = f"Expected one or two variant for biallelic but got {n_var} variants"
val_result = ValidationResultBuilder(phenopacket_id=pp_id).error().incorrect_variant_count().set_message(msg=msg).build()
validation_results.append(val_result)
if n_alleles != 2:
msg = f"Expected two alleles for biallelic but got {n_alleles} alleles"
val_result = ValidationResultBuilder(phenopacket_id=pp_id).error().incorrect_allele_count().set_message(msg=msg).build()
validation_results.append(val_result)
return validation_results

def validate_phenopacket(self, phenopacket) -> List[ValidationResult]:
Expand Down
12 changes: 5 additions & 7 deletions src/pyphetools/validation/validated_individual.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

from ..creation.allelic_requirement import AllelicRequirement
from ..creation.individual import Individual
from .content_validator import ContentValidator
from typing import List
Expand All @@ -13,19 +13,17 @@ def __init__(self, individual:Individual) -> None:
self._clean_terms = []
self._validation_errors = []

def validate(self, ontology:hpotk.MinimalOntology, min_var:int, min_hpo:int, min_allele:int=None) -> None:
def validate(self, ontology:hpotk.MinimalOntology, min_hpo:int, allelic_requirement:AllelicRequirement=None) -> None:
"""validate an Individual object for errors in the Ontology or the minimum number of HPO terms/alleles/variants

:param ontology: HPO object
:type ontology: hpotk.MinimalOntology
:param min_var: minimum number of variants for this phenopacket to be considered valid
:type min_var: int
:param min_hpo: minimum number of phenotypic features (HP terms) for this phenopacket to be considered valid
:type min_hpo: int
:param min_allele: minimum number of alleles for this phenopacket to be considered valid
:type min_allele: int
:param allelic_requirement: used to check number of alleles and variants
:type allelic_requirement: AllelicRequirement
"""
cvalidator = ContentValidator(min_hpo=min_hpo, min_allele=min_allele, min_var=min_var)
cvalidator = ContentValidator(min_hpo=min_hpo, allelic_requirement=allelic_requirement)
validation_results = cvalidator.validate_individual(individual=self._individual)
self._validation_errors.extend(validation_results)
qc = OntologyQC(individual=self._individual, ontology=ontology)
Expand Down
12 changes: 6 additions & 6 deletions src/pyphetools/validation/validation_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ class Category(Enum):
REDUNDANT = 1
CONFLICT = 2
INSUFFICIENT_HPOS = 3
INSUFFICIENT_ALLELES = 4
INSUFFICIENT_VARIANTS = 5
INCORRECT_ALLELE_COUNT = 4
INCORRECT_VARIANT_COUNT = 5
MALFORMED_ID = 6
MALFORMED_LABEL = 7
UNKNOWN = 8
Expand Down Expand Up @@ -126,12 +126,12 @@ def insufficient_hpos(self):
self._category = Category.INSUFFICIENT_HPOS
return self

def insufficient_alleles(self):
self._category = Category.INSUFFICIENT_ALLELES
def incorrect_allele_count(self):
self._category = Category.INCORRECT_ALLELE_COUNT
return self

def insufficient_variants(self):
self._category = Category.INSUFFICIENT_VARIANTS
def incorrect_variant_count(self):
self._category = Category.INCORRECT_VARIANT_COUNT
return self

def set_message(self, msg):
Expand Down
36 changes: 36 additions & 0 deletions src/pyphetools/visualization/html_table_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from typing import List



class HtmlTableGenerator:
    """
    Helper class to generate an HTML table. This class is not intended to be used by client code.

    The complete markup is assembled in the constructor, one string per table line, and
    is returned joined by newlines from :meth:`get_html`. Caption, header items and cell
    values are interpolated into the markup as-is (no HTML escaping is performed).

    :param caption: text placed inside the ``<caption>`` element
    :param header_items: column headers; their number fixes the expected width of every row
    :type header_items: List[str]
    :param rows: table rows, each a list with one entry per column
    :type rows: List[List[str]]
    :raises ValueError: if a row does not have as many entries as there are header items
    """

    def __init__(self, caption, header_items:List[str], rows:List[List[str]]) -> None:
        # the column count must be known before any row is formatted
        self._n_columns = len(header_items)
        self._html_rows = [
            '<table style="border: 2px solid black;">',
            f'<caption>{caption}</caption>',
        ]
        self._html_rows.append(self._format_header(header_items=header_items))
        self._html_rows.extend(self._format_row(table_row) for table_row in rows)
        self._html_rows.append('</table>') # close table content

    @staticmethod
    def _wrap_cells(tag, items):
        """Wrap each item in ``<tag>...</tag>`` and enclose the result in one ``<tr>`` element."""
        cells = "".join(f"<{tag}>{item}</{tag}>" for item in items)
        return f"<tr>{cells}</tr>"

    def _format_header(self, header_items):
        """Return the header row, with each item in a ``<th>`` cell."""
        return self._wrap_cells("th", header_items)

    def _format_row(self, row:List[str]):
        """Return one body row, with each item in a ``<td>`` cell.

        :raises ValueError: if the row length differs from the number of header items
        """
        if len(row) == self._n_columns:
            return self._wrap_cells("td", row)
        # should never happen if we construct the tables correctly
        raise ValueError(f"All rows need to have {self._n_columns} columns")

    def get_html(self):
        """Return the table markup as a single newline-separated string."""
        return "\n".join(self._html_rows)


Loading