Skip to content

Commit

Permalink
feat: integrate cellxgene-ontology-guide with cellxgene-schema CLI (#815)
Browse files Browse the repository at this point in the history
  • Loading branch information
nayib-jose-gloria authored Apr 3, 2024
1 parent 38d3043 commit bc86a51
Show file tree
Hide file tree
Showing 12 changed files with 33 additions and 410 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/push_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ jobs:
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
- name: Set up Python 3.8
- name: Set up Python 3.10
uses: actions/setup-python@v1
with:
python-version: 3.8
python-version: "3.10"
- name: Python cache
uses: actions/cache@v1
with:
Expand Down
16 changes: 0 additions & 16 deletions cellxgene_schema_cli/cellxgene_schema/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,21 +78,6 @@ def remove_labels(input_file, output_file):
anndata_label_remover.adata.write(output_file)


@click.command(
    name="convert",
    short_help="Convert an h5ad from version 2.0.0 to version 3.0.0",
    # The two adjacent literals are concatenated by Python, so the first one
    # must end with a space; otherwise the help text reads "eitherthe input".
    help="Convert an h5ad from version 2.0.0 to version 3.0.0. No validation will be performed on either "
    "the input or the output file.",
    deprecated=True,
)
@click.argument("input_file", nargs=1, type=click.Path(exists=True, dir_okay=False))
@click.argument("output_file", nargs=1, type=click.Path(exists=False, dir_okay=False))
def convert(input_file, output_file):
    # CLI entry point for the deprecated ``convert`` sub-command.
    #
    # input_file:  path of the h5ad file to read (click requires it to exist).
    # output_file: path handed to the converter as the destination.
    #
    # The converter is imported at call time rather than at module import
    # time. It is aliased so it does not shadow this command's own name
    # inside the function body.
    from .convert import convert as convert_h5ad

    convert_h5ad(input_file, output_file)


@click.command(
name="migrate",
short_help="Convert an h5ad to the latest schema version.",
Expand All @@ -110,7 +95,6 @@ def migrate(input_file, output_file, collection_id, dataset_id):


# Register each sub-command defined above on `schema_cli`
# (presumably the top-level click group; its definition is not visible here).
schema_cli.add_command(schema_validate)
schema_cli.add_command(convert)
schema_cli.add_command(migrate)
schema_cli.add_command(remove_labels)

Expand Down
154 changes: 0 additions & 154 deletions cellxgene_schema_cli/cellxgene_schema/convert.py

This file was deleted.

6 changes: 2 additions & 4 deletions cellxgene_schema_cli/cellxgene_schema/migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,10 +369,8 @@ def migrate(input_file, output_file, collection_id, dataset_id):
# https://github.com/chanzuckerberg/single-cell-curation/blob/43f891005fb9439dbbb747fa0df8f0435ebf3f7c/cellxgene_schema_cli/cellxgene_schema/validate.py#L761-L762
for key, value in list(dataset.uns.items()):
if any(
[
isinstance(value, sparse_class)
for sparse_class in (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix, scipy.sparse.coo_matrix)
]
isinstance(value, sparse_class)
for sparse_class in (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix, scipy.sparse.coo_matrix)
):
if value.nnz == 0: # number non-zero
del dataset.uns[key]
Expand Down
164 changes: 1 addition & 163 deletions cellxgene_schema_cli/cellxgene_schema/ontology.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import enum
import gzip
import json
import os
from typing import List, Set, Union
from typing import Union

from . import env

Expand Down Expand Up @@ -108,164 +107,3 @@ def get_length(self, gene_id: str) -> int:
return self.gene_dict[gene_id][1]
else:
raise ValueError(f"The id '{gene_id}' is not a valid ENSEMBL id for '{self.species}'")


class OntologyChecker:
    """Validates ontology term ids and looks up their labels and ancestors."""

    JSON_FILE = env.PARSED_ONTOLOGIES_FILE

    def __init__(self):  # type: ignore
        # The parsed ontologies are stored as gzip-compressed JSON; read as text.
        with gzip.open(self.JSON_FILE, "rt") as handle:
            self.ontology_dict = json.load(handle)

    def get_ontologies(self) -> List[str]:
        """
        List the ontologies known to this checker.

        :rtype: list[str]
        :return: the top-level keys of the loaded ontology dictionary
        """

        return [*self.ontology_dict.keys()]

    def get_term_dict(self, ontology: str, term_id: str) -> dict:  # type: ignore
        """
        Fetch the full record stored for a term.

        :param str ontology: the ontology id
        :param str term_id: the ontology term id
        :rtype: dict
        :return: the entry stored under ``term_id`` in ``ontology``
        :raises ValueError: if the ontology or the term id is unknown
        """

        self.assert_term_id(ontology, term_id)
        terms = self.ontology_dict[ontology]
        return terms[term_id]  # type: ignore

    def get_term_label(self, ontology: str, term_id: str) -> str:
        """
        Fetch the label of a term.

        :param str ontology: the ontology id
        :param str term_id: the ontology term id
        :rtype: str
        :return: the value of the term's "label" field
        :raises ValueError: if the ontology or the term id is unknown
        """

        self.assert_term_id(ontology, term_id)
        term = self.ontology_dict[ontology][term_id]
        return term["label"]  # type: ignore

    def get_term_ancestors(self, ontology: str, term_id: str) -> Set[str]:
        """
        Fetch the ancestors of a term.

        :param str ontology: the ontology id
        :param str term_id: the ontology term id
        :rtype: Set[str]
        :return: the term's "ancestors" field, converted to a set
        :raises ValueError: if the ontology or the term id is unknown
        """

        self.assert_term_id(ontology, term_id)
        ancestors = self.ontology_dict[ontology][term_id]["ancestors"]
        return set(ancestors)

    def is_valid_ontology(self, ontology: str) -> bool:
        """
        Tell whether an ontology is loaded.

        :param str ontology: the ontology id
        :rtype: bool
        :return: True when ``ontology`` is a key of the ontology dictionary
        """

        known = self.ontology_dict
        return ontology in known

    def is_term_id_deprecated(self, ontology: str, term_id: str) -> bool:
        """
        Tell whether a term is flagged as deprecated.

        :param str ontology: the ontology id
        :param str term_id: the ontology term id
        :rtype: bool
        :return: the value of the term's "deprecated" field
        :raises ValueError: if the ontology or the term id is unknown
        """

        self.assert_term_id(ontology, term_id)
        term = self.ontology_dict[ontology][term_id]
        return term["deprecated"]  # type: ignore

    def is_valid_term_id(self, ontology: str, term_id: str) -> bool:
        """
        Tell whether a term id exists in an ontology.

        :param str ontology: the ontology id
        :param str term_id: the ontology term id
        :rtype: bool
        :return: True when ``term_id`` is present in ``ontology``
        :raises ValueError: if the ontology itself is unknown
        """

        # An unknown ontology is an error, not a plain "False".
        self.assert_ontology(ontology)
        terms = self.ontology_dict[ontology]
        return term_id in terms

    def is_descendent_of(self, ontology: str, query_term_id: str, target_term_id: str) -> bool:
        """
        Tell whether one term descends from another within an ontology.

        :param str ontology: the ontology id
        :param str query_term_id: the candidate descendent term id
        :param str target_term_id: the candidate ancestor term id
        :rtype: bool
        :return: True when ``target_term_id`` is among the ancestors of ``query_term_id``
        :raises ValueError: if the ontology or either term id is unknown
        """

        # Validate the query term first, then the target term.
        for candidate in (query_term_id, target_term_id):
            self.assert_term_id(ontology, candidate)

        query_ancestors = self.get_term_ancestors(ontology, query_term_id)
        return target_term_id in query_ancestors

    def assert_ontology(self, ontology: str):  # type: ignore
        """
        Require that an ontology is loaded.

        :param str ontology: the ontology id
        :rtype: None
        :raises ValueError: if ``ontology`` is not in the ontology dictionary
        """

        if self.is_valid_ontology(ontology):
            return
        raise ValueError(f"The ontology '{ontology}' is not present in the ontology checker")

    def assert_term_id(self, ontology: str, term_id: str):  # type: ignore
        """
        Require that a term id exists in an ontology.

        :param str ontology: the ontology id
        :param str term_id: the ontology term id
        :rtype: None
        :raises ValueError: if the ontology is unknown or ``term_id`` is not in it
        """

        if self.is_valid_term_id(ontology, term_id):
            return
        raise ValueError(f"The term id '{term_id}' is not present in the ontology '{ontology}'")

    def assert_descendent_of(self, ontology: str, query_term_id: str, target_term_id: str):  # type: ignore
        """
        Require that one term descends from another within an ontology.

        :param str ontology: the ontology id
        :param str query_term_id: the candidate descendent term id
        :param str target_term_id: the candidate ancestor term id
        :rtype: None
        :raises ValueError: if either id is unknown or the descendent relation does not hold
        """

        if self.is_descendent_of(ontology, query_term_id, target_term_id):
            return
        raise ValueError(
            f"The term id '{query_term_id}' is not a descendent of the term id '{target_term_id}'"
            f" in the ontology '{ontology}'"
        )
Loading

0 comments on commit bc86a51

Please sign in to comment.