Skip to content

Commit

Permalink
fix: use 'descendant' descriptor instead of 'child' (#839)
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Hegeman authored Apr 2, 2024
1 parent 0aa0002 commit 38d3043
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 99 deletions.
6 changes: 3 additions & 3 deletions cellxgene_schema_cli/cellxgene_schema/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def update_categorical_column_vals(dataframe, column_name, update_map):

# Set suspension type

# mappings of assays (or assays + child term assays) to corresponding suspension_type
# mappings of assays (or assays + descendant term assays) to corresponding suspension_type
# valid assays with multiple possible suspension_types shown but commented out
match_assays = {
# 'EFO:0010010': ['cell', 'nucleus'],
Expand All @@ -97,7 +97,7 @@ def update_categorical_column_vals(dataframe, column_name, update_map):
"EFO:0030027": "nucleus",
}

match_assays_or_children = {
match_assays_or_descendants = {
# 'EFO:0030080': ['cell', 'nucleus'],
"EFO:0007045": "nucleus",
"EFO:0009294": "cell",
Expand All @@ -113,7 +113,7 @@ def assign_suspension_type(item):
if item in match_assays:
return match_assays[item]
else:
for k, v in match_assays_or_children.items():
for k, v in match_assays_or_descendants.items():
try:
if k == item or ontology_checker.is_descendent_of("EFO", item, k):
return v
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ max_size_for_seurat: 2147483647 # 2^31 - 1 (max value for 4-byte signed int)
raw:
obs:
assay_ontology_term_id:
not_children_of:
not_descendants_of:
EFO:
- EFO:0007045 # ATAC-seq
- EFO:0008804 # Methyl-seq
Expand Down Expand Up @@ -130,7 +130,7 @@ components:
to_column: cell_type
assay_ontology_term_id:
error_message_suffix: >-
Only children terms of either 'EFO:0002772' or 'EFO:0010183' are allowed for assay_ontology_term_id
Only descendant terms of either 'EFO:0002772' or 'EFO:0010183' are allowed for assay_ontology_term_id
type: curie
curie_constraints:
ontologies:
Expand All @@ -145,7 +145,7 @@ components:
type: curie
to_column: assay
disease_ontology_term_id:
error_message_suffix: "Only 'PATO:0000461' (normal), 'MONDO:0021178' (injury) or children terms thereof, or children terms of 'MONDO:0000001' (disease) are allowed"
error_message_suffix: "Only 'PATO:0000461' (normal), 'MONDO:0021178' (injury) or descendant terms thereof, or descendant terms of 'MONDO:0000001' (disease) are allowed"
type: curie
curie_constraints:
ontologies:
Expand All @@ -167,7 +167,7 @@ components:
to_column: disease
organism_ontology_term_id:
type: curie
error_message_suffix: "Only children term ids of 'NCBITaxon:33208' for metazoan are allowed."
error_message_suffix: "Only descendant term ids of 'NCBITaxon:33208' for metazoan are allowed."
curie_constraints:
ontologies:
- NCBITaxon
Expand Down Expand Up @@ -201,7 +201,7 @@ components:
rule: "tissue_type == 'tissue' | tissue_type == 'organoid'"
error_message_suffix: >-
When 'tissue_type' is 'tissue' or 'organoid',
'tissue_ontology_term_id' MUST be a child term id of 'UBERON:0001062' (anatomical entity).
'tissue_ontology_term_id' MUST be a descendant term id of 'UBERON:0001062' (anatomical entity).
type: curie
curie_constraints:
ontologies:
Expand Down Expand Up @@ -327,7 +327,7 @@ components:
# If organism is neither human nor mouse
error_message_suffix: >-
When 'organism_ontology_term_id' is not 'NCBITaxon:10090' nor 'NCBITaxon:9606',
'development_stage_ontology_term_id' MUST be a child term id of 'UBERON:0000105'
'development_stage_ontology_term_id' MUST be a descendant term id of 'UBERON:0000105'
excluding 'UBERON:0000071', or unknown.
curie_constraints:
ontologies:
Expand Down Expand Up @@ -365,7 +365,7 @@ components:
during submission so that the assay(s) can be added to the schema definition document.
dependencies:
-
# If assay_ontology_term_id is EFO:0030080 or its children, 'suspension_type' MUST be 'cell' or 'nucleus'
# If assay_ontology_term_id is EFO:0030080 or its descendants, 'suspension_type' MUST be 'cell' or 'nucleus'
complex_rule:
match_ancestors:
column: assay_ontology_term_id
Expand All @@ -375,12 +375,12 @@ components:
inclusive: True
type: categorical
error_message_suffix: >-
when 'assay_ontology_term_id' is EFO:0030080 or its children
when 'assay_ontology_term_id' is EFO:0030080 or its descendants
enum:
- "cell"
- "nucleus"
-
# If assay_ontology_term_id is EFO:0007045 or its children, 'suspension_type' MUST be 'nucleus'
# If assay_ontology_term_id is EFO:0007045 or its descendants, 'suspension_type' MUST be 'nucleus'
complex_rule:
match_ancestors:
column: assay_ontology_term_id
Expand All @@ -390,11 +390,11 @@ components:
inclusive: True
type: categorical
error_message_suffix: >-
when 'assay_ontology_term_id' is EFO:0007045 or its children
when 'assay_ontology_term_id' is EFO:0007045 or its descendants
enum:
- "nucleus"
-
# If assay_ontology_term_id is EFO:0009294 or its children, 'suspension_type' MUST be 'cell'
# If assay_ontology_term_id is EFO:0009294 or its descendants, 'suspension_type' MUST be 'cell'
complex_rule:
match_ancestors:
column: assay_ontology_term_id
Expand All @@ -404,11 +404,11 @@ components:
inclusive: True
type: categorical
error_message_suffix: >-
when 'assay_ontology_term_id' is EFO:0009294 or its children
when 'assay_ontology_term_id' is EFO:0009294 or its descendants
enum:
- "cell"
-
# If assay_ontology_term_id is EFO:0010184 or its children, 'suspension_type' MUST be 'cell' or 'nucleus'
# If assay_ontology_term_id is EFO:0010184 or its descendants, 'suspension_type' MUST be 'cell' or 'nucleus'
complex_rule:
match_ancestors:
column: assay_ontology_term_id
Expand All @@ -418,12 +418,12 @@ components:
inclusive: True
type: categorical
error_message_suffix: >-
when 'assay_ontology_term_id' is EFO:0010184 or its children
when 'assay_ontology_term_id' is EFO:0010184 or its descendants
enum:
- "cell"
- "nucleus"
-
# If assay_ontology_term_id is EFO:0009918 or its children, 'suspension_type' MUST be 'na'
# If assay_ontology_term_id is EFO:0009918 or its descendants, 'suspension_type' MUST be 'na'
complex_rule:
match_ancestors:
column: assay_ontology_term_id
Expand All @@ -433,11 +433,11 @@ components:
inclusive: True
type: categorical
error_message_suffix: >-
when 'assay_ontology_term_id' is EFO:0009918 or its children
when 'assay_ontology_term_id' is EFO:0009918 or its descendants
enum:
- "na"
-
# If assay_ontology_term_id is EFO:0700000 or its children, 'suspension_type' MUST be 'na'
# If assay_ontology_term_id is EFO:0700000 or its descendants, 'suspension_type' MUST be 'na'
complex_rule:
match_ancestors:
column: assay_ontology_term_id
Expand All @@ -447,11 +447,11 @@ components:
inclusive: True
type: categorical
error_message_suffix: >-
when 'assay_ontology_term_id' is EFO:0700000 or its children
when 'assay_ontology_term_id' is EFO:0700000 or its descendants
enum:
- "na"
-
# If assay_ontology_term_id is EFO:0008994 or its children, 'suspension_type' MUST be 'na'
# If assay_ontology_term_id is EFO:0008994 or its descendants, 'suspension_type' MUST be 'na'
complex_rule:
match_ancestors:
column: assay_ontology_term_id
Expand All @@ -461,11 +461,11 @@ components:
inclusive: True
type: categorical
error_message_suffix: >-
when 'assay_ontology_term_id' is EFO:0008994 or its children
when 'assay_ontology_term_id' is EFO:0008994 or its descendants
enum:
- "na"
-
# If assay_ontology_term_id is EFO:0008919 or its children, 'suspension_type' MUST be 'cell'
# If assay_ontology_term_id is EFO:0008919 or its descendants, 'suspension_type' MUST be 'cell'
complex_rule:
match_ancestors:
column: assay_ontology_term_id
Expand All @@ -475,7 +475,7 @@ components:
inclusive: True
type: categorical
error_message_suffix: >-
when 'assay_ontology_term_id' is EFO:0008919 or its children
when 'assay_ontology_term_id' is EFO:0008919 or its descendants
enum:
- "cell"
-
Expand Down
22 changes: 11 additions & 11 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def _has_forbidden_curie_ancestor(
self, term_id: str, column_name: str, forbidden_def: Dict[str, List[str]]
) -> bool:
"""
Validate if a single curie term id is a child term of any forbidden ancestors.
Validate if a single curie term id is a descendant term of any forbidden ancestors.
If there is a forbidden ancestor detected, it adds it to self.errors.
:param str term_id: the curie term id to validate
Expand All @@ -118,7 +118,7 @@ def _has_forbidden_curie_ancestor(
for ancestor in forbidden_def[ontology_name]:
if ONTOLOGY_CHECKER.is_descendent_of(ontology_name, term_id, ancestor):
self.errors.append(
f"'{term_id}' in '{column_name}' is not allowed. Child terms of "
f"'{term_id}' in '{column_name}' is not allowed. Descendant terms of "
f"'{ancestor}' are not allowed."
)
return True
Expand All @@ -131,7 +131,7 @@ def _validate_curie_ancestors(
inclusive: bool = False,
) -> bool:
"""
Validate a single curie term id is a valid child of any allowed ancestors
Validate a single curie term id is a valid descendant of any allowed ancestors
:param str term_id: the curie term id to validate
:param dict{str: list[str]} allowed_ancestors: keys must be ontology names and values must be lists of
Expand All @@ -151,8 +151,8 @@ def _validate_curie_ancestors(
is_valid_term_id = ONTOLOGY_CHECKER.is_valid_term_id(ontology_name, term_id)
is_valid_ancestor_id = ONTOLOGY_CHECKER.is_valid_term_id(ontology_name, ancestor)
if is_valid_term_id & is_valid_ancestor_id:
is_child = ONTOLOGY_CHECKER.is_descendent_of(ontology_name, term_id, ancestor)
checks.append(is_child)
is_descendant = ONTOLOGY_CHECKER.is_descendent_of(ontology_name, term_id, ancestor)
checks.append(is_descendant)

if True not in checks:
return False
Expand Down Expand Up @@ -516,7 +516,7 @@ def _validate_column_dependencies(
def _generate_match_ancestors_query_fn(self, rule_def: Dict):
"""
Generates vectorized function and args to query a pandas dataframe. Function will determine whether values from
a specified column is a child term to a group of specified ancestors, returning a Bool.
a specified column are descendant terms of a group of specified ancestors, returning a Bool.
:param rule_def: defines arguments to pass into vectorized ancestor match validation function
:return: Tuple(function, Tuple(str, List[str], List[str]))
"""
Expand Down Expand Up @@ -992,10 +992,10 @@ def _validate_annotation_mapping(self, component_name: str, component: Mapping):
f"The size of the ndarray stored for a 'adata.{component_name}['{key}']' MUST NOT be zero."
)

def _are_children_of(self, component: str, column: str, ontology_name: str, ancestors: List[str]) -> bool:
def _are_descendants_of(self, component: str, column: str, ontology_name: str, ancestors: List[str]) -> bool:
"""
Checks if elements in the specified column of the component (e.g. 'assay_ontology_term_id' of 'adata.obs') are
children of the given ancestors.
descendants of the given ancestors.
Ancestor checks are inclusive, meaning that a value is its own ancestor as well.
Expand All @@ -1005,7 +1005,7 @@ def _are_children_of(self, component: str, column: str, ontology_name: str, ance
:param List[str] ancestors: List of ancestors
:rtype bool
:return True if any value in column is children of any ancestor.
:return True if any value in column is a descendant of any ancestor.
"""

curies = getattr(getattr(self.adata, component), column)
Expand Down Expand Up @@ -1142,9 +1142,9 @@ def _validate_raw(self):
for component, component_rules in self.schema_def["raw"].items():
for column, column_rules in component_rules.items():
for rule, rule_def in column_rules.items():
if rule == "not_children_of":
if rule == "not_descendants_of":
for ontology_name, ancestors in rule_def.items():
checks.append(not self._are_children_of(component, column, ontology_name, ancestors))
checks.append(not self._are_descendants_of(component, column, ontology_name, ancestors))
else:
raise ValueError(f"'{rule}' rule in raw definition of the schema is not implemented ")

Expand Down
10 changes: 5 additions & 5 deletions cellxgene_schema_cli/scripts/ontology_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def _parse_owls(
for onto_class in onto.classes():
term_id = onto_class.name.replace("_", ":")

# Skip terms that are not direct children from this ontology
# Skip terms that do not belong to this ontology (term ID prefix differs from the ontology name)
if onto.name != term_id.split(":")[0]:
continue

Expand Down Expand Up @@ -172,10 +172,10 @@ def _parse_owls(
# Gets ancestors
ancestors = _get_ancestors(onto_class, onto.name)

# If "children_of" specified in owl info then skip the current term if it is
# not a children of those indicated.
if (onto.name in owl_info and "children_of" in owl_info[onto.name]) and (
not list(set(ancestors) & set(owl_info[onto.name]["children_of"]))
# If "descendants_of" is specified in owl info, skip the current term if it is
# not a descendant of any of those indicated.
if (onto.name in owl_info and "descendants_of" in owl_info[onto.name]) and (
not list(set(ancestors) & set(owl_info[onto.name]["descendants_of"]))
):
onto_dict[onto.name].pop(term_id)
continue
Expand Down
Loading

0 comments on commit 38d3043

Please sign in to comment.