From 38d3043dd167766c56ae3db7bbbd454f95aa2425 Mon Sep 17 00:00:00 2001 From: Daniel Hegeman Date: Tue, 2 Apr 2024 09:23:39 -0700 Subject: [PATCH] fix: use 'descendant' descriptor instead of 'child' (#839) --- .../cellxgene_schema/convert.py | 6 +- .../schema_definitions/schema_definition.yaml | 44 ++++++------ .../cellxgene_schema/validate.py | 22 +++--- .../scripts/ontology_processing.py | 10 +-- .../tests/test_schema_compliance.py | 72 +++++++++---------- schema/5.1.0/schema.md | 44 ++++++------ 6 files changed, 99 insertions(+), 99 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/convert.py b/cellxgene_schema_cli/cellxgene_schema/convert.py index 9f54e4c29..d19a8169e 100644 --- a/cellxgene_schema_cli/cellxgene_schema/convert.py +++ b/cellxgene_schema_cli/cellxgene_schema/convert.py @@ -82,7 +82,7 @@ def update_categorical_column_vals(dataframe, column_name, update_map): # Set suspension type - # mappings of assays (or assays + child term assays) to corresponding suspension_type + # mappings of assays (or assays + descendant term assays) to corresponding suspension_type # valid assays with multiple possible suspension_types shown but commented out match_assays = { # 'EFO:0010010': ['cell', 'nucleus'], @@ -97,7 +97,7 @@ def update_categorical_column_vals(dataframe, column_name, update_map): "EFO:0030027": "nucleus", } - match_assays_or_children = { + match_assays_or_descendants = { # 'EFO:0030080': ['cell', 'nucleus'], "EFO:0007045": "nucleus", "EFO:0009294": "cell", @@ -113,7 +113,7 @@ def assign_suspension_type(item): if item in match_assays: return match_assays[item] else: - for k, v in match_assays_or_children.items(): + for k, v in match_assays_or_descendants.items(): try: if k == item or ontology_checker.is_descendent_of("EFO", item, k): return v diff --git a/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml b/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml index 
e53960fd9..9e32769ff 100644 --- a/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml +++ b/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml @@ -8,7 +8,7 @@ max_size_for_seurat: 2147483647 # 2^31 - 1 (max value for 4-byte signed int) raw: obs: assay_ontology_term_id: - not_children_of: + not_descendants_of: EFO: - EFO:0007045 # ATAC-seq - EFO:0008804 # Methyl-seq @@ -130,7 +130,7 @@ components: to_column: cell_type assay_ontology_term_id: error_message_suffix: >- - Only children terms of either 'EFO:0002772' or 'EFO:0010183' are allowed for assay_ontology_term_id + Only descendant terms of either 'EFO:0002772' or 'EFO:0010183' are allowed for assay_ontology_term_id type: curie curie_constraints: ontologies: @@ -145,7 +145,7 @@ components: type: curie to_column: assay disease_ontology_term_id: - error_message_suffix: "Only 'PATO:0000461' (normal), 'MONDO:0021178' (injury) or children terms thereof, or children terms of 'MONDO:0000001' (disease) are allowed" + error_message_suffix: "Only 'PATO:0000461' (normal), 'MONDO:0021178' (injury) or descendant terms thereof, or descendant terms of 'MONDO:0000001' (disease) are allowed" type: curie curie_constraints: ontologies: @@ -167,7 +167,7 @@ components: to_column: disease organism_ontology_term_id: type: curie - error_message_suffix: "Only children term ids of 'NCBITaxon:33208' for metazoan are allowed." + error_message_suffix: "Only descendant term ids of 'NCBITaxon:33208' for metazoan are allowed." curie_constraints: ontologies: - NCBITaxon @@ -201,7 +201,7 @@ components: rule: "tissue_type == 'tissue' | tissue_type == 'organoid'" error_message_suffix: >- When 'tissue_type' is 'tissue' or 'organoid', - 'tissue_ontology_term_id' MUST be a child term id of 'UBERON:0001062' (anatomical entity). + 'tissue_ontology_term_id' MUST be a descendant term id of 'UBERON:0001062' (anatomical entity). 
type: curie curie_constraints: ontologies: @@ -327,7 +327,7 @@ components: # If organism is not humnan nor mouse error_message_suffix: >- When 'organism_ontology_term_id' is not 'NCBITaxon:10090' nor 'NCBITaxon:9606', - 'development_stage_ontology_term_id' MUST be a child term id of 'UBERON:0000105' + 'development_stage_ontology_term_id' MUST be a descendant term id of 'UBERON:0000105' excluding 'UBERON:0000071', or unknown. curie_constraints: ontologies: @@ -365,7 +365,7 @@ components: during submission so that the assay(s) can be added to the schema definition document. dependencies: - - # If assay_ontology_term_id is EFO:0030080 or its children, 'suspension_type' MUST be 'cell' or 'nucleus' + # If assay_ontology_term_id is EFO:0030080 or its descendants, 'suspension_type' MUST be 'cell' or 'nucleus' complex_rule: match_ancestors: column: assay_ontology_term_id @@ -375,12 +375,12 @@ components: inclusive: True type: categorical error_message_suffix: >- - when 'assay_ontology_term_id' is EFO:0030080 or its children + when 'assay_ontology_term_id' is EFO:0030080 or its descendants enum: - "cell" - "nucleus" - - # If assay_ontology_term_id is EFO:0007045 or its children, 'suspension_type' MUST be 'nucleus' + # If assay_ontology_term_id is EFO:0007045 or its descendants, 'suspension_type' MUST be 'nucleus' complex_rule: match_ancestors: column: assay_ontology_term_id @@ -390,11 +390,11 @@ components: inclusive: True type: categorical error_message_suffix: >- - when 'assay_ontology_term_id' is EFO:0007045 or its children + when 'assay_ontology_term_id' is EFO:0007045 or its descendants enum: - "nucleus" - - # If assay_ontology_term_id is EFO:0009294 or its children, 'suspension_type' MUST be 'cell' + # If assay_ontology_term_id is EFO:0009294 or its descendants, 'suspension_type' MUST be 'cell' complex_rule: match_ancestors: column: assay_ontology_term_id @@ -404,11 +404,11 @@ components: inclusive: True type: categorical error_message_suffix: >- - when 
'assay_ontology_term_id' is EFO:0009294 or its children + when 'assay_ontology_term_id' is EFO:0009294 or its descendants enum: - "cell" - - # If assay_ontology_term_id is EFO:0010184 or its children, 'suspension_type' MUST be 'cell' or 'nucleus' + # If assay_ontology_term_id is EFO:0010184 or its descendants, 'suspension_type' MUST be 'cell' or 'nucleus' complex_rule: match_ancestors: column: assay_ontology_term_id @@ -418,12 +418,12 @@ components: inclusive: True type: categorical error_message_suffix: >- - when 'assay_ontology_term_id' is EFO:0010184 or its children + when 'assay_ontology_term_id' is EFO:0010184 or its descendants enum: - "cell" - "nucleus" - - # If assay_ontology_term_id is EFO:0009918 or its children, 'suspension_type' MUST be 'na' + # If assay_ontology_term_id is EFO:0009918 or its descendants, 'suspension_type' MUST be 'na' complex_rule: match_ancestors: column: assay_ontology_term_id @@ -433,11 +433,11 @@ components: inclusive: True type: categorical error_message_suffix: >- - when 'assay_ontology_term_id' is EFO:0009918 or its children + when 'assay_ontology_term_id' is EFO:0009918 or its descendants enum: - "na" - - # If assay_ontology_term_id is EFO:0700000 or its children, 'suspension_type' MUST be 'na' + # If assay_ontology_term_id is EFO:0700000 or its descendants, 'suspension_type' MUST be 'na' complex_rule: match_ancestors: column: assay_ontology_term_id @@ -447,11 +447,11 @@ components: inclusive: True type: categorical error_message_suffix: >- - when 'assay_ontology_term_id' is EFO:0700000 or its children + when 'assay_ontology_term_id' is EFO:0700000 or its descendants enum: - "na" - - # If assay_ontology_term_id is EFO:0008994 or its children, 'suspension_type' MUST be 'na' + # If assay_ontology_term_id is EFO:0008994 or its descendants, 'suspension_type' MUST be 'na' complex_rule: match_ancestors: column: assay_ontology_term_id @@ -461,11 +461,11 @@ components: inclusive: True type: categorical error_message_suffix: >- - when 
'assay_ontology_term_id' is EFO:0008994 or its children + when 'assay_ontology_term_id' is EFO:0008994 or its descendants enum: - "na" - - # If assay_ontology_term_id is EFO:0008919 or its children, 'suspension_type' MUST be 'cell' + # If assay_ontology_term_id is EFO:0008919 or its descendants, 'suspension_type' MUST be 'cell' complex_rule: match_ancestors: column: assay_ontology_term_id @@ -475,7 +475,7 @@ components: inclusive: True type: categorical error_message_suffix: >- - when 'assay_ontology_term_id' is EFO:0008919 or its children + when 'assay_ontology_term_id' is EFO:0008919 or its descendants enum: - "cell" - diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 041aecb7b..6ed8de188 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -105,7 +105,7 @@ def _has_forbidden_curie_ancestor( self, term_id: str, column_name: str, forbidden_def: Dict[str, List[str]] ) -> bool: """ - Validate if a single curie term id is a child term of any forbidden ancestors. + Validate if a single curie term id is a descendant term of any forbidden ancestors. If there is a forbidden ancestor detected, it adds it to self.errors. :param str term_id: the curie term id to validate @@ -118,7 +118,7 @@ def _has_forbidden_curie_ancestor( for ancestor in forbidden_def[ontology_name]: if ONTOLOGY_CHECKER.is_descendent_of(ontology_name, term_id, ancestor): self.errors.append( - f"'{term_id}' in '{column_name}' is not allowed. Child terms of " + f"'{term_id}' in '{column_name}' is not allowed. Descendant terms of " f"'{ancestor}' are not allowed." 
) return True @@ -131,7 +131,7 @@ def _validate_curie_ancestors( inclusive: bool = False, ) -> bool: """ - Validate a single curie term id is a valid child of any allowed ancestors + Validate a single curie term id is a valid descendant of any allowed ancestors :param str term_id: the curie term id to validate :param dict{str: list[str]} allowed_ancestors: keys must be ontology names and values must lists of @@ -151,8 +151,8 @@ def _validate_curie_ancestors( is_valid_term_id = ONTOLOGY_CHECKER.is_valid_term_id(ontology_name, term_id) is_valid_ancestor_id = ONTOLOGY_CHECKER.is_valid_term_id(ontology_name, ancestor) if is_valid_term_id & is_valid_ancestor_id: - is_child = ONTOLOGY_CHECKER.is_descendent_of(ontology_name, term_id, ancestor) - checks.append(is_child) + is_descendant = ONTOLOGY_CHECKER.is_descendent_of(ontology_name, term_id, ancestor) + checks.append(is_descendant) if True not in checks: return False @@ -516,7 +516,7 @@ def _validate_column_dependencies( def _generate_match_ancestors_query_fn(self, rule_def: Dict): """ Generates vectorized function and args to query a pandas dataframe. Function will determine whether values from - a specified column is a child term to a group of specified ancestors, returning a Bool. + a specified column is a descendant term to a group of specified ancestors, returning a Bool. :param rule_def: defines arguments to pass into vectorized ancestor match validation function :return: Tuple(function, Tuple(str, List[str], List[str])) """ @@ -992,10 +992,10 @@ def _validate_annotation_mapping(self, component_name: str, component: Mapping): f"The size of the ndarray stored for a 'adata.{component_name}['{key}']' MUST NOT be zero." ) - def _are_children_of(self, component: str, column: str, ontology_name: str, ancestors: List[str]) -> bool: + def _are_descendants_of(self, component: str, column: str, ontology_name: str, ancestors: List[str]) -> bool: """ Checks if elements in the specified column of the component (e.g. 
'assay_ontology_term_id' of 'adata.obs') are - children of the given ancestors. + descendants of the given ancestors. Ancestors checks are inclusive, meaning that a value is its own ancestor as well. @@ -1005,7 +1005,7 @@ def _are_children_of(self, component: str, column: str, ontology_name: str, ance :param List[str] ancestors: List of ancestors :rtype bool - :return True if any value in column is children of any ancestor. + :return True if any value in column is a descendant of any ancestor. """ curies = getattr(getattr(self.adata, component), column) @@ -1142,9 +1142,9 @@ def _validate_raw(self): for component, component_rules in self.schema_def["raw"].items(): for column, column_rules in component_rules.items(): for rule, rule_def in column_rules.items(): - if rule == "not_children_of": + if rule == "not_descendants_of": for ontology_name, ancestors in rule_def.items(): - checks.append(not self._are_children_of(component, column, ontology_name, ancestors)) + checks.append(not self._are_descendants_of(component, column, ontology_name, ancestors)) else: raise ValueError(f"'{rule}' rule in raw definition of the schema is not implemented ") diff --git a/cellxgene_schema_cli/scripts/ontology_processing.py b/cellxgene_schema_cli/scripts/ontology_processing.py index 1d0e71ac9..ef41aa31b 100644 --- a/cellxgene_schema_cli/scripts/ontology_processing.py +++ b/cellxgene_schema_cli/scripts/ontology_processing.py @@ -135,7 +135,7 @@ def _parse_owls( for onto_class in onto.classes(): term_id = onto_class.name.replace("_", ":") - # Skip terms that are not direct children from this ontology + # Skip terms that are not direct descendants from this ontology if onto.name != term_id.split(":")[0]: continue @@ -172,10 +172,10 @@ def _parse_owls( # Gets ancestors ancestors = _get_ancestors(onto_class, onto.name) - # If "children_of" specified in owl info then skip the current term if it is - # not a children of those indicated. 
- if (onto.name in owl_info and "children_of" in owl_info[onto.name]) and ( - not list(set(ancestors) & set(owl_info[onto.name]["children_of"])) + # If "descendants_of" specified in owl info then skip the current term if it is + # not a descendant of those indicated. + if (onto.name in owl_info and "descendants_of" in owl_info[onto.name]) and ( + not list(set(ancestors) & set(owl_info[onto.name]["descendants_of"])) ): onto_dict[onto.name].pop(term_id) continue diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py index a918f728f..b68c01ff4 100644 --- a/cellxgene_schema_cli/tests/test_schema_compliance.py +++ b/cellxgene_schema_cli/tests/test_schema_compliance.py @@ -386,7 +386,7 @@ def test_obsolete_term_id(self, validator_with_adata): def test_assay_ontology_term_id(self, validator_with_adata, assay_ontology_term_id, error): """ assay_ontology_term_id categorical with str categories. - This MUST be an EFO term that is a child of either "EFO:0002772" or "EFO:0010183" + This MUST be an EFO term that is a descendant of either "EFO:0002772" or "EFO:0010183" """ validator = validator_with_adata validator.adata.obs.loc[validator.adata.obs.index[0], "assay_ontology_term_id"] = assay_ontology_term_id @@ -462,7 +462,7 @@ def test_development_stage_ontology_term_id_mouse(self, validator_with_adata): def test_development_stage_ontology_term_id_all_species(self, validator_with_adata): """ - All other it MUST be children of UBERON:0000105 and not UBERON:0000071 + All other it MUST be descendants of UBERON:0000105 and not UBERON:0000071 """ validator = validator_with_adata obs = validator.adata.obs @@ -477,11 +477,11 @@ def test_development_stage_ontology_term_id_all_species(self, validator_with_ada assert validator.errors == [ "ERROR: 'EFO:0000001' in 'development_stage_ontology_term_id' is " "not a valid ontology term id of 'UBERON'. 
When 'organism_ontology_term_id' is not 'NCBITaxon:10090' " - "nor 'NCBITaxon:9606', 'development_stage_ontology_term_id' MUST be a child term id of " + "nor 'NCBITaxon:9606', 'development_stage_ontology_term_id' MUST be a descendant term id of " "'UBERON:0000105' excluding 'UBERON:0000071', or unknown." ] - # All other it MUST be children of UBERON:0000105 and not UBERON:0000071 + # All other it MUST be descendants of UBERON:0000105 and not UBERON:0000071 # Fail case UBERON:0000071 validator.errors = [] obs.loc[obs.index[0], "organism_ontology_term_id"] = "NCBITaxon:10114" @@ -494,7 +494,7 @@ def test_development_stage_ontology_term_id_all_species(self, validator_with_ada assert validator.errors == [ "ERROR: 'UBERON:0000071' in 'development_stage_ontology_term_id' is not allowed. When " "'organism_ontology_term_id' is not 'NCBITaxon:10090' " - "nor 'NCBITaxon:9606', 'development_stage_ontology_term_id' MUST be a child term id of " + "nor 'NCBITaxon:9606', 'development_stage_ontology_term_id' MUST be a descendant term id of " "'UBERON:0000105' excluding 'UBERON:0000071', or unknown.", ] @@ -502,8 +502,8 @@ def test_disease_ontology_term_id(self, validator_with_adata): """ disease_ontology_term_id categorical with str categories. This MUST be one of: - PATO:0000461 for normal or healthy - - child of MONDO:0000001 for disease - - self or child of MONDO:0021178 for injury + - descendant of MONDO:0000001 for disease + - self or descendant of MONDO:0021178 for injury """ validator = validator_with_adata obs = validator.adata.obs @@ -513,7 +513,7 @@ def test_disease_ontology_term_id(self, validator_with_adata): validator.validate_adata() assert validator.errors == [ "ERROR: 'EFO:0000001' in 'disease_ontology_term_id' is not a valid ontology term id of 'MONDO, PATO'. 
" - "Only 'PATO:0000461' (normal), 'MONDO:0021178' (injury) or children terms thereof, or children terms of 'MONDO:0000001' (disease) are allowed" + "Only 'PATO:0000461' (normal), 'MONDO:0021178' (injury) or descendant terms thereof, or descendant terms of 'MONDO:0000001' (disease) are allowed" ] # Invalid PATO term id @@ -522,7 +522,7 @@ def test_disease_ontology_term_id(self, validator_with_adata): validator.validate_adata() assert validator.errors == [ "ERROR: 'PATO:0001894' in 'disease_ontology_term_id' is not an allowed term id. " - "Only 'PATO:0000461' (normal), 'MONDO:0021178' (injury) or children terms thereof, or children terms of 'MONDO:0000001' (disease) are allowed" + "Only 'PATO:0000461' (normal), 'MONDO:0021178' (injury) or descendant terms thereof, or descendant terms of 'MONDO:0000001' (disease) are allowed" ] # Invalid MONDO term id - disease characteristic @@ -531,7 +531,7 @@ def test_disease_ontology_term_id(self, validator_with_adata): validator.validate_adata() assert validator.errors == [ "ERROR: 'MONDO:0021125' in 'disease_ontology_term_id' is not an allowed term id. " - "Only 'PATO:0000461' (normal), 'MONDO:0021178' (injury) or children terms thereof, or children terms of 'MONDO:0000001' (disease) are allowed" + "Only 'PATO:0000461' (normal), 'MONDO:0021178' (injury) or descendant terms thereof, or descendant terms of 'MONDO:0000001' (disease) are allowed" ] # Invalid MONDO term id - disease parent term @@ -540,7 +540,7 @@ def test_disease_ontology_term_id(self, validator_with_adata): validator.validate_adata() assert validator.errors == [ "ERROR: 'MONDO:0000001' in 'disease_ontology_term_id' is not an allowed term id. 
" - "Only 'PATO:0000461' (normal), 'MONDO:0021178' (injury) or children terms thereof, or children terms of 'MONDO:0000001' (disease) are allowed" + "Only 'PATO:0000461' (normal), 'MONDO:0021178' (injury) or descendant terms thereof, or descendant terms of 'MONDO:0000001' (disease) are allowed" ] # Valid PATO term id - healthy @@ -549,7 +549,7 @@ def test_disease_ontology_term_id(self, validator_with_adata): validator.validate_adata() assert validator.errors == [] - # Valid MONDO term id - disease child term + # Valid MONDO term id - disease descendant term validator.errors = [] obs.loc[obs.index[0], "disease_ontology_term_id"] = "MONDO:0005491" validator.validate_adata() @@ -561,7 +561,7 @@ def test_disease_ontology_term_id(self, validator_with_adata): validator.validate_adata() assert validator.errors == [] - # Valid MONDO term id - injury child term + # Valid MONDO term id - injury descendant term validator.errors = [] obs.loc[obs.index[0], "disease_ontology_term_id"] = "MONDO:0015796" validator.validate_adata() @@ -618,7 +618,7 @@ def test_tissue_ontology_term_id__unknown_invalid(self, validator_with_adata): assert not validator.validate_adata() assert validator.errors == [ "ERROR: 'unknown' in 'tissue_ontology_term_id' is not a valid ontology term id of 'UBERON'. " - "When 'tissue_type' is 'tissue' or 'organoid', 'tissue_ontology_term_id' MUST be a child " + "When 'tissue_type' is 'tissue' or 'organoid', 'tissue_ontology_term_id' MUST be a descendant " "term id of 'UBERON:0001062' (anatomical entity)." 
] @@ -690,7 +690,7 @@ def test_self_reported_ethnicity_ontology_term_id__forbidden_term(self, validato def test_self_reported_ethnicity_ontology_term_id__forbidden_term_ancestor(self, validator_with_adata): """ Test self_reported_ethnicity_ontology_term error message when passed an ontology term that has - both itself and its children forbidden + both itself and its descendants forbidden """ validator = validator_with_adata error_message_suffix = validator.schema_def["components"]["obs"]["columns"][ @@ -709,10 +709,10 @@ def test_self_reported_ethnicity_ontology_term_id__forbidden_term_ancestor(self, ) ] - def test_self_reported_ethnicity_ontology_term_id__forbidden_term_child(self, validator_with_adata): + def test_self_reported_ethnicity_ontology_term_id__forbidden_term_descendant(self, validator_with_adata): """ - Test self_reported_ethnicity_ontology_term error message when passed the child term of an ontology term that has - both itself and its children forbidden + Test self_reported_ethnicity_ontology_term error message when passed the descendant term of an ontology term that has + both itself and its descendants forbidden """ validator = validator_with_adata error_message_suffix = validator.schema_def["components"]["obs"]["columns"][ @@ -727,7 +727,7 @@ def test_self_reported_ethnicity_ontology_term_id__forbidden_term_child(self, va assert validator.errors == [ self.get_format_error_message( error_message_suffix, - "ERROR: 'HANCESTRO:0306' in 'self_reported_ethnicity_ontology_term_id' is not allowed. Child terms " + "ERROR: 'HANCESTRO:0306' in 'self_reported_ethnicity_ontology_term_id' is not allowed. Descendant terms " "of 'HANCESTRO:0304' are not allowed.", ) ] @@ -902,7 +902,7 @@ def test_self_reported_ethnicity_ontology_term_id__multi_term_list(self, validat def test_organism_ontology_term_id(self, validator_with_adata): """ - organism_ontology_term_id categorical with str categories. This MUST be a child of NCBITaxon:33208. 
+ organism_ontology_term_id categorical with str categories. This MUST be a descendant of NCBITaxon:33208. """ validator = validator_with_adata obs = validator.adata.obs @@ -918,7 +918,7 @@ def test_organism_ontology_term_id(self, validator_with_adata): validator.validate_adata() assert validator.errors == [ "ERROR: 'EFO:0000001' in 'organism_ontology_term_id' is not a valid " - "ontology term id of 'NCBITaxon'. Only children term ids of 'NCBITaxon:33208' for metazoan are allowed." + "ontology term id of 'NCBITaxon'. Only descendant term ids of 'NCBITaxon:33208' for metazoan are allowed." ] def test_tissue_ontology_term_id_base(self, validator_with_adata): @@ -934,7 +934,7 @@ def test_tissue_ontology_term_id_base(self, validator_with_adata): assert validator.errors == [ "ERROR: 'EFO:0000001' in 'tissue_ontology_term_id' is not a valid ontology term id of " "'UBERON'. When 'tissue_type' is 'tissue' or 'organoid', 'tissue_ontology_term_id' MUST be a " - "child term id of 'UBERON:0001062' (anatomical entity)." + "descendant term id of 'UBERON:0001062' (anatomical entity)." ] def test_tissue_ontology_term_id_cell_culture__suffix_in_term_id(self, validator_with_adata): @@ -1011,12 +1011,12 @@ def test_tissue_ontology_term_id_organoid(self, validator_with_adata): assert validator.errors == [ "ERROR: 'UBERON:0000057 (organoid)' in 'tissue_ontology_term_id' is not a valid ontology term id of " "'UBERON'. When 'tissue_type' is 'tissue' or 'organoid', 'tissue_ontology_term_id' MUST be a " - "child term id of 'UBERON:0001062' (anatomical entity)." + "descendant term id of 'UBERON:0001062' (anatomical entity)." 
] - def test_tissue_ontology_term_id_child_of_anatomical_entity__tissue(self, validator_with_adata): + def test_tissue_ontology_term_id_descendant_of_anatomical_entity__tissue(self, validator_with_adata): """ - Tissue ontology term ID must be a CHILD TERM of 'UBERON:0001062' (anatomical entity) if tissue_type is + Tissue ontology term ID must be a descendant term of 'UBERON:0001062' (anatomical entity) if tissue_type is organoid or tissue. """ validator = validator_with_adata @@ -1027,12 +1027,12 @@ def test_tissue_ontology_term_id_child_of_anatomical_entity__tissue(self, valida assert validator.errors == [ "ERROR: 'UBERON:0001062' in 'tissue_ontology_term_id' is not an allowed term id. " "When 'tissue_type' is 'tissue' or 'organoid', 'tissue_ontology_term_id' " - "MUST be a child term id of 'UBERON:0001062' (anatomical entity)." + "MUST be a descendant term id of 'UBERON:0001062' (anatomical entity)." ] - def test_tissue_ontology_term_id_child_of_anatomical_entity__organoid(self, validator_with_adata): + def test_tissue_ontology_term_id_descendant_of_anatomical_entity__organoid(self, validator_with_adata): """ - Tissue ontology term ID must be a CHILD TERM of 'UBERON:0001062' (anatomical entity) if tissue_type is + Tissue ontology term ID must be a descendant term of 'UBERON:0001062' (anatomical entity) if tissue_type is organoid or tissue. """ validator = validator_with_adata @@ -1044,7 +1044,7 @@ def test_tissue_ontology_term_id_child_of_anatomical_entity__organoid(self, vali assert validator.errors == [ "ERROR: 'UBERON:0001062' in 'tissue_ontology_term_id' is not an allowed term id. " "When 'tissue_type' is 'tissue' or 'organoid', 'tissue_ontology_term_id' " - "MUST be a child term id of 'UBERON:0001062' (anatomical entity)." + "MUST be a descendant term id of 'UBERON:0001062' (anatomical entity)." 
] def test_tissue_type(self, validator_with_adata): @@ -1064,7 +1064,7 @@ def test_tissue_type(self, validator_with_adata): def test_sex_ontology_term_id(self, validator_with_adata): """ sex_ontology_term_id categorical with str categories. - This MUST be a child of PATOPATO:0001894 for phenotypic sex or "unknown" if unavailable + This MUST be a descendant of PATO:0001894 for phenotypic sex or "unknown" if unavailable """ validator = validator_with_adata obs = validator.adata.obs @@ -1192,34 +1192,34 @@ def test_suspension_type_ancestors_inclusive(self, validator_with_adata, assay, assert validator.errors == [ f"ERROR: Column 'suspension_type' in dataframe 'obs' contains invalid values " f"'['{invalid_suspension_type}']'. Values must be one of {suspension_types} when " - f"'assay_ontology_term_id' is {assay} or its children" + f"'assay_ontology_term_id' is {assay} or its descendants" ] - def test_suspension_type_with_child_term_id_failure(self, validator_with_adata): + def test_suspension_type_with_descendant_term_id_failure(self, validator_with_adata): """ suspension_id categorical with str categories. This field MUST be "cell", "nucleus", or "na". The allowed values depend on the assay_ontology_term_id. MUST support matching against ancestor term rules if specified. """ validator = validator_with_adata obs = validator.adata.obs - obs.loc[obs.index[0], "assay_ontology_term_id"] = "EFO:0030008" # child of EFO:0009294 + obs.loc[obs.index[0], "assay_ontology_term_id"] = "EFO:0030008" # descendant of EFO:0009294 obs.loc[obs.index[0], "suspension_type"] = "nucleus" validator.validate_adata() assert validator.errors == [ "ERROR: Column 'suspension_type' in dataframe 'obs' contains invalid values " "'['nucleus']'. 
Values must be one of ['cell'] when " - "'assay_ontology_term_id' is EFO:0009294 or its children" + "'assay_ontology_term_id' is EFO:0009294 or its descendants" ] - def test_suspension_type_with_child_term_id_success(self, validator_with_adata): + def test_suspension_type_with_descendant_term_id_success(self, validator_with_adata): """ suspension_id categorical with str categories. This field MUST be "cell", "nucleus", or "na". The allowed values depend on the assay_ontology_term_id. MUST support matching against ancestor term rules if specified. """ validator = validator_with_adata obs = validator.adata.obs - obs.loc[obs.index[0], "assay_ontology_term_id"] = "EFO:0008904" # child of EFO:0007045 + obs.loc[obs.index[0], "assay_ontology_term_id"] = "EFO:0008904" # descendant of EFO:0007045 obs["suspension_type"][0] = "nucleus" validator.validate_adata() diff --git a/schema/5.1.0/schema.md b/schema/5.1.0/schema.md index 717c8286e..6bfc65aca 100644 --- a/schema/5.1.0/schema.md +++ b/schema/5.1.0/schema.md @@ -362,12 +362,12 @@ Curators MUST annotate the following columns in the `obs` dataframe: Value categorical with str categories. This MUST be an EFO term and either:

- An assay based on 10X Genomics products SHOULD either be "EFO:0008995" for 10x technology or preferably its most accurate child. An assay based on SMART (Switching Mechanism at the 5' end of the RNA Template) or SMARTer technology SHOULD either be "EFO:0010184" for Smart-like or preferably its most accurate child.

+ An assay based on 10X Genomics products SHOULD either be "EFO:0008995" for 10x technology or preferably its most accurate descendant. An assay based on SMART (Switching Mechanism at the 5' end of the RNA Template) or SMARTer technology SHOULD either be "EFO:0010184" for Smart-like or preferably its most accurate descendant.


Recommended values for specific assays:

@@ -499,7 +499,7 @@ Curators MUST annotate the following columns in the `obs` dataframe:
A term from the set of month-old stages
(e.g. MmusDv:0000062)
-
Otherwise, for all other organisms this MUST be the most accurate child of UBERON:0000105 for life cycle stage, excluding UBERON:0000071 for death stage. +
Otherwise, for all other organisms this MUST be the most accurate descendant of UBERON:0000105 for life cycle stage, excluding UBERON:0000071 for death stage. @@ -521,8 +521,8 @@ Curators MUST annotate the following columns in the `obs` dataframe: categorical with str categories. This MUST be one of:

@@ -603,7 +603,7 @@ Curators MUST annotate the following columns in the `obs` dataframe: Value - categorical with str categories. This MUST be a child of NCBITaxon:33208 for Metazoa. + categorical with str categories. This MUST be a descendant of NCBITaxon:33208 for Metazoa. @@ -635,7 +635,7 @@ Curators MUST annotate the following columns in the `obs` dataframe: href="https://www.ebi.ac.uk/ols4/ontologies/hancestro/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FHANCESTRO_0002?lang=en" >"HANCESTRO:0002" - for regions and its children + for regions and its descendants
  • "HANCESTRO:0304" - for ancestry status and its children + for ancestry status and its descendants
  • "GEO:000000374" - for continent and its children: + for continent and its descendants: