Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More readable output for validate #307

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ ignore =
S318 # don't worry about unsafe xml
S310 # TODO remove this later and switch to using requests
B018 # This is 'useless' statements which are new atm.
B009 # Prevents me from using get_attr after checking has_attr
exclude =
sssom/sssom_datamodel.py
sssom/cliquesummary.py
Expand Down
13 changes: 10 additions & 3 deletions sssom/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from .typehints import Metadata
from .util import (
MappingSetDataFrame,
SssomMalformedYamlError,
are_params_slots,
augment_metadata,
is_curie,
Expand Down Expand Up @@ -114,18 +115,24 @@ def parse_file(


def validate_file(
input_path: str, validation_types: List[SchemaValidationType]
input_path: str, validation_types: List[SchemaValidationType], verbose: bool = False
) -> None:
"""Validate the incoming SSSOM TSV according to the SSSOM specification.

:param input_path: The path to the input file in one of the legal formats, e.g. obographs, alignmentapi-xml
:param validation_types: A list of validation types to run.
:param verbose: If true, print detailed error message.
"""
# Two things to check:
# 1. All prefixes in the DataFrame are defined in prefix_map
# 2. All columns in the DataFrame abide by sssom-schema.
msdf = parse_sssom_table(file_path=input_path)
validate(msdf=msdf, validation_types=validation_types)
try:
msdf = parse_sssom_table(file_path=input_path)
validate(msdf=msdf, validation_types=validation_types)
except SssomMalformedYamlError as e:
print("Your SSSOM mapping set metadata is malformed YAML.")
if verbose:
print(e)


def split_file(input_path: str, output_directory: Union[str, Path]) -> None:
Expand Down
20 changes: 15 additions & 5 deletions sssom/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,12 @@
URI_SSSOM_MAPPINGS,
MappingSetDataFrame,
NoCURIEException,
SssomMalformedYamlError,
curie_from_uri,
get_file_extension,
get_sssom_error_message,
is_multivalued_slot,
purl_to_source_id,
raise_for_bad_path,
read_pandas,
to_mapping_set_dataframe,
Expand Down Expand Up @@ -534,9 +537,13 @@ def from_alignment_minidom(
elif node_name == "onto2":
ms[OBJECT_SOURCE_ID] = e.firstChild.nodeValue
elif node_name == "uri1":
ms[SUBJECT_SOURCE] = e.firstChild.nodeValue
ms[SUBJECT_SOURCE] = purl_to_source_id(
uri=e.firstChild.nodeValue, prefix_map=prefix_map
)
elif node_name == "uri2":
ms[OBJECT_SOURCE] = e.firstChild.nodeValue
ms[OBJECT_SOURCE] = purl_to_source_id(
uri=e.firstChild.nodeValue, prefix_map=prefix_map
)

ms.mappings = mlist # type: ignore
_set_metadata_in_mapping_set(mapping_set=ms, metadata=meta)
Expand Down Expand Up @@ -745,9 +752,12 @@ def _read_metadata_from_table(path: Union[str, Path]) -> Dict[str, Any]:
break

if yamlstr:
meta = yaml.safe_load(yamlstr)
logging.info(f"Meta={meta}")
return meta
try:
meta = yaml.safe_load(yamlstr)
logging.info(f"Meta={meta}")
return meta
except Exception as e:
raise SssomMalformedYamlError(get_sssom_error_message(e))
return {}


Expand Down
29 changes: 28 additions & 1 deletion sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,10 +941,24 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) ->
return map_dict


def get_sssom_error_message(e: Exception):
    """Get a human-readable error message from an exception.

    :param e: The exception to describe.
    :return: The exception's ``message`` attribute when it defines one,
        otherwise the result of ``str(e)``.
    """
    if hasattr(e, "message"):
        return getattr(e, "message")
    # Most exceptions carry no ``message`` attribute, so fall back to their
    # string form. Previously ``str(e)`` was evaluated but its result was
    # discarded, so this branch always produced an empty string.
    return str(e)


class NoCURIEException(ValueError):
    """Raised when a CURIE cannot be parsed with a given prefix map.

    Subclasses :class:`ValueError`, so handlers for ``ValueError`` also catch it.
    """


class SssomMalformedYamlError(ValueError):
    """Raised when the YAML metadata header of an SSSOM file is malformed.

    Subclasses :class:`ValueError`, so handlers for ``ValueError`` also catch it.
    """


CURIE_RE = re.compile(r"[A-Za-z0-9_.]+[:][A-Za-z0-9_]")


Expand Down Expand Up @@ -1012,6 +1026,19 @@ def get_prefixes_used_in_table(df: pd.DataFrame) -> List[str]:
return list(set(prefixes))


def purl_to_source_id(uri: str, prefix_map: Mapping[str, str]) -> str:
    """Derive a generic source reference (a CURIE) from a PURL.

    The URI is first compacted via ``curie_from_uri``; if the resulting
    CURIE ends in the ``.owl`` file extension, that extension is removed.

    :param uri: The URI to convert. NOTE(review): a value that is already a
        CURIE is presumably passed through by ``curie_from_uri`` - confirm.
    :param prefix_map: The prefix map used to compact the URI.
    :return: The CURIE, without a trailing ``.owl``.
    """
    owl_extension = ".owl"
    source_id = curie_from_uri(uri=uri, prefix_map=prefix_map)
    if not source_id.endswith(owl_extension):
        return source_id
    # Drop the file extension so the identifier names the source, not the file.
    return source_id[: -len(owl_extension)]


def filter_out_prefixes(
df: pd.DataFrame, filter_prefixes: List[str], features: list = KEY_FEATURES
) -> pd.DataFrame:
Expand Down Expand Up @@ -1245,7 +1272,7 @@ def sort_df_rows_columns(
col for col in SCHEMA_DICT["slots"].keys() if col in df.columns
]
df = df.reindex(column_sequence, axis=1)
if by_rows and len(df) > 0:
if by_rows and len(df) > 0 and len(df.columns) > 0:
df = df.sort_values(by=df.columns[0], ignore_index=True)
return df

Expand Down
14 changes: 11 additions & 3 deletions sssom/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,34 @@

from sssom.context import add_built_in_prefixes_to_prefix_map
from sssom.parsers import to_mapping_set_document
from sssom.util import MappingSetDataFrame, get_all_prefixes
from sssom.util import MappingSetDataFrame, SssomMalformedYamlError, get_all_prefixes

from .constants import SCHEMA_YAML, SchemaValidationType


def validate(
msdf: MappingSetDataFrame, validation_types: List[SchemaValidationType]
msdf: MappingSetDataFrame,
validation_types: List[SchemaValidationType],
verbose=False,
) -> None:
"""Validate SSSOM files against `sssom-schema` using linkML's validator function.

:param msdf: MappingSetDataFrame.
:param validation_types: SchemaValidationType
:param verbose: If true, print detailed error message.
"""
validation_methods = {
SchemaValidationType.JsonSchema: validate_json_schema,
SchemaValidationType.Shacl: validate_shacl,
SchemaValidationType.PrefixMapCompleteness: check_all_prefixes_in_curie_map,
}
for vt in validation_types:
validation_methods[vt](msdf)
try:
validation_methods[vt](msdf)
except SssomMalformedYamlError as e:
print("Your SSSOM mapping set metadata is malformed YAML.")
if verbose:
print(e)


def validate_json_schema(msdf: MappingSetDataFrame) -> None:
Expand Down
Loading