diff --git a/.travis.yml b/.travis.yml index 9a923ab..5463151 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,13 @@ language: scala +sudo: required + +before_install: + - sudo apt-get update -qq + +install: + - sudo apt-get install -y python3-venv -qq script: + - source environment.sh + - python3 scripts/syngo_uniprot_resolver.py -f resources/SynGO_export_test.json -o resources/SynGO_export_test.json - sbt "run -ni resources/SynGO_export_test.json" diff --git a/environment.sh b/environment.sh new file mode 100644 index 0000000..a70e7a9 --- /dev/null +++ b/environment.sh @@ -0,0 +1,5 @@ +#!/bin/bash +python3 -m venv env +. env/bin/activate + +pip3 install -r requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..69de461 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +pandas +requests diff --git a/scripts/__pycache__/uniprot_wrapper.cpython-36.pyc b/scripts/__pycache__/uniprot_wrapper.cpython-36.pyc new file mode 100644 index 0000000..ee1eb5b Binary files /dev/null and b/scripts/__pycache__/uniprot_wrapper.cpython-36.pyc differ diff --git a/scripts/syngo_uniprot_resolver.py b/scripts/syngo_uniprot_resolver.py new file mode 100644 index 0000000..becd186 --- /dev/null +++ b/scripts/syngo_uniprot_resolver.py @@ -0,0 +1,94 @@ +import json +import requests +import argparse +from pandas import read_csv +from io import StringIO +from os import path +from uniprot_wrapper import UniprotWrapper, UniprotWrapper2 + +p = argparse.ArgumentParser() +p.add_argument('-f', "--filename", type=str, required=True, help="input filename of SynGO annotation export JSON") +p.add_argument('-o', "--outfile", type=str, required=False, help="output filename") + +def main(): + args = p.parse_args() + + filename = args.filename + with open(filename) as f: + data = json.load(f) + + wrapper = UniprotWrapper2() + extensions = [] + ext_genes = [] + ext_sets = [] + id_map = {} + + for a in data["SynGO"]: + for m in a['models']: + uniprot_id = m["uniprot"] + if uniprot_id not in id_map: + id_map[uniprot_id] = {} + + id_map = wrapper.lookup_uniprot(list(id_map.keys())) + + # + # + + for a in data["SynGO"]: + models = [] + for m in a['models']: + uniprot_id = m['uniprot'] + if id_map[uniprot_id] == {}: + uniprot_id = uniprot_id.split("-")[0] # Adjust for isoforms + noctua_gene_id = wrapper.get_noctua_gene_id(id_map, uniprot_id) + if noctua_gene_id is None: + noctua_gene_id = m["uniprot"] + m["noctua_gene_id"] = noctua_gene_id + resulting_dbs = list(set(wrapper.DBS_TO_LOOKUP) & set(id_map[uniprot_id].keys())) + if len(resulting_dbs) > 0: + m["id_db_source"] = resulting_dbs[0] + else: + m["id_db_source"] = "uniprot" + m["taxon_id"] = UniprotWrapper.get_field_for_id(id_map, "taxon_id", uniprot_id) # Gotta do these when can't infer from id_db_source + for field in wrapper.OTHER_FIELDS_TO_FETCH: + if field in id_map[uniprot_id]: + m[field] = UniprotWrapper.get_field_for_id(id_map, field, uniprot_id) + if "gene_symbol" in id_map[uniprot_id]: + m["gene_symbol"] = UniprotWrapper.get_field_for_id(id_map, "gene_symbol", uniprot_id) + if "gene_symbol" not in m: + # Report missing info for this one + print(m['uniprot'] + " - " + a['combi_id'] + " - " + str(UniprotWrapper.get_field_for_id(id_map, "GENENAME", uniprot_id))) + ext_relations = [] + for e in m['extensions']: + for k in e.keys(): + go_terms = [] + uberon_terms = [] + other_terms = [] + if k not in extensions: + extensions.append(k) + if k not in ext_relations: + ext_relations.append(k) + for t in e[k]: + if ":" not in t and t not in ext_genes: + ext_genes.append(t) + if t.startswith("GO:"): + go_terms.append(t) + elif t.startswith("UBERON:"): + uberon_terms.append(t) + else: + other_terms.append(t) + # if len(go_terms) > 1 or len(uberon_terms) > 1 or len(other_terms) > 0: + # ext_sets.append([a["combi_id"],e[k]]) + if len(set(ext_relations)) > 1: + ext_sets.append(a["combi_id"]) + models.append(m) + a['models'] = models + + outfile = args.outfile + if outfile is None: + outfile = path.splitext(filename)[0] + "_updated" + path.splitext(filename)[1] + with open(outfile, "w+") as wf: + json.dump(data, wf, indent=4) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/uniprot_wrapper.py b/scripts/uniprot_wrapper.py new file mode 100644 index 0000000..1e0b1a5 --- /dev/null +++ b/scripts/uniprot_wrapper.py @@ -0,0 +1,182 @@ +import json +import requests +import argparse +import logging +from pandas import read_csv +from io import StringIO +from os import path +from UniProtMapper import ProtMapper + + +p = argparse.ArgumentParser() +p.add_argument('-f', "--filename", type=str, required=True, help="input filename of space-separated UniProt ID list") +p.add_argument('-o', "--outfile", type=str, required=False, help="output filename for result JSON") +p.add_argument('-d', "--debug_level", type=str, required=False, help="logging level") + +class UniprotWrapper(): + # DBS_TO_LOOKUP = ["MGI_ID", "RGD_ID", "FLYBASE_ID", "WORMBASE_ID", "HGNC_ID"] + DBS_TO_LOOKUP = ["MGI_ID", "RGD_ID", "FLYBASE_ID", "WORMBASE_ID"] + OTHER_FIELDS_TO_FETCH = ["GENENAME"] + + def make_uniprot_call(self, uniprot_ids, current_map=None): + request_min = 0 + while request_min < len(uniprot_ids): + print(request_min) + for field in self.DBS_TO_LOOKUP + self.OTHER_FIELDS_TO_FETCH: + r = requests.get('https://www.uniprot.org/uploadlists/?from=ACC&to=' + field + '&format=tab&query=' + " ".join(uniprot_ids[request_min:request_min+500])) + uniprot_results = read_csv(StringIO(r.text), delimiter='\t') + + for index, row in uniprot_results.iterrows(): + logging.debug(row[0] + " - " + field) + current_map[row[0]][field] = row[1] + + request_min += 500 # For some reason requesting >1000 results in 400 error + return current_map + + def lookup_uniprot(self, uniprot_ids, current_map=None, isoform_check=True): + if current_map is None: + current_map = {} + for uid in uniprot_ids: + current_map[uid] = {} + current_map = self.make_uniprot_call(uniprot_ids, current_map) + + # Adjust for isoforms + if isoform_check: + redo_ids = [] + for k in current_map: + if current_map[k] == {} and "-" in k: + redo_id = k.split("-")[0] + redo_ids.append(redo_id) + if redo_ids: + current_map = self.make_uniprot_call(redo_ids, current_map) + + return current_map + + @staticmethod + def one_off_call(uniprot_id): + r = requests.get('http://www.uniprot.org/uniprot/' + uniprot_id + '.txt') + return UniprotWrapper.get_gene_label(r.text.split("\n")) + + @staticmethod + def get_organism_id(uniprot_id): + r = requests.get("https://rest.uniprot.org/uniprotkb/{}.tsv?fields=accession,organism_id".format(uniprot_id)) + return UniprotWrapper.parse_taxon_from_tsv(r.text) + + @staticmethod + def get_gene_label(result_lines): + gene_name = "" + species = "" + for line in result_lines: + if line.startswith("GN Name="): + gene_name = line[5:len(line)].split(";")[0].split("{")[0] + gene_name = gene_name[5:len(gene_name)].rstrip() + elif line.startswith("OS"): + species = line[5:len(line)] + species = species.split(" ") + species = species[0][0] + species[1][0:3] + label = gene_name + " " + species + return label + + @staticmethod + def get_taxon_id(uniprot_id): + r = requests.get('http://www.uniprot.org/uniprot/' + uniprot_id + '.txt') + return UniprotWrapper.parse_taxon_from_txt(r.text) + + @staticmethod + def parse_taxon_from_txt(result_text): + taxon_prefix = "OX NCBI_TaxID=" + taxon_id = "" + for line in result_text.split("\n"): + if line.startswith(taxon_prefix): + taxon_id = line[len(taxon_prefix):len(line)].rstrip(";").split("{")[0].rstrip() + return taxon_id + + @staticmethod + def parse_taxon_from_tsv(result_text): + taxon_id = "" + for line in result_text.split("\n"): + if not line or line.startswith("Entry"): + continue + else: + taxon_id = line.split("\t")[1].rstrip() + return taxon_id + + @staticmethod + def get_field_for_id(current_map, field, uniprot_id): + if field in current_map[uniprot_id]: + return str(current_map[uniprot_id][field]) + + def get_noctua_gene_id(self, current_map, uniprot_id): + noctua_gene_id = None + for db in self.DBS_TO_LOOKUP: + if db in current_map[uniprot_id]: + noctua_gene_id = UniprotWrapper.get_field_for_id(current_map, db, uniprot_id) + return noctua_gene_id + + +class UniprotWrapper2(UniprotWrapper): + DBS_TO_LOOKUP = ["MGI", "RGD", "FlyBase", "WormBase", "ZFIN"] + OTHER_FIELDS_TO_FETCH = ["gene_names"] + mapper = ProtMapper() + + def make_uniprot_call(self, uniprot_ids, current_map=None): + # mapped_ids = {} + for field in self.DBS_TO_LOOKUP: + result, failed = self.mapper.get( + # ids=list(uniprot_ids), from_db="UniProtKB_AC-ID", to_db=field, fields=self.OTHER_FIELDS_TO_FETCH + ids = list(uniprot_ids), from_db = "UniProtKB_AC-ID", to_db = field + ) + # result is a pandas DataFrame + # iterate through result and add From key to mapped_ids with To as value + for index, row in result.iterrows(): + from_id = row["From"] + to_id = row["To"] + # if field == "RGD" and not to_id.startswith("RGD:"): + # to_id = "RGD:" + str(int(to_id)) + current_map[from_id][field] = to_id + # print("yo") + + # set uniprot_ids to remaining for querying on the next DB + uniprot_ids = failed + # # Add remaining unmapped IDs to mapped_ids so they get printed out + # for uid in uniprot_ids: + # current_map[uid] = "" + result, failed = self.mapper.get( + # ids=list(uniprot_ids), from_db="UniProtKB_AC-ID", to_db=field, fields=self.OTHER_FIELDS_TO_FETCH + ids=list(current_map.keys()) + ) + for index, row in result.iterrows(): + uniprot_id = row["From"] + gene_names = row["Gene Names"] + taxon_id = row["Organism (ID)"] + first_gene_name = gene_names.split(" ")[0] + current_map[uniprot_id]["gene_symbol"] = first_gene_name + current_map[uniprot_id]["taxon_id"] = taxon_id + + return current_map + + +def main(): + args = p.parse_args() + + if args.debug_level is not None: + logging.basicConfig(level=args.debug_level) + + filename = args.filename + outfile = args.outfile + id_map = {} + with open(filename) as f: + + wrapper = UniprotWrapper() + + id_map = wrapper.lookup_uniprot(f.read().split(" ")) + + if outfile is not None: + with open(path.splitext(filename)[0] + "_output" + path.splitext(filename)[1], "w") as wf: + json.dump(id_map, wf, indent=4) + else: + print(id_map) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/main/scala/org/geneontology/syngo2lego/LegoModel.scala b/src/main/scala/org/geneontology/syngo2lego/LegoModel.scala index a07739d..e311bd9 100644 --- a/src/main/scala/org/geneontology/syngo2lego/LegoModel.scala +++ b/src/main/scala/org/geneontology/syngo2lego/LegoModel.scala @@ -2,18 +2,17 @@ package org.geneontology.syngo2lego import org.semanticweb.owlapi.apibinding.OWLManager import dosumis.brainscowl.BrainScowl import org.phenoscape.scowl._ - -import org.semanticweb.owlapi.model.OWLOntology -import org.semanticweb.owlapi.model.OWLClass -import org.semanticweb.owlapi.model.IRI +import org.semanticweb.owlapi.model.{IRI, OWLAnnotationProperty, OWLClass, OWLOntology} import org.semanticweb.owlapi.search.EntitySearcher -import rapture.json._, jsonBackends.jawn._ +import rapture.json._ +import jsonBackends.jawn._ + import scala.io.Source import java.io.File import java.util.Date import java.text.SimpleDateFormat -class LegoModel (val jmodel : Json, val GO : BrainScowl, add_import_statement: Boolean) { +class LegoModel (val jmodel : Json, val GO : BrainScowl, add_import_statement: Boolean, context_lookup: Json) { /** Take one model worth of SynGO JSON (as Json) @@ -36,28 +35,124 @@ class LegoModel (val jmodel : Json, val GO : BrainScowl, add_import_statement: B val dc_date = AnnotationProperty("http://purl.org/dc/elements/1.1/date") val model_status = AnnotationProperty("http://geneontology.org/lego/modelstate") val provided_by = AnnotationProperty("http://purl.org/pav/providedBy") - owl_model.annotateOntology(Annotation(provided_by, "SynGO-VU")) - owl_model.annotateOntology(Annotation(title, syngo_id + test)) + val in_taxon = AnnotationProperty("https://w3id.org/biolink/vocab/in_taxon") + var mods = this.jmodel.models.as[List[Json]] + val model_ns = base + syngo_id + + // val title_value = mods.head.GENENAME + "_" + aspect + "_" + mods.head.annotationid + val title_value = get_title() + // owl_model.annotateOntology(Annotation(provided_by, "SynGO-VU")) + owl_model.annotateOntology(Annotation(provided_by, "https://syngo.vu.nl")) + owl_model.annotateOntology(Annotation(title, title_value)) // Files have no assoc date (except for in comments). So, for now at least, // generating here to fulfill loading requirements. val datestamp = this.jmodel.datestamp.as[String] owl_model.annotateOntology(Annotation(dc_date, datestamp)) owl_model.annotateOntology(Annotation(model_status, this.jmodel.status.as[String])) - - val mods = this.jmodel.models.as[List[Json]] - val model_ns = base + syngo_id + var file_extension = "" - for (mod <- mods) { + var model_count = 0 + for (mod <- mods) { // Add in check of JSON integrity mod - var sm = new SimpleModel(model_ns, owl_model, mod, GO) - sm.generate() + var skip_model = false + val deprecated_fields = GO.getSpecAnnotationsOnEntity( + query_short_form = mod.goTerm.as[String].replace(":", "_"), + ap_short_form = "deprecated") + if (deprecated_fields.length > 0) { + val deprecated_field = deprecated_fields.head.isDeprecatedIRIAnnotation() + println(deprecated_field) + if (deprecated_field) { + // Look for a "term replaced by" term + var has_replacement_term = false + val term_annotations = GO.getAnnotationsOnEntity(query_short_form = mod.goTerm.as[String].replace(":", "_")) + if (term_annotations.length > 0) { + for (ta <- term_annotations) { + val prop = ta.getProperty() + val value = ta.getValue() + if (prop.equals(AnnotationProperty("http://purl.obolibrary.org/obo/IAO_0100001"))) { + has_replacement_term = true + } + } + } + if (!has_replacement_term) { + skip_model = true + println(s"Skipping ${syngo_id}.") + } + } + } + val term_annotations = GO.getAnnotationsOnEntity(query_short_form = mod.goTerm.as[String].replace(":", "_")) + if (term_annotations.length > 0) { + for (ta <- term_annotations) { + val prop = ta.getProperty() + val value = ta.getValue() + if (prop.equals(AnnotationProperty("http://purl.obolibrary.org/obo/IAO_0100001"))) { + println(value) + } + } + } + if (!skip_model) { + var sm = new SimpleModel(model_ns, owl_model, mod, GO, context_lookup) + sm.generate() + + val taxon_prefix = "http://purl.obolibrary.org/obo/NCBITaxon_" + var taxon = mod.id_db_source.as[String] match { + case "HGNC_ID" => "9606" + case "RGD" => "10116" + case "MGI" => "10090" + case "FlyBase" => "7227" + case "WormBase" => "6239" + case "ZFIN" => "7955" + case default => "" + } + if (taxon == "") { + // Likely non-MOD and taxon_id was individually retrieved. Try fetching taxon_id key + try { + taxon = mod.taxon_id.as[String] + } catch { + case _: rapture.data.TypeMismatchException => taxon = "" + } + } + if (taxon != "") { + owl_model.annotateOntology(Annotation(in_taxon, taxon_prefix + taxon)) + } + model_count+=1 + } } if (this.add_import_statement) { owl_model.add_import("http://purl.obolibrary.org/obo/go/extensions/go-lego.owl") file_extension = ".ttl" } - owl_model.save(syngo_id + test + ".ttl", "ttl") // - owl_model.sleep() + + // Only save if there's actually anything to say + if (model_count > 0) { + owl_model.save(syngo_id + test + ".ttl", "ttl") // + owl_model.sleep() + } + + def get_title(): String = { + val found_aspects = GO.getSpecAnnotationsOnEntity( + query_short_form = mods.head.goTerm.as[String].replace(":", "_"), + ap_short_form = "hasOBONamespace") + var aspect = "" + var found_aspect = "" + if (found_aspects.length > 0) { + found_aspect = found_aspects.head.getValue.asLiteral.get.getLiteral + } + if (found_aspect == "cellular_component") { + aspect = "CC" + } else if (found_aspect == "molecular_function") { + aspect = "MF" + } else if (found_aspect == "biological_process") { + aspect = "BP" + } + var gene_name = "" + try { + gene_name = mods.head.GENENAME.as[String] + } catch { + case gene: MissingValueException => gene_name = "SYNGO" + } + return gene_name + "_" + aspect + "_" + mods.head.annotationid.as[Int] + } } \ No newline at end of file diff --git a/src/main/scala/org/geneontology/syngo2lego/Main.scala b/src/main/scala/org/geneontology/syngo2lego/Main.scala index 9dd7322..150da21 100644 --- a/src/main/scala/org/geneontology/syngo2lego/Main.scala +++ b/src/main/scala/org/geneontology/syngo2lego/Main.scala @@ -19,9 +19,12 @@ object Main extends(App) { Cli.parse(args).withCommand(new Cat) { case cat => // Takes full JSON file as input, splits it up into models. // Generates individual OWL files from each model - val go = new BrainScowl("resources/go-simple.ofn") // TODO switch to pulling this dynamically from URL + val go = new BrainScowl("resources/go.owl") // TODO switch to pulling this dynamically from URL val synGO_file = Source.fromFile(cat.json_file).getLines.mkString val synGO_json = Json.parse(synGO_file) + val url = "https://raw.githubusercontent.com/prefixcommons/biocontext/master/registry/go_context.jsonld" + var lookup_str = scala.io.Source.fromFile("resources/go_context.jsonld").mkString + val idOrg_ns_lookup = Json.parse(lookup_str).context // At this point - should run check of json - outer struc // Loop over models calling genModel // var status = "delete" @@ -32,7 +35,7 @@ object Main extends(App) { // status = "publish" // } for (a <- synGO_json.SynGO.as[List[Json]]) { - var lm = new LegoModel(a, go, cat.no_imports) + var lm = new LegoModel(a, go, cat.no_imports, idOrg_ns_lookup) } go.sleep() } diff --git a/src/main/scala/org/geneontology/syngo2lego/SimpleModel.scala b/src/main/scala/org/geneontology/syngo2lego/SimpleModel.scala index e367021..6e8fcc9 100644 --- a/src/main/scala/org/geneontology/syngo2lego/SimpleModel.scala +++ b/src/main/scala/org/geneontology/syngo2lego/SimpleModel.scala @@ -1,29 +1,38 @@ package org.geneontology.syngo2lego -import rapture.json._, jsonBackends.jawn._ +import rapture.json._ +import jsonBackends.jawn._ import org.phenoscape.scowl._ import org.semanticweb.owlapi.model.IRI import org.semanticweb.owlapi.model._ import org.semanticweb.owlapi.search.EntitySearcher import dosumis.brainscowl.BrainScowl import org.semanticweb.owlapi.apibinding.OWLManager + import scala.language.postfixOps -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.{ArrayBuffer, ListBuffer} import scala.collection.JavaConversions._ import collection.JavaConverters._ import java.util.Date import java.text.SimpleDateFormat class SimpleModel (val model_ns: String, var ont : BrainScowl, - val jmodel : Json, val go: BrainScowl) { - // Passing rather too many vars. Deeper class model with some + val jmodel : Json, val go: BrainScowl, val idOrg_ns_lookup: Json) { + // Passing rather too many vars. Deeper class model with some // inheritance might be good here instead... // Declare globals // Namespaces (In the OWL sense - AKA Base IRIs) val obo_ns = "http://purl.obolibrary.org/obo/" - val idOrg_ns = "http://identifiers.org/" + val idOrg_ns = "http://identifiers.org/uniprot/" + val lookup_key_lookup = Map("uniprot" -> "UniProtKB", + "HGNC_ID" -> "UniProtKB", + "RGD" -> "RGD", + "MGI" -> "MGI", + "FlyBase" -> "FB", + "WormBase" -> "WB", + "ZFIN" -> "ZFIN") val synGO_ns = "http://syngo.vu.nl/" // OP vals // This should really be pulled from a config file @@ -33,7 +42,7 @@ class SimpleModel (val model_ns: String, var ont : BrainScowl, // Lookup needed to translate identifiers from JSON val OP_lookup = Map( "part_of" -> part_of, "occurs_in" -> occurs_in) // More concise way to do this? - + val primary_class = Class(obo_ns + this.jmodel.goTerm.as[String].replace(":", "_")) // Add check that primary class exists in GO! @@ -41,13 +50,19 @@ class SimpleModel (val model_ns: String, var ont : BrainScowl, // Annotations // Global 'cos they go on everything. val dc_contributor = AnnotationProperty("http://purl.org/dc/elements/1.1/contributor") -// val contributor = IRI.create(synGO_ns + jmodel.username.as[String]) val comment = AnnotationProperty("http://www.w3.org/2000/01/rdf-schema#comment") - val contributor = "SynGO:" + jmodel.username.as[String] + val contributors = jmodel.username.as[String].split(';') + val contributor_prefix = "https://orcid.org/" val source = "PMID:" + jmodel.pmid.as[String] - val evidence = AnnotationProperty("http://geneontology.org/lego/evidence") + val evidence = AnnotationProperty("http://geneontology.org/lego/evidence") + val provided_by = AnnotationProperty("http://purl.org/pav/providedBy") + // val provided_by_value = "SynGO-VU" + val provided_by_value = "https://syngo.vu.nl" - this.ont.annotateOntology(Annotation (dc_contributor, contributor)) + // this.ont.annotateOntology(Annotation (dc_contributor, contributor)) + for (c <- contributors) { + this.ont.annotateOntology(Annotation (dc_contributor, contributor_prefix + c)) + } val dc_date = AnnotationProperty("http://purl.org/dc/elements/1.1/date") val species = jmodel.comments.species.as[String] @@ -69,15 +84,19 @@ class SimpleModel (val model_ns: String, var ont : BrainScowl, // Do NOT use goDomain from the JSON as MFs are mislabelled as BPs: // e.g. This labeled as BP GO:1905056 calcium-transporting ATPase activity involved in ... - val primary_aspect = go.getSpecTextAnnotationsOnEntity( + val primary_aspect = go.getSpecAnnotationsOnEntity( query_short_form = this.jmodel.goTerm.as[String].replace(":", "_"), - ap_short_form = "hasOBONamespace").head // head bakes in cardinality without check! + ap_short_form = "hasOBONamespace").head.getValue.asLiteral.get.getLiteral // head bakes in cardinality without check! def new_ind() : OWLNamedIndividual = { val i = NamedIndividual(this.model_ns + java.util.UUID.randomUUID.toString) - this.ont.add_axiom(i Annotation (dc_contributor, contributor)) // Also needs date. - this.ont.add_axiom(i Annotation (dc_date, date)) // Also needs date. + // this.ont.add_axiom(i Annotation (dc_contributor, contributor)) // Also needs date. + for (c <- contributors) { + this.ont.add_axiom(i Annotation (dc_contributor, contributor_prefix + c)) + } + this.ont.add_axiom(i Annotation (dc_date, date)) // Also needs date. + this.ont.add_axiom(i Annotation (provided_by, provided_by_value)) return i } @@ -94,14 +113,35 @@ class SimpleModel (val model_ns: String, var ont : BrainScowl, } def new_gp(): OWLNamedIndividual = { + + println(this.jmodel.id_db_source.as[String]) + println(idOrg_ns_lookup.MGI) // return new_typed_ind(obo_ns + "UniProtKB_" + this.jmodel.uniprot.as[String]) - return new_typed_ind(idOrg_ns + "uniprot/" + this.jmodel.uniprot.as[String]) + //TODO maybe try com.google.gson.Gson? + val ns = lookup_key_lookup(this.jmodel.id_db_source.as[String]) match { + case "UniProtKB" => idOrg_ns_lookup.UniProtKB.as[String] + case "RGD" => idOrg_ns_lookup.RGD.as[String] + case "MGI" => idOrg_ns_lookup.MGI.as[String] + case "FB" => idOrg_ns_lookup.FB.as[String] + case "WB" => idOrg_ns_lookup.WB.as[String] + case "ZFIN" => idOrg_ns_lookup.ZFIN.as[String] + } + return new_typed_ind(ns + this.jmodel.noctua_gene_id.as[String]) } def new_primary_ind(): OWLNamedIndividual = { return new_typed_ind(obo_ns + this.jmodel.goTerm.as[String].replace(":", "_")) } - + + def get_aspect(term: String): String = { + if (!term.startsWith("GO:")) { + return "" + } + val aspect : String = go.getSpecAnnotationsOnEntity( + query_short_form = term.replace(":", "_"), + ap_short_form = "hasOBONamespace").head.getValue.asLiteral.get.getLiteral + return aspect + } // Annotations on edges get attached to individuals - which are then used to annotate the edge // "evidence": { "system": [ @@ -126,7 +166,8 @@ class SimpleModel (val model_ns: String, var ont : BrainScowl, // Also annotates model with contributor // Should probably add these to the ontology too - but feels like wrong place to do it. /// this.ont.add_axiom(ont.ontology Annotation(dc_source, source)) - val dc_source = AnnotationProperty("http://purl.org/dc/elements/1.1/source") + val dc_source = AnnotationProperty("http://purl.org/dc/elements/1.1/source") + // val dc_source = AnnotationProperty(obo_ns + "SEPIO:0000124") // Not yet supported by GPAD export var out = Set[OWLAnnotation]() for ((k,v) <- jmodel.evidence.as[Map[String, Json]]) { // TODO annotated inds with syngo evidence codes. Needs extension to JSON. @@ -137,6 +178,12 @@ class SimpleModel (val model_ns: String, var ont : BrainScowl, if (!m.isEmpty) { val ann = new_typed_ind(obo_ns + eco.replace(":", "_")) // this.ont.add_axiom(ann Annotation (dc_source, source)) + // Adding contributor(s), date, and providedBy to all edges + for (c <- contributors) { + out += Annotation(dc_contributor, contributor_prefix + c) + } + out += Annotation(dc_date, date) + out += Annotation(provided_by, provided_by_value) out += Annotation(evidence, ann) } else { println(s"Ignoring ${eco} as it doesn't look like an ECO term.") @@ -192,15 +239,52 @@ class SimpleModel (val model_ns: String, var ont : BrainScowl, val extensions = jmodel.extensions.as[List[Json]] + def sort_extensions(extension_terms: List[String]): List[String] = { + var sorted_list = List[String]() + var go_terms = ListBuffer[String]() + var cl_terms = ListBuffer[String]() + var uberon_terms = ListBuffer[String]() + for (term <- extension_terms) { + if (term.startsWith("GO:")) { + go_terms += term + } else if (term.startsWith("CL:")) { + cl_terms += term + } else if (term.startsWith("UBERON:")) { + uberon_terms += term + } + } + + sorted_list = sorted_list ::: go_terms.toList + sorted_list = sorted_list ::: cl_terms.toList + sorted_list = sorted_list ::: uberon_terms.toList + + return sorted_list + } + def extend(primary_ind: OWLNamedIndividual, extension: Json){ // Checks Json, uses it to extend pimary ind val ext = extension.as[Map[String, List[String]]] for ((k,v) <- ext) { - val rel = OP_lookup(k) - for (o <- v) { + var rel = OP_lookup(k) + var previous_o = this.jmodel.goTerm.as[String] + var previous_aspect = primary_aspect + var previous_oi = primary_ind + // MF -occursin-> CC –occursin-> CCextension –partof-> Celltype(CL ontology) –part of-> Anatomy(Uberon ontology) + // val sorted_v = v.sorted // sorting should be more robust than alphabetical + val sorted_v = sort_extensions(v) + for (o <- sorted_v) { val oi = new_typed_ind(obo_ns + o.replace(":", "_")) + val oa = get_aspect(o) + // if previous_o is a CC term and o is UBERON, default rel to "part of" + if (previous_aspect == "cellular_component" & o.startsWith("UBERON:")) { + println("setting UBERON to part_of") + rel = part_of + } this.ont.add_axiom(ObjectPropertyAssertion(gen_annotations(jmodel.evidence), - rel,primary_ind, oi)) + rel, previous_oi, oi)) + previous_o = o + previous_aspect = oa + previous_oi = oi } } }