diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 50d1a4dfc..93c56c764 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -15,13 +15,13 @@ jobs: build: runs-on: ubuntu-latest env: - _JAVA_OPTIONS: "--add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED" + JDK_JAVA_OPTIONS: "--add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/java.lang.reflect=ALL-UNNAMED" steps: - uses: actions/checkout@v3 - uses: actions/setup-java@v3 with: - java-version: 17 + java-version: 20 distribution: 'temurin' cache: maven - - name: Build with Maven and JDK-17 + - name: Build with Maven and JDK-20 run: mvn --batch-mode --update-snapshots package diff --git a/Dockerfile b/Dockerfile index 3ce927ed8..ee9150d24 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,15 +1,18 @@ -# builds the image after maven build from local sources -# todo: test (perhaps get the data from PC2 and the fat JAR/WAR from M2 repo instead of using local build) -FROM openjdk:17 -ARG WRK=target/work -VOLUME /work -WORKDIR /work -COPY target/cpath2.war . -# copy the data except listed in .dockerignore -COPY $WRK . +# pinned LTS base image (never :latest) for reproducible builds; APP_JAR defaults to the Maven-built WAR +FROM eclipse-temurin:21-jdk +ARG APP_JAR=target/cpath2.war +# persistent volume containing the app data (biopax model and index files; +# can use target/work demo/test data) +COPY ${APP_JAR} /cpath2.war +#home/work dir (properties, data) to mount as docker volume/bind (with e.g. 
terraform or docker-compose) ENV CPATH2_HOME /work -ENTRYPOINT ["java", "-Djava.security.egd=file:/dev/./urandom", "-Dfile.encoding=UTF-8", "-Xmx64g",\ -"-Dspring.profiles.active=docker", "-jar", "cpath2.war", "--server", \ -"--add-opens=java.base/java.lang=ALL-UNNAMED", \ -"--add-opens=java.base/java.lang.reflect=ALL-UNNAMED"] +ENV JDK_JAVA_OPTIONS="--add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/java.lang.reflect=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED" +#start the app in the workdir to use the "production" application.properties (if present) +WORKDIR ${CPATH2_HOME} +ENTRYPOINT ["java", "-server", "-Djava.security.egd=file:/dev/./urandom", \ +"-Dfile.encoding=UTF-8", "-Xss32m", "-Xmx64g", \ +"-Dspring.profiles.active=docker", \ +"-Dpaxtools.model.safeset=list", \ +"-Dpaxtools.normalizer.use-latest-registry=true", \ +"-jar", "/cpath2.war", "--server"] EXPOSE 8080 diff --git a/README.md b/README.md index bb811817a..fce764011 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,11 @@ Expect that most queries or example links won't return any result as there are n instance; try to find e.g. all the pathways there with `http://localhost:8080/search?q=*&type=pathway` (remove `type` parameter to list all the objects; use "Accept:application/xml" header to get XML instead of JSON result). +Alternatively, can run/debug the demo/dev app as: + + mvn spring-boot:run + + ## Configuration ### Working directory @@ -94,7 +99,8 @@ Copy the latest paxtools.jar into this current directory and run - sh export.sh 2>&1 >console.out & (- which takes overnight or a day and night); upload/copy/move (but keep at least blacklist.txt, *All.BIOPAX.owl.gz) -all the files from this here and ../data/ directories to the file server, or configure so that they can be downloaded from e.g. `http://www.pathwaycommons.org/archives/PC2/v{version_number}` (or else). 
+all the files from this here and ../data/ directories to the file server, or configure so that they can be downloaded +from e.g. `http://www.pathwaycommons.org/archives/PC2/v{version_number}` (or else). Once the instance is configured and data processed, run the web service using the same script as follows: @@ -116,7 +122,7 @@ simply include to the cpath2.sh Java options like: ### Data One (a data manager) has to find, download, re-pack (zip) and put original -biological pathway data files to the data/ sub-directory. +biological pathway data files to the data/ subdirectory. #### Warehouse data @@ -138,3 +144,21 @@ Prepare original BioPAX and PSI-MI/PSI-MITAB data archives in the 'data' folder - download (wget) original files or archives from the pathway resource (e.g., `wget http://www.reactome.org/download/current/biopax3.zip`) - extract what you need (e.g. some species data only) - create a new zip archive using name like `.zip` (datasource identifier, e.g., `reactome_human.zip`). + + +## Docker + +### build the project and image from sources +``` +mvn clean install +mvn dockerfile:build +#mvn dockerfile:tag +#mvn dockerfile:push +``` + +### run +Run with docker (can also do with compose or terraform). 
+Bind-mount a host directory to /work (the test/demo instance data is in the project's target/work): +``` +docker run --rm --name cpath2 -v "$(pwd)/target/work:/work" -p 8080:8080 -it pathwaycommons/cpath2 +``` diff --git a/pom.xml b/pom.xml index e1567499e..5e0191bba 100644 --- a/pom.xml +++ b/pom.xml @@ -9,28 +9,28 @@ org.springframework.boot spring-boot-starter-parent - 2.7.3 + 3.2.2 - Pathway Commons builder and server + cPath2 pathwaycommons cpath2 - 13.0.0-SNAPSHOT - Biological pathways/interactions data integration and services + 14 + Bio pathways/interactions integration and service (using BioPAX and Paxtools) https://pathwaycommons.github.io/cpath2 2009 - 5.3.22 cpath.Application MIT github - 5.3.0-SNAPSHOT - 5.1.0-SNAPSHOT + 6.0.0-SNAPSHOT + 6.0.0-SNAPSHOT UTF-8 - 7.5.0 - -Xmx3g -Dfile.encoding=UTF-8 -ea --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED - ${settings.localRepository}/org/springframework/spring-instrument/${spring.version}/spring-instrument-${spring.version}.jar + 9.7.0 + -Xmx3g -Dfile.encoding=UTF-8 -ea --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED + + target/spring-instrument.jar @@ -39,13 +39,13 @@ - Travis CI - https://travis-ci.org/PathwayCommons/cpath2 + GitHub + https://github.com/PathwayCommons/cpath2/actions/workflows/maven.yml Pathway Commons - http://www.pathwaycommons.org + https://www.pathwaycommons.org @@ -158,8 +158,9 @@ cpath2 + org.apache.maven.plugins maven-compiler-plugin - 3.8.1 + 3.11.0 17 @@ -172,10 +173,8 @@ target/work - - **/*Test.java - false + alphabetical @@ -186,57 +185,49 @@ target/work - alphabetical - - **/*IT.java - false + alphabetical com.spotify dockerfile-maven-plugin - 1.4.4 + 1.4.13 pathwaycommons/cpath2 - target/${project.build.finalName}.war + target/${project.build.finalName}.war - - - - - - - - - - - - - - - - - org.springframework.boot 
spring-boot-maven-plugin + docker-image + deploy - repackage + build + tag + push + + ${project.version} + + + + org.springframework.boot + spring-boot-maven-plugin ${start-class} - ${jvm.options} + ${jvm.options} -Dpaxtools.model.safeset=list -Dpaxtools.normalizer.use-latest-registry=true ${agent} + -s + org.apache.maven.plugins maven-war-plugin false @@ -245,33 +236,37 @@ org.apache.maven.plugins maven-dependency-plugin - 3.1.1 + 3.6.0 - copy - package + copy-spring-instrument-jar copy + + + + org.springframework + spring-instrument + jar + spring-instrument.jar + ${project.build.directory} + + + ${project.build.directory} + - - - - org.springframework - spring-instrument - jar - false - ${project.build.directory} - spring-instrument.jar - - - + + + + + javax.validation validation-api @@ -288,24 +283,19 @@ 1.3.2 - com.sun.activation - javax.activation - 1.2.0 + jakarta.activation + jakarta.activation-api + 2.1.2 - javax.transaction - javax.transaction-api - 1.3 + jakarta.xml.bind + jakarta.xml.bind-api + 4.0.0 - javax.xml.bind - jaxb-api - 2.4.0-b180830.0359 - - - org.glassfish.jaxb - jaxb-runtime - 2.4.0-b180830.0438 + com.sun.xml.bind + jaxb-impl + 4.0.3 runtime @@ -342,80 +332,11 @@ org.biopax.validator biopax-validator ${validator.version} - - - commons-logging - commons-logging - - - - javax.servlet - servlet-api - - - log4j - log4j - - org.biopax.paxtools json-converter ${paxtools.version} - - - commons-logging - commons-logging - - - - - org.apache.jena - apache-jena-libs - pom - 3.2.0 - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - - com.fasterxml.staxmate - staxmate - - - - - com.hp.hpl.jena - jena - 2.6.4 - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - - stax - stax-api - - - com.fasterxml.staxmate - staxmate - - - - - commons-collections - commons-collections - 3.2.2 org.apache.commons @@ -427,12 +348,6 @@ commons-text 1.10.0 - - com.h2database - h2 - 2.2.220 - runtime - org.apache.lucene lucene-core @@ -445,29 +360,23 @@ 
org.apache.lucene - lucene-analyzers-common + lucene-queryparser ${lucene.version} org.apache.lucene - lucene-queryparser + lucene-analysis-common ${lucene.version} pathwaycommons reconx-to-biopax - 1.2.0-SNAPSHOT - - - junit - junit - - + 2.0.0-SNAPSHOT org.sbml.jsbml jsbml-core - 1.3.1 + 1.6.1 org.apache.logging.log4j @@ -477,6 +386,14 @@ junit junit + + com.fasterxml.woodstox + woodstox-core + + + com.thoughtworks.xstream + xstream + @@ -509,23 +426,11 @@ com.fasterxml.jackson.core jackson-core - runtime com.fasterxml.jackson.core jackson-databind - runtime - - - org.springframework.boot - spring-boot-starter-data-jpa - - - javax.transaction - javax.transaction-api - - org.springframework.boot @@ -535,12 +440,25 @@ org.springframework.boot spring-boot-starter-web + + + org.yaml + snakeyaml + + + + + jakarta.servlet.jsp.jstl + jakarta.servlet.jsp.jstl-api + 3.0.0 - javax.servlet - jstl + org.glassfish.web + jakarta.servlet.jsp.jstl + 3.0.1 + org.apache.tomcat.embed tomcat-embed-jasper @@ -562,14 +480,20 @@ true - org.springframework.restdocs - spring-restdocs-mockmvc - test + org.springdoc + springdoc-openapi-starter-webmvc-ui + 2.2.0 + + + org.yaml + snakeyaml + + - org.springdoc - springdoc-openapi-ui - 1.6.11 + org.projectlombok + lombok + 1.18.24 diff --git a/src/main/java/cpath/Application.java b/src/main/java/cpath/Application.java index a799d5095..f95c46bef 100644 --- a/src/main/java/cpath/Application.java +++ b/src/main/java/cpath/Application.java @@ -1,6 +1,6 @@ package cpath; -import cpath.service.api.CPathService; +import cpath.service.api.Service; import org.apache.commons.lang3.ArrayUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -18,8 +18,8 @@ public static void main(String[] args) { LOG.info("Starting the web application..."); application.setAdditionalProfiles("web"); ConfigurableApplicationContext applicationContext = application.run(args); - CPathService service = applicationContext.getBean(CPathService.class); - 
service.init(); + Service service = applicationContext.getBean(Service.class); + service.init(); //init the service, read-only index, and model, after the web app/services started! } else { application.setWebApplicationType(WebApplicationType.NONE); if (ArrayUtils.contains(args, "-b") || ArrayUtils.contains(args, "--build")) { @@ -32,5 +32,4 @@ public static void main(String[] args) { applicationContext.close(); } } - } diff --git a/src/main/java/cpath/analysis/EntityFeaturesSummary.java b/src/main/java/cpath/analysis/EntityFeaturesSummary.java index 7123f838c..d5478f214 100644 --- a/src/main/java/cpath/analysis/EntityFeaturesSummary.java +++ b/src/main/java/cpath/analysis/EntityFeaturesSummary.java @@ -1,5 +1,6 @@ package cpath.analysis; +import java.util.Collection; import java.util.HashSet; import java.util.Set; @@ -41,7 +42,7 @@ public void execute(Model model) { for(String id : javaPropertyDatasources.split(",")) providerURIs.add(model.getXmlBase()+id); - final Set allExpForms = model.getObjects(ExperimentalForm.class); + final Collection allExpForms = model.getObjects(ExperimentalForm.class); for(EntityFeature ef : model.getObjects(EntityFeature.class)) { Set providers = new HashSet<>(); diff --git a/src/main/java/cpath/cleaner/PantherCleaner.java b/src/main/java/cpath/cleaner/PantherCleaner.java index 7cc852019..bf4e6f06b 100644 --- a/src/main/java/cpath/cleaner/PantherCleaner.java +++ b/src/main/java/cpath/cleaner/PantherCleaner.java @@ -31,10 +31,10 @@ */ final class PantherCleaner implements Cleaner { - private static Logger log = LoggerFactory.getLogger(PantherCleaner.class); + private static Logger log = LoggerFactory.getLogger(PantherCleaner.class); - public void clean(InputStream data, OutputStream cleanedData) + public void clean(InputStream data, OutputStream cleanedData) { SimpleIOHandler simpleReader = new SimpleIOHandler(BioPAXLevel.L3); Model originalModel = simpleReader.convertFromOWL(data); @@ -46,7 +46,7 @@ public void clean(InputStream data, 
OutputStream cleanedData) throw new RuntimeException("Human data (e.g. BioSource) not found."); // fix the taxonomy xref.db standard name - human.getXref().iterator().next().setDb("Taxonomy"); + human.getXref().iterator().next().setDb("taxonomy"); //Remove/replace non-human BioSources, SequenceEntityReferences //Pathways all have organism=null; let's set 'human' for all @@ -74,7 +74,7 @@ public void clean(InputStream data, OutputStream cleanedData) } }); exec.execute(() -> { - for(Pathway p : new ClassFilterSet(objects, Pathway.class)) { + for(Pathway p : new ClassFilterSet<>(objects, Pathway.class)) { if(p.getUri().startsWith("http://identifiers.org/panther.pathway/") || !p.getPathwayComponent().isEmpty() || !p.getPathwayOrder().isEmpty()) { //- seems they don't use pathwayOrder property, anyway @@ -102,8 +102,7 @@ public void clean(InputStream data, OutputStream cleanedData) } //clone the model (to actually get rid of removed objects in all object properties) - final Model cleanModel = (new Cloner(SimpleEditorMap.L3, BioPAXLevel.L3.getDefaultFactory())) - .clone(originalModel, originalModel.getObjects()); + final Model cleanModel = (new Cloner(SimpleEditorMap.L3, BioPAXLevel.L3.getDefaultFactory())).clone(originalModel.getObjects()); log.info((objects.size()-cleanModel.getObjects().size()) + " non-human objects (and all corresponding properties) were cleared."); originalModel = null; // free some memory, perhaps... 
diff --git a/src/main/java/cpath/cleaner/ReactomeCleaner.java b/src/main/java/cpath/cleaner/ReactomeCleaner.java index bc4bfc0d8..bb059c84c 100644 --- a/src/main/java/cpath/cleaner/ReactomeCleaner.java +++ b/src/main/java/cpath/cleaner/ReactomeCleaner.java @@ -7,6 +7,7 @@ import java.util.Map; import java.util.Set; +import org.apache.commons.lang3.StringUtils; import org.biopax.paxtools.controller.ModelUtils; import org.biopax.paxtools.io.SimpleIOHandler; import org.biopax.paxtools.model.BioPAXLevel; @@ -25,7 +26,7 @@ * Implementation of Cleaner interface for Reactome data. * * Can normalize URIs for some Reactome Entity class objects (pathways, interaction) - * to http://identifiers.org/reactome/R-* form if a unification xref with the stable Reactome ID is found. + * to bioregistry.io/reactome:R-* form if a unification xref with the stable Reactome ID is found. * Removes "unstable" Reactome ID xref from objects where a stable ID is present. */ final class ReactomeCleaner implements Cleaner { @@ -40,14 +41,15 @@ public void clean(InputStream data, OutputStream cleanedData) // Normalize pathway URIs, where possible, using Reactome stable IDs // Since v54, Reactome stable ID format has been changed to like: "R-HSA-123456" - final Map newUriToEntityMap = new HashMap(); + final Map newUriToEntityMap = new HashMap<>(); final Set processes = new HashSet<>(model.getObjects(Process.class)); for(Process proc : processes) { - if (proc.getUri().startsWith("http://identifiers.org/reactome/")) + if (StringUtils.contains(proc.getUri(),"identifiers.org/reactome") + || StringUtils.contains(proc.getUri(), "bioregistry.io/reactome")) continue; //skip for already normalized pathway or interaction - final Set uxrefs = new ClassFilterSet( + final Set uxrefs = new ClassFilterSet<>( new HashSet<>(proc.getXref()), UnificationXref.class); for (UnificationXref x : uxrefs) { if (x.getDb() != null && x.getDb().equalsIgnoreCase("Reactome")) { @@ -55,9 +57,9 @@ public void clean(InputStream 
data, OutputStream cleanedData) //remove 'REACTOME:' (length=9) prefix if present (it's optional - according to MIRIAM) if (stableId.startsWith("REACTOME:")) stableId = stableId.substring(9); - // stableID is like 'R-HSA-123456' (or old REACT_12345) now... + // stableID is like 'R-HSA-123456' - final String uri = "http://identifiers.org/reactome/" + stableId; + final String uri = "https://bioregistry.io/reactome:" + stableId; if (!model.containsID(uri) && !newUriToEntityMap.containsKey(uri)) { //save it in the map to replace the URI later (see below) @@ -90,7 +92,7 @@ public void clean(InputStream data, OutputStream cleanedData) } // Remove unstable UnificationXrefs like "Reactome Database ID Release XX" - // if there is a stable one in the same object + // if there is a stable xref in the same object // Since Reactome v54, stable ID format is different (not like REACT_12345...) final Set xrefsToRemove = new HashSet<>(); for(Xref xref: new HashSet<>(model.getObjects(Xref.class))) { diff --git a/src/main/java/cpath/cleaner/SmpdbCleaner.java b/src/main/java/cpath/cleaner/SmpdbCleaner.java new file mode 100644 index 000000000..842cec6fa --- /dev/null +++ b/src/main/java/cpath/cleaner/SmpdbCleaner.java @@ -0,0 +1,79 @@ +package cpath.cleaner; + +import cpath.service.api.Cleaner; +import org.biopax.paxtools.controller.ModelUtils; +import org.biopax.paxtools.io.SimpleIOHandler; +import org.biopax.paxtools.model.BioPAXLevel; +import org.biopax.paxtools.model.Model; +import org.biopax.paxtools.model.level3.*; +import org.biopax.paxtools.model.level3.Process; + +import java.io.InputStream; +import java.io.OutputStream; +import java.util.*; + +/** + * Implementation of Cleaner interface for the SMPDB BioPAX L3 pathway data + * and Pathbank (similar to SMPDB). 
+ */ +final class SmpdbCleaner implements Cleaner { + + public void clean(InputStream data, OutputStream cleanedData) { + // create bp model from dataFile + SimpleIOHandler simpleReader = new SimpleIOHandler(BioPAXLevel.L3); + Model model = simpleReader.convertFromOWL(data); + +// As we managed to get only human data archive from SMPDB there is no need for filtering by organism anymore - + if (!model.containsID(model.getXmlBase() + "Reference/TAXONOMY_9606") + && !model.containsID(model.getXmlBase() + "Reference/Taxonomy_9606") + && !model.getObjects(BioSource.class).isEmpty()) + throw new RuntimeException("Highly likely non-human datafile (skip)."); + + // Normalize Pathway URIs KEGG stable id, where possible + Set pathways = new HashSet<>(model.getObjects(Pathway.class)); + for (Pathway pw : pathways) { + //since Apr-2018, top/main pathway URIs are there already normalized (true for Pathbank 2019 as well) + + for (PathwayStep step : new HashSet<>(pw.getPathwayOrder())) { + if (step.getNextStep().isEmpty() && step.getNextStepOf().isEmpty()) { + for (Process process : step.getStepProcess()) + if (process instanceof Interaction && !Interaction.class.equals(process.getModelInterface())) + pw.addPathwayComponent(process); + pw.removePathwayOrder(step); + } + } + + //remove all Interaction.class (base) objects + for (Interaction it : new HashSet<>(model.getObjects(Interaction.class))) { + if (Interaction.class.equals(it.getModelInterface())) + model.remove(it); + } + + //remove sub-pathways + for (Pathway pathway : new HashSet<>(model.getObjects(Pathway.class))) { + if (pathway.getName().contains("SubPathway")) { + model.remove(pathway); + for (Pathway pp : new HashSet<>(pathway.getPathwayComponentOf())) + pp.removePathwayComponent(pathway); + } + } + + } + + for (Named o : model.getObjects(Named.class)) { + //move bogus dummy names to comments + for (String name : new HashSet<>(o.getName())) { + if (name.startsWith("SubPathway")) { + o.removeName(name); + 
o.addComment(name); + } + } + } + +// ModelUtils.replace(model, replacements); + ModelUtils.removeObjectsIfDangling(model, UtilityClass.class); + + // convert model back to OutputStream for return + simpleReader.convertToOWL(model, cleanedData); + } +} diff --git a/src/main/java/cpath/converter/ChebiOboConverter.java b/src/main/java/cpath/converter/ChebiOboConverter.java index f9be64800..e2228d6f0 100644 --- a/src/main/java/cpath/converter/ChebiOboConverter.java +++ b/src/main/java/cpath/converter/ChebiOboConverter.java @@ -35,7 +35,6 @@ class ChebiOboConverter extends BaseConverter { private static Logger log = LoggerFactory.getLogger(ChebiOboConverter.class); - private final String _IDENTIFIERS_ORG = "http://identifiers.org/"; private final String _ENTRY_START = "[Term]"; private final String _ID = "id: "; private final String _ALT_ID = "alt_id: "; @@ -71,7 +70,7 @@ public void convert(InputStream is, OutputStream os) { continue; } - Map chebiEntryMap = new HashMap(); + Map chebiEntryMap = new HashMap<>(); while (scanner.hasNextLine()) { line = scanner.nextLine(); @@ -131,13 +130,13 @@ private void updateMapEntry(Map map, String key, String line) { //It now generates an SMR for every ChEBI entry, even those without InChIKey (top classes, pill/pharma terms) private void buildSmallMoleculeReference(Model model, Map chebiEntryMap) { // create new URI, SMR, and primary xref: - String id = chebiEntryMap.get(_ID); - SmallMoleculeReference smr = model - .addNew(SmallMoleculeReference.class, _IDENTIFIERS_ORG+"chebi/"+id); - String xuri = Normalizer.uri(xmlBase, "ChEBI", id, UnificationXref.class); + String id = chebiEntryMap.get(_ID); //e.g. 
"CHEBI:422" + String ruri = Normalizer.uri(xmlBase, "chebi", id, SmallMoleculeReference.class); + SmallMoleculeReference smr = model.addNew(SmallMoleculeReference.class, ruri); + String xuri = Normalizer.uri(xmlBase, "chebi", id, UnificationXref.class); UnificationXref x = model.addNew(UnificationXref.class, xuri); x.setId(id); - x.setDb("ChEBI"); + x.setDb("chebi"); smr.addXref(x); // set displayName @@ -150,7 +149,8 @@ private void buildSmallMoleculeReference(Model model, Map chebiE String[] alt = chebiEntryMap.get(_ALT_ID).split("\t"); for(String altid : alt) { RelationshipXref rx = CPathUtils - .findOrCreateRelationshipXref(RelTypeVocab.SECONDARY_ACCESSION_NUMBER, "ChEBI", altid, model, false); + .findOrCreateRelationshipXref(RelTypeVocab.SECONDARY_ACCESSION_NUMBER, "chebi", + altid, model); smr.addXref(rx); } } @@ -162,11 +162,10 @@ private void buildSmallMoleculeReference(Model model, Map chebiE String[] synonyms = entry.split("\t"); for (String sy : synonyms) { Matcher matcher = namePattern.matcher(sy); - if (!matcher.find()) + if (!matcher.find()) { throw new IllegalStateException("Pattern failed to find a quoted text within: " + sy); - - String name = matcher.group(1); //get the name/value only - + } + String name = matcher.group(1); if (sy.contains("IUPAC_NAME")) { smr.setStandardName(name); } else if (sy.contains("InChIKey")) { @@ -176,7 +175,7 @@ private void buildSmallMoleculeReference(Model model, Map chebiE } //add RX because a InChIKey can map to several CHEBI IDs RelationshipXref rx = CPathUtils - .findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, "InChIKey", name, model, false); + .findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, "InChIKey", name, model); smr.addXref(rx); } else if (sy.contains("InChI=")) { String structureUri = Normalizer @@ -190,11 +189,9 @@ private void buildSmallMoleculeReference(Model model, Map chebiE smr.setStructure(structure); } else if (sy.contains("FORMULA")) { smr.setChemicalFormula(name); - smr.addName(name); 
//add - possible helps mapping by name + smr.addName(name); //helps to map/search by name } else if (sy.contains("MASS")) { smr.setMolecularWeight(Float.parseFloat(name)); - } else if (sy.contains("CHARGE") || sy.contains("MONOISOTOPIC_MASS")) { - // TODO: save charge, monoisotopic mass? } else { smr.addName(name); //incl. for SMILES } @@ -213,7 +210,7 @@ private void buildSmallMoleculeReference(Model model, Map chebiE // Skip all xrefs except CAS, KEGG (C*, D*), etc., // which are used for id-mapping, merging, full-text search, and graph queries. if (DB.equals("CAS") || DB.equals("DRUGBANK") || DB.equals("HMDB")) { - RelationshipXref rx = CPathUtils.findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, xdb, xid, model, false); + RelationshipXref rx = CPathUtils.findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, xdb, xid, model); smr.addXref(rx); } else if (DB.startsWith("WIKIPEDIA")) { smr.addName(id); @@ -222,7 +219,7 @@ private void buildSmallMoleculeReference(Model model, Map chebiE xdb += " Compound"; else if(xid.startsWith("D")) xdb += " Drug"; - RelationshipXref rx = CPathUtils.findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, xdb, xid, model, false); + RelationshipXref rx = CPathUtils.findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, xdb, xid, model); smr.addXref(rx); } } else { diff --git a/src/main/java/cpath/converter/ChebiOntologyAnalysis.java b/src/main/java/cpath/converter/ChebiOntologyAnalysis.java index a77369d2a..08955bb5a 100644 --- a/src/main/java/cpath/converter/ChebiOntologyAnalysis.java +++ b/src/main/java/cpath/converter/ChebiOntologyAnalysis.java @@ -47,12 +47,11 @@ private void processOBOEntry(StringBuilder entryBuffer, Model model) throws IOEx // get SMR for entry out of Warehouse Collection childChebiIDs = getValuesByREGEX(entryBuffer, CHEBI_OBO_ID_REGEX); if (childChebiIDs.size() != 1) { - log.error("processOBOEntry(), got none or >1 ID in: " + entryBuffer.toString() + "; skipped."); + log.error("processOBOEntry(), got none or >1 ID 
in: " + entryBuffer + "; skipped."); return; } final String thisID = childChebiIDs.iterator().next(); - SmallMoleculeReference thisSMR = (SmallMoleculeReference) model - .getByID("http://identifiers.org/chebi/CHEBI:" + thisID); + SmallMoleculeReference thisSMR = (SmallMoleculeReference) model.getByID("bioregistry.io/chebi:" + thisID.toLowerCase()); if (thisSMR == null) { log.debug("processOBOEntry(), Skipped (not found): " + thisID); return; @@ -62,8 +61,7 @@ private void processOBOEntry(StringBuilder entryBuffer, Model model) throws IOEx Collection parentChebiIDs = getValuesByREGEX(entryBuffer, CHEBI_OBO_ISA_REGEX); for (String parentChebiID : parentChebiIDs) { RelationshipXref xref = CPathUtils.findOrCreateRelationshipXref(RelTypeVocab.MULTIPLE_PARENT_REFERENCE, - "chebi", "CHEBI:"+parentChebiID, model, false); - thisSMR.addComment("is_a CHEBI:" + parentChebiID); + "chebi", parentChebiID, model); thisSMR.addXref(xref); } @@ -72,27 +70,23 @@ private void processOBOEntry(StringBuilder entryBuffer, Model model) throws IOEx for (String relationship : relationships) { String[] parts = relationship.split(_COLON); RelationshipXref xref = CPathUtils.findOrCreateRelationshipXref(RelTypeVocab.ADDITIONAL_INFORMATION, - "chebi", "CHEBI:"+parts[1], model, false); - thisSMR.addComment(parts[0].toLowerCase() + " CHEBI:" + parts[1]); + "chebi", parts[1], model); + thisSMR.addComment(parts[0].toLowerCase() + " " + parts[1]); thisSMR.addXref(xref); } } /** * Given an OBO entry, returns the values matched by the given regex. If - * regex contains more that one capture group, a ":" will be used to delimit + * regex contains more than one capture group, a ":" will be used to delimit * them. 
*/ private Collection getValuesByREGEX(StringBuilder entryBuffer, - Pattern regex) throws IOException { + Pattern regex) { Collection toReturn = new ArrayList<>(); Scanner scanner = new Scanner(entryBuffer.toString()); - if (log.isDebugEnabled()) { - log.debug("getValue(), key: " + regex.toString()); - } - while (scanner.hasNextLine()) { String line = scanner.nextLine(); Matcher matcher = regex.matcher(line); @@ -101,7 +95,7 @@ private Collection getValuesByREGEX(StringBuilder entryBuffer, for (int lc = 1; lc <= matcher.groupCount(); lc++) { toAdd += matcher.group(lc) + _COLON; } - toReturn.add(toAdd.substring(0, toAdd.length() - 1));//to remove ending ':' + toReturn.add(toAdd.substring(0, toAdd.length() - 1)); //to remove ending ':' } } diff --git a/src/main/java/cpath/converter/ReconxConverter.java b/src/main/java/cpath/converter/ReconxConverter.java index f4fbad2f8..ccadd21b2 100644 --- a/src/main/java/cpath/converter/ReconxConverter.java +++ b/src/main/java/cpath/converter/ReconxConverter.java @@ -4,8 +4,6 @@ import org.biopax.paxtools.model.BioPAXLevel; import org.biopax.paxtools.model.Model; import org.humanmetabolism.converter.SbmlToBiopaxConverter; -import org.sbml.jsbml.SBMLDocument; -import org.sbml.jsbml.SBMLReader; import javax.xml.stream.XMLStreamException; import java.io.InputStream; @@ -18,12 +16,11 @@ public class ReconxConverter extends BaseConverter { public void convert(InputStream is, OutputStream os) { Model bpModel; - + SbmlToBiopaxConverter sbmlToBiopaxConverter = new SbmlToBiopaxConverter(); try { - SBMLDocument sbmlDocument = SBMLReader.read(is); - SbmlToBiopaxConverter sbmlToBiopaxConverter = new SbmlToBiopaxConverter(); - sbmlToBiopaxConverter.setMakePathway(false); //won't generate that all-interactions root model pathway - bpModel = sbmlToBiopaxConverter.convert(sbmlDocument); + sbmlToBiopaxConverter.setMakePathway(false); + //do not produce root/top pathway + bpModel = sbmlToBiopaxConverter.convert(is); } catch (XMLStreamException e) { 
throw new RuntimeException("Failed to convert Recon2 SBML to BioPAX.", e); } diff --git a/src/main/java/cpath/converter/UniprotConverter.java b/src/main/java/cpath/converter/UniprotConverter.java index e6c739b97..41a3de3f9 100644 --- a/src/main/java/cpath/converter/UniprotConverter.java +++ b/src/main/java/cpath/converter/UniprotConverter.java @@ -10,7 +10,6 @@ import org.biopax.paxtools.model.level3.PositionStatusType; import org.biopax.paxtools.model.level3.ProteinReference; import org.biopax.paxtools.model.level3.RelationshipXref; -import org.biopax.paxtools.model.level3.SequenceInterval; import org.biopax.paxtools.model.level3.SequenceModificationVocabulary; import org.biopax.paxtools.model.level3.SequenceSite; import org.biopax.paxtools.model.level3.UnificationXref; @@ -35,13 +34,11 @@ final class UniprotConverter extends BaseConverter { private static final Logger log = LoggerFactory.getLogger(UniprotConverter.class); public void convert(InputStream is, OutputStream os) { - // ref to reader here so - // we can close in finally clause - Model model = BioPAXLevel.L3.getDefaultFactory().createModel(); - model.setXmlBase(xmlBase); + Model model = BioPAXLevel.L3.getDefaultFactory().createModel(); + model.setXmlBase(xmlBase); Scanner scanner = new Scanner(is, "UTF-8"); - final HashMap dataElements = new HashMap(); + final HashMap dataElements = new HashMap<>(); log.info("convert(), starting to read data..."); long linesReadSoFar = 0; while (scanner.hasNextLine()) { @@ -58,8 +55,9 @@ public void convert(InputStream is, OutputStream os) { String acNames = dataElements.get("AC").toString(); StringBuilder xrefs = dataElements.get("DR"); final String idLine = dataElements.get("ID").toString(); - StringBuilder sq = dataElements.get("SQ"); //SEQUENCE SUMMARY - StringBuilder sequence = dataElements.get(" "); //SEQUENCE +// we will not process/save the canonical sequence - +// StringBuilder sq = dataElements.get("SQ"); //SEQUENCE SUMMARY +// StringBuilder sequence = 
dataElements.get(" "); //SEQUENCE StringBuilder features = dataElements.get("FT"); //strict format in 6-75 char in each FT line ProteinReference proteinReference = newProteinReferenceWithAccessionXrefs(idLine, acNames, model); @@ -78,7 +76,7 @@ public void convert(InputStream is, OutputStream os) { // also add Gene Names to PR names (can be >1 due to isoforms) proteinReference.addName(symbol); RelationshipXref rXRef = CPathUtils - .findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, "HGNC Symbol", symbol, model, false); + .findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, "HGNC Symbol", symbol, model); proteinReference.addXref(rXRef); } } @@ -93,30 +91,25 @@ public void convert(InputStream is, OutputStream os) { } } - // add some info from CC fields to BioPAX comments - if (comments != null) { - setComments (comments.toString(), proteinReference); - } +// comments take too much space and can confuse full-text search (too generic, not specific for the model we build) +// // add some info from CC fields to BioPAX comments +// if (comments != null) { +// setComments (comments.toString(), proteinReference); +// } -// won't store canonical sequences (in practice, it does not help and may even mislead: -// in fact, one usually needs to know an isoform sequence (variant) and its version exactly) -// if(sequence != null) { //set sequence (remove spaces) -// String seq = sequence.toString().replaceAll("\\s", ""); -// proteinReference.setSequence(seq); -// proteinReference.addComment(sq.toString()); //sequence summary -// } +// won't store canonical sequences (in practice, it does not help and may even mislead) - //create modified residue features - if(features != null) - createModResFeatures(features.toString(), proteinReference, model); +// won't generate MFs from FT, MOD_RES records anymore (we don't use them; let's save storage/RAM for now) +// //create modified residue features +// if(features != null) +// createModResFeatures(features.toString(), proteinReference, 
model); // debug: write the one-protein-reference model log.debug("convert(). so far line# " + linesReadSoFar); dataElements.clear(); - } - else { //continue read and collect current Uniprot entry lines - /* The two-character line-type code that begins each line is + } else { //continue read and collect current Uniprot entry lines + /* The two-character line-type code that begins each line is * always followed by three blanks, so that the actual * information begins with the sixth character. */ @@ -124,8 +117,7 @@ public void convert(InputStream is, OutputStream os) { String data = line.substring(5); if (data.startsWith("-------") || data.startsWith("Copyrighted") || - data.startsWith("Distributed")) - { + data.startsWith("Distributed")) { // do nothing } else { //important for correct splitting DR rows @@ -151,13 +143,10 @@ public void convert(InputStream is, OutputStream os) { new SimpleIOHandler(BioPAXLevel.L3).convertToOWL(model, os); } - + /* - * Sets name and synonyms on protein reference. - * - * @param proteinReference ProteinReference - * @param deField String - */ + * Sets name and synonyms on protein reference. + */ private void setNameAndSynonyms (ProteinReference proteinReference, String deField) { // With the latest UNIPROT Export, the DE Line contains multiple fields. 
// For example: @@ -175,15 +164,14 @@ private void setNameAndSynonyms (ProteinReference proteinReference, String deFie String fieldValue = parts[1].trim(); // after 1 Oct 2014, remove evidence (e.g., {type|source} - {ECO:0000269|PubMed10433554}) - // at the name's end (see http://www.uniprot.org/changes/evidences) + // at the name's end (see http://www.uniprot.org/changes/evidences) int idx = fieldValue.indexOf(" {"); if(idx>0) fieldValue = fieldValue.substring(0, idx); if ("RecName: Full".equals(fieldName)) { - proteinReference.setStandardName(fieldValue); - } - else { - proteinReference.addName(fieldValue); + proteinReference.setStandardName(fieldValue); + } else { + proteinReference.addName(fieldValue); } } } @@ -192,34 +180,35 @@ private void setNameAndSynonyms (ProteinReference proteinReference, String deFie /** * Sets the Current Organism Information. - * - * @param organismName String + * + * @param organismName String * @param organismTaxId String * @param proteinReference ProteinReference * @param model target biopax model */ - private void setOrganism(String organismName, String organismTaxId, - ProteinReference proteinReference, Model model) { - String parts[] = organismTaxId.replaceAll(";", "").split("="); - String taxId = parts[1]; - //since 1 Oct 2014, have to remove {evidence} after the taxId (after space); see http://www.uniprot.org/changes/evidences - int idx = taxId.indexOf(" {"); - if(idx > 0) - taxId = taxId.substring(0, idx); - - parts = organismName.split("\\("); // - by first occurrence of '(' - String name = parts[0].trim(); - BioSource bioSource = getBioSource(taxId, name, model); - proteinReference.setOrganism(bioSource); - } + private void setOrganism(String organismName, String organismTaxId, + ProteinReference proteinReference, Model model) { + String parts[] = organismTaxId.replaceAll(";", "").split("="); + String taxId = parts[1]; + + //since 1 Oct 2014, have to remove {evidence} after the taxId (after space); see 
http://www.uniprot.org/changes/evidences + int idx = taxId.indexOf(" {"); + if(idx > 0) { + taxId = taxId.substring(0, idx); + } + + parts = organismName.split("\\("); // - by first occurrence of '(' + String name = parts[0].trim(); + BioSource bioSource = getBioSource(taxId, name, model); + proteinReference.setOrganism(bioSource); + } /** - * Sets some BioPAX comments - * ("INTERACTION" sections of UniProt CC fields, - * gene symbols, and copyright.) - * - * @param comments String - * @param proteinReference ProteinReference + * Sets some BioPAX comments for the ProteinReference + * (except for "INTERACTION" sections of UniProt CC fields) + * + * @param comments String + * @param proteinReference ProteinReference */ private void setComments (String comments, ProteinReference proteinReference) { @@ -238,8 +227,8 @@ private void setComments (String comments, ProteinReference proteinReference) reducedComments.append (" COPYRIGHT: UniProt Consortium (www.uniprot.org). Distributed under " + "the Creative Commons Attribution-NoDerivs License."); } - - proteinReference.addComment(reducedComments.toString()); + + proteinReference.addComment(reducedComments.toString()); } @@ -249,35 +238,35 @@ private void setComments (String comments, ProteinReference proteinReference) * @param dbRefs String (concatenated 'DR' lines) */ private void setXRefsFromDRs (String dbRefs, ProteinReference proteinReference, Model model) { - final String lines[] = dbRefs.split("\n"); - - for (String line : lines) { - //remove everything after '.' (e.g., isoform refs, comments, etc.) - String xref = line.trim(); - //every line ends with '.' or '. [blah-...]' something - int lastDotIdx = xref.lastIndexOf("."); - xref = xref.substring(0,lastDotIdx); - String parts[] = xref.split(";"); - - // get the db name part - String db = parts[0].trim(); - - // use only some of prot. ref. 
identity resources - // (to make Xrefs and then use them for id-mapping); - // skip for other, not identity, ID types, - // e.g., refs to pathway databases, ontologies, etc. - // see also: http://www.uniprot.org/docs/dbxref for the db name abbreviations used in 'DR' lines - if ( //TODO which 'DR' ID types do we want for id-mapping? (all are used by Merger; some - in queries) - !db.equalsIgnoreCase("GENEID") // NCBI Gene (EntrezGene) - && !db.equalsIgnoreCase("REFSEQ") - && !db.equalsIgnoreCase("ENSEMBL") - && !db.equalsIgnoreCase("HGNC") - && !db.equalsIgnoreCase("PDB") - && !db.equalsIgnoreCase("IPI") //International Protein Index (deprecated; use UniProt) - && !db.equalsIgnoreCase("EMBL") //nucleotide sequence database - && !db.equalsIgnoreCase("PIR") //NCBI Protein - && !db.equalsIgnoreCase("PHARMGKB") - && !db.equalsIgnoreCase("DIP") + final String lines[] = dbRefs.split("\n"); + + for (String line : lines) { + //remove everything after '.' (e.g., isoform refs, comments, etc.) + String xref = line.trim(); + //every line ends with '.' or '. [blah-...]' something + int lastDotIdx = xref.lastIndexOf("."); + xref = xref.substring(0,lastDotIdx); + String parts[] = xref.split(";"); + + // get the db name part + String db = parts[0].trim(); + + // use only some of prot. ref. identity resources + // (to make Xrefs and then use them for id-mapping); + // skip for other, not identity, ID types, + // e.g., refs to pathway databases, ontologies, etc. + // see also: http://www.uniprot.org/docs/dbxref for the db name abbreviations used in 'DR' lines + if ( // which 'DR' ID types do we want for id-mapping? 
(all are used by Merger; some - in queries) + !db.equalsIgnoreCase("GENEID") // NCBI Gene (EntrezGene) + && !db.equalsIgnoreCase("REFSEQ") + && !db.equalsIgnoreCase("ENSEMBL") + && !db.equalsIgnoreCase("HGNC") + && !db.equalsIgnoreCase("PDB") + && !db.equalsIgnoreCase("IPI") //International Protein Index (deprecated; use UniProt) + && !db.equalsIgnoreCase("EMBL") //nucleotide sequence database + && !db.equalsIgnoreCase("PIR") //NCBI Protein + && !db.equalsIgnoreCase("PHARMGKB") + && !db.equalsIgnoreCase("DIP") // && !db.equalsIgnoreCase("INTERPRO") // && !db.equalsIgnoreCase("PANTHER") //PANTHER Family // && !db.equalsIgnoreCase("GENECARDS") //ID, not Symbol @@ -287,52 +276,52 @@ private void setXRefsFromDRs (String dbRefs, ProteinReference proteinReference, // && !db.equalsIgnoreCase("PIRSF") // && !db.equalsIgnoreCase("PROSITE") // && !db.equalsIgnoreCase("ORTHODB") - ) continue; - - String fixedDb = db; - if (db.equalsIgnoreCase("GENEID")) - fixedDb = "NCBI Gene"; // - preferred name - - //iterate over the ID tokens of the same DR line, skipping non-ID comments, etc. (ending) - for (int j = 1; j < parts.length; j++) { - String id = parts[j].trim(); - //at the end of a DR line in some cases (e.g, GeneID or RefSeq)? - if(id.equals("-")) break; - - //skip PDB annotations - if(db.equalsIgnoreCase("PDB") && !id.matches("^[0-9][A-Za-z0-9]{3}$")) break; - //no more Ensembl IDs (skip comments) - else if (db.equalsIgnoreCase("ENSEMBL") && !id.startsWith("ENS")) break; - //no more InterPro IDs (skip comments) + ) continue; + + String fixedDb = db; + if (db.equalsIgnoreCase("GENEID")) + fixedDb = "NCBI Gene"; // - preferred name + + //iterate over the ID tokens of the same DR line, skipping non-ID comments, etc. (ending) + for (int j = 1; j < parts.length; j++) { + String id = parts[j].trim(); + //at the end of a DR line in some cases (e.g, GeneID or RefSeq)? 
+ if(id.equals("-")) break; + + //skip PDB annotations + if(db.equalsIgnoreCase("PDB") && !id.matches("^[0-9][A-Za-z0-9]{3}$")) break; + //no more Ensembl IDs (skip comments) + else if (db.equalsIgnoreCase("ENSEMBL") && !id.startsWith("ENS")) break; + //no more InterPro IDs (skip comments) // else if (db.equalsIgnoreCase("INTERPRO") && !id.startsWith("IPR")) break; // else if (db.equalsIgnoreCase("PANTHER") && !id.startsWith("PTHR")) break; // else if (db.equalsIgnoreCase("PRINTS") && !id.startsWith("PR")) break; - else if (db.equalsIgnoreCase("PHARMGKB") && !id.startsWith("PA")) break; + else if (db.equalsIgnoreCase("PHARMGKB") && !id.startsWith("PA")) break; // else if (db.equalsIgnoreCase("ORTHODB") && !id.startsWith("EOG")) break; - else if (db.equalsIgnoreCase("DIP") && !id.startsWith("DIP-")) break; - else if (db.equalsIgnoreCase("EMBL")) { - if(!id.matches("^(\\w+\\d+(\\.\\d+)?)|(NP_\\d+)$")) - break; - fixedDb = "Nucleotide Sequence Database"; - //last ID in a HGNC line is in fact gene name - } else if(db.equalsIgnoreCase("HGNC") && !id.startsWith("HGNC:")) { - fixedDb = "HGNC Symbol"; - } - //remove .version from RefSeq IDs - else if (db.equalsIgnoreCase("REFSEQ")) { - // extract only RefSeq AC from AC.Version ID form - fixedDb = "RefSeq"; - id = id.replaceFirst("\\.\\d+", ""); + else if (db.equalsIgnoreCase("DIP") && !id.startsWith("DIP-")) break; + else if (db.equalsIgnoreCase("EMBL")) { + if(!id.matches("^(\\w+\\d+(\\.\\d+)?)|(NP_\\d+)$")) + break; + fixedDb = "Nucleotide Sequence Database"; + //last ID in a HGNC line is in fact gene name + } else if(db.equalsIgnoreCase("HGNC") && !id.startsWith("HGNC:")) { + fixedDb = "HGNC Symbol"; + } + //remove .version from RefSeq IDs + else if (db.equalsIgnoreCase("REFSEQ")) { + // extract only RefSeq AC from AC.Version ID form + fixedDb = "RefSeq"; + id = id.replaceFirst("\\.\\d+", ""); + } + + //ok to create a new rel. 
xref with type "identity" + RelationshipXref rXRef = CPathUtils.findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, fixedDb, id, model); + proteinReference.addXref(rXRef); + // this xref type is then used for id-mapping in the Merger and queries; } - - //ok to create a new rel. xref with type "identity" - RelationshipXref rXRef = CPathUtils.findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, fixedDb, id, model, false); - proteinReference.addXref(rXRef); - // this xref type is then used for id-mapping in the Merger and queries; - } } - - } + + } /** @@ -343,18 +332,18 @@ else if (db.equalsIgnoreCase("REFSEQ")) { */ private Collection getGeneSymbols(String geneName, ProteinReference proteinReference) { - Collection symbls = new ArrayList<>(); + Collection symbls = new ArrayList<>(); String parts[] = geneName.split(";\\s*(and)?"); for (int i=0; i1 due to isoforms if (subParts[0].trim().equals("Name")) { - //remove {evidence}; see http://www.uniprot.org/changes/evidences (GN) - String gn = subParts[1]; - int idx = gn.indexOf(" {"); - if(idx>0) - gn = gn.substring(0, idx); - symbls.add(gn); + //remove {evidence}; see http://www.uniprot.org/changes/evidences (GN) + String gn = subParts[1]; + int idx = gn.indexOf(" {"); + if(idx>0) + gn = gn.substring(0, idx); + symbls.add(gn); } } return symbls; @@ -368,18 +357,18 @@ private Collection getGeneSymbols(String geneName, ProteinReference prot */ private Collection getGeneSynonyms(String geneName, ProteinReference proteinReference) { - Collection syns = new ArrayList<>(); + Collection syns = new ArrayList<>(); String parts[] = geneName.split(";\\s*(and)?"); for (int i=0; i0) - currentSynonym = currentSynonym.substring(0, idx); + //remove {evidence}; see http://www.uniprot.org/changes/evidences (GN) + int idx = currentSynonym.indexOf(" {"); + if(idx>0) + currentSynonym = currentSynonym.substring(0, idx); syns.add(currentSynonym); } } @@ -390,25 +379,24 @@ private Collection getGeneSynonyms(String geneName, ProteinReference pro 
/** * Sets Unification XRefs. - * @param dbName value for 'db' property of the xref + * @param dbName value for 'db' property of the xref * @param id value for 'id' property of the xref - * @param proteinReference a protein reference to add the xref - * @param model the BioPAX model - */ + * @param proteinReference a protein reference to add the xref + * @param model the BioPAX model + */ private void setUnificationXRef(String dbName, String id, ProteinReference proteinReference, Model model) { - id = id.trim(); - dbName = dbName.trim(); - String rdfId = Normalizer.uri(model.getXmlBase(), dbName, id, UnificationXref.class); - - UnificationXref x = (UnificationXref) model.getByID(rdfId); - if (x == null) { - x = model.addNew(UnificationXref.class, rdfId); - x.setDb(dbName); - x.setId(id); - x.addComment("PRIMARY"); - } - - proteinReference.addXref(x); + id = id.trim(); + dbName = dbName.trim(); + String rdfId = Normalizer.uri(model.getXmlBase(), dbName, id, UnificationXref.class); + + UnificationXref x = (UnificationXref) model.getByID(rdfId); + if (x == null) { + x = model.addNew(UnificationXref.class, rdfId); + x.setDb(dbName); + x.setId(id); + } + + proteinReference.addXref(x); } /* @@ -416,177 +404,120 @@ private void setUnificationXRef(String dbName, String id, ProteinReference prote * from a pre-processed UniProt record: assigns the standard URI and * unification xrefs. */ - private ProteinReference newProteinReferenceWithAccessionXrefs(String idLine, String accessions, Model model) - { + private ProteinReference newProteinReferenceWithAccessionXrefs(String idLine, String accessions, Model model) { // accession numbers as array final List acList = new ArrayList<>(Arrays.asList(accessions.split(";"))); - // Pop the the first item, the primary AC, to generate canonical URI and unif. xref: + // Pop the first item, the primary AC, to generate canonical URI and unif. 
xref: final String primaryId = acList.remove(0).trim(); - final String uri = "http://identifiers.org/uniprot/" + primaryId; + final String uri = "bioregistry.io/uniprot:" + primaryId; // create a new PR with the name and primary unification xref ProteinReference proteinReference = model.addNew(ProteinReference.class, uri); String entryId = idLine.split("\\s+")[0]; //such as 'CALM_HUMAN' proteinReference.setDisplayName(entryId); //also use the ID (e.g., CALM_HUMAN) for a special RelationshipXref - RelationshipXref rXRef = CPathUtils.findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, "uniprot", entryId, model, false); + RelationshipXref rXRef = CPathUtils.findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, "uniprot", entryId, model); proteinReference.addXref(rXRef); //add the primary accession number unification xref - setUnificationXRef("UniProt Knowledgebase", primaryId, proteinReference, model); + setUnificationXRef("uniprot", primaryId, proteinReference, model); // add 'secondary-ac' type RXs: for (String acEntry : acList) { rXRef = CPathUtils.findOrCreateRelationshipXref( - RelTypeVocab.SECONDARY_ACCESSION_NUMBER, "UniProt Knowledgebase", acEntry.trim(), model, false); + RelTypeVocab.SECONDARY_ACCESSION_NUMBER, "uniprot", acEntry.trim(), model); proteinReference.addXref(rXRef); } proteinReference.addComment(idLine); - + return proteinReference; } /** * Gets a biosource */ - private BioSource getBioSource(String taxId, String name, Model model) - { + private BioSource getBioSource(String taxId, String name, Model model) { // check taxonomy ID is integer value - Integer taxonomy = null; + Integer taxonomy; try { taxonomy = Integer.valueOf(taxId); } catch (NumberFormatException e) { - throw new RuntimeException("Faild to convert " + taxId - + " into integer taxonomy ID", e); + throw new RuntimeException("Failed to convert " + taxId + " into integer taxonomy ID", e); } - BioSource toReturn = null; - + BioSource toReturn; // check the organism was previously used, 
re-use it if(taxonomy==null || taxonomy <= 0) { throw new RuntimeException("Illegal taxonomy ID: " + taxId); } else { - String uri = "http://identifiers.org/taxonomy/" + taxonomy; + String uri = "bioregistry.io/ncbitaxon:" + taxonomy; if (model.containsID(uri)) { toReturn = (BioSource) model.getByID(uri); } else { - toReturn = (BioSource) model - .addNew(BioSource.class, uri); + toReturn = model.addNew(BioSource.class, uri); toReturn.setStandardName(name); toReturn.setDisplayName(name); - UnificationXref taxonXref = (UnificationXref) model - .addNew(UnificationXref.class, Normalizer - .uri(model.getXmlBase(), "TAXONOMY", taxId, UnificationXref.class)); - taxonXref.setDb("taxonomy"); + UnificationXref taxonXref = model.addNew(UnificationXref.class, Normalizer + .uri(model.getXmlBase(), "ncbitaxon", taxId, UnificationXref.class)); + taxonXref.setDb("ncbi taxonomy"); taxonXref.setId(taxId); - toReturn.addXref((UnificationXref) taxonXref); + toReturn.addXref(taxonXref); } } return toReturn; } - + /* * Parses only "FT MOD_RES N M Term..." lines data and creates protein modification features and sites; - * original data line cannot exceed 70 chars, but can span multiple lines (usually just one or two), - * and ends with '.'; extra lines were originally like "FT end-of-term." - * (dot is used only on the last line), but "FT " and all the leading spaces up to original position - * no. 35 in the second etc. lines were already removed from the final 'features' text. - * In other words, here, 'features' string contains concatenated with '.' lines like - * "MOD_RES N M full-term-name" (and such strings can be longer than 65 chars) + * original data line cannot exceed 70 chars, but can span multiple lines and ends with '.' */ - private void createModResFeatures(final String features, - final ProteinReference pr, Model model) - { + private void createModResFeatures(final String features, + final ProteinReference pr, Model model) { // using a special "not greedy" regex! 
- Pattern pattern = Pattern.compile("MOD_RES.+?\\."); - Matcher matcher = pattern.matcher(features); - int mfIndex = 0; - while(matcher.find()) { - String ftContent = matcher.group(); //i.e., not including "FT ", - //the term starts at 29th char (because "FT " at the beginning of each line already's gone) - String what = ftContent.substring(29, ftContent.length()-1);//excluding the final dot '.' - // split the result by ';' (e.g., it might now look like "Phosphothreonine; by CaMK4") - // to extract the modification type and create the standard PSI-MOD synonym; - String[] terms = what.toString().split(";"); + Pattern pattern = Pattern.compile("MOD_RES\\s+(\\d+)\\s*/note=\"([^\"]+)\""); + Matcher matcher = pattern.matcher(features); + int mfIndex = 0; + while(matcher.find()) { + // split the 2nd group by ';' (could be e.g. "Phosphothreonine; by CaMK4") + // to extract the modification type and create the standard PSI-MOD synonym; + String[] terms = matcher.group(2).split(";"); String mod = terms[0]; - - //remove non-standard ending comment from the standard CV term, - //right before the final dot, if present; i.e. it removes things like + + //(this seems for older format, but does not hurt to keep...) + //remove non-standard ending comment from the standard CV term, things like //"...(By similarity).", or "...(Probable).", "...(Potential)." - - // but should not be too greedy to left just "N6-" out of "N6-(pyridoxal phosphate)lysine (By similarity)." + //not greedy; i.e get "N6-(pyridoxal phosphate)lysine", not "N6-", from "N6-(pyridoxal phosphate)lysine (By similarity)." 
mod = mod.replaceFirst("\\([^()]+?\\)$","").trim(); - - //official PSI-MOD synonym (see http://www.ebi.ac.uk/ontology-lookup) - final String modTerm = "MOD_RES " + mod; - - //PSI-MOD ID will be auto-added by the biopax-validator/normalizer - + // Create the feature with CV and location - mfIndex++; - String uri = Normalizer.uri(model.getXmlBase(), + String uri = Normalizer.uri(model.getXmlBase(), null, pr.getDisplayName() + "_" + mfIndex, ModificationFeature.class); ModificationFeature modificationFeature = model.addNew(ModificationFeature.class, uri); - modificationFeature.addComment(ftContent); - + // get/create a new PSI-MOD SequenceModificationVocabulary (can be shared by many PRs) - uri = Normalizer.uri(model.getXmlBase(), "MOD", modTerm, SequenceModificationVocabulary.class); + uri = Normalizer.uri(model.getXmlBase(), "mod", mod, SequenceModificationVocabulary.class); // so, let's check if it exists in the temp. or target model: SequenceModificationVocabulary cv = (SequenceModificationVocabulary) model.getByID(uri); if(cv == null) { - // create a new SequenceModificationVocabulary cv = model.addNew(SequenceModificationVocabulary.class, uri); - cv.addTerm(modTerm); - //add the name without MOD_RES prefix as well (sometimes it is the valid one) cv.addTerm(mod); } modificationFeature.setModificationType(cv); - - // create feature location (site or interval) - final int start = Integer.parseInt(ftContent.substring(9, 15).trim()); - final int end = Integer.parseInt(ftContent.substring(16, 22).trim()); + final int loc = Integer.parseInt(matcher.group(1)); // so, let's check if the site exists in the temp. 
model - String idPart = pr.getDisplayName() + //e.g., CALM_HUMAN - from the ID line - "_" + start; - uri = Normalizer.uri(model.getXmlBase(), null, idPart, SequenceSite.class); - - SequenceSite startSite = (SequenceSite) model.getByID(uri); - if(startSite == null) { - startSite = model.addNew(SequenceSite.class, uri); - startSite.setPositionStatus(PositionStatusType.EQUAL); - startSite.setSequencePosition(start); - } - - if(start == end) { - modificationFeature.setFeatureLocation(startSite); - } else { - //create the second site (end) and sequence interval - - idPart = pr.getDisplayName() + //e.g., CALM_HUMAN - from the ID line - "_" + end; - uri = Normalizer.uri(model.getXmlBase(), null, idPart, SequenceSite.class); - - SequenceSite endSite = (SequenceSite) model.getByID(uri); - if(endSite == null) { - endSite = model.addNew(SequenceSite.class, uri); - endSite.setPositionStatus(PositionStatusType.EQUAL); - endSite.setSequencePosition(end); - } - - idPart = pr.getDisplayName() + "_" + start + "_" + end; - uri = Normalizer.uri(model.getXmlBase(), null, idPart, SequenceInterval.class); - - SequenceInterval sequenceInterval = (SequenceInterval) model.getByID(uri); - if(sequenceInterval == null) { - sequenceInterval = model.addNew(SequenceInterval.class, uri); - sequenceInterval.setSequenceIntervalBegin(startSite); - sequenceInterval.setSequenceIntervalEnd(endSite); - } - - modificationFeature.setFeatureLocation(sequenceInterval); + String idPart = pr.getDisplayName() + "_" + loc; + uri = Normalizer.uri(model.getXmlBase(), null, idPart, SequenceSite.class); + SequenceSite ss = (SequenceSite) model.getByID(uri); + if(ss == null) { + ss = model.addNew(SequenceSite.class, uri); + ss.setPositionStatus(PositionStatusType.EQUAL); + ss.setSequencePosition(loc); } - + modificationFeature.setFeatureLocation(ss); pr.addEntityFeature(modificationFeature); - } - + } + } } diff --git a/src/main/java/cpath/service/BiopaxConverter.java 
b/src/main/java/cpath/service/BiopaxConverter.java index 628659b6f..c70e6810f 100644 --- a/src/main/java/cpath/service/BiopaxConverter.java +++ b/src/main/java/cpath/service/BiopaxConverter.java @@ -208,8 +208,9 @@ private void convertToGSEA(Model m, OutputStream stream, Map opt private void convertToSIF(Model m, OutputStream out, boolean extended, Map options) { String db; - if ((db = options.get("db")) == null) + if ((db = options.get("db")) == null) { db = "hgnc symbol"; //default + } ConfigurableIDFetcher idFetcher = new ConfigurableIDFetcher(); idFetcher.chemDbStartsWithOrEquals("chebi"); @@ -227,8 +228,10 @@ private void convertToSIF(Model m, OutputStream out, String[] sifNames = options.get("pattern").split(","); sifTypes = new SIFType[sifNames.length]; int i = 0; - for (String t : sifNames) - sifTypes[i++] = SIFEnum.typeOf(t); + for (String t : sifNames) { + SIFEnum p = SIFEnum.typeOf(t); + if(p != null) sifTypes[i++] = p; + } } else { //default: apply all SIF rules but neighbor_of Collection c = new HashSet<>(Arrays.asList(SIFEnum.values())); @@ -258,7 +261,7 @@ private Set providers(Model m) { Set names = null; if (m != null) { - Set provs = m.getObjects(Provenance.class); + Collection provs = m.getObjects(Provenance.class); if (provs != null && !provs.isEmpty()) { names = new TreeSet<>(); for (Provenance prov : provs) { diff --git a/src/main/java/cpath/service/CPathUtils.java b/src/main/java/cpath/service/CPathUtils.java index ad9a84fb8..b60a3cfef 100644 --- a/src/main/java/cpath/service/CPathUtils.java +++ b/src/main/java/cpath/service/CPathUtils.java @@ -3,20 +3,20 @@ import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.io.OutputStream; import java.lang.reflect.Constructor; import java.lang.reflect.Method; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; +import java.nio.file.*; import java.util.*; -import java.util.stream.Collectors; +import java.util.stream.Stream; 
import java.util.zip.*; +import com.fasterxml.jackson.databind.ObjectMapper; import cpath.service.api.Cleaner; import cpath.service.api.Converter; import cpath.service.api.RelTypeVocab; +import cpath.service.metadata.Datasource; +import cpath.service.metadata.Metadata; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; @@ -29,23 +29,17 @@ import org.biopax.paxtools.model.Model; import org.biopax.paxtools.model.level3.*; import org.biopax.paxtools.normalizer.Normalizer; -import org.json.simple.JSONArray; -import org.json.simple.JSONObject; -import org.json.simple.parser.JSONParser; -import org.json.simple.parser.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.core.io.DefaultResourceLoader; import org.springframework.core.io.ResourceLoader; import org.springframework.util.Assert; -import cpath.service.jpa.Metadata; - public final class CPathUtils { private static Logger LOGGER = LoggerFactory.getLogger(CPathUtils.class); private static final String dataFileSuffixRegex = "[^.]+\\.gz$"; - // LOADER can handle file://, ftp://, http:// PROVIDER_URL resources + // LOADER can handle file://, ftp://, http:// resources public static final ResourceLoader LOADER = new DefaultResourceLoader(); private CPathUtils() { @@ -71,40 +65,28 @@ static void cleanupDirectory(String path, boolean createNew) { } } - /** - * For the given url, returns a collection of Metadata Objects. 
- * - * @param url String - * @return Collection - */ - static Collection readMetadata(final String url) { - // order of lines/records in the Metadata table does matter (since 2013/03); - // so List is used here instead of HashSet - List toReturn = new ArrayList<>(); - - // check args - if (url == null) { - throw new IllegalArgumentException("url must not be null"); + static Metadata readMetadata(String url) { + try { + return new ObjectMapper().readValue(LOADER.getResource(url).getInputStream(), Metadata.class); + } catch (Exception e) { + throw new RuntimeException(e); } + } - // get data from service + static void saveMetadata(Metadata metadata, String path) { + //path fix-up for metadata location property, e.g., file:metadata.json, classpath:metadata.json (test/demo) + if(StringUtils.startsWithIgnoreCase(path, "classpath:")) { + path = StringUtils.replaceIgnoreCase(path, "classpath:", "target/"); //for test/demo + } else if(StringUtils.startsWithIgnoreCase(path, "file:")) { + path = StringUtils.replaceIgnoreCase(path, "file:", ""); + } else if (StringUtils.containsIgnoreCase(path, ":")) { //duh... 
to be safe + path = StringUtils.substringAfter(path, ":"); + } try { - JSONObject jo = (JSONObject) new JSONParser().parse(new InputStreamReader(LOADER.getResource(url).getInputStream())); - for (JSONObject ds : (Iterable) jo.get("datasources")) { - Metadata.METADATA_TYPE type = Metadata.METADATA_TYPE.valueOf((String) ds.get("type")); - List names = (List) ((JSONArray) ds.get("name")).stream().collect(Collectors.toList()); - Metadata metadata = new Metadata((String) ds.get("identifier"), names, (String) ds.get("description"), - (String) ds.get("dataUrl"), (String) ds.get("homepageUrl"), (String) ds.get("iconUrl"), - type, (String) ds.get("cleanerClass"), (String) ds.get("converterClass"), - (String) ds.get("pubmedId"), (String) ds.get("availability")); - LOGGER.info("readMetadata(): adding Metadata: " + metadata.getIdentifier()); - toReturn.add(metadata); - } - } catch (ParseException | IOException e) { + new ObjectMapper().writeValue(Files.newOutputStream(Paths.get(path)), metadata); + } catch (Exception e) { throw new RuntimeException(e); } - - return toReturn; } /** @@ -166,74 +148,95 @@ public static void copy(InputStream is, OutputStream os) throws IOException { } /** - * From a warehouse (normalized) ER or CV URI, - * extract the identifier (e.g., UniProt or ChEBI). - * It depends on Normalizer and cPath2 Premerge - * using 'http://identifiers.org/*' URIs for such objects. + * From a normalized ER/CV URI, extract the id (uniprot, chebi,..) 
* - * @param uri URI - * @return local part URI - ID + * @param uri some (preferably normalized) ER or CV URI + * @return identifier; null when the URI is nothing like *identifiers.org/* or *bioregistry.io/* */ static String idFromNormalizedUri(String uri) { - Assert.isTrue(uri.contains("http://identifiers.org/"),"Not a Identifiers.org URI"); - return uri.substring(uri.lastIndexOf('/') + 1); + if(Stream.of("identifiers.org/", "bioregistry.io/") + .anyMatch(s -> StringUtils.containsIgnoreCase(uri, s))) { + + String id = uri.substring(uri.lastIndexOf('/') + 1); + //remove prefix/banana for now + if(StringUtils.contains(id,":")) { + id = StringUtils.substringAfter(id, ":"); + } + //add CID:/SID:/CHEBI: prefix to the id before id-mapping due to our id-mapping/index implementation + if(StringUtils.containsIgnoreCase(uri,"substance")) //contains 'substance' or 'pubchem...substance'... + id = "SID:" + id; + else if(StringUtils.containsIgnoreCase(uri,"compound") || StringUtils.containsIgnoreCase(uri,"pubchem")) + id = "CID:" + id; + else if(StringUtils.containsIgnoreCase(uri,"chebi")) + id = "CHEBI:" + id; + + return id; + } + + return null; } /** - * Auto-fix an ID of particular type before using it - * for id-mapping. This helps to map e.g., RefSeq versions ID and - * UniProt isoforms to primary UniProt accessions despite our id-mapping db - * does not have such records as e.g. "NP_12345.1 maps to P01234". + * Auto-fix some ID types before searching or saving in the id-mapping index. + * This helps to map e.g. a RefSeq version or UniProt isoform ID to the primary UniProt AC, + * despite our id-mapping index/table does not have records like "NP_12345.1 maps to P01234". 
* - * @param fromDb type of the identifier (standard resource name, e.g., RefSeq) - * @param fromId identifier + * @param db type of the identifier (standard resource name, e.g., RefSeq) + * @param id identifier * @return "fixed" ID */ - public static String fixSourceIdForMapping(String fromDb, String fromId) { - Assert.hasText(fromId, "fromId is empty"); - Assert.hasText(fromDb, "fromDb is empty"); + public static String fixIdForMapping(String db, String id) { + Assert.hasText(id, "fromId is empty"); + Assert.hasText(db, "fromDb is empty"); - String id = fromId; - String db = fromDb.toUpperCase(); + db = db.toUpperCase(); - if (db.startsWith("UNIPROT") || db.contains("SWISSPROT") || db.contains("TREMBL")) { + if (db.startsWith("UNIPROT")) { //always use UniProt ID instead of the isoform ID for mapping - if (id.contains("-")) + if (id.contains("-")) { id = id.replaceFirst("-\\d+$", ""); - } else if (db.equals("REFSEQ") && id.contains(".")) { + } + } + else if (db.equals("CHEBI")) { + //by design of this app, chebi id must always have 'CHEBI:' (banana+peel) prefix for id-mapping/indexing/searching + id = id.toUpperCase(); //converts ChEBI:*, chebi:*, etc. => CHEBI:* + if (!StringUtils.startsWith(id, "CHEBI:")) { + id = "CHEBI:" + id; + } + } + else if (db.equals("REFSEQ") && id.contains(".")) { //strip, e.g., refseq:NP_012345.2 to refseq:NP_012345 id = id.replaceFirst("\\.\\d+$", ""); - } else if (db.startsWith("KEGG") && id.matches(":\\d+$")) { - id = id.substring(id.lastIndexOf(':') + 1); //it's NCBI Gene ID; - } else if (db.contains("PUBCHEM") && (db.contains("SUBSTANCE") || db.contains("SID"))) { + } + else if (db.startsWith("KEGG") && id.matches(":\\d+$")) { + id = id.substring(id.lastIndexOf(':') + 1); //it's a NCBI Gene ID! 
+ } + else if (db.contains("PUBCHEM") && (db.contains("SUBSTANCE") || db.contains("SID"))) { id = id.toUpperCase(); //ok for a SID //add prefix if not present if (!id.startsWith("SID:") && id.matches("^\\d+$")) id = "SID:" + id; - } else if (db.contains("PUBCHEM") && (db.contains("COMPOUND") || db.contains("CID"))) { + } + else if (db.contains("PUBCHEM") && (db.contains("COMPOUND") || db.contains("CID"))) { id = id.toUpperCase(); //ok for a CID //add prefix if not present - if (!id.startsWith("CID:") && id.matches("^\\d+$")) + if (!id.startsWith("CID:") && id.matches("^\\d+$")) { id = "CID:" + id; + } } return id; } /** - * Whether a string starts with any of the prefixes (case insensitive). + * Whether a string starts with any of the prefixes (case-insensitive). * - * @param s a string + * @param str a string * @param prefixes optional array of prefix terms to match * @return true/false */ - public static boolean startsWithAnyIgnoreCase(String s, String... prefixes) { - for (String prefix : prefixes) { - if (StringUtils.startsWithIgnoreCase(s, prefix)) { - return true; - } - } - return false; + public static boolean startsWithAnyIgnoreCase(String str, String... prefixes) { + return Arrays.stream(prefixes).anyMatch(p -> StringUtils.startsWithIgnoreCase(str, p)); } /** @@ -243,17 +246,21 @@ public static boolean startsWithAnyIgnoreCase(String s, String... prefixes) { * Note: the corresponding CV does not have a unification xref * (this method won't validate; so, non-standard CV terms can be used). 
* - * @param vocab relationship xref type - * @param model a biopax model where to find/add the xref - * @param isPrimaryId whether it's a primary ID/AC (then adds a comment) + * @param vocab relationship xref type + * @param model a biopax model where to find/add the xref */ public static RelationshipXref findOrCreateRelationshipXref( - RelTypeVocab vocab, String db, String id, Model model, boolean isPrimaryId) { + RelTypeVocab vocab, String db, String id, Model model) { Assert.notNull(vocab, "vocab is null"); RelationshipXref toReturn; - String uri = Normalizer.uri(model.getXmlBase(), db, id + "_" + vocab.toString(), RelationshipXref.class); + //if chebi, make sure 'CHEBI:' is present + if(StringUtils.equalsIgnoreCase(db, "chebi") && !StringUtils.startsWithIgnoreCase(id, "chebi:")) { + id = "CHEBI:" + id; + } + + String uri = Normalizer.uri(model.getXmlBase(), db, id + "_" + vocab, RelationshipXref.class); if (model.containsID(uri)) { return (RelationshipXref) model.getByID(uri); } @@ -262,8 +269,6 @@ public static RelationshipXref findOrCreateRelationshipXref( toReturn = model.addNew(RelationshipXref.class, uri); toReturn.setDb(db.toLowerCase()); toReturn.setId(id); - if (isPrimaryId) - toReturn.addComment("PRIMARY"); // create/add the relationship type vocabulary String relTypeCvUri = vocab.uri; //identifiers.org standard URI @@ -286,6 +291,11 @@ public static RelationshipXref findOrCreateRelationshipXref( return toReturn; } + /** + * Recursively extracts all unification and relationship xrefs from the BioPAX object and its children. 
+ * @param bpe + * @return + */ static Set getXrefIds(BioPAXElement bpe) { final Set ids = new HashSet<>(); @@ -295,21 +305,28 @@ static Set getXrefIds(BioPAXElement bpe) { fetcher.setSkipSubPathways(true); //fetch all children of (implicit) type XReferrable, which means - either //BioSource or ControlledVocabulary or Evidence or Provenance or Entity or EntityReference - //(we actually want only the latter two types and their sub-types; will skip the rest later on): + //(we actually want only the latter two types and their subtypes; will skip the rest later on): Set children = fetcher.fetch(bpe, XReferrable.class); //include itself (- for fetcher only gets child elements) if (bpe instanceof XReferrable) children.add((XReferrable) bpe); for (XReferrable child : children) { - //skip for unwanted utility class child elements, such as Evidence,CV,Provenance - if (!(child instanceof Entity || child instanceof EntityReference)) + //skip unwanted utility class elements, such as Evidence, CV, Provenance + if (!(child instanceof Entity || child instanceof EntityReference)) { continue; + } // collect standard bio IDs (skip publications); // (we will use id-mapping later to associate more IDs) for (Xref x : child.getXref()) { if (!(x instanceof PublicationXref) && x.getId() != null && x.getDb() != null) { - ids.add(x.getId()); + String id = x.getId(); + //add 'CHEBI:' ("banana and peel" prefix) if it's missing + if(StringUtils.equalsIgnoreCase("chebi", x.getDb()) && + !StringUtils.startsWithIgnoreCase(id, "chebi:")) { + id = "CHEBI:" + id; + } + ids.add(id); } } } @@ -333,8 +350,8 @@ static InputStream gzipInputStream(String gzPath) { * * @return URI */ - static String getMetadataUri(Model model, Metadata metadata) { - return model.getXmlBase() + metadata.getIdentifier(); + static String getMetadataUri(Model model, Datasource datasource) { + return model.getXmlBase() + datasource.getIdentifier(); } /** @@ -382,7 +399,7 @@ private static Object newInstance(final String 
className) { /* * Generate a sanitized file name for an original source zip entry; - * this path will be stored in the corresponding Metadata.files collection + * this path will be stored in the corresponding Datasource.files collection * and then processed during premerge (clean, convert, normalize) and merge steps (ETL). */ static String originalFile(String dataSubDir, String zipEntryName) { diff --git a/src/main/java/cpath/service/ConsoleApplication.java b/src/main/java/cpath/service/ConsoleApplication.java index d22358eac..1810eb034 100644 --- a/src/main/java/cpath/service/ConsoleApplication.java +++ b/src/main/java/cpath/service/ConsoleApplication.java @@ -1,16 +1,15 @@ package cpath.service; import cpath.service.api.Analysis; -import cpath.service.api.CPathService; -import cpath.service.api.Searcher; +import cpath.service.metadata.Index; +import cpath.service.api.Service; import cpath.service.jaxb.SearchHit; import cpath.service.jaxb.SearchResponse; -import cpath.service.jpa.Metadata; -import cpath.service.jpa.Metadata.METADATA_TYPE; +import cpath.service.metadata.Datasource; +import cpath.service.metadata.Datasource.METADATA_TYPE; import org.apache.commons.cli.*; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.ArrayUtils; import org.biopax.paxtools.controller.SimpleEditorMap; import org.biopax.paxtools.io.*; import org.biopax.paxtools.model.BioPAXElement; @@ -48,7 +47,7 @@ public class ConsoleApplication implements CommandLineRunner { private static final String javaRunPaxtools = "nohup $JAVA_HOME/bin/java -Xmx60g -jar paxtools.jar"; @Autowired - private CPathService service; + private Service service; /** * Validator bean is available when "premerge" profile is activated; @@ -65,10 +64,9 @@ enum Stage { @Override public void run(String... 
args) throws Exception { - if (!Charset.defaultCharset().equals(Charset.forName("UTF-8"))) - LOG.error("Default Charset, " + Charset.defaultCharset() + - " (is NOT 'UTF-8'); problems with input data are possible..."); - + if (!Charset.defaultCharset().equals(Charset.forName("UTF-8"))) { + LOG.error("Default Charset " + Charset.defaultCharset() + " is NOT 'UTF-8'"); + } Options options = new Options(); Option o = Option.builder("b").longOpt("build") .desc("PREMERGE: parse metadata.json, expand input archives, clean, convert, normalize the data, create the " + @@ -95,6 +93,7 @@ public void run(String... args) throws Exception { .desc("run as web (service) app").build(); options.addOption(o); + // process command line args CommandLine cmd; try { cmd = new DefaultParser().parse(options, args); @@ -103,8 +102,8 @@ public void run(String... args) throws Exception { return; } - // process command line args and do smth. if (cmd.hasOption("build")) { + //Perform the data build from given stage (or from "premerge" when no value provided) to the end. Stage stage; try { stage = Stage.valueOf(cmd.getOptionValue("build").toUpperCase()); @@ -113,12 +112,11 @@ public void run(String... args) throws Exception { } switch ((stage != null) ? stage : Stage.PREMERGE) { case PREMERGE: - premerge(); + premerge(); //, and continue to "merge" case MERGE: - new Merger(service).merge(); - index(); + merge(); case POSTMERGE: - postmerge(); + postmerge(); //is the final stage } } else if (cmd.hasOption("export")) { String[] uris = new String[]{}; @@ -175,90 +173,44 @@ private void executeAnalysis(String analysisClass, boolean readOnly) { } - /* - * Builds a new BioPAX full-text index,creates the black list or ubiquitous molecules, - * and calculates/updates the total no. of pathways, interactions, physical entities in the main db. - */ - private void index() throws IOException { - LOG.info("index: indexing..."); - service.index(); - - // Updates counts of pathways, etc. 
and saves in the Metadata table. - // This depends on the full-text index, which must have been created already (otherwise, results will be wrong). - LOG.info("Updating pathway/interaction/participant counts per data source..."); - List pathwayMetadata = new ArrayList<>(); - for (Metadata md : service.metadata().findAll()) - if (!md.isNotPathwayData()) - pathwayMetadata.add(md); - // update counts for each non-warehouse metadata entry - for (Metadata md : pathwayMetadata) { - Model m = service.loadBiopaxModelByDatasource(md); //to count objects, by type - String name = md.standardName(); - md.setNumPathways(m.getObjects(Pathway.class).size()); - LOG.info(name + " - pathways: " + md.getNumPathways()); - md.setNumInteractions(m.getObjects(Interaction.class).size()); - LOG.info(name + " - interactions: " + md.getNumInteractions()); - md.setNumPhysicalEntities(m.getObjects(PhysicalEntity.class).size() + m.getObjects(Gene.class).size()); - LOG.info(name + " - participants: " + md.getNumPhysicalEntities()); - } - service.metadata().saveAll(pathwayMetadata); - - LOG.info("Generating the blacklist.txt..."); - //Generates, if not exist, the blacklist.txt - - //to exclude/keep ubiquitous small molecules (e.g. ATP) - //from graph query and output format converter results. - BlacklistGenerator3 gen = new BlacklistGenerator3(); - Blacklist blacklist = gen.generateBlacklist(service.getModel()); - // Write all the blacklisted ids to the output - if (blacklist != null) { - blacklist.write(service.settings().blacklistFile()); - } - - LOG.info("index: all done."); - } - /* * Executes the premerge stage: * organize, clean, convert, validate, normalize pathway/interaction data, * and create BioPAX utility class objects warehouse and id-mapping. 
*/ private void premerge() { - - fetchMetadata(); - LOG.info("premerge: initializing DAO, validator, etc..."); - //test that officially supported organisms are specified (throws a runtime exception otherwise) + //check that organisms are specified; throw an exception otherwise service.settings().getOrganismTaxonomyIds(); LOG.info("premerge: this instance is configured to integrate and query " + " bio data about following organisms: " + Arrays.toString(service.settings().getOrganisms())); - - System.setProperty("hibernate.hbm2ddl.auto", "update"); - System.setProperty("net.sf.ehcache.disabled", "true"); - +// System.setProperty("net.sf.ehcache.disabled", "true"); //(there is no JPA/Hibernate/H2 anymore) PreMerger premerger = new PreMerger(service, validator); premerger.premerge(); - - // create the Warehouse BioPAX model (in the downloads dir) and id-mapping db table - if (!Files.exists(Paths.get(service.settings().warehouseModelFile()))) + // create the Warehouse BioPAX model and id-mapping db table + if (!Files.exists(Paths.get(service.settings().warehouseModelFile()))) { premerger.buildWarehouse(); - - //back to read-only schema mode (useful when called from the web Main page) - System.setProperty("hibernate.hbm2ddl.auto", "validate"); + } } - /* - * Loads data providers' metadata. 
- */ - private void fetchMetadata() { - System.setProperty("hibernate.hbm2ddl.auto", "update"); - // grab the data - // load the test metadata and create warehouse - for (Metadata mdata : CPathUtils.readMetadata(service.settings().getMetadataLocation())) - service.metadata().save(mdata); - //back to read-only schema mode (useful when called from the web admin app) - System.setProperty("hibernate.hbm2ddl.auto", "validate"); - } + private void merge() { + Merger biopaxMerger = new Merger(service); + biopaxMerger.merge(); + + LOG.info("Indexing BioPAX Model (this may take an hour or so)..."); + service.index().save(service.getModel()); + LOG.info("Generating blacklist.txt..."); + //Generates, if not exist, the blacklist.txt - + //to exclude/keep ubiquitous small molecules (e.g. ATP) + //from graph query and output format converter results. + BlacklistGenerator3 gen = new BlacklistGenerator3(); + Blacklist blacklist = gen.generateBlacklist(service.getModel()); + // Write all the blacklisted ids to the output + if (blacklist != null) { + blacklist.write(service.settings().blacklistFile()); + } + } /** * Exports a cpath2 BioPAX sub-model or full model to the specified file. 
@@ -284,24 +236,22 @@ private void exportData(final String output, String[] uris, String[] datasources if (uris.length == 0 && (datasources.length > 0 || types.length > 0)) { // initialize the search engine - Searcher searcher = new SearchEngine(model, service.settings().indexDir()); - + Index index = new IndexImpl(model, service.settings().indexDir(), false); Collection selectedUris = new HashSet<>(); - if (types.length > 0) { - //collect biopax object URIs of the specified types and sub-types, and data sources if specified + //collect biopax object URIs of the specified types and subtypes, and data sources if specified //(child biopax elements will be auto-included during the export to OWL) for (String bpInterfaceName : types) { - selectedUris.addAll(findAllUris(searcher, + selectedUris.addAll(findAllUris(index, biopaxTypeFromSimpleName(bpInterfaceName), datasources, null)); } } else { //collect all Entity URIs filtered by the not empty data sources list //(child Gene, PhysicalEntity, UtilityClass biopax elements will be auto-included // during the export to OWL; we do not want to export dangling Genes, PEs, etc., except for Complexes...) 
- selectedUris.addAll(findAllUris(searcher, Pathway.class, datasources, null)); - selectedUris.addAll(findAllUris(searcher, Interaction.class, datasources, null)); - selectedUris.addAll(findAllUris(searcher, Complex.class, datasources, null)); + selectedUris.addAll(findAllUris(index, Pathway.class, datasources, null)); + selectedUris.addAll(findAllUris(index, Interaction.class, datasources, null)); + selectedUris.addAll(findAllUris(index, Complex.class, datasources, null)); } uris = selectedUris.toArray(new String[]{}); @@ -316,7 +266,7 @@ private void exportData(final String output, String[] uris, String[] datasources } private Class biopaxTypeFromSimpleName(String type) { - // 'type' (a BioPAX L3 interface class name) is case insensitive + // 'type' (a BioPAX L3 interface class name) is case-insensitive for (Class c : SimpleEditorMap.L3 .getKnownSubClassesOf(BioPAXElement.class)) { if (c.getSimpleName().equalsIgnoreCase(type)) { @@ -328,12 +278,28 @@ private Class biopaxTypeFromSimpleName(String type) { } private void postmerge() throws IOException { - LOG.info("postmerge(), started..."); + LOG.info("postmerge: started"); + Model model; + // Updates counts of pathways, etc. and saves in the Metadata table. 
+ // This depends on the full-text index created already + LOG.info("updating pathway/interaction/participant counts per data source..."); + // update counts for each non-warehouse metadata entry + for (Datasource ds : service.metadata().getDatasources()) { + ds.getFiles().clear(); //do not export to json + if(ds.getType().isNotPathwayData()) { + continue; + } + model = service.loadBiopaxModelByDatasource(ds); + ds.setNumPathways(model.getObjects(Pathway.class).size()); + ds.setNumInteractions(model.getObjects(Interaction.class).size()); + ds.setNumPhysicalEntities(model.getObjects(PhysicalEntity.class).size() + model.getObjects(Gene.class).size()); + } + CPathUtils.saveMetadata(service.metadata(), service.settings().getMetadataLocation()); //update the json file //load the main model LOG.info("loading the Main BioPAX Model..."); - Model model = CPathUtils.importFromTheArchive(service.settings().mainModelFile()); - LOG.info("loaded."); + model = CPathUtils.importFromTheArchive(service.settings().mainModelFile()); + LOG.info("loaded"); // create an imported data summary file.txt (issue#23) PrintWriter writer = new PrintWriter(new OutputStreamWriter(Files.newOutputStream( @@ -344,29 +310,25 @@ private void postmerge() throws IOException { .asList("#CPATH2:", service.settings().getName(), "version", service.settings().getVersion(), date))); writer.println("#Columns:\t" + String.join("\t", Arrays.asList( "ID", "DESCRIPTION", "TYPE", "HOMEPAGE", "PATHWAYS", "INTERACTIONS", "PARTICIPANTS"))); - Iterable allMetadata = service.metadata().findAll(); - for (Metadata m : allMetadata) { - //we use StringUtils.join instead String.join as there are only only char sequence objects + for (Datasource d : service.metadata().getDatasources()) { writer.println(StringUtils.join(Arrays.asList( - CPathUtils.getMetadataUri(model, m), m.getDescription(), m.getType(), m.getUrlToHomepage(), - m.getNumPathways(), m.getNumInteractions(), m.getNumPhysicalEntities()), "\t") - ); + 
CPathUtils.getMetadataUri(model, d), d.getDescription(), d.getType(), d.getHomepageUrl(), + d.getNumPathways(), d.getNumInteractions(), d.getNumPhysicalEntities()), "\t")); } writer.flush(); writer.close(); - LOG.info("generated datasources.txt"); + LOG.info("done datasources.txt"); - LOG.info("Creating the list of primary uniprot IDs..."); + LOG.info("creating the list of primary uniprot ACs..."); Set acs = new TreeSet<>(); //exclude publication xrefs Set xrefs = new HashSet<>(model.getObjects(UnificationXref.class)); xrefs.addAll(model.getObjects(RelationshipXref.class)); - long left = xrefs.size(); for (Xref x : xrefs) { String id = x.getId(); if (CPathUtils.startsWithAnyIgnoreCase(x.getDb(), "uniprot") && id != null && !acs.contains(id)) { - acs.addAll(service.map(id, "UNIPROT")); + acs.addAll(service.map(List.of(id), "UNIPROT")); } } writer = new PrintWriter(new OutputStreamWriter(Files.newOutputStream( @@ -380,13 +342,10 @@ private void postmerge() throws IOException { writer.close(); LOG.info("generated uniprot.txt"); - LOG.info("Init the full-text search engine..."); - final Searcher searcher = new SearchEngine(model, service.settings().indexDir()); - -// createBySpeciesBiopax(model, searcher); - + LOG.info("init the full-text search engine..."); + final Index index = new IndexImpl(model, service.settings().indexDir(), false); // generate the "Detailed" pathway data file: - createDetailedBiopax(model, searcher, allMetadata); + createDetailedBiopax(model, index); // generate the export.sh script (to run Paxtools commands for exporting the BioPAX files to other formats) LOG.info("writing 'export.sh' script to convert the BioPAX models to SIF, GSEA, SBGN..."); @@ -419,7 +378,6 @@ private void postmerge() throws IOException { writer.println("gzip pathways.txt *.json"); writer.println("echo \"All done.\""); writer.close(); - LOG.info("postmerge: done."); } @@ -427,7 +385,6 @@ private void writeScriptCommands(String bpFilename, PrintWriter writer, boolean //make 
output file name prefix that includes datasource and ends with '.': final String prefix = bpFilename.substring(0, bpFilename.indexOf("BIOPAX.")); final String commaSepTaxonomyIds = String.join(",", service.settings().getOrganismTaxonomyIds()); - if (exportToGSEA) { writer.println(String.format("%s %s '%s' '%s' %s 2>&1 &", javaRunPaxtools, "toGSEA", bpFilename, prefix + "hgnc.gmt", "'hgnc symbol' 'organisms=" + commaSepTaxonomyIds + "'"));//'hgnc symbol' - important @@ -436,74 +393,50 @@ private void writeScriptCommands(String bpFilename, PrintWriter writer, boolean writer.println("wait"); //important writer.println("echo \"Done converting " + bpFilename + " to GSEA.\""); } - writer.println(String.format("%s %s '%s' '%s' %s 2>&1 &", javaRunPaxtools, "toSIF", bpFilename, prefix + "hgnc.txt", "seqDb=hgnc -extended -andSif exclude=neighbor_of"));//'hgnc symbol' or 'hgnc' does not matter - //UniProt ID based extended SIF files can be huge, take too long to generate; skip for now. - writer.println("wait"); //important writer.println("echo \"Done converting " + bpFilename + " to SIF.\""); } - private Collection findAllUris(Searcher searcher, - Class type, String[] ds, String[] org) { + private Collection findAllUris(Index index, Class type, String[] ds, String[] org) { Collection uris = new ArrayList<>(); - - SearchResponse resp = searcher.search("*", 0, type, ds, org); + SearchResponse resp = index.search("*", 0, type, ds, org); int page = 0; while (!resp.isEmpty()) { for (SearchHit h : resp.getSearchHit()) uris.add(h.getUri()); //next page - resp = searcher.search("*", ++page, type, ds, org); + resp = index.search("*", ++page, type, ds, org); } - LOG.info("findAllUris(in " + type.getSimpleName() + ", ds: " + Arrays.toString(ds) + ", org: " + Arrays.toString(org) + ") " + "collected " + uris.size()); - return uris; } - private void createDetailedBiopax(final Model mainModel, Searcher searcher, Iterable allMetadata) { + private void createDetailedBiopax(final Model 
mainModel, Index index) { //collect BioPAX pathway data source names final Set pathwayDataSources = new HashSet<>(); - for (Metadata md : allMetadata) { + for (Datasource md : service.metadata().getDatasources()) { if (md.getType() == METADATA_TYPE.BIOPAX || md.getType() == METADATA_TYPE.SBML) pathwayDataSources.add(md.standardName()); } final String archiveName = service.settings().biopaxFileNameFull("Detailed"); - exportBiopax(mainModel, searcher, archiveName, pathwayDataSources.toArray(new String[]{}), null); - } - - private void createBySpeciesBiopax(final Model mainModel, Searcher searcher) { - // export by organism (name) - Set organisms = service.settings().getOrganismTaxonomyIds(); - if (organisms.size() > 1) { - LOG.info("splitting the main BioPAX model by organism, into " + organisms.size() + " BioPAX files..."); - for (String organism : organisms) { - String archiveName = service.settings().biopaxFileNameFull(organism); - exportBiopax(mainModel, searcher, archiveName, null, new String[]{organism}); - } - } else { - LOG.info("won't generate any 'by organism' archives, for only one " + - ArrayUtils.toString(service.settings().getOrganisms()) + " is listed in the properties file"); - } + exportBiopax(mainModel, index, archiveName, pathwayDataSources.toArray(new String[]{}), null); } - private void exportBiopax( - final Model mainModel, final Searcher searcher, - final String biopaxArchive, final String[] datasources, - final String[] organisms) { + private void exportBiopax(Model mainModel, Index index, String biopaxArchive, + String[] datasources, String[] organisms) { // check file exists if (!(new File(biopaxArchive)).exists()) { LOG.info("creating new " + biopaxArchive); try { //find all entities (all child elements will be then exported too) Collection uris = new HashSet<>(); - uris.addAll(findAllUris(searcher, Pathway.class, datasources, organisms)); - uris.addAll(findAllUris(searcher, Interaction.class, datasources, organisms)); - 
uris.addAll(findAllUris(searcher, Complex.class, datasources, organisms)); + uris.addAll(findAllUris(index, Pathway.class, datasources, organisms)); + uris.addAll(findAllUris(index, Interaction.class, datasources, organisms)); + uris.addAll(findAllUris(index, Complex.class, datasources, organisms)); // export objects found above to a new biopax archive if (!uris.isEmpty()) { OutputStream os = new GZIPOutputStream(new FileOutputStream(biopaxArchive)); diff --git a/src/main/java/cpath/service/SearchEngine.java b/src/main/java/cpath/service/IndexImpl.java similarity index 53% rename from src/main/java/cpath/service/SearchEngine.java rename to src/main/java/cpath/service/IndexImpl.java index d8d01de37..96af5e91e 100644 --- a/src/main/java/cpath/service/SearchEngine.java +++ b/src/main/java/cpath/service/IndexImpl.java @@ -1,22 +1,22 @@ package cpath.service; -import java.nio.file.Files; import java.nio.file.Path; import java.io.IOException; import java.io.StringReader; import java.nio.file.Paths; import java.util.*; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Pattern; +import java.util.stream.Collectors; -import cpath.service.api.Indexer; -import cpath.service.api.Searcher; +import cpath.service.metadata.Index; +import cpath.service.metadata.Mapping; +import cpath.service.metadata.Mappings; +import org.apache.commons.lang3.StringUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -29,12 +29,12 @@ import org.apache.lucene.search.BooleanClause.Occur; import 
org.apache.lucene.search.highlight.*; import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.store.MMapDirectory; import org.biopax.paxtools.controller.*; import org.biopax.paxtools.model.BioPAXElement; import org.biopax.paxtools.model.Model; import org.biopax.paxtools.model.level3.*; import org.biopax.paxtools.model.level3.Process; +import org.biopax.paxtools.normalizer.Resolver; import org.biopax.paxtools.util.ClassFilterSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,173 +45,128 @@ /** * A full-text searcher/indexer for BioPAX L3 models. * - * Only Entity and EntityReference BioPAX types get indexed (since 12/2015). + * Only Entity and EntityReference BioPAX types (incl. child types) get indexed (since 12/2015). * * @author rodche */ -public class SearchEngine implements Indexer, Searcher { - static final Logger LOG = LoggerFactory.getLogger(SearchEngine.class); - - // search fields - public static final String FIELD_URI = "uri"; - public static final String FIELD_KEYWORD = "keyword"; //anything, e.g., names, terms, comments, incl. - from child elements - public static final String FIELD_NAME = "name"; // standardName, displayName, other names - public static final String FIELD_XREFID = "xrefid"; //xref.id - public static final String FIELD_PATHWAY = "pathway"; //pathways and parent pathways to be inferred from entire biopax model - public static final String FIELD_N_PARTICIPANTS = "participants"; // num. 
of PEs or Genes in a process or Complex - public static final String FIELD_N_PROCESSES = "processes"; // is same as 'size' used to be before cPath2 v7 - - // Full-text search/filter fields (case sensitive) - - //index organism names, cell/tissue type (term), taxonomy id, but only store BioSource URIs - public static final String FIELD_ORGANISM = "organism"; - //index data source names, but only URIs are stored in the index - public static final String FIELD_DATASOURCE = "datasource"; - public static final String FIELD_TYPE = "type"; - - //Default fields to use with the MultiFieldQueryParser; - //one can still search in other fields directly, like - pathway:some_keywords datasource:"pid" - final static String[] DEFAULT_FIELDS = - { - FIELD_KEYWORD, //data type properties (name, id, term, comment) of this and child elements; - FIELD_XREFID, - FIELD_NAME - }; +public class IndexImpl implements Index, Mappings { + static final Logger LOG = LoggerFactory.getLogger(IndexImpl.class); - private final Model model; + private Model model; private int maxHitsPerPage; private final Analyzer analyzer; - private final Path indexFile; + private IndexWriter indexWriter; private SearcherManager searcherManager; - public final static int DEFAULT_MAX_HITS_PER_PAGE = 100; /** * Constructor. 
* - * @param model the BioPAX Model to index or search + * @param model the BioPAX Model to index or search * @param indexLocation index directory location + * @param readOnly */ - public SearchEngine(Model model, String indexLocation) { + public IndexImpl(Model model, String indexLocation, boolean readOnly) { this.model = model; - this.indexFile = Paths.get(indexLocation); - initSearcherManager(); - this.maxHitsPerPage = DEFAULT_MAX_HITS_PER_PAGE; - + maxHitsPerPage = DEFAULT_MAX_HITS_PER_PAGE; //refs issue #269 - Map analyzersPerField = new HashMap<>(); - analyzersPerField.put(FIELD_NAME, new KeywordAnalyzer()); - analyzersPerField.put(FIELD_XREFID, new KeywordAnalyzer()); - analyzersPerField.put(FIELD_URI, new KeywordAnalyzer()); - analyzersPerField.put(FIELD_PATHWAY, new KeywordAnalyzer()); - this.analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), analyzersPerField); - } - - private void initSearcherManager() { + KeywordAnalyzer ka = new KeywordAnalyzer(); + Map analyzersPerField = Map.of( + FIELD_NAME, ka, + FIELD_XREFID, ka, + FIELD_URI, ka, + FIELD_PATHWAY, ka, + FIELD_DSTDB, ka, + FIELD_DSTID, ka, + FIELD_SRCDB, ka, + FIELD_SRCID, ka + ); + analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET), analyzersPerField); try { - if(Files.exists(indexFile)) - this.searcherManager = - new SearcherManager(MMapDirectory.open(indexFile), new SearcherFactory()); - else - LOG.info(indexFile + " does not exist."); + Path indexFile = Paths.get(indexLocation); + if(readOnly) { + searcherManager = new SearcherManager(FSDirectory.open(indexFile), new SearcherFactory()); + } else { + indexWriter = new IndexWriter(FSDirectory.open(indexFile), new IndexWriterConfig(analyzer)); + searcherManager = new SearcherManager(indexWriter, new SearcherFactory()); + } } catch (IOException e) { - LOG.warn("Could not create a searcher: " + e); + throw new RuntimeException(e); } } - + public void setMaxHitsPerPage(int maxHitsPerPage) { 
this.maxHitsPerPage = maxHitsPerPage; } - /** - * The max no. hits to return per results page (pagination). - * @return - */ public int getMaxHitsPerPage() { return maxHitsPerPage; } - public SearchResponse search(String query, int page, - Class filterByType, String[] datasources, - String[] organisms) - { + public SearchResponse search(String query, int page, Class type, + String[] datasources, String[] organisms) { SearchResponse response; LOG.debug("search: '" + query + "', page: " + page - + ", filterBy: " + ((filterByType!=null)?filterByType.getSimpleName():"N/A") + + ", filterBy: " + ((type!=null)?type.getSimpleName():"N/A") + "; extra filters: ds in (" + Arrays.toString(datasources) + "), org. in (" + Arrays.toString(organisms) + ")"); - IndexSearcher searcher = null; - try { QueryParser queryParser = new MultiFieldQueryParser(DEFAULT_FIELDS, analyzer); queryParser.setAllowLeadingWildcard(true);//we want leading wildcards enabled (e.g. *sulin) -// queryParser.setAutoGeneratePhraseQueries(false); //TODO: try it - searcher = searcherManager.acquire(); - + Query q; //find and transform top docs to search hits (beans), considering pagination... if(!query.trim().equals("*")) { //if not "*" query, which is not supported out-of-the-box, then //create the lucene query - Query userQuery = queryParser.parse(query); - LOG.debug("parsed lucene query is " + userQuery.getClass().getSimpleName()); + q = queryParser.parse(query); + LOG.debug("parsed lucene query is " + q.getClass().getSimpleName()); //create filter: type AND (d OR d...) AND (o OR o...) - Query filter = createFilter(filterByType, datasources, organisms); + Query filter = createFilter(type, datasources, organisms); //final query with filter - Query q = (filter!=null) - ? 
new BooleanQuery.Builder().add(userQuery,Occur.MUST).add(filter,Occur.FILTER).build() - : userQuery; - //get the first page of top hits - TopDocs topDocs = searcher.search(q, maxHitsPerPage); - - //get the required hits page if page>0 - if(page>0) { - TopScoreDocCollector collector = TopScoreDocCollector.create(maxHitsPerPage*(page+1)); - searcher.search(q, collector); - topDocs = collector.topDocs(page * maxHitsPerPage, maxHitsPerPage); + if(filter != null) { + q = new BooleanQuery.Builder().add(q, Occur.MUST).add(filter, Occur.FILTER).build(); } - - //transform docs to hits (optionally use a highlighter, e.g., if debugging...) - response = transform(userQuery, searcher, topDocs); - } else { //find ALL objects of a particular BioPAX class (+ filters by organism, datasource) - if(filterByType==null) - filterByType = Level3Element.class; - - //replace q="*" with a search for the class or its sub-class name in the TYPE field + if(type == null) { + type = Level3Element.class; + } + //replace q="*" with a search for the class or its subclass name in the TYPE field BooleanQuery.Builder starQuery = new BooleanQuery.Builder(); - for(Class subType : SimpleEditorMap.L3.getKnownSubClassesOf(filterByType)) { + for(Class subType : SimpleEditorMap.L3.getKnownSubClassesOf(type)) { starQuery.add(new TermQuery(new Term(FIELD_TYPE, subType.getSimpleName().toLowerCase())), Occur.SHOULD); } Query filter = createFilter(null, datasources, organisms); //combine star and filter queries into one special boolean - Query q = (filter!=null) - ? new BooleanQuery.Builder().add(starQuery.build(),Occur.MUST).add(filter,Occur.FILTER).build() - : starQuery.build(); - //get the first page of top hits - TopDocs topDocs = searcher.search(q, maxHitsPerPage); + q = (filter!=null) + ? 
new BooleanQuery.Builder().add(starQuery.build(),Occur.MUST).add(filter,Occur.FILTER).build() + : starQuery.build(); + } + + searcher = searcherManager.acquire(); + TopDocs topDocs; + if(page>0) { //get the required hits page if page>0 - if(page>0) { - TopScoreDocCollector collector = TopScoreDocCollector.create(maxHitsPerPage*(page+1)); - searcher.search(q, collector); - topDocs = collector.topDocs(page * maxHitsPerPage, maxHitsPerPage); - } - - //convert - response = transform(q, searcher, topDocs); + TopScoreDocCollector collector = TopScoreDocCollector + .create(maxHitsPerPage*(page+1), maxHitsPerPage*(page+1)); + searcher.search(q, collector); + topDocs = collector.topDocs(page * maxHitsPerPage, maxHitsPerPage); + } else { + //get the first page of the top hits + topDocs = searcher.search(q, maxHitsPerPage); } + //transform docs to hits (optionally use a highlighter, e.g., if debugging...) + response = transform(q, searcher, topDocs); } catch (ParseException e) { throw new RuntimeException("getTopDocs: failed to parse the search query: " + e); } catch (IOException e) { throw new RuntimeException("getTopDocs: failed: " + e); } finally { try { - if(searcher!=null) { - searcherManager.release(searcher); - } - } catch (IOException e) {} + searcherManager.release(searcher); + } catch (IOException e) {} } response.setPageNo(page); - return response; } @@ -219,20 +174,24 @@ public SearchResponse search(String query, int page, // Transform Lucene docs to hits (xml/java beans) private SearchResponse transform(Query query, IndexSearcher searcher, TopDocs topDocs) throws IOException { - if(topDocs == null) + if(topDocs == null) { throw new IllegalArgumentException("topDocs is null"); - + } SearchResponse response = new SearchResponse(); response.setMaxHitsPerPage(maxHitsPerPage); - response.setNumHits(topDocs.totalHits); + long numTotalHits = topDocs.totalHits.value; //todo: call searcher.count(q) instead or it's same?.. 
+ response.setNumHits(numTotalHits); List hits = response.getSearchHit();//empty list assert hits!=null && hits.isEmpty(); LOG.debug("transform, no. TopDocs to process:" + topDocs.scoreDocs.length); - for(ScoreDoc scoreDoc : topDocs.scoreDocs) { + for(ScoreDoc scoreDoc : topDocs.scoreDocs) { SearchHit hit = new SearchHit(); Document doc = searcher.doc(scoreDoc.doc); String uri = doc.get(FIELD_URI); BioPAXElement bpe = model.getByID(uri); + if(bpe == null) { + continue; //was a hit from another model + } // use a highlighter (get matching fragments) if (LOG.isDebugEnabled()) { @@ -319,12 +278,13 @@ private SearchResponse transform(Query query, IndexSearcher searcher, TopDocs to hit.getPathway().addAll(uniqueVals); } - //no. processes, participants in the sub-network - if(doc.get(FIELD_N_PROCESSES)!=null) - hit.setNumProcesses(Integer.valueOf(doc.get(FIELD_N_PROCESSES))); //TODO: try w/o Integer.valueOf - if(doc.get(FIELD_N_PARTICIPANTS)!=null) - hit.setNumParticipants(Integer.valueOf(doc.get(FIELD_N_PARTICIPANTS))); - + //no. 
processes, participants in the subnetwork + if(doc.getField(FIELD_N_PROCESSES) != null) { + hit.setNumProcesses(doc.getField(FIELD_N_PROCESSES).numericValue().intValue()); + } + if(doc.getField(FIELD_N_PARTICIPANTS) != null) { + hit.setNumParticipants(doc.getField(FIELD_N_PARTICIPANTS).numericValue().intValue()); + } hits.add(hit); } @@ -332,8 +292,7 @@ private SearchResponse transform(Query query, IndexSearcher searcher, TopDocs to if(!hits.isEmpty()) { for(String puri : response.provenanceUris()) { Provenance p = (Provenance) model.getByID(puri); - response.getProviders() - .add((p.getStandardName()!=null)?p.getStandardName():p.getDisplayName()); + response.getProviders().add((p.getStandardName()!=null)?p.getStandardName():p.getDisplayName()); } } @@ -356,207 +315,117 @@ private SearchResponse transform(Query query, IndexSearcher searcher, TopDocs to ); }; - public void index() { - - IndexWriter iw; - try { - //close the searcher manager if the old index exists - if(searcherManager != null) { - searcherManager.close(); - searcherManager = null; - } - - CPathUtils.cleanupDirectory(indexFile.toString(), true); - - IndexWriterConfig conf = new IndexWriterConfig(analyzer); - iw = new IndexWriter(FSDirectory.open(indexFile), conf); - //cleanup - iw.deleteAll(); - iw.commit(); - } catch (IOException e) { - throw new RuntimeException("Failed to create a new IndexWriter.", e); - } - final IndexWriter indexWriter = iw; - - ExecutorService exec = Executors.newFixedThreadPool(30); - - final int numObjectsToIndex = model.getObjects(Entity.class).size() - + model.getObjects(EntityReference.class).size(); - LOG.info("index(), there are " + numObjectsToIndex + " Entity or EntityReference objects to index."); - - final AtomicInteger numLeft = new AtomicInteger(numObjectsToIndex); - - final Fetcher fetcher = new Fetcher(SimpleEditorMap.L3, Fetcher.nextStepFilter); - //disable traversing into sub-pathways when searching for child elements (worth doing for e.g., KEGG model)! 
- fetcher.setSkipSubPathways(true); - - for(BioPAXElement bpe : model.getObjects()) - { - //Skip for UtilityClass, etc. - if(!(bpe instanceof Entity || bpe instanceof EntityReference || bpe instanceof Provenance)) - continue; - - // prepare & index each element in a separate thread - exec.execute(() -> { - // get or infer some important values if possible from this, child or parent objects: - Set keywords = ModelUtils.getKeywords(bpe, 2, keywordsFilter); - - // do not index the auto-generated biopax comments - for(String s : new HashSet<>(keywords)) { - //exclude additional comments generated by normalizer, merger, etc. - if(s.startsWith("REPLACED") || s.contains("ADDED")) - keywords.remove(s); - } - - Map annotations = bpe.getAnnotations(); - - annotations.put(FIELD_KEYWORD, keywords); - annotations.put(FIELD_DATASOURCE, ModelUtils.getDatasources(bpe)); - annotations.put(FIELD_ORGANISM, ModelUtils.getOrganisms(bpe)); - annotations.put(FIELD_PATHWAY, ModelUtils.getParentPathways(bpe)); - - //set (PEs/Genes), (interactions/pathways), index fields: - if(bpe instanceof org.biopax.paxtools.model.level3.Process) { - int numProc = fetcher.fetch(bpe, Process.class).size(); //except itself - int numPeAndG = fetcher.fetch(bpe, PhysicalEntity.class).size() - + fetcher.fetch(bpe, Gene.class).size(); - annotations.put(FIELD_N_PARTICIPANTS, Integer.toString(numPeAndG)); - annotations.put(FIELD_N_PROCESSES, Integer.toString(numProc)); - } else if(bpe instanceof Complex) { - int numPEs = fetcher.fetch(bpe, PhysicalEntity.class).size(); - annotations.put(FIELD_N_PARTICIPANTS, Integer.toString(numPEs)); - } - - index(bpe, indexWriter); - - //count, log a progress message - int left = numLeft.decrementAndGet(); - if(left % 10000 == 0) - LOG.info("index(), biopax objects left to index: " + left); - }); - } - - exec.shutdown(); //stop accepting new tasks - try { //wait - exec.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS); - } catch (InterruptedException e) { - throw new 
RuntimeException("Interrupted!", e); - } - - try { - indexWriter.close(); //wait for pending op., auto-commit, close. - } catch (IOException e) { - throw new RuntimeException("Failed to close IndexWriter.", e); - } - - //finally, create a new searcher manager - initSearcherManager(); - } - - // internal methods - /* - * Creates a new Lucene Document that corresponds to a BioPAX object. - * It does not check whether the document exists (should not be there, - * because the {@link #index()} method cleans up the index) - * - * Some fields also include biopax data type property values not only from - * the biopax object but also from its child elements, up to some depth + /** + * Creates or updates a Lucene Document that corresponds to a BioPAX object. + * It does not check whether the document already exists. + *

+ * Some fields also include biopax data type property values not only from + * the biopax object but also from its child elements, up to some depth * (using key-value pairs in the pre-computed bpe.annotations map): - * - * 'uri' - biopax object's absolute URI, index=yes, analyze=no, store=yes; - * - * 'name' - names, analyze=yes, store=yes; - * - * 'keyword' - infer from this bpe and its child objects' data properties, - * such as Score.value, structureData, structureFormat, chemicalFormula, - * availability, term, comment, patoData, author, source, title, url, published, - * up to given depth/level; and also all 'pathway' field values are included here; - * analyze=yes, store=yes; + *

+ * 'uri' - biopax object's absolute URI, index=yes, analyze=no, store=yes; + *

+ * 'name' - names, analyze=yes, store=yes; + *

+ * 'keyword' - infer from this bpe and its child objects' data properties, + * such as Score.value, structureData, structureFormat, chemicalFormula, + * availability, term, comment, patoData, author, source, title, url, published, + * up to given depth/level; and also all 'pathway' field values are included here; + * analyze=yes, store=yes; + *

+ * 'xrefid' - Xref.id values - standard biological IDs - from a biopax object and some of its child objects; + * analyze=no, store=no; + *

+ * 'datasource', 'organism' and 'pathway' - infer from this bpe and its child objects + * up to given depth/level, analyze=no, store=yes; + *

+ * 'numprocesses', 'numparticipants' - number of child processes, + * participants; integer values as string; analyze=no, store=yes. * - * 'xrefid' - Xref.id values - standard biological IDs - from a biopax object and some its child objects; - * analyze=no, store=no; - * - * 'datasource', 'organism' and 'pathway' - infer from this bpe and its child objects - * up to given depth/level, analyze=no, store=yes; - * - * 'numprocesses', 'numparticipants' - number of child processes, - * participants; integer values as string; analyze=no, store=yes. - */ - void index(BioPAXElement bpe, IndexWriter indexWriter) - { + * @param bpe BioPAX element + */ + public void save(BioPAXElement bpe) { + //traverse the element to collect more keywords, e.g. names, IDs, from its child elements + Fetcher fetcher = new Fetcher(SimpleEditorMap.L3, Fetcher.nextStepFilter); + //disable traversing into sub-pathways + fetcher.setSkipSubPathways(true); + // get or infer some important values if possible from this, child or parent objects: + Set keywords = ModelUtils.getKeywords(bpe, 2, keywordsFilter); + //exclude from the index any autogenerated comments (e.g. by normalizer, merger) + keywords = keywords.stream() + .filter(s -> !s.startsWith("REPLACED") && !s.contains("ADDED")) + .collect(Collectors.toSet()); + // create a new document final Document doc = new Document(); - - //(will be using StringField and KeywordAnalyser for this field) + // using StringField and KeywordAnalyser for this field final String uri = bpe.getUri(); // save URI: indexed, not analyzed, stored doc.add(new StringField(FIELD_URI, uri, Field.Store.YES)); -// doc.add(new StringField(FIELD_URI, uri.toLowerCase(), Field.Store.NO)); //extract and index the last part of the uri (e.g., 'hsa00010' or like 'ProteinReference_ca123bd44...') if(uri.startsWith("http://")) { String id = (uri.endsWith("/")) ? 
uri.substring(0, uri.length()-1) : uri; id = id.replaceAll(".*[/#]", "").trim(); doc.add(new StringField(FIELD_URI, id, Field.Store.NO)); -// doc.add(new StringField(FIELD_URI, id.toLowerCase(), Field.Store.NO)); //let it be case-sensitive } // index and store but not analyze/tokenize the biopax class name: doc.add(new StringField(FIELD_TYPE, bpe.getModelInterface().getSimpleName().toLowerCase(), Field.Store.YES)); - - // make index fields from the annotations map (of pre-calculated/inferred values) - Map annotations = bpe.getAnnotations(); - if(!annotations.isEmpty()) - { - if(annotations.containsKey(FIELD_PATHWAY)) { - addPathways((Set)annotations.get(FIELD_PATHWAY), doc); - } - - if(annotations.containsKey(FIELD_ORGANISM)) { - addOrganisms((Set)annotations.get(FIELD_ORGANISM), doc); - } - - if(annotations.containsKey(FIELD_DATASOURCE)) { - addDatasources((Set)annotations.get(FIELD_DATASOURCE), doc); - } - - if(annotations.containsKey(FIELD_KEYWORD)) { - for (String keyword : (Set)annotations.get(FIELD_KEYWORD)) { - Field f = new TextField(FIELD_KEYWORD, keyword.toLowerCase(), Field.Store.NO); - doc.add(f); - } - } + // extra index fields + addPathways(ModelUtils.getParentPathways(bpe), doc); + addOrganisms(ModelUtils.getOrganisms(bpe), doc); + addDatasources(ModelUtils.getDatasources(bpe), doc); + for (String keyword : keywords) { + doc.add(new TextField(FIELD_KEYWORD, keyword.toLowerCase(), Field.Store.NO)); + } - if(annotations.containsKey(FIELD_N_PARTICIPANTS)) { - doc.add(new StoredField(FIELD_N_PARTICIPANTS, - Integer.parseInt((String)annotations.get(FIELD_N_PARTICIPANTS)))); - } + //set (PEs/Genes), (interactions/pathways), index fields: + if(bpe instanceof org.biopax.paxtools.model.level3.Process) { + int numProc = fetcher.fetch(bpe, Process.class).size(); //except itself + int numPeAndG = fetcher.fetch(bpe, PhysicalEntity.class).size() + + fetcher.fetch(bpe, Gene.class).size(); + doc.add(new StoredField(FIELD_N_PARTICIPANTS, numPeAndG)); + doc.add(new 
StoredField(FIELD_N_PROCESSES, numProc)); + } else if(bpe instanceof Complex) { + int numPEs = fetcher.fetch(bpe, PhysicalEntity.class).size(); + doc.add(new StoredField(FIELD_N_PARTICIPANTS, numPEs)); + } - if(annotations.containsKey(FIELD_N_PROCESSES)) { - doc.add(new StoredField(FIELD_N_PROCESSES, - Integer.parseInt((String)annotations.get(FIELD_N_PROCESSES)))); + // Add more xref IDs to the index using id-mapping + Set ids = CPathUtils.getXrefIds(bpe); + Pattern isoformIdPattern = Pattern.compile(Resolver.getNamespace("uniprot.isoform", true).getPattern()); + Pattern uniprotIdPattern = Pattern.compile(Resolver.getNamespace("uniprot", true).getPattern()); //"uniprot protein" is the preferred name + // in addition, collect ChEBI and UniProt IDs and then + // use id-mapping to associate the bpe with more IDs: + final List uniprotIds = new ArrayList<>(); + final List chebiIds = new ArrayList<>(); + for(String id : ids) { + //Note: ChEBI IDs will be always with 'CHEBI:' prefix; see CPathUtils.getXrefIds impl. + if(id.startsWith("CHEBI:")) { + chebiIds.add(id); + } else if(isoformIdPattern.matcher(id).find()) { + //cut the isoform num. suffix + id = id.replaceFirst("-\\d+$", ""); + uniprotIds.add(id); + } else if(uniprotIdPattern.matcher(id).find()) { + uniprotIds.add(id); } - - // Add xref IDs to the index (IDs are prepared and stored in advance - // in the annotations map, under FIELD_XREFID key) - Set ids = (annotations.containsKey(FIELD_XREFID)) - ? 
(Set)annotations.get(FIELD_XREFID) :CPathUtils.getXrefIds(bpe); - for (String id : ids) { - //index as not analyzed, not tokenized - doc.add(new StringField(FIELD_XREFID, id.toLowerCase(), Field.Store.NO)); - doc.add(new StringField(FIELD_XREFID, id, Field.Store.NO)); + } + addSupportedIdsThatMapToChebi(chebiIds, ids); + addSupportedIdsThatMapToUniprotId(uniprotIds, ids); + for (String id : ids) {//index as: not analyzed, not tokenized +// doc.add(new StringField(FIELD_XREFID, id.toLowerCase(), Field.Store.NO)); // TODO: why did we do this? IDs are case-sensitive. + doc.add(new StringField(FIELD_XREFID, id, Field.Store.NO)); + //also store a lower-case prefix (banana, e.g. 'chebi:1234' version of the id) + if(StringUtils.contains(id,":")) { + doc.add(new StringField(FIELD_XREFID, + StringUtils.lowerCase(StringUtils.substringBefore(id, ":")) + + ":" + StringUtils.substringAfter(id, ":"), Field.Store.NO)); } } - annotations.remove(FIELD_KEYWORD); - annotations.remove(FIELD_DATASOURCE); - annotations.remove(FIELD_ORGANISM); - annotations.remove(FIELD_PATHWAY); - annotations.remove(FIELD_N_PARTICIPANTS); - annotations.remove(FIELD_N_PROCESSES); - annotations.remove(FIELD_XREFID); - - // name (we store both original and lowercased names due to use of StringField and KeywordAnalyser) + // name (store both the original and lowercase names due to use of StringField and KeywordAnalyser) if(bpe instanceof Named) { Named named = (Named) bpe; if(named.getStandardName() != null) { @@ -579,12 +448,76 @@ void index(BioPAXElement bpe, IndexWriter indexWriter) } } - // write + // save/update the lucene document try { - indexWriter.addDocument(doc); + indexWriter.updateDocument(new Term(FIELD_URI, uri), doc); } catch (IOException e) { - throw new RuntimeException("Failed to index; " + bpe.getUri(), e); + throw new RuntimeException("Failed to index: " + bpe.getUri(), e); + } + } + + @Override + public void save(Model model) { + final int numObjectsToIndex = 
model.getObjects(Entity.class).size() + + model.getObjects(EntityReference.class).size() + + model.getObjects(Provenance.class).size(); + LOG.info("index(), objects to save: " + numObjectsToIndex); + final AtomicInteger numLeft = new AtomicInteger(numObjectsToIndex); + for(BioPAXElement bpe : model.getObjects()) { + if(bpe instanceof Entity || bpe instanceof EntityReference || bpe instanceof Provenance) { + save(bpe); + int left = numLeft.decrementAndGet(); + if (left % 10000 == 0) { + commit(); + LOG.info("build(), objects to save: " + left); + } + } } + commit(); + //force refreshing the index state (for new readers) + refresh(); + LOG.info("build(), all done."); + } + + @Override + public void commit() { + try { + indexWriter.commit(); + indexWriter.flush(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void close() { + try { + if (indexWriter != null && indexWriter.isOpen()) { + indexWriter.flush(); + indexWriter.close(); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public synchronized void refresh() { + try { + searcherManager.maybeRefreshBlocking(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public boolean isClosed() { + return indexWriter == null || !indexWriter.isOpen(); + } + + @Override + public long count(String queryString) { + return 0; } private void addDatasources(Set set, Document doc) { @@ -604,8 +537,9 @@ private void addDatasources(Set set, Document doc) { } // index names - for (String s : p.getName()) - doc.add(new TextField(FIELD_DATASOURCE, s.toLowerCase(), Field.Store.NO)); + for (String s : p.getName()) { + doc.add(new StringField(FIELD_DATASOURCE, s.toLowerCase(), Field.Store.NO)); + } } } @@ -620,9 +554,10 @@ private void addOrganisms(Set set, Document doc) { } // add taxonomy for(UnificationXref x : - new ClassFilterSet(bs.getXref(), UnificationXref.class)) { - if(x.getId() != null) + new 
ClassFilterSet<>(bs.getXref(), UnificationXref.class)) { + if(x.getId() != null) { doc.add(new TextField(FIELD_ORGANISM, x.getId().toLowerCase(), Field.Store.NO)); + } } // include tissue type terms if (bs.getTissue() != null) { @@ -639,28 +574,27 @@ private void addOrganisms(Set set, Document doc) { } private void addPathways(Set set, Document doc) { - for(Pathway pw : set) - { + for(Pathway pw : set) { final String uri = pw.getUri(); //URI, index=yes, analyze=no, store=yes (this is to find child objects by pathway URI) - // we want searching by URI or its ending part (id) be case sensitive + // we want searching by URI or its ending part (id) be case-sensitive doc.add(new StringField(FIELD_PATHWAY, uri, Field.Store.YES)); -// doc.add(new StringField(FIELD_PATHWAY, uri.toLowerCase(), Field.Store.NO)); //also, extract and index the last part of the uri (e.g., 'hsa00010' or 'r-hsa-201451') if(uri.startsWith("http://")) { String id = uri.replaceAll(".*[/#]", "").trim(); doc.add(new StringField(FIELD_PATHWAY, id, Field.Store.NO)); -// doc.add(new StringField(FIELD_PATHWAY, id.toLowerCase(), Field.Store.NO)); } // add names to the 'pathway' (don't store); will be case-insensitive (if using StandardAnalyser) // (this allows to find a biopax element, e.g., protein, by a parent pathway name: pathway:) for (String s : pw.getName()) { - doc.add(new TextField(FIELD_PATHWAY, s.toLowerCase(), Field.Store.NO)); + doc.add(new StringField(FIELD_PATHWAY, s.toLowerCase(), Field.Store.NO)); } // add unification xref IDs too (case-sensitive) - for (UnificationXref x : new ClassFilterSet<>(pw.getXref(), UnificationXref.class)) - if (x.getId() != null) + for (UnificationXref x : new ClassFilterSet<>(pw.getXref(), UnificationXref.class)) { + if (x.getId() != null) { doc.add(new StringField(FIELD_PATHWAY, x.getId().trim(), Field.Store.NO)); + } + } } } @@ -698,11 +632,11 @@ private Query createFilter(Class type, String[] datasou if (organisms != null && organisms.length > 0) { 
builder.add(subQuery(organisms, FIELD_ORGANISM), Occur.MUST); } - //AND type + //AND type (including all biopax subtypes) if(type != null) { //add biopax class filter BooleanQuery.Builder query = new BooleanQuery.Builder(). add(new TermQuery(new Term(FIELD_TYPE, type.getSimpleName().toLowerCase())), Occur.SHOULD);//OR - //for each biopax subclass (interface), add the name to the filter query + //also add all biopax subclasses of the type for(Class subType : SimpleEditorMap.L3.getKnownSubClassesOf(type)) { query.add(new TermQuery(new Term(FIELD_TYPE, subType.getSimpleName().toLowerCase())), Occur.SHOULD);//OR } @@ -710,17 +644,12 @@ private Query createFilter(Class type, String[] datasou } BooleanQuery filter = builder.build(); - //TODO: use LRUQueryCache with the filter somewhere, e.g.: Query q = queryCache.doCache(filter, defaultCachingPolicy); - - if(!filter.clauses().isEmpty()) { - return filter; - } else - return null; + return (filter==null || filter.clauses().isEmpty()) ? null : filter; } /* * Values are joint with OR, but if a value - * has whitespace symbols, it also make a sub-query, + * has whitespace symbols, it also makes a sub-query, * in which terms are joint with AND. This is to filter * by datasource/organism's full name, partial name, uri, * using multiple datasources/organisms. @@ -728,7 +657,7 @@ private Query createFilter(Class type, String[] datasou * "search?q=*&datasource=intact complex&type..." - will get only IntAct Complex objects; * "search?q=*&datasource=intact&type..." - will consider both IntAct and IntAct Complex * "search?q=*&datasource=intact biogrid&type..." means "to occur in both intact and biogrid" - * (can be a canonical protein reference; it's is not equivalent to "search?q=*&datasource=intact&datasource=biogrid&...", + * (can be a canonical protein reference; it is not equivalent to "search?q=*&datasource=intact&datasource=biogrid&...", * which means "to occur either in intact, incl. 
IntAct Complex, or in biogrid or in all these") * "search?q=*&datasource=intact complex biogrid&..." - won't match anything. */ @@ -746,7 +675,7 @@ private Query subQuery(String[] filterValues, String filterField) CharTermAttribute chattr = tokenStream.addAttribute(CharTermAttribute.class); tokenStream.reset(); while(tokenStream.incrementToken()) { - //'of', 'and', 'for',.. never occur as tokens (this is how the analyzer works) + //common words, e.g. 'of', 'and', 'for' won't occur as tokens (see the analyzer constructor arg) String token = chattr.toString(); bq.add(new TermQuery(new Term(filterField, token)), Occur.MUST); } @@ -762,7 +691,139 @@ private Query subQuery(String[] filterValues, String filterField) query.add(new TermQuery(new Term(filterField, v.toLowerCase())), Occur.SHOULD); } } - return query.build(); } + + private void addSupportedIdsThatMapToChebi(List chebiIds, final Set resultIds) { + //find other IDs that map to the ChEBI ID + for(String id: chebiIds) { + List mappings = findByDstDbIgnoreCaseAndDstId("CHEBI", id); + if (mappings != null) { + //collect (for 'xrefid' full-text index field) only ID types that we want biopax graph queries support + for (Mapping mapping : mappings) { + if (mapping.getSrcDb().equals("PUBCHEM-COMPOUND") + || mapping.getSrcDb().equals("CHEBI") + || mapping.getSrcDb().equals("DRUGBANK") + || mapping.getSrcDb().startsWith("KEGG") + || mapping.getSrcDb().startsWith("CHEMBL") + || mapping.getSrcDb().startsWith("PHARMGKB") + ) resultIds.add(mapping.getSrcId()); + //(prefix 'CID:' is included in pubchem-compound ids) + } + } + } + } + + private void addSupportedIdsThatMapToUniprotId(List uniprotIds, final Set resultIds) { + //find other IDs that map to the UniProt AC + for(String id: uniprotIds) { + List mappings = findByDstDbIgnoreCaseAndDstId("UNIPROT", id); + if (mappings != null) { + //collect (for 'xrefid' full-text index field) only ID types that we want graph queries support + for (Mapping mapping : mappings) { + if 
(mapping.getSrcDb().startsWith("UNIPROT") + || mapping.getSrcDb().startsWith("HGNC") + || mapping.getSrcDb().equalsIgnoreCase("NCBI GENE") + || mapping.getSrcDb().equalsIgnoreCase("REFSEQ") + || mapping.getSrcDb().equalsIgnoreCase("IPI") + || mapping.getSrcDb().startsWith("ENSEMBL") + ) resultIds.add(mapping.getSrcId()); + } + } + } + } + + public Model getModel() { + return model; + } + + public void setModel(Model model) { + this.model = model; + } + + @Override + public List findByDstDbIgnoreCaseAndDstId(String dstDb, String dstId) { + return mapBy(FIELD_DSTDB, dstDb, FIELD_DSTID, dstId); + } + + @Override + public List findBySrcIdInAndDstDbIgnoreCase(List srcIds, String dstDb) { + List mappings = new ArrayList<>(); + //query for docs that match any of the srcIds + BooleanQuery.Builder anyId = new BooleanQuery.Builder(); + for(String id : srcIds) { + anyId.add(new TermQuery(new Term(FIELD_SRCID, id)), Occur.SHOULD); + } + //query for docs that match any of srcIds AND the dstDb + Query q = new BooleanQuery.Builder() + .add(new TermQuery(new Term(FIELD_DSTDB, dstDb.toUpperCase())), Occur.MUST) + .add(anyId.build(), Occur.MUST) + .build(); + IndexSearcher searcher = null; + try { + searcher = searcherManager.acquire(); + int nHits = searcher.count(q); + if(nHits > 0) { + TopDocs topDocs = searcher.search(q, nHits); + for (ScoreDoc sd : topDocs.scoreDocs) { + Document d = searcher.doc(sd.doc); + Mapping m = new Mapping(d.get(FIELD_SRCDB), d.get(FIELD_SRCID), d.get(FIELD_DSTDB), d.get(FIELD_DSTID)); + mappings.add(m); + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } finally { + try { + searcherManager.release(searcher); + } catch (IOException e) {} + } + return mappings; + } + + private List mapBy(String fieldDb, String db, String fieldId, String id) { + List mappings = new ArrayList<>(); + //query for docs that match the src/dst db and id + Query q = new BooleanQuery.Builder() + .add(new TermQuery(new Term(fieldDb, db.toUpperCase())), 
Occur.MUST) + .add(new TermQuery(new Term(fieldId, id)), Occur.MUST) + .build(); + IndexSearcher searcher = null; + try { + searcher = searcherManager.acquire(); + int nHits = searcher.count(q); + if(nHits > 0) { + TopDocs topDocs = searcher.search(q, nHits); + for (ScoreDoc sd : topDocs.scoreDocs) { + Document d = searcher.doc(sd.doc); + Mapping m = new Mapping(d.get(FIELD_SRCDB), d.get(FIELD_SRCID), d.get(FIELD_DSTDB), d.get(FIELD_DSTID)); + mappings.add(m); + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } finally { + try { + searcherManager.release(searcher); + } catch (IOException e) {} + } + return mappings; + } + + @Override + public void save(Mapping mapping) { + final Document doc = new Document(); + doc.add(new StringField(FIELD_SRCDB, mapping.getSrcDb().toUpperCase(), Field.Store.YES)); + doc.add(new StringField(FIELD_SRCID, mapping.getSrcId(), Field.Store.YES)); + doc.add(new StringField(FIELD_DSTDB, mapping.getDstDb().toUpperCase(), Field.Store.YES)); + doc.add(new StringField(FIELD_DSTID, mapping.getDstId(), Field.Store.YES)); + doc.add(new StringField(FIELD_DOCID, mapping.docId(), Field.Store.NO)); + doc.add(new StringField(FIELD_TYPE, "mapping", Field.Store.NO)); + try { + indexWriter.updateDocument(new Term(FIELD_DOCID, mapping.docId()), doc); + } catch (IOException e) { + throw new RuntimeException(e); + } + //call commit(), refresh() after one or several save(mapping) + } } diff --git a/src/main/java/cpath/service/Merger.java b/src/main/java/cpath/service/Merger.java index acd82206d..de9cf8efd 100644 --- a/src/main/java/cpath/service/Merger.java +++ b/src/main/java/cpath/service/Merger.java @@ -1,13 +1,15 @@ package cpath.service; -import cpath.service.api.CPathService; +import cpath.service.api.Service; import cpath.service.api.RelTypeVocab; -import cpath.service.jpa.Metadata; +import cpath.service.metadata.Datasource; +import org.apache.commons.lang3.StringUtils; import org.biopax.paxtools.controller.PropertyEditor; 
import org.biopax.paxtools.model.*; import org.biopax.paxtools.model.level3.*; import org.biopax.paxtools.model.level3.Process; +import org.biopax.paxtools.normalizer.Resolver; import org.biopax.paxtools.util.ClassFilterSet; import org.biopax.paxtools.controller.ModelUtils; import org.biopax.paxtools.controller.SimpleEditorMap; @@ -36,9 +38,9 @@ public final class Merger { private static final Logger log = LoggerFactory.getLogger(Merger.class); private final String xmlBase; - private final CPathService service; + private final Service service; private final Set supportedTaxonomyIds; - private final Model warehouseModel; + private final Model warehouseModel; private final Model mainModel; @@ -47,16 +49,14 @@ public final class Merger { * * @param service cpath2 service */ - Merger(CPathService service) + Merger(Service service) { this.service = service; this.xmlBase = service.settings().getXmlBase(); this.supportedTaxonomyIds = service.settings().getOrganismTaxonomyIds(); - this.warehouseModel = service.loadWarehouseModel(); Assert.notNull(warehouseModel, "No BioPAX Warehouse"); - log.info("Successfully imported Warehouse BioPAX archive."); - + log.info("Successfully imported Warehouse BioPAX archive."); this.mainModel = BioPAXLevel.L3.getDefaultFactory().createModel(); this.mainModel.setXmlBase(xmlBase); log.info("Created a new empty main BioPAX model."); @@ -70,36 +70,39 @@ public final class Merger { Model getMainModel() { return mainModel; } + + Model getWarehouseModel() { + return warehouseModel; + } public void merge() { SimpleMerger simpleMerger = new SimpleMerger(SimpleEditorMap.L3, object -> true); - Collection providersMetadata = new ArrayList<>(); - for(Metadata metadata : service.metadata().findAll()) { - providersMetadata.add(metadata); - } + //init the lucene index + service.initIndex(mainModel, service.settings().indexDir(), false); - for (Metadata metadata : providersMetadata) { - if(metadata.isNotPathwayData()) { - log.info("Skip Warehouse data: " + 
metadata); + for (Datasource datasource : service.metadata().getDatasources()) { + if(datasource.getType().isNotPathwayData()) { + log.info("Skip Warehouse data: " + datasource); continue; } - Model providerModel = merge(metadata); - save(providerModel, metadata); + Model providerModel = merge(datasource); + save(providerModel, datasource); - log.info("Replacing conflicting URIs in " + metadata.getIdentifier() + " before merging into Main..."); + log.info("Replacing conflicting URIs in " + datasource.getIdentifier() + " before merging into Main..."); replaceConflictingUris(providerModel, mainModel); - log.info("Merging '" + metadata.getIdentifier() + "' model into the Main BioPAX model..."); + log.info("Merging '" + datasource.getIdentifier() + "' model into the Main BioPAX model..."); simpleMerger.merge(mainModel, providerModel); } ModelUtils.removeObjectsIfDangling(mainModel, UtilityClass.class); - log.info("Creating the main ('All') BioPAX archive..."); + log.info("Saving the main BioPAX Model to file: " + service.settings().mainModelFile()); save(); - log.info("Complete."); + service.setModel(mainModel); + log.info("All merged!"); } //remove bad unif. and rel. 
xrefs @@ -120,18 +123,18 @@ private void cleanupXrefs(Model m) { } } - private Model merge(Metadata metadata) { - Model providerModel = service.loadBiopaxModelByDatasource(metadata); + private Model merge(Datasource datasource) { + Model providerModel = service.loadBiopaxModelByDatasource(datasource); if(providerModel == null) { - log.info("Merging " + metadata.getIdentifier()); + log.info("Merging " + datasource.getIdentifier()); providerModel = BioPAXLevel.L3.getDefaultFactory().createModel(); providerModel.setXmlBase(xmlBase); - for (String f : metadata.getFiles()) { + for (String f : datasource.getFiles()) { String fn = CPathUtils.normalizedFile(f); if (Files.notExists(Paths.get(fn))) { - log.warn("Skipped " + metadata.getIdentifier() + " - no normalized data found."); + log.warn("Skipped " + datasource.getIdentifier() + " - no normalized data found."); continue; } log.info("Processing: " + fn); @@ -141,37 +144,35 @@ private Model merge(Metadata metadata) { log.error("Skipped " + fn + " - " + "cannot read."); continue; } + //merge each input file model with Warehouse model (using id-mapping too) and into providerModel (one-datasource model) merge(fn, (new SimpleIOHandler(BioPAXLevel.L3)).convertFromOWL(inputStream), providerModel); } ModelUtils.removeObjectsIfDangling(providerModel, UtilityClass.class); - log.info("Normalizing generics (" + metadata.getIdentifier() + ")..."); + log.info("Normalizing generics (" + datasource.getIdentifier() + ")..."); ModelUtils.normalizeGenerics(providerModel); - //replace not normalized URIs with shorter ones to save storage and memory - replaceOriginalUris(providerModel); - - //quick-fix BioSource: add name if it's missing (helps full-text search) + //for (already normalized) BioSource, also add the name from + //application.properties (it helps full-text search) Map orgMap = service.settings().getOrganismsAsTaxonomyToNameMap(); for(BioSource org : providerModel.getObjects(BioSource.class)) { - for (Map.Entry entry : 
orgMap.entrySet()) { - // BioSource URIs are already normalized and contain identifiers.org/taxonomy - // (if it was possible); might also contain a suffix after "_" (cell type, tissue terms) - if(org.getUri().startsWith("http://identifiers.org/taxonomy/" + entry.getKey() + "_")) { - org.addName(entry.getValue()); //won't duplicate if the name exists in the Set + for(UnificationXref x : new ClassFilterSet<>(org.getXref(), UnificationXref.class)) { + String orgName = orgMap.get(x.getId()); + if(orgName != null) { + org.addName(orgName); } } } - log.info("Breaking pathway/pathwayComponent cycles (" + metadata.getIdentifier() + ")..."); + log.info("Breaking pathway/pathwayComponent cycles (" + datasource.getIdentifier() + ")..."); for(Pathway pathway : providerModel.getObjects(Pathway.class)) { breakPathwayComponentCycle(pathway); } - log.info("Done merging " + metadata); + log.info("Done merging " + datasource); } else { - log.info("Loaded BioPAX model from " + service.settings().biopaxFileNameFull(metadata.getIdentifier())); + log.info("Loaded BioPAX model from " + service.settings().biopaxFileNameFull(datasource.getIdentifier())); } return providerModel; @@ -214,7 +215,7 @@ protected void save() { } } - private void save(Model datasourceModel, Metadata datasource) { + private void save(Model datasourceModel, Datasource datasource) { try { new SimpleIOHandler(BioPAXLevel.L3).convertToOWL(datasourceModel, new GZIPOutputStream(new FileOutputStream( @@ -240,13 +241,12 @@ private void save(Model datasourceModel, Metadata datasource) { * @param target model to merge into */ protected void merge(final String description, final Model source, final Model target) { - final String srcModelInfo = "source: " + description; cleanupXrefs(source); - log.info("Searching for canonical or existing EntityReference objects " + - " to replace equivalent original objects ("+srcModelInfo+")..."); + log.info("Searching for canonical or existing Entity References " + + " to replace the 
original ones ("+srcModelInfo+")..."); final Map replacements = new HashMap<>(); // map EntityReference objects to the canonical ones (in the warehouse) if possible and safe for (EntityReference origEr: new HashSet<>(source.getObjects(EntityReference.class))) @@ -264,8 +264,7 @@ else if (origEr instanceof SmallMoleculeReference) { if (replacement != null) { //save in the map to replace the source bpe later replacements.put(origEr, replacement); - } else { - //i.e., no matching ER found in the Warehouse (the ER is from unwanted organism or unknown/no id). + } else { // No matching ER found in the Warehouse (the ER is from unwanted organism or unknown id) // Remove the PR/Dna*R/Rna*R if entityReferenceOf() is empty (member of a generic ER, or dangling) if(origEr instanceof SequenceEntityReference && origEr.getEntityReferenceOf().isEmpty()) { //remove unwanted dangling/member ER from the source model @@ -379,46 +378,26 @@ private void replaceConflictingUris(Model source, Model target) { for(BioPAXElement bpe : new HashSet<>(source.getObjects())) { String currUri = bpe.getUri(); - if( ( - !(bpe instanceof ProteinReference) - && (currUri.startsWith("http://identifiers.org/uniprot/") || currUri.toLowerCase().startsWith("uniprot:")) - ) || ( - !(bpe instanceof SmallMoleculeReference) - && (currUri.startsWith("http://identifiers.org/chebi/") || currUri.toLowerCase().startsWith("chebi:"))) - ) { + if( + (!(bpe instanceof ProteinReference) && (StringUtils.containsIgnoreCase(currUri, "bioregistry.io/uniprot"))) + || + (!(bpe instanceof SmallMoleculeReference) && (StringUtils.containsIgnoreCase(currUri, "bioregistry.io/chebi"))) + ){ //Replace URI due to potential type collision CPathUtils.replaceUri(source, bpe, - xmlBase + bpe.getModelInterface() + "_" + UUID.randomUUID()); - } else { + xmlBase + bpe.getModelInterface().getSimpleName() + "_" + UUID.randomUUID()); + } + else { BioPAXElement targetBpe = target.getByID(currUri); if (targetBpe != null && bpe.getModelInterface() 
!= targetBpe.getModelInterface()) { //Replace due to target has the same URI for a different type object CPathUtils.replaceUri(source, bpe, - xmlBase + bpe.getModelInterface() + "_" + UUID.randomUUID()); + xmlBase + bpe.getModelInterface().getSimpleName() + "_" + UUID.randomUUID()); } } } } - /* - * Replaces not normalized yet original BioPAX object URIs in the model - * with new hash based URIs using the xml:base URI prefix. - */ - private void replaceOriginalUris(Model source) { - //wrap source.getObjects() in a new set to avoid concurrent exceptions - for(BioPAXElement bpe : new HashSet<>(source.getObjects())) { - String currUri = bpe.getUri(); - if( !( currUri.startsWith(xmlBase) - || currUri.startsWith("http://identifiers.org/") - || currUri.startsWith("uniprot:") - || currUri.startsWith("chebi:") ) ) { - // Generate a new URI (using MD5HEX sum); - CPathUtils.replaceUri(source, bpe, - xmlBase + bpe.getModelInterface() + "_" + UUID.randomUUID()); - } - } - } - /* * This procedure auto-generates UniProt and ChEBI xrefs * which improve converting of the biopax to simple text formats. @@ -462,9 +441,8 @@ private void xrefByMapping(Model source, String srcModelInfo, Model target) { private void copySomeOfPropertyValues(Map replacements, Model model) { // post-fix for (Map.Entry entry: replacements.entrySet()) { - final EntityReference old = entry.getKey(); + final EntityReference old = entry.getKey(); final EntityReference repl = entry.getValue(); - for (EntityFeature ef : new HashSet<>(old.getEntityFeature())) { // move entity features of the replaced ER to the new canonical one // remove the ef from the old ER @@ -495,16 +473,16 @@ private void copySomeOfPropertyValues(Map repl } } - // Copy/Keep PublicationXrefs and RelationshipXrefs to the original PEs - // (otherwise we'd lost most of original xrefs...) TODO review; shall we copy only PXs? 
- for(Xref x : new HashSet<>(old.getXref())) { - if(x instanceof UnificationXref) //sub with RX - x = CPathUtils.findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, x.getDb(), x.getId(), model, false); - - for(SimplePhysicalEntity owner : old.getEntityReferenceOf()) { - owner.addXref(x); - } - } +// TODO: original xrefs of a known/matched ER seem useless (can get later from the primary ID); so why to copy?.. + // Copy orig. ER xrefs to the PEs (old xrefs are gone after the ERs substitution) +// for(Xref x : new HashSet<>(old.getXref())) { +// if(x instanceof UnificationXref) {//sub with RX +// x = CPathUtils.findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, x.getDb(), x.getId(), model, false); +// } +// for(SimplePhysicalEntity owner : old.getEntityReferenceOf()) { +// owner.addXref(x); +// } +// } } } @@ -513,7 +491,7 @@ private void copySomeOfPropertyValues(Map repl * This won't improve our full-text index/search and graph queries (where id-mapping is used again anyway), * but may help improve export to SIF and GSEA formats. * This method is called only for those PEs/ERs that were not merged into warehouse canonical ERs, - * for reasons such as no match for a ID, or no ID found, ambiguous ID, etc. + * for reasons such as no match for an ID, or no ID found, ambiguous ID, etc. * This method won't add additional xrefs if a ChEBI one (secondary id or not doesn't matter) is already there. */ private void chemXrefByMapping(final Model m, Named bpe, final int maxNumXrefsToAdd) @@ -543,7 +521,7 @@ private void chemXrefByMapping(final Model m, Named bpe, final int maxNumXrefsTo // add rel. xrefs if there are not too many (there's risk to make nonsense SIF/GSEA export...) 
if (!primaryIds.isEmpty() && primaryIds.size() <= maxNumXrefsToAdd) { - addRelXrefs(m, bpe, "CHEBI", primaryIds, RelTypeVocab.ADDITIONAL_INFORMATION, false); + addRelXrefs(m, bpe, "CHEBI", primaryIds, RelTypeVocab.ADDITIONAL_INFORMATION); } } } @@ -585,14 +563,15 @@ private void genomicXrefByMapping(final Model m, Named bpe, final int maxNumXref if(noneXrefDbStartsWith(bpe, "UNIPROT")) { //bpe does not have any uniprot xrefs; try to map other IDs to the primary ACs of the PRs in the Warehouse primaryACs.addAll(idMappingByXrefs(bpe, UnificationXref.class, "UNIPROT")); - if (primaryACs.isEmpty()) + if (primaryACs.isEmpty()) { primaryACs.addAll(idMappingByXrefs(bpe, RelationshipXref.class, "UNIPROT")); - // FYI: if we'd try mapping biopolymers by name, then e.g,, 'HLA DQB1' or 'ND5' - // would result in hundreds unique uniprot/trembl IDs; so we don't do this! + // FYI: if we'd try mapping biopolymers by name, then, e.g., 'HLA DQB1' or 'ND5' + // would result in hundreds unique uniprot/trembl IDs; so we don't do this! + } // add rel. xrefs if there are not too many (there's risk to make nonsense SIF/GSEA export...) 
if (!primaryACs.isEmpty() && primaryACs.size() <= maxNumXrefsToAdd) { - addRelXrefs(m, bpe, "UNIPROT", primaryACs, RelTypeVocab.ADDITIONAL_INFORMATION, true); + addRelXrefs(m, bpe, "UNIPROT", primaryACs, RelTypeVocab.ADDITIONAL_INFORMATION); } else if(primaryACs.size() > maxNumXrefsToAdd) { log.debug(bpe.getUri() + ", " + organismRemark + ", ambiguously maps to many UNIPROT ACs: " @@ -603,7 +582,7 @@ else if(primaryACs.size() > maxNumXrefsToAdd) { it.next(); it.remove(); } - addRelXrefs(m, bpe, "UNIPROT", primaryACs, RelTypeVocab.ADDITIONAL_INFORMATION, true); + addRelXrefs(m, bpe, "UNIPROT", primaryACs, RelTypeVocab.ADDITIONAL_INFORMATION); } } else { //bpe has got some UniProt Xrefs (ok if secondary/isoform/trembl ID); // let's map those to primary accessions, then - to HGNC Symbols, and then remove other ids @@ -615,54 +594,48 @@ else if(primaryACs.size() > maxNumXrefsToAdd) { Collection newACs = new HashSet<>(primaryACs); for(Xref x : new HashSet<>(bpe.getXref())) { //here was a bug: body never executed due to empty set if(!(x instanceof PublicationXref) - && CPathUtils.startsWithAnyIgnoreCase(x.getDb(),"uniprot")) - { - if (primaryACs.contains(x.getId())) { - newACs.remove(x.getId()); //won't add the same xref again below - x.addComment("PRIMARY"); - } else { - bpe.removeXref(x); //remove a secondary or unsupported species uniprot xref - } - } + && CPathUtils.startsWithAnyIgnoreCase(x.getDb(),"uniprot")) { + if (primaryACs.contains(x.getId())) { + newACs.remove(x.getId()); //won't add the same xref again below + } else { + bpe.removeXref(x); //remove a secondary or unsupported species uniprot xref + } + } } - addRelXrefs(m, bpe, "UNIPROT", newACs, RelTypeVocab.IDENTITY,true); + addRelXrefs(m, bpe, "UNIPROT", newACs, RelTypeVocab.IDENTITY); } } - // map primary ACs to HGNC Symbols and generate RXs if there're not too many... + // map primary ACs to HGNC Symbols and generate RXs if not too many... 
if (noneXrefDbStartsWith(bpe, "hgnc symbol")) mayAddHgncXrefs(m, bpe, primaryACs, maxNumXrefsToAdd); } // For biopolymers, also map uniprot accessions to HGNC Symbols, and add the xrefs, if possible - private void mayAddHgncXrefs(final Model m, final XReferrable bpe, - final Collection accessions, final int maxNumXrefsToAdd) - { - if(accessions == null || accessions.isEmpty()) + final Collection accessions, final int maxNumXrefsToAdd) { + if(accessions == null || accessions.isEmpty()) { return; - + } final Set hgncSymbols = new HashSet<>(); for (String ac : accessions) { ProteinReference canonicalPR = - (ProteinReference) warehouseModel.getByID("http://identifiers.org/uniprot/" + ac); + (ProteinReference) warehouseModel.getByID("bioregistry.io/uniprot:" + ac); if (canonicalPR != null) { - //TODO: shall we keep just one-two symbols (which) instead of using 'maxNumXrefsToAdd' param? for (Xref x : canonicalPR.getXref()) if (x.getDb().equalsIgnoreCase("hgnc symbol")) hgncSymbols.add(x.getId()); } } // add rel. xrefs if there are not too many (there's risk to make nonsense SIF/GSEA export...) - if (!hgncSymbols.isEmpty() && hgncSymbols.size() <= maxNumXrefsToAdd) - addRelXrefs(m, bpe, "hgnc symbol", hgncSymbols, RelTypeVocab.ADDITIONAL_INFORMATION, false); + if (!hgncSymbols.isEmpty() && hgncSymbols.size() <= maxNumXrefsToAdd) { + addRelXrefs(m, bpe, "hgnc symbol", hgncSymbols, RelTypeVocab.ADDITIONAL_INFORMATION); + } } - private static boolean noneXrefDbStartsWith(XReferrable xr, String db) - { - db = db.toLowerCase(); - for(Xref x : xr.getXref()) - { - if (!(x instanceof PublicationXref) && x.getDb().toLowerCase().startsWith(db)) { + private static boolean noneXrefDbStartsWith(XReferrable xr, String db) { + for(Xref x : xr.getXref()) { + if (!(x instanceof PublicationXref) && StringUtils.startsWithIgnoreCase(x.getDb(), db)) { return false; } } @@ -677,17 +650,16 @@ private static boolean noneXrefDbStartsWith(XReferrable xr, String db) * @param db ref. 
target database name for new xrefs; normally, 'uniprot', 'chebi', 'hgnc symbol' * @param accessions bio/chem identifiers * @param relType - vocabulary term to use with the Xref - * @param isPrimaryIds - if so, adds a comment "PRIMARY" to xrefs * @throws AssertionError when bpe is neither Gene nor PhysicalEntity nor EntityReference */ private static void addRelXrefs(Model model, XReferrable bpe, String db, - Collection accessions, RelTypeVocab relType, boolean isPrimaryIds) + Collection accessions, RelTypeVocab relType) { if(!(bpe instanceof Gene || bpe instanceof PhysicalEntity || bpe instanceof EntityReference)) throw new AssertionError("addRelXrefs: not a Gene, ER, or PE: " + bpe.getUri()); for(String ac : accessions) { - RelationshipXref rx = CPathUtils.findOrCreateRelationshipXref(relType, db, ac, model, isPrimaryIds); + RelationshipXref rx = CPathUtils.findOrCreateRelationshipXref(relType, db, ac, model); bpe.addXref(rx); } } @@ -700,57 +672,63 @@ private static void addRelXrefs(Model model, XReferrable bpe, String db, */ private ProteinReference findProteinReferenceInWarehouse(final ProteinReference orig) { - final String standardPrefix = "http://identifiers.org/"; - final String warehouseUniprotUriPrefix = standardPrefix + "uniprot/"; final String origUri = orig.getUri(); - // Try to re-use existing object - if(origUri.startsWith(warehouseUniprotUriPrefix)) { - ProteinReference toReturn = (ProteinReference) warehouseModel.getByID(origUri); - if(toReturn != null) - return toReturn; - } - - // If nothing's found by URI so far, - if (origUri.startsWith(standardPrefix)) { - // try id-mapping to UniProt AC using the ID part of the normalized URI - String id = origUri.substring(origUri.lastIndexOf('/')+1); - Set mp = service.map(id, "UNIPROT"); - Set ers = findEntityRefUsingIdMappingResult(mp, warehouseUniprotUriPrefix); - if(ers.size()>1) { - log.debug(origUri + ": by URI, ambiguously maps to " + ers.size() + " warehouse PRs"); + // first, search in the Warehouse 
for a PR by the uri + ProteinReference toReturn = (ProteinReference) warehouseModel.getByID(origUri); + if(toReturn != null) { + return toReturn; + } + + // when orig has no xrefs or only publication xrefs, + if(orig.getXref().stream().noneMatch(x -> !(x instanceof PublicationXref))) { + // try id-mapping to uniprot AC using the ID part of the normalized URI + String id = CPathUtils.idFromNormalizedUri(origUri); + if (id != null) { //indeed normalized PR + Set mp = service.map(List.of(id), "UNIPROT"); + Set ers = entRefFromWhByPrimaryId(mp, "UNIPROT"); + if (ers.size() > 1) { + log.debug(origUri + ", by URI, ambiguously maps to " + ers.size() + " warehouse PRs"); + return null; + } else if (ers.size() == 1) + return (ProteinReference) ers.iterator().next(); + } + } else { + // try id-mapping by xrefs + // map by unification xrefs that are equivalent or map to the same, the only, primary ID and warehouse ER + Set primaryIds = idMappingByXrefs(orig, UnificationXref.class, "UNIPROT"); + Set ers = entRefFromWhByPrimaryId(primaryIds, "UNIPROT"); + if (ers.isEmpty()) { + //next, try - relationship xrefs + primaryIds = idMappingByXrefs(orig, RelationshipXref.class, "UNIPROT"); + ers = entRefFromWhByPrimaryId(primaryIds, "UNIPROT"); + } + if (ers.size() > 1) { + log.debug(origUri + ": by Xrefs, ambiguously maps to " + ers.size() + " warehouse PRs"); return null; - } else if (ers.size()==1) + } else if (ers.size() == 1) { return (ProteinReference) ers.iterator().next(); + } } - - // if still nothing came out yet, try id-mapping by `Xrefs: - Set ers = findWarehouseEntityRefByXrefsAndIdMapping(orig, "UNIPROT", warehouseUniprotUriPrefix); - if(ers.size()>1) { - log.debug(origUri + ": by Xrefs, ambiguously maps to " + ers.size() + " warehouse PRs"); - return null; - } else if (ers.size()==1) - return (ProteinReference) ers.iterator().next(); - // mapping/merging proteins by names is too risky, even when unambiguous (quite unlikely); so we won't do. 
+ // protein names are risky to use for mapping even if unambiguous (unlikely); won't do - //nothing found + // none found return null; } /* A tricky internal id-mapping method. - * @param element xRefferable BioPAX object; i.e. that can (and hopefully does) have Xrefs - * @param xrefClassForMapping only use this Xref sub-class for mapping + * @param element XRefferable BioPAX object; i.e. that can (and hopefully does) have Xrefs + * @param xrefClassForMapping only use this Xref subclass for mapping * @param toDb target ID type; can be either 'UNIPROT' or 'CHEBI' only * @param dbStartsWithIgnoringcase optional list of allowed source xref.db names or prefixes - * @param the Xref sub-type - * @return primary accession numbers of the kind (toDb) - */ + * @param only either UnificationXref or RelationshipXref + * @return primary accession numbers of the kind (toDb) + */ private Set idMappingByXrefs(XReferrable element, Class xrefClassForMapping, - String toDb, String... dbStartsWithIgnoringcase) - { - //this method is to be called for a Gene, Complex, EntityReference - // - or a simple PEs that have no ER or its ER has no xrefs. + String toDb, String... dbStartsWithIgnoringcase) { + //this method should be called for a Gene, Complex, EntityReference, + //or for SimplePhysicalEntity that either have no ER or its ER has no xrefs. 
Assert.isTrue(PublicationXref.class != xrefClassForMapping, "xrefClassForMapping cannot be PublicationXref"); Assert.isTrue(element instanceof Gene || element instanceof PhysicalEntity @@ -762,13 +740,12 @@ private Set idMappingByXrefs(XReferrable element, Class "bad element type"); Set result = Collections.emptySet(); + final Set filteredXrefs = new ClassFilterSet<>(element.getXref(), xrefClassForMapping); if(filteredXrefs.isEmpty()) { log.debug("no " + xrefClassForMapping.getSimpleName() + - " xrefs found for " + element.getModelInterface().getSimpleName() + " (" + element.getUri()); - } - else - { + " found for " + element.getModelInterface().getSimpleName() + ": " + element.getUri()); + } else { final Set sourceIds = new HashSet<>(); for (T x : filteredXrefs) { if ( !(x instanceof PublicationXref) && !CPathUtils.startsWithAnyIgnoreCase(x.getDb(), "PANTHER") @@ -777,7 +754,7 @@ private Set idMappingByXrefs(XReferrable element, Class && (dbStartsWithIgnoringcase.length == 0 || CPathUtils.startsWithAnyIgnoreCase(x.getDb(), dbStartsWithIgnoringcase)) ){ - sourceIds.add(CPathUtils.fixSourceIdForMapping(x.getDb(), x.getId())); + sourceIds.add(CPathUtils.fixIdForMapping(x.getDb(), x.getId())); } } // do id-mapping, for all ids at once, and return the result set: @@ -819,47 +796,49 @@ else if(!bs.getName().isEmpty()) */ private SmallMoleculeReference findSmallMoleculeReferenceInWarehouse(final SmallMoleculeReference orig) { - final String standardPrefix = "http://identifiers.org/"; - final String warehouseChebiUriPrefix = standardPrefix + "chebi/"; final String origUri = orig.getUri(); - - // Try to re-use existing object - if(origUri.startsWith(warehouseChebiUriPrefix)) { - SmallMoleculeReference toReturn = (SmallMoleculeReference) warehouseModel.getByID(origUri); - if(toReturn != null) - return toReturn; - } - - // If nothing's found by URI, try id-mapping of the normalized URI part to chebi ID - if (origUri.startsWith(standardPrefix)) { - String id = 
origUri.substring(origUri.lastIndexOf('/')+1); - if(origUri.contains("compound")) - id = "CID:" + id; - else if(origUri.contains("substance")) - id = "SID:" + id; - Set mp = service.map(id, "CHEBI"); - Set ers = findEntityRefUsingIdMappingResult(mp, warehouseChebiUriPrefix); - if(ers.size()>1) { - log.debug(origUri + ": by URI (ID part), ambiguously maps to " + ers.size() + " warehouse SMRs"); + + //first, search in the Warehouse with the original SMR uri (feel lucky?) + BioPAXElement el = warehouseModel.getByID(origUri); + if( el instanceof SmallMoleculeReference) { + return (SmallMoleculeReference) el; //awesome! + } + + // when orig has no xrefs or only publication xrefs, + if(orig.getXref().stream().noneMatch(x -> !(x instanceof PublicationXref))) { + // try with the id from the (normalized but not a chebi) SMR URI + // and perform id-mapping to find a canonical SMR in the Warehouse model, + String id = CPathUtils.idFromNormalizedUri(origUri); + if (id != null) { + Set mp = service.map(List.of(id), "CHEBI"); + Set ers = entRefFromWhByPrimaryId(mp, "CHEBI"); + if (ers.size() > 1) { + log.debug(origUri + ": by URI (ID part), ambiguously maps to " + ers.size() + " warehouse SMRs"); + } else if (!ers.isEmpty()) //size==1 + return (SmallMoleculeReference) ers.iterator().next(); + } + } else { //otherwise, use xrefs + // try id-mapping by/from (already normalized) xrefs + Set primaryIds = idMappingByXrefs(orig, UnificationXref.class, "CHEBI"); + Set ers = entRefFromWhByPrimaryId(primaryIds, "CHEBI"); + if (ers.isEmpty()) { + //next, try - relationship xrefs + primaryIds = idMappingByXrefs(orig, RelationshipXref.class, "CHEBI"); + ers = entRefFromWhByPrimaryId(primaryIds, "CHEBI"); } - else if (!ers.isEmpty()) //size==1 + if (ers.size() > 1) { + log.debug(origUri + ", by xrefs, ambiguously maps to " + ers.size() + " warehouse SMRs"); + return null; + } else if (ers.size() == 1) { return (SmallMoleculeReference) ers.iterator().next(); + } } - // if so far the mapping 
there was either ambiguous or got nothing, - // try id-mapping by (already normalized) Xrefs: - Set ers = findWarehouseEntityRefByXrefsAndIdMapping(orig, "CHEBI", warehouseChebiUriPrefix); - if(ers.size()>1) { - log.debug(origUri + ": by Xrefs, ambiguously maps to " + ers.size() + " warehouse SMRs"); - return null; - } else if (ers.size()==1) - return (SmallMoleculeReference) ers.iterator().next(); - - // nothing? - keep trying, map by name (e..g, 'ethanol') to ChEBI ID + // finally, map by exact name (e.g, 'ethanol' to ChEBI ID, etc.) Set mp = mapSmallMoleculeByExactName(orig); - ers = findEntityRefUsingIdMappingResult(mp, warehouseChebiUriPrefix); + Set ers = entRefFromWhByPrimaryId(mp, "CHEBI"); if(ers.size()>1) { - log.debug(origUri + ": by NAMEs, ambiguously maps to " + ers.size() + " warehouse SMRs"); + log.debug(origUri + ", by names, ambiguously maps to " + ers.size() + " warehouse SMRs"); return null; } else if (ers.size()==1) { SmallMoleculeReference smr = (SmallMoleculeReference) ers.iterator().next(); @@ -867,62 +846,45 @@ else if (!ers.isEmpty()) //size==1 return smr; } - //if nothing found + // none found return null; } - private Set findWarehouseEntityRefByXrefsAndIdMapping( - EntityReference orig, String dest, String canonicalUriPrefix) - { - //map by unification xrefs that are equivalent or map to the same, the only, primary ID and warehouse ER - Set mappingSet = idMappingByXrefs(orig, UnificationXref.class, dest); - Set mapsTo = findEntityRefUsingIdMappingResult(mappingSet, canonicalUriPrefix); - - if(mapsTo.isEmpty()) { - //next, try - relationship xrefs - mappingSet = idMappingByXrefs(orig, RelationshipXref.class, dest); - mapsTo = findEntityRefUsingIdMappingResult(mappingSet, canonicalUriPrefix); - } - - return mapsTo; //can be more than one, but then we won't merge the original ER - } - private Set mapSmallMoleculeByExactName(Named el) { - Set mp = new HashSet<>(); - - // save all the names in a different Set: - final Set names = new 
HashSet<>(); - for(String n : el.getName()) - names.add(n.toLowerCase()); //LC is vital - + Set mp = new HashSet<>(1); if(el instanceof SmallMolecule || el instanceof SmallMoleculeReference) { //find a warehouse SMR(s) with exactly the same name (case-insensitive). - for(SmallMoleculeReference er : warehouseModel.getObjects(SmallMoleculeReference.class)) - { - for(String s : er.getName()) { - if(names.contains(s.toLowerCase())) { - //extract the ChEBI accession from URI, add + for(SmallMoleculeReference er : warehouseModel.getObjects(SmallMoleculeReference.class)) { + for(String erName : er.getName()) { + if(el.getName().stream().anyMatch(name -> StringUtils.equalsIgnoreCase(name, erName))) { + //extract the ChEBI AC from the normalized SMR URI mp.add(CPathUtils.idFromNormalizedUri(er.getUri())); break; } } - } + } } - return mp; } - private Set findEntityRefUsingIdMappingResult(Set mapsTo, String uriPrefix) - { + /* + * @param primaryIds - chebi or uniprot primary IDs + * @param collection - 'chebi' or 'uniprot' + * @return set of matching ERs from the Warehouse model or empty set + */ + private Set entRefFromWhByPrimaryId(Set primaryIds, String collection) { Set toReturn = new HashSet<>(); - - for(String id : mapsTo) { - String uri = uriPrefix + id; - EntityReference er = (EntityReference) warehouseModel.getByID(uri); - if(er != null) - toReturn.add(er); + for(String id : primaryIds) { + // Normalizer.uri("", prefix, id, EntityReference.class); //alternative way (in case we generalize for more biopax types) + String uri = Resolver.getURI(collection, id); //e.g. 
id can be 'CHEBI:20' or '20' (no banana) + if(uri != null) { + EntityReference er = (EntityReference) getWarehouseModel().getByID(uri); + if (er != null) { + toReturn.add(er); + } + } } - return toReturn; } + } \ No newline at end of file diff --git a/src/main/java/cpath/service/PreMerger.java b/src/main/java/cpath/service/PreMerger.java index f6f1792fb..31c9a8f5b 100644 --- a/src/main/java/cpath/service/PreMerger.java +++ b/src/main/java/cpath/service/PreMerger.java @@ -1,20 +1,23 @@ package cpath.service; -import cpath.service.api.CPathService; +import cpath.service.api.Service; import cpath.service.api.Cleaner; import cpath.service.api.Converter; import cpath.service.api.RelTypeVocab; -import cpath.service.jpa.Mapping; -import cpath.service.jpa.Metadata; -import cpath.service.jpa.Metadata.METADATA_TYPE; +import cpath.service.metadata.Datasource; +import cpath.service.metadata.Mapping; +import cpath.service.metadata.Datasource.METADATA_TYPE; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.biopax.paxtools.controller.ModelUtils; import org.biopax.paxtools.io.SimpleIOHandler; import org.biopax.paxtools.model.*; import org.biopax.paxtools.model.level3.*; +import org.biopax.paxtools.normalizer.Namespace; import org.biopax.paxtools.normalizer.Normalizer; +import org.biopax.paxtools.normalizer.Resolver; import org.biopax.validator.BiopaxIdentifier; import org.biopax.validator.api.Validator; import org.biopax.validator.api.beans.*; @@ -34,7 +37,7 @@ /** - * Class responsible for premerging pathway and warehouse data. + * Class responsible for pre-merging pathway and warehouse data. */ final class PreMerger { @@ -43,14 +46,14 @@ final class PreMerger { private final String xmlBase; private final Validator validator; - private CPathService service; + private Service service; /** * Constructor. 
* @param service cpath2 service * @param validator BioPAX Validator */ - PreMerger(CPathService service, Validator validator) { + PreMerger(Service service, Validator validator) { this.service = service; this.validator = validator; this.xmlBase = service.settings().getXmlBase(); @@ -62,33 +65,31 @@ final class PreMerger { void premerge() { // if this has been run before, there are some intermediate files left // in the corresponding output folder (can continue without processing the data from scratch) - // which will be kept unless overwrite=true option is used - // (one can also manually clean up a particular datasource sub-directory in /data to start over on re-run) - for (Metadata metadata : service.metadata().findAll()) + // (one can also manually clean up a particular /data subdirectory to start over) + for (Datasource datasource : service.metadata().getDatasources()) { - final String mid = metadata.getIdentifier(); + final String mid = datasource.getIdentifier(); - if(!Files.isDirectory(Paths.get(service.intermediateDataDir(metadata)))) { - service.clear(metadata); //empty the sub-directory and db entries + if(!Files.isDirectory(Paths.get(service.intermediateDataDir(datasource)))) { + service.clear(datasource); //empty the subdirectory and db entries } else { - metadata.getFiles().clear(); //clear the list of input file names + datasource.getFiles().clear(); //clear the list of input file names } //read and analyze the input data archive log.info("premerge(), processing: " + mid); - service.unzipData(metadata); - metadata = service.metadata().save(metadata); //persist data file names - log.debug("premerge(), " + mid + " contains " + metadata.getFiles().size() + " files"); + service.unzipData(datasource); + log.debug("premerge(), " + mid + " contains " + datasource.getFiles().size() + " files"); - if (metadata.getType() == METADATA_TYPE.MAPPING) { - log.info("premerge(), done (mapping type data)"); - return; + if (datasource.getType() == METADATA_TYPE.MAPPING) 
{ + log.info("premerge(), done for the mapping type data: " + mid); + continue; } try { // Try to instantiate the Cleaner now, and exit if it fails! Cleaner cleaner = null; - String cl = metadata.getCleanerClassname(); + String cl = datasource.getCleanerClass(); if (cl != null && cl.length() > 0) { cleaner = CPathUtils.newCleaner(cl); if (cleaner == null) { @@ -101,7 +102,7 @@ void premerge() { } Converter converter = null; - cl = metadata.getConverterClassname(); + cl = datasource.getConverterClass(); if (cl != null && cl.length() > 0) { converter = CPathUtils.newConverter(cl); if (converter == null) { @@ -115,10 +116,9 @@ void premerge() { } // Premerge for each pathway data: clean, convert, validate. - for (String datafile : new HashSet<>(metadata.getFiles())) { - pipeline(metadata, datafile, cleaner, converter); + for (String datafile : new HashSet<>(datasource.getFiles())) { + pipeline(datasource, datafile, cleaner, converter); } - } catch (Exception e) { log.error("premerge(), failed for datasource: " + mid, e); } @@ -132,19 +132,18 @@ void premerge() { * result model. 
*/ void buildWarehouse() { - Model warehouse = BioPAXLevel.L3.getDefaultFactory().createModel(); warehouse.setXmlBase(xmlBase); // process "warehouse" type metadata - for (Metadata metadata : service.metadata().findAll()) { + for (Datasource datasource : service.metadata().getDatasources()) { //skip for not "warehouse" type data - if (metadata.getType() != METADATA_TYPE.WAREHOUSE) { + if (datasource.getType() != METADATA_TYPE.WAREHOUSE) { continue; } - log.info("buildWarehouse(), adding data: " + metadata.getIdentifier()); + log.info("buildWarehouse(), adding data: " + datasource.getIdentifier()); InputStream inputStream; - for (String datafile : metadata.getFiles()) { + for (String datafile : datasource.getFiles()) { try { inputStream = new GZIPInputStream(new FileInputStream(CPathUtils.normalizedFile(datafile))); Model m = new SimpleIOHandler(BioPAXLevel.L3).convertFromOWL(inputStream); @@ -160,19 +159,19 @@ void buildWarehouse() { //clear all id-mapping tables log.warn("buildWarehouse(), removing all previous id-mapping db entries..."); - service.mapping().deleteAll(); + service.initIndex(null, service.settings().indexDir(), false); // Using the just built Warehouse BioPAX model, generate the id-mapping tables: buildIdMappingFromWarehouse(warehouse); // Process all MAPPING data - save in the id-mapping repository - for (Metadata metadata : service.metadata().findAll()) { + for (Datasource datasource : service.metadata().getDatasources()) { //skip not "mapping" data - if (metadata.getType() != METADATA_TYPE.MAPPING) { + if (datasource.getType() != METADATA_TYPE.MAPPING) { continue; } - log.info("buildWarehouse(), adding id-mapping: " + metadata.getIdentifier()); - for (String content : metadata.getFiles()) { + log.info("buildWarehouse(), adding id-mapping: " + datasource.getIdentifier()); + for (String content : datasource.getFiles()) { Set mappings; try { mappings = loadSimpleMapping(content); @@ -180,13 +179,16 @@ void buildWarehouse() { 
log.error("buildWarehouse(), failed to get id-mapping from: " + content, e); continue; } - if(mappings != null) //i.e., when no exception was thrown above - service.mapping().saveAll(mappings); + if(mappings != null) { + mappings.stream().forEach(m -> service.mapping().save(m)); + service.mapping().commit(); + } } } + service.mapping().refresh(); //remove dangling xrefs (PDB,RefSeq,..) - left after they've been used for creating id-mappings, then unlinked - ModelUtils.removeObjectsIfDangling(warehouse, Xref.class); + Set removed = ModelUtils.removeObjectsIfDangling(warehouse, Xref.class); // save to compressed file String whFile = service.settings().warehouseModelFile(); @@ -214,8 +216,8 @@ void buildWarehouse() { * This is a package-private method, mainly for jUnit testing * (not API). */ - private Set loadSimpleMapping(String mappingFile) throws IOException - { + private Set loadSimpleMapping(String mappingFile) throws IOException { + log.info("loadSimpleMapping, loading: " + mappingFile); Set mappings = new HashSet<>(); Scanner scanner = new Scanner(new GZIPInputStream(Files.newInputStream(Paths.get(mappingFile))), StandardCharsets.UTF_8.name()); @@ -224,32 +226,67 @@ private Set loadSimpleMapping(String mappingFile) throws IOException assert head.length == 2 : "bad header"; String from = head[0].trim(); String to = head[1].trim(); + + //normalize from/to collection name as bioregistry.io prefix, e.g. 
'uniprot', 'pubchem.compound' + Namespace fns = Resolver.getNamespace(from, true); + if(fns != null) { + from = fns.getPrefix(); + } + Namespace tns = Resolver.getNamespace(to, true); + if(tns != null) { + to = tns.getPrefix(); + } + while (scanner.hasNextLine()) { line = scanner.nextLine(); String[] pair = line.split("\t"); - String srcId = pair[0].trim(); - String tgtId = pair[1].trim(); - mappings.add(new Mapping(from, srcId, to, tgtId)); + + //if possible, validate IDs and add banana+peel prefixes + String src = pair[0].trim(); + src = bananaPeelId(fns, src); //null when invalid id + String tgt = pair[1].trim(); + tgt = bananaPeelId(tns, tgt); + + if(src != null && tgt != null) { + mappings.add(new Mapping(from, src, to, tgt)); + } } - scanner.close(); + scanner.close(); return mappings; } + private String bananaPeelId(Namespace ns, String id) { + if(ns == null) { + return id; + } + + if(!Resolver.checkRegExp(id, ns.getPrefix())) { + return null; + } + + String peel = ns.getBanana_peel(); //empty means no banana + if(!StringUtils.isBlank(peel)) { + return ns.getBanana() + peel + id; + } + + return id; + } + /* * Extracts id-mapping information (name/id -> primary id) - * from the Warehouse entity references's xrefs to the mapping tables. + * from the Warehouse entity references' xrefs to the mapping tables. + * + * Currently, we use PR and SMR object types only. */ - private void buildIdMappingFromWarehouse(Model warehouse) { - log.info("buildIdMappingFromWarehouse(), updating id-mapping " + - "tables by analyzing the warehouse data..."); + private void buildIdMappingFromWarehouse(Model warehouse) throws AssertionError { + log.info("buildIdMappingFromWarehouse(), updating id-mapping tables by analyzing the warehouse data..."); - //Generates Mapping tables (objects) using ERs: - //a. ChEBI secondary IDs, PUBCHEM Compound, InChIKey, chem. name - to primary CHEBI AC; - //b. UniProt secondary IDs, RefSeq, NCBI Gene, etc. - to primary UniProt AC. 
- final Set mappings = new HashSet<>(); + //Generates Mapping tables: + //a) ChEBI secondary IDs, PUBCHEM Compound, InChIKey, chem. name - to primary CHEBI AC; + //b) UniProt secondary IDs, RefSeq, NCBI Gene (number), etc. - to primary UniProt AC. - // for each ER, using its xrefs, map other identifiers to the primary accession + // for each ER, using its xrefs, map other IDs to the primary AC for(EntityReference er : warehouse.getObjects(EntityReference.class)) { String destDb; @@ -257,21 +294,23 @@ private void buildIdMappingFromWarehouse(Model warehouse) { destDb = "UNIPROT"; else if(er instanceof SmallMoleculeReference) destDb = "CHEBI"; - else //there're only PR or SMR types of ER in the warehouse model - throw new AssertionError("Unsupported warehouse ER type: " + er.getModelInterface().getSimpleName()); + else //there are only PR or SMR types of ER in the warehouse model + throw new AssertionError("Unsupported warehouse ER type: " + + er.getModelInterface().getSimpleName()); - //extract the primary id from the standard (identifiers.org) URI - final String ac = CPathUtils.idFromNormalizedUri(er.getUri()); + //extract the primary id from the normalized URI (no db/banana/prefix) + String ac = CPathUtils.idFromNormalizedUri(er.getUri()); // There are lots of unification and different type relationship xrefs - // generated by the the uniprot and chebi Converters; - // we use some of these xrefs to populate our id-mapping repository: + // generated by the uniprot and chebi Converters; + // we use some of these (already normalized) xrefs to populate our id-mapping repository: for(Xref x : new HashSet<>(er.getXref())) { if(!(x instanceof PublicationXref)) { - final String src = x.getDb().toUpperCase(); + final String srcDb = x.getDb(); if(x instanceof UnificationXref) { //map to itself; each warehouse ER has only one UX, the primary AC - mappings.add(new Mapping(src, x.getId(), destDb, ac)); + //new Mapping args (src and dest db and id) are + 
service.mapping().save(new Mapping(srcDb, x.getId(), destDb, ac)); } else if(x instanceof RelationshipXref) { // each warehouse RX has relationshipType property defined, @@ -281,23 +320,21 @@ else if(x instanceof RelationshipXref) { || rtv.getUri().endsWith(RelTypeVocab.SECONDARY_ACCESSION_NUMBER.id) //other RX types ain't a good idea for id-mapping (has_part,has_role,is_conjugate_*) ) { - mappings.add(new Mapping(src, x.getId(), destDb, ac)); + service.mapping().save(new Mapping(srcDb, x.getId(), destDb, ac)); } - // remove the rel. xref unless it's the secondary/parent ChEBI ID, 'HGNC Symbol' + // remove the rel. xref unless secondary/parent ChEBI ID, HGNC Symbol, NCBI Gene ID // (id-mapping and search/graph queries do not need these xrefs anymore) - if(!src.equalsIgnoreCase("HGNC Symbol") && !src.startsWith("NCBI Gene") - && !src.equalsIgnoreCase("CHEBI")) { + if( !srcDb.equalsIgnoreCase("hgnc.symbol") + && !StringUtils.equalsIgnoreCase(srcDb,"ncbigene") + && !srcDb.equalsIgnoreCase("chebi") + ) { er.removeXref(x); } } } } + service.mapping().commit(); } - - //save/update to the id-mapping database - log.info("buildIdMappingFromWarehouse(), saving all..."); - service.mapping().saveAll(mappings); - log.info("buildIdMappingFromWarehouse(), done."); } @@ -305,23 +342,22 @@ else if(x instanceof RelationshipXref) { /* * Given Content undergoes clean/convert/validate/normalize data pipeline. 
* - * @param metadata about the data provider + * @param datasource about the data provider * @param content provider's pathway data (file) to be processed and modified * @param cleaner data specific cleaner class (to apply before the validation/normalization) * @param converter data specific to BioPAX L3 converter class * @throws IOException */ - private void pipeline(final Metadata metadata, final String inputDataFile, + private void pipeline(final Datasource datasource, final String inputDataFile, Cleaner cleaner, Converter converter) throws IOException { Path originalDataPath = Paths.get(inputDataFile); - if (metadata.getType() == METADATA_TYPE.MAPPING) { + if (datasource.getType() == METADATA_TYPE.MAPPING) { log.info("pipeline(), skip for id-mapping data: " + originalDataPath); return; - } else { - log.info("pipeline(), process " + originalDataPath); } + log.info("pipeline(), process " + originalDataPath); File inputFile = originalDataPath.toFile(); // will be a different file at different steps final File cleaned = Paths.get(CPathUtils.cleanedFile(inputDataFile)).toFile(); final File converted = Paths.get(CPathUtils.convertedFile(inputDataFile)).toFile(); @@ -380,17 +416,17 @@ private void pipeline(final Metadata metadata, final String inputDataFile, // Validate & normalize the BioPAX model: // synonyms in xref.db property values may be replaced // with the primary db names (based on Miriam db); some URIs get normalized - checkAndNormalize(metadata, inputFile); + checkAndNormalize(datasource, inputFile); } /* * Validates, fixes, and normalizes given pathway data. 
* - * @param metadata data provider's metadata + * @param datasource data provider's datasource * @param file one of data files from the provider */ - private void checkAndNormalize(Metadata metadata, File file) throws IOException + private void checkAndNormalize(Datasource datasource, File file) throws IOException { // init Normalizer Normalizer normalizer = new Normalizer(); @@ -403,8 +439,8 @@ private void checkAndNormalize(Metadata metadata, File file) throws IOException Model model; //validate or just normalize - if(metadata.isNotPathwayData()) { //when "warehouse" or "mapping" data type - if(metadata.getType() == METADATA_TYPE.MAPPING) { + if(datasource.getType().isNotPathwayData()) { //when "warehouse" or "mapping" data type + if(datasource.getType() == METADATA_TYPE.MAPPING) { throw new IllegalArgumentException("Unsupported data type: MAPPING"); } //just load the model and skip validation @@ -427,7 +463,7 @@ private void checkAndNormalize(Metadata metadata, File file) throws IOException // get the updated model model = (Model) validation.getModel(); // update dataSource property (force new Provenance) for all entities - metadata.setProvenanceFor(model, xmlBase); + datasource.setProvenanceFor(model, xmlBase); service.saveValidationReport(validation, CPathUtils.validationFile(filename)); @@ -435,7 +471,7 @@ private void checkAndNormalize(Metadata metadata, File file) throws IOException int noErrors = validation.countErrors(null, null, null, null, true, true); log.info("pipeline(), summary for " + filename + ". Critical errors found:" + noErrors + ". " - + validation.getComment().toString() + "; " + validation.toString()); + + validation.getComment().toString() + "; " + validation); } catch (Exception e) { log.error("checkAndNormalize(), failed " + filename + "; " + e); @@ -443,9 +479,9 @@ private void checkAndNormalize(Metadata metadata, File file) throws IOException } } - //Normalize URIs, etc. + //Normalize URIs, Xrefs, etc. 
log.info("checkAndNormalize, normalizing " + filename); - normalizer.normalize(model); + normalizer.normalize(model, true); // save try { diff --git a/src/main/java/cpath/service/CPathServiceImpl.java b/src/main/java/cpath/service/ServiceImpl.java similarity index 69% rename from src/main/java/cpath/service/CPathServiceImpl.java rename to src/main/java/cpath/service/ServiceImpl.java index 8cca9c434..e85df2ceb 100644 --- a/src/main/java/cpath/service/CPathServiceImpl.java +++ b/src/main/java/cpath/service/ServiceImpl.java @@ -7,18 +7,18 @@ import java.nio.file.Paths; import java.util.*; import java.util.regex.Pattern; +import java.util.stream.Collectors; import java.util.zip.GZIPOutputStream; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; -import cpath.analysis.TraverseAnalysis; -import cpath.service.api.*; +import org.apache.commons.lang3.StringUtils; import org.biopax.paxtools.controller.*; import org.biopax.paxtools.io.*; import org.biopax.paxtools.model.*; import org.biopax.paxtools.model.level3.*; import org.biopax.paxtools.model.level3.Process; -import org.biopax.paxtools.normalizer.MiriamLink; +import org.biopax.paxtools.normalizer.Resolver; import org.biopax.paxtools.pattern.util.Blacklist; import org.biopax.paxtools.query.QueryExecuter; import org.biopax.paxtools.query.algorithm.Direction; @@ -27,24 +27,25 @@ import org.biopax.paxtools.query.wrapperL3.Filter; import org.biopax.paxtools.query.wrapperL3.OrganismFilter; import org.biopax.paxtools.query.wrapperL3.UbiqueFilter; - import org.biopax.paxtools.util.IllegalBioPAXArgumentException; import org.biopax.validator.api.ValidatorUtils; import org.biopax.validator.api.beans.Validation; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.io.DefaultResourceLoader; import org.springframework.core.io.Resource; -import org.springframework.stereotype.Service; import org.springframework.util.Assert; 
-import cpath.service.jpa.*; +import cpath.analysis.TraverseAnalysis; +import cpath.service.api.*; +import cpath.service.metadata.*; import cpath.service.jaxb.*; import static cpath.service.api.Status.*; - /** * Service tier class - to uniformly access * persisted BioPAX model and metadata from console @@ -52,23 +53,19 @@ * * @author rodche */ -@Service -public class CPathServiceImpl implements CPathService { - private static final Logger log = LoggerFactory.getLogger(CPathServiceImpl.class); - private static final Class[] DEFAULT_SEED_TYPES = new Class[]{PhysicalEntity.class, Gene.class}; - private Searcher searcher; +@org.springframework.stereotype.Service +public class ServiceImpl implements Service { + private static final Logger log = LoggerFactory.getLogger(ServiceImpl.class); + private static final Class[] DEFAULT_SEED_TYPES = new Class[]{PhysicalEntity.class, Gene.class}; - @Autowired - MetadataRepository metadataRepository; + IndexImpl index; - @Autowired - MappingsRepository mappingsRepository; + Metadata metadata; @Autowired private Settings settings; - private SimpleIOHandler simpleIO; //init on first access to getBlacklist(); so do not use it directly @@ -77,32 +74,46 @@ public class CPathServiceImpl implements CPathService { //on first access when proxy model mode is enabled (so do not use the var. directly!) 
private Model paxtoolsModel; - private final Pattern isoformIdPattern = Pattern.compile(MiriamLink.getDatatype("uniprot isoform").getPattern()); - private final Pattern refseqIdPattern = Pattern.compile(MiriamLink.getDatatype("refseq").getPattern()); - private final Pattern uniprotIdPattern = Pattern.compile(MiriamLink.getDatatype("uniprot knowledgebase").getPattern()); + private final Pattern isoformIdPattern = Pattern.compile(Resolver.getNamespace("uniprot.isoform", true).getPattern()); + private final Pattern refseqIdPattern = Pattern.compile(Resolver.getNamespace("refseq", true).getPattern()); - public CPathServiceImpl() { + public ServiceImpl() { this.simpleIO = new SimpleIOHandler(BioPAXLevel.L3); this.simpleIO.mergeDuplicates(true); } /** - * Loads the main BioPAX model, etc. - * This is not required during the data import - * (in premerge, merge, index, etc.); - * call this only after the web service is up and running. + * Loads the main BioPAX model, full-text index, blacklist. + * Call this only after the web service is up and running. */ synchronized public void init() { if(paxtoolsModel == null) { paxtoolsModel = loadMainModel(); if (paxtoolsModel != null) { - paxtoolsModel.setXmlBase(settings.getXmlBase()); + paxtoolsModel.setXmlBase(settings().getXmlBase()); log.info("Main BioPAX model (in-memory) is now ready for queries."); - searcher = new SearchEngine(paxtoolsModel, settings.indexDir()); - ((SearchEngine) searcher).setMaxHitsPerPage(settings.getMaxHitsPerPage()); } } - loadBlacklist(); + initIndex(paxtoolsModel, settings.indexDir(), true); //read-only (search) index + index.setMaxHitsPerPage(settings.getMaxHitsPerPage()); + if(blacklist == null) { + loadBlacklist(); + } + } + + /** + * Init the index unless it's opened (if so, do nothing, ignore the parameters). 
+ * + * @param model + * @param indexLocation + * @param readOnly + */ + @Override + public void initIndex(Model model, String indexLocation, boolean readOnly) { + if(index != null) { + index.close(); + } + index = new IndexImpl(model, indexLocation, readOnly); } public Settings settings() {return settings;} @@ -116,25 +127,32 @@ public Model getModel() { } public void setModel(Model paxtoolsModel) { this.paxtoolsModel = paxtoolsModel; + if(index != null) { + index.setModel(paxtoolsModel); + } + } + + IndexImpl getIndex() { + return index; + } + + void setIndex(IndexImpl index) { + this.index = index; } public ServiceResponse search(String queryStr, int page, Class biopaxClass, - String[] dsources, String[] organisms) - { - if(modelNotReady() || searcher == null) - return new ErrorResponse(MAINTENANCE,"Waiting for the initialization to complete (try later)..."); - + String[] dsources, String[] organisms) { + if(modelNotReady() || index == null) { + return new ErrorResponse(MAINTENANCE, "Waiting for the initialization to complete (try later)..."); + } try { // do search - SearchResponse hits = searcher.search(queryStr, page, biopaxClass, dsources, organisms); - + SearchResponse hits = index.search(queryStr, page, biopaxClass, dsources, organisms); hits.setComment("Search '" + queryStr + "' in " + ((biopaxClass == null) ? "all types" : biopaxClass.getSimpleName()) + "; ds: " + Arrays.toString(dsources)+ "; org.: " + Arrays.toString(organisms)); - return hits; - } catch (Exception e) { log.error("search() failed - " + e); return new ErrorResponse(INTERNAL_ERROR, e); @@ -205,26 +223,26 @@ private Model autoCompleteAndClone(final Set elements, final bool completer.setSkipSubPathways(!includeSubPathways); //mind NOT (!) 
here Model m = cloner.clone(completer.complete(elements)); if(m != null) { - m.setXmlBase(paxtoolsModel.getXmlBase()); + m.setXmlBase(settings().getXmlBase()); } return m; } - public ServiceResponse getNeighborhood(final OutputFormat format, + public ServiceResponse getNeighborhood(OutputFormat format, Map formatOptions, - final String[] sources, + String[] sources, Integer limit, Direction direction, - final String[] organisms, - final String[] datasources, + String[] organisms, + String[] datasources, boolean subPathways) { if(modelNotReady()) return new ErrorResponse(MAINTENANCE,"Waiting for the initialization to complete (try later)..."); if(direction == null) { - direction = Direction.UNDIRECTED; //TODO: use BOTHSTREAM (less data as it ignores MIs)? + direction = Direction.UNDIRECTED; } // execute the paxtools graph query @@ -245,9 +263,13 @@ public ServiceResponse getNeighborhood(final OutputFormat format, } } - public ServiceResponse getPathsBetween(final OutputFormat format, - Map formatOptions, final String[] sources, final Integer limit, - final String[] organisms, final String[] datasources, boolean subPathways) + public ServiceResponse getPathsBetween(OutputFormat format, + Map formatOptions, + String[] sources, + Integer limit, + String[] organisms, + String[] datasources, + boolean subPathways) { if(modelNotReady()) return new ErrorResponse(MAINTENANCE,"Waiting for the initialization to complete (try later)..."); @@ -271,9 +293,15 @@ public ServiceResponse getPathsBetween(final OutputFormat format, } } - public ServiceResponse getPathsFromTo(final OutputFormat format, Map formatOptions, - final String[] sources, final String[] targets, final Integer limit, - final String[] organisms, final String[] datasources, boolean subPathways) + public ServiceResponse getPathsFromTo(OutputFormat format, + Map formatOptions, + String[] sources, + String[] targets, + LimitType limitType, + Integer limit, + String[] organisms, + String[] datasources, + boolean 
subPathways) { if(modelNotReady()) return new ErrorResponse(MAINTENANCE,"Waiting for the initialization to complete (try later)..."); @@ -291,7 +319,7 @@ public ServiceResponse getPathsFromTo(final OutputFormat format, Map elements = (target == null || target.isEmpty()) ? QueryExecuter.runPathsBetweenMultiSet(source, paxtoolsModel, limit, createFilters(organisms, datasources)) - : QueryExecuter.runPathsFromToMultiSet(source, target, paxtoolsModel, LimitType.NORMAL, limit, + : QueryExecuter.runPathsFromToMultiSet(source, target, paxtoolsModel, limitType, limit, createFilters(organisms, datasources)); m = autoCompleteAndClone(elements,subPathways); @@ -317,9 +345,9 @@ private ServiceResponse convert(Model m, OutputFormat format, Map(); if(format != OutputFormat.BIOPAX && m != null) { - // remove all Pathway objects from the result model (TODO: keep pathway name,uri somehow) - // (- pathways become incomplete after detaching from main PC model; - // these look confusing after converting to other format.) + // remove all Pathway objects from the result model as + // these become incomplete after detaching from main PC model + // and look confusing after converting to other formats. 
for(Pathway p : new HashSet<>(m.getObjects(Pathway.class))) { m.remove(p); } @@ -338,13 +366,18 @@ private ServiceResponse convert(Model m, OutputFormat format, Map formatOptions, final String[] sources, - final Integer limit, Direction direction, - final String[] organisms, final String[] datasources, boolean subPathways) + public ServiceResponse getCommonStream(OutputFormat format, + Map formatOptions, + String[] sources, + Integer limit, + Direction direction, + String[] organisms, + String[] datasources, + boolean subPathways) { - if(modelNotReady()) - return new ErrorResponse(MAINTENANCE,"Waiting for the initialization to complete (try again later)..."); + if(modelNotReady()) { + return new ErrorResponse(MAINTENANCE, "Waiting for the initialization to complete (try again later)..."); + } if (direction == Direction.BOTHSTREAM) { return new ErrorResponse(BAD_REQUEST, "Direction cannot be BOTHSTREAM for the COMMONSTREAM query"); @@ -363,7 +396,8 @@ public ServiceResponse getCommonStream(final OutputFormat format, Model m = autoCompleteAndClone(result, subPathways); if(m != null) { String desc = Arrays.toString(sources); - m.setUri("PC_graph_commonstream_" + desc.hashCode()); + //m.setXmlBase(settings().getXmlBase()); //already set in autoCompleteAndClone + m.setUri(m.getXmlBase() + "commonstream_" + desc.hashCode()); m.setName(desc); } return convert(m, format, null); @@ -398,27 +432,32 @@ private Set> mapToSeeds(String[] identifiers) } /** - * Mapping IDs to BioPAX entity URIs. + * Mapping from URL/IDs to the BioPAX entity URIs. * - * @param identifiers - a list of genes/protein or molecules as: \ + * @param identifiers - a list of URIs or genes/protein/molecules IDs: \ * HGNC symbols, UniProt, RefSeq and NCBI Gene IDs; or \ * CHEBI, InChIKey, ChEMBL, DrugBank, PubChem Compound, KEGG Compound, PharmGKB. 
- * @param types filter search to get back URIs of given biopax types and sub-types + * @param types filter search to get back URIs of given biopax types and subtypes * @return URIs of matching Xrefs * * See also: issue #296 */ private String[] findUrisByIds(String[] identifiers, Class... types) { - if (identifiers.length == 0) + if(identifiers == null) { + return new String[]{}; + } + if (identifiers.length == 0) { return identifiers; //empty array + } Set uris = new TreeSet<>(); StringBuilder q = new StringBuilder(); - for (String identifier : identifiers) - { - if(identifier.startsWith("http://")) { + for (String identifier : identifiers) { + if(identifier.startsWith(settings().getXmlBase()) + || StringUtils.startsWithIgnoreCase(identifier, "http") + || StringUtils.containsAny(identifier, "bioregistry.io/", "identifiers.org/")) { // must be valid URI of some existing BioPAX object in our model uris.add(identifier); } else { @@ -459,7 +498,7 @@ private Set findUrisById(String idOrUri) Set uris = new TreeSet<>(); - if(idOrUri.startsWith("http://")) { + if(idOrUri.startsWith(settings().getXmlBase())) { // must be valid URI of some existing BioPAX object in our model uris.add(idOrUri); } else { @@ -520,13 +559,13 @@ public ServiceResponse traverse(String propertyPath, String... uris) { /** * {@inheritDoc} * - * Collect "top" pathways pathways (sort of) such as those having + * Collect "top" pathways (sort of) such as those having * controlledOf, pathwayComponentOf and stepProcessOf properties empty, and * excluding pathways with less than three components unless there is a non-trivial sub-pathway. 
*/ public ServiceResponse topPathways(String q, final String[] organisms, final String[] datasources) { - if(modelNotReady() || searcher == null) + if(modelNotReady() || index == null) return new ErrorResponse(MAINTENANCE,"Waiting for the initialization to complete (try later)..."); if(q==null || q.isEmpty()) //too much data @@ -538,7 +577,7 @@ public ServiceResponse topPathways(String q, final String[] organisms, final Str SearchResponse r; try { - r = searcher.search(q, page, Pathway.class, datasources, organisms); + r = index.search(q, page, Pathway.class, datasources, organisms); } catch(Exception e) { log.error("topPathways() failed", e); return new ErrorResponse(INTERNAL_ERROR, e); @@ -579,18 +618,15 @@ public ServiceResponse topPathways(String q, final String[] organisms, final Str // go next page try { - r = searcher.search(q, ++page, Pathway.class, datasources, organisms); + r = index.search(q, ++page, Pathway.class, datasources, organisms); } catch(Exception e) { log.error("topPathways() failed", e); return new ErrorResponse(INTERNAL_ERROR, e); } } - // final touches... topPathways.setNumHits((long)hits.size()); - topPathways.setComment("Top Pathways (technically, each has empty index " + - "field 'pathway'; that also means, they are neither components of " + - "other pathways nor controlled of any process)"); + topPathways.setComment("Top Pathways (neither components of other pathways nor controlled by any process)"); topPathways.setMaxHitsPerPage(hits.size()); topPathways.setPageNo(0); @@ -655,11 +691,13 @@ private boolean modelNotReady() { return paxtoolsModel == null; } - public Set map(String fromId, final String toDb) { - return map(Collections.singletonList(fromId), toDb); - } - - public Set map(Collection fromIds, final String toDb) { + /** + * ID mapping from any ID but only to either CHEBI or UNIPROT primary id. 
+ * @param fromIds the source IDs + * @param toDb only "CHEBI" or "UNIPROT" (case-insensitive) + * @return + */ + public Set map(Collection fromIds, String toDb) { Assert.hasText(toDb,"toDb must be not null, empty or blank"); Assert.isTrue("CHEBI".equalsIgnoreCase(toDb) || "UNIPROT".equalsIgnoreCase(toDb), "toDb is not CHEBI or UNIPROT"); @@ -671,140 +709,55 @@ public Set map(Collection fromIds, final String toDb) { List sourceIds = new ArrayList<>(); // let's guess the source db (id type) and take care of isoform ids; - // it's risky if a no-prefix integer ID type (pubchem cid, sid) is used and no srcDb is provided; - // nevertheless, for bio-polymers, we support the only 'NCBI Gene' (integer) ID type. - for(String fromId : fromIds) - { - if (fromId.matches("^\\d+$") && !toDb.equalsIgnoreCase("UNIPROT")) { - //an integer ID is expected to mean NCBI gene ID, and can be mapped only to UNIPROT; - //so, skip this one (won't map to anything anyway) - log.debug("map(), won't map " + fromId + " to " + toDb + " (ambiguous ID, unknown source)"); - continue; - } else if (toDb.equalsIgnoreCase("UNIPROT") && isoformIdPattern.matcher(fromId).find() && fromId.contains("-")) { - //it's certainly a uniprot isoform id; so we replace it with the corresponding accession number - fromId = fromId.replaceFirst("-\\d+$", ""); - } else if (toDb.equalsIgnoreCase("UNIPROT") && refseqIdPattern.matcher(fromId).find() && fromId.contains(".")) { - //remove the versiovaluesn number, such as ".1" - fromId = fromId.replaceFirst("\\.\\d+$", ""); + // it's risky if a no-prefix integer ID type (PubChem CID, SID) is used and no srcDb is provided; + // for biopolymers, we support the only one integer ID type - 'NCBI Gene' + for(String fromId : fromIds) { + if (toDb.equalsIgnoreCase("chebi")) { + if(fromId.matches("^\\d+$")) { + fromId = "CHEBI:" + fromId; + } + } + else if (toDb.equalsIgnoreCase("uniprot")) { + if(isoformIdPattern.matcher(fromId).find() && fromId.contains("-")) { + //it's certainly an 
uniprot isoform id; so we replace it with the corresponding accession number + fromId = fromId.replaceFirst("-\\d+$", ""); + } else if (refseqIdPattern.matcher(fromId).find() && fromId.contains(".")) { + //remove the version number, such as ".1" + fromId = fromId.replaceFirst("\\.\\d+$", ""); + } } - sourceIds.add(fromId); //collect } - final List mappings = mappingsRepository.findBySrcIdInAndDestIgnoreCase(sourceIds, toDb); + //use Mappings repository to execute the search and get results + Set results = mapping().findBySrcIdInAndDstDbIgnoreCase(sourceIds, toDb).stream() + .map(Mapping::getDstId) + .collect(Collectors.toCollection(TreeSet::new)); - final Set results = new TreeSet<>(); - for(Mapping m : mappings) { - if(toDb.equalsIgnoreCase(m.getDest())) - results.add(m.getDestId()); - } return results; } - /* - * Track core service events using Google Analytics Measurement Protocol - */ - public void track(String ip, String category, String label) - { - log.info(String.format("%s, %s, %s", ip, category.toUpperCase(), String.valueOf(label).toLowerCase())); - } - - public MappingsRepository mapping() { - return mappingsRepository; - } - - public MetadataRepository metadata() { - return metadataRepository; - } - - public void index() - { - init(); //very important - loads the model - - log.info("Associating bio IDs with BioPAX objects using nested Xrefs and id-mapping..."); - addIdsAsBiopaxAnnotations(); - - ((Indexer)searcher).index(); - - log.info("index(), all done."); + public Mappings mapping() { + return index; } - public void clear(Metadata metadata) { - CPathUtils.cleanupDirectory(intermediateDataDir(metadata), true); - metadata.setNumInteractions(null); - metadata.setNumPathways(null); - metadata.setNumPhysicalEntities(null); - metadata.getFiles().clear(); + public Metadata metadata() { + if(metadata == null) { + metadata = CPathUtils.readMetadata(settings().getMetadataLocation()); + } + return metadata; } - private void addIdsAsBiopaxAnnotations() - { - 
for(final BioPAXElement bpe : getModel().getObjects()) { - if(!(bpe instanceof Entity || bpe instanceof EntityReference)) - continue; //skip for UtilityClass but EntityReference - - final Set ids = CPathUtils.getXrefIds(bpe); - - // in addition, collect ChEBI and UniProt IDs and then - // use id-mapping to associate the bpe with more IDs: - final List uniprotIds = new ArrayList<>(); - final List chebiIds = new ArrayList<>(); - for(String id : ids) - { - if(id.startsWith("CHEBI:")) { - chebiIds.add(id); - } else if(isoformIdPattern.matcher(id).find()) { - //cut the isoform num. suffix - id = id.replaceFirst("-\\d+$", ""); - uniprotIds.add(id); - } else if(uniprotIdPattern.matcher(id).find()) { - uniprotIds.add(id); - } - } - addSupportedIdsThatMapToChebi(chebiIds, ids); - addSupportedIdsThatMapToUniprotId(uniprotIds, ids); - - bpe.getAnnotations().put(SearchEngine.FIELD_XREFID, ids); - } - } - - private void addSupportedIdsThatMapToChebi(List chebiIds, final Set resultIds) { - //find other IDs that map to the ChEBI ID - for(String id: chebiIds) { - List mappings = mappingsRepository.findByDestIgnoreCaseAndDestId("CHEBI", id); - if (mappings != null) { - //collect (for 'xrefid' full-text index field) only ID types that we want biopax graph queries support - for (Mapping mapping : mappings) { - if (mapping.getSrc().equals("PUBCHEM-COMPOUND") - || mapping.getSrc().equals("CHEBI") - || mapping.getSrc().equals("DRUGBANK") - || mapping.getSrc().startsWith("KEGG") - || mapping.getSrc().startsWith("CHEMBL") - || mapping.getSrc().startsWith("PHARMGKB") - ) resultIds.add(mapping.getSrcId()); - //(prefix 'CID:' is included in pubchem-compound ids) - } - } - } + public Index index() { + return index; } - private void addSupportedIdsThatMapToUniprotId(List uniprotIds, final Set resultIds) { - //find other IDs that map to the UniProt AC - for(String id: uniprotIds) { - List mappings = mappingsRepository.findByDestIgnoreCaseAndDestId("UNIPROT", id); - if (mappings != null) { - 
//collect (for 'xrefid' full-text index field) only ID types that we want graph queries support - for (Mapping mapping : mappings) { - if (mapping.getSrc().startsWith("UNIPROT") - || mapping.getSrc().startsWith("HGNC") - || mapping.getSrc().equalsIgnoreCase("NCBI GENE") - || mapping.getSrc().equalsIgnoreCase("REFSEQ") - || mapping.getSrc().equalsIgnoreCase("IPI") - || mapping.getSrc().startsWith("ENSEMBL") - ) resultIds.add(mapping.getSrcId()); - } - } - } + public void clear(Datasource datasource) { + CPathUtils.cleanupDirectory(intermediateDataDir(datasource), true); + datasource.setNumInteractions(0); + datasource.setNumPathways(0); + datasource.setNumPhysicalEntities(0); + datasource.getFiles().clear(); } public Model loadMainModel() { @@ -815,7 +768,7 @@ public Model loadWarehouseModel() { return CPathUtils.importFromTheArchive(settings.warehouseModelFile()); } - public Model loadBiopaxModelByDatasource(Metadata datasource) { + public Model loadBiopaxModelByDatasource(Datasource datasource) { Path in = Paths.get(settings.biopaxFileNameFull(datasource.getIdentifier())); if (Files.exists(in)) { return CPathUtils.importFromTheArchive(in.toString()); @@ -826,20 +779,20 @@ public Model loadBiopaxModelByDatasource(Metadata datasource) { } } - public String getDataArchiveName(Metadata metadata) { - return Paths.get(settings.dataDir(),metadata.getIdentifier() + ".zip").toString(); + public String getDataArchiveName(Datasource datasource) { + return Paths.get(settings.dataDir(), datasource.getIdentifier() + ".zip").toString(); } - public String intermediateDataDir(Metadata metadata) { - return Paths.get(settings.dataDir(), metadata.getIdentifier()).toString(); + public String intermediateDataDir(Datasource datasource) { + return Paths.get(settings.dataDir(), datasource.getIdentifier()).toString(); } - public void unzipData(Metadata metadata) { + public void unzipData(Datasource datasource) { try { - String fname = (metadata.getUrlToData().startsWith("classpath:"))//a 
hack for test - ? CPathUtils.LOADER.getResource(metadata.getUrlToData()).getFile().getPath() - : getDataArchiveName(metadata);//production + String fname = (datasource.getDataUrl().startsWith("classpath:"))//a hack for test + ? CPathUtils.LOADER.getResource(datasource.getDataUrl()).getFile().getPath() + : getDataArchiveName(datasource);//production ZipFile zipFile = new ZipFile(fname); Enumeration entries = zipFile.entries(); @@ -856,8 +809,8 @@ public void unzipData(Metadata metadata) { continue; } //create the original data file path/name, replacing all unsafe symbols with underscores - String datafile = CPathUtils.originalFile(intermediateDataDir(metadata), entryName); - metadata.addFile(datafile); + String datafile = CPathUtils.originalFile(intermediateDataDir(datasource), entryName); + datasource.getFiles().add(datafile); // expand original contend and save to the gzip output file Path out = Paths.get(datafile); if(!Files.exists(out)) { @@ -868,11 +821,11 @@ public void unzipData(Metadata metadata) { //done all zip entries zipFile.close(); } catch (IOException e) { - throw new RuntimeException("unzipData(), failed reading from: " + metadata.getIdentifier() , e); + throw new RuntimeException("unzipData(), failed reading from: " + datasource.getIdentifier() , e); } - if(metadata.getFiles().isEmpty()) - log.warn("unzipData(), no data found for " + metadata); + if(datasource.getFiles().isEmpty()) + log.warn("unzipData(), no data found for " + datasource); } public void saveValidationReport(Validation v, String reportFile) { diff --git a/src/main/java/cpath/service/Settings.java b/src/main/java/cpath/service/Settings.java index b872efe85..489b8f8e4 100644 --- a/src/main/java/cpath/service/Settings.java +++ b/src/main/java/cpath/service/Settings.java @@ -6,7 +6,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import cpath.service.api.Scope; import org.apache.commons.text.WordUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ 
-27,22 +26,17 @@ public class Settings public static final String HOME_DIR = "CPATH2_HOME"; /** - * Name for a cpath2 data sub-directory (under cpath2 home dir.) + * Name for a cpath2 data subdirectory (under cpath2 home dir.) */ public static final String DATA_SUBDIR = "data"; /** - * The sub-directory (under cpath2 Home dir.) to organize user downloadable data. + * The subdirectory (under cpath2 Home dir.) to organize user downloadable data. */ public static final String DOWNLOADS_SUBDIR = "downloads"; /** - * The cache sub-directory (under cpath2 home dir.) - */ - public static final String CACHE_SUBDIR = "cache"; - - /** - * The index sub-directory (under cpath2 home dir.) + * The index subdirectory (under cpath2 home dir.) */ public static final String INDEX_SUBDIR = "index"; @@ -55,14 +49,14 @@ public class Settings public static final String EXPORT_SCRIPT_FILE ="export.sh"; /** - * Metadata configuration default file name. + * Datasource configuration default file name. */ public static final String METADATA_FILE = "metadata.json"; private Boolean sbgnLayoutEnabled; private String xmlBase; private Integer maxHitsPerPage; - private String metadataLocation = "file:" + Paths.get(homeDir(), METADATA_FILE).toString(); + private String metadataLocation = "file:" + Paths.get(homeDir(), METADATA_FILE); private String name; private String description; private String version; @@ -70,6 +64,8 @@ public class Settings private String logo; private String species; private String downloads; + private String email; + private String organization; public Settings() { LOG.info("Working ('home') directory: " + homeDir()); @@ -94,7 +90,7 @@ public void setMaxHitsPerPage(Integer maxHitsPerPage) { this.maxHitsPerPage = maxHitsPerPage; } - public String getMetadataLocation() { + public String getMetadataLocation() { //uri string return metadataLocation; } @@ -110,6 +106,22 @@ public void setSbgnLayoutEnabled(Boolean sbgnLayoutEnabled) { this.sbgnLayoutEnabled = sbgnLayoutEnabled; } 
+ public String getEmail() { + return email; + } + + public void setEmail(String email) { + this.email = email; + } + + public String getOrganization() { + return organization; + } + + public void setOrganization(String organization) { + this.organization = organization; + } + public String getName() { return name; } @@ -223,7 +235,7 @@ public Set getOrganismTaxonomyIds() { * @throws AssertionError when taxonomy ID cannot be recognised or not found there. */ public Map getOrganismsAsTaxonomyToNameMap() { - Map m = new HashMap(); + Map m = new HashMap<>(); final Pattern taxIdPattern = Pattern.compile("([a-zA-Z0-9\\. ]+)\\s*\\(\\s*(\\d+)\\s*\\)"); for(String org : getOrganisms()) { Matcher matcher = taxIdPattern.matcher(org); @@ -318,20 +330,10 @@ public String downloadsDir() { } - /** - * Gets the full path to query cache directory. - * @deprecated - * @return - */ - public String cacheDir() { - return subDir(CACHE_SUBDIR); - } - - /** * Full path to the archive file where a BioPAX sub-model is exported. * - * @param name - a Metadata's identifier, organism name, or a special name, such as "All", "Warehouse", "Detailed". + * @param name - a Datasource's identifier, organism name, or a special name, such as "All", "Warehouse", "Detailed". * @return * @see #downloadsDir() */ @@ -342,7 +344,7 @@ public String biopaxFileNameFull(String name) { /** * Local name of the BioPAX sub-model file (in the batch downloads directory). * - * @param name - a Metadata's identifier, organism name, or a special name, such as "All", "Warehouse", "Detailed". + * @param name - a Datasource's identifier, organism name, or a special name, such as "All", "Warehouse", "Detailed". * @return * @see #downloadsDir() */ @@ -383,4 +385,26 @@ public String warehouseModelFile() { return biopaxFileNameFull(Scope.WAREHOUSE.toString()); } + /** + * Predefined large pathway data submodels + * that are generated and used by the application. 
+ * (toString method here is to get the part of the + * corresponding sub-model filename, such as 'All' in '*.All.*.gz'). + * + * In addition, by-organism and by-source archives + * are also created in the batch downloads directory, + * but those filenames do not require this enum. + * + * @author rodche + */ + enum Scope { + ALL, + DETAILED, + WAREHOUSE; + + @Override + public String toString() { //e.g. "All" + return name().substring(0, 1).toUpperCase() + name().substring(1).toLowerCase(); + } + } } diff --git a/src/main/java/cpath/service/api/Cmd.java b/src/main/java/cpath/service/api/Cmd.java deleted file mode 100644 index 5d03aff1f..000000000 --- a/src/main/java/cpath/service/api/Cmd.java +++ /dev/null @@ -1,71 +0,0 @@ -package cpath.service.api; - -import static cpath.service.api.CmdArgs.*; - - -/** - * cPath2 web service commands. - * - * @author rodche - */ -public enum Cmd { - SEARCH("Full-text search for BioPAX objects. It returns the ordered list of search hits, " + - "which are simplified description of BioPAX elements " + - "matching the query and passing all filters. A hit's uri (same as the corresponding BioPAX " + - "object's RDF ID) can be used with other webservice commands to " + - "extract the corresponding sub-model to BioPAX or another supported format. ", - "/search?q=brca*&organism=9606", - "Search Response that lists Search Hits - XML (default) or JSON (when called as '/search?')", - q, page, type, organism, datasource, user), - GET("Gets a BioPAX element or sub-model by ID(s).", - "/get?uri=http://identifiers.org/uniprot/P38398", - "BioPAX by default, other formats as specified by the format parameter.", - uri, format, user, pattern, subpw, layout), - GRAPH("Executes an advanced graph query on the data within pathway commons. " + - "Returns a sub-model as the result. 
This command can have the following parameters.", - "/graph?kind=neighborhood&source=URI1&source=URI2&...", - "BioPAX by default, other formats as specified by the format parameter.", - kind, source, target, format, limit, direction, organism, datasource, user, pattern, subpw, layout), - TOP_PATHWAYS("Gets Top Pathways. This command accepts optional filter by organism and by datasource values", - "/top_pathways", - "Search Response - XML (JSON, when called as '/top_pathways?') contains the list of all top pathways.", - organism, datasource, q, user), - TRAVERSE("Gets data property values (or elements's URIs) at the end of the property path.", - "/traverse?uri=http://identifiers.org/uniprot/P38398&path=ProteinReference/organism/displayName", - "Traverse Response - XML (or JSON, when called as '/traverse?').", - path, uri, user) - ; - - private final CmdArgs[] args; //Array is better for use in json/jsp than List/Set - private final String info; - private final String example; - private final String output; - - public CmdArgs[] getArgs() { - return args; - } - - public String getInfo() { - return info; - } - - public String getExample() { - return example; - } - - public String getOutput() { - return output; - } - - Cmd(String info, String example, String output, CmdArgs... args) { - this.info = info; - this.example = example; - this.output = output; - this.args = args; - } - - @Override - public String toString() { - return name().toLowerCase(); - } -} diff --git a/src/main/java/cpath/service/api/CmdArgs.java b/src/main/java/cpath/service/api/CmdArgs.java deleted file mode 100644 index 6f5d18e39..000000000 --- a/src/main/java/cpath/service/api/CmdArgs.java +++ /dev/null @@ -1,48 +0,0 @@ -package cpath.service.api; - -/** - * cPath2 web service command arguments. 
- * - * @author rodche - * - */ -public enum CmdArgs { - uri("known BioPAX entity URI or standard identifier (e.g., gene symbol); multiple values are supported (array)"), - q("query string (full-text search supports Lucene query syntax)"), - page("full-text search query results page number (>=0)"), - type("a BioPAX class name"), - kind("graph query type"), - format("output format name"), - organism("filter by organism, e.g., taxonomy ID (recommended) or nane; array"), - datasource("filter by data source (name, id or uri; array)"), - source("graph query source URI(s); array"), - target("graph query destination URI(s); array"), - limit("graph query search distance limit"), - path("string expression, like 'Entity/xref:PublicationXref/id' - connected by '/' and ':' " + - "BioPAX types and properties - a path to reach specific model elements through given ones"), - direction("graph query parameter 'direction'"), - - //optional parameters for sub-model extraction and conversion to other format algorithms - - pattern("when format is SIF or TXT - SIF type (pattern) name(s) to apply (can be array)"), - user("client's name, email, or app (for the service access log and usage reporting)"), - subpw("'true' or 'false' (default); for the 'get' and 'graph' queries; " + - " whether to skip traversing into sub-pathways of pathways in the result sub-model"), - layout("when format is SBGN - 'true' or 'false' (default) -" + - " whether to apply the built-in COSE layout or not"), - ; - - private final String info; - - public String getInfo() { - return info; - } - - CmdArgs(String info) { - this.info = info; - } - - @Override - public String toString() { - return name().toLowerCase(); - } -} diff --git a/src/main/java/cpath/service/api/GraphType.java b/src/main/java/cpath/service/api/GraphType.java index a0d5c6274..4d4cbc96d 100644 --- a/src/main/java/cpath/service/api/GraphType.java +++ b/src/main/java/cpath/service/api/GraphType.java @@ -1,20 +1,35 @@ package cpath.service.api; 
-public enum GraphType -{ - NEIGHBORHOOD("search the neighborhood of given source set of nodes"), - PATHSBETWEEN("find the paths between specific source set of states or entities within the boundaries of a specified length limit"), - PATHSFROMTO("find the paths from a specific source set of states or entities to a specific target set of states or entities within the boundaries of a specified length limit"), - COMMONSTREAM("search common downstream or common upstream of a specified set of entities based on the given directions within the boundaries of a specified length limit"), - ; - - private final String description; - - public String getDescription() { - return description; - } +import org.apache.commons.lang3.StringUtils; + +public enum GraphType { + NEIGHBORHOOD("search the neighborhood of given source set of nodes"), + PATHSBETWEEN("find the paths between specific source set of states or entities within the boundaries of a specified length limit"), + PATHSFROMTO("find the paths from a specific source set of states or entities to a specific target set of states or entities within the boundaries of a specified length limit"), + COMMONSTREAM("search common downstream or common upstream of a specified set of entities based on the given directions within the boundaries of a specified length limit"), + ; + + private final String description; + + public String getDescription() { + return description; + } - GraphType(String description) { - this.description = description; + GraphType(String description) { + this.description = description; + } + + public static GraphType typeOf(String tag) + { + if(StringUtils.isBlank(tag)) + return null; + + GraphType type = null; + try { + type = valueOf(tag.toUpperCase()); } + catch (IllegalArgumentException e){} + + return type; + } } \ No newline at end of file diff --git a/src/main/java/cpath/service/api/Indexer.java b/src/main/java/cpath/service/api/Indexer.java deleted file mode 100644 index 237f6d0d0..000000000 --- 
a/src/main/java/cpath/service/api/Indexer.java +++ /dev/null @@ -1,5 +0,0 @@ -package cpath.service.api; - -public interface Indexer { - void index(); -} diff --git a/src/main/java/cpath/service/api/OutputFormat.java b/src/main/java/cpath/service/api/OutputFormat.java index 9a3db5da8..56cbc4ce7 100644 --- a/src/main/java/cpath/service/api/OutputFormat.java +++ b/src/main/java/cpath/service/api/OutputFormat.java @@ -1,5 +1,7 @@ package cpath.service.api; +import org.apache.commons.lang3.StringUtils; + /** * Pre-defined Output Formats. * @@ -25,11 +27,11 @@ public enum OutputFormat { JSONLD("JSON-LD format", ".json", "application/ld+json") ; - private final String info; - private final String ext; - private final String mediaType; + final String info; + final String ext; + final String mediaType; - public String getInfo() { + public String getInfo() { return info; } @@ -46,4 +48,18 @@ public String getMediaType() { this.ext = ext; this.mediaType = mediaType; } + + public static OutputFormat typeOf(String tag) + { + if(StringUtils.isBlank(tag)) + return null; + + OutputFormat type = null; + try { + type = valueOf(tag.toUpperCase()); + } + catch (IllegalArgumentException e){} + + return type; + } } \ No newline at end of file diff --git a/src/main/java/cpath/service/api/RelTypeVocab.java b/src/main/java/cpath/service/api/RelTypeVocab.java index 23237d6da..44ffbea2d 100644 --- a/src/main/java/cpath/service/api/RelTypeVocab.java +++ b/src/main/java/cpath/service/api/RelTypeVocab.java @@ -4,22 +4,22 @@ * Values to generate standard BioPAX RelationshipTypeVocabulary objects. */ public enum RelTypeVocab { - IDENTITY("identity", "http://identifiers.org/psimi/MI:0356", "MI", "MI:0356"), - SECONDARY_ACCESSION_NUMBER("secondary-ac", "http://identifiers.org/psimi/MI:0360", "MI", "MI:0360"), - ADDITIONAL_INFORMATION("see-also", "http://identifiers.org/psimi/MI:0361", "MI", "MI:0361"), - //next should work for rel. 
xrefs pointing to a protein but attached to a Gene, Dna*, Rna* biopax objects - GENE_PRODUCT("gene product", "http://identifiers.org/psimi/MI:0251", "MI", "MI:0251"), - SET_MEMBER("set member", "http://identifiers.org/psimi/MI:1341", "MI", "MI:1341"), - //next one is probably for chebi "is_a" relationships (when parent is a chemical class/concept rather than compound) - MULTIPLE_PARENT_REFERENCE("multiple parent reference", "http://identifiers.org/psimi/MI:0829", "MI", "MI:0829"), - ISOFORM_PARENT("isoform-parent", "http://identifiers.org/psimi/MI:0243", "MI", "MI:0243"),; + IDENTITY("identity", "bioregistry.io/mi:0356", "mi", "0356"), + SECONDARY_ACCESSION_NUMBER("secondary-ac", "bioregistry.io/mi:0360", "mi", "0360"), + ADDITIONAL_INFORMATION("see-also", "bioregistry.io/mi:0361", "mi", "0361"), + //next should work for rel. xrefs pointing to a protein but attached to a Gene, Dna*, Rna* objects + GENE_PRODUCT("gene product", "bioregistry.io/mi:0251", "mi", "0251"), + SET_MEMBER("set member", "bioregistry.io/mi:1341", "mi", "1341"), + //next one is for chebi "is_a" relationships (when parent is a chemical class/concept rather than compound) + MULTIPLE_PARENT_REFERENCE("multiple parent reference", "bioregistry.io/mi:0829", "mi", "0829"), + ISOFORM_PARENT("isoform-parent", "bioregistry.io/mi:0243", "mi", "0243"),; public final String term; public final String uri; public final String db; public final String id; - private RelTypeVocab(String term, String uri, String db, String id) { + RelTypeVocab(String term, String uri, String db, String id) { this.term = term; this.uri = uri; this.db = db; diff --git a/src/main/java/cpath/service/api/Scope.java b/src/main/java/cpath/service/api/Scope.java deleted file mode 100644 index a9ac155e5..000000000 --- a/src/main/java/cpath/service/api/Scope.java +++ /dev/null @@ -1,26 +0,0 @@ -package cpath.service.api; - -/** - * Predefined large pathway data (sub-)models - * that are generated and used by the application. 
- * (toString method here is to get the part of the - * corresponding filename, such as 'All' in '*.All.*.gz'). - * - * In addition, by-organism and by-source archives - * are also created in the batch downloads directory, - * but those filenames do not require this enum. - * - * @author rodche - */ -public enum Scope { - ALL, // related to the main biopax model and its derivatives - DETAILED, //for sub-models based on biopax type datasources only - WAREHOUSE //warehouse data archive(s) that contain normalized entity references, etc. - ; - - @Override - public String toString() { - String ret = super.toString(); - return ret.substring(0, 1).toUpperCase() + ret.substring(1).toLowerCase(); - }; -} diff --git a/src/main/java/cpath/service/api/Searcher.java b/src/main/java/cpath/service/api/Searcher.java deleted file mode 100644 index ac1a9ec4d..000000000 --- a/src/main/java/cpath/service/api/Searcher.java +++ /dev/null @@ -1,21 +0,0 @@ -package cpath.service.api; - -import org.biopax.paxtools.model.BioPAXElement; - -import cpath.service.jaxb.SearchResponse; - -public interface Searcher { - /** - * Full-text search for BioPAX elements. 
- * - * @param query String (keywords or Lucene query string) - * @param page hits page number (when the number of hits exceeds a threshold) - * @param filterByType - class filter - * @param datasources - filter by datasource - * @param organisms - filter by organism - * @return ordered list of hits (by score) - */ - SearchResponse search(String query, int page, - Class filterByType, String[] datasources, String[] organisms); - -} diff --git a/src/main/java/cpath/service/api/CPathService.java b/src/main/java/cpath/service/api/Service.java similarity index 73% rename from src/main/java/cpath/service/api/CPathService.java rename to src/main/java/cpath/service/api/Service.java index 24b3f1c72..252b52803 100644 --- a/src/main/java/cpath/service/api/CPathService.java +++ b/src/main/java/cpath/service/api/Service.java @@ -1,37 +1,31 @@ package cpath.service.api; -import java.io.*; import java.util.Collection; import java.util.Map; import java.util.Set; import cpath.service.Settings; +import cpath.service.metadata.Datasource; +import cpath.service.metadata.Index; import org.biopax.paxtools.controller.PathAccessor; import org.biopax.paxtools.model.BioPAXElement; import org.biopax.paxtools.model.Model; import org.biopax.paxtools.query.algorithm.Direction; -import cpath.service.jpa.MappingsRepository; -import cpath.service.jpa.Metadata; -import cpath.service.jpa.MetadataRepository; +import cpath.service.metadata.Mappings; +import cpath.service.metadata.Metadata; import cpath.service.jaxb.ServiceResponse; +import org.biopax.paxtools.query.algorithm.LimitType; import org.biopax.validator.api.beans.Validation; /** - * CPath^2 Service is an adapter between DAO and web controllers. - * Can be used in a console application or integration tests - * (web container is not required.) + * A middle-tier interface that defines data and metadata access and analysis methods. 
* - * This interface defines several middle-tier data access and analysis methods - * that accept valid parameters, handle exceptions, and return results packed - * in a ServiceResponse bean. - * - * TODO: split into IntegrationService and AnalysisService * @author rodche */ -public interface CPathService { +public interface Service { Model getModel(); @@ -110,8 +104,8 @@ ServiceResponse getPathsBetween(OutputFormat format, Map formatO * @param subPathways optional, include/skip sub-pathways; it does not affect the graph search algorithm, */ ServiceResponse getPathsFromTo(OutputFormat format, Map formatOptions, - String[] sources, String[] targets, Integer limit, String[] organisms, - String[] datasources, boolean subPathways); + String[] sources, String[] targets, LimitType limitType, Integer limit, + String[] organisms, String[] datasources, boolean subPathways); /** * Runs a common upstream or downstream query @@ -155,80 +149,63 @@ ServiceResponse getCommonStream(OutputFormat format, Map formatO */ ServiceResponse topPathways(String q, String[] organisms, String[] datasources); - /** - * Maps an identifier to primary ID(s) of a given type. - * Auto-detects the source ID type or tries all types. - * The result set may contain more than one primary ID. - * - * @param fromId the source ID - * @param toDb standard (MIRIAM) preferred name of the target ID type (e.g., 'UniProt') - * @return a set of primary IDs of the type; normally one or none elements - */ - Set map(String fromId, String toDb); - /** * Maps multiple identifiers to primary IDs of given type. * Auto-detects the source ID type or tries all types. * The result set may contain more than one primary ID. 
* * @param fromIds the source IDs - * @param toDb standard (MIRIAM) preferred name of the target ID type (e.g., 'UniProt') + * @param toDb standard preferred name of the target ID type/collection name/prefix (e.g., 'UNIPROT','CHEBI') * @return a set of primary IDs of the type; normally one or none elements */ Set map(Collection fromIds, String toDb); - /** - * Record web service and data access events. - * @param ip IP address - * @param category log event category - * @param name event name - */ - void track(String ip, String category, String name); - - //spring-data-jpa repositories + Mappings mapping(); - MappingsRepository mapping(); + Metadata metadata(); - MetadataRepository metadata(); + Index index(); - void init(); //only in production - to load the main biopax model from file + /** + * in production, loads the pre-built biopax model and the corresponding full-text index + * (location depends on application.properties) and the blacklist file. + */ + void init(); /** - * Creates: - *

    - *
  • new BioPAX full-text index;
  • - *
  • the blacklist of ubiquitous small molecules;
  • - *
  • updates counts of different BioPAX entities per data source
  • - *
+ * Loads a biopax model, metadata and the full-text index (existing or to generate) + * + * @param model + * @param indexLocation + * @param readOnly */ - void index() throws IOException; + void initIndex(Model model, String indexLocation, boolean readOnly); - // Metadata and data processing methods + // Datasource and data processing methods /** - * Clears the metadata object and the db record, - * and also drops/creates the data directory. + * Clears the datasource object, drops/creates the data directory. * - * @param metadata data source metadata + * @param datasource data source datasource */ - void clear(Metadata metadata); + void clear(Datasource datasource); Model loadMainModel(); Model loadWarehouseModel(); - Model loadBiopaxModelByDatasource(Metadata datasource); + Model loadBiopaxModelByDatasource(Datasource datasource); - String getDataArchiveName(Metadata metadata); + String getDataArchiveName(Datasource datasource); - String intermediateDataDir(Metadata metadata); + String intermediateDataDir(Datasource datasource); /** - * Given Metadata (data source), this procedure expands the corresponding + * Given data source, this procedure expands the corresponding * original data archive (zip), collecting the data file names. - * @param metadata Metadata + * @param datasource Datasource */ - void unzipData(Metadata metadata); + void unzipData(Datasource datasource); void saveValidationReport(Validation v, String reportFile); diff --git a/src/main/java/cpath/service/jaxb/DataResponse.java b/src/main/java/cpath/service/jaxb/DataResponse.java index 5f73a0205..d9ea1f041 100644 --- a/src/main/java/cpath/service/jaxb/DataResponse.java +++ b/src/main/java/cpath/service/jaxb/DataResponse.java @@ -5,7 +5,7 @@ import java.util.Collections; import java.util.Set; -import javax.xml.bind.annotation.*; +import jakarta.xml.bind.annotation.*; /** * An internal service bean, any-data response type. 
diff --git a/src/main/java/cpath/service/jaxb/Help.java b/src/main/java/cpath/service/jaxb/Help.java index a1765066f..087e3ca06 100644 --- a/src/main/java/cpath/service/jaxb/Help.java +++ b/src/main/java/cpath/service/jaxb/Help.java @@ -6,7 +6,7 @@ import java.util.Set; import java.util.TreeSet; -import javax.xml.bind.annotation.*; +import jakarta.xml.bind.annotation.*; /** * A bean for the help web service response. diff --git a/src/main/java/cpath/service/jaxb/SearchHit.java b/src/main/java/cpath/service/jaxb/SearchHit.java index f89b377b3..dbd64e681 100644 --- a/src/main/java/cpath/service/jaxb/SearchHit.java +++ b/src/main/java/cpath/service/jaxb/SearchHit.java @@ -3,7 +3,7 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.List; -import javax.xml.bind.annotation.*; +import jakarta.xml.bind.annotation.*; import org.apache.commons.text.StringEscapeUtils; diff --git a/src/main/java/cpath/service/jaxb/SearchResponse.java b/src/main/java/cpath/service/jaxb/SearchResponse.java index 2cc4caa37..04ccb8e2d 100644 --- a/src/main/java/cpath/service/jaxb/SearchResponse.java +++ b/src/main/java/cpath/service/jaxb/SearchResponse.java @@ -1,12 +1,12 @@ package cpath.service.jaxb; +import jakarta.xml.bind.annotation.*; + import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; -import javax.xml.bind.annotation.*; - @XmlRootElement(name="searchResponse") @XmlAccessorType(XmlAccessType.FIELD) @@ -35,7 +35,7 @@ public class SearchResponse extends ServiceResponse { public SearchResponse() { } - public String getComment() { + public String getComment() { return comment; } @@ -108,7 +108,7 @@ public void setMaxHitsPerPage(Integer maxHitsPerPage) { * Calculates the total number of search result * pages using current {@link #getMaxHitsPerPage()} and {@link #numHits} * - * @return no. pages or 0 (if there're no hits yet, or it's a wrong state) + * @return no. 
pages or 0 (if there are no hits yet, or it's a wrong state) */ public int numPages() { if(numHits > 0 && maxHitsPerPage > 0) diff --git a/src/main/java/cpath/service/jaxb/ServiceResponse.java b/src/main/java/cpath/service/jaxb/ServiceResponse.java index c9116f46d..dd4eb4396 100644 --- a/src/main/java/cpath/service/jaxb/ServiceResponse.java +++ b/src/main/java/cpath/service/jaxb/ServiceResponse.java @@ -2,7 +2,7 @@ import java.io.Serializable; -import javax.xml.bind.annotation.*; +import jakarta.xml.bind.annotation.*; // not instantiable, basic cpath2 xml response type @XmlAccessorType(XmlAccessType.FIELD) diff --git a/src/main/java/cpath/service/jaxb/TraverseEntry.java b/src/main/java/cpath/service/jaxb/TraverseEntry.java index 81e1989bd..2be5b1fbe 100644 --- a/src/main/java/cpath/service/jaxb/TraverseEntry.java +++ b/src/main/java/cpath/service/jaxb/TraverseEntry.java @@ -3,12 +3,8 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.List; -import javax.xml.bind.annotation.XmlAccessType; -import javax.xml.bind.annotation.XmlAccessorType; -import javax.xml.bind.annotation.XmlAttribute; -import javax.xml.bind.annotation.XmlTransient; -import javax.xml.bind.annotation.XmlType; +import jakarta.xml.bind.annotation.*; @XmlAccessorType(XmlAccessType.FIELD) @XmlType(name = "TraverseEntry") diff --git a/src/main/java/cpath/service/jaxb/TraverseResponse.java b/src/main/java/cpath/service/jaxb/TraverseResponse.java index 72c472e6b..5c182f62c 100644 --- a/src/main/java/cpath/service/jaxb/TraverseResponse.java +++ b/src/main/java/cpath/service/jaxb/TraverseResponse.java @@ -3,7 +3,7 @@ import java.util.ArrayList; import java.util.List; -import javax.xml.bind.annotation.*; +import jakarta.xml.bind.annotation.*; @XmlRootElement(name="traverseResponse") @XmlAccessorType(XmlAccessType.FIELD) diff --git a/src/main/java/cpath/service/jpa/Mapping.java b/src/main/java/cpath/service/jpa/Mapping.java deleted file mode 100644 index 4cfcf46a1..000000000 --- 
a/src/main/java/cpath/service/jpa/Mapping.java +++ /dev/null @@ -1,126 +0,0 @@ -package cpath.service.jpa; - -import javax.persistence.*; - -import cpath.service.CPathUtils; -import org.apache.commons.lang3.builder.EqualsBuilder; -import org.apache.commons.lang3.builder.HashCodeBuilder; -import org.hibernate.annotations.DynamicInsert; -import org.hibernate.annotations.DynamicUpdate; -import org.springframework.util.Assert; - -/** - * Id-mapping Entity. - * - * @author rodche - */ -@Entity -@DynamicUpdate -@DynamicInsert -@Table( - name = "mappings", - indexes = { - @Index(name = "src_index", columnList = "src"), - @Index(name = "srcId_index", columnList = "srcId"), - @Index(name = "dest_index", columnList = "dest"), - @Index(name = "destId_index", columnList = "destId"), - @Index(name = "dest_destId_index", columnList = "dest,destId"), - @Index(name = "srcId_dest_index", columnList = "srcId,dest"), - @Index(name = "src_srcId_dest_index", columnList = "src,srcId,dest"), - } -) -public final class Mapping { - - @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) - private Long id; - - @Column(nullable = false, length = 30) //e.g., "nucleotide sequence database", - private String src; - - @Column(nullable = false, length = 15) //now, it can be either 'CHEBI' or 'UNIPROT' only. 
- private String dest; - - @Column(nullable = false, length = 30) //InChIKey ~27 sym (longest ID type); won't map names > 30 symbols - private String srcId; - - @Column(nullable = false, length = 15) - private String destId; - - public Mapping() { - } - - - public Mapping(String src, String srcId, String dest, String destId) { - Assert.hasText(src, "src must not be null, empty, or blank"); - Assert.hasText(srcId, "srcId must not be null, empty, or blank"); - Assert.hasText(dest, "dest must not be null, empty, or blank"); - Assert.hasText(destId, "destId must not be null, empty, or blank"); - Assert.isTrue(srcId.length() <= 30, "srcId is too long (>30)"); - Assert.isTrue(src.length() <= 30, "src is too long (>30)"); - Assert.isTrue(destId.length() <= 15, "destId is too long (>15)"); - Assert.isTrue(dest.length() <= 15, "dest is too long (>15)"); - - src = src.toUpperCase(); - //replace a uniprot* db name with simply 'UNIPROT' - if (src.startsWith("UNIPROT") || src.startsWith("SWISSPROT")) - src = "UNIPROT"; - else if (src.startsWith("PUBCHEM") && (src.contains("COMPOUND") || src.contains("CID"))) { - src = "PUBCHEM-COMPOUND"; - } else if (src.startsWith("PUBCHEM") && (src.contains("SUBSTANCE") || src.contains("SID"))) { - src = "PUBCHEM-SUBSTANCE"; - } - - srcId = CPathUtils.fixSourceIdForMapping(src, srcId); - - this.src = src; - this.srcId = srcId; - this.dest = dest.toUpperCase(); - this.destId = destId; - } - - Long getId() { - return id; - } - - public String getSrc() { - return src; - } - - public String getDest() { - return dest; - } - - public String getSrcId() { - return srcId; - } - - public String getDestId() { - return destId; - } - - @Override - public String toString() { - return src + ":" + srcId + "," + dest + ":" + destId; - } - - @Override - public boolean equals(Object obj) { - if (obj instanceof Mapping) { - final Mapping that = (Mapping) obj; - return new EqualsBuilder() - .append(src, that.getSrc()) - .append(srcId, that.getSrcId()) - 
.append(dest, that.getDest()) - .append(destId, that.getDestId()) - .isEquals(); - } else - return false; - } - - @Override - public int hashCode() { - return new HashCodeBuilder() - .append(src).append(srcId).append(dest).append(destId).toHashCode(); - } -} diff --git a/src/main/java/cpath/service/jpa/MappingsRepository.java b/src/main/java/cpath/service/jpa/MappingsRepository.java deleted file mode 100644 index 07d9e0e3b..000000000 --- a/src/main/java/cpath/service/jpa/MappingsRepository.java +++ /dev/null @@ -1,48 +0,0 @@ -package cpath.service.jpa; - - -import java.util.List; - -import org.springframework.data.repository.CrudRepository; - - -/** - * A spring-data repository (auto-instantiated) of Mapping entities - * (all methods here follow the spring-data naming and signature conventions, - * and therefore do not require to be implemented by us; these will be auto-generated). - * - * @author rodche - */ -public interface MappingsRepository extends CrudRepository { - - /** - * Mappings 'To' the given identifier. - * - * @param dest - * @param destId - * @return - */ - List findByDestIgnoreCaseAndDestId(String dest, String destId); - - - /** - * Mappings 'From' any of given ids (any kind) 'To' the target type of ID. - * - * @param srcIds - * @param dest - * @return - */ - List findBySrcIdInAndDestIgnoreCase(List srcIds, String dest); - - - /** - * Mappings 'From' the given source db/id 'To' the target type of ID. 
- * - * @param src - * @param srcId - * @param dest to map to - * @return - */ - List findBySrcIgnoreCaseAndSrcIdAndDestIgnoreCase(String src, String srcId, String dest); - -} diff --git a/src/main/java/cpath/service/jpa/Metadata.java b/src/main/java/cpath/service/jpa/Metadata.java deleted file mode 100755 index 7c1c1902d..000000000 --- a/src/main/java/cpath/service/jpa/Metadata.java +++ /dev/null @@ -1,417 +0,0 @@ -package cpath.service.jpa; - - -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.regex.Pattern; - -import javax.persistence.*; -import javax.validation.constraints.NotBlank; -import javax.validation.constraints.NotEmpty; -import javax.validation.constraints.NotNull; - -import cpath.service.api.Cleaner; -import cpath.service.api.Converter; -import org.apache.commons.lang3.StringUtils; -import org.biopax.paxtools.controller.ModelUtils; -import org.biopax.paxtools.model.Model; -import org.biopax.paxtools.model.level3.Provenance; -import org.biopax.paxtools.model.level3.Score; -import org.hibernate.annotations.DynamicInsert; -import org.hibernate.annotations.DynamicUpdate; - -/** - * Data provider/source metadata. - * - * Node: some public getters and setters below, despite java warnings, are in fact called from the - * web view layer (e.g., JSP) or when a web controller returns JSON/XML object. 
- */ -@Entity -@DynamicUpdate -@DynamicInsert -@Table(name = "metadata") -public final class Metadata { - - private static final Pattern BAD_ID_PATTERN = Pattern.compile("\\s|-"); - - // METADATA_TYPE Enum - public enum METADATA_TYPE { - // data types - PSI_MI(true), // interactions to be converted to BioPAX L3 format - PSI_MITAB(true), // interactions to be converted to PSI-MI then to BioPAX L3 format - BIOPAX(true), // pathways and interactions in BioPAX L2 or L3 format - SBML(true), // SBML (requires a data source specific Converter to BioPAX) - WAREHOUSE(false), // warehouse data to be converted to BioPAX and used during the merge stage - MAPPING(false); //extra gene/protein id-mapping data (two column, TSV format: "some id or name" \t "primary uniprot/chebi AC") - - private final boolean pathwayData; - - METADATA_TYPE(boolean isPathwayData) { - this.pathwayData = isPathwayData; - } - - public boolean isNotPathwayData() { - return !pathwayData; - } - - } - - @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) - private Long id; - - @NotBlank - @Column(length = 40, unique = true, nullable = false) - public String identifier; - - @NotEmpty - @ElementCollection(fetch = FetchType.EAGER) - @JoinTable(name = "metadata_name") - @OrderColumn - private List name; - - @Column(nullable = false) - private String description; - - private String urlToData; - - @Column(nullable = false) - private String urlToHomepage; - - @Column(nullable = false) - private String iconUrl; - - @NotNull - @Column(nullable = false) - @Enumerated(EnumType.STRING) - private METADATA_TYPE type; - - private String cleanerClassname; - private String converterClassname; - - @ElementCollection(fetch = FetchType.EAGER) - private Set files; - - private String pubmedId; - private String availability; - private Integer numPathways; - private Integer numInteractions; - private Integer numPhysicalEntities; - - /** - * Default Constructor. 
- */ - private Metadata() { - files = new HashSet<>(); - } - - /** - * Create a Metadata obj with the specified properties; - * @param identifier unique short string, will be used in URIs - * @param name the not empty list of names: display name (must present), standard name, other names. - * @param description description of the data source (details, release date, version, etc.) - * @param urlToData URL - where the data can be download (can be part of larger data archive) - * @param urlToHomepage provider's home page URL - * @param urlToLogo provider's logo image URL - * @param metadata_type what kind of data (warehouse, biopax, psi-mi, id-mapping) - * @param cleanerClassname canonical name of a java class that implements {@link Cleaner} - * @param converterClassname canonical name of a java class that implements {@link Converter} - * @param pubmedId recommended by the data provider reference publication PMID - * @param availability data availability: free, academic, not-free - */ - public Metadata(final String identifier, final List name, final String description, - final String urlToData, String urlToHomepage, final String urlToLogo, - final METADATA_TYPE metadata_type, final String cleanerClassname, - final String converterClassname, final String pubmedId, final String availability) - { - this(); - setIdentifier(identifier); - if (name == null || name.isEmpty()) - throw new IllegalAccessError("no names provided"); - setName(name); - setDescription(description); - setUrlToData(urlToData); - setUrlToHomepage(urlToHomepage); - setIconUrl(urlToLogo); - setType(metadata_type); - setCleanerClassname(cleanerClassname); - setConverterClassname(converterClassname); - setPubmedId(pubmedId); - setAvailability(availability); - } - - public void setId(Long id) { - this.id = id; - } - - public Long getId() { - return id; - } - - public Set getFiles() { - return files; - } - - public void addFile(String path) { - files.add(path); - } - - /** - * Sets the identifier. 
- * No spaces, dashes, allowed. - * - * @param identifier metadata identifier - * @throws IllegalArgumentException if it's null, empty string, or contains spaces or dashes - */ - void setIdentifier(String identifier) { - // validate the parameter - if (identifier == null - || identifier.length() == 0 - || BAD_ID_PATTERN.matcher(identifier).find()) - throw new IllegalAccessError("Bad metadata identifier: " + identifier); - - // copy value - this.identifier = identifier; - } - - /** - * Data source metadata identifier. - *

- * It can be also used as filter ('datasource') - * value in cpath2 full-text search queries - * (for pathway datasource types only) - * - * @return identifier - */ - public String getIdentifier() { - return identifier; - } - - /** - * Sets data provider/source name. - *

- * Please use a standard name for pathway/interaction data types, - * if possible (for warehouse data it's not so important), - * as this will be recommended to use as filter ('datasource') - * value in cpath2 full-text search queries - * - * @param name semicolon-separated names: displayName;standardName;name3;name4... - * @throws IllegalArgumentException when name is null - */ - public void setName(List name) { - if (name == null) { - throw new IllegalArgumentException("name must not be null"); - } - this.name = name; - } - - /** - * Gets the data provider/source name. - * - * @return names - */ - public List getName() { - return name; - } - - - public void setDescription(String releaseDate) { - if (releaseDate == null) { - throw new IllegalArgumentException("release data must not be null"); - } - this.description = releaseDate; - } - - public String getDescription() { - return description; - } - - public void setUrlToData(String urlToData) { - this.urlToData = urlToData; - } - - public String getUrlToData() { - return urlToData; - } - - public void setUrlToHomepage(String urlToHomepage) { - this.urlToHomepage = urlToHomepage; - } - - public String getUrlToHomepage() { - return urlToHomepage; - } - - public void setType(METADATA_TYPE metadata_type) { - if (metadata_type == null) { - throw new IllegalArgumentException("type must not be null"); - } - this.type = metadata_type; - } - - public METADATA_TYPE getType() { - return type; - } - - public void setCleanerClassname(String cleanerClassname) { - if (cleanerClassname == null || cleanerClassname.trim().length() == 0) - this.cleanerClassname = null; - else - this.cleanerClassname = cleanerClassname.trim(); - } - - public String getCleanerClassname() { - return (cleanerClassname == null || cleanerClassname.length() == 0) - ? 
null : cleanerClassname; - } - - public void setConverterClassname(String converterClassname) { - if (converterClassname == null || converterClassname.trim().length() == 0) - this.converterClassname = null; - else - this.converterClassname = converterClassname.trim(); - } - - public String getConverterClassname() { - return (converterClassname == null || converterClassname.length() == 0) - ? null : converterClassname; - } - - @Override - public String toString() { - return identifier; - } - - /** - * Creates a new Provenance from this Metadata and sets - * if to all Entity class objects in the model. - *

- * Removes all other Provenance instances and - * corresponding dataSource property values - * from the model. - * - * @param model BioPAX model to update - * @param xmlBase xml:base to use for the Provenance - */ - public void setProvenanceFor(Model model, String xmlBase) { - Provenance pro; - - // we create URI from the Metadata identifier and version. - final String uri = xmlBase + identifier; - pro = (model.containsID(uri)) - ? (Provenance) model.getByID(uri) - : model.addNew(Provenance.class, uri); - - // parse/set names - String displayName = getName().iterator().next(); - pro.setDisplayName(displayName); - pro.setStandardName(standardName()); - - if (getName().size() > 2) - for (int i = 2; i < getName().size(); i++) - pro.addName(getName().get(i)); - - // add additional info about the current version, source, identifier, etc... - final String loc = getUrlToData(); - pro.addComment("Source " + - //skip for a local or empty (default) location - ((loc.startsWith("http:") || loc.startsWith("ftp:")) ? loc : "") - + " type: " + getType() + ", " + getDescription()); - - // replace for all entities - for (org.biopax.paxtools.model.level3.Entity ent : model.getObjects(org.biopax.paxtools.model.level3.Entity.class)) { - for (Provenance ds : new HashSet<>(ent.getDataSource())) - ent.removeDataSource(ds); - ent.addDataSource(pro); - } - - for (Score score : model.getObjects(Score.class)) - if (score.getScoreSource() == null) - score.setScoreSource(pro); - - // remove dangling Provenance from the model - ModelUtils.removeObjectsIfDangling(model, Provenance.class); - } - - /** - * Returns the standard name (the second one in the name list), - * if present, otherwise - returns the first name (display name) - * - * @return name - */ - public String standardName() { - //also capitalize (can be extremely useful...) 
- if (name.size() > 1) - return StringUtils.capitalize(name.get(1)); - else - return StringUtils.capitalize(name.get(0)); - } - - public Integer getNumPathways() { - return numPathways; - } - - public void setNumPathways(Integer numPathways) { - this.numPathways = numPathways; - } - - public Integer getNumInteractions() { - return numInteractions; - } - - public void setNumInteractions(Integer numInteractions) { - this.numInteractions = numInteractions; - } - - public Integer getNumPhysicalEntities() { - return numPhysicalEntities; - } - - public void setNumPhysicalEntities(Integer numPhysicalEntities) { - this.numPhysicalEntities = numPhysicalEntities; - } - - public String getPubmedId() { - return pubmedId; - } - - public void setPubmedId(String pubmedId) { - this.pubmedId = pubmedId; - } - - public String getAvailability() { - return availability; - } - - public void setAvailability(String availability) { - this.availability = availability; - } - - public String getIconUrl() { - return iconUrl; - } - - public void setIconUrl(String iconUrl) { - this.iconUrl = iconUrl; - } - - @Transient - public boolean isNotPathwayData() { - return type.isNotPathwayData(); - } - - public void setNotPathwayData(boolean foo) { - //a fake bean property (for javascript, JSON) - } - - @Override - public boolean equals(Object o) { - return (o instanceof Metadata) && identifier.equals(((Metadata) o).getIdentifier()); - } - - @Override - public int hashCode() { - return identifier.hashCode(); - } -} diff --git a/src/main/java/cpath/service/jpa/MetadataRepository.java b/src/main/java/cpath/service/jpa/MetadataRepository.java deleted file mode 100644 index c679120fd..000000000 --- a/src/main/java/cpath/service/jpa/MetadataRepository.java +++ /dev/null @@ -1,24 +0,0 @@ -package cpath.service.jpa; - - -import org.springframework.data.repository.CrudRepository; - - -/** - * A spring-data repository (auto-instantiated) of Metadata entities - * (all methods here follow the spring-data naming 
and signature conventions, - * and therefore do not require to be implemented by us; these will be auto-generated). - * - * @author rodche - */ -public interface MetadataRepository extends CrudRepository { - - /** - * Get data provider's Metadata by identifier. - * - * @param identifier - * @return - */ - Metadata findByIdentifier(String identifier); - -} diff --git a/src/main/java/cpath/service/metadata/Datasource.java b/src/main/java/cpath/service/metadata/Datasource.java new file mode 100755 index 000000000..c03c9b962 --- /dev/null +++ b/src/main/java/cpath/service/metadata/Datasource.java @@ -0,0 +1,166 @@ +package cpath.service.metadata; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.regex.Pattern; + +import lombok.*; +import org.apache.commons.lang3.StringUtils; +import org.biopax.paxtools.controller.ModelUtils; +import org.biopax.paxtools.model.Model; +import org.biopax.paxtools.model.level3.Provenance; +import org.biopax.paxtools.model.level3.Score; + +/** + * Data provider/source metadata. + * + * Note: some public getters and setters below, despite java warnings, are in fact called from the + * web view layer (e.g., JSP) or when a web controller returns JSON/XML object.
+ */ +@Data //Lombok auto-generates getter/setters, toString, equals, hashCode unless already defined +@NoArgsConstructor +@AllArgsConstructor +public final class Datasource { + + private static final Pattern BAD_ID_PATTERN = Pattern.compile("\\s|-"); + + // METADATA_TYPE Enum + public enum METADATA_TYPE { + // data types + PSI_MI(true), // interactions to be converted to BioPAX L3 format + PSI_MITAB(true), // interactions to be converted to PSI-MI then to BioPAX L3 format + BIOPAX(true), // pathways and interactions in BioPAX L2 or L3 format + SBML(true), // SBML (requires a data source specific Converter to BioPAX) + WAREHOUSE(false), // warehouse data to be converted to BioPAX and used during the merge stage + MAPPING(false); //extra gene/protein id-mapping data (two column, TSV format: "some id or name" \t "primary uniprot/chebi AC") + + private final boolean pathwayData; + + METADATA_TYPE(boolean isPathwayData) { + this.pathwayData = isPathwayData; + } + + public boolean isNotPathwayData() { + return !pathwayData; + } + } + + private String identifier; + private List name; //data provider standard names + private String description; + private String dataUrl; + private String homepageUrl; + private String iconUrl; + private METADATA_TYPE type; + private String cleanerClass; + private String converterClass; + private Set files; + private String pubmedId; + private String availability; + private int numPathways; + private int numInteractions; + private int numPhysicalEntities; + + + public Set getFiles() { + if(files == null) { + files = new HashSet<>(); + } + return files; + } + + /** + * Sets the identifier. + * No spaces, dashes, allowed. 
+ * + * @param identifier metadata identifier + * @throws IllegalArgumentException if it's blank or contains whitespace or dashes (null is rejected first by Lombok's @NonNull check) + */ + public void setIdentifier(@NonNull String identifier) { + if (StringUtils.isBlank(identifier) || BAD_ID_PATTERN.matcher(identifier).find()) { + throw new IllegalArgumentException("Bad metadata identifier: " + identifier); + } + this.identifier = identifier; + } + + @Override + public String toString() { + return identifier; + } + + /** + * Creates a new Provenance from this Datasource and sets + * it to all Entity class objects in the model. + *

+ * Removes all other Provenance instances and + * corresponding dataSource property values + * from the model. + * + * @param model BioPAX model to update + * @param xmlBase xml:base to use for the Provenance + */ + public void setProvenanceFor(Model model, String xmlBase) { + Provenance pro; + + // we create URI from the Datasource identifier and version. + final String uri = xmlBase + identifier; + pro = (model.containsID(uri)) + ? (Provenance) model.getByID(uri) + : model.addNew(Provenance.class, uri); + + // parse/set names + String displayName = getName().iterator().next(); + pro.setDisplayName(displayName); + pro.setStandardName(standardName()); + + if (getName().size() > 2) + for (int i = 2; i < getName().size(); i++) + pro.addName(getName().get(i)); + + // add additional info about the current version, source, identifier, etc... + final String loc = getDataUrl(); + pro.addComment("Source " + + //skip for a local or empty (default) location + ((loc.startsWith("http:") || loc.startsWith("ftp:")) ? loc : "") + + " type: " + getType() + ", " + getDescription()); + + // replace for all entities + for (org.biopax.paxtools.model.level3.Entity ent : model.getObjects(org.biopax.paxtools.model.level3.Entity.class)) { + for (Provenance ds : new HashSet<>(ent.getDataSource())) + ent.removeDataSource(ds); + ent.addDataSource(pro); + } + + for (Score score : model.getObjects(Score.class)) + if (score.getScoreSource() == null) + score.setScoreSource(pro); + + // remove dangling Provenance from the model + ModelUtils.removeObjectsIfDangling(model, Provenance.class); + } + + /** + * Returns the standard name (the second one in the name list), + * if present, otherwise - returns the first name (display name) + * + * @return name + */ + public String standardName() { + //also capitalize (can be extremely useful...) 
+ if (name.size() > 1) + return StringUtils.capitalize(name.get(1)); + else + return StringUtils.capitalize(name.get(0)); + } + + @Override + public boolean equals(Object o) { + return (o instanceof Datasource) && identifier.equals(((Datasource) o).getIdentifier()); + } + + @Override + public int hashCode() { + return identifier.hashCode(); + } +} diff --git a/src/main/java/cpath/service/metadata/Index.java b/src/main/java/cpath/service/metadata/Index.java new file mode 100644 index 000000000..8c8731397 --- /dev/null +++ b/src/main/java/cpath/service/metadata/Index.java @@ -0,0 +1,60 @@ +package cpath.service.metadata; + +import org.biopax.paxtools.model.BioPAXElement; + +import cpath.service.jaxb.SearchResponse; +import org.biopax.paxtools.model.Model; + +public interface Index { + // search fields + String FIELD_URI = "uri"; + String FIELD_KEYWORD = "keyword"; //anything, e.g., names, terms, comments from child elements including + String FIELD_NAME = "name"; // standardName, displayName, other names + String FIELD_XREFID = "xrefid"; //xref.id + String FIELD_PATHWAY = "pathway"; //pathways and parent pathways to be inferred from entire biopax model + String FIELD_N_PARTICIPANTS = "participants"; //num. of PEs or Genes in a process or Complex + String FIELD_N_PROCESSES = "processes"; //no. 
bio processes (aka size) + + //Full-text search/filter fields (case-sensitive); + //index organism names, cell/tissue type (CV term), taxonomy id, but store only BioSource URIs + String FIELD_ORGANISM = "organism"; + //index data source names, but only URIs are stored in the index + String FIELD_DATASOURCE = "datasource"; + String FIELD_TYPE = "type"; + + //Default fields to use with the MultiFieldQueryParser; + //one can still search in other fields directly, like - pathway:some_keywords datasource:"pid" + String[] DEFAULT_FIELDS = + { + FIELD_KEYWORD, //data type properties (name, id, term, comment) of this and child elements; + FIELD_XREFID, + FIELD_NAME + }; + + void setMaxHitsPerPage(int maxHitsPerPage); + int getMaxHitsPerPage(); + + /** + * Full-text search for an object. + * + * @param query String (keywords or Lucene query string) + * @param page hits page number (when the number of hits exceeds a threshold) + * @param type - filter by class + * @param datasources - filter by datasource + * @param organisms - filter by organism + * @return ordered list of hits (by score) + */ + SearchResponse search(String query, int page, Class type, String[] datasources, String[] organisms); + + void save(BioPAXElement bpe); + + void save(Model model); + + void commit(); + + void close(); + + void refresh(); + + boolean isClosed(); +} diff --git a/src/main/java/cpath/service/metadata/Mapping.java b/src/main/java/cpath/service/metadata/Mapping.java new file mode 100644 index 000000000..7884cd782 --- /dev/null +++ b/src/main/java/cpath/service/metadata/Mapping.java @@ -0,0 +1,36 @@ +package cpath.service.metadata; + +import cpath.service.CPathUtils; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.NonNull; +import org.biopax.paxtools.controller.ModelUtils; + + +/** + * Bio id-mapping entry. 
+ *
+ * @author rodche
+ */
+@Data //Lombok: generates getters/setters, equals/hashCode and toString for the four fields below
+@NoArgsConstructor
+public final class Mapping {
+
+  private String srcDb;
+  private String dstDb;
+  private String srcId;
+  private String dstId;
+
+
+  public Mapping(@NonNull String srcDb, @NonNull String srcId, @NonNull String dstDb, @NonNull String dstId) {
+    this.srcDb = srcDb.toUpperCase(); //db names are stored upper-cased; NOTE(review): default-locale toUpperCase() - consider Locale.ROOT if names must be locale-independent
+    this.srcId = CPathUtils.fixIdForMapping(srcDb, srcId); //receives the db name as given by the caller, not the upper-cased field
+    this.dstDb = dstDb.toUpperCase();
+    this.dstId = CPathUtils.fixIdForMapping(dstDb, dstId);
+  }
+
+  public String docId() { //document id derived from the current field values
+    return ModelUtils.md5hex(toString()); //digest of the Lombok-generated toString(); presumably an MD5 hex string, per the helper's name
+  }
+
+}
diff --git a/src/main/java/cpath/service/metadata/Mappings.java b/src/main/java/cpath/service/metadata/Mappings.java
new file mode 100644
index 000000000..b9646d594
--- /dev/null
+++ b/src/main/java/cpath/service/metadata/Mappings.java
@@ -0,0 +1,38 @@
+package cpath.service.metadata;
+
+
+import java.util.List;
+
+/**
+ * BIO ID-mapping.
+ *
+ * @author rodche
+ */
+public interface Mappings {
+  String FIELD_SRCDB = "srcDb";
+  String FIELD_SRCID = "srcId";
+  String FIELD_DSTDB = "dstDb";
+  String FIELD_DSTID = "dstId";
+  String FIELD_DOCID = "docId";
+
+  List findByDstDbIgnoreCaseAndDstId(String dstDb, String dstId); //NOTE(review): raw List - elements are presumably Mapping; confirm
+
+  List findBySrcIdInAndDstDbIgnoreCase(List srcIds, String dstDb); //NOTE(review): raw Lists - srcIds presumably holds String ids; confirm
+
+  void save(Mapping mapping);
+
+  void commit();
+
+  void refresh();
+
+  void close();
+
+  boolean isClosed();
+
+  /**
+   * Total number of search hits for the given lucene query.
+   * @param queryString the Lucene query string to count hits for
+   * @return the total number of hits
+   */
+  long count(String queryString);
+}
diff --git a/src/main/java/cpath/service/metadata/Metadata.java b/src/main/java/cpath/service/metadata/Metadata.java
new file mode 100644
index 000000000..a36772387
--- /dev/null
+++ b/src/main/java/cpath/service/metadata/Metadata.java
@@ -0,0 +1,31 @@
+package cpath.service.metadata;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import java.util.List;
+
+/**
+ * Plain data holder (a Lombok bean, not a repository): a description, a version number,
+ * and the list of {@link Datasource} entries, plus a hand-written lookup of a data source
+ * by its identifier (case-insensitive); see {@link #findByIdentifier(String)}.
+ *
+ * @author rodche
+ */
+@Data //getter/setters will be auto-generated at compile time by Lombok, etc.
+@NoArgsConstructor
+@AllArgsConstructor
+public class Metadata {
+  private String description;
+  private int version;
+  private List<Datasource> datasources; //typed list: the lookup below calls Datasource#getIdentifier on its elements
+
+  public Datasource findByIdentifier(String identifier) { //returns the first matching data source, or null if there is none
+    if(datasources == null || datasources.isEmpty()) { //nothing to search in
+      return null;
+    }
+    return datasources.stream().filter(d -> d.getIdentifier().equalsIgnoreCase(identifier)) //case-insensitive; a null argument matches nothing
+      .findFirst().orElse(null);
+  }
+}
diff --git a/src/main/java/cpath/web/ApiControllerV1.java b/src/main/java/cpath/web/ApiControllerV1.java
new file mode 100644
index 000000000..42c6eb174
--- /dev/null
+++ b/src/main/java/cpath/web/ApiControllerV1.java
@@ -0,0 +1,320 @@
+package cpath.web;
+
+import java.util.*;
+
+import cpath.service.ErrorResponse;
+import cpath.service.api.Status;
+import cpath.web.args.*;
+import cpath.service.jaxb.*;
+
+import io.swagger.v3.oas.annotations.Operation;
+import org.apache.commons.lang3.StringUtils;
+
+import org.springframework.context.annotation.Profile;
+import org.springframework.validation.BindingResult;
+import org.springframework.web.bind.annotation.*;
+
+import jakarta.servlet.http.HttpServletRequest;
+import
jakarta.servlet.http.HttpServletResponse; +import javax.validation.Valid; + +import static org.springframework.http.MediaType.*; + +/** + * cPath2 API v1 (for backward compatibility with existing client apps) + * @deprecated - migrate to the API v2; see: @{@link ApiControllerV2} + */ +@Profile("web") +@RestController +@Deprecated +public class ApiControllerV1 extends BasicController { + /* + * Custom web request parameters bindings are defined in GlobalControllerAdvice class (perhaps not needed anymore...) + */ + + @GetMapping(path = "get", + produces = {"application/vnd.biopax.rdf+xml", "application/ld+json", "application/xml", "text/plain"}) + @Operation( + summary = "Fetch a BioPAX sub-model by URIs/IDs and optionally convert to another output format (query parameters must be URL-encoded and not too many).", + description = "Retrieve BioPAX pathways, interactions, physical entities from the db by URIs; " + + "optionally, convert the result to other output formats." + ) + public void fetchQueryGet(@Valid Fetch args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) { + fetchQuery(args, bindingResult, request, response); + } + + @PostMapping(path = "get", + consumes = {APPLICATION_FORM_URLENCODED_VALUE}, + produces = {"application/vnd.biopax.rdf+xml", "application/ld+json", "application/xml", "text/plain"}) + @Operation( + summary = "Fetch a BioPAX sub-model by URIs/IDs and optionally convert to another output format", + description = "Retrieve pathways/interactions/entities by their BioPAX URIs; " + + "optionally, convert the result to other output formats." 
+ ) + public void fetchQuery(@Valid Fetch args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + if(bindingResult.hasErrors()) { + errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, errorFromBindingResult(bindingResult)), + request, response); + } else { + String[] uris = args.getUri(); + Map options = new HashMap<>(); + if(args.getPattern()!=null && args.getPattern().length>0) { + //use StringUtils.join (not String.join) here due to it is an enum (not char seq.) array! + options.put("pattern", StringUtils.join(args.getPattern(), ",")); + } + ServiceResponse result = service.fetch(args.getFormat(), options, args.getSubpw(), uris); + stringResponse(args, result, request, response); + } + } + + @GetMapping(path = "top_pathways", + produces = {APPLICATION_JSON_VALUE, APPLICATION_XML_VALUE}) + @Operation( + summary = "HTTP GET, search for top pathways.", + description = "Find root/parent Pathway objects, i.e, ones that are neither 'controlled' " + + "nor a 'pathwayComponent' of another biological process; trivial pathways are excluded from the results;" + + " can filter by datasource and organism." + ) + public SearchResponse topPathwaysQueryGet(@Valid TopPathways args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + return topPathwaysQuery(args, bindingResult, request, response); + } + + @PostMapping(path = "top_pathways", + consumes = {APPLICATION_FORM_URLENCODED_VALUE}, + produces = {APPLICATION_JSON_VALUE, APPLICATION_XML_VALUE}) + @Operation( + summary = "Search for top-level bio pathways.", + description = "Find root/parent Pathway objects that are neither controlled " + + "nor a pathwayComponent of another biological process; trivial pathways are excluded from the results;" + + " can filter by datasource and organism." 
+ ) + public SearchResponse topPathwaysQuery(@Valid TopPathways args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + if(bindingResult.hasErrors()) { + errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, + errorFromBindingResult(bindingResult)), request, response); + return null; + } else { + ServiceResponse results = service.topPathways(args.getQ(), args.getOrganism(), args.getDatasource()); + if (results instanceof ErrorResponse) { + errorResponse(args, (ErrorResponse) results, request, response); + return null; + } else { + SearchResponse hits = (SearchResponse) results; + // log/track data access events + audit(request, args, hits.getProviders(), null); + hits.setVersion(service.settings().getVersion()); + return hits; + } + } + } + + @GetMapping(path = "traverse", produces = {APPLICATION_JSON_VALUE, APPLICATION_XML_VALUE}) + @Operation( + summary = "Access properties of BioPAX elements using graph path expressions", + description = "To collect specific BioPAX property values, use the following path accessor format: " + + "InitialClass/property[:filterClass]/[property][:filterClass]... A \"*\" sign after the property " + + "instructs the path accessor to transitively traverse that property. For example, the following " + + "path accessor will traverse through all physical entity components a complex, including components " + + "of nested complexes, if any: Complex/component*/entityReference/xref:UnificationXref. " + + "The next would list display names of all participants of interactions, which are pathway components " + + "of a pathway: Pathway/pathwayComponent:Interaction/participant*/displayName. " + + "Optional restriction ':filterClass' enables limiting the property values to a certain sub-class " + + "of the object property range. In the first example above, this is used to get only the unification xrefs. 
" + + "All the official BioPAX properties as well as additional derived classes and properties, " + + "such as inverse properties and interfaces that represent anonymous union classes in BioPAX OWL " + + "can be used in a path accessor." + ) + public TraverseResponse traverseQueryGet(@Valid Traverse args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) { + return traverseQuery(args, bindingResult, request, response); + } + + @PostMapping(path = "traverse", + consumes = {APPLICATION_FORM_URLENCODED_VALUE}, + produces = {APPLICATION_JSON_VALUE, APPLICATION_XML_VALUE}) + @Operation( + summary = "Access properties of BioPAX elements using graph path expressions (xpath-like).", + description = "To collect specific BioPAX property values, use the following path accessor format: " + + "InitialClass/property[:filterClass]/[property][:filterClass]... A \"*\" sign after the property " + + "instructs the path accessor to transitively traverse that property. For example, the following " + + "path accessor will traverse through all physical entity components a complex, including components " + + "of nested complexes, if any: Complex/component*/entityReference/xref:UnificationXref. " + + "The next would list display names of all participants of interactions, which are pathway components " + + "of a pathway: Pathway/pathwayComponent:Interaction/participant*/displayName. " + + "Optional restriction ':filterClass' enables limiting the property values to a certain sub-class " + + "of the object property range. In the first example above, this is used to get only the unification xrefs. " + + "All the official BioPAX properties as well as additional derived classes and properties, " + + "such as inverse properties and interfaces that represent anonymous union classes in BioPAX OWL " + + "can be used in a path accessor." 
+ ) + public TraverseResponse traverseQuery(@Valid Traverse args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + if(bindingResult.hasErrors()) { + errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, errorFromBindingResult(bindingResult)), + request, response); + } else { + ServiceResponse sr = service.traverse(args.getPath(), args.getUri()); + if(sr instanceof ErrorResponse) { + errorResponse(args, (ErrorResponse) sr, request, response); + } else { + audit(request, args, null, null); + TraverseResponse traverseResponse = (TraverseResponse) sr; + traverseResponse.setVersion(service.settings().getVersion()); + return traverseResponse; + } + } + return null; + } + + @GetMapping(path = "graph", + produces = {"application/vnd.biopax.rdf+xml", "application/ld+json", "application/xml", "text/plain"}) + @Operation( + summary = "BioPAX Graph Query and optional converter to another output format (parameters must be URL-encoded and not too many).", + description = "Find connections of bio network elements, such as the shortest path between " + + "two proteins or the neighborhood for a particular protein state or all states. " + + "Optionally, convert the result to other output formats." + + "Graph searches consider detailed BioPAX semantics, such as generics, nested complexes, " + + "and traverse the graph accordingly." 
+ ) + public void graphQueryGet(@Valid Graph args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + graphQuery(args, bindingResult, request, response); + } + + + @PostMapping(path = "graph", + consumes = {APPLICATION_FORM_URLENCODED_VALUE}, + produces = {"application/vnd.biopax.rdf+xml", "application/ld+json", "application/xml", "text/plain"}) + @Operation( + summary = "A BioPAX graph query and optional converter to another output format.", + description = "Find connections of bio network elements, such as the shortest path between " + + "two proteins or the neighborhood for a particular protein state or all states. " + + "Optionally, convert the result to other output formats." + + "Graph searches consider detailed BioPAX semantics, such as generics, nested complexes, " + + "and traverse the graph accordingly." + ) + public void graphQuery(@Valid Graph args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + //check for binding errors + if(bindingResult.hasErrors()) { + errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, errorFromBindingResult(bindingResult)), + request, response); + return; + } + + ServiceResponse result; + + Map formatOptions = new HashMap<>(); + if(args.getPattern()!=null && args.getPattern().length>0) + formatOptions.put("pattern", StringUtils.join(args.getPattern(),",")); + + switch (args.getKind()) { + case NEIGHBORHOOD: + result = service.getNeighborhood(args.getFormat(), formatOptions, args.getSource(), + args.getLimit(), args.getDirection(), args.getOrganism(), args.getDatasource(), args.getSubpw()); + break; + case PATHSBETWEEN: + result = service.getPathsBetween(args.getFormat(), formatOptions, args.getSource(), + args.getLimit(), args.getOrganism(), args.getDatasource(), args.getSubpw()); + break; + case PATHSFROMTO: + result = service.getPathsFromTo(args.getFormat(), formatOptions, args.getSource(), args.getTarget(), + args.getLimitType(), 
args.getLimit(), args.getOrganism(), args.getDatasource(), args.getSubpw()); + break; + case COMMONSTREAM: + result = service.getCommonStream(args.getFormat(), formatOptions, args.getSource(), + args.getLimit(), args.getDirection(), args.getOrganism(), args.getDatasource(), args.getSubpw()); + break; + default: + // impossible (should have failed earlier) + String msg = getClass().getCanonicalName() + " does not support " + args.getKind(); + errorResponse(args, new ErrorResponse(Status.INTERNAL_ERROR, msg), + request, response); + return; + } + + // write the result and log/track the service access events + stringResponse(args, result, request, response); + } + + @GetMapping(path = "search", produces = {APPLICATION_JSON_VALUE, APPLICATION_XML_VALUE}) + @Operation( + summary = "Full-text search in the BioPAX database with Lucene query syntax", + description = """ +

+ The index field names are: uri, keyword, name, pathway, xrefid, datasource, organism. + E.g., keyword is the default aggregate field that includes most of BioPAX element's properties + and nested properties (e.g. a Complex can be found by one of its member's names or EC Number). + Search results, specifically the URIs, can be starting point for the graph, get, traverse queries. + Search strings are case-insensitive, except for xrefid, uri, or when it's enclosed in quotes. +

+

+ Returns an ordered list of hits (maxHitsPerPage is configured on the server) as JSON or + XML depending on 'Accept: application/json' or + 'Accept: application/xml' request header. +

+ """ + ) + public SearchResponse searchQueryGet(@Valid Search args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + return searchQuery(args, bindingResult, request, response); + } + + @PostMapping(path = "search", + consumes = {APPLICATION_FORM_URLENCODED_VALUE}, + produces = {APPLICATION_JSON_VALUE, APPLICATION_XML_VALUE}) + @Operation( + summary = "Full-text search the BioPAX model using Lucene query syntax", + description = """ +

+ The index field names are: uri, keyword, name, pathway, xrefid, datasource, organism. + E.g., keyword is the default aggregate field that includes most of BioPAX element's properties + and nested properties (e.g. a Complex can be found by one of its member's names or EC Number). + Search results, specifically the URIs, can be starting point for the graph, get, traverse queries. + Search strings are case insensitive, except for xrefid, uri, or when it's enclosed in quotes. +

+

+ Returns an ordered list of hits (maxHitsPerPage is configured on the server) as JSON or + XML depending on 'Accept: application/json' or + 'Accept: application/xml' request header. +

+ """ + ) + public SearchResponse searchQuery(@Valid Search args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + SearchResponse searchResponse = null; + + if(bindingResult.hasErrors()) { + errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, + errorFromBindingResult(bindingResult)), request, response); + } else { + // get results from the service + ServiceResponse results = service.search(args.getQ(), args.getPage(), args.getBiopaxClass(), + args.getDatasource(), args.getOrganism()); + + if(results instanceof ErrorResponse) { + errorResponse(args, (ErrorResponse) results, request, response); + } else if(results != null) { + // log data access event for each data provider listed in the result + audit(request, args, ((SearchResponse)results).getProviders(), null); + searchResponse = (SearchResponse) results; + searchResponse.setVersion(service.settings().getVersion()); + } + } + + return searchResponse; + } + +} \ No newline at end of file diff --git a/src/main/java/cpath/web/ApiControllerV2.java b/src/main/java/cpath/web/ApiControllerV2.java new file mode 100644 index 000000000..052b9da14 --- /dev/null +++ b/src/main/java/cpath/web/ApiControllerV2.java @@ -0,0 +1,274 @@ +package cpath.web; + +import cpath.service.ErrorResponse; +import cpath.service.api.Status; +import cpath.service.jaxb.SearchResponse; +import cpath.service.jaxb.ServiceResponse; +import cpath.service.jaxb.TraverseResponse; +import cpath.web.args.*; +import io.swagger.v3.oas.annotations.Operation; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import org.apache.commons.lang3.StringUtils; +import org.springframework.context.annotation.Profile; +import org.springframework.validation.BindingResult; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import 
org.springframework.web.bind.annotation.RestController; + +import javax.validation.Valid; +import java.util.HashMap; +import java.util.Map; + +import static org.springframework.http.MediaType.APPLICATION_JSON_VALUE; +import static org.springframework.http.MediaType.APPLICATION_XML_VALUE; + +/** + * cPathSquared Model Access Web Service. + */ +@Profile("web") +@RestController +@RequestMapping("/v2") +public class ApiControllerV2 extends BasicController { + /* + * Custom web request parameters bindings are defined in GlobalControllerAdvice class (perhaps not needed anymore...) + * HTTP GET is not supported anymore (commented out; uncomment if necessary) + */ + + @PostMapping(path = "fetch", + produces = {"application/vnd.biopax.rdf+xml", "application/ld+json", "application/xml", "text/plain"}) + @Operation( + summary = "Fetch a BioPAX sub-model by URIs/IDs and optionally convert to another output format", + description = "Retrieve pathways/interactions/entities by their BioPAX URIs; " + + "optionally, convert the result to other output formats." + ) + public void fetchQuery(@Valid @RequestBody Fetch args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + if(bindingResult.hasErrors()) { + errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, errorFromBindingResult(bindingResult)), + request, response); + } else { + String[] uris = args.getUri(); + Map options = new HashMap<>(); + if(args.getPattern()!=null && args.getPattern().length>0) { + //use StringUtils.join (not String.join) here due to it is an enum (not char seq.) array! 
+ options.put("pattern", StringUtils.join(args.getPattern(), ",")); + } + ServiceResponse result = service.fetch(args.getFormat(), options, args.getSubpw(), uris); + stringResponse(args, result, request, response); + } + } + + @PostMapping(path = "top_pathways", produces = {APPLICATION_JSON_VALUE, APPLICATION_XML_VALUE}) + @Operation( + summary = "Search for top-level bio pathways.", + description = "Find root/parent Pathway objects that are neither controlled " + + "nor a pathwayComponent of another biological process; trivial pathways are excluded from the results;" + + " can filter by datasource and organism." + ) + public SearchResponse topPathwaysQuery(@Valid @RequestBody TopPathways args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + if(bindingResult.hasErrors()) { + errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, + errorFromBindingResult(bindingResult)), request, response); + return null; + } else { + ServiceResponse results = service.topPathways(args.getQ(), args.getOrganism(), args.getDatasource()); + if (results instanceof ErrorResponse) { + errorResponse(args, (ErrorResponse) results, request, response); + return null; + } else { + SearchResponse hits = (SearchResponse) results; + // log/track data access events + audit(request, args, hits.getProviders(), null); + hits.setVersion(service.settings().getVersion()); + return hits; + } + } + } + + @PostMapping(path = "traverse", produces = {APPLICATION_JSON_VALUE, APPLICATION_XML_VALUE}) + @Operation( + summary = "Access properties of BioPAX elements using graph path expressions (xpath-like).", + description = "To collect specific BioPAX property values, use the following path accessor format: " + + "InitialClass/property[:filterClass]/[property][:filterClass]... A \"*\" sign after the property " + + "instructs the path accessor to transitively traverse that property. 
For example, the following " + + "path accessor will traverse through all physical entity components a complex, including components " + + "of nested complexes, if any: Complex/component*/entityReference/xref:UnificationXref. " + + "The next would list display names of all participants of interactions, which are pathway components " + + "of a pathway: Pathway/pathwayComponent:Interaction/participant*/displayName. " + + "Optional restriction ':filterClass' enables limiting the property values to a certain sub-class " + + "of the object property range. In the first example above, this is used to get only the unification xrefs. " + + "All the official BioPAX properties as well as additional derived classes and properties, " + + "such as inverse properties and interfaces that represent anonymous union classes in BioPAX OWL " + + "can be used in a path accessor." + ) + public TraverseResponse traverseQuery(@Valid @RequestBody Traverse args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + if(bindingResult.hasErrors()) { + errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, errorFromBindingResult(bindingResult)), + request, response); + } else { + ServiceResponse sr = service.traverse(args.getPath(), args.getUri()); + if(sr instanceof ErrorResponse) { + errorResponse(args, (ErrorResponse) sr, request, response); + } else { + audit(request, args, null, null); + TraverseResponse traverseResponse = (TraverseResponse) sr; + traverseResponse.setVersion(service.settings().getVersion()); + return traverseResponse; + } + } + return null; + } + + @PostMapping(path = "search", produces = {APPLICATION_JSON_VALUE, APPLICATION_XML_VALUE}) + @Operation( + summary = "Full-text search the BioPAX model using Lucene query syntax", + description = """ +

+ The index field names are: uri, keyword, name, pathway, xrefid, datasource, organism. + E.g., keyword is the default aggregate field that includes most of BioPAX element's properties + and nested properties (e.g. a Complex can be found by one of its member's names or EC Number). + Search results, specifically the URIs, can be starting point for the graph, get, traverse queries. + Search strings are case insensitive, except for xrefid, uri, or when it's enclosed in quotes. +

+

+ Returns an ordered list of hits (maxHitsPerPage is configured on the server) as JSON or + XML depending on 'Accept: application/json' or + 'Accept: application/xml' request header. +

+ """ + ) + public SearchResponse searchQuery(@Valid @RequestBody Search args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + SearchResponse searchResponse = null; + + if(bindingResult.hasErrors()) { + errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, + errorFromBindingResult(bindingResult)), request, response); + } else { + // get results from the service + ServiceResponse results = service.search(args.getQ(), args.getPage(), args.getBiopaxClass(), + args.getDatasource(), args.getOrganism()); + + if(results instanceof ErrorResponse) { + errorResponse(args, (ErrorResponse) results, request, response); + } else if(results != null) { + // log data access event for each data provider listed in the result + audit(request, args, ((SearchResponse)results).getProviders(), null); + searchResponse = (SearchResponse) results; + searchResponse.setVersion(service.settings().getVersion()); + } + } + + return searchResponse; + } + + @PostMapping(path = "neighborhood", + produces = {"application/vnd.biopax.rdf+xml", "application/ld+json", "application/xml", "text/plain"}) + @Operation( + summary = "BioPAX Neighborhood graph query and optional converter to another output format.", + description = "Find the neighborhood network given the source bio entity URIs/IDs. " + + "Optionally, convert the result to other output formats." 
+ ) + public void neighborhoodQuery(@Valid @RequestBody Neighborhood args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + if(bindingResult.hasErrors()) { + errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, errorFromBindingResult(bindingResult)), + request, response); + return; + } + Map formatOptions = new HashMap<>(); + if(args.getPattern()!=null && args.getPattern().length>0) { + formatOptions.put("pattern", StringUtils.join(args.getPattern(), ",")); + } + ServiceResponse result = service.getNeighborhood(args.getFormat(), formatOptions, args.getSource(), + args.getLimit(), args.getDirection(), args.getOrganism(), args.getDatasource(), args.getSubpw()); + // write the result and log/track the service access events + stringResponse(args, result, request, response); + } + + @PostMapping(path = "pathsbetween", + produces = {"application/vnd.biopax.rdf+xml", "application/ld+json", "application/xml", "text/plain"}) + @Operation( + summary = "BioPAX PathsBetween graph query and optional converter to another output format.", + description = "Find the BioPAX subnetwork that includes all paths between given source bio entities (URIs/IDs). " + + "Optionally, convert the result to other output formats." 
+ ) + public void pathsbetweenQuery(@Valid @RequestBody PathsBetween args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + if(bindingResult.hasErrors()) { + errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, errorFromBindingResult(bindingResult)), + request, response); + return; + } + Map formatOptions = new HashMap<>(); + if(args.getPattern()!=null && args.getPattern().length>0) { + formatOptions.put("pattern", StringUtils.join(args.getPattern(), ",")); + } + ServiceResponse result = service.getPathsBetween(args.getFormat(), formatOptions, args.getSource(), + args.getLimit(), args.getOrganism(), args.getDatasource(), args.getSubpw()); + // write the result and log/track the service access events + stringResponse(args, result, request, response); + } + + @PostMapping(path = "pathsfromto", + produces = {"application/vnd.biopax.rdf+xml", "application/ld+json", "application/xml", "text/plain"}) + @Operation( + summary = "BioPAX PathsFromTo graph query and optional converter to another output format.", + description = "Find a subnetwork that includes entities on the paths from the source bio " + + "entities (URIs/IDs) to the targets (if empty array, then PathsBetween algorithm is used). " + + "Optionally, convert the result to other output formats." 
+ ) + public void pathsfromtoQuery(@Valid @RequestBody PathsFromTo args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + if(bindingResult.hasErrors()) { + errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, errorFromBindingResult(bindingResult)), + request, response); + return; + } + Map formatOptions = new HashMap<>(); + if(args.getPattern()!=null && args.getPattern().length>0) { + formatOptions.put("pattern", StringUtils.join(args.getPattern(), ",")); + } + ServiceResponse result = service.getPathsFromTo(args.getFormat(), formatOptions, args.getSource(), args.getTarget(), + args.getLimitType(), args.getLimit(), args.getOrganism(), args.getDatasource(), args.getSubpw()); + // write the result and log/track the service access events + stringResponse(args, result, request, response); + } + + @PostMapping(path = "commonstream", + produces = {"application/vnd.biopax.rdf+xml", "application/ld+json", "application/xml", "text/plain"}) + @Operation( + summary = "BioPAX CommonStream graph query and optional converter to another output format.", + description = "Find a BioPAX common stream subnetwork from the source bio entities (URIs/IDs). " + + "Optionally, convert the result to other output formats." 
+ ) + public void commonstreamQuery(@Valid @RequestBody CommonStream args, BindingResult bindingResult, + HttpServletRequest request, HttpServletResponse response) + { + if(bindingResult.hasErrors()) { + errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, errorFromBindingResult(bindingResult)), + request, response); + return; + } + Map formatOptions = new HashMap<>(); + if(args.getPattern()!=null && args.getPattern().length>0) { + formatOptions.put("pattern", StringUtils.join(args.getPattern(), ",")); + } + ServiceResponse result = service.getCommonStream(args.getFormat(), formatOptions, args.getSource(), + args.getLimit(), args.getDirection(), args.getOrganism(), args.getDatasource(), args.getSubpw()); + // write the result and log/track the service access events + stringResponse(args, result, request, response); + } + +} \ No newline at end of file diff --git a/src/main/java/cpath/web/BasicController.java b/src/main/java/cpath/web/BasicController.java index dcdcf35d1..db3149601 100644 --- a/src/main/java/cpath/web/BasicController.java +++ b/src/main/java/cpath/web/BasicController.java @@ -9,27 +9,28 @@ import java.util.Arrays; import java.util.Set; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; +import jakarta.json.Json; +import jakarta.json.JsonObjectBuilder; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; import static cpath.service.api.Status.*; -import cpath.service.api.CPathService; +import cpath.service.api.Service; import cpath.service.ErrorResponse; import cpath.service.api.OutputFormat; import cpath.web.args.ServiceQuery; -import cpath.web.args.Search; -import cpath.web.args.TopPathways; -import cpath.web.args.Traverse; import cpath.service.jaxb.*; import org.apache.commons.io.output.ByteArrayOutputStream; +import org.apache.commons.lang3.StringUtils; import org.biopax.paxtools.io.SimpleIOHandler; import org.biopax.paxtools.model.BioPAXLevel; import 
org.biopax.paxtools.model.Model; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.util.CollectionUtils; import org.springframework.validation.BindingResult; import org.springframework.validation.FieldError; @@ -42,10 +43,10 @@ public abstract class BasicController { private static final Logger log = LoggerFactory.getLogger(BasicController.class); - protected CPathService service; + protected Service service; @Autowired - public void setService(CPathService service) { + public void setService(Service service) { this.service = service; } @@ -58,12 +59,12 @@ final void errorResponse(ServiceQuery args, HttpServletRequest request, HttpServletResponse response) { - //TODO: eventually switch to using @RestControllerAdvice and @ExceptionHandler + // TODO: switch to using @RestControllerAdvice and @ExceptionHandler try { //log/track using a shorter message - track(request, args, null, error); + audit(request, args, null, error); //return a long detailed message - response.sendError(error.getStatus().getCode(), error.getStatus().getCode() + "; " + error.toString()); + response.sendError(error.getStatus().getCode(), error.getStatus().getCode() + "; " + error); } catch (IOException e) { log.error("FAILED sending an error response; " + e); } @@ -73,7 +74,7 @@ final void errorResponse(ServiceQuery args, /* * Builds an error message from * the web parameters binding result - * if there're errors. + * if there are errors. */ final String errorFromBindingResult(BindingResult bindingResult) { @@ -87,8 +88,7 @@ final String errorFromBindingResult(BindingResult bindingResult) rejectedVal = "empty array"; } } - sb.append(fe.getField()).append(" was '").append(rejectedVal).append("'; ") - .append(fe.getDefaultMessage()).append(". 
"); + sb.append(fe.getDefaultMessage()).append("; value: ").append(rejectedVal); } return sb.toString(); @@ -110,7 +110,7 @@ final void stringResponse(ServiceQuery command, final DataResponse dataResponse = (DataResponse) result; // log/track one data access event for each data provider listed in the result - track(request, command, dataResponse.getProviders(), null); + audit(request, command, dataResponse.getProviders(), null); if (dataResponse.getData() instanceof Path) { //get the temp file @@ -123,8 +123,7 @@ final void stringResponse(ServiceQuery command, Files.copy(resultFile, response.getOutputStream()); } } catch (IOException e) { - String msg = String.format("Failed to process the (temporary) result file %s; %s.", - resultFile, e.toString()); + String msg = String.format("Failed to process the (temporary) result file %s; %s.", resultFile, e); errorResponse(command, new ErrorResponse(INTERNAL_ERROR, msg), request, response); } finally { try { @@ -146,17 +145,16 @@ final void stringResponse(ServiceQuery command, } //else - SIF, GSEA formats do not allow for comment lines anyway } catch (IOException e) { - String msg = String.format("Failed writing a trivial response: %s.", e.toString()); + String msg = String.format("Failed writing a trivial response: %s.", e); errorResponse(command, new ErrorResponse(INTERNAL_ERROR, msg), request, response); } } else { //it's probably a bug - String msg = String.format("BUG: DataResponse.data has value: %s, %s instead of a Path or null.", - dataResponse.getData().getClass().getSimpleName(), dataResponse.toString()); + dataResponse.getData().getClass().getSimpleName(), dataResponse); errorResponse(command, new ErrorResponse(INTERNAL_ERROR, msg), request, response); } } else { //it's a bug - - String msg = String.format("BUG: Unknown ServiceResponse: %s, %s ", - result.getClass().getSimpleName(), result.toString()); + String msg = String.format("BUG: Unknown ServiceResponse: %s, %s ", result, result); errorResponse(command, 
new ErrorResponse(INTERNAL_ERROR, msg), request, response); } } @@ -192,75 +190,51 @@ final BufferedImage scaleImage(BufferedImage img, int width, int height) /* * Extracts the client's IP from the request headers. */ - private static String clientIpAddress(HttpServletRequest request) +// private static String clientIpAddress(HttpServletRequest request) +// { +// String ip = request.getHeader("X-Forwarded-For"); +// if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) { +// ip = request.getHeader("Proxy-Client-IP"); +// } +// if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) { +// ip = request.getHeader("WL-Proxy-Client-IP"); +// } +// if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) { +// ip = request.getHeader("HTTP_CLIENT_IP"); +// } +// if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) { +// ip = request.getHeader("HTTP_X_FORWARDED_FOR"); +// } +// if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) { +// ip = request.getRemoteAddr(); +// } +// return ip; +// } + + void audit(HttpServletRequest request, ServiceQuery command, Set providers, ErrorResponse err) { - String ip = request.getHeader("X-Forwarded-For"); - if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) { - ip = request.getHeader("Proxy-Client-IP"); - } - if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) { - ip = request.getHeader("WL-Proxy-Client-IP"); - } - if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) { - ip = request.getHeader("HTTP_CLIENT_IP"); - } - if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) { - ip = request.getHeader("HTTP_X_FORWARDED_FOR"); - } - if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) { - ip = request.getRemoteAddr(); - } - - return ip; - } - - - // data access logging and tracking - void track(HttpServletRequest request, ServiceQuery command, Set providers, ErrorResponse err) - { - final String ip = 
clientIpAddress(request); + JsonObjectBuilder jb = Json.createObjectBuilder(); - String client = null; - if(command!=null) - client = command.getUser(); - if (client == null || client.isEmpty()) { - //extract http client tool name/version part: - client = request.getHeader("User-Agent"); - if (client != null && !client.isEmpty() && client.contains(" ")) { - client = client.substring(0, client.indexOf(" ")); - } - } +//get user-agent, IP, status, etc. from nginx/apache logs instead of here in the app... +// jb.add("ip", clientIpAddress(request)); - Integer status = 200; if (err != null) { - status = err.getErrorCode(); - service.track(ip, "error", err.getErrorMsg()); + jb.add("error", err.toString()); } - if(command!=null) - service.track(ip, "command", command.cmd()); - - service.track(ip, "client", client); + if(command != null) { + // TODO: change if there is any use (now we just add truncated string, not json object here + //(can be very large if many URIs or SIF patterns are submitted in the request) + jb.add("query", StringUtils.truncate(command.toString(),128)); + } - String msg = status.toString(); - if(command!=null) - msg += ": " + command.toString(); - if (providers != null) { - for (String provider : providers) service.track(ip, "provider", provider); - if (!providers.isEmpty()) msg += "; pro:" + String.join(",", providers); + if (!CollectionUtils.isEmpty(providers)) { + jb.add("pro", Json.createArrayBuilder(providers)); } - service.track(ip, "all", msg); +// jb.add("accept", request.getHeader("Accept")); - if(command!=null) { - //a hack to properly detect resulting data format in some cases - String f = command.outputFormat().toLowerCase(); - if (command instanceof Search || command instanceof TopPathways || command instanceof Traverse) { - if ((String.valueOf(request.getHeader("accept")).contains("application/json"))) { - f = "json"; - } - } - service.track(ip, "format", f); - } + log.info(jb.build().toString()); } + } \ No newline at end of file diff 
--git a/src/main/java/cpath/web/BiopaxModelController.java b/src/main/java/cpath/web/BiopaxModelController.java deleted file mode 100644 index bb8ed9e80..000000000 --- a/src/main/java/cpath/web/BiopaxModelController.java +++ /dev/null @@ -1,249 +0,0 @@ -package cpath.web; - -import java.util.*; - -import cpath.service.ErrorResponse; -import cpath.service.api.GraphType; -import cpath.service.api.OutputFormat; -import cpath.service.api.Status; -import cpath.web.args.*; -import cpath.service.jaxb.*; -import cpath.web.args.binding.*; - -import io.swagger.v3.oas.annotations.Operation; -import org.apache.commons.lang3.StringUtils; -import org.biopax.paxtools.model.level3.Protein; -import org.biopax.paxtools.pattern.miner.SIFType; -import org.biopax.paxtools.query.algorithm.Direction; -import org.biopax.paxtools.query.algorithm.LimitType; - -import org.springframework.context.annotation.Profile; -import org.springframework.validation.BindingResult; -import org.springframework.web.bind.WebDataBinder; -import org.springframework.web.bind.annotation.*; - -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; -import javax.validation.Valid; - -/** - * cPathSquared Model Access Web Service. - */ -@Profile("web") -@RestController -@RequestMapping(method = {RequestMethod.GET, RequestMethod.POST}) -public class BiopaxModelController extends BasicController { - - /** - * This configures the web request parameters binding, i.e., - * conversion to the corresponding java types; for example, - * "neighborhood" is recognized as {@link GraphType#NEIGHBORHOOD}, - * "protein" - {@link Protein} , etc. - * Depending on the editor, illegal query parameters may result - * in an error or just NULL value. 
- * - * @param binder - */ - @InitBinder - public void initBinder(WebDataBinder binder) { - binder.registerCustomEditor(GraphType.class, new GraphTypeEditor()); - binder.registerCustomEditor(Direction.class, new GraphQueryDirectionEditor()); - binder.registerCustomEditor(LimitType.class, new GraphQueryLimitEditor()); - binder.registerCustomEditor(OutputFormat.class, new OutputFormatEditor()); - binder.registerCustomEditor(SIFType.class, new SIFTypeEditor()); //also works for the SIFEnum sub-class - binder.registerCustomEditor(Class.class, new BiopaxTypeEditor()); - } - - // Get by ID (URI) command - @RequestMapping("/get") - @Operation( - summary = "Get BioPAX elements (as sub-model) by URIs.", - description = "Retrieve BioPAX pathways, interactions, physical entities from the db by URIs; " + - "optionally, convert the result to other output formats." - ) - public void elementById(@Valid Get args, BindingResult bindingResult, - HttpServletRequest request, HttpServletResponse response) - { - if(bindingResult.hasErrors()) { - errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, errorFromBindingResult(bindingResult)), - request, response); - } else { - String[] uris = args.getUri(); - Map options = new HashMap(); - if(args.getPattern()!=null && args.getPattern().length>0) { - //used StringUtils.join vs String.join due to it's array of enum. objects, not char sequences. - options.put("pattern", StringUtils.join(args.getPattern(), ",")); - } - ServiceResponse result = service.fetch(args.getFormat(), options, args.getSubpw(), uris); - stringResponse(args, result, request, response); - } - } - - - @RequestMapping("/top_pathways") - @Operation( - summary = "Search for top pathways.", - description = "Find root/parent Pathway objects, i.e, ones that are neither 'controlled' " + - "nor a 'pathwayComponent' of another biological process; trivial pathways are excluded from the results;" + - " can filter by datasource and organism." 
- ) - public SearchResponse topPathways(@Valid TopPathways args, BindingResult bindingResult, - HttpServletRequest request, HttpServletResponse response) - { - - if(bindingResult.hasErrors()) { - errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, - errorFromBindingResult(bindingResult)), request, response); - return null; - } else { - ServiceResponse results = service.topPathways(args.getQ(), args.getOrganism(), args.getDatasource()); - if (results instanceof ErrorResponse) { - errorResponse(args, (ErrorResponse) results, request, response); - return null; - } else { - SearchResponse hits = (SearchResponse) results; - // log/track data access events - track(request, args, hits.getProviders(), null); - hits.setVersion(service.settings().getVersion()); - return hits; - } - } - } - - - @RequestMapping("/traverse") - @Operation( - summary = "Access properties of BioPAX elements using graph path expressions", - description = "To collect specific BioPAX property values, use the following path accessor format: " + - "InitialClass/property[:filterClass]/[property][:filterClass]... A \"*\" sign after the property " + - "instructs the path accessor to transitively traverse that property. For example, the following " + - "path accessor will traverse through all physical entity components a complex, including components " + - "of nested complexes, if any: Complex/component*/entityReference/xref:UnificationXref. " + - "The next would list display names of all participants of interactions, which are pathway components " + - "of a pathway: Pathway/pathwayComponent:Interaction/participant*/displayName. " + - "Optional restriction ':filterClass' enables limiting the property values to a certain sub-class " + - "of the object property range. In the first example above, this is used to get only the unification xrefs. 
" + - "All the official BioPAX properties as well as additional derived classes and properties, " + - "such as inverse properties and interfaces that represent anonymous union classes in BioPAX OWL " + - "can be used in a path accessor." - ) - public TraverseResponse traverse(@Valid Traverse args, BindingResult bindingResult, - HttpServletRequest request, HttpServletResponse response) - { - if(bindingResult.hasErrors()) { - errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, errorFromBindingResult(bindingResult)), - request, response); - } else { - ServiceResponse sr = service.traverse(args.getPath(), args.getUri()); - if(sr instanceof ErrorResponse) { - errorResponse(args, (ErrorResponse) sr, request, response); - } else { - track(request, args, null, null); - //TODO: log/track data providers that occur is the traverse query result - TraverseResponse traverseResponse = (TraverseResponse) sr; - traverseResponse.setVersion(service.settings().getVersion()); - return traverseResponse; - } - } - return null; - } - - @RequestMapping("/graph") - @Operation( - summary = "BioPAX graph query.", - description = "Find connections of bio network elements, such as the shortest path between " + - "two proteins or the neighborhood for a particular protein state or all states. " + - "Optionally, convert the result to other output formats." + - "Graph searches consider detailed BioPAX semantics, such as generics, nested complexes, " + - "and traverse the graph accordingly. We integrate data from multiple sources " + - "and consistently normalize Xref, EntityReference, Provenance, BioSource, and ControlledVocabulary objects " + - "if we are absolutely sure about several objects of the same type are equivalent. " + - "We do not merge physical entities (states) and processes from different sources automatically, " + - "as accurately matching and aligning pathways at that level is still an open research problem." 
- ) - public void graphQuery(@Valid Graph args, BindingResult bindingResult, - HttpServletRequest request, HttpServletResponse response) - { - //check for binding errors - if(bindingResult.hasErrors()) { - errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, errorFromBindingResult(bindingResult)), - request, response); - return; - } - - ServiceResponse result; - - Map formatOptions = new HashMap(); - if(args.getPattern()!=null && args.getPattern().length>0) - formatOptions.put("pattern", StringUtils.join(args.getPattern(),",")); - - switch (args.getKind()) { - case NEIGHBORHOOD: - result = service.getNeighborhood(args.getFormat(), formatOptions, args.getSource(), - args.getLimit(), args.getDirection(), args.getOrganism(), args.getDatasource(), args.getSubpw()); - break; - case PATHSBETWEEN: - result = service.getPathsBetween(args.getFormat(), formatOptions, args.getSource(), - args.getLimit(), args.getOrganism(), args.getDatasource(), args.getSubpw()); - break; - case PATHSFROMTO: - result = service.getPathsFromTo(args.getFormat(), formatOptions, args.getSource(), - args.getTarget(), args.getLimit(), args.getOrganism(), args.getDatasource(), args.getSubpw()); - break; - case COMMONSTREAM: - result = service.getCommonStream(args.getFormat(), formatOptions, args.getSource(), - args.getLimit(), args.getDirection(), args.getOrganism(), args.getDatasource(), args.getSubpw()); - break; - default: - // impossible (should have failed earlier) - String msg = getClass().getCanonicalName() + " does not support " + args.getKind(); - errorResponse(args, new ErrorResponse(Status.INTERNAL_ERROR, msg), - request, response); - return; - } - - // write the result and log/track the service access events - stringResponse(args, result, request, response); - } - - @RequestMapping(value="/search") - @Operation( - summary = "A full-text search in the BioPAX database using Lucene query syntax", - description = "Index fields (case-sensitive): uri, keyword, name, pathway, xrefid, 
datasource, " + - "and organism can be optionally used in a query string. For example, the 'pathway' field " + - "helps find entities and interactions by keywords or uris matching their parent pathways'; " + - "'xrefid' helps find objects by direct or nested Xref; 'keyword' (default search field) " + - "aggregates most of BioPAX properties of each element and child elements (e.g. a Complex " + - "can be found by one of its member's name or EC Number). " + - "Filters by datasource, organism " + - "and BioPAX type can be also used. Search can be used to select starting points (seeds) " + - "for a graph query (see: '/graph','/traverse','/get')." - ) - public SearchResponse search(@Valid Search args, BindingResult bindingResult, - HttpServletRequest request, HttpServletResponse response) - { - SearchResponse searchResponse = null; - - if(bindingResult.hasErrors()) { - errorResponse(args, new ErrorResponse(Status.BAD_REQUEST, - errorFromBindingResult(bindingResult)), request, response); - } else { - // get results from the service - ServiceResponse results = service.search(args.getQ(), args.getPage(), args.getType(), - args.getDatasource(), args.getOrganism()); - - if(results instanceof ErrorResponse) { - errorResponse(args, (ErrorResponse) results, request, response); - } - else { //if, due to a bug, results==null, it'll throw a NullPointerException - // log/track one data access event for each data provider listed in the result - track(request, args, ((SearchResponse)results).getProviders(), null); - searchResponse = (SearchResponse) results; - searchResponse.setVersion(service.settings().getVersion()); - } - } - - return searchResponse; - } - -} \ No newline at end of file diff --git a/src/main/java/cpath/web/GlobalControllerAdvice.java b/src/main/java/cpath/web/GlobalControllerAdvice.java index 21770da97..75b1d74b7 100644 --- a/src/main/java/cpath/web/GlobalControllerAdvice.java +++ b/src/main/java/cpath/web/GlobalControllerAdvice.java @@ -1,10 +1,22 @@ package 
cpath.web; import cpath.service.Settings; +import cpath.service.api.GraphType; +import cpath.service.api.OutputFormat; +import cpath.service.metadata.Datasource; +import cpath.web.args.binding.*; +import org.biopax.paxtools.model.BioPAXElement; +import org.biopax.paxtools.pattern.miner.SIFEnum; +import org.biopax.paxtools.pattern.miner.SIFType; +import org.biopax.paxtools.query.algorithm.Direction; +import org.biopax.paxtools.query.algorithm.LimitType; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Profile; +import org.springframework.web.bind.WebDataBinder; import org.springframework.web.bind.annotation.ControllerAdvice; +import org.springframework.web.bind.annotation.InitBinder; import org.springframework.web.bind.annotation.ModelAttribute; +import org.springframework.web.context.request.WebRequest; @Profile("web") @ControllerAdvice @@ -17,4 +29,20 @@ public class GlobalControllerAdvice { public Settings cpath() { return cpath; } + + /* + These unfortunately do not apply to a @RequestBody bean (json/xml) binding, mapping (HTTP POST); + TODO: we changed to the bean property setters so that these custom editors might not be necessary anymore + */ + @InitBinder + public void registerCustomEditors(WebDataBinder binder, WebRequest request) { + binder.registerCustomEditor(Class.class, new BiopaxTypeEditor()); + binder.registerCustomEditor(Datasource.METADATA_TYPE.class, new MetadataTypeEditor()); + binder.registerCustomEditor(OutputFormat.class, new OutputFormatEditor()); + binder.registerCustomEditor(Direction.class, new DirectionEditor()); + binder.registerCustomEditor(GraphType.class, new GraphTypeEditor()); + binder.registerCustomEditor(LimitType.class, new GraphQueryLimitEditor()); + binder.registerCustomEditor(SIFType.class, new SIFTypeEditor()); //also works for the SIFEnum subclass +// binder.registerCustomEditor(SIFEnum.class, new SIFTypeEditor()); + } } diff --git 
a/src/main/java/cpath/web/HelpController.java b/src/main/java/cpath/web/HelpController.java index 6e5ceb4b4..0f95c5457 100644 --- a/src/main/java/cpath/web/HelpController.java +++ b/src/main/java/cpath/web/HelpController.java @@ -3,50 +3,37 @@ import cpath.service.api.GraphType; import cpath.service.api.OutputFormat; import cpath.service.jaxb.*; -import cpath.web.args.binding.*; +import io.swagger.v3.oas.annotations.Hidden; import org.biopax.paxtools.controller.EditorMap; import org.biopax.paxtools.controller.PropertyEditor; import org.biopax.paxtools.controller.SimpleEditorMap; import org.biopax.paxtools.model.BioPAXElement; import org.biopax.paxtools.model.BioPAXLevel; import org.biopax.paxtools.query.algorithm.Direction; +import org.biopax.paxtools.query.algorithm.LimitType; import org.springframework.context.annotation.Profile; +import org.springframework.http.MediaType; import org.springframework.util.ResourceUtils; -import org.springframework.web.bind.WebDataBinder; import org.springframework.web.bind.annotation.*; -import javax.servlet.http.HttpServletResponse; +import jakarta.servlet.http.HttpServletResponse; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; /** - * Help Controller. - * - * Returns XML or JSON documents. + * Help Controller (returns JSON docs). * {@link Help} bean. + * @see GlobalControllerAdvice */ @Profile("web") +@Hidden @RestController -@RequestMapping(method = RequestMethod.GET) +@RequestMapping(method = RequestMethod.GET, produces = {MediaType.APPLICATION_JSON_VALUE}) public class HelpController extends BasicController { - /** - * Customizes request parameters conversion to proper internal types, - * e.g., "network of interest" is recognized as GraphType.NETWORK_OF_INTEREST, etc. 
- * - * @param binder - */ - @InitBinder - public void initBinder(WebDataBinder binder) { - binder.registerCustomEditor(OutputFormat.class, new OutputFormatEditor()); - binder.registerCustomEditor(GraphType.class, new GraphTypeEditor()); - binder.registerCustomEditor(Direction.class, new DirectionEditor()); - binder.registerCustomEditor(Class.class, new BiopaxTypeEditor()); - } - @RequestMapping("/help/schema") public void getSchema(HttpServletResponse response) throws Exception { response.setContentType("application/xml"); @@ -54,17 +41,12 @@ public void getSchema(HttpServletResponse response) throws Exception { Files.copy(xsdPath, response.getOutputStream()); } - /* - * List of formats that web methods return - * - * @return - */ @RequestMapping("/help/formats") public Help getFormats() { Help help = new Help(); help.setId("formats"); help.setTitle("Output Formats"); - help.setInfo("cPath2 can convert BioPAX to several text formats"); + help.setInfo("can convert BioPAX to several text formats"); help.setExample("help/formats/sif"); for (OutputFormat f : OutputFormat.values()) help.addMember(getFormat(f)); @@ -72,7 +54,6 @@ public Help getFormats() { return help; } - @RequestMapping("/help/formats/{fmt}") public Help getFormat(@PathVariable OutputFormat fmt) { if (fmt == null) return getFormats(); @@ -83,27 +64,20 @@ public Help getFormat(@PathVariable OutputFormat fmt) { return help; } - /** - * List of BioPAX L3 Classes - * - * @return - */ @RequestMapping("/help/types") public Help getBiopaxTypes() { Help help = new Help(); for (Class t : SimpleEditorMap.L3.getKnownSubClassesOf(BioPAXElement.class)) { - if (BioPAXLevel.L3.getDefaultFactory().getImplClass(t) != null) + if (BioPAXLevel.L3.getDefaultFactory().getImplClass(t) != null) { help.addMember(new Help(t.getSimpleName())); + } } help.setId("types"); help.setTitle("BioPAX classes"); - help.setInfo("Objects of the following BioPAX L3 class " + - "(and some abstract ones) " - + 
System.getProperty("line.separator") + - "are persisted/indexed/searchable in the system " + - "(names are case insensitive):"); + help.setInfo("These BioPAX Level3 classes (including some abstract) can be used in search/traverse queries " + + "(case insensitive):"); help.setExample("search?type=pathway&q=b*"); return help; } @@ -115,7 +89,7 @@ public Help getBiopaxType(@PathVariable Class type) { Help h = new Help(type.getSimpleName()); h.setTitle(type.getSimpleName()); - h.setInfo("See: biopax.org, http://www.biopax.org/webprotege"); + h.setInfo("See: biopax.org, https://www.biopax.org/owldoc/Level3/"); return h; } @@ -136,7 +110,6 @@ public Help getBiopaxTypeProperties(@PathVariable Class return h; } - @RequestMapping("/help/types/properties") public Help getBiopaxTypesProperties() { Help h = new Help("properties"); @@ -170,7 +143,6 @@ public Help getBiopaxTypeInverseProperties(@PathVariable Class queryForMetadata() { TreeMap props = new TreeMap(); @@ -68,17 +62,17 @@ public Map queryForMetadata() { } // to return a xml or json data http response - @RequestMapping(value = "/metadata/datasources", produces = {APPLICATION_JSON_VALUE}) - public List queryForDatasources() { + @GetMapping(path = "/metadata/datasources", produces = {APPLICATION_JSON_VALUE}) + public List queryForDatasources() { log.debug("Getting pathway datasources info."); //pathway/interaction data sources - List ds = new ArrayList<>(); + List ds = new ArrayList<>(); //warehouse data sources - List wh = new ArrayList<>(); + List wh = new ArrayList<>(); - for (Metadata m : service.metadata().findAll()) { + for (Datasource m : service.metadata().getDatasources()) { //set dynamic extra fields - if (m.isNotPathwayData()) { + if (m.getType().isNotPathwayData()) { wh.add(m); } else { ds.add(m); @@ -91,9 +85,9 @@ public List queryForDatasources() { return ds; } - @RequestMapping(value = "/metadata/datasources/{identifier}", produces = {APPLICATION_JSON_VALUE}) - public Metadata datasource(@PathVariable 
String identifier) { - Metadata m = service.metadata().findByIdentifier(identifier); + @GetMapping(path = "/metadata/datasources/{identifier}", produces = {APPLICATION_JSON_VALUE}) + public Datasource datasource(@PathVariable String identifier) { + Datasource m = service.metadata().findByIdentifier(identifier); if (m == null) return null; return m; diff --git a/src/main/java/cpath/web/OpenApiConfig.java b/src/main/java/cpath/web/OpenApiConfig.java index e3d91542b..c3f540603 100644 --- a/src/main/java/cpath/web/OpenApiConfig.java +++ b/src/main/java/cpath/web/OpenApiConfig.java @@ -25,24 +25,23 @@ public OpenAPI customOpenAPI() { private Info apiInfo() { return new Info() - .title(settings.getName() + " " + settings.getVersion()) - .description("PC2 (cPath2) web services") - .version("14") + .title("Bio pathway services") + .description(settings.getDescription() + "
© " + settings.getOrganization()) + .version(settings.getVersion()) .contact(apiContact()) .license(apiLicence()); } private License apiLicence() { return new License() - .name("MIT Licence") - .url("https://opensource.org/licenses/mit-license.php"); + .name("MIT Licence").url("https://opensource.org/licenses/mit-license.php"); } private Contact apiContact() { return new Contact() - .name("Pathway Commons") - .email("pathway-commons-help@googlegroups.com") - .url("http://www.pathwaycommons.org"); + .name(settings.getOrganization()) + .email(settings.getEmail()) + .url(settings.getUrl()); } } diff --git a/src/main/java/cpath/web/PagesController.java b/src/main/java/cpath/web/PagesController.java index 7ed7ffb87..56871a936 100644 --- a/src/main/java/cpath/web/PagesController.java +++ b/src/main/java/cpath/web/PagesController.java @@ -7,8 +7,11 @@ import javax.imageio.ImageIO; +import cpath.service.Settings; +import io.swagger.v3.oas.annotations.Hidden; import org.springframework.context.annotation.Profile; import org.springframework.stereotype.Controller; +import org.springframework.web.bind.annotation.ModelAttribute; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMethod; import org.springframework.web.bind.annotation.ResponseBody; @@ -16,9 +19,15 @@ @Controller @Profile("web") +@Hidden //from swagger/openapi @RequestMapping(method = RequestMethod.GET) public class PagesController extends BasicController { + @ModelAttribute("cpath") + public Settings instance() { + return service.settings(); + } + @RequestMapping({"/api", "/swagger"}) public String swagger() { return "redirect:swagger-ui.html"; @@ -65,7 +74,7 @@ String robots() { // deny robots access to logs, web services and data files, // but allow - to web page resources (css, js, images) return "User-agent: *\n" + - "Disallow: /get\n" + + "Disallow: /fetch\n" + "Disallow: /search\n" + "Disallow: /graph\n" + "Disallow: /top_pathways\n" + diff 
--git a/src/main/java/cpath/web/WebApplication.java b/src/main/java/cpath/web/WebApplication.java index c9a0d7ad1..206739ead 100644 --- a/src/main/java/cpath/web/WebApplication.java +++ b/src/main/java/cpath/web/WebApplication.java @@ -2,7 +2,6 @@ import org.springframework.boot.SpringBootConfiguration; import org.springframework.context.annotation.Profile; -import org.springframework.http.MediaType; import org.springframework.web.servlet.config.annotation.*; import org.springframework.web.servlet.view.InternalResourceViewResolver; import org.springframework.web.servlet.view.JstlView; @@ -12,27 +11,6 @@ @Profile({"web"}) public class WebApplication implements WebMvcConfigurer { - // Enable content negotiation via - // content-type (application/json, application/xml) - @Override - public void configureContentNegotiation(ContentNegotiationConfigurer configurer) { - configurer - .favorParameter(false) - .ignoreAcceptHeader(false) - .useRegisteredExtensionsOnly(true) - .defaultContentType(MediaType.APPLICATION_JSON) - .mediaType("json", MediaType.APPLICATION_JSON) - .mediaType("xml", MediaType.APPLICATION_XML) - ; - } - -// if needed, CORS header will be set via the (Nginx) proxy config. 
-// // Enable CORS globally; by default - all origins, GET, HEAD, POST -// @Override -// public void addCorsMappings(CorsRegistry registry) { -// registry.addMapping("/**"); -// } - @Override public void addResourceHandlers(ResourceHandlerRegistry registry) { registry.addResourceHandler("/**") diff --git a/src/main/java/cpath/web/args/BaseGraph.java b/src/main/java/cpath/web/args/BaseGraph.java new file mode 100644 index 000000000..c56c57545 --- /dev/null +++ b/src/main/java/cpath/web/args/BaseGraph.java @@ -0,0 +1,160 @@ +package cpath.web.args; + +import cpath.service.api.OutputFormat; +import io.swagger.v3.oas.annotations.media.Schema; +import org.biopax.paxtools.pattern.miner.SIFEnum; + +import javax.validation.constraints.Min; +import javax.validation.constraints.NotEmpty; +import javax.validation.constraints.NotNull; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; +import java.util.function.Predicate; + +public abstract class BaseGraph extends ServiceQuery { + + @NotEmpty(message = "Provide at least one source URI.") + @Schema( + description = "Source BioPAX entity URIs or IDs (e.g., gene symbols)", + required = true, + example = "[\"Q16602\"]" + ) + private String[] source; + + @Min(1) //note: this allows for null + @Schema( + description = "Graph search distance limit (default: 1)", + example = "1" + ) + private Integer limit; + + @NotNull(message = "Illegal Output Format") + @Schema( + description = "Output format name (default: BIOPAX)", + example = "jsonld" + ) + private OutputFormat format; + + @Schema( + description = "Filter by organism, e.g., taxonomy ID (recommended) or name.", + example = "[\"9606\"]" + ) + private String[] organism; + + @Schema( + description = "Filter by data source name, id or uri.", + example = "[\"reactome\"]" + ) + private String[] datasource; + + @Schema( + description = "If format is SIF or TXT, one can specify interaction types to apply", + example = 
"[\"interacts-with\",\"used-to-produce\"]" //editor/setter maps this to "INTERACTS_WITH","USED_TO_PRODUCE" SIFEnum instances + ) + private SIFEnum[] pattern; + + @Schema( + description = "For the 'get' and 'graph' queries, whether to skip or not traversing " + + "into sub-pathways in the result BioPAX sub-model." + ) + private boolean subpw; + + public BaseGraph() { + //set default vales + format = OutputFormat.BIOPAX; + limit = 1; + subpw = false; + } + + public OutputFormat getFormat() { + return format; + } + + public void setFormat(String format) { + OutputFormat f = OutputFormat.typeOf(format.trim().toUpperCase()); + this.format = (f != null) ? f : OutputFormat.BIOPAX; + } + + public String[] getSource() { + return source; + } + + public void setSource(String[] source) { + Set uris = new HashSet<>(source.length); + for (String item : source) { + if (item.contains(",")) { + //split by ',' ignoring spaces and empty values (between ,,) + for (String id : item.split("\\s*,\\s*", -1)) + uris.add(id); + } else { + uris.add(item); + } + } + this.source = uris.toArray(new String[uris.size()]); + } + + public Integer getLimit() { + return limit; + } + + public void setLimit(Integer limit) { + this.limit = limit; + } + + public String[] getOrganism() { + return organism; + } + + public void setOrganism(String[] organism) { + this.organism = organism; + } + + public String[] getDatasource() { + return datasource; + } + + public void setDatasource(String[] datasource) { + this.datasource = datasource; + } + + public SIFEnum[] getPattern() { + return pattern; + } + + public void setPattern(String[] pattern) { + if(pattern != null && pattern.length > 0) + this.pattern = Arrays.stream(pattern) + .distinct().map(p -> SIFEnum.typeOf(p.trim().toUpperCase()))//skip null (bad pattern value) + .filter(Predicate.not(Objects::isNull)).toArray(SIFEnum[]::new); + else + this.pattern = null; + } + + public boolean getSubpw() { + return subpw; + } + + public void setSubpw(boolean subpw) 
{ + this.subpw = subpw; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(super.toString()) + .append(" for:").append(format) + .append("; spw:").append(subpw) + .append("; src:").append(Arrays.toString(source)); + if (limit != null) + sb.append("; lim:").append(limit); + if (organism != null && organism.length > 0) + sb.append("; org:").append(Arrays.toString(organism)); + if (datasource != null && datasource.length > 0) + sb.append("; dts:").append(Arrays.toString(datasource)); + if (pattern != null && pattern.length > 0) + sb.append("; pat:").append(Arrays.toString(pattern)); + return sb.toString(); + } + +} diff --git a/src/main/java/cpath/web/args/CommonStream.java b/src/main/java/cpath/web/args/CommonStream.java new file mode 100644 index 000000000..9c563a1e8 --- /dev/null +++ b/src/main/java/cpath/web/args/CommonStream.java @@ -0,0 +1,41 @@ +package cpath.web.args; + +import io.swagger.v3.oas.annotations.media.Schema; +import org.biopax.paxtools.query.algorithm.Direction; + +public class CommonStream extends BaseGraph { + + @Schema( + description = "Graph search direction (default: UNDIRECTED; cannot be BOTHSTREAM)", + example = "undirected" + ) + private Direction direction; + + public CommonStream() { + super(); + direction = Direction.UNDIRECTED; + } + + public Direction getDirection() { + return direction; + } + + public void setDirection(String direction) { + Direction dir = Direction.typeOf(direction); //null when illegal value (also handles empty/null and register/case) + //also exclude/replace BOTHSTREAM value + this.direction = (dir != null && dir != Direction.BOTHSTREAM) ? 
dir : Direction.UNDIRECTED; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(super.toString()); + if (direction != null) + sb.append("; dir:").append(direction); + return sb.toString(); + } + + @Override + public String cmd() { + return "commonstream"; + } +} diff --git a/src/main/java/cpath/web/args/Get.java b/src/main/java/cpath/web/args/Fetch.java similarity index 61% rename from src/main/java/cpath/web/args/Get.java rename to src/main/java/cpath/web/args/Fetch.java index be63ad37b..e1ed226a1 100644 --- a/src/main/java/cpath/web/args/Get.java +++ b/src/main/java/cpath/web/args/Fetch.java @@ -3,56 +3,62 @@ import javax.validation.constraints.NotEmpty; import javax.validation.constraints.NotNull; -import io.swagger.v3.oas.annotations.Parameter; +import io.swagger.v3.oas.annotations.media.Schema; import org.biopax.paxtools.pattern.miner.SIFEnum; import cpath.service.api.OutputFormat; import java.util.Arrays; import java.util.HashSet; +import java.util.Objects; import java.util.Set; +import java.util.function.Predicate; + +public class Fetch extends ServiceQuery { -public class Get extends ServiceQuery { @NotNull(message = "Illegal Output Format") - @Parameter( + @Schema( description = "Output format name (default: BIOPAX)", - example = "JSONLD" + example = "biopax" ) private OutputFormat format; // required at least one value @NotEmpty(message = "Provide at least one URI.") - @Parameter( - description = "Known BioPAX entity URIs or standard identifiers (e.g., gene symbols)", + @Schema( + description = "BioPAX entity URIs or standard identifiers (e.g., gene symbols)", required = true, - example = "TP53" + example = "[\"CALCRL\"]" ) private String[] uri; - @Parameter( + @Schema( description = "If format is SIF or TXT, one can specify interaction types to apply " + - "(by default it uses all the build-in patterns but 'neighbor-of')", - example = "interacts-with" + "(by default, it uses all the build-in patterns but 'neighbor-of')", 
+ example = "[\"interacts-with\"]" //editor/setter maps this to "INTERACTS_WITH" SIFEnum instance ) private SIFEnum[] pattern; - @Parameter( + @Schema( description = "For the 'get' and 'graph' queries, whether to skip or not traversing into " + - "sub-pathways in the result BioPAX sub-model." + "sub-pathways in the result BioPAX sub-model.", + example = "false" ) private boolean subpw; - public Get() { + public Fetch() { format = OutputFormat.BIOPAX; // default subpw = false; + uri = new String[]{}; } public OutputFormat getFormat() { return format; } - public void setFormat(OutputFormat format) { - this.format = format; + public void setFormat(String format) { + OutputFormat f = OutputFormat.typeOf(format.trim().toUpperCase()); + this.format = (f != null) ? f : OutputFormat.BIOPAX; } public String[] getUri() { @@ -73,18 +79,25 @@ public void setUri(String[] uri) { this.uri = uris.toArray(new String[uris.size()]); } - //SIF Types public SIFEnum[] getPattern() { return pattern; } - public void setPattern(SIFEnum[] pattern) { - this.pattern = pattern; + public void setPattern(String[] pattern) { + if(pattern != null && pattern.length > 0) + this.pattern = Arrays.stream(pattern) + .distinct().map(p -> SIFEnum.typeOf(p.trim().toUpperCase()))// skip null (bad pattern name) + .filter(Predicate.not(Objects::isNull)).toArray(SIFEnum[]::new); + else + this.pattern = null; } public boolean getSubpw() { return subpw; } + public boolean isSubpw() { + return subpw; + } public void setSubpw(boolean subpw) { this.subpw = subpw; @@ -96,9 +109,9 @@ public String toString() { .append(" for:").append(format) .append("; spw:").append(subpw) .append("; uri:").append(Arrays.toString(uri)); - if (pattern != null && pattern.length > 0) + if (pattern != null && pattern.length > 0) { sb.append("; pat:").append(Arrays.toString(pattern)); - + } return sb.toString(); } @@ -107,8 +120,4 @@ public String cmd() { return "get"; } - @Override - public String outputFormat() { - return 
format.name().toLowerCase(); - } } diff --git a/src/main/java/cpath/web/args/Graph.java b/src/main/java/cpath/web/args/Graph.java index 57679eb08..c7cbd95a0 100644 --- a/src/main/java/cpath/web/args/Graph.java +++ b/src/main/java/cpath/web/args/Graph.java @@ -1,124 +1,70 @@ package cpath.web.args; -import javax.validation.constraints.Min; -import javax.validation.constraints.NotEmpty; import javax.validation.constraints.NotNull; -import io.swagger.v3.oas.annotations.Parameter; -import org.biopax.paxtools.pattern.miner.SIFEnum; -import org.biopax.paxtools.query.algorithm.Direction; +import io.swagger.v3.oas.annotations.media.Schema; import cpath.service.api.GraphType; -import cpath.service.api.OutputFormat; +import org.biopax.paxtools.query.algorithm.Direction; +import org.biopax.paxtools.query.algorithm.LimitType; import java.util.Arrays; import java.util.HashSet; import java.util.Set; -public class Graph extends ServiceQuery { +@Deprecated +/** + * @deprecated - migrating to @{@link Neighborhood}, @{@link PathsBetween}, etc. + */ +public class Graph extends BaseGraph { @NotNull(message = "Parameter 'kind' is required.") - @Parameter( + @Schema( description = "BioPAX graph traversal type.", required = true, - example = "PATHSBETWEEN" - ) - private GraphType kind; //required! 
- - @NotEmpty(message = "Provide at least one source URI.") - @Parameter( - description = "Source BioPAX entity URIs or standard identifiers (e.g., gene symbols)", - required = true, - example = "TP53" - ) - private String[] source; - - @Parameter( - description = "Target BioPAX entity URIs or standard identifiers (e.g., gene symbols); " + - "this parameter works only with kind=PATHSFROMTO graph queries.", - example = "TP53" + example = "neighborhood" ) - private String[] target; - - @Min(1) //note: this allows for null - @Parameter( - description = "Graph search distance limit (default: 1)" - ) - private Integer limit; + private GraphType kind; - @Parameter( - description = "Graph search direction (default: UNDIRECTED)", - example = "BOTHSTREAM" + @Schema( + description = "Graph search direction (default: UNDIRECTED)", + example = "undirected" ) private Direction direction; - @NotNull(message = "Illegal Output Format") - @Parameter( - description = "Output format name (default: BIOPAX)" - ) - private OutputFormat format; - - @Parameter( - description = "Filter by organism, e.g., taxonomy ID (recommended) or name.", - example = "9606" - ) - private String[] organism; - - @Parameter( - description = "Filter by data source name, id or uri.", - example = "reactome" + @Schema( + description = "Target BioPAX entity URIs/IDs; optional - only for PATHSFROMTO graph " + + "(also when missing, then PATHSBETWEEN is there used).", + example = "[]" ) - private String[] datasource; - - @Parameter( - description = "If format is SIF or TXT, one can specify interaction types to apply", - example = "interacts-with" - ) - private SIFEnum[] pattern; + private String[] target; - @Parameter( - description = "For the 'get' and 'graph' queries, whether to skip or not traversing " + - "into sub-pathways in the result BioPAX sub-model." 
+ @Schema( + description = "Limit Type: 'normal', 'shortest-plus-k'; only for PATHSFROMTO query (default: normal)", + example = "normal" ) - private boolean subpw; + private LimitType limitType; public Graph() { - format = OutputFormat.BIOPAX; // default - limit = 1; - subpw = false; - } - - public OutputFormat getFormat() { - return format; - } - - public void setFormat(OutputFormat format) { - this.format = format; + super(); + limitType = LimitType.NORMAL; //for pathsfromto only + direction = Direction.UNDIRECTED; } public GraphType getKind() { return kind; } - public void setKind(GraphType kind) { - this.kind = kind; + public void setKind(String kind) { + this.kind = GraphType.typeOf(kind); //null when illegal value (also handles empty/null and register/case) } - public String[] getSource() { - return source; + public Direction getDirection() { + return direction; } - public void setSource(String[] source) { - Set uris = new HashSet<>(source.length); - for (String item : source) { - if (item.contains(",")) { - //split by ',' ignoring spaces and empty values (between ,,) - for (String id : item.split("\\s*,\\s*", -1)) - uris.add(id); - } else { - uris.add(item); - } - } - this.source = uris.toArray(new String[uris.size()]); + public void setDirection(String direction) { + Direction dir = Direction.typeOf(direction); //null when illegal value (also handles empty/null and register/case) + this.direction = (dir != null) ? 
dir : Direction.UNDIRECTED; } public String[] getTarget() { @@ -139,73 +85,24 @@ public void setTarget(String[] target) { this.target = uris.toArray(new String[uris.size()]); } - public Integer getLimit() { - return limit; - } - - public void setLimit(Integer limit) { - this.limit = limit; - } - - public Direction getDirection() { - return direction; - } - - public void setDirection(Direction direction) { - this.direction = direction; + public LimitType getLimitType() { + return limitType; } - public String[] getOrganism() { - return organism; - } - - public void setOrganism(String[] organism) { - this.organism = organism; - } - - public String[] getDatasource() { - return datasource; - } - - public void setDatasource(String[] datasource) { - this.datasource = datasource; - } - - //SIF Types - public SIFEnum[] getPattern() { - return pattern; - } - - public void setPattern(SIFEnum[] pattern) { - this.pattern = pattern; - } - - public boolean getSubpw() { - return subpw; - } - - public void setSubpw(boolean subpw) { - this.subpw = subpw; + public void setLimitType(String limitType) { + LimitType limt = LimitType.typeOf(limitType); + this.limitType = (limt != null) ? 
limt : LimitType.NORMAL; } @Override public String toString() { - StringBuilder sb = new StringBuilder(super.toString()) - .append(" for:").append(format) - .append("; spw:").append(subpw) - .append("; src:").append(Arrays.toString(source)); + StringBuilder sb = new StringBuilder(super.toString()); + if (limitType != null) + sb.append("; limt:").append(limitType); if (target != null && target.length > 0) sb.append("; tgt:").append(Arrays.toString(target)); - if (limit != null) - sb.append("; lim:").append(limit); - if (organism != null && organism.length > 0) - sb.append("; org:").append(Arrays.toString(organism)); - if (datasource != null && datasource.length > 0) - sb.append("; dts:").append(Arrays.toString(datasource)); if (direction != null) sb.append("; dir:").append(direction); - if (pattern != null && pattern.length > 0) - sb.append("; pat:").append(Arrays.toString(pattern)); return sb.toString(); } @@ -214,8 +111,4 @@ public String cmd() { return (kind != null) ? kind.toString() : "graph"; } - @Override - public String outputFormat() { - return format.name().toLowerCase(); - } } diff --git a/src/main/java/cpath/web/args/Neighborhood.java b/src/main/java/cpath/web/args/Neighborhood.java new file mode 100644 index 000000000..53b287ce7 --- /dev/null +++ b/src/main/java/cpath/web/args/Neighborhood.java @@ -0,0 +1,40 @@ +package cpath.web.args; + +import io.swagger.v3.oas.annotations.media.Schema; +import org.biopax.paxtools.query.algorithm.Direction; + +public class Neighborhood extends BaseGraph { + + @Schema( + description = "Graph search direction (default: UNDIRECTED)", + example = "undirected" + ) + private Direction direction; + + public Neighborhood() { + super(); + direction = Direction.UNDIRECTED; + } + + public Direction getDirection() { + return direction; + } + + public void setDirection(String direction) { + Direction dir = Direction.typeOf(direction); //null when illegal value (also handles empty/null and register/case) + this.direction = (dir != 
null) ? dir : Direction.UNDIRECTED; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(super.toString()); + if (direction != null) + sb.append("; dir:").append(direction); + return sb.toString(); + } + + @Override + public String cmd() { + return "neighborhood"; + } +} diff --git a/src/main/java/cpath/web/args/PathsBetween.java b/src/main/java/cpath/web/args/PathsBetween.java new file mode 100644 index 000000000..d61a55422 --- /dev/null +++ b/src/main/java/cpath/web/args/PathsBetween.java @@ -0,0 +1,18 @@ +package cpath.web.args; + +public class PathsBetween extends BaseGraph { + + public PathsBetween() { + super(); + } + + @Override + public String toString() { + return super.toString(); + } + + @Override + public String cmd() { + return "pathsbetween"; + } +} diff --git a/src/main/java/cpath/web/args/PathsFromTo.java b/src/main/java/cpath/web/args/PathsFromTo.java new file mode 100644 index 000000000..783560d9b --- /dev/null +++ b/src/main/java/cpath/web/args/PathsFromTo.java @@ -0,0 +1,72 @@ +package cpath.web.args; + +import io.swagger.v3.oas.annotations.media.Schema; +import org.biopax.paxtools.query.algorithm.LimitType; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +public class PathsFromTo extends BaseGraph { + + @Schema( + description = "Target BioPAX entity URIs/IDs (if not present, then 'pathsbetween' algorithm is used instead)", + example = "[]" + ) + private String[] target; + + @Schema( + description = "Limit Type: 'normal', 'shortest-plus-k'", + example = "normal" + ) + private LimitType limitType; + + public PathsFromTo() { + super(); + limitType = LimitType.NORMAL; + } + + public LimitType getLimitType() { + return limitType; + } + + public void setLimitType(String limitType) { + LimitType limt = LimitType.typeOf(limitType); + this.limitType = (limt != null) ? 
limt : LimitType.NORMAL; + } + + public String[] getTarget() { + return target; + } + + public void setTarget(String[] target) { + Set uris = new HashSet<>(target.length); + for (String item : target) { + if (item.contains(",")) { + //split by ',' ignoring spaces and empty values (between ,,) + for (String id : item.split("\\s*,\\s*", -1)) { + uris.add(id); + } + } else { + uris.add(item.trim()); + } + } + this.target = uris.toArray(new String[uris.size()]); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(super.toString()); + if (limitType != null) + sb.append("; limt:").append(limitType); + if (target != null && target.length > 0) + sb.append("; tgt:").append(Arrays.toString(target)); + return sb.toString(); + } + + @Override + public String cmd() { + return "pathsfromto"; + } + +} diff --git a/src/main/java/cpath/web/args/Search.java b/src/main/java/cpath/web/args/Search.java index 1289a7d4b..e46919389 100644 --- a/src/main/java/cpath/web/args/Search.java +++ b/src/main/java/cpath/web/args/Search.java @@ -3,43 +3,79 @@ import javax.validation.constraints.Min; import javax.validation.constraints.NotBlank; -import io.swagger.v3.oas.annotations.Parameter; +import cpath.web.args.binding.BiopaxTypeEditor; +import io.swagger.v3.oas.annotations.media.Schema; import org.biopax.paxtools.model.BioPAXElement; import java.util.Arrays; public class Search extends ServiceQuery { - @NotBlank(message = "Parameter 'q' (a Lucene query string) is blank (not specified).") - @Parameter( - description = "Query string (full-text search supports Lucene query syntax).", + @NotBlank(message = "Parameter 'q' (a Lucene query string) is blank.") + @Schema( + description = """ + a keyword, name, identifier, or a Lucene query string; + the index field names are: uri, keyword, name, pathway, xrefid, datasource, organism; + e.g. keyword is the default search field that includes most of BioPAX element's properties + and nested properties (e.g. 
a Complex can be found by one of its member's names or ECNumber). + Search results, specifically the URIs, can be starting point for the graph, get, traverse queries. + Search strings are case insensitive, except for xrefid, uri, or when it's enclosed in quotes. + """, required = true, - example = "xrefid:FGF*" + example = "xrefid:P*" ) private String q; - @Parameter( - description = "Filter by BioPAX L3 class name (case-insensitive).", + @Schema( + description = """ + BioPAX class filter (values; case-insensitive). + Note that some query filters, such as &type=biosource + (for most BioPAX UtilityClass, such as Score, Evidence), will not return any hits. + So, use Entity (e.g., Pathway, Control, Protein) or EntityReference types + (e.g. ProteinReference, SmallMoleculeReference) instead. + """, example = "pathway" ) - private Class type; + private String type; - @Parameter( - description = "Filter by organism, e.g., taxonomy ID (recommended) or name.", - example = "9606" + public String getType() { + return type; + } + public void setType(String type) { + this.type = type; + this.biopaxClass = BiopaxTypeEditor.getSearchableBiopaxClassByName(type); + } + + //this is set from request using custom property editor + private Class biopaxClass; + + @Schema( + description = """ + by-organism filter; values can be either the canonical names, e.g. + homo sapiens or NCBI Taxon IDs, 9606. If multiple values + are provided, then the union of hits is returned; e.g., + organism=9606&organism=10016 results in both human and mouse related hits. + See also: supported species (other organisms data, + such as viruses and model organisms, can go together with e.g. human models that we integrated). + """, + example = "[\"9606\"]" ) private String[] organism; - @Parameter( - description = "Filter by data source name, id or uri.", - example = "reactome" + @Schema( + description = """ + filter by data source (an array of names, URIs + of the data sources or any Provenance). 
+ If multiple data source values are specified, a union of hits from specified sources is returned; + e.g., datasource=reactome&datasource=pid. + """, + example = "[\"reactome\"]" ) private String[] datasource; @Min(0) - @Parameter( - description = "Pagination: page number (>=0) of the full-text search results.", - example = "0", - required = false + @Schema( + description = "Pagination: the search result page number, N>=0; default is 0.", + example = "0" ) private Integer page; @@ -55,12 +91,15 @@ public void setQ(String q) { this.q = q; } - public Class getType() { - return type; + public Class getBiopaxClass() { + return biopaxClass; } - public void setType(Class type) { - this.type = type; + public void setBiopaxClass(Class biopaxClass) { + this.biopaxClass = biopaxClass; + if(biopaxClass != null) { + this.type = biopaxClass.getSimpleName(); + } } public String[] getOrganism() { @@ -90,8 +129,8 @@ public void setPage(Integer page) { @Override public String toString() { StringBuilder sb = new StringBuilder(super.toString()).append(" q:").append(q).append("; p:").append(page); - if (type != null) - sb.append("; t:").append(type.getSimpleName()); + if (biopaxClass != null) + sb.append("; t:").append(biopaxClass.getSimpleName()); if (organism != null && organism.length > 0) sb.append("; org:").append(Arrays.toString(organism)); if (datasource != null && datasource.length > 0) @@ -104,8 +143,4 @@ public String cmd() { return "search"; } - @Override - public String outputFormat() { - return "xml"; //default - } } diff --git a/src/main/java/cpath/web/args/ServiceQuery.java b/src/main/java/cpath/web/args/ServiceQuery.java index 51903b39a..417baa1ef 100644 --- a/src/main/java/cpath/web/args/ServiceQuery.java +++ b/src/main/java/cpath/web/args/ServiceQuery.java @@ -1,31 +1,14 @@ package cpath.web.args; -import io.swagger.v3.oas.annotations.Parameter; - -/** - * Created by igor on 23/06/16. 
- */ public abstract class ServiceQuery { - - @Parameter( - description = "User or app name (for service analytics)" - ) - private String user; - - public String getUser() { - return user; - } - - public void setUser(String user) { - this.user = user; + public ServiceQuery() { } @Override public String toString() { - return cmd() + ((user != null) ? " cli:" + user + ";" : ""); + return cmd(); } public abstract String cmd(); - public abstract String outputFormat(); } diff --git a/src/main/java/cpath/web/args/TopPathways.java b/src/main/java/cpath/web/args/TopPathways.java index c619ecfdc..ecb693dbf 100644 --- a/src/main/java/cpath/web/args/TopPathways.java +++ b/src/main/java/cpath/web/args/TopPathways.java @@ -1,7 +1,6 @@ package cpath.web.args; - -import io.swagger.v3.oas.annotations.Parameter; +import io.swagger.v3.oas.annotations.media.Schema; import javax.validation.constraints.NotBlank; import java.util.Arrays; @@ -10,22 +9,22 @@ public class TopPathways extends ServiceQuery { @NotBlank( message = "Parameter 'q' (a Lucene query string) is blank (not specified)." 
) - @Parameter( + @Schema( description = "Query string (supports Lucene query syntax).", required = true, example = "*" ) private String q; - @Parameter( - description = "Filter by organism, e.g., taxonomy ID (recommended) or name.", - example = "9606" + @Schema( + description = "Filter by organism, e.g., taxonomy id (recommended) or name.", + example = "[\"9606\"]" ) private String[] organism; - @Parameter( + @Schema( description = "Filter by data source name, id or uri.", - example = "reactome" + example = "[\"reactome\"]" ) private String[] datasource; @@ -74,8 +73,4 @@ public String cmd() { return "top_pathways"; } - @Override - public String outputFormat() { - return "xml"; //default - } } diff --git a/src/main/java/cpath/web/args/Traverse.java b/src/main/java/cpath/web/args/Traverse.java index aab55f216..ca42c7007 100644 --- a/src/main/java/cpath/web/args/Traverse.java +++ b/src/main/java/cpath/web/args/Traverse.java @@ -1,6 +1,6 @@ package cpath.web.args; -import io.swagger.v3.oas.annotations.Parameter; +import io.swagger.v3.oas.annotations.media.Schema; import javax.validation.constraints.NotBlank; import javax.validation.constraints.NotEmpty; @@ -10,8 +10,9 @@ public class Traverse extends ServiceQuery { @NotBlank(message="Property Path is blank (not specified).") - @Parameter( - description = "String expression, e.g., 'Entity/xref:PublicationXref/id' - connected with '/' and ':' BioPAX types and properties - graph path to specific model elements through given ones.", + @Schema( + description = "String expression, e.g., 'Entity/xref:PublicationXref/id' - connected with '/' and ':' " + + "BioPAX types and properties - graph path to specific model elements through given ones.", example = "Entity/xref:PublicationXref/id", required = true ) @@ -19,10 +20,10 @@ public class Traverse extends ServiceQuery { // required at least one value @NotEmpty(message="Provide at least one URI.") - @Parameter( + @Schema( description = "Known BioPAX entity URIs or standard 
identifiers (e.g., gene symbols)", required = true, - example = "TP53" + example = "[\"TP53\"]" ) private String[] uri; @@ -65,8 +66,4 @@ public String cmd() { return "traverse"; } - @Override - public String outputFormat() { - return "xml"; //default - } } diff --git a/src/main/java/cpath/web/args/binding/BiopaxTypeEditor.java b/src/main/java/cpath/web/args/binding/BiopaxTypeEditor.java index ca709efed..1f2f21110 100644 --- a/src/main/java/cpath/web/args/binding/BiopaxTypeEditor.java +++ b/src/main/java/cpath/web/args/binding/BiopaxTypeEditor.java @@ -2,6 +2,7 @@ import java.beans.PropertyEditorSupport; +import org.apache.commons.lang3.StringUtils; import org.biopax.paxtools.controller.EditorMap; import org.biopax.paxtools.controller.SimpleEditorMap; import org.biopax.paxtools.model.BioPAXElement; @@ -10,10 +11,9 @@ /** - * Helps convert request PROVIDER_URL path values to a BioPAX type. + * Convert a request parameter value to the BioPAX type. * * @author rodche - * */ public class BiopaxTypeEditor extends PropertyEditorSupport { private static BioPAXFactory bioPAXFactory = BioPAXLevel.L3.getDefaultFactory(); @@ -24,23 +24,31 @@ public class BiopaxTypeEditor extends PropertyEditorSupport { */ @Override public void setAsText(String type) { - setValue(getSearchableBiopaxClassByName(type)); + setValue(BiopaxTypeEditor.getSearchableBiopaxClassByName(type)); } - /* - * Check whether given 'type' (case insensitive) is a BioPAX interface that + /** + * Check whether given 'type' (case-insensitive) is a BioPAX interface that * has a non-abstract implementation. * BioPAX type names are recognized regardless up/lowercase, e.g.: * ProteinReference, PROTEINREFERENCE, proteinreference, etc. should all work the same. + * + * @param type BioPAX type/interface name, e.g. 
"Pathway", "Provenance" (case-insensitive) + * @throws IllegalArgumentException when the input string is not blank but does not match any BioPAX type + * @return the BioPAX interface; null when 'type' is null/blank */ - private static Class getSearchableBiopaxClassByName(String type) + public static Class getSearchableBiopaxClassByName(String type) { + if(StringUtils.isBlank(type)) { + return null; + } + for(Class c : editorMap.getKnownSubClassesOf(BioPAXElement.class)) { if(c.getSimpleName().equalsIgnoreCase(type)) { if(c.isInterface() && bioPAXFactory.getImplClass(c) != null) return c; // interface } } - throw new IllegalArgumentException("Illegal BioPAX class name '" + type); + throw new IllegalArgumentException("Unknown BioPAX type: " + type); } } diff --git a/src/main/java/cpath/web/args/binding/DirectionEditor.java b/src/main/java/cpath/web/args/binding/DirectionEditor.java index d94ab20ae..91d2abdbb 100644 --- a/src/main/java/cpath/web/args/binding/DirectionEditor.java +++ b/src/main/java/cpath/web/args/binding/DirectionEditor.java @@ -16,7 +16,7 @@ public class DirectionEditor extends PropertyEditorSupport { */ @Override public void setAsText(String arg0) { - setValue(Direction.valueOf(arg0.trim().toUpperCase())); + setValue(Direction.typeOf(arg0)); } } diff --git a/src/main/java/cpath/web/args/binding/GraphQueryDirectionEditor.java b/src/main/java/cpath/web/args/binding/GraphQueryDirectionEditor.java deleted file mode 100644 index 5770e56b4..000000000 --- a/src/main/java/cpath/web/args/binding/GraphQueryDirectionEditor.java +++ /dev/null @@ -1,21 +0,0 @@ -package cpath.web.args.binding; - -import org.biopax.paxtools.query.algorithm.Direction; - -import java.beans.PropertyEditorSupport; - - -/** - * @author ozgun - * - */ -public class GraphQueryDirectionEditor extends PropertyEditorSupport { - - @Override - public void setAsText(String arg0) - { - Direction value = null; - value = Direction.valueOf(arg0.trim().toUpperCase()); - setValue(value); - } -} \ No 
newline at end of file diff --git a/src/main/java/cpath/web/args/binding/GraphQueryLimitEditor.java b/src/main/java/cpath/web/args/binding/GraphQueryLimitEditor.java index f19731b9f..f4114f0a2 100644 --- a/src/main/java/cpath/web/args/binding/GraphQueryLimitEditor.java +++ b/src/main/java/cpath/web/args/binding/GraphQueryLimitEditor.java @@ -14,8 +14,6 @@ public class GraphQueryLimitEditor extends PropertyEditorSupport { @Override public void setAsText(String arg0) { - LimitType value = null; - value = LimitType.valueOf(arg0.trim().toUpperCase()); - setValue(value); + setValue(LimitType.typeOf(arg0)); } } \ No newline at end of file diff --git a/src/main/java/cpath/web/args/binding/GraphTypeEditor.java b/src/main/java/cpath/web/args/binding/GraphTypeEditor.java index baa4e820c..1fa54b9a1 100644 --- a/src/main/java/cpath/web/args/binding/GraphTypeEditor.java +++ b/src/main/java/cpath/web/args/binding/GraphTypeEditor.java @@ -16,7 +16,7 @@ public class GraphTypeEditor extends PropertyEditorSupport { */ @Override public void setAsText(String arg0) { - setValue(GraphType.valueOf(arg0.trim().toUpperCase())); + setValue(GraphType.typeOf(arg0)); } } diff --git a/src/main/java/cpath/web/args/binding/MetadataTypeEditor.java b/src/main/java/cpath/web/args/binding/MetadataTypeEditor.java index 95a6b1480..4b619403e 100644 --- a/src/main/java/cpath/web/args/binding/MetadataTypeEditor.java +++ b/src/main/java/cpath/web/args/binding/MetadataTypeEditor.java @@ -2,17 +2,15 @@ import java.beans.PropertyEditorSupport; -import cpath.service.jpa.Metadata; - +import cpath.service.metadata.Datasource.METADATA_TYPE; public class MetadataTypeEditor extends PropertyEditorSupport { @Override public void setAsText(String arg0) { - Metadata.METADATA_TYPE value = null; - value = Metadata.METADATA_TYPE.valueOf(arg0.trim().toUpperCase()); - setValue(value); + METADATA_TYPE value = METADATA_TYPE.valueOf(arg0.trim().toUpperCase()); + setValue(value); } } diff --git 
a/src/main/java/cpath/web/args/binding/OutputFormatEditor.java b/src/main/java/cpath/web/args/binding/OutputFormatEditor.java index d23c84772..7f6d1be45 100644 --- a/src/main/java/cpath/web/args/binding/OutputFormatEditor.java +++ b/src/main/java/cpath/web/args/binding/OutputFormatEditor.java @@ -16,6 +16,6 @@ public class OutputFormatEditor extends PropertyEditorSupport { */ @Override public void setAsText(String param) { - setValue(OutputFormat.valueOf(param.trim().toUpperCase())); + setValue(OutputFormat.typeOf(param)); } } diff --git a/src/main/java/cpath/web/args/binding/SIFTypeEditor.java b/src/main/java/cpath/web/args/binding/SIFTypeEditor.java index 6eba32248..b9dec3188 100644 --- a/src/main/java/cpath/web/args/binding/SIFTypeEditor.java +++ b/src/main/java/cpath/web/args/binding/SIFTypeEditor.java @@ -16,7 +16,7 @@ public class SIFTypeEditor extends PropertyEditorSupport { */ @Override public void setAsText(String arg0) { - setValue(SIFEnum.typeOf(arg0.trim())); + setValue(SIFEnum.typeOf(arg0.trim().toUpperCase())); } } diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 2e2761d5c..e88c8a702 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -1,53 +1,57 @@ ## cPath2 properties (which are mapped to Settings.java bean properties) cpath2.max-hits-per-page=100 -cpath2.xml-base=test/ +# Unit tests depend on the xml-base (in prod, should be a different value, in CPATH2_HOME/application.properties) +cpath2.xml-base=http://test/ cpath2.version=0 -cpath2.name=Pathway Commons Demo -cpath2.url=http://www.pathwaycommons.org -cpath2.downloads=http://www.pathwaycommons.org/archives/ -cpath2.logo=http://pathwaycommons.github.io/about/images/brand/pc_logo.png -cpath2.description="Default profile (test db configuration)" +cpath2.name=pc +cpath2.organization=Pathway Commons +cpath2.email=pathway-commons-help@googlegroups.com +cpath2.url=https://www.pathwaycommons.org 
+cpath2.downloads=https://www.pathwaycommons.org/archives/PC2/ +cpath2.logo=https://pathwaycommons.github.io/about/images/brand/pc_logo.png +cpath2.description=Test/dev instance +cpath2.metadata-location=classpath:metadata.json cpath2.species=Homo sapiens (9606) cpath2.sbgn-layout-enabled=false -#cpath2.metadata-location=file:metadata.json -## Spring Boot Application autoconfiguration - -# JPA properties (here - test/demo) -spring.datasource.url = jdbc:h2:mem:testdb;DB_CLOSE_ON_EXIT=FALSE -#spring.datasource.driver-class-name = org.h2.Driver -#spring.datasource.username = sa -#spring.datasource.password = -#spring.jpa.database=h2 -#spring.jpa.generate-ddl=true -#spring.jpa.hibernate.ddl-auto=create -#spring.jpa.hibernate.use-new-id-generator-mappings=true -#spring.jpa.database-platform = org.hibernate.dialect.H2Dialect -#spring.jpa.properties.hibernate.dialect = org.hibernate.dialect.H2Dialect - -#H2 db web management console (should be disabled in production) -spring.h2.console.enabled=true -spring.h2.console.path=/h2 -spring.h2.console.settings.trace=false -spring.h2.console.settings.web-allow-others=false - -# Web service properties +## Spring Boot Application +spring.data.rest.default-media-type=application/json spring.mvc.view.prefix=/jsp/ spring.mvc.view.suffix=.jsp -server.error.whitelabel.enabled=false -#spring.main.banner-mode=off - -# ToDo: hide some actuators in production -management.endpoints.web.exposure.include=* -# to expose shutdown: -# management.endpoint.shutdown.enabled=true - -#due to migration to spring-boot 2.1 +spring.main.banner-mode=off +#after migration to spring-boot>=2.1 spring.main.allow-bean-definition-overriding=true -# OpenAPI/Swagger3 (feature and /v3/api-docs endpoint) -springdoc.api-docs.enabled=false -springdoc.swagger-ui.enabled=false +server.error.whitelabel.enabled=false +server.error.include-exception=true +server.error.include-stacktrace=always 
+server.tomcat.additional-tld-skip-patterns=saxon*.jar,jsr173_1*.jar,activation.jar,jaxb1*.jar,com.springsource.org.jdom-*.jar + +#web/jmx actuators (=* enables all endpoints: /health,info,beans,etc...) +#management.endpoints.enabled-by-default=false +#management.endpoint.health.enabled=true +#management.endpoint.info.enabled=true +#management.endpoint.beans.enabled=true +#management.endpoint.shutdown.enabled=true +management.endpoints.web.exposure.include=health,beans +#management.endpoints.web.exposure.exclude= + +## OpenAPI/Swagger3 (feature and /v3/api-docs endpoint) #springdoc.packagesToScan=cpath.web.args, cpath.web -#springdoc.pathsToMatch=/pc2 +#springdoc.pathsToMatch= +springdoc.show-actuator=false +springdoc.api-docs.enabled=true +springdoc.swagger-ui.enabled=true +springdoc.swagger-ui.defaultModelsExpandDepth=2 +springdoc.swagger-ui.docExpansion=list +#springdoc.swagger-ui.defaultModelRendering=example +#springdoc.swagger-ui.defaultModelRendering=model +springdoc.swagger-ui.displayRequestDuration=true +springdoc.swagger-ui.operationsSorter=alpha +springdoc.swagger-ui.tagsSorter=alpha +#deepLinking=true is important for the landing page's links +springdoc.swagger-ui.deepLinking=true +springdoc.swagger-ui.disable-swagger-default-url=true +#disable the "Explore" toolbar +springdoc.swagger-ui.layout=BaseLayout diff --git a/src/main/resources/ehcache.xml b/src/main/resources/ehcache.xml deleted file mode 100644 index c4ccb2f51..000000000 --- a/src/main/resources/ehcache.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/src/main/resources/logback-docker.xml b/src/main/resources/logback-docker.xml deleted file mode 100644 index 4496fb03b..000000000 --- a/src/main/resources/logback-docker.xml +++ /dev/null @@ -1,20 +0,0 @@ - - - - - true - - - - - %d %-5level [%thread] %logger{25} - %msg%n - true - - - - - - - - - diff --git a/src/test/resources/logback-test.xml b/src/main/resources/logback-test.xml similarity index 78% 
rename from src/test/resources/logback-test.xml rename to src/main/resources/logback-test.xml index c344b97cc..e2721b6e9 100644 --- a/src/test/resources/logback-test.xml +++ b/src/main/resources/logback-test.xml @@ -5,19 +5,17 @@ true - + - %d %-5level [%thread] %logger{25} - %msg%n true - diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml index 2a61a8959..13c3f6d3e 100644 --- a/src/main/resources/logback.xml +++ b/src/main/resources/logback.xml @@ -5,24 +5,22 @@ true - + - - - cpath2.log - - cpath2.%d{yyyy-ww}.log.gz - + - %d %-5level %logger{25} - %msg%n + %d %-5level [%thread] %logger{25} - %msg%n true + + + - + \ No newline at end of file diff --git a/src/test/resources/metadata.json b/src/main/resources/metadata.json similarity index 65% rename from src/test/resources/metadata.json rename to src/main/resources/metadata.json index 16bbe7433..da1da0100 100644 --- a/src/test/resources/metadata.json +++ b/src/main/resources/metadata.json @@ -5,40 +5,40 @@ { "dataUrl": "classpath:test_uniprot_data.dat.zip", "identifier": "TEST_UNIPROT", - "homepageUrl": "http:\/\/www.uniprot.org", + "homepageUrl": "http://www.uniprot.org", "name": [ "UniProt" ], "converterClass": "cpath.converter.UniprotConverter", "description": "Test Uniprot Data", - "iconUrl": "http:\/\/pathway-commons.googlecode.com\/files\/uniprot.png", + "iconUrl": "https://pathwaycommons.github.io/cpath2/logos/uniprot.png", "availability": "free", "type": "WAREHOUSE", - "pubmedId": "00000000" + "pubmedId": "24253303" }, { "dataUrl": "classpath:chebi.obo.zip", "identifier": "TEST_CHEBI", - "homepageUrl": "http:\/\/www.ebi.ac.uk\/chebi\/", + "homepageUrl": "https://www.ebi.ac.uk/chebi/", "name": [ "ChEBI" ], "converterClass": "cpath.converter.ChebiOboConverter", "description": "Test ChEBI OBO", - "iconUrl": "http:\/\/pathway-commons.googlecode.com\/files\/chebi.png", + "iconUrl": "https://pathwaycommons.github.io/cpath2/logos/chebi.png", "availability": "free", "type": "WAREHOUSE", - 
"pubmedId": "00000000" + "pubmedId": "23180789" }, { "dataUrl": "classpath:test_mapping.zip", "identifier": "TEST_MAPPING", - "homepageUrl": "http:\/\/www.ebi.ac.uk\/unichem\/wholesourcemap", + "homepageUrl": "https://www.ebi.ac.uk/unichem/", "name": [ "UniChem" ], - "description": "Test UniChem batch id-mapping (to ChEBI, manually edited\/fixed to use here)", - "iconUrl": "http:\/\/www.ebi.ac.uk\/unichem\/static\/images\/UniChem_logo2_255.png", + "description": "Test UniChem batch id-mapping (to ChEBI, manually edited/fixed to use here)", + "iconUrl": "https://pathwaycommons.github.io/cpath2/logos/UniChem_logo2_255.png", "availability": "free", "type": "MAPPING", "pubmedId": "23317286" diff --git a/src/main/resources/static/css/pc.css b/src/main/resources/static/css/pc.css index b5ff40859..54bbdc834 100755 --- a/src/main/resources/static/css/pc.css +++ b/src/main/resources/static/css/pc.css @@ -9,42 +9,3 @@ img.datasource-logo {width: 100px; height: 50px;} .thumbnail li {white-space: nowrap; display: compact;} -.btn-file { - position: relative; - overflow: hidden; -} -.btn-file input[type=file] { - position: absolute; - top: 0; - right: 0; - min-width: 100%; - min-height: 100%; - font-size: 999px; - text-align: right; - filter: alpha(opacity=0); - opacity: 0; - outline: none; - background: white; - cursor: inherit; - display: block; -} - -/* Data table fixes */ -table.table thead .sorting, -table.table thead .sorting_asc, -table.table thead .sorting_desc, -table.table thead .sorting_asc_disabled, -table.table thead .sorting_desc_disabled { - cursor: pointer; - *cursor: hand; -} - -table.table thead .sorting { background: url('../img/sort_both.png') no-repeat center right; } -table.table thead .sorting_asc { background: url('../img/sort_asc.png') no-repeat center right; } -table.table thead .sorting_desc { background: url('../img/sort_desc.png') no-repeat center right; } -table.table thead .sorting_asc_disabled { background: url('../img/sort_asc_disabled.png') 
no-repeat center right; } -table.table thead .sorting_desc_disabled { background: url('../img/sort_desc_disabled.png') no-repeat center right; } - -div.dataTables_filter input { - padding: 3px 3px 3px 3px; -} diff --git a/src/main/resources/static/css/qunit.css b/src/main/resources/static/css/qunit.css deleted file mode 100644 index 7ba3f9a30..000000000 --- a/src/main/resources/static/css/qunit.css +++ /dev/null @@ -1,244 +0,0 @@ -/** - * QUnit v1.12.0 - A JavaScript Unit Testing Framework - * - * http://qunitjs.com - * - * Copyright 2012 jQuery Foundation and other contributors - * Released under the MIT license. - * http://jquery.org/license - */ - -/** Font Family and Sizes */ - -#qunit-tests, #qunit-header, #qunit-banner, #qunit-testrunner-toolbar, #qunit-userAgent, #qunit-testresult { - font-family: "Helvetica Neue Light", "HelveticaNeue-Light", "Helvetica Neue", Calibri, Helvetica, Arial, sans-serif; -} - -#qunit-testrunner-toolbar, #qunit-userAgent, #qunit-testresult, #qunit-tests li { font-size: small; } -#qunit-tests { font-size: smaller; } - - -/** Resets */ - -#qunit-tests, #qunit-header, #qunit-banner, #qunit-userAgent, #qunit-testresult, #qunit-modulefilter { - margin: 0; - padding: 0; -} - - -/** Header */ - -#qunit-header { - padding: 0.5em 0 0.5em 1em; - - color: #8699a4; - background-color: #0d3349; - - font-size: 1.5em; - line-height: 1em; - font-weight: normal; - - border-radius: 5px 5px 0 0; - -moz-border-radius: 5px 5px 0 0; - -webkit-border-top-right-radius: 5px; - -webkit-border-top-left-radius: 5px; -} - -#qunit-header a { - text-decoration: none; - color: #c2ccd1; -} - -#qunit-header a:hover, -#qunit-header a:focus { - color: #fff; -} - -#qunit-testrunner-toolbar label { - display: inline-block; - padding: 0 .5em 0 .1em; -} - -#qunit-banner { - height: 5px; -} - -#qunit-testrunner-toolbar { - padding: 0.5em 0 0.5em 2em; - color: #5E740B; - background-color: #eee; - overflow: hidden; -} - -#qunit-userAgent { - padding: 0.5em 0 0.5em 2.5em; - 
background-color: #2b81af; - color: #fff; - text-shadow: rgba(0, 0, 0, 0.5) 2px 2px 1px; -} - -#qunit-modulefilter-container { - float: right; -} - -/** Tests: Pass/Fail */ - -#qunit-tests { - list-style-position: inside; -} - -#qunit-tests li { - padding: 0.4em 0.5em 0.4em 2.5em; - border-bottom: 1px solid #fff; - list-style-position: inside; -} - -#qunit-tests.hidepass li.pass, #qunit-tests.hidepass li.running { - display: none; -} - -#qunit-tests li strong { - cursor: pointer; -} - -#qunit-tests li a { - padding: 0.5em; - color: #c2ccd1; - text-decoration: none; -} -#qunit-tests li a:hover, -#qunit-tests li a:focus { - color: #000; -} - -#qunit-tests li .runtime { - float: right; - font-size: smaller; -} - -.qunit-assert-list { - margin-top: 0.5em; - padding: 0.5em; - - background-color: #fff; - - border-radius: 5px; - -moz-border-radius: 5px; - -webkit-border-radius: 5px; -} - -.qunit-collapsed { - display: none; -} - -#qunit-tests table { - border-collapse: collapse; - margin-top: .2em; -} - -#qunit-tests th { - text-align: right; - vertical-align: top; - padding: 0 .5em 0 0; -} - -#qunit-tests td { - vertical-align: top; -} - -#qunit-tests pre { - margin: 0; - white-space: pre-wrap; - word-wrap: break-word; -} - -#qunit-tests del { - background-color: #e0f2be; - color: #374e0c; - text-decoration: none; -} - -#qunit-tests ins { - background-color: #ffcaca; - color: #500; - text-decoration: none; -} - -/*** Test Counts */ - -#qunit-tests b.counts { color: black; } -#qunit-tests b.passed { color: #5E740B; } -#qunit-tests b.failed { color: #710909; } - -#qunit-tests li li { - padding: 5px; - background-color: #fff; - border-bottom: none; - list-style-position: inside; -} - -/*** Passing Styles */ - -#qunit-tests li li.pass { - color: #3c510c; - background-color: #fff; - border-left: 10px solid #C6E746; -} - -#qunit-tests .pass { color: #528CE0; background-color: #D2E0E6; } -#qunit-tests .pass .test-name { color: #366097; } - -#qunit-tests .pass .test-actual, 
-#qunit-tests .pass .test-expected { color: #999999; } - -#qunit-banner.qunit-pass { background-color: #C6E746; } - -/*** Failing Styles */ - -#qunit-tests li li.fail { - color: #710909; - background-color: #fff; - border-left: 10px solid #EE5757; - white-space: pre; -} - -#qunit-tests > li:last-child { - border-radius: 0 0 5px 5px; - -moz-border-radius: 0 0 5px 5px; - -webkit-border-bottom-right-radius: 5px; - -webkit-border-bottom-left-radius: 5px; -} - -#qunit-tests .fail { color: #000000; background-color: #EE5757; } -#qunit-tests .fail .test-name, -#qunit-tests .fail .module-name { color: #000000; } - -#qunit-tests .fail .test-actual { color: #EE5757; } -#qunit-tests .fail .test-expected { color: green; } - -#qunit-banner.qunit-fail { background-color: #EE5757; } - - -/** Result */ - -#qunit-testresult { - padding: 0.5em 0.5em 0.5em 2.5em; - - color: #2b81af; - background-color: #D2E0E6; - - border-bottom: 1px solid white; -} -#qunit-testresult .module-name { - font-weight: bold; -} - -/** Fixture */ - -#qunit-fixture { - position: absolute; - top: -10000px; - left: -10000px; - width: 1000px; - height: 1000px; -} diff --git a/src/main/resources/static/scripts/datasources.js b/src/main/resources/static/scripts/datasources.js index 29a9d90d5..b43d07e08 100644 --- a/src/main/resources/static/scripts/datasources.js +++ b/src/main/resources/static/scripts/datasources.js @@ -3,7 +3,6 @@ var dsApp = angular.module('dsApp', ['ngRoute']); dsApp.service('MyPubmed', ['$http', function ($http) { - var euroPmcUrlPrefix = "https://www.ebi.ac.uk/europepmc/webservices/rest/search/query=EXT_ID:"; var euroPmcUrlSuffix = "&format=json&callback=JSON_CALLBACK"; @@ -18,8 +17,7 @@ dsApp.service('MyPubmed', ['$http', function ($http) { + " " + res.journalTitle + ". 
" + res.pubYear + ";" + res.journalVolume + "(" + res.issue + "):" + res.pageInfo; - -// console.log(res.pmid + ": " + res.title); +// console.log(res.pmid + ": " + res.title); ds.citation = cite; }) .error(function(data, status){ @@ -28,7 +26,6 @@ dsApp.service('MyPubmed', ['$http', function ($http) { }; }]); - dsApp.controller('DatasourcesController', function($scope, $http, $filter, MyPubmed) { // data for a quick off-line test // $scope.datasources = [ @@ -38,14 +35,11 @@ dsApp.controller('DatasourcesController', function($scope, $http, $filter, MyPub // ]; $http.get('metadata/datasources').success(function(datasources) { - $scope.datasources = datasources; - for(var i=0; i'+member.id+' - '+member.info+''); - }else { - $("#"+id).append('
  • '+member.id+'
  • '); + $("#"+id).append('
  • '+member.id+' - '+member.info+'
  • '); + } else { + $("#"+id).append('
  • '+member.id+'
  • '); } }); }); diff --git a/src/main/webapp/jsp/datasources.jsp b/src/main/webapp/jsp/datasources.jsp index 9925086bd..3c4b360a9 100644 --- a/src/main/webapp/jsp/datasources.jsp +++ b/src/main/webapp/jsp/datasources.jsp @@ -1,5 +1,4 @@ <%@ page language="java" contentType="text/html; charset=UTF-8" %> -<%@ taglib uri="http://java.sun.com/jsp/jstl/fmt" prefix="fmt" %> <%@ taglib prefix="spring" uri="http://www.springframework.org/tags" %> @@ -7,61 +6,58 @@ - cPath2::Datasources (${cpath.name}) + ${cpath.name} datasources
    -

    Data Sources

    - +

    Acknowledgment

    -
    -
    -

    Warehouse data (canonical molecules, ontologies) are converted - to BioPAX utility classes, such as EntityReference, ControlledVocabulary, - EntityFeature sub-classes, and saved as the initial BioPAX model, - which forms the foundation for integrating pathway data and for id-mapping.

    -

    Pathway and binary interaction data (interactions, participants) are normalized - next and merged into the database. Original reference molecules are replaced - with the corresponding BioPAX warehouse objects.

    -
    -
    - -

    Acknowledgment

    -

    - The ${cpath.name} team much appreciates the fundamental contribution of - all the data providers, authors, Identifiers.org, - all the open biological ontologies, the open-source projects and standards, - which made creating of this integrated BioPAX web service and database feasible.
    -

    -
    +
    +

    We much appreciate the fundamental contribution of all the data providers, authors, + open biological ontologies, identifier registries, open-source projects and standards, + which made creating of this web service possible.

    +
    +
      +
    • WAREHOUSE data (canonical molecules, ontologies) are converted + to BioPAX utility classes, such as EntityReference, ControlledVocabulary, + EntityFeature sub-classes, and saved as the initial BioPAX model, + which forms the foundation for integrating pathway data and for id-mapping. +
    • +
    • PATHWAY and binary interaction data (interactions, participants) are normalized + and merged into the database. Original reference molecules are replaced + with the corresponding BioPAX warehouse objects.
    • +
    • MAPPING data are used to improve the ID-mapping, data merging, and make the graph queries easier to use.
    • +
    +
    +
    +
    +

    Data Sources

    -   - {{ds.name[1] || ds.name[0]}} + {{ds.name[1] || ds.name[0]}}

    -

    {{ds.description}} ({{ds.type}})

    -

    URI: {{ds.uri}}

    -

    - All names (for data filtering): {{uniqueStrings(ds.name) + ""}} -

    +

    {{ds.description}}

    +

    Type: {{ds.type}}

    +

    URI: ${cpath.xmlBase}{{ds.identifier}}

    +

    Names: {{uniqueStrings(ds.name) + ""}}

    - Contains: + Contains: {{ds.numPathways}} pathways, {{ds.numInteractions}} interactions, {{ds.numPhysicalEntities}} participants

    - Publication: {{ds.citation}} - (PMID:{{ds.pubmedId}}) + Publication: {{ds.citation}} + (PMID:{{ds.pubmedId}})

    -

    Availability: {{ds.availability}}

    +

    Availability: {{ds.availability}}

    diff --git a/src/main/webapp/jsp/footer.jsp b/src/main/webapp/jsp/footer.jsp index 05f5d1ea5..ba9f311c1 100644 --- a/src/main/webapp/jsp/footer.jsp +++ b/src/main/webapp/jsp/footer.jsp @@ -10,9 +10,9 @@ diff --git a/src/main/webapp/jsp/formats.jsp b/src/main/webapp/jsp/formats.jsp index 6361eeea8..7ad0a9aee 100644 --- a/src/main/webapp/jsp/formats.jsp +++ b/src/main/webapp/jsp/formats.jsp @@ -6,7 +6,7 @@ -cPath2::Formats (${cpath.name}) +${cpath.name} formats @@ -20,7 +20,7 @@ The GET and GRAPH web service commands return data in several formats explained

    BioPAX (RDF/XML)

    -BioPAX is the default +BioPAX is the default and most complete output format of ${cpath.name} that offers access to all the details of the biological network model stored in the system. This format is ideal for users wishing to to access specific data not @@ -31,13 +31,13 @@ the database are available in BioPAX Level 3. Due to the richness of representation in BioPAX, reading and using such a large BioPAX document requires knowledge of the format and software development tools available for processing it, such as -Paxtools, +Paxtools, a Java library for working with BioPAX as object model, or Jena, SPARQL.

    JSON-LD

    - JSON-LD is a lightweight Linked Data format. + JSON-LD is a lightweight Linked Data format. It is easy for humans to read and write. It is based on the already successful JSON format and provides a way to help JSON data interoperate at Web-scale. JSON-LD is an ideal data format for programming environments, REST Web services, and unstructured databases such as CouchDB and MongoDB. @@ -56,7 +56,7 @@ lists is Gene Set Enrichment Analysis (GSEA). The Gene sets used by GSEA are stored for convenience in the Molecular Signature Database (MSigDB) in the Gene Matrix Transposed file format (*.gmt). This is the main tab-delimited file format specified by the -Broad Molecular Signature Database. +Broad Molecular Signature Database.

    Each gene set is described by a name, a description, and the genes in the gene set: participants in a pathway are specified with one or several HGNC symbols (we can also provide another file using UniProt accession numbers instead). @@ -67,7 +67,7 @@ Exporting to the MSigDB format will enable computational biologists to use pathw within gene set enrichment algorithms, such as GSEA. Available for all pathways within Pathway Commons (only from pathway database sources, not interaction database sources). Full data format details are available at -Broad GSEA Wiki. +Broad GSEA Wiki. We used the normalized and merged BioPAX Level3 model and our simple GSEA converter from the Paxtools library to generate the GSEA (.gmt) archives. (Note: to effectively enforce cross-species check, BioSources must have @@ -88,7 +88,7 @@ Since SIF interactions are always binary it is not possible to fully represent all of BioPAX, thus this translation is lossy in general. Nonetheless, the SIF network is useful for those applications that require pairwise interaction input. SIF format can be easily imported into popular network analysis tools, -such as Cytoscape. +such as Cytoscape.

    In this output format, all participants are specified as chemical or gene names or identifiers. This format does not contain any cross-species @@ -231,7 +231,7 @@ interactions within Pathway Commons.

    SBGN

    -The Systems Biology Graphical Notation (SBGN) +The Systems Biology Graphical Notation (SBGN) is a standard visual notation for network diagrams in biology. SBGN markup language (SBGN-ML) is an associated standard XML format that can be loaded into available software to visualize a diagram of a pathway. BioPAX can be converted to SBGN-ML format, following the diff --git a/src/main/webapp/jsp/head.jsp b/src/main/webapp/jsp/head.jsp index 2c94ec572..d45c16592 100644 --- a/src/main/webapp/jsp/head.jsp +++ b/src/main/webapp/jsp/head.jsp @@ -11,20 +11,19 @@ powered by cPath2 software, version @project.version@" /> +biological networks, ontology, knowledge, analysis, cancer research, systems biology" /> -<%----%> - - - - - - - + + + + + + + diff --git a/src/main/webapp/jsp/header.jsp b/src/main/webapp/jsp/header.jsp index 3c1b362e9..17944da0f 100644 --- a/src/main/webapp/jsp/header.jsp +++ b/src/main/webapp/jsp/header.jsp @@ -14,33 +14,35 @@   -   +  

    - + <%-- collapse --%> + <%-- container --%> - +<%-- begin the #content container that will be closed in the footer.jsp --%>