diff --git a/pom.xml b/pom.xml
index ac6e616d..cc66aaf3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -29,8 +29,9 @@
UTF-8
9.7.0
-Xmx3g -Dfile.encoding=UTF-8 -ea --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED
-
- target/spring-instrument.jar
+ ${settings.localRepository}/org/springframework/spring-instrument/${spring-framework.version}/spring-instrument-${spring-framework.version}.jar
+
+
diff --git a/src/main/java/cpath/cleaner/PathbankCleaner.java b/src/main/java/cpath/cleaner/PathbankCleaner.java
index f904e3b5..6ddbc779 100644
--- a/src/main/java/cpath/cleaner/PathbankCleaner.java
+++ b/src/main/java/cpath/cleaner/PathbankCleaner.java
@@ -9,6 +9,8 @@
import org.biopax.paxtools.model.Model;
import org.biopax.paxtools.model.level3.*;
import org.biopax.paxtools.model.level3.Process;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.InputStream;
import java.io.OutputStream;
@@ -19,6 +21,8 @@
*/
final class PathbankCleaner implements Cleaner {
+ private static final Logger log = LoggerFactory.getLogger(PathbankCleaner.class);
+
public void clean(InputStream data, OutputStream cleanedData) {
// create bp model from dataFile
SimpleIOHandler simpleReader = new SimpleIOHandler(BioPAXLevel.L3);
@@ -28,59 +32,61 @@ public void clean(InputStream data, OutputStream cleanedData) {
if (!model.containsID(model.getXmlBase() + "Reference/TAXONOMY_9606")
&& !model.containsID(model.getXmlBase() + "Reference/Taxonomy_9606")
&& !model.getObjects(BioSource.class).isEmpty())
+ {
throw new RuntimeException("Highly likely non-human datafile (skip).");
+ }
//since Apr-2018, top pathway URIs are "normalized" like: http://identifiers.org/smpdb/...
//let's fix pathway uris base - use bioregistry.io/pathbank: instead
CPathUtils.rebaseUris(model, "http://identifiers.org/smpdb/", "bioregistry.io/pathbank:");
- // Normalize Pathway URIs KEGG stable id, where possible
- Set pathways = new HashSet<>(model.getObjects(Pathway.class));
- for (Pathway pw : pathways) {
-
- //smpdb/pathbank use pathwayOrder, but it's useless - no nextStep at all!
- for (PathwayStep step : new HashSet<>(pw.getPathwayOrder())) {
- if (step.getNextStep().isEmpty() && step.getNextStepOf().isEmpty()) {
- for (Process process : step.getStepProcess())
- if (process instanceof Interaction && !Interaction.class.equals(process.getModelInterface()))
- pw.addPathwayComponent(process);
- pw.removePathwayOrder(step);
+ //remove pathways that have a name exactly equal to "SubPathway";
+ //although these could be merged into more informative pathways (once all the data files get merged),
+ //they add too much unordered nesting/complexity to our model and are not very helpful for graph queries, SIF/GMT, etc.,
+ //due to the way pathwayOrder and pathwayComponent are used.
+ for (Pathway sp : new HashSet<>(model.getObjects(Pathway.class))) {
+ if (sp.getName().contains("SubPathway")) {
+ for (Pathway p : new HashSet<>(sp.getPathwayComponentOf())) {
+ p.removePathwayComponent(sp);
}
+ model.remove(sp);
}
+ }
- //remove all Interaction.class (base) objects
- for (Interaction it : new HashSet<>(model.getObjects(Interaction.class))) {
- if (Interaction.class.equals(it.getModelInterface())) {
- model.remove(it);
- }
+ //remove all Interaction.class (base) objects
+ for (Interaction it : new HashSet<>(model.getObjects(Interaction.class))) {
+ if (Interaction.class.equals(it.getModelInterface())) {
+ model.remove(it);
}
+ }
- //remove sub-pathways that have "SubPathway" in names...
- //forgot why we do this (likely due to same pathways were defined in other files and we merge all...)
- for (Pathway pathway : new HashSet<>(model.getObjects(Pathway.class))) {
- if (pathway.getName().contains("SubPathway")) {
- model.remove(pathway);
- for (Pathway pp : new HashSet<>(pathway.getPathwayComponentOf())) {
- pp.removePathwayComponent(pathway);
- }
- for (PathwayStep ps : new HashSet<>(pathway.getStepProcessOf())) {
- ps.removeStepProcess(pathway);
+ //smpdb/pathbank use pathwayOrder, but it seems useless/nonsensical: there is no nextStep, and participants are also added as pathwayComponent.
+ //So move each reaction/control from a PathwayStep that has no nextStep to the pathwayComponent property of its parent pathway, then remove the step.
+ for (PathwayStep step : new HashSet<>(model.getObjects(PathwayStep.class))) {
+ Pathway p = step.getPathwayOrderOf();
+ if (step.getNextStep().isEmpty() && step.getNextStepOf().isEmpty()) { //seems always TRUE (pathbank 2024/02)!
+ for (Process process : step.getStepProcess()) {
+ if (process instanceof Interaction && !Interaction.class.equals(process.getModelInterface())) {
+ p.addPathwayComponent(process);
}
}
+ step.getPathwayOrderOf().removePathwayOrder(step);
+ model.remove(step);
+ } else {
+ log.debug("keep pw step {} of pw {}", step.getUri(), p.getUri());
}
}
+ //delete dummy names (those starting with "SubPathway", ignoring case), if any
for (Named o : model.getObjects(Named.class)) {
- //delete bogus dummy names
for (String name : new HashSet<>(o.getName())) {
if (StringUtils.startsWithIgnoreCase(name, "SubPathway")) {
o.removeName(name);
- o.addComment(name);
}
}
}
-// ModelUtils.replace(model, replacements);
+ //remove dangling utility class objects, e.g., pathway steps, CVs, xrefs, etc.
ModelUtils.removeObjectsIfDangling(model, UtilityClass.class);
// convert model back to OutputStream for return
diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml
index 13c3f6d3..83b6747f 100644
--- a/src/main/resources/logback.xml
+++ b/src/main/resources/logback.xml
@@ -6,7 +6,7 @@
-
+
@@ -16,10 +16,10 @@
-
-
+
+
diff --git a/src/test/java/cpath/cleaner/PathbankCleanerTest.java b/src/test/java/cpath/cleaner/PathbankCleanerTest.java
index d9510667..acaee851 100644
--- a/src/test/java/cpath/cleaner/PathbankCleanerTest.java
+++ b/src/test/java/cpath/cleaner/PathbankCleanerTest.java
@@ -23,15 +23,12 @@ public class PathbankCleanerTest {
public final void testClean() throws IOException {
Cleaner cleaner = new PathbankCleaner();
String uri1 = "bioregistry.io/pathbank:SMP0000040"; //was "http://identifiers.org/smpdb/SMP0000040";
- String f1 = getClass().getClassLoader().getResource("").getPath() + File.separator + "testCleanPW000146.owl";
+ String f1 = getClass().getClassLoader().getResource("").getPath() + File.separator + "PW000146.cleaned.owl";
cleaner.clean(new FileInputStream(getClass().getResource("/PW000146.owl").getFile()), new FileOutputStream(f1));
Model m1 = new SimpleIOHandler().convertFromOWL(new FileInputStream(f1));
- assertTrue(m1.containsID(uri1));
Pathway p1 = (Pathway)m1.getByID(uri1);
- assertEquals("Glycolysis", p1.getDisplayName());
-
String uri2 = "bioregistry.io/pathbank:SMP0000057"; //was "http://identifiers.org/smpdb/SMP0000057";
- String f2 = getClass().getClassLoader().getResource("").getPath() + File.separator + "testCleanPW000005.owl";
+ String f2 = getClass().getClassLoader().getResource("").getPath() + File.separator + "PW000005.cleaned.owl";
cleaner.clean(new FileInputStream(getClass().getResource("/PW000005.owl").getFile()), new FileOutputStream(f2));
Model m2 = new SimpleIOHandler().convertFromOWL(new FileInputStream(f2));
@@ -40,17 +37,22 @@ public final void testClean() throws IOException {
Model model = BioPAXLevel.L3.getDefaultFactory().createModel();
merger.merge(model, m2);
merger.merge(model, m1);
- assertTrue(model.containsID(uri1));
- assertTrue(model.containsID(uri2));
- new SimpleIOHandler().convertToOWL(model, new FileOutputStream(
- getClass().getClassLoader().getResource("").getPath()
- + File.separator + "testCleanSmpdbMergeOK.owl"));
-
Pathway pw = (Pathway) model.getByID(uri1);
- assertEquals(37, pw.getPathwayComponent().size());
- assertTrue(pw.getPathwayOrder().isEmpty()); //smpdb/pathbank use pathwayOrder but it's useless - no nextStep at all!
- assertEquals(2, model.getObjects(Pathway.class).size());
- assertTrue(model.getObjects(PathwayStep.class).isEmpty());
- }
+ //write the merged model (debug)
+ new SimpleIOHandler().convertToOWL(model, new FileOutputStream(
+ getClass().getClassLoader().getResource("").getPath()
+ + File.separator + "PW000005-000146.merged.owl"));
+
+ assertAll(
+ () -> assertTrue(m1.containsID(uri1)),
+ () -> assertEquals("Glycolysis", p1.getDisplayName()),
+ () -> assertTrue(model.containsID(uri1)),
+ () -> assertTrue(model.containsID(uri2)),
+ () -> assertEquals(37, pw.getPathwayComponent().size()),
+ () -> assertTrue(pw.getPathwayOrder().isEmpty()), //smpdb/pathbank use pathwayOrder, but it's useless - no nextStep at all!
+ () -> assertEquals(2, model.getObjects(Pathway.class).size()),
+ () -> assertTrue(model.getObjects(PathwayStep.class).isEmpty())
+ );
+ }
}
diff --git a/src/main/resources/logback-test.xml b/src/test/resources/logback-test.xml
similarity index 87%
rename from src/main/resources/logback-test.xml
rename to src/test/resources/logback-test.xml
index e2721b6e..1075851e 100644
--- a/src/main/resources/logback-test.xml
+++ b/src/test/resources/logback-test.xml
@@ -6,7 +6,7 @@
-
+
@@ -16,8 +16,7 @@
-
-
+
diff --git a/work/cpath2.sh b/work/cpath2.sh
index c93bce4e..18b56fec 100644
--- a/work/cpath2.sh
+++ b/work/cpath2.sh
@@ -5,7 +5,7 @@ export CPATH2_HOME="."
JDK_JAVA_OPTIONS="--add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/java.lang.reflect=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED"
DEBUG_OPTS=""
#DEBUG_OPTS="-Dlogback.configurationFile=logback.xml -Xdebug -Xnoagent -Djava.compiler=NONE -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=12345"
-BASE_OPTS="-Dfile.encoding=UTF-8 -Xss32m -Xmx60g -Dpaxtools.normalizer.use-latest-registry=true -Dpaxtools.core.use-latest-genenames=true"
+BASE_OPTS="-Dlogback.configurationFile=logback.xml -Dfile.encoding=UTF-8 -Xss32m -Xmx60g -Dpaxtools.normalizer.use-latest-registry=true -Dpaxtools.core.use-latest-genenames=true"
#use List instead Map collections for read-only BioPAX model if we start the Web app but not if building the instance/data
EXTRA_OPTS="$BASE_OPTS -Dpaxtools.model.safeset=list -server"
diff --git a/work/logback.xml b/work/logback.xml
new file mode 100644
index 00000000..83b6747f
--- /dev/null
+++ b/work/logback.xml
@@ -0,0 +1,26 @@
+
+
+
+
+ true
+
+
+
+
+
+
+
+ %d %-5level [%thread] %logger{25} - %msg%n
+ true
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file