Skip to content

Commit

Permalink
NeoMotif importer, adding Gene index, adding re-attempts for concurrent updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
marco-brandizi committed Feb 8, 2024
1 parent 1d57b9f commit 34898b0
Showing 1 changed file with 50 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
import uk.ac.ebi.utils.exceptions.ExceptionUtils;
import uk.ac.ebi.utils.runcontrol.PercentProgressLogger;
import uk.ac.ebi.utils.streams.StreamUtils;
import uk.ac.rothamsted.knetminer.backend.cypher.CypherClient;
import uk.ac.rothamsted.neo4j.utils.Neo4jDataManager;
import uk.org.lidalia.slf4jext.Level;

/**
* The Neo4j importer for semantic motif summaries.
Expand Down Expand Up @@ -168,26 +171,23 @@ private void saveMotifLinks ( Map<Pair<Integer, Integer>, Integer> genes2PathLen
*
*/
private int processMotifLinksBatch ( List<Map<String, Object>> smRelationsBatch )
{
try ( Session session = driver.session() )
{
/*
* TODO: see if more optimisation is possible:
* https://community.neo4j.com/t/create-cypher-query-very-slow/62780
* https://medium.com/neo4j/5-tips-tricks-for-fast-batched-updates-of-graph-structures-with-neo4j-and-cypher-73c7f693c8cc
*/
session.executeWriteWithoutResult ( tx ->
{
String cyRelations =
"""
UNWIND $smRelRows AS relRow\s
MATCH ( gene:Gene { ondexId: relRow.geneId } ),
( concept:Concept { ondexId: relRow.conceptId } )
CREATE (gene) - [:hasMotifLink{ graphDistance: relRow.graphDistance }] -> (concept)
""";
tx.run ( cyRelations, Map.of ( "smRelRows", smRelationsBatch) );
});
}
{
String cyRelations = """
UNWIND $smRelRows AS relRow\s
MATCH ( gene:Gene { ondexId: relRow.geneId } ),
( concept:Concept { ondexId: relRow.conceptId } )
CREATE (gene) - [:hasMotifLink{ graphDistance: relRow.graphDistance }] -> (concept)
""";

var neoMgr = new Neo4jDataManager ( driver );
// neoMgr.setAttemptMsgLogLevel ( Level.INFO );
// neoMgr.setMaxRetries ( 3 );

// Wraps it in a transaction and also re-attempts it in case of
// node lock issues.
//
neoMgr.runCypher ( cyRelations, "smRelRows", smRelationsBatch );

log.trace ( "{} links stored", smRelationsBatch.size () );
return smRelationsBatch.size ();
}
Expand All @@ -213,8 +213,8 @@ private void deleteOldMotifLinks ()
}

/**
* Create a Neo4j index about Concept.ondexId, the field currently used for
* identifying nodes.
* Create Neo4j indexes on Concept.ondexId and Gene.ondexId, the properties
* currently used for identifying nodes.
*
* We added this to rdf2neo (via the Ondex config), here, we're keeping it
* to get old data auto-updated and be sure this index is used by the
Expand All @@ -223,7 +223,7 @@ private void deleteOldMotifLinks ()
*/
private void createIdIndex ()
{
log.info ( "Creating Neo4j node ID index" );
log.info ( "Creating Neo4j Concept ID indexes" );

try ( Session session = driver.session () )
{
Expand All @@ -233,9 +233,19 @@ private void createIdIndex ()
"CREATE INDEX concept_ondexId IF NOT EXISTS FOR (c:Concept) ON (c.ondexId)"
);
});
log.debug ( "Concept index done" );

// It turns out this index is needed too, even though all Gene nodes are also Concept nodes
session.executeWriteWithoutResult ( tx ->
{
tx.run (
"CREATE INDEX gene_ondexId IF NOT EXISTS FOR (g:Gene) ON (g.ondexId)"
);
});
log.debug ( "Gene index done" );
}

log.info ( "ID index created" );
log.info ( "ID indexes created" );
}

/**
Expand Down Expand Up @@ -330,20 +340,22 @@ private void saveConceptGeneCounts ( Map<Integer, Set<Integer>> concepts2Genes )
*/
private int processConceptGeneCountsBatch ( List<Map<String, Object>> countRowsBatch )
{
try ( Session session = driver.session() )
{
session.executeWriteWithoutResult ( tx ->
{
String cyRelations =
"""
UNWIND $countRows AS countRow\s
MATCH ( concept:Concept { ondexId: countRow.conceptId } )
CREATE (concept) - [:hasMotifStats]
-> (smStats:SemanticMotifStats{ conceptGenesCount: countRow.genesCount })
""";
tx.run ( cyRelations, Map.of ( "countRows", countRowsBatch) );
});
}
String cyRelations = """
UNWIND $countRows AS countRow\s
MATCH ( concept:Concept { ondexId: countRow.conceptId } )
CREATE (concept) - [:hasMotifStats]
-> (smStats:SemanticMotifStats{ conceptGenesCount: countRow.genesCount })
""";

var neoMgr = new Neo4jDataManager ( driver );
// neoMgr.setAttemptMsgLogLevel ( Level.INFO );
// neoMgr.setMaxRetries ( 3 );

// Wraps it in a transaction and also re-attempts it in case of
// node lock issues.
//
neoMgr.runCypher ( cyRelations, "countRows", countRowsBatch );

log.trace ( "{} per-gene concept counts stored", countRowsBatch.size () );
return countRowsBatch.size ();
}
Expand Down

0 comments on commit 34898b0

Please sign in to comment.