Skip to content

Commit

Permalink
Merge pull request #448 from monarch-initiative/mono-graph
Browse files Browse the repository at this point in the history
Mono graph
  • Loading branch information
ielis authored Nov 20, 2023
2 parents cac1441 + 1330cbf commit b424eac
Show file tree
Hide file tree
Showing 55 changed files with 1,582 additions and 611 deletions.
1 change: 1 addition & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[*]
indent_style = space
indent_size = 2
max_line_length = 120
trim_trailing_whitespace = true
insert_final_newline = true
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
package org.monarchinitiative.phenol.graph;


import org.monarchinitiative.phenol.graph.csr.CsrOntologyGraphBuilder;
import org.monarchinitiative.phenol.graph.csr.mono.CsrMonoOntologyGraphBuilder;
import org.monarchinitiative.phenol.graph.csr.poly.CsrPolyOntologyGraphBuilder;
import org.monarchinitiative.phenol.ontology.data.TermId;

/**
Expand Down Expand Up @@ -31,7 +32,15 @@ private OntologyGraphBuilders(){}
*
*/
public static <E> OntologyGraphBuilder<TermId> csrBuilder(Class<E> clz) {
  // Delegate to the poly CSR builder. The stale `CsrOntologyGraphBuilder` return that used to precede this line
  // made it unreachable.
  return CsrPolyOntologyGraphBuilder.builder(clz);
}

/**
 * Create an {@link OntologyGraphBuilder} that builds a simple graph with a single edge type on top of a CSR-like
 * data structure.
 *
 * @return the builder obtained from {@link CsrMonoOntologyGraphBuilder#builder()}.
 */
public static OntologyGraphBuilder<TermId> monoCsrBuilder() {
  OntologyGraphBuilder<TermId> monoBuilder = CsrMonoOntologyGraphBuilder.builder();
  return monoBuilder;
}

}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package org.monarchinitiative.phenol.graph.csr.mono;

/**
 * A plain value holder that pairs two {@link StaticCsrArray}s: one used for getting parents and one used
 * for getting children.
 *
 * @param <T> type of data.
 * @author <a href="mailto:[email protected]">Daniel Danis</a>
 */
class CsrData<T> {

  private final StaticCsrArray<T> parents;
  private final StaticCsrArray<T> children;

  /**
   * Store both arrays as they are; the arrays are neither validated nor copied.
   *
   * @param parentArray the array returned by {@link #getParents()}.
   * @param childArray  the array returned by {@link #getChildren()}.
   */
  CsrData(StaticCsrArray<T> parentArray, StaticCsrArray<T> childArray) {
    parents = parentArray;
    children = childArray;
  }

  /**
   * @return the array for getting children, exactly as provided to the constructor.
   */
  StaticCsrArray<T> getChildren() {
    return this.children;
  }

  /**
   * @return the array for getting parents, exactly as provided to the constructor.
   */
  StaticCsrArray<T> getParents() {
    return this.parents;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package org.monarchinitiative.phenol.graph.csr.mono;

import org.monarchinitiative.phenol.graph.NodeNotPresentInGraphException;
import org.monarchinitiative.phenol.graph.OntologyGraph;
import org.monarchinitiative.phenol.utils.IterableIteratorWrapper;

import java.util.*;

/**
 * An {@link OntologyGraph} that only supports one edge type and supports efficient retrieval of parent or child nodes.
 * <p>
 * It maintains a pair of CSR-like structures, {@link StaticCsrArray}, one for getting the parents and the other
 * for getting the children of a node. Both arrays are queried with the same integer index for a given node
 * {@code T}, and the index is looked up in a node-to-index mapping.
 *
 * @param <T> type of the term/graph node.
 * @author <a href="mailto:[email protected]">Daniel Danis</a>
 */
public class CsrMonoOntologyGraph<T> implements OntologyGraph<T> {

  private final T root;
  private final Map<T, Integer> nodesToIdx;
  private final StaticCsrArray<T> parents;
  private final StaticCsrArray<T> children;

  /**
   * Create the graph. The arguments are stored as provided, no copies are made.
   *
   * @param root       the node reported by {@link #root()}.
   * @param nodesToIdx mapping from a node to the index used for querying both arrays.
   * @param parents    array for getting the parents of a node.
   * @param children   array for getting the children of a node.
   * @throws NullPointerException if any argument is {@code null}.
   */
  CsrMonoOntologyGraph(T root,
                       Map<T, Integer> nodesToIdx,
                       StaticCsrArray<T> parents,
                       StaticCsrArray<T> children) {
    this.root = Objects.requireNonNull(root);
    this.nodesToIdx = Objects.requireNonNull(nodesToIdx);
    this.parents = Objects.requireNonNull(parents);
    this.children = Objects.requireNonNull(children);
  }

  /**
   * @return the array used for getting the parents.
   */
  StaticCsrArray<T> getParentArray() {
    return parents;
  }

  /**
   * @return the array used for getting the children.
   */
  StaticCsrArray<T> getChildArray() {
    return children;
  }

  /**
   * Look up the array index of the {@code node}.
   *
   * @throws NodeNotPresentInGraphException if the {@code node} has no index in the mapping.
   */
  private int getNodeIdx(T node) {
    Integer idx = nodesToIdx.get(node);
    if (idx == null) {
      throw new NodeNotPresentInGraphException(String.format("Item not found in the graph: %s", node));
    }
    return idx;
  }

  @Override
  public T root() {
    return root;
  }

  /**
   * {@inheritDoc}
   *
   * @throws NodeNotPresentInGraphException if the {@code source} is not in the graph.
   */
  @Override
  public Iterable<T> getChildren(T source, boolean includeSource) {
    return getImmediateNeighbors(children, source, includeSource);
  }

  /**
   * {@inheritDoc}
   * <p>
   * The traversal is delegated to a {@code TraversingIterator} that starts at the {@code source} and expands
   * each node with {@link #getChildren(Object, boolean)}, passing {@code includeSource} along.
   *
   * @throws NodeNotPresentInGraphException if the {@code source} is not in the graph.
   */
  @Override
  public Iterable<T> getDescendants(T source, boolean includeSource) {
    // Called only for its check: throws if `source` is not in the graph. The iterator below is created inside
    // a supplier lambda, so we validate up front instead of relying on when the supplier gets invoked.
    getNodeIdx(source);

    return new IterableIteratorWrapper<>(
      () -> new TraversingIterator<>(source, src -> getChildren(src, includeSource)));
  }

  /**
   * {@inheritDoc}
   *
   * @throws NodeNotPresentInGraphException if the {@code source} is not in the graph.
   */
  @Override
  public Iterable<T> getParents(T source, boolean includeSource) {
    return getImmediateNeighbors(parents, source, includeSource);
  }

  /**
   * {@inheritDoc}
   * <p>
   * The traversal is delegated to a {@code TraversingIterator} that starts at the {@code source} and expands
   * each node with {@link #getParents(Object, boolean)}, passing {@code includeSource} along.
   *
   * @throws NodeNotPresentInGraphException if the {@code source} is not in the graph.
   */
  @Override
  public Iterable<T> getAncestors(T source, boolean includeSource) {
    // Called only for its check: throws if `source` is not in the graph (see `getDescendants`).
    getNodeIdx(source);

    return new IterableIteratorWrapper<>(
      () -> new TraversingIterator<>(source, src -> getParents(src, includeSource)));
  }

  /**
   * Get the nodes stored for the {@code source} in the {@code array}, optionally with the {@code source} itself.
   *
   * @throws NodeNotPresentInGraphException if the {@code source} is not in the graph.
   */
  private Iterable<T> getImmediateNeighbors(StaticCsrArray<T> array,
                                            T source,
                                            boolean includeSource) {
    int index = getNodeIdx(source);

    Set<T> nodes = array.getOutgoingNodes(index);

    return includeSource
      ? new SetIncludingSource<>(source, nodes)
      : nodes;
  }

  /**
   * @return the number of nodes in the node-to-index mapping.
   */
  @Override
  public int size() {
    return nodesToIdx.size();
  }

  /**
   * @return a read-only iterator over the graph nodes; {@link Iterator#remove()} is not supported.
   */
  @Override
  public Iterator<T> iterator() {
    // Never hand out the key set's own iterator: its `remove()` would delete nodes from the index map
    // while the CSR arrays still hold them.
    return Collections.unmodifiableSet(nodesToIdx.keySet()).iterator();
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package org.monarchinitiative.phenol.graph.csr.mono;

import org.monarchinitiative.phenol.graph.*;
import org.monarchinitiative.phenol.graph.csr.util.Util;
import org.monarchinitiative.phenol.ontology.data.TermId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Builder for {@link CsrMonoOntologyGraph}.
 * <p>
 * Only the edges with the configured hierarchy relation ({@link RelationTypes#isA()} unless changed with
 * {@link #hierarchyRelation(RelationType)}) are stored in the graph.
 *
 * @author <a href="mailto:[email protected]">Daniel Danis</a>
 */
public class CsrMonoOntologyGraphBuilder implements OntologyGraphBuilder<TermId> {

  private static final Logger LOGGER = LoggerFactory.getLogger(CsrMonoOntologyGraphBuilder.class);

  private RelationType hierarchyRelation = RelationTypes.isA();

  /**
   * Create the builder.
   */
  public static CsrMonoOntologyGraphBuilder builder() {
    return new CsrMonoOntologyGraphBuilder();
  }

  /**
   * Set the relation type of the edges that form the hierarchy.
   * <p>
   * A {@code null} relation type is ignored with a warning and the previously set relation type is kept.
   *
   * @return this builder.
   */
  @Override
  public OntologyGraphBuilder<TermId> hierarchyRelation(RelationType relationType) {
    if (relationType == null) {
      LOGGER.warn("Hierarchy relation type must not be null. Skipping..");
    } else {
      this.hierarchyRelation = relationType;
    }
    return this;
  }

  /**
   * Build the graph from the {@code edges}.
   * <p>
   * The graph nodes are the distinct subjects and objects of <em>all</em> {@code edges}, sorted with
   * {@link TermId#compareTo}. The parent and child arrays are built only from the edges whose relation type
   * equals the hierarchy relation.
   *
   * @param root  the root of the graph.
   * @param edges the edges to build the graph from.
   * @return the built graph.
   * @throws NullPointerException if {@code root} or {@code edges} is {@code null}.
   */
  @Override
  public CsrMonoOntologyGraph<TermId> build(TermId root, Collection<? extends OntologyGraphEdge<TermId>> edges) {
    // Fail fast. The graph constructor rejects a `null` root anyway, but only after all the work below is done.
    Objects.requireNonNull(root, "Root must not be null");
    Objects.requireNonNull(edges, "Edges must not be null");

    LOGGER.debug("Extracting edges with target hierarchy relation {}", hierarchyRelation.label());
    List<? extends OntologyGraphEdge<TermId>> hierarchyEdges = edges.stream()
      .filter(e -> e.relationType().equals(hierarchyRelation))
      .collect(Collectors.toList());

    LOGGER.debug("Sorting graph nodes");
    TermId[] nodes = edges.stream()
      .flatMap(e -> Stream.of(e.subject(), e.object()))
      .distinct()
      .sorted(TermId::compareTo)
      .toArray(TermId[]::new);

    LOGGER.debug("Building CSR arrays");
    CsrData<TermId> csrData = makeCsrData(nodes, hierarchyEdges);

    // The position of a node in the sorted array is the index used for querying the CSR arrays.
    Map<TermId, Integer> nodeToIdx = new HashMap<>();
    for (int i = 0; i < nodes.length; i++) {
      nodeToIdx.put(nodes[i], i);
    }

    return new CsrMonoOntologyGraph<>(root, nodeToIdx, csrData.getParents(), csrData.getChildren());
  }

  /**
   * Build the parent and child arrays. Each node of {@code nodes} gets one row per array, in the array order.
   */
  private CsrData<TermId> makeCsrData(TermId[] nodes,
                                      Collection<? extends OntologyGraphEdge<TermId>> edges) {
    Map<Integer, List<OntologyGraphEdge<TermId>>> adjacentEdges = Util.findAdjacentEdges(nodes, edges);

    // `indptr[i]` and `indptr[i + 1]` delimit the slice of the data list that belongs to the node `i`.
    List<Integer> parentIndptr = new ArrayList<>();
    parentIndptr.add(0);
    List<TermId> parents = new ArrayList<>();

    List<Integer> childIndptr = new ArrayList<>();
    childIndptr.add(0);
    List<TermId> children = new ArrayList<>();

    for (int rowIdx = 0; rowIdx < nodes.length; rowIdx++) {
      TermId source = nodes[rowIdx];
      List<OntologyGraphEdge<TermId>> adjacent = adjacentEdges.getOrDefault(rowIdx, List.of());

      for (OntologyGraphEdge<TermId> edge : adjacent) {
        // In a hierarchy edge, the `subject` is the child and the `object` is the parent.
        if (source.equals(edge.object())) {
          // `source` is the parent of the edge, hence the `subject` is a child of `source`.
          children.add(edge.subject());
        } else {
          // Otherwise `source` is taken to be the `subject` (child), hence the `object` is a parent of `source`.
          parents.add(edge.object());
        }
      }

      // Close the row of the current node in both arrays.
      parentIndptr.add(parents.size());
      childIndptr.add(children.size());
    }

    StaticCsrArray<TermId> parentsArray = new StaticCsrArray<>(Util.toIntArray(parentIndptr),
      parents.toArray(new TermId[0]));
    StaticCsrArray<TermId> childrenArray = new StaticCsrArray<>(Util.toIntArray(childIndptr),
      children.toArray(new TermId[0]));

    return new CsrData<>(parentsArray, childrenArray);
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package org.monarchinitiative.phenol.graph.csr.mono;

import java.util.AbstractSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;

/**
 * A utility implementation of a {@link Set} that contains
 * an <code>item</code>, a singular instance of {@code T}, and the {@code T} elements in the <code>other</code> set.
 * <p>
 * The set is a view: the <code>other</code> set is not copied. Iteration yields the <code>item</code> first,
 * followed by the elements of <code>other</code> in its own iteration order. If <code>other</code> already
 * contains the <code>item</code>, the <code>item</code> is reported only once, both by the iterator
 * and by {@link #size()}, so that the {@link Set} contract of no duplicate elements holds.
 *
 * @param <T> – the type of elements in this set
 * @author <a href="mailto:[email protected]">Daniel Danis</a>
 */
class SetIncludingSource<T> extends AbstractSet<T> {

  private final T item;
  private final Set<T> other;

  /**
   * @param item  the element to include in addition to the elements of <code>other</code>.
   * @param other the set with the remaining elements.
   */
  SetIncludingSource(T item, Set<T> other) {
    this.item = item;
    this.other = other;
  }

  @Override
  public Iterator<T> iterator() {
    return new IncludingIterator<>(item, other.iterator());
  }

  @Override
  public int size() {
    // Do not count the `item` twice if `other` has it already.
    return otherContainsItem() ? other.size() : other.size() + 1;
  }

  /**
   * Check if <code>other</code> contains the <code>item</code>.
   */
  private boolean otherContainsItem() {
    if (item != null) {
      return other.contains(item);
    }
    // `contains(null)` may throw in sets that do not permit `null`s, hence we scan instead.
    for (T element : other) {
      if (element == null) {
        return true;
      }
    }
    return false;
  }

  /**
   * An {@link Iterator} that first yields the <code>first</code> item and then the items from the
   * <code>remaining</code> iterator, except for the items equal to <code>first</code>.
   * <p>
   * NOT THREAD SAFE, of course!
   *
   * @param <T> – the type of elements in this iterator
   */
  private static class IncludingIterator<T> implements Iterator<T> {

    private final T first;
    private final Iterator<T> remaining;
    private boolean yieldedFirst = false;
    // One-element lookahead buffer, needed for skipping the items equal to `first`.
    private T lookahead;
    private boolean hasLookahead = false;

    private IncludingIterator(T first, Iterator<T> remaining) {
      this.first = first;
      this.remaining = remaining;
    }

    @Override
    public boolean hasNext() {
      if (!yieldedFirst) {
        return true;
      }
      fillLookahead();
      return hasLookahead;
    }

    @Override
    public T next() {
      if (!yieldedFirst) {
        yieldedFirst = true;
        return first;
      }

      fillLookahead();
      if (!hasLookahead) {
        throw new NoSuchElementException();
      }
      T result = lookahead;
      lookahead = null;
      hasLookahead = false;
      return result;
    }

    /**
     * Pull from <code>remaining</code> until an item different from <code>first</code> is buffered
     * or <code>remaining</code> is exhausted.
     */
    private void fillLookahead() {
      while (!hasLookahead && remaining.hasNext()) {
        T candidate = remaining.next();
        if (!Objects.equals(first, candidate)) {
          lookahead = candidate;
          hasLookahead = true;
        }
      }
    }
  }

}
Loading

0 comments on commit b424eac

Please sign in to comment.