Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement mono graph #448

Merged
merged 9 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[*]
indent_style = space
indent_size = 2
max_line_length = 120
trim_trailing_whitespace = true
insert_final_newline = true
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
package org.monarchinitiative.phenol.graph;


import org.monarchinitiative.phenol.graph.csr.CsrOntologyGraphBuilder;
import org.monarchinitiative.phenol.graph.csr.mono.CsrMonoOntologyGraphBuilder;
import org.monarchinitiative.phenol.graph.csr.poly.CsrPolyOntologyGraphBuilder;
import org.monarchinitiative.phenol.ontology.data.TermId;

/**
Expand Down Expand Up @@ -31,7 +32,15 @@ private OntologyGraphBuilders(){}
*
*/
public static <E> OntologyGraphBuilder<TermId> csrBuilder(Class<E> clz) {
  // Exactly one delegate: a second `return` after the first is unreachable code, which Java rejects.
  return CsrPolyOntologyGraphBuilder.builder(clz);
}

/**
 * Create an {@link OntologyGraphBuilder} for a simple graph that has a single edge type and is backed by a CSR-like
 * data structure.
 *
 * @return a new builder obtained from {@link CsrMonoOntologyGraphBuilder#builder()}.
 */
public static OntologyGraphBuilder<TermId> monoCsrBuilder() {
  OntologyGraphBuilder<TermId> monoBuilder = CsrMonoOntologyGraphBuilder.builder();
  return monoBuilder;
}

}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package org.monarchinitiative.phenol.graph.csr.mono;

/**
* Essentially a record with {@link StaticCsrArray}s for getting parents and children.
*
* @param <T> type of data.
* @author <a href="mailto:[email protected]">Daniel Danis</a>
*/
class CsrData<T> {

  /** Array handed back by {@link #getParents()}. */
  private final StaticCsrArray<T> parents;

  /** Array handed back by {@link #getChildren()}. */
  private final StaticCsrArray<T> children;

  /**
   * Bundle the two arrays. The references are stored as given, without copying or validation.
   *
   * @param parentArray array to expose via {@link #getParents()}.
   * @param childArray  array to expose via {@link #getChildren()}.
   */
  CsrData(StaticCsrArray<T> parentArray, StaticCsrArray<T> childArray) {
    parents = parentArray;
    children = childArray;
  }

  /**
   * @return the array passed to the constructor as the first argument.
   */
  StaticCsrArray<T> getParents() {
    return this.parents;
  }

  /**
   * @return the array passed to the constructor as the second argument.
   */
  StaticCsrArray<T> getChildren() {
    return this.children;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package org.monarchinitiative.phenol.graph.csr.mono;

import org.monarchinitiative.phenol.graph.NodeNotPresentInGraphException;
import org.monarchinitiative.phenol.graph.OntologyGraph;
import org.monarchinitiative.phenol.utils.IterableIteratorWrapper;

import java.util.*;

/**
* An {@link OntologyGraph} that only supports one edge type and supports efficient retrieval of parent or child nodes.
* <p>
* It maintains a pair of CSR-like structures, {@link StaticCsrArray}, one for getting the parents and the other
* for children of a term. Both arrays are sorted to contain information for a node {@link T} under the same integer
* index. We get an index from a mapping.
*
* @param <T> type of the term/graph node.
* @author <a href="mailto:[email protected]">Daniel Danis</a>
*/
public class CsrMonoOntologyGraph<T> implements OntologyGraph<T> {

  private final T root;
  /** Maps each graph node to the integer index that is used to query both CSR arrays. */
  private final Map<T, Integer> nodesToIdx;
  private final StaticCsrArray<T> parents;
  private final StaticCsrArray<T> children;

  /**
   * Create the graph from pre-built parts. The arguments are stored as given, no copies are made.
   *
   * @param root       the root node.
   * @param nodesToIdx mapping from a node to its index in the CSR arrays.
   * @param parents    array queried by {@link #getParents(Object, boolean)}.
   * @param children   array queried by {@link #getChildren(Object, boolean)}.
   * @throws NullPointerException if any argument is {@code null}.
   */
  CsrMonoOntologyGraph(T root,
                       Map<T, Integer> nodesToIdx,
                       StaticCsrArray<T> parents,
                       StaticCsrArray<T> children) {
    this.root = Objects.requireNonNull(root);
    this.nodesToIdx = Objects.requireNonNull(nodesToIdx);
    this.parents = Objects.requireNonNull(parents);
    this.children = Objects.requireNonNull(children);
  }

  StaticCsrArray<T> getParentArray() {
    return parents;
  }

  StaticCsrArray<T> getChildArray() {
    return children;
  }

  /**
   * Look up the CSR index of a node.
   *
   * @param node the node to look up.
   * @return index of the node.
   * @throws NodeNotPresentInGraphException if the mapping has no index for the node.
   */
  private int getNodeIdx(T node) {
    Integer idx = nodesToIdx.get(node);
    if (idx == null) {
      throw new NodeNotPresentInGraphException(String.format("Item not found in the graph: %s", node));
    }
    return idx;
  }

  @Override
  public T root() {
    return root;
  }

  /**
   * @throws NodeNotPresentInGraphException if {@code source} is not a node of the graph.
   */
  @Override
  public Iterable<T> getChildren(T source, boolean includeSource) {
    return getImmediateNeighbors(children, source, includeSource);
  }

  /**
   * The presence of {@code source} is checked right away; the traversal is delegated to {@code TraversingIterator},
   * which receives the source and a function for getting the children of a node. Note that {@code includeSource}
   * is forwarded to each of these child lookups.
   *
   * @throws NodeNotPresentInGraphException if {@code source} is not a node of the graph.
   */
  @Override
  public Iterable<T> getDescendants(T source, boolean includeSource) {
    // Called only for the check that `source` is in the graph; the index itself is not needed.
    getNodeIdx(source);

    return new IterableIteratorWrapper<>(
      () -> new TraversingIterator<>(source, src -> getChildren(src, includeSource)));
  }

  /**
   * @throws NodeNotPresentInGraphException if {@code source} is not a node of the graph.
   */
  @Override
  public Iterable<T> getParents(T source, boolean includeSource) {
    return getImmediateNeighbors(parents, source, includeSource);
  }

  /**
   * The presence of {@code source} is checked right away; the traversal is delegated to {@code TraversingIterator},
   * which receives the source and a function for getting the parents of a node. Note that {@code includeSource}
   * is forwarded to each of these parent lookups.
   *
   * @throws NodeNotPresentInGraphException if {@code source} is not a node of the graph.
   */
  @Override
  public Iterable<T> getAncestors(T source, boolean includeSource) {
    // Called only for the check that `source` is in the graph; the index itself is not needed.
    getNodeIdx(source);

    return new IterableIteratorWrapper<>(
      () -> new TraversingIterator<>(source, src -> getParents(src, includeSource)));
  }

  /**
   * Get the nodes stored for {@code source} in the given CSR array.
   *
   * @param array         the parent or the child array.
   * @param source        the node whose neighbors are requested.
   * @param includeSource {@code true} if {@code source} should be part of the result.
   * @return the neighbors, wrapped in {@link SetIncludingSource} if {@code includeSource} is set.
   * @throws NodeNotPresentInGraphException if {@code source} is not a node of the graph.
   */
  private Iterable<T> getImmediateNeighbors(StaticCsrArray<T> array,
                                            T source,
                                            boolean includeSource) {
    int index = getNodeIdx(source);

    Set<T> nodes = array.getOutgoingNodes(index);

    return includeSource
      ? new SetIncludingSource<>(source, nodes)
      : nodes;
  }

  @Override
  public int size() {
    return nodesToIdx.size();
  }

  /**
   * Iterate over all nodes of the graph.
   *
   * @return a read-only iterator; {@link Iterator#remove()} throws {@link UnsupportedOperationException}.
   */
  @Override
  public Iterator<T> iterator() {
    // Removing a key through the raw key set iterator would drop the node from the index while the CSR arrays
    // still refer to it, hence the read-only view.
    return Collections.unmodifiableSet(nodesToIdx.keySet()).iterator();
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package org.monarchinitiative.phenol.graph.csr.mono;

import org.monarchinitiative.phenol.graph.*;
import org.monarchinitiative.phenol.graph.csr.util.Util;
import org.monarchinitiative.phenol.ontology.data.TermId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Builder for {@link CsrMonoOntologyGraph}.
 *
 * @author <a href="mailto:[email protected]">Daniel Danis</a>
 */
public class CsrMonoOntologyGraphBuilder implements OntologyGraphBuilder<TermId> {

  private static final Logger LOGGER = LoggerFactory.getLogger(CsrMonoOntologyGraphBuilder.class);

  // Relation type of the edges that make it into the CSR arrays. Starts as `RelationTypes.isA()`.
  private RelationType hierarchyRelation = RelationTypes.isA();

  /**
   * Get a new builder instance.
   */
  public static CsrMonoOntologyGraphBuilder builder() {
    return new CsrMonoOntologyGraphBuilder();
  }

  /**
   * Set the relation type used to select the edges. A {@code null} value is not an error: a warning is logged
   * and the current relation type is kept.
   *
   * @return this builder.
   */
  @Override
  public OntologyGraphBuilder<TermId> hierarchyRelation(RelationType relationType) {
    if (relationType != null) {
      this.hierarchyRelation = relationType;
      return this;
    }

    LOGGER.warn("Hierarchy relation type must not be null. Skipping..");
    return this;
  }

  /**
   * Build the graph. Graph nodes are the distinct subjects and objects of <em>all</em> {@code edges}, sorted
   * with {@link TermId#compareTo}, while the CSR arrays are built only from the edges whose relation type equals
   * the configured hierarchy relation.
   *
   * @param root  the root node, passed to the graph as is.
   * @param edges the edges to build the graph from.
   */
  @Override
  public CsrMonoOntologyGraph<TermId> build(TermId root, Collection<? extends OntologyGraphEdge<TermId>> edges) {
    LOGGER.debug("Extracting edges with target hierarchy relation {}", hierarchyRelation.label());
    List<? extends OntologyGraphEdge<TermId>> relevantEdges = edges.stream()
      .filter(edge -> edge.relationType().equals(hierarchyRelation))
      .collect(Collectors.toList());

    LOGGER.debug("Sorting graph nodes");
    TermId[] sortedNodes = edges.stream()
      .flatMap(edge -> Stream.of(edge.subject(), edge.object()))
      .distinct()
      .sorted(TermId::compareTo)
      .toArray(TermId[]::new);

    LOGGER.debug("Building CSR arrays");
    CsrData<TermId> csr = makeCsrData(sortedNodes, relevantEdges);

    // A node's index is its position in the sorted array, the same position that is used as the CSR row.
    Map<TermId, Integer> indexByNode = new HashMap<>();
    int position = 0;
    for (TermId node : sortedNodes) {
      indexByNode.put(node, position);
      position++;
    }

    return new CsrMonoOntologyGraph<>(root, indexByNode, csr.getParents(), csr.getChildren());
  }

  /**
   * Build the parent and child CSR arrays, one row per node, in the order of {@code nodes}.
   *
   * @param nodes the graph nodes; the array position of a node is its row index.
   * @param edges the edges to distribute into the rows.
   */
  private CsrData<TermId> makeCsrData(TermId[] nodes,
                                      Collection<? extends OntologyGraphEdge<TermId>> edges) {
    Map<Integer, List<OntologyGraphEdge<TermId>>> edgesByRow = Util.findAdjacentEdges(nodes, edges);

    // Each index pointer list starts with 0 and gets one more entry per row: the running size of the data list.
    List<TermId> parentData = new ArrayList<>();
    List<Integer> parentOffsets = new ArrayList<>();
    parentOffsets.add(0);

    List<TermId> childData = new ArrayList<>();
    List<Integer> childOffsets = new ArrayList<>();
    childOffsets.add(0);

    for (int row = 0; row < nodes.length; row++) {
      TermId current = nodes[row];
      List<OntologyGraphEdge<TermId>> rowEdges = edgesByRow.getOrDefault(row, List.of());

      for (OntologyGraphEdge<TermId> edge : rowEdges) {
        if (current.equals(edge.object())) {
          // The current node is the edge's `object`, so the `subject` is stored as its child.
          childData.add(edge.subject());
        } else {
          // Otherwise the edge's `object` is stored as a parent of the current node.
          parentData.add(edge.object());
        }
      }

      parentOffsets.add(parentData.size());
      childOffsets.add(childData.size());
    }

    int[] parentIndptr = Util.toIntArray(parentOffsets);
    int[] childIndptr = Util.toIntArray(childOffsets);

    StaticCsrArray<TermId> parentsArray = new StaticCsrArray<>(parentIndptr, parentData.toArray(new TermId[0]));
    StaticCsrArray<TermId> childrenArray = new StaticCsrArray<>(childIndptr, childData.toArray(new TermId[0]));

    return new CsrData<>(parentsArray, childrenArray);
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package org.monarchinitiative.phenol.graph.csr.mono;

import java.util.AbstractSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;

/**
* A utility implementation of a {@link Set} that contains
* an <code>item</code>, a singular instance of {@link T}, and the {@link T} elements in the <code>other</code> set.
*
* @param <T> – the type of elements in this set
* @author <a href="mailto:[email protected]">Daniel Danis</a>
*/
class SetIncludingSource<T> extends AbstractSet<T> {

  private final T item;
  private final Set<T> other;

  /**
   * @param item  the singular element; it is always the first element of the iteration.
   * @param other the remaining elements. The set is not copied, so this set is a view of it.
   * @throws NullPointerException if {@code other} is {@code null}.
   */
  SetIncludingSource(T item, Set<T> other) {
    this.item = item;
    this.other = Objects.requireNonNull(other, "other must not be null");
  }

  /**
   * Get an iterator that yields <code>item</code> first and then the elements of <code>other</code>,
   * except for an element equal to <code>item</code>, so that no element is yielded twice.
   */
  @Override
  public Iterator<T> iterator() {
    return new IncludingIterator<>(item, other.iterator());
  }

  /**
   * @return the number of distinct elements: <code>item</code> is counted once even if <code>other</code>
   * contains it as well.
   */
  @Override
  public int size() {
    return otherContainsItem()
      ? other.size()
      : other.size() + 1;
  }

  /**
   * Check if <code>item</code> is also present in <code>other</code>.
   */
  private boolean otherContainsItem() {
    if (item != null) {
      return other.contains(item);
    }

    // Some sets (e.g. those made by `Set.of`) throw on `contains(null)`, so we scan for `null` instead.
    for (T candidate : other) {
      if (candidate == null) {
        return true;
      }
    }
    return false;
  }

  /**
   * An {@link Iterator} that first yields the <code>first</code> item and then the items from
   * the <code>remaining</code> iterator that are not equal to <code>first</code>.
   * <p>
   * NOT THREAD SAFE, of course!
   *
   * @param <T> the type of elements in this iterator
   */
  private static class IncludingIterator<T> implements Iterator<T> {

    private final T first;
    private final Iterator<T> remaining;
    private boolean yieldedFirst = false;
    // One-element look-ahead. It is needed because skipping the duplicate of `first` means that
    // `remaining.hasNext()` alone cannot tell if there is anything left to yield.
    private T buffered;
    private boolean hasBuffered = false;

    private IncludingIterator(T first, Iterator<T> remaining) {
      this.first = first;
      this.remaining = remaining;
    }

    @Override
    public boolean hasNext() {
      // `remaining` is not touched until `first` has been yielded.
      return !yieldedFirst || fillBuffer();
    }

    /**
     * @throws NoSuchElementException if the iteration has no more elements.
     */
    @Override
    public T next() {
      if (!yieldedFirst) {
        yieldedFirst = true;
        return first;
      }

      if (!fillBuffer()) {
        throw new NoSuchElementException();
      }

      T value = buffered;
      buffered = null;
      hasBuffered = false;
      return value;
    }

    /**
     * Pull the next element that is not equal to <code>first</code> into the look-ahead buffer,
     * unless the buffer is already full.
     *
     * @return <code>true</code> if the buffer holds an element to yield.
     */
    private boolean fillBuffer() {
      while (!hasBuffered && remaining.hasNext()) {
        T candidate = remaining.next();
        if (!Objects.equals(first, candidate)) {
          buffered = candidate;
          hasBuffered = true;
        }
      }
      return hasBuffered;
    }
  }

}
Loading