Skip to content

Commit

Permalink
Merge branch 'master' of github.com:bst-mug/trec-pm
Browse files Browse the repository at this point in the history
  • Loading branch information
michelole committed Feb 18, 2019
2 parents cf69ab7 + 7a17655 commit 2e4dc8f
Show file tree
Hide file tree
Showing 10 changed files with 389 additions and 64 deletions.
68 changes: 4 additions & 64 deletions src/main/java/at/medunigraz/imi/bst/lexigram/Lexigram.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package at.medunigraz.imi.bst.lexigram;

import at.medunigraz.imi.bst.config.TrecConfig;
import at.medunigraz.imi.bst.trec.expansion.CachedWebRequester;
import at.medunigraz.imi.bst.trec.model.Topic;
import at.medunigraz.imi.bst.trec.model.TopicSet;
import at.medunigraz.imi.bst.trec.stats.CSVStatsWriter;
Expand Down Expand Up @@ -28,6 +29,8 @@ public class Lexigram {

private static final String ENDPOINT = "https://api.lexigram.io/v1/lexigraph/";

private static final CachedWebRequester REQUESTER = new CachedWebRequester("cache/lexigramV2.ser");

private static final List<String> NOISE = new ArrayList<>();
static {
NOISE.add("classification");
Expand All @@ -39,37 +42,6 @@ public class Lexigram {
NOISE.add("ca - ");
}

/**
 * Disk-backed memo of responses, keyed by request URL.
 *
 * <p>The map is read from {@link #FILENAME} once, when this class is initialized and the file
 * exists, and is rewritten in full by every call to {@link #save()}.
 */
private static class Cache {
    private static final String FILENAME = "cache/lexigram.ser";
    private static HashMap<String, String> CALLS = new HashMap<>();
    static {
        if (Files.exists(Paths.get(FILENAME))) {
            load();
        }
    }

    /**
     * Replaces {@link #CALLS} with the map deserialized from {@link #FILENAME}.
     *
     * @throws RuntimeException wrapping any failure to open, read or deserialize the file
     */
    @SuppressWarnings("unchecked") // the file is only ever written by save(), which stores CALLS
    private static void load() {
        // Both streams are declared as resources so that they are closed even when the
        // ObjectInputStream constructor or readObject() throws.
        try (FileInputStream fis = new FileInputStream(FILENAME);
             ObjectInputStream ois = new ObjectInputStream(fis)) {
            CALLS = (HashMap<String, String>) ois.readObject();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Serializes {@link #CALLS} to {@link #FILENAME}, overwriting any previous content.
     *
     * @throws RuntimeException wrapping any {@link IOException} raised while writing
     */
    private static void save() {
        // try-with-resources guarantees the file handle is released if writeObject() fails.
        try (FileOutputStream fos = new FileOutputStream(FILENAME);
             ObjectOutputStream oos = new ObjectOutputStream(fos)) {
            oos.writeObject(CALLS);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}

public static boolean isAPIKeyLoaded() {
final int MIN_API_KEY_LENGTH = 20;
return TrecConfig.LEXIGRAM_APIKEY.length() > MIN_API_KEY_LENGTH;
Expand Down Expand Up @@ -231,39 +203,7 @@ private static String cleanUpString(String label) {
}

/**
 * Returns the JSON object associated with the given URL.
 *
 * <p>When the URL is not yet a key of {@code Cache.CALLS}, the URL is requested with an
 * {@code authorization: Bearer} header built from {@code TrecConfig.LEXIGRAM_APIKEY}, the
 * {@code "object"} member of the response is stored under the URL and the cache is saved.
 *
 * <p>NOTE(review): as shown here the body ends with two consecutive {@code return} statements,
 * so the second one (delegating to {@code REQUESTER}) can never run. This presumably is an
 * artifact of removed and added diff lines being interleaved; confirm which variant is intended.
 *
 * @param url the URL to request
 * @return the stored response for {@code url}, parsed as a {@link JSONObject}
 * @throws RuntimeException if the request fails or the status code is not 200
 * @throws JSONException if the response body has no {@code "object"} member that is a JSON object
 */
private static JSONObject get(String url) {
// Only hit the network for URLs that have not been stored yet.
if (!Cache.CALLS.containsKey(url)) {
HttpResponse<JsonNode> response = null;
try {
response = Unirest.get(url)
.header("authorization", "Bearer " + TrecConfig.LEXIGRAM_APIKEY)
.asJson();
} catch (UnirestException e) {
throw new RuntimeException(e);
}

// 401 gets a dedicated message pointing at the properties file that holds the key.
if (response.getStatus() == 401) {
throw new RuntimeException("Unauthorized access to Lexigram API. Place your key in the file trec-pm.properties.");
}

// Every other non-200 status is reported together with the response body.
if (response.getStatus() != 200) {
throw new RuntimeException("Got status code " + response.getStatus() + " from Lexigram API with body " + response.getBody());
}

JSONObject body = new JSONObject(response.getBody());

// Only the "object" member is kept; it is stored in its string form.
String firstArrayObject = "";
try {
firstArrayObject = body.getJSONObject("object").toString();
} catch (JSONException e) {
// Log the offending body before rethrowing the same exception.
LOG.error("Unexpected response from Lexigram API: " + body);
throw e;
}

// Store the new entry and persist the cache right away.
Cache.CALLS.put(url, firstArrayObject);
Cache.save();
}

return new JSONObject(Cache.CALLS.get(url));
// NOTE(review): unreachable after the return above -- see the method comment.
return new JSONObject(REQUESTER.get(url, TrecConfig.LEXIGRAM_APIKEY));
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package at.medunigraz.imi.bst.trec.expansion;

import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.JsonNode;
import com.mashape.unirest.http.Unirest;
import com.mashape.unirest.http.exceptions.UnirestException;

import java.io.*;
import java.util.HashMap;

/**
 * Performs HTTP GET requests and memoizes the response bodies, keyed by URL.
 *
 * <p>The memo is loaded from the cache file on construction (when the file exists) and is
 * serialized back to that file, in full, every time a new entry is added.
 *
 * <p>This class performs no synchronization of its own.
 */
public class CachedWebRequester {

    /** File the cache map is loaded from and saved to. */
    private final File cacheFile;

    /** Response bodies keyed by the URL they were fetched from. */
    private HashMap<String, String> cache = new HashMap<>();

    /**
     * Creates a requester backed by the given cache file.
     *
     * @param filename path of the cache file; a missing file yields an empty cache
     * @throws RuntimeException if the file exists but cannot be read or deserialized
     */
    public CachedWebRequester(String filename) {
        this.cacheFile = new File(filename);

        if (cacheFile.exists()) {
            try {
                cache = load(cacheFile);
            } catch (IOException | ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Make a web request to a given url and cache results.
     *
     * @param url the URL to fetch; also used as the cache key
     * @return the cached response body for {@code url}
     * @throws RuntimeException if the request fails, the status code is not 200, or the cache
     *                          cannot be written to disk
     */
    public String get(String url) {
        if (!cache.containsKey(url)) {
            put(url, getResource(url));
        }
        return cache.get(url);
    }

    /**
     * Make a web request to a given restricted url and cache results.
     *
     * <p>The cache key is the URL alone: the bearer token is only used when the URL has to be
     * fetched, so a cached entry is returned regardless of the token passed in.
     *
     * @param url    the URL to fetch; also used as the cache key
     * @param bearer The bearer token.
     * @return the cached response body for {@code url}
     * @throws RuntimeException if the request fails, the status code is not 200, or the cache
     *                          cannot be written to disk
     */
    public String get(String url, String bearer) {
        if (!cache.containsKey(url)) {
            put(url, getRestrictedResource(url, bearer));
        }
        return cache.get(url);
    }

    /**
     * Deserializes a cache map from the given file.
     *
     * @param file the file to read
     * @return the deserialized map
     * @throws IOException            if the file cannot be opened or read
     * @throws ClassNotFoundException if the serialized class cannot be resolved
     */
    private HashMap<String, String> load(File file) throws IOException, ClassNotFoundException {
        // Both streams are resources so they are closed even if the ObjectInputStream
        // constructor or readObject() throws.
        try (FileInputStream fis = new FileInputStream(file);
             ObjectInputStream ois = new ObjectInputStream(fis)) {
            // The file is only ever produced by save(cache, cacheFile) below.
            @SuppressWarnings("unchecked")
            HashMap<String, String> ret = (HashMap<String, String>) ois.readObject();
            return ret;
        }
    }

    /**
     * Serializes an object to the given file, overwriting previous content.
     *
     * @param object the object to write
     * @param file   the destination file
     * @throws IOException if the file cannot be opened or written
     */
    private void save(Object object, File file) throws IOException {
        // try-with-resources releases the file handle if writeObject() fails.
        try (FileOutputStream fos = new FileOutputStream(file);
             ObjectOutputStream oos = new ObjectOutputStream(fos)) {
            oos.writeObject(object);
        }
    }

    /**
     * Adds an entry to the in-memory cache and immediately persists the whole cache.
     *
     * @param url  the cache key
     * @param data the response body to store
     * @throws RuntimeException wrapping any {@link IOException} raised while saving
     */
    private void put(String url, String data) {
        cache.put(url, data);

        // Try to persist cache on disk
        try {
            save(cache, cacheFile);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Fetches a URL sending an {@code authorization: Bearer} header.
     *
     * @param url    the URL to request
     * @param bearer the bearer token to send
     * @return the response body as a string
     * @throws RuntimeException if the request fails or the status code is not 200
     */
    private String getRestrictedResource(String url, String bearer) {
        HttpResponse<JsonNode> response = null;
        try {
            response = Unirest.get(url)
                    .header("authorization", String.format("Bearer %s", bearer))
                    .asJson();
        } catch (UnirestException e) {
            throw new RuntimeException(e);
        }

        return parseResponse(response);
    }

    /**
     * Fetches a URL without any authorization header.
     *
     * @param url the URL to request
     * @return the response body as a string
     * @throws RuntimeException if the request fails or the status code is not 200
     */
    private String getResource(String url) {
        HttpResponse<JsonNode> response = null;
        try {
            response = Unirest.get(url).asJson();
        } catch (UnirestException e) {
            throw new RuntimeException(e);
        }

        return parseResponse(response);
    }

    /**
     * Validates the status code of a response and extracts its body.
     *
     * @param response the response to inspect
     * @return the string form of the response body
     * @throws RuntimeException if the status code is anything other than 200
     */
    private String parseResponse(HttpResponse<JsonNode> response) {
        // 401 gets a dedicated message pointing at the properties file that holds the keys.
        if (response.getStatus() == 401) {
            throw new RuntimeException("Unauthorized access to API. Check your keys in the file trec-pm.properties.");
        }

        if (response.getStatus() != 200) {
            throw new RuntimeException("Got status code " + response.getStatus() + " from API with body " + response.getBody());
        }

        return response.getBody().toString();
    }
}
122 changes: 122 additions & 0 deletions src/main/java/at/medunigraz/imi/bst/trec/expansion/DGIdb.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package at.medunigraz.imi.bst.trec.expansion;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONObject;

import java.util.*;

/**
 * Client for the DGIdb interactions endpoint that exposes, per gene, the interacting drug names
 * and the PubMed IDs backing those interactions. Responses are obtained through a
 * {@link CachedWebRequester}.
 */
public class DGIdb {

    private static final Logger LOG = LogManager.getLogger();

    private static final String ENDPOINT = "http://dgidb.org/api/v2/interactions.json";

    private static final int DEFAULT_MINIMAL_SCORE = 0;

    private static final boolean DEFAULT_EXPERT_CURATED_ONLY = false;

    private static final CachedWebRequester REQUESTER = new CachedWebRequester("cache/dgidb.ser");

    /**
     * Get known drug interactions for a gene, using the default trust level and minimal score.
     *
     * @param gene The gene to query.
     * @return A set of drug names, sorted by decreasing score.
     */
    public Set<String> getDrugInteractions(String gene) {
        return getDrugInteractions(gene, DEFAULT_EXPERT_CURATED_ONLY, DEFAULT_MINIMAL_SCORE);
    }

    /**
     * Get known drug interactions for a gene, using the default minimal score.
     *
     * @param gene              The gene to query.
     * @param expertCuratedOnly Whether the results should be restricted by `source_trust_levels`.
     * @return A set of drug names, sorted by decreasing score.
     */
    public Set<String> getDrugInteractions(String gene, boolean expertCuratedOnly) {
        return getDrugInteractions(gene, expertCuratedOnly, DEFAULT_MINIMAL_SCORE);
    }

    /**
     * Get a list of known drug interactions for a given gene.
     *
     * @param gene              The gene to query.
     * @param expertCuratedOnly Whether the results should be restricted by `source_trust_levels`.
     * @param minimalScore      A minimal score (inclusive), as given by the DGIdb.
     * @return A set of lower-cased drug names, sorted by decreasing score; names sharing a score
     *         appear in their natural string order.
     */
    public Set<String> getDrugInteractions(String gene, boolean expertCuratedOnly, int minimalScore) {
        Map<Integer, Map<String, Set<String>>> data = getData(gene, expertCuratedOnly);

        // LinkedHashSet keeps the first (highest-scored) occurrence position of each drug.
        Set<String> ret = new LinkedHashSet<>();
        data.entrySet().stream()
                .filter(e -> e.getKey() >= minimalScore)
                .sorted(Map.Entry.comparingByKey(Comparator.reverseOrder()))
                .forEach(e -> ret.addAll(e.getValue().keySet()));

        return ret;
    }

    /**
     * Get PubMed IDs backing drug interaction claims for a gene, using the default trust level
     * and minimal score.
     *
     * @param gene The gene to query.
     * @return A set of PubMed IDs, sorted by decreasing score.
     */
    public Set<String> getPublications(String gene) {
        return getPublications(gene, DEFAULT_EXPERT_CURATED_ONLY, DEFAULT_MINIMAL_SCORE);
    }

    /**
     * Get PubMed IDs backing drug interaction claims for a gene, using the default minimal score.
     *
     * @param gene              The gene to query.
     * @param expertCuratedOnly Whether the results should be restricted by `source_trust_levels`.
     * @return A set of PubMed IDs, sorted by decreasing score.
     */
    public Set<String> getPublications(String gene, boolean expertCuratedOnly) {
        return getPublications(gene, expertCuratedOnly, DEFAULT_MINIMAL_SCORE);
    }

    /**
     * Get a list of PubMed IDs backing drug interaction claims.
     *
     * @param gene              The gene to query.
     * @param expertCuratedOnly Whether the results should be restricted by `source_trust_levels`.
     * @param minimalScore      A minimal score (inclusive), as given by the DGIdb.
     * @return A set of PubMed IDs, sorted by decreasing score of the interaction they back.
     */
    public Set<String> getPublications(String gene, boolean expertCuratedOnly, int minimalScore) {
        Map<Integer, Map<String, Set<String>>> data = getData(gene, expertCuratedOnly);

        // LinkedHashSet keeps the first (highest-scored) occurrence position of each PubMed ID.
        Set<String> ret = new LinkedHashSet<>();
        data.entrySet().stream()
                .filter(e -> e.getKey() >= minimalScore)
                .sorted(Map.Entry.comparingByKey(Comparator.reverseOrder()))
                .forEach(e -> e.getValue().values().forEach(ret::addAll));

        return ret;
    }

    /**
     * Queries the endpoint for a gene and groups the returned interactions by score.
     *
     * @param gene              The gene to query; inserted verbatim into the query string.
     * @param expertCuratedOnly Whether to add the `source_trust_levels=Expert curated` filter.
     * @return A two-level map `score -> (drugName -> pmids)`; drug names are lower-cased.
     */
    private Map<Integer, Map<String, Set<String>>> getData(String gene, boolean expertCuratedOnly) {
        // TODO EML4-ALK must split
        // TODO check any unwanted gene (e.g. coming from prepositions)
        // NOTE(review): gene is not URL-encoded here; presumably callers only pass plain gene
        // symbols -- confirm before passing values with spaces or reserved characters.

        String url = String.format(ENDPOINT + "?genes=%s", gene);
        url = expertCuratedOnly ? url + "&source_trust_levels=Expert%20curated" : url;

        JSONObject data = new JSONObject(REQUESTER.get(url));

        Map<Integer, Map<String, Set<String>>> ret = new TreeMap<>();

        JSONArray matchedTerms = data.getJSONArray("matchedTerms");
        for (Object term : matchedTerms) {
            JSONArray interactions = ((JSONObject) term).getJSONArray("interactions");
            for (int i = 0; i < interactions.length(); i++) {
                JSONObject interaction = (JSONObject) interactions.get(i);

                int score = interaction.getInt("score");
                // Locale.ROOT keeps the key independent of the JVM default locale
                // (e.g. "I" must not become a dotless "ı" under a Turkish locale).
                String drugName = interaction.getString("drugName").toLowerCase(Locale.ROOT);
                JSONArray pmidArray = interaction.getJSONArray("pmids");

                // The map might already contain an interaction for a given gene if there are multiple matched terms.
                Set<String> pmids = ret
                        .computeIfAbsent(score, k -> new TreeMap<>())
                        .computeIfAbsent(drugName, k -> new LinkedHashSet<>());
                pmidArray.forEach(e -> pmids.add(e.toString()));
            }
        }

        return ret;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ public ExperimentsBuilder withGeneFamily() {
return this;
}

/**
 * Wraps the current query decorator of the experiment being built in a
 * {@link DrugInteractionQueryDecorator}.
 *
 * @return this builder, for call chaining
 */
public ExperimentsBuilder withDrugInteraction() {
    final Query current = buildingExp.getDecorator();
    buildingExp.setDecorator(new DrugInteractionQueryDecorator(current));
    return this;
}

public ExperimentsBuilder withGoldStandard(Experiment.GoldStandard gold) {
buildingExp.setGoldStandard(gold);
return this;
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/at/medunigraz/imi/bst/trec/model/Topic.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ public class Topic {
// MUST be public to be accessed via Reflection on SubTemplateQueryDecorator
public List<String> geneHypernyms = new ArrayList<>();

public List<String> drugInteractions = new ArrayList<>();

public Topic() {

}
Expand Down Expand Up @@ -153,6 +155,11 @@ public Topic withGeneHypernym(String hypernym) {
this.geneHypernyms.add(hypernym);
return this;
}

/**
 * Appends a drug interaction to this topic.
 *
 * @param interaction the value to append to {@code drugInteractions}
 * @return this topic, for call chaining
 */
public Topic withDrugInteraction(String interaction) {
    drugInteractions.add(interaction);
    return this;
}

private static boolean hasElement(Element element, String name) {
return element.getElementsByTagName(name).getLength() > 0 ? true : false;
Expand Down Expand Up @@ -240,6 +247,10 @@ public Map<String, String> getAttributes() {
for (int i = 0; i < geneHypernyms.size(); i++) {
ret.put("geneHypernyms" + i, geneHypernyms.get(i));
}

for (int i = 0; i < drugInteractions.size(); i++) {
ret.put("drugInteractions" + i, drugInteractions.get(i));
}

return ret;
}
Expand Down
Loading

0 comments on commit 2e4dc8f

Please sign in to comment.