diff --git a/src/main/java/at/medunigraz/imi/bst/lexigram/Lexigram.java b/src/main/java/at/medunigraz/imi/bst/lexigram/Lexigram.java index acf3872f..6d7afee0 100644 --- a/src/main/java/at/medunigraz/imi/bst/lexigram/Lexigram.java +++ b/src/main/java/at/medunigraz/imi/bst/lexigram/Lexigram.java @@ -1,6 +1,7 @@ package at.medunigraz.imi.bst.lexigram; import at.medunigraz.imi.bst.config.TrecConfig; +import at.medunigraz.imi.bst.trec.expansion.CachedWebRequester; import at.medunigraz.imi.bst.trec.model.Topic; import at.medunigraz.imi.bst.trec.model.TopicSet; import at.medunigraz.imi.bst.trec.stats.CSVStatsWriter; @@ -28,6 +29,8 @@ public class Lexigram { private static final String ENDPOINT = "https://api.lexigram.io/v1/lexigraph/"; + private static final CachedWebRequester REQUESTER = new CachedWebRequester("cache/lexigramV2.ser"); + private static final List NOISE = new ArrayList<>(); static { NOISE.add("classification"); @@ -39,37 +42,6 @@ public class Lexigram { NOISE.add("ca - "); } - private static class Cache { - private static final String FILENAME = "cache/lexigram.ser"; - private static HashMap CALLS = new HashMap<>(); - static { - if (Files.exists(Paths.get(FILENAME))) { - load(); - } - } - - private static void load() { - try { - ObjectInputStream ois = new ObjectInputStream(new FileInputStream(FILENAME)); - CALLS = (HashMap) ois.readObject(); - ois.close(); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - private static void save() { - try - { - ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(FILENAME)); - oos.writeObject(CALLS); - oos.close(); - } catch(IOException e) { - throw new RuntimeException(e); - } - } - } - public static boolean isAPIKeyLoaded() { final int MIN_API_KEY_LENGTH = 20; return TrecConfig.LEXIGRAM_APIKEY.length() > MIN_API_KEY_LENGTH; @@ -231,39 +203,7 @@ private static String cleanUpString(String label) { } private static JSONObject get(String url) { - if (!Cache.CALLS.containsKey(url)) { - HttpResponse response = null; - try { - response = Unirest.get(url) - .header("authorization", "Bearer " + TrecConfig.LEXIGRAM_APIKEY) - .asJson(); - } catch (UnirestException e) { - throw new RuntimeException(e); - } - - if (response.getStatus() == 401) { - throw new RuntimeException("Unauthorized access to Lexigram API. Place your key in the file trec-pm.properties."); - } - - if (response.getStatus() != 200) { - throw new RuntimeException("Got status code " + response.getStatus() + " from Lexigram API with body " + response.getBody()); - } - - JSONObject body = new JSONObject(response.getBody()); - - String firstArrayObject = ""; - try { - firstArrayObject = body.getJSONObject("object").toString(); - } catch (JSONException e) { - LOG.error("Unexpected response from Lexigram API: " + body); - throw e; - } - - Cache.CALLS.put(url, firstArrayObject); - Cache.save(); - } - - return new JSONObject(Cache.CALLS.get(url)); + return new JSONObject(REQUESTER.get(url, TrecConfig.LEXIGRAM_APIKEY)); } /** diff --git a/src/main/java/at/medunigraz/imi/bst/trec/expansion/CachedWebRequester.java b/src/main/java/at/medunigraz/imi/bst/trec/expansion/CachedWebRequester.java new file mode 100644 index 00000000..b1b546de --- /dev/null +++ b/src/main/java/at/medunigraz/imi/bst/trec/expansion/CachedWebRequester.java @@ -0,0 +1,117 @@ +package at.medunigraz.imi.bst.trec.expansion; + +import com.mashape.unirest.http.HttpResponse; +import com.mashape.unirest.http.JsonNode; +import com.mashape.unirest.http.Unirest; +import com.mashape.unirest.http.exceptions.UnirestException; + +import java.io.*; +import java.util.HashMap; + +public class CachedWebRequester { + + private File cacheFile; + + private HashMap cache = new HashMap<>(); + + public CachedWebRequester(String filename) { + this.cacheFile = new File(filename); + + if (cacheFile.exists()) { + try { + cache = load(cacheFile); + } catch (IOException | ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + } + + /** + * Make a web request to a given url and cache results. + * + * @param url + * @return + */ + public String get(String url) { + if (!cache.containsKey(url)) { + String data = getResource(url); + put(url, data); + } + return cache.get(url); + } + + /** + * Make a web request to a given restricted url and cache results. + * + * @param url + * @param bearer The bearer token. + * @return + */ + public String get(String url, String bearer) { + if (!cache.containsKey(url)) { + String data = getRestrictedResource(url, bearer); + put(url, data); + } + return cache.get(url); + } + + private HashMap load(File file) throws IOException, ClassNotFoundException { + ObjectInputStream ois = new ObjectInputStream(new FileInputStream(file)); + HashMap ret = (HashMap) ois.readObject(); + ois.close(); + return ret; + } + + private void save(Object object, File file) throws IOException { + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(file)); + oos.writeObject(object); + oos.close(); + } + + private void put(String url, String data) { + cache.put(url, data); + + // Try to persist cache on disk + try { + save(cache, cacheFile); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private String getRestrictedResource(String url, String bearer) { + HttpResponse response = null; + try { + response = Unirest.get(url) + .header("authorization", String.format("Bearer %s", bearer)) + .asJson(); + } catch (UnirestException e) { + throw new RuntimeException(e); + } + + return parseResponse(response); + } + + private String getResource(String url) { + HttpResponse response = null; + try { + response = Unirest.get(url).asJson(); + } catch (UnirestException e) { + throw new RuntimeException(e); + } + + return parseResponse(response); + } + + private String parseResponse(HttpResponse response) { + if (response.getStatus() == 401) { + throw new RuntimeException("Unauthorized access to API. Check your keys in the file trec-pm.properties."); + } + + if (response.getStatus() != 200) { + throw new RuntimeException("Got status code " + response.getStatus() + " from API with body " + response.getBody()); + } + + return response.getBody().toString(); + } +} diff --git a/src/main/java/at/medunigraz/imi/bst/trec/expansion/DGIdb.java b/src/main/java/at/medunigraz/imi/bst/trec/expansion/DGIdb.java new file mode 100644 index 00000000..15d06940 --- /dev/null +++ b/src/main/java/at/medunigraz/imi/bst/trec/expansion/DGIdb.java @@ -0,0 +1,122 @@ +package at.medunigraz.imi.bst.trec.expansion; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.json.JSONArray; +import org.json.JSONObject; + +import java.util.*; + +public class DGIdb { + + private static final Logger LOG = LogManager.getLogger(); + + private static final String ENDPOINT = "http://dgidb.org/api/v2/interactions.json"; + + private static final int DEFAULT_MINIMAL_SCORE = 0; + + private static final boolean DEFAULT_EXPERT_CURATED_ONLY = false; + + private static final CachedWebRequester REQUESTER = new CachedWebRequester("cache/dgidb.ser"); + + public Set getDrugInteractions(String gene) { + return getDrugInteractions(gene, DEFAULT_EXPERT_CURATED_ONLY, DEFAULT_MINIMAL_SCORE); + } + + public Set getDrugInteractions(String gene, boolean expertCuratedOnly) { + return getDrugInteractions(gene, expertCuratedOnly, DEFAULT_MINIMAL_SCORE); + } + + /** + * Get a list of known drug interactions for a given gene. + * + * @param gene The gene to query. + * @param expertCuratedOnly Whether the results should be restricted by `source_trust_levels`. + * @param minimalScore A minimal score, as given by the DGIdb. + * @return A set of drug interactions, sorted by decreasing score. + */ + public Set getDrugInteractions(String gene, boolean expertCuratedOnly, int minimalScore) { + Map>> data = getData(gene, expertCuratedOnly); + + Set ret = new LinkedHashSet<>(); + data.entrySet().stream() + .filter(e -> e.getKey() >= minimalScore) + .sorted(Map.Entry.comparingByKey(Comparator.reverseOrder())) + .forEach(e -> ret.addAll(e.getValue().keySet())); + + return ret; + } + + public Set getPublications(String gene) { + return getPublications(gene, DEFAULT_EXPERT_CURATED_ONLY, DEFAULT_MINIMAL_SCORE); + } + + public Set getPublications(String gene, boolean expertCuratedOnly) { + return getPublications(gene, expertCuratedOnly, DEFAULT_MINIMAL_SCORE); + } + + /** + * Get a list of PubMed IDs backing drug interaction claims. + * + * @param gene The gene to query. + * @param expertCuratedOnly Whether the results should be restricted by `source_trust_levels`. + * @param minimalScore A minimal score, as given by the DGIdb. + * @return A set of PubMed IDs, sorted by score. + */ + public Set getPublications(String gene, boolean expertCuratedOnly, int minimalScore) { + Map>> data = getData(gene, expertCuratedOnly); + + Set ret = new LinkedHashSet<>(); + data.entrySet().stream() + .filter(e -> e.getKey() >= minimalScore) + .sorted(Map.Entry.comparingByKey(Comparator.reverseOrder())) + .forEach(e -> e.getValue().forEach((k, v) -> ret.addAll(v))); + + return ret; + } + + /** + * + * @param gene + * @param expertCuratedOnly + * @return A two-level map `score -> (drugName -> pmids)` + */ + private Map>> getData(String gene, boolean expertCuratedOnly) { + // TODO EML4-ALK must split + // TODO check any unwanted gene (e.g. coming from prepositions) + + String url = String.format(ENDPOINT + "?genes=%s", gene); + url = expertCuratedOnly ? url + "&source_trust_levels=Expert%20curated" : url; + + JSONObject data = new JSONObject(REQUESTER.get(url)); + + Map>> ret = new TreeMap<>(); + + JSONArray matchedTerms = data.getJSONArray("matchedTerms"); + for (Object term : matchedTerms) { + JSONArray interactions = ((JSONObject) term).getJSONArray("interactions"); + for (int i = 0; i < interactions.length(); i++) { + JSONObject interaction = (JSONObject) interactions.get(i); + + int score = interaction.getInt("score"); + String drugName = interaction.getString("drugName").toLowerCase(); + + Set pmids = new LinkedHashSet<>(); + interaction.getJSONArray("pmids").forEach(e -> pmids.add(e.toString())); + + if (!ret.containsKey(score)) { + ret.put(score, new TreeMap<>()); + } + + // The map might already contain an interaction for a given gene if there are multiple matched terms. + Map> interactionsByScore = ret.get(score); + if (!interactionsByScore.containsKey(drugName)) { + interactionsByScore.put(drugName, new LinkedHashSet<>()); + } + interactionsByScore.get(drugName).addAll(pmids); + } + } + + return ret; + } +} diff --git a/src/main/java/at/medunigraz/imi/bst/trec/experiment/ExperimentsBuilder.java b/src/main/java/at/medunigraz/imi/bst/trec/experiment/ExperimentsBuilder.java index 465d09b6..54c164ed 100644 --- a/src/main/java/at/medunigraz/imi/bst/trec/experiment/ExperimentsBuilder.java +++ b/src/main/java/at/medunigraz/imi/bst/trec/experiment/ExperimentsBuilder.java @@ -123,6 +123,12 @@ public ExperimentsBuilder withGeneFamily() { return this; } + public ExperimentsBuilder withDrugInteraction() { + Query previousDecorator = buildingExp.getDecorator(); + buildingExp.setDecorator(new DrugInteractionQueryDecorator(previousDecorator)); + return this; + } + public ExperimentsBuilder withGoldStandard(Experiment.GoldStandard gold) { buildingExp.setGoldStandard(gold); return this; diff --git a/src/main/java/at/medunigraz/imi/bst/trec/model/Topic.java b/src/main/java/at/medunigraz/imi/bst/trec/model/Topic.java index a982ab74..a547cc9c 100644 --- a/src/main/java/at/medunigraz/imi/bst/trec/model/Topic.java +++ b/src/main/java/at/medunigraz/imi/bst/trec/model/Topic.java @@ -35,6 +35,8 @@ public class Topic { // MUST be public to be accessed via Reflection on SubTemplateQueryDecorator public List geneHypernyms = new ArrayList<>(); + public List drugInteractions = new ArrayList<>(); + public Topic() { } @@ -153,6 +155,11 @@ public Topic withGeneHypernym(String hypernym) { this.geneHypernyms.add(hypernym); return this; } + + public Topic withDrugInteraction(String interaction) { + this.drugInteractions.add(interaction); + return this; + } private static boolean hasElement(Element element, String name) { return element.getElementsByTagName(name).getLength() > 0 ? true : false; @@ -240,6 +247,10 @@ public Map getAttributes() { for (int i = 0; i < geneHypernyms.size(); i++) { ret.put("geneHypernyms" + i, geneHypernyms.get(i)); } + + for (int i = 0; i < drugInteractions.size(); i++) { + ret.put("drugInteractions" + i, drugInteractions.get(i)); + } return ret; } diff --git a/src/main/java/at/medunigraz/imi/bst/trec/query/DrugInteractionQueryDecorator.java b/src/main/java/at/medunigraz/imi/bst/trec/query/DrugInteractionQueryDecorator.java new file mode 100644 index 00000000..e0381187 --- /dev/null +++ b/src/main/java/at/medunigraz/imi/bst/trec/query/DrugInteractionQueryDecorator.java @@ -0,0 +1,29 @@ +package at.medunigraz.imi.bst.trec.query; + +import at.medunigraz.imi.bst.trec.expansion.DGIdb; +import at.medunigraz.imi.bst.trec.model.Topic; + +import java.util.Set; + +public class DrugInteractionQueryDecorator extends DynamicQueryDecorator { + + private static final DGIdb dgidb = new DGIdb(); + + public DrugInteractionQueryDecorator(Query decoratedQuery) { + super(decoratedQuery); + } + + @Override + public Topic expandTopic(Topic topic) { + String[] geneTokens = topic.getGeneTokens(); + + for (String token : geneTokens) { + Set interactions = dgidb.getDrugInteractions(token); + for (String interaction : interactions) { + topic.withDrugInteraction(interaction); + } + } + return topic; + } + +} diff --git a/src/test/java/at/medunigraz/imi/bst/trec/expansion/DGIdbTest.java b/src/test/java/at/medunigraz/imi/bst/trec/expansion/DGIdbTest.java new file mode 100644 index 00000000..7c2f2d80 --- /dev/null +++ b/src/test/java/at/medunigraz/imi/bst/trec/expansion/DGIdbTest.java @@ -0,0 +1,62 @@ +package at.medunigraz.imi.bst.trec.expansion; + +import org.junit.Test; + +import java.util.Set; + +import static org.hamcrest.Matchers.*; +import static org.hamcrest.core.Is.is; +import static org.junit.Assert.assertThat; + +public class DGIdbTest { + + private static final DGIdb DGI_DB = new DGIdb(); + + @Test + public void getInteractions() { + // Baseline, all results + Set actual = DGI_DB.getDrugInteractions("BRAF"); + assertThat(actual, hasItems("selumetinib", "dabrafenib", "bevacizumab", "obatoclax")); + + // Only expert curated + actual = DGI_DB.getDrugInteractions("BRAF", true); + assertThat(actual, hasItems("selumetinib", "dabrafenib")); + assertThat(actual, not(hasItems("bevacizumab", "obatoclax"))); // Bevacizumab and obatoclax are not expert curated + + // Minimal score, expert curated only + actual = DGI_DB.getDrugInteractions("BRAF", true, 100); + assertThat(actual, hasItems("dabrafenib")); + assertThat(actual, not(hasItems("selumetinib", "bevacizumab", "obatoclax"))); // selumetinib has score 21 + + // No results + actual = DGI_DB.getDrugInteractions("LDH"); + assertThat(actual, is(empty())); + + // TODO test sorting by score + } + + @Test + public void getPublications() { + // Baseline, all results + Set actual = DGI_DB.getPublications("BRAF"); + assertThat(actual, hasItems("26343583", "22197931", "23020132", "2015", "21216929", "22460902")); + + // Only expert curated + actual = DGI_DB.getPublications("BRAF", true); + assertThat(actual, hasItems("26343583", "22197931", "23020132", "2015")); + assertThat(actual, hasItems("21216929")); // 21216929 is for bevacizumab (not expert curated), but also temsirolums (still expert curated) + assertThat(actual, not(hasItems("22460902"))); // 22460902 is only for obatoclax, not expert curated + + // Minimal score, expert curated only + actual = DGI_DB.getPublications("BRAF", true, 100); + assertThat(actual, hasItems("26343583", "23020132", "2015")); + assertThat(actual, not(hasItems("22460902", "22197931", "21216929"))); // 22197931 has score 16 and 21216929 has score 10/2 + + // No results + actual = DGI_DB.getPublications("LDH"); + assertThat(actual, is(empty())); + + // TODO test sorting by score + } + +} \ No newline at end of file diff --git a/src/test/java/at/medunigraz/imi/bst/trec/query/DrugInteractionQueryDecoratorTest.java b/src/test/java/at/medunigraz/imi/bst/trec/query/DrugInteractionQueryDecoratorTest.java new file mode 100644 index 00000000..bc11639d --- /dev/null +++ b/src/test/java/at/medunigraz/imi/bst/trec/query/DrugInteractionQueryDecoratorTest.java @@ -0,0 +1,36 @@ +package at.medunigraz.imi.bst.trec.query; + +import at.medunigraz.imi.bst.config.TrecConfig; +import at.medunigraz.imi.bst.trec.model.Topic; +import org.hamcrest.Matchers; +import org.junit.Assert; +import org.junit.Test; + +import java.io.File; +import java.util.Map; + +public class DrugInteractionQueryDecoratorTest extends QueryDecoratorTest { + private static final String GENE = "BRAF"; + + private final File template = new File(getClass().getResource("/templates/match-title-drug-interaction.json").getFile()); + + public DrugInteractionQueryDecoratorTest() { + this.decoratedQuery = new DrugInteractionQueryDecorator( + new SubTemplateQueryDecorator(template, new ElasticSearchQuery(TrecConfig.ELASTIC_BA_INDEX))); + this.topic = new Topic().withGene(GENE); + } + + @Test + public void testGetTopic() { + DummyElasticSearchQuery dummyQuery = new DummyElasticSearchQuery(); + Query decorator = new DrugInteractionQueryDecorator(dummyQuery); + + decorator.query(new Topic().withGene(GENE)); + + Map actual = dummyQuery.getTopic().getAttributes(); + Assert.assertThat(actual, Matchers.hasValue("selumetinib")); + Assert.assertThat(actual, Matchers.hasValue("dabrafenib")); + Assert.assertThat(actual, Matchers.hasValue("bevacizumab")); + Assert.assertThat(actual, Matchers.hasValue("obatoclax")); + } +} diff --git a/src/test/resources/subtemplates/drug_interaction.json b/src/test/resources/subtemplates/drug_interaction.json new file mode 100644 index 00000000..8d850c36 --- /dev/null +++ b/src/test/resources/subtemplates/drug_interaction.json @@ -0,0 +1 @@ +{ "match": { "title": "{{[drugInteractions]}}" }} \ No newline at end of file diff --git a/src/test/resources/templates/match-title-drug-interaction.json b/src/test/resources/templates/match-title-drug-interaction.json new file mode 100644 index 00000000..21612def --- /dev/null +++ b/src/test/resources/templates/match-title-drug-interaction.json @@ -0,0 +1 @@ +{"dis_max": { "queries": [ {{drugInteractions:drug_interaction.json}} ]}} \ No newline at end of file