Skip to content

Commit

Permalink
Add abstract presummarization to edge sum endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
jrichardson97 committed Mar 27, 2024
1 parent c32d1f8 commit 72df1a0
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 21 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,9 @@ Start server: redis-server --dbfilename aragorn_cache.rdb --dir /home/joeyr/data
https://kg-summarizer.apps.renci.org/docs

## Todo

- The Redis database indices appear to have changed; investigate why.
- This example is interesting because Cisplatin is shown to treat multiple subclasses of mucin-producing carcinoma. Can GPT summarize this well? The indices for this example are no longer available.
- Add pyunit tests
- Aragorn and strider have unit tests
- Add abstract summary preprocess flag to server
- Create database dataclass and sort keys as initialization
14 changes: 11 additions & 3 deletions kg_summarizer/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class LLMParameters(BaseModel):

# Request-level options carried by an EdgeItem (see its `parameters` field).
class Parameters(BaseModel):
# LLM settings; the edge handler reads gpt_model, temperature and system_prompt from it.
# NOTE(review): the handler dereferences `llm` without a None check, and whether an
# Optional field with no default is required depends on the pydantic major version - confirm.
llm: Optional[LLMParameters]
# When truthy, the edge handler calls edge.summarize_edge_abstracts(...) before
# building the final summary prompt. Off by default.
summarize_abstracts: Optional[bool] = False
# trapi: Optional[TrapiParameters]


Expand All @@ -41,7 +42,7 @@ class EdgeItem(BaseModel):
parameters: Parameters


KG_SUM_VERSION = "0.0.10"
KG_SUM_VERSION = "0.0.11"

# declare the application and populate some details
app = FastAPI(
Expand Down Expand Up @@ -96,20 +97,27 @@ async def summarize_edge_handler(item: EdgeItem):
with open(p) as f:
edge = EdgeContainer(json.load(f))

logger.info(f"Edge: {edge}")
if item.parameters.summarize_abstracts:
edge.summarize_edge_abstracts(
model=item.parameters.llm.gpt_model,
temperature=item.parameters.llm.temperature,
)
logger.info(f"Edge with summarized abstracts: {edge}")

spo_sentence = edge.format_spo_sentence()

if item.parameters.llm.system_prompt:
system_prompt = item.parameters.llm.system_prompt
else:
system_prompt = f"""
Summarize the following edge publication abstracts listed in the knowledge graph. Make sure the summary supports the statement '{spo_sentence}'. Only use information explicitly stated in the publication abstracts. I repeat, do not make up any information.
Summarize the following edge publication abstracts listed in the knowledge graph. Make sure the summary supports the statement '{spo_sentence}'. Only use information explicitly stated in the publication abstracts. I repeat, do not make up any information. Include a bulleted list of key facts from the abstracts that support the statement '{spo_sentence}'.
"""

if edge.edge["publications"] or edge.edge["sentences"]:
logger.info(f"GPT Prompt: {system_prompt}")
logger.info(f"GPT Mode: {item.parameters.llm.gpt_model}")
logger.info(f"GPT Temperature: {item.parameters.llm.temperature}")
logger.info(f"GPT Input: {edge}")
summary += generate_response(
system_prompt,
str(edge),
Expand Down
25 changes: 25 additions & 0 deletions kg_summarizer/trapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
unique_name_from_str,
)

from kg_summarizer.ai import generate_response


def get_publications(pub_id_list):
pub_list = []
Expand Down Expand Up @@ -71,6 +73,29 @@ def parse_edge(self, input_data):
def format_spo_sentence(self):
    """Return the edge as a plain "subject predicate object" sentence."""
    triple = self.edge
    return "{} {} {}".format(
        triple["subject"], triple["predicate"], triple["object"]
    )

def summarize_edge_abstracts(self, system_prompt=None, model=None, temperature=0):
    """Replace every publication abstract on this edge with an LLM summary.

    Each entry of ``self.edge["publications"]`` is read as a dict mapping a
    publication id to its abstract text. Every abstract is sent to
    ``generate_response`` and the entry is rebuilt as ``{id: summary}``.
    The edge is updated in place and nothing is returned.

    Args:
        system_prompt: Instruction sent to the model with each abstract.
            When ``None`` a one-sentence-summary prompt is used.
        model: Model name forwarded to ``generate_response``. When ``None``
            ``"gpt-3.5-turbo"`` is used.
        temperature: Sampling temperature forwarded to ``generate_response``.
    """
    if system_prompt is None:
        system_prompt = (
            "\nYou are a pharmacology researcher summarizing publication "
            "abstracts. Condense the following abstract to a single sentence.\n"
        )

    if model is None:
        model = "gpt-3.5-turbo"

    publications = self.edge.get("publications")
    if not publications:
        # Nothing to condense (missing, None or empty): leave the edge as is
        # rather than failing while iterating.
        return

    def _summarize(abstract):
        # An empty abstract gives the model nothing to ground a summary on,
        # so pass it through unchanged instead of paying for a call that
        # could only produce invented text.
        if not abstract:
            return abstract
        return generate_response(
            system_prompt, abstract, model=model, temperature=temperature
        )

    # Rebuild each entry from all of its items so an entry holding several
    # ids keeps every one of them, and an empty entry stays empty.
    self.edge["publications"] = [
        {pub_id: _summarize(abstract) for pub_id, abstract in pub_dict.items()}
        for pub_dict in publications
    ]

def __str__(self):
    """Render the nodes and edge as dict-style text with all apostrophes removed."""
    payload = {"nodes": self.nodes, "edge": self.edge}
    rendered = str(payload)
    return rendered.replace("'", "")

Expand Down
94 changes: 78 additions & 16 deletions notebooks/edge_parser.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -94,33 +94,52 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'subject': 'PUBCHEM.COMPOUND:6018',\n",
" 'object': 'MONDO:0007739',\n",
" 'predicate': 'biolink:treats',\n",
" 'publications': ['PMC:4557792',\n",
" 'PMID:29920125',\n",
" 'PMID:30136594',\n",
" 'PMID:19050408',\n",
" 'PMID:28742396',\n",
" 'PMID:19381278',\n",
" 'PMID:22621818',\n",
" 'PMID:2901681'],\n",
" 'sentences': \"Valbenazine is a modified metabolite of the vesicular monoamine transporter 2 (VMAT-2) inhibitor tetrabenazine, which is approved for the treatment of the hyperkinetic movement disorder, Huntington's disease.|NA|This deuterated form of the drug tetrabenazine is indicated for the treatment of chorea associated with Huntington's disease as well as tardive dyskinesia.|NA|For example, in 2008 the FDA approved the synthetic VMAT2 inhibitor tetrabenazine (TBZ) for treatment of chorea associated with Huntington?s disease.|NA\"}"
"['PMC:4557792',\n",
" 'PMID:29920125',\n",
" 'PMID:30136594',\n",
" 'PMID:19050408',\n",
" 'PMID:28742396',\n",
" 'PMID:19381278',\n",
" 'PMID:22621818',\n",
" 'PMID:2901681']"
]
},
"execution_count": 25,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['edge']"
"data['edge']['publications']"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"EdgeContainer(nodes={'Tetrabenazine': {'description': 'A benzoquinolizine that is 1,2,3,4,4a,9,10,10a-octahydrophenanthrene in which the carbon at position 10a is replaced by a nitrogen and which is substituted by an isobutyl group at position 2, an oxo group at position 3, and methoxy groups at positions 6 and 7.'}, 'Huntington disease': {'description': 'Huntington disease (HD) is a rare neurodegenerative disorder of the central nervous system characterized by unwanted choreatic movements, behavioral and psychiatric disturbances and dementia.'}}, edge={'subject': 'Tetrabenazine', 'object': 'Huntington disease', 'predicate': 'treats', 'publications': [{'PMID:30136594': \"OBJECTIVE: Stable heavy isotopes of hydrogen, carbon, and other elements have been incorporated into drug molecules, largely as tracers for quantitation during the drug development process. Studies involving the human use of drugs labeled with deuterium suggest that these compounds may offer some advantages when compared with their nondeuterated counterparts. Deuteration has gained attention because of its potential to affect the pharmacokinetic and metabolic profiles of drugs. Deutetrabenazine (Austedo, Teva Pharmaceutical Industries, Ltd) is the first deuterated drug to receive Food and Drug Administration approval. This deuterated form of the drug tetrabenazine is indicated for the treatment of chorea associated with Huntington's disease as well as tardive dyskinesia. Ongoing clinical trials suggest that a number of other deuterated compounds are being evaluated for the treatment of human diseases and not merely as research tools. DATA SOURCES: A search of the MEDLINE (1946 to present) database was undertaken using the Ovid interface. The search was conducted using the heading deuterium and then limited to Administration & Dosage, Adverse Effects, Pharmacokinetics, Pharmacology, Poisoning, Therapeutic Use, and Toxicity. 
STUDY SELECTION AND DATA EXTRACTION: All articles were reviewed and those with human information were included. Review articles were likewise interrogated for additional published human data. CONCLUSIONS: Deuterated compounds may, in some cases, offer advantages over nondeuterated forms, often through alterations in clearance. Deuteration may also redirect metabolic pathways in directions that reduce toxicities. The approval of additional deuterated compounds may soon follow. Clinicians will need to be familiar with the dosing, efficacy, potential side effects, and unique metabolic profiles of these new entities.\"}, {'PMID:19050408': \"OBJECTIVES: To enhance the knowledge on the long-term efficacy and safety of tetrabenazine (TBZ) in managing chorea. METHODS: We analyzed 68 Huntington disease patients (mean disease duration, 55.8 +/- 34.7 months) who had been treated with TBZ for a mean period of 34.4 +/- 25.2 months (median, 34 months; mode, 48 months; range, 3-104 months). We measured the variation from pretreatment of the motor score of Unified Huntington's Disease Rating Scale at the first follow-up visit and at the latest. RESULTS: Mean Unified Huntington's Disease Rating Scale-chorea underscore at the time of the pretreatment visit was 10.4 +/- 4.1 (range, 0-28). At the first follow-up, 9.7 +/- 7.8 months after the prescription of TBZ (mean dose, 35.3 +/- 14.7 mg), mean score of chorea was 8.2 +/- 4.1 (-21% compared with baseline), whereas at the latest follow-up visit (mean dose, 57.5 +/- 14.7 mg), it was 9.5 +/- 5.0 (9%). During the follow-up, the clinical benefit persisted, but the magnitude was reduced despite a progressive increase of the doses (up to 60%). Motor improvement was not influenced by sex, or doses or duration of therapy; age at onset was the only predictor of a good outcome. Five patients (7%) did not gain any improvement, and TBZ was discontinued. 
There were 2 withdrawals because of side effects; 34 patients reported at least 1 side effect. CONCLUSIONS: Tetrabenazine was well tolerated and produced long-term improvement of motor symptoms in Huntington disease patients, although a slight reduction of benefit occurred during the course of treatment.\"}, {'PMID:28742396': \"Tardive dyskinesia (TD) remains a clinical concern for any patient who receives an antipsychotic. While the overall risk of developing TD is lower with newer antipsychotics compared to older agents, a significant number of patients who require long-term treatment will develop TD. Recently, valbenazine (brand name Ingrezza) became the first drug to be approved by the FDA specifically for the treatment of TD. In this New Drug Review, we summarize the basic pharmacology and clinical trial results for valbenazine. Valbenazine is a modified metabolite of the vesicular monoamine transporter 2 (VMAT-2) inhibitor tetrabenazine, which is approved for the treatment of the hyperkinetic movement disorder, Huntington's disease. In short-term clinical trials, valbenazine at a dose of 80 mg/day improved TD, with an effect size that is clinically significant (d=0.90). The effect size for the 40-mg/day dose was lower (d=0.52). Compared to tetrabenazine, valbenazine has better clinical characteristics (i.e., once-a-day dosing, better short-term side effect profile). However, only long-term experience in routine clinical populations can delineate valbenazine's full benefits, optimal dosing, and risks not identified during short-term registration trials.\"}, {'PMID:19381278': \"Tetrabenazine (TBZ), a catecholamine-depleting agent initially developed for the treatment of schizophrenia, when tested for other indications, has proven to be more useful for the treatment of a variety of hyperkinetic movement disorders. 
These disorders include neurological diseases characterized by abnormal involuntary movements such as chorea associated with Huntington's disease, tics in Tourette's syndrome, dyskinesias and dystonias in tardive dyskinesia, also primary dystonias and myoclonus. This review will include and discuss studies published during the period of 1960-2006 regarding the clinical efficacy and tolerability of TBZ in Huntington's disease (HD). It will also review the chemistry, pharmacokinetics and dynamics of the drug and its mechanism of action compared to that of reserpine, the only similar compound. This review emphasizes the advantage of TBZ over dopamine-depleting compounds used in the treatment of chorea and reveals its clinical efficacy and side effects.\"}, {'PMID:22621818': 'BACKGROUND: Impairment in computerized dynamic posturography scores has been documented in Huntington disease patients. Tetrabenazine is approved to treat chorea in Huntington disease, but its effect on posturography scores, and balance in general, is unknown. MATERIALS AND METHODS: We designed a study to test computerized dynamic posturography performance while taking tetrabenazine and after stopping tetrabenazine for at least three days. RESULTS: 10 Huntington disease patients were studied both ON and OFF tetrabenazine. The composite score was statistically different between ON and OFF conditions and both conditions were significantly worse than reference scores. There was no significant difference between ON and OFF trials in the number of falls. A significant improvement on sensory orientation test conditions 3 (sway-referenced vision) and 5 (sway-referenced motion of the support surface and eyes closed) was seen while ON tetrabenazine. Strategy scores 1-3 were also significantly different while ON tetrabenazine. 
CONCLUSION: These findings suggest that tetrabenazine aided patients in gating out of abnormal visual cues when other sensory modalities were available, as well as in gating out abnormal kinesthetic cues when visual cues were not available. It could not help with gating out of simultaneous abnormal visual and somatosensory cues. Thus, tetrabenazine can improve postural stability when one sensory modality is irrelevant, but this effect is not sustained when multiple abnormal sensory modalities are present. This is the first study supporting the use of any medicine to treat balance problems in Huntington disease.'}, {'PMID:2901681': \"The neurochemical effect of tetrabenazine was assessed by determining the levels of dopamine, noradrenaline and 5-hydroxytryptamine and their metabolites in post-mortem brain from Huntington's disease patients with or without a history of tetrabenazine treatment. The tetrabenazine-treated group showed a general description of monoamines in all regions studied, the greatest reduction being dopamine in the caudate. This provides the basis for the effect of tetrabenzine on chorea, while monoamine losses in limbic regions may mediate the production of side effects, such as depression.\"}], 'sentences': \"Valbenazine is a modified metabolite of the vesicular monoamine transporter 2 (VMAT-2) inhibitor tetrabenazine, which is approved for the treatment of the hyperkinetic movement disorder, Huntington's disease.|NA|This deuterated form of the drug tetrabenazine is indicated for the treatment of chorea associated with Huntington's disease as well as tardive dyskinesia.|NA|For example, in 2008 the FDA approved the synthetic VMAT2 inhibitor tetrabenazine (TBZ) for treatment of chorea associated with Huntington?s disease.|NA\"})"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from kg_summarizer.trapi import EdgeContainer\n",
"\n",
"edge = EdgeContainer(data)\n",
"edge"
]
},
{
Expand Down Expand Up @@ -160,6 +179,49 @@
"edge = EdgeContainer(data)\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OBJECTIVE: Stable heavy isotopes of hydrogen, carbon, and other elements have been incorporated into drug molecules, largely as tracers for quantitation during the drug development process. Studies involving the human use of drugs labeled with deuterium suggest that these compounds may offer some advantages when compared with their nondeuterated counterparts. Deuteration has gained attention because of its potential to affect the pharmacokinetic and metabolic profiles of drugs. Deutetrabenazine (Austedo, Teva Pharmaceutical Industries, Ltd) is the first deuterated drug to receive Food and Drug Administration approval. This deuterated form of the drug tetrabenazine is indicated for the treatment of chorea associated with Huntington's disease as well as tardive dyskinesia. Ongoing clinical trials suggest that a number of other deuterated compounds are being evaluated for the treatment of human diseases and not merely as research tools. DATA SOURCES: A search of the MEDLINE (1946 to present) database was undertaken using the Ovid interface. The search was conducted using the heading deuterium and then limited to Administration & Dosage, Adverse Effects, Pharmacokinetics, Pharmacology, Poisoning, Therapeutic Use, and Toxicity. STUDY SELECTION AND DATA EXTRACTION: All articles were reviewed and those with human information were included. Review articles were likewise interrogated for additional published human data. CONCLUSIONS: Deuterated compounds may, in some cases, offer advantages over nondeuterated forms, often through alterations in clearance. Deuteration may also redirect metabolic pathways in directions that reduce toxicities. The approval of additional deuterated compounds may soon follow. Clinicians will need to be familiar with the dosing, efficacy, potential side effects, and unique metabolic profiles of these new entities.\n"
]
},
{
"data": {
"text/plain": [
"[{'PMID:30136594': 'Deuterated drugs, such as deutetrabenazine, show promise in altering pharmacokinetic and metabolic profiles, potentially offering advantages over nondeuterated forms, with ongoing clinical trials exploring their therapeutic potential for various human diseases.'}]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from kg_summarizer.ai import generate_response\n",
"system_prompt = f\"\"\"\n",
"You are a pharmacology researcher summarizing publication abstracts. Condense the follow abstract to a single sentence.\n",
"\"\"\"\n",
"model = 'gpt-3.5-turbo'\n",
"temperature = 0\n",
"\n",
"summary_list = []\n",
"for pub_dict in edge.edge['publications']:\n",
" k = list(pub_dict.keys())[0]\n",
" abstract = pub_dict[k]\n",
" print(abstract)\n",
" summary_list.append({\n",
" k: generate_response(system_prompt, abstract, model=model, temperature=temperature)\n",
" })\n",
"\n",
"summary_list"
]
},
{
"cell_type": "code",
"execution_count": 28,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

setup(
name="kg-summarizer",
version="0.0.10",
version="0.0.11",
author="Joey Richardson",
author_email="[email protected]",
url="https://github.com/jrichardson97/kg-summarizer",
Expand Down

0 comments on commit 72df1a0

Please sign in to comment.