Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/130 cohort ontology #132

Merged
merged 15 commits into from
May 27, 2024
Binary file modified tracex_project/db.sqlite3
Binary file not shown.
21 changes: 21 additions & 0 deletions tracex_project/extraction/logic/modules/module_cohort_tagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def execute_and_save(

cohort_dict = self.__extract_cohort_tags(patient_journey)
cohort_dict = self.__remove_placeholder(cohort_dict)
cohort_dict = self.normalize_coniditons_snomed(cohort_dict)

return cohort_dict

Expand Down Expand Up @@ -65,3 +66,23 @@ def __remove_placeholder(cohort_data) -> Optional[Dict[str, str]]:
return None

return cohort_dict

@staticmethod
def normalize_coniditons_snomed(cohort_dict) -> Optional[Dict[str, str]]:
    """Normalizes conditions to a SNOMED code.

    For each of the "condition" and "preexisting_condition" entries that is
    present and not None, the value is looked up via ``u.get_snomed_ct_info``.
    The entry is replaced by the returned term and the returned code is stored
    under "<key>_snomed_code".

    Args:
        cohort_dict: Cohort tags keyed by attribute name, or None when no
            cohort information survived placeholder removal.

    Returns:
        The same dictionary, updated in place, or None if None was passed in.
    """
    # The preceding placeholder-removal step may hand over None; without this
    # guard the lookup below would fail with AttributeError on `None.get`.
    if cohort_dict is None:
        return None

    for key in ("condition", "preexisting_condition"):
        value = cohort_dict.get(key)
        if value is None:
            continue
        # NOTE(review): the looked-up term replaces the extracted text as-is,
        # so whatever the lookup returns for an unmatched term ends up stored
        # here -- confirm that this is the intended behaviour.
        (
            cohort_dict[key],
            cohort_dict[f"{key}_snomed_code"],
        ) = u.get_snomed_ct_info(value)

    return cohort_dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Generated by Django 4.2.13 on 2024-05-26 09:35

from django.db import migrations, models
import django.db.models.deletion


# Schema migration for the `extraction` app: drops the `cohort` field from
# `trace`, adds a one-to-one `trace` field plus two SNOMED code columns to
# `cohort`, and redefines four existing `cohort` columns as nullable/blank.
class Migration(migrations.Migration):

# Must run after migration 0021 of the `extraction` app.
dependencies = [
('extraction', '0021_alter_event_event_type_alter_event_location_and_more'),
]

# NOTE(review): `trace.cohort` is removed before `cohort.trace` is added with
# related_name='cohort'; presumably the order avoids a name clash on Trace --
# keep the operations in this order unless confirmed otherwise.
operations = [
migrations.RemoveField(
model_name='trace',
name='cohort',
),
# NOTE(review): SNOMED CT identifiers can be longer than a 32-bit integer;
# confirm IntegerField is wide enough on the target database backend.
migrations.AddField(
model_name='cohort',
name='condition_snomed_code',
field=models.IntegerField(blank=True, null=True),
),
migrations.AddField(
model_name='cohort',
name='preexisting_condition_snomed_code',
field=models.IntegerField(blank=True, null=True),
),
# Cohort now points at its trace; deleting the trace cascades to the cohort.
migrations.AddField(
model_name='cohort',
name='trace',
field=models.OneToOneField(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='cohort', to='extraction.trace'),
),
# The remaining operations redefine existing cohort columns as optional
# (null=True, blank=True).
migrations.AlterField(
model_name='cohort',
name='age',
field=models.IntegerField(blank=True, null=True),
),
migrations.AlterField(
model_name='cohort',
name='condition',
field=models.CharField(blank=True, max_length=50, null=True),
),
migrations.AlterField(
model_name='cohort',
name='origin',
field=models.CharField(blank=True, max_length=50, null=True),
),
migrations.AlterField(
model_name='cohort',
name='preexisting_condition',
field=models.CharField(blank=True, max_length=100, null=True),
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 4.2.13 on 2024-05-26 09:41

from django.db import migrations, models


class Migration(migrations.Migration):
    """Rename ``Cohort.gender`` to ``Cohort.sex``, keeping stored values.

    Removing ``gender`` and adding a fresh ``sex`` column would discard every
    value already stored in the column. Renaming the column and then applying
    the new field definition ends in the same schema and the same migration
    state (``sex`` as an optional CharField with max_length=25) while keeping
    the existing data.
    """

    # Must run after migration 0022 of the `extraction` app.
    dependencies = [
        ('extraction', '0022_add_snomed_trace_cohort'),
    ]

    operations = [
        # Rename in place instead of drop-and-recreate so rows keep their value.
        migrations.RenameField(
            model_name='cohort',
            old_name='gender',
            new_name='sex',
        ),
        # Apply the definition the model declares for `sex`.
        # NOTE(review): assumes existing `gender` values fit into 25
        # characters -- confirm before running against populated databases.
        migrations.AlterField(
            model_name='cohort',
            name='sex',
            field=models.CharField(blank=True, max_length=25, null=True),
        ),
    ]
2 changes: 2 additions & 0 deletions tracex_project/extraction/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ class Cohort(models.Model):
sex = models.CharField(max_length=25, null=True, blank=True)
origin = models.CharField(max_length=50, null=True, blank=True)
condition = models.CharField(max_length=50, null=True, blank=True)
condition_snomed_code = models.IntegerField(null=True, blank=True)
preexisting_condition = models.CharField(max_length=100, null=True, blank=True)
preexisting_condition_snomed_code = models.IntegerField(null=True, blank=True)
manager = models.Manager()

def __str__(self):
Expand Down
13 changes: 13 additions & 0 deletions tracex_project/tracex/logic/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,16 @@
("activity_labeling", "Activity Labeler"),
("cohort_tagging", "Cohort Tagger")
]
# Endpoint queried for SNOMED CT description lookups.
SNOMED_CT_API_URL = (
    "https://browser.ihtsdotools.org/snowstorm/snomed-ct/browser/MAIN/descriptions"
)
# Base query parameters sent with every lookup; the search term itself is
# supplied by the caller. Treat this mapping as read-only shared state.
SNOMED_CT_PARAMS = {
    "limit": 1,
    "conceptActive": "true",
    "lang": "english",
    "skipTo": 0,
    "returnLimit": 1,
}
# HTTP headers sent with every lookup request.
SNOMED_CT_HEADERS = {
    "User-Agent": "browser",
}
22 changes: 22 additions & 0 deletions tracex_project/tracex/logic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
DataFrameUtilities -- Groups all functions related to DataFrame operations.
"""
import os
import json
from pathlib import Path
import base64
import tempfile
Expand All @@ -19,16 +20,21 @@
import pandas as pd
import pm4py
import numpy as np
import requests

from django.conf import settings
from django.db.models import Q
from openai import OpenAI

from tracex.logic.logger import log_tokens_used
from tracex.logic.constants import (
MAX_TOKENS,
TEMPERATURE_SUMMARIZING,
MODEL,
OAIK,
SNOMED_CT_API_URL,
SNOMED_CT_PARAMS,
SNOMED_CT_HEADERS,
)

from extraction.models import Trace
Expand Down Expand Up @@ -120,6 +126,22 @@ def get_snippet_bounds(index: int, length: int) -> tuple[int, int]:

return lower_bound, upper_bound

def get_snomed_ct_info(term, timeout=10):
    """Get the first matched name and code of a SNOMED CT term.

    Args:
        term: Text to search for in the SNOMED CT description endpoint.
        timeout: Seconds to wait for the server before ``requests`` gives up
            and raises; prevents a stalled connection from blocking forever.

    Returns:
        A ``(term, code)`` tuple taken from the first item of the response
        (its "term" and its concept's "conceptId"), or ``(None, None)`` when
        the response contains no items.
    """
    # Build the query per call instead of writing the term into the shared
    # module-level SNOMED_CT_PARAMS dict, which would leak state between
    # calls and between concurrently running requests.
    query = {**SNOMED_CT_PARAMS, "term": term}
    response = requests.get(
        SNOMED_CT_API_URL,
        params=query,
        headers=SNOMED_CT_HEADERS,
        timeout=timeout,
    )
    data = json.loads(response.text)

    items = data.get("items")
    if not items:
        return None, None

    first_match = items[0]
    return first_match["term"], first_match["concept"]["conceptId"]

class Conversion:
"""
Expand Down
Loading