Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/130 cohort ontology #132

Merged
merged 15 commits into from
May 27, 2024
Binary file modified tracex_project/db.sqlite3
Binary file not shown.
25 changes: 23 additions & 2 deletions tracex_project/extraction/logic/modules/module_cohort_tagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@ def execute_and_save(
super().execute_and_save(
df,
patient_journey=patient_journey,
patient_journey_sentences=patient_journey_sentences
patient_journey_sentences=patient_journey_sentences,
)

cohort_tags = self.__extract_cohort_tags(patient_journey)
cohort_dict = self.__prepare_cohort_dict(cohort_tags)
cohort_dict_normalized = self.normalize_coniditons_snomed(cohort_dict)

return cohort_dict
return cohort_dict_normalized

@staticmethod
def __extract_cohort_tags(patient_journey):
Expand Down Expand Up @@ -62,3 +63,23 @@ def __prepare_cohort_dict(cohort_data):
return None

return cohort_dict

@staticmethod
def normalize_coniditons_snomed(cohort_dict):
    """Normalizes conditions to a SNOMED code.

    For each of the keys ``condition`` and ``preexisting_condition`` that
    holds a non-``None`` value, the value is looked up via
    ``u.get_snomed_ct_info`` and the dict is updated in place: the key itself
    receives the term returned by the lookup and ``<key>_snomed_code``
    receives the returned code.

    Args:
        cohort_dict: Mapping of extracted cohort tags, or ``None`` when no
            cohort information could be prepared.

    Returns:
        The same ``cohort_dict`` object (mutated in place), or ``None`` when
        ``None`` was passed in.
    """
    # The caller forwards the prepared cohort dict unchecked, and that value
    # can be None; without this guard the `.get` calls below would raise
    # AttributeError.
    if cohort_dict is None:
        return None

    for key in ("condition", "preexisting_condition"):
        extracted_term = cohort_dict.get(key)
        if extracted_term is None:
            continue

        # NOTE: the lookup result replaces the extracted term as-is, so
        # whatever the lookup returns for "no match" ends up in the dict.
        (
            cohort_dict[key],
            cohort_dict[f"{key}_snomed_code"],
        ) = u.get_snomed_ct_info(extracted_term)

    return cohort_dict
2 changes: 2 additions & 0 deletions tracex_project/extraction/logic/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,9 @@ def set_default_values(self):
"gender": None,
"origin": None,
"condition": None,
"condition_snomed_code": None,
"preexisting_condition": None,
"preexisting_condition_snomed_code": None,
}
self.set_cohort(cohort_default_values)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 4.2.11 on 2024-05-16 11:03

from django.db import migrations, models


# Schema migration for the "extraction" app: adds two optional integer
# columns to the "cohort" model, one SNOMED code per condition field.
class Migration(migrations.Migration):
# Must be applied after migration 0023 of the "extraction" app.
dependencies = [
("extraction", "0023_alter_cohort_age_alter_cohort_condition_and_more"),
]

operations = [
# SNOMED code belonging to the cohort's `condition`; nullable and may be
# left blank.
migrations.AddField(
model_name="cohort",
name="condition_snomed_code",
field=models.IntegerField(blank=True, null=True),
),
# SNOMED code belonging to the cohort's `preexisting_condition`; nullable
# and may be left blank.
migrations.AddField(
model_name="cohort",
name="preexisting_condition_snomed_code",
field=models.IntegerField(blank=True, null=True),
),
]
2 changes: 2 additions & 0 deletions tracex_project/extraction/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ class Cohort(models.Model):
gender = models.CharField(max_length=25, null=True, blank=True)
origin = models.CharField(max_length=50, null=True, blank=True)
condition = models.CharField(max_length=50, null=True, blank=True)
condition_snomed_code = models.IntegerField(null=True, blank=True)
preexisting_condition = models.CharField(max_length=100, null=True, blank=True)
preexisting_condition_snomed_code = models.IntegerField(null=True, blank=True)
manager = models.Manager()

def __str__(self):
Expand Down
13 changes: 13 additions & 0 deletions tracex_project/tracex/logic/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,16 @@
("attribute_location", "Location"),
]
THRESHOLD_FOR_MATCH = 0.5
# URL of the SNOMED CT description search that condition lookups are sent to.
SNOMED_CT_API_URL = (
    "https://browser.ihtsdotools.org/snowstorm/snomed-ct/browser/MAIN/descriptions"
)
# Default query parameters for a SNOMED CT description search. The search
# term itself is not part of the defaults; it is supplied per lookup.
# (Previously written as `SNOMED_CT_PARAMS = params = {...}`, which also
# created an unintended module-level `params` alias to this same dict.)
SNOMED_CT_PARAMS = {
    "limit": 50,
    "conceptActive": "true",
    "lang": "english",
    "skipTo": 0,
    "returnLimit": 100,
}
# HTTP headers sent along with the search request.
# NOTE(review): presumably the server rejects the default client User-Agent,
# hence the generic "browser" value - TODO confirm.
SNOMED_CT_HEADERS = {
    "User-Agent": "browser",
}
32 changes: 30 additions & 2 deletions tracex_project/tracex/logic/utils.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,36 @@
"""Module providing various utility functions for the project."""
import os
from io import StringIO
from pathlib import Path
PitButtchereit marked this conversation as resolved.
Show resolved Hide resolved
import json

import base64
import tempfile
import functools
import warnings

from io import StringIO
from pathlib import Path

import pandas as pd
import pm4py
import numpy as np
import requests

from django.conf import settings
from django.db.models import Q
from openai import OpenAI

from tracex.logic.logger import log_tokens_used
from tracex.logic.constants import (
MAX_TOKENS,
TEMPERATURE_SUMMARIZING,
MODEL,
oaik,
)
from tracex.logic.constants import (
tkv29 marked this conversation as resolved.
Show resolved Hide resolved
SNOMED_CT_API_URL,
SNOMED_CT_PARAMS,
SNOMED_CT_HEADERS,
)

from extraction.models import Trace

Expand Down Expand Up @@ -101,6 +111,24 @@ def calculate_linear_probability(logprob):
return linear_prob


def get_snomed_ct_info(term, timeout=10):
    """Get the first matched name and code of a SNOMED CT term.

    Sends a GET request to ``SNOMED_CT_API_URL`` with the default search
    parameters plus ``term`` and returns the first item of the response.

    Args:
        term: The term to search for.
        timeout: Seconds to wait for the server before the request is
            aborted. Without a timeout a stalled server would block the
            caller indefinitely.

    Returns:
        A ``(term, code)`` tuple built from the first returned item's
        ``term`` and ``concept.conceptId`` fields, or ``(None, None)`` when
        the response contains no items.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    # Build a per-call copy instead of writing the term into the shared
    # module-level defaults, which would leak state between calls.
    query_params = {**SNOMED_CT_PARAMS, "term": term}
    response = requests.get(
        SNOMED_CT_API_URL,
        params=query_params,
        headers=SNOMED_CT_HEADERS,
        timeout=timeout,
    )
    # Surface HTTP errors explicitly rather than as an opaque KeyError or
    # JSON decoding failure further down.
    response.raise_for_status()
    data = json.loads(response.text)

    items = data.get("items")
    if not items:
        return None, None

    first_match = items[0]
    return first_match["term"], first_match["concept"]["conceptId"]


class Conversion:
"""Class for all kinds of conversions"""

Expand Down
Loading