Skip to content

Commit

Permalink
Merge pull request #52 from EveCrystali:dev/DiabetesRiskPrediction---…
Browse files Browse the repository at this point in the history
…Enhance-prediction-quality-#49

Dev/DiabetesRiskPrediction---Enhance-prediction-quality-#49
  • Loading branch information
EveCrystali authored Nov 12, 2024
2 parents 0def0de + a8bc2b8 commit c8f2d6c
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 32 deletions.
187 changes: 162 additions & 25 deletions BackendDiabetesRiskPrediction/Services/ElasticsearchService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public ElasticsearchService(ILogger<ElasticsearchService> logger)
.ThrowExceptions()
.EnableDebugMode()
.PrettyJson()
.DefaultFieldNameInferrer(p => p)
.OnRequestCompleted(response =>
{
Console.WriteLine($"Request: {response.DebugInformation}");
Expand All @@ -34,7 +35,7 @@ public ElasticsearchService(ILogger<ElasticsearchService> logger)

public async Task IndexNoteAsync(NoteRiskInfo note)

Check notice on line 36 in BackendDiabetesRiskPrediction/Services/ElasticsearchService.cs

View workflow job for this annotation

GitHub Actions / Qodana for .NET

Type member is never used (non-private accessibility)

Method 'IndexNoteAsync' is never used
{
var response = await _elasticsearchClient.IndexDocumentAsync(note);
IndexResponse response = await _elasticsearchClient.IndexDocumentAsync(note);

Check notice on line 38 in BackendDiabetesRiskPrediction/Services/ElasticsearchService.cs

View workflow job for this annotation

GitHub Actions / Qodana for .NET

Use preferred 'var' style (when type is simple)

Use 'var' (simple types)

if (!response.IsValid)
{
Expand All @@ -44,46 +45,182 @@ public async Task IndexNoteAsync(NoteRiskInfo note)

/// <summary>
/// Counts how many distinct trigger words appear in the notes indexed for a patient.
/// </summary>
/// <remarks>
/// Both the trigger words and each note body are run through the <c>custom_french_analyzer</c>
/// analyzer of <c>notes_index</c>, and the comparison is made on the resulting tokens rather
/// than on the raw text. The search requests at most 1000 notes, so notes beyond that page
/// are not inspected.
/// </remarks>
/// <param name="patientId">Value matched against the <c>PatientId</c> field of the indexed notes.</param>
/// <param name="wordsToCount">Trigger words to look for in the note bodies.</param>
/// <returns>
/// The number of distinct analyzed trigger tokens found in at least one note body;
/// 0 when no trigger token could be produced or when the patient has no notes.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="wordsToCount"/> is null.</exception>
/// <exception cref="InvalidOperationException">The search request returned an invalid response.</exception>
public async Task<int> CountUniqueWordsInNotes(int patientId, HashSet<string> wordsToCount)
{
    ArgumentNullException.ThrowIfNull(wordsToCount);

    const string NotesIndex = "notes_index";
    const string NoteAnalyzer = "custom_french_analyzer";
    const int MaxNotesPerPatient = 1000; // Adjust size as needed

    _logger.LogInformation("CountUniqueWordsInNotes called with patientId: {patientId}", patientId);

    // Step 1: Analyze the trigger words using the same analyzer as the `Body` field
    var analyzedWords = new HashSet<string>();

    _logger.LogInformation("Analyzing words: {wordsToCount}", string.Join(", ", wordsToCount));

    foreach (var word in wordsToCount)
    {
        var analyzeResponse = await _elasticsearchClient.Indices.AnalyzeAsync(a => a
            .Index(NotesIndex)
            .Analyzer(NoteAnalyzer)
            .Text(word)
        );

        if (!analyzeResponse.IsValid)
        {
            // Best effort: a word that cannot be analyzed is skipped, not fatal.
            // OriginalException is null-checked because an invalid response does not guarantee one.
            _logger.LogError(
                analyzeResponse.OriginalException,
                "Failed to analyze word: {word}. Reason: {reason}",
                word,
                analyzeResponse.OriginalException?.Message ?? "unknown error");
            continue;
        }

        analyzedWords.UnionWith(analyzeResponse.Tokens.Select(token => token.Token));
    }

    _logger.LogInformation("Analyzed words are: {words}", string.Join(", ", analyzedWords));

    if (analyzedWords.Count == 0)
    {
        // Nothing can match an empty token set, so skip the search and the per-note analysis.
        _logger.LogInformation("Unique word count is: {uniqueWordCount}", 0);
        return 0;
    }

    // Step 2: Query for documents matching PatientId
    _logger.LogInformation("Executing search query for patientId: {patientId}", patientId);

    var response = await _elasticsearchClient.SearchAsync<NoteRiskInfo>(s => s
        .Index(NotesIndex)
        .Query(q => q
            .Term(t => t.Field("PatientId").Value(patientId))
        )
        .Source(src => src
            .Includes(i => i.Field("Body"))
        )
        .Size(MaxNotesPerPatient)
    );

    if (!response.IsValid)
    {
        var reason = response.OriginalException?.Message ?? "unknown error";
        _logger.LogError(response.OriginalException, "Search query failed. Reason: {reason}", reason);
        throw new InvalidOperationException($"Failed to search notes: {reason}", response.OriginalException);
    }

    _logger.LogInformation("Search query executed successfully.");

    // HitsMetadata (or its Total) may be absent; fall back to the number of returned hits
    // instead of dereferencing a possibly null reference.
    long totalNotes = response.HitsMetadata?.Total?.Value ?? response.Hits.Count;

    if (totalNotes == 0)
    {
        _logger.LogWarning("No notes found for PatientId: {patientId}", patientId);
        return 0;
    }

    _logger.LogInformation("Found {total} notes for PatientId: {patientId}", totalNotes, patientId);

    // Step 3: Analyze the Body text of each document and count unique matching words
    var uniqueWordsInNotes = new HashSet<string>();

    foreach (var hit in response.Hits)
    {
        var body = hit.Source?.Body;

        _logger.LogDebug("Note found: {note}", body);

        if (string.IsNullOrEmpty(body))
        {
            continue;
        }

        var analyzeBodyResponse = await _elasticsearchClient.Indices.AnalyzeAsync(a => a
            .Index(NotesIndex)
            .Analyzer(NoteAnalyzer)
            .Text(body)
        );

        if (!analyzeBodyResponse.IsValid)
        {
            // Best effort: a note that cannot be analyzed is skipped, not fatal.
            _logger.LogError(
                analyzeBodyResponse.OriginalException,
                "Failed to analyze Body for document {id}. Reason: {reason}",
                hit.Id,
                analyzeBodyResponse.OriginalException?.Message ?? "unknown error");
            continue;
        }

        // Materialize once: the result is both logged and merged into the running set.
        var commonWords = analyzeBodyResponse.Tokens
            .Select(token => token.Token)
            .Intersect(analyzedWords)
            .ToList();

        _logger.LogInformation("Common words found in note {id}: {commonWords}", hit.Id, string.Join(", ", commonWords));

        uniqueWordsInNotes.UnionWith(commonWords);
    }

    var uniqueWordCount = uniqueWordsInNotes.Count;

    _logger.LogInformation("Unique word count is: {uniqueWordCount}", uniqueWordCount);

    _logger.LogInformation("Unique words found: {uniqueWords}", string.Join(", ", uniqueWordsInNotes));

    return uniqueWordCount;
}

// if (response.HitsMetadata?.Total.Value == 0)
// {
// _logger.LogWarning("NO PATIENTS FOUND WITH PATIENTID: {patientId}", patientId);
// }

// if (response.IsValid)
// {
// _logger.LogInformation("Search query executed successfully.");
// }
// else
// {
// _logger.LogError("Search query failed. Reason: {reason}", response.OriginalException.Message);
// }

// if (!response.IsValid)
// {
// throw new Exception($"Failed to search notes: {response.OriginalException.Message}");
// }

// // Étape 3 : Extraire les termes uniques de l'agrégation
// TermsAggregate<string> termsAgg = response.Aggregations.Terms("unique_terms");

// if (termsAgg != null)
// {
// _logger.LogInformation($"Unique word count is : {termsAgg.Buckets.Count}");
// return termsAgg.Buckets.Count;
// }
// else
// {
// _logger.LogInformation("Unique word count is : 0");
// return 0;
// }
// }


// public async Task<int> CountUniqueWordsInNotes(int patientId, HashSet<string> wordsToCount)
// {
// _logger.LogInformation("CountWordsInNotes called");

// var response = await _elasticsearchClient.SearchAsync<NoteRiskInfo>(s => s
// .Query(q => q
// .Bool(b => b
// .Must(m => m.Term("PatientId", patientId)) // Utilisation correcte du champ
// .Should(wordsToCount.Select(word => (Func<QueryContainerDescriptor<NoteRiskInfo>, QueryContainer>)(m => m.Match(mt => mt
// .Field("Body")
// .Query(word)
// .Analyzer("custom_french_analyzer"))))
// .ToArray())
// .MinimumShouldMatch(1)
// )
// )
// .Aggregations(a => a
// .Terms("unique_word_counts", t => t
// .Field("Body.keyword")
// .Size(10000)
// )
// )
// );

// if (!response.IsValid)
// {
// throw new Exception($"Failed to search notes: {response.OriginalException.Message}");
// }

// // Extraire les résultats d'agrégation
// var uniqueWordsFound = new HashSet<string>();
// var termsAgg = response.Aggregations.Terms("unique_word_counts");
// if (termsAgg != null)
// {
// foreach (var bucket in termsAgg.Buckets)
// {
// uniqueWordsFound.Add(bucket.Key.ToString().ToLowerInvariant());
// }
// }

// _logger.LogInformation($"Unique word count is : {uniqueWordsFound.Count}");
// return uniqueWordsFound.Count;
// }

}
18 changes: 17 additions & 1 deletion Frontend/Views/Patients/Details.cshtml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
</dl>
<hr />
<h5 class="mt-3">Risque de Diabète</h5>
<p class="text-muted">@Html.DisplayFor(model => model.DiabetesRiskPrediction.DiabetesRisk)</p>
<p class="fw-bold @GetRiskClass(Model.DiabetesRiskPrediction.DiabetesRisk)">

Check warning on line 38 in Frontend/Views/Patients/Details.cshtml

View workflow job for this annotation

GitHub Actions / Qodana for .NET

Dereference of a possibly null reference.

Dereference of a possibly null reference
@Html.DisplayFor(model => model.DiabetesRiskPrediction.DiabetesRisk)

Check warning on line 39 in Frontend/Views/Patients/Details.cshtml

View workflow job for this annotation

GitHub Actions / Qodana for .NET

Dereference of a possibly null reference.

Dereference of a possibly null reference
</p>
</div>

@if (Model.Notes != null)
Expand Down Expand Up @@ -71,5 +73,19 @@
</div>

<script src="https://cdn.jsdelivr.net/npm/bootstrap@5/dist/js/bootstrap.bundle.min.js"></script>

@functions {
// Maps a diabetes risk level to the CSS class(es) applied to the risk label in this view.
// Any value without an explicit mapping falls back to the muted text style.
public string GetRiskClass(DiabetesRisk risk) => risk switch
{
    DiabetesRisk.None => "text-success",               // green
    DiabetesRisk.Borderline => "text-warning",         // yellow
    DiabetesRisk.InDanger => "text-danger",            // light red
    DiabetesRisk.EarlyOnset => "bg-danger text-white", // dark red background with white text
    _ => "text-muted"                                  // default colour
};
}
</body>
</html>
6 changes: 0 additions & 6 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,10 @@ services:
volumes:
- elasticsearch_config:/usr/share/elasticsearch/config
- elasticsearch_data:/usr/share/elasticsearch/data
# - ./elasticsearch/config/custom_analyzer.json:/usr/share/elasticsearch/config/custom_analyzer.json
- ./elasticsearch/config/create_index.json:/usr/share/elasticsearch/config/create_index.json
- ./elasticsearch/config/notes_index_template.json:/usr/share/elasticsearch/config/notes_index_template.json
- ./elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml
- ./elasticsearch/config/load_template.sh:/usr/share/elasticsearch/config/load_template.sh

# - ./elasticsearch/init/:/usr/share/elasticsearch/init/
healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:9200/_cat/health" ]
interval: 5s
Expand All @@ -42,7 +39,6 @@ services:
- ingest.geoip.downloader.enabled=false
networks:
- elastic
# command: [ "sh", "/usr/share/elasticsearch/init/elasticsearch-init.sh"]
mem_limit: 2g

init-template:
Expand Down Expand Up @@ -76,7 +72,6 @@ services:
volumes:
- ./logstash/pipeline:/usr/share/logstash/pipeline
- ./logstash/config:/usr/share/logstash/config
- ./logstash/config/load_template.sh:/usr/share/logstash/config/load_template.sh
ports:
- "5044:5044"
env_file:
Expand Down Expand Up @@ -240,7 +235,6 @@ volumes:
shared-output:
data_protection_keys:


networks:
elastic:
driver: bridge

0 comments on commit c8f2d6c

Please sign in to comment.