diff --git a/lambdas/enums/validation_score.py b/lambdas/enums/validation_score.py new file mode 100644 index 000000000..7fa659aa0 --- /dev/null +++ b/lambdas/enums/validation_score.py @@ -0,0 +1,16 @@ +from enum import Enum + +from pydantic import BaseModel + + +class ValidationScore(Enum): + NO_MATCH = 0 + PARTIAL_MATCH = 1 + MIXED_FULL_MATCH = 2 + FULL_MATCH = 3 + + +class ValidationResult(BaseModel): + given_name_match: list[str] = [] + family_name_match: str = "" + score: ValidationScore = ValidationScore.NO_MATCH diff --git a/lambdas/models/pds_models.py b/lambdas/models/pds_models.py index 74e20f1e2..81abdcb75 100644 --- a/lambdas/models/pds_models.py +++ b/lambdas/models/pds_models.py @@ -116,26 +116,32 @@ def get_usual_name(self) -> Optional[Name]: if entry.use.lower() == "usual": return entry - def get_most_recent_name(self) -> Optional[Name]: - active_names = [name for name in self.name if name.is_currently_in_use()] + def get_names_by_start_date(self) -> [Name]: + active_names = [name for name in self.name] if not active_names: return None sorted_by_start_date_desc = sorted( - active_names, key=lambda name: name.period.start, reverse=True + active_names, + key=lambda name: ( + (1, name.period.start, name.use.lower() == "usual") + if name.period and name.period.start is not None + else (0, 0, name.use.lower() == "usual") + ), + reverse=True, ) - return sorted_by_start_date_desc[0] + return sorted_by_start_date_desc def get_current_family_name_and_given_name(self) -> Tuple[str, list[str]]: - current_name = self.get_most_recent_name() or self.get_usual_name() - if not current_name: + ordered_names = self.get_names_by_start_date() + if not ordered_names: logger.warning( "The patient does not have a currently active name or a usual name." ) return "", [""] - given_name = current_name.given - family_name = current_name.family + given_name = ordered_names[0].given + family_name = ordered_names[0].family if not given_name or given_name == [""]: logger.warning("The given name of patient is empty.") diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index bc3c2a5e1..84cee1774 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -8,6 +8,7 @@ from enums.patient_ods_inactive_status import PatientOdsInactiveStatus from enums.snomed_codes import SnomedCodes from enums.upload_status import UploadStatus +from enums.validation_score import ValidationScore from enums.virus_scan_result import VirusScanResult from models.fhir.R4.nrl_fhir_document_reference import Attachment from models.nhs_document_reference import NHSDocumentReference @@ -133,13 +134,34 @@ def handle_sqs_message(self, message: dict): patient_ods_code = ( pds_patient_details.get_ods_code_or_inactive_status_for_gp() ) - is_name_validation_based_on_historic_name = ( - validate_filename_with_patient_details(file_names, pds_patient_details) - ) + ( + name_validation_score, + is_dob_valid, + is_name_validation_based_on_historic_name, + ) = validate_filename_with_patient_details(file_names, pds_patient_details) if is_name_validation_based_on_historic_name: accepted_reason = self.concatenate_acceptance_reason( accepted_reason, "Patient matched on historical name" ) + validation_messages = { + ValidationScore.PARTIAL_MATCH: { + True: "Patient matched on partial match 2/3", + False: "", + }, + ValidationScore.MIXED_FULL_MATCH: { + True: "Patient matched on mixed match 3/3", + False: "Patient matched on mixed match 2/3", + }, + ValidationScore.FULL_MATCH: { + True: "Patient matched on full match 3/3", + False: "Patient matched on full match 2/3", + }, + } + if name_validation_score in validation_messages: + message = validation_messages[name_validation_score][is_dob_valid] + accepted_reason = self.concatenate_acceptance_reason( + accepted_reason, message + ) if not allowed_to_ingest_ods_code(patient_ods_code): raise LGInvalidFilesException("Patient not registered at your practice") patient_death_notification_status = ( diff --git a/lambdas/services/edge_presign_service.py b/lambdas/services/edge_presign_service.py index cab057619..28858a9a5 100644 --- a/lambdas/services/edge_presign_service.py +++ b/lambdas/services/edge_presign_service.py @@ -25,7 +25,7 @@ def use_presign(self, request_values: dict): domain_name: str = request_values["domain_name"] presign_string: str = f"{uri}?{querystring}" - encoded_presign_string: str = presign_string.encode("utf-8") + encoded_presign_string = presign_string.encode("utf-8") presign_credentials_hash: str = hashlib.md5(encoded_presign_string).hexdigest() self.attempt_presign_ingestion( @@ -70,7 +70,7 @@ def filter_request_values(request: dict) -> dict: uri: str = request["uri"] querystring: str = request["querystring"] headers: dict = request["headers"] - origin: str = request.get("origin", {}) + origin: dict = request.get("origin", {}) domain_name: str = origin["s3"]["domainName"] except KeyError as e: logger.error(f"Missing request component: {str(e)}") diff --git a/lambdas/tests/unit/helpers/data/pds/test_cases_for_patient_name_matching.py b/lambdas/tests/unit/helpers/data/pds/test_cases_for_patient_name_matching.py index 0d8ee3817..bbbb55e67 100644 --- a/lambdas/tests/unit/helpers/data/pds/test_cases_for_patient_name_matching.py +++ b/lambdas/tests/unit/helpers/data/pds/test_cases_for_patient_name_matching.py @@ -18,9 +18,9 @@ class PdsNameMatchingTestCase(NamedTuple): "Jane Bob Smith Anderson", "Jane Smith Anderson", "Jane B Smith Anderson", + "Bob Smith Anderson", ], "reject": [ - "Bob Smith Anderson", "Jane Smith", "Jane Anderson", "Jane Anderson Smith", diff --git a/lambdas/tests/unit/models/test_pds_models.py b/lambdas/tests/unit/models/test_pds_models.py index a200830b1..9f7357349 100644 --- a/lambdas/tests/unit/models/test_pds_models.py +++ b/lambdas/tests/unit/models/test_pds_models.py @@ -253,6 +253,19 @@ def test_get_current_family_name_and_given_name_return_the_first_usual_name_if_a assert actual == expected_given_name +def test_get_current_family_name_and_given_name_return_the_first_most_recent_name(): + name_1 = build_test_name(use="temp", start="1990-01-01", end=None, given=["Jones"]) + name_2 = build_test_name(use="usual", start=None, end=None, given=["Jane"]) + name_3 = build_test_name(use="usual", start=None, end=None, given=["Bob"]) + + test_patient = build_test_patient_with_names([name_1, name_2, name_3]) + + expected_given_name = ["Jones"] + actual = test_patient.get_patient_details(test_patient.id).given_name + + assert actual == expected_given_name + + def test_get_current_family_name_and_given_name_logs_a_warning_if_no_current_name_or_usual_name_found( caplog, ): @@ -330,7 +343,7 @@ def test_name_is_currently_in_use_return_false_for_nickname_or_old_name(): @freeze_time("2024-01-01") -def test_get_most_recent_name_return_the_name_with_most_recent_start_date(): +def test_get_names_by_start_date_return_ordered_list_by_start_date(): name_1 = build_test_name(start="1990-01-01", end=None, given=["Jane"]) name_2 = build_test_name(start="2010-02-14", end=None, given=["Jones"]) name_3 = build_test_name(start="2000-03-25", end=None, given=["Bob"]) @@ -341,13 +354,22 @@ def test_get_most_recent_name_return_the_name_with_most_recent_start_date(): use="nickname", start="2023-01-01", end=None, given=["Janie"] ) future_name = build_test_name(start="2047-01-01", end=None, given=["Neo Jane"]) + name_no_date = build_test_name(start=None, end=None, given=["Jane"]) test_patient = build_test_patient_with_names( - [name_1, name_2, name_3, expired_name, nickname, future_name] + [name_1, name_2, name_3, expired_name, nickname, future_name, name_no_date] ) - expected = name_2 - actual = test_patient.get_most_recent_name() + expected = [ + future_name, + nickname, + expired_name, + name_2, + name_3, + name_1, + name_no_date, + ] + actual = test_patient.get_names_by_start_date() assert actual == expected @@ -356,7 +378,7 @@ def test_get_most_recent_name_return_the_name_with_most_recent_start_date(): def test_get_most_recent_name_return_none_if_no_active_name_found(): test_patient = build_test_patient_with_names([]) - assert test_patient.get_most_recent_name() is None + assert test_patient.get_names_by_start_date() is None def test_get_death_notification_status_return_the_death_notification_status(): diff --git a/lambdas/tests/unit/services/test_bulk_upload_service.py b/lambdas/tests/unit/services/test_bulk_upload_service.py index 3fe0d2425..d9df0f882 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_service.py @@ -7,6 +7,7 @@ from enums.patient_ods_inactive_status import PatientOdsInactiveStatus from enums.snomed_codes import SnomedCodes from enums.upload_status import UploadStatus +from enums.validation_score import ValidationScore from enums.virus_scan_result import SCAN_RESULT_TAG_KEY, VirusScanResult from freezegun import freeze_time from models.fhir.R4.nrl_fhir_document_reference import Attachment @@ -76,7 +77,8 @@ def mock_check_virus_result(mocker): @pytest.fixture def mock_validate_files(mocker): - yield mocker.patch("services.bulk_upload_service.validate_lg_file_names") + validate_files = mocker.patch("services.bulk_upload_service.validate_lg_file_names") + yield validate_files @pytest.fixture @@ -122,7 +124,8 @@ def mock_pds_service_patient_restricted(mocker): @pytest.fixture def mock_pds_validation(mocker): yield mocker.patch( - "services.bulk_upload_service.validate_filename_with_patient_details" + "services.bulk_upload_service.validate_filename_with_patient_details", + return_value=(ValidationScore.FULL_MATCH, True, True), ) @@ -510,7 +513,7 @@ def test_handle_sqs_message_calls_report_upload_successful_when_patient_is_forma mock_remove_ingested_file_from_source_bucket = ( repo_under_test.s3_repository.remove_ingested_file_from_source_bucket ) - mock_pds_validation.return_value = False + mock_pds_validation.return_value = (ValidationScore.FULL_MATCH, True, False) mock_put_staging_metadata_back_to_queue = ( repo_under_test.sqs_repository.put_staging_metadata_back_to_queue ) @@ -525,7 +528,7 @@ def test_handle_sqs_message_calls_report_upload_successful_when_patient_is_forma mock_report_upload.assert_called_with( TEST_STAGING_METADATA, UploadStatus.COMPLETE, - "Patient is deceased - FORMAL", + "Patient matched on full match 3/3, Patient is deceased - FORMAL", PatientOdsInactiveStatus.DECEASED, ) @@ -544,7 +547,7 @@ def test_handle_sqs_message_calls_report_upload_successful_when_patient_is_infor mock_create_lg_records_and_copy_files = mocker.patch.object( BulkUploadService, "create_lg_records_and_copy_files" ) - mock_pds_validation.return_value = True + mock_pds_validation.return_value = (ValidationScore.MIXED_FULL_MATCH, True, True) mock_remove_ingested_file_from_source_bucket = ( repo_under_test.s3_repository.remove_ingested_file_from_source_bucket ) @@ -562,7 +565,7 @@ def test_handle_sqs_message_calls_report_upload_successful_when_patient_is_infor mock_report_upload.assert_called_with( TEST_STAGING_METADATA, UploadStatus.COMPLETE, - "Patient matched on historical name, Patient is deceased - INFORMAL", + "Patient matched on historical name, Patient matched on mixed match 3/3, Patient is deceased - INFORMAL", "Y12345", ) @@ -581,7 +584,7 @@ def test_handle_sqs_message_calls_report_upload_successful_when_patient_has_hist mock_create_lg_records_and_copy_files = mocker.patch.object( BulkUploadService, "create_lg_records_and_copy_files" ) - mock_pds_validation.return_value = True + mock_pds_validation.return_value = (ValidationScore.MIXED_FULL_MATCH, True, True) mock_remove_ingested_file_from_source_bucket = ( repo_under_test.s3_repository.remove_ingested_file_from_source_bucket ) @@ -599,7 +602,7 @@ def test_handle_sqs_message_calls_report_upload_successful_when_patient_has_hist mock_report_upload.assert_called_with( TEST_STAGING_METADATA, UploadStatus.COMPLETE, - "Patient matched on historical name, PDS record is restricted", + "Patient matched on historical name, Patient matched on mixed match 3/3, PDS record is restricted", "REST", ) @@ -618,7 +621,7 @@ def test_handle_sqs_message_calls_report_upload_successful_when_patient_is_infor mock_create_lg_records_and_copy_files = mocker.patch.object( BulkUploadService, "create_lg_records_and_copy_files" ) - mock_pds_validation.return_value = False + mock_pds_validation.return_value = (ValidationScore.PARTIAL_MATCH, True, False) mock_remove_ingested_file_from_source_bucket = ( repo_under_test.s3_repository.remove_ingested_file_from_source_bucket ) @@ -636,7 +639,7 @@ def test_handle_sqs_message_calls_report_upload_successful_when_patient_is_infor mock_report_upload.assert_called_with( TEST_STAGING_METADATA, UploadStatus.COMPLETE, - "Patient is deceased - INFORMAL", + "Patient matched on partial match 2/3, Patient is deceased - INFORMAL", "Y12345", ) @@ -979,7 +982,7 @@ def test_handle_sqs_message_happy_path_historical_name( repo_under_test.s3_repository, "remove_ingested_file_from_source_bucket" ) mocker.patch.object(repo_under_test.s3_repository, "check_virus_result") - mock_pds_validation.return_value = True + mock_pds_validation.return_value = (ValidationScore.FULL_MATCH, True, True) repo_under_test.handle_sqs_message(message=TEST_SQS_MESSAGE) @@ -990,7 +993,7 @@ def test_handle_sqs_message_happy_path_historical_name( mock_report_upload_complete.assert_called_with( TEST_STAGING_METADATA, UploadStatus.COMPLETE, - "Patient matched on historical name", + "Patient matched on historical name, Patient matched on full match 3/3", "Y12345", ) mock_remove_ingested_file_from_source_bucket.assert_called() diff --git a/lambdas/tests/unit/utils/test_lloyd_george_validator.py b/lambdas/tests/unit/utils/test_lloyd_george_validator.py index 9019ae3b0..e2b2f4dbb 100644 --- a/lambdas/tests/unit/utils/test_lloyd_george_validator.py +++ b/lambdas/tests/unit/utils/test_lloyd_george_validator.py @@ -1,6 +1,7 @@ import pytest from botocore.exceptions import ClientError from enums.supported_document_types import SupportedDocumentTypes +from enums.validation_score import ValidationResult, ValidationScore from models.pds_models import Patient from requests import Response from services.base.ssm_service import SSMService @@ -14,6 +15,10 @@ from tests.unit.helpers.data.pds.pds_patient_response import ( PDS_PATIENT_WITH_MIDDLE_NAME, ) +from tests.unit.helpers.data.pds.test_cases_for_date_logic import ( + build_test_name, + build_test_patient_with_names, +) from tests.unit.helpers.data.pds.test_cases_for_patient_name_matching import ( TEST_CASES_FOR_FAMILY_NAME_WITH_HYPHEN, TEST_CASES_FOR_TWO_WORDS_FAMILY_NAME, @@ -30,6 +35,7 @@ from utils.lloyd_george_validator import ( LGInvalidFilesException, allowed_to_ingest_ods_code, + calculate_validation_score, check_for_duplicate_files, check_for_file_names_agrees_with_each_other, check_for_number_of_files_match_expected, @@ -46,10 +52,26 @@ validate_lg_files, validate_patient_date_of_birth, validate_patient_name, - validate_patient_name_using_full_name_history, ) +@pytest.fixture +def mock_get_ssm_parameter(mocker): + return mocker.patch.object(SSMService, "get_ssm_parameter") + + +@pytest.fixture +def mock_pds_call(mocker): + yield mocker.patch("services.mock_pds_service.MockPdsApiService.pds_request") + + +@pytest.fixture +def mock_fetch_available_document_references_by_type(mocker): + yield mocker.patch.object( + DocumentService, "fetch_available_document_references_by_type" + ) + + def test_catching_error_when_file_type_not_pdf(): with pytest.raises(LGInvalidFilesException): file_type = "image/png" @@ -268,24 +290,18 @@ def test_validate_name_with_correct_name(mocker, mock_pds_patient): mock_validate_name = mocker.patch( "utils.lloyd_george_validator.validate_patient_name" ) - - with expect_not_to_raise(LGInvalidFilesException): - validate_patient_name_using_full_name_history( - lg_file_patient_name, mock_pds_patient - ) - assert mock_validate_name.call_count == 1 - - -def test_validate_name_with_file_missing_middle_name(mocker): - lg_file_patient_name = "Jane Smith" - patient = Patient.model_validate(PDS_PATIENT_WITH_MIDDLE_NAME) - mock_validate_name = mocker.patch( - "utils.lloyd_george_validator.validate_patient_name" + mock_validate_name.return_value = ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith", + ) + actual_score, actual_is_name_validation_based_on_historic_name = ( + calculate_validation_score(lg_file_patient_name, mock_pds_patient) ) - with expect_not_to_raise(LGInvalidFilesException): - validate_patient_name_using_full_name_history(lg_file_patient_name, patient) assert mock_validate_name.call_count == 1 + assert actual_is_name_validation_based_on_historic_name is False + assert actual_score == ValidationScore.FULL_MATCH def test_validate_name_with_additional_middle_name_in_file_mismatching_pds(mocker): @@ -294,13 +310,18 @@ def test_validate_name_with_additional_middle_name_in_file_mismatching_pds(mocke "utils.lloyd_george_validator.validate_patient_name" ) patient = Patient.model_validate(PDS_PATIENT_WITH_MIDDLE_NAME) + mock_validate_name.return_value = ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith", + ) + actual_score, actual_is_name_validation_based_on_historic_name = ( + calculate_validation_score(lg_file_patient_name, patient) + ) - with expect_not_to_raise(LGInvalidFilesException): - actual_is_name_validation_based_on_historic_name = ( - validate_patient_name_using_full_name_history(lg_file_patient_name, patient) - ) assert mock_validate_name.call_count == 1 assert actual_is_name_validation_based_on_historic_name is False + assert actual_score == ValidationScore.FULL_MATCH def test_validate_name_with_additional_middle_name_in_file_but_none_in_pds( @@ -310,41 +331,47 @@ def test_validate_name_with_additional_middle_name_in_file_but_none_in_pds( mock_validate_name = mocker.patch( "utils.lloyd_george_validator.validate_patient_name" ) - with expect_not_to_raise(LGInvalidFilesException): - actual_is_name_validation_based_on_historic_name = ( - validate_patient_name_using_full_name_history( - lg_file_patient_name, mock_pds_patient - ) - ) + mock_validate_name.return_value = ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith", + ) + actual_score, actual_is_name_validation_based_on_historic_name = ( + calculate_validation_score(lg_file_patient_name, mock_pds_patient) + ) + assert mock_validate_name.call_count == 1 assert actual_is_name_validation_based_on_historic_name is False + assert actual_score == ValidationScore.FULL_MATCH def test_validate_name_with_wrong_first_name(mocker, mock_pds_patient): lg_file_patient_name = "John Smith" - mock_validate_name = mocker.patch( - "utils.lloyd_george_validator.validate_patient_name" - ) - mock_validate_name.return_value = False - with pytest.raises(LGInvalidFilesException): - validate_patient_name_using_full_name_history( - lg_file_patient_name, mock_pds_patient - ) - assert mock_validate_name.call_count == 3 + actual_response = validate_patient_name( + lg_file_patient_name, + mock_pds_patient.name[0].given, + mock_pds_patient.name[0].family, + ) + assert actual_response == ValidationResult( + score=ValidationScore.PARTIAL_MATCH, + given_name_match=[], + family_name_match="Smith", + ) def test_validate_name_with_wrong_family_name(mocker, mock_pds_patient): lg_file_patient_name = "Jane Johnson" - mock_validate_name = mocker.patch( - "utils.lloyd_george_validator.validate_patient_name" + actual_response = validate_patient_name( + lg_file_patient_name, + mock_pds_patient.name[0].given, + mock_pds_patient.name[0].family, + ) + assert actual_response == ValidationResult( + score=ValidationScore.PARTIAL_MATCH, + given_name_match=["Jane"], + family_name_match="", ) - mock_validate_name.return_value = False - with pytest.raises(LGInvalidFilesException): - validate_patient_name_using_full_name_history( - lg_file_patient_name, mock_pds_patient - ) - assert mock_validate_name.call_count == 3 def test_validate_name_with_historical_name(mocker, mock_pds_patient): @@ -352,15 +379,20 @@ def test_validate_name_with_historical_name(mocker, mock_pds_patient): mock_validate_name = mocker.patch( "utils.lloyd_george_validator.validate_patient_name" ) - mock_validate_name.side_effect = [False, True] - with expect_not_to_raise(LGInvalidFilesException): - actual_is_name_validation_based_on_historic_name = ( - validate_patient_name_using_full_name_history( - lg_file_patient_name, mock_pds_patient - ) - ) + mock_validate_name.side_effect = [ + ValidationResult( + score=ValidationScore.NO_MATCH, + ), + ValidationResult( + score=ValidationScore.FULL_MATCH, + ), + ] + actual_score, actual_is_validate_on_historic = calculate_validation_score( + lg_file_patient_name, mock_pds_patient + ) + assert actual_score == ValidationScore.FULL_MATCH assert mock_validate_name.call_count == 2 - assert actual_is_name_validation_based_on_historic_name is True + assert actual_is_validate_on_historic is True def test_validate_name_without_given_name(mocker, mock_pds_patient): @@ -369,13 +401,12 @@ def test_validate_name_without_given_name(mocker, mock_pds_patient): mock_validate_name = mocker.patch( "utils.lloyd_george_validator.validate_patient_name" ) - - with expect_not_to_raise(LGInvalidFilesException): - actual_is_validate_on_historic = validate_patient_name_using_full_name_history( - lg_file_patient_name, mock_pds_patient - ) + actual_score, actual_is_validate_on_historic = calculate_validation_score( + lg_file_patient_name, mock_pds_patient + ) + assert actual_score == ValidationScore.NO_MATCH assert actual_is_validate_on_historic is False - assert mock_validate_name.call_count == 1 + assert mock_validate_name.call_count == 2 @pytest.mark.parametrize( @@ -387,19 +418,15 @@ def test_validate_patient_name_with_two_words_family_name( patient_name_in_file_name: str, should_accept_name: bool, ): + actual_score, actual_is_validate_on_historic = calculate_validation_score( + patient_name_in_file_name, patient_details + ) if should_accept_name: - with expect_not_to_raise(LGInvalidFilesException): - actual_is_validate_on_historic = ( - validate_patient_name_using_full_name_history( - patient_name_in_file_name, patient_details - ) - ) - assert actual_is_validate_on_historic is False + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.FULL_MATCH else: - with pytest.raises(LGInvalidFilesException): - validate_patient_name_using_full_name_history( - patient_name_in_file_name, patient_details - ) + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.PARTIAL_MATCH @pytest.mark.parametrize( @@ -411,19 +438,15 @@ def test_validate_patient_name_with_family_name_with_hyphen( patient_name_in_file_name: str, should_accept_name: bool, ): + actual_score, actual_is_validate_on_historic = calculate_validation_score( + patient_name_in_file_name, patient_details + ) if should_accept_name: - with expect_not_to_raise(LGInvalidFilesException): - actual_is_validate_on_historic = ( - validate_patient_name_using_full_name_history( - patient_name_in_file_name, patient_details - ) - ) - assert actual_is_validate_on_historic is False + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.FULL_MATCH else: - with pytest.raises(LGInvalidFilesException): - validate_patient_name_using_full_name_history( - patient_name_in_file_name, patient_details - ) + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.PARTIAL_MATCH @pytest.mark.parametrize( @@ -435,16 +458,15 @@ def test_validate_patient_name_with_two_words_given_name( patient_name_in_file_name: str, should_accept_name: bool, ): + actual_score, actual_is_validate_on_historic = calculate_validation_score( + patient_name_in_file_name, patient_details + ) if should_accept_name: - with expect_not_to_raise(LGInvalidFilesException): - validate_patient_name_using_full_name_history( - patient_name_in_file_name, patient_details - ) + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.FULL_MATCH else: - with pytest.raises(LGInvalidFilesException): - validate_patient_name_using_full_name_history( - patient_name_in_file_name, patient_details - ) + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.PARTIAL_MATCH @pytest.mark.parametrize( @@ -456,16 +478,15 @@ def test_validate_patient_name_with_two_words_family_name_and_given_name( patient_name_in_file_name: str, should_accept_name: bool, ): + actual_score, actual_is_validate_on_historic = calculate_validation_score( + patient_name_in_file_name, patient_details + ) if should_accept_name: - with expect_not_to_raise(LGInvalidFilesException): - validate_patient_name_using_full_name_history( - patient_name_in_file_name, patient_details - ) + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.FULL_MATCH else: - with pytest.raises(LGInvalidFilesException): - validate_patient_name_using_full_name_history( - patient_name_in_file_name, patient_details - ) + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.PARTIAL_MATCH def test_missing_middle_name_names_with_pds_service(): @@ -733,70 +754,173 @@ def test_mismatch_nhs_in_validate_lg_file(mocker, mock_pds_patient): @pytest.mark.parametrize( - ["file_patient_name", "first_name_from_pds", "family_name_from_pds"], + ["file_patient_name", "first_name_from_pds", "family_name_from_pds", "result"], [ - ["Jim Stevens", "Jane", "Smith"], - ["Jane Smith Anderson", "Jane", "Smith"], - ["Bob Smith Anderson", "Jane", "Smith"], - ["Jane B Smith Anderson", "Jane", "Smith"], - ["Jane Bob Anderson", "Jane", "Smith"], - ["Jane Bob Smith", "Jane Bob", "Smith Anderson"], - ["Jane Smith", "Jane Bob", "Smith"], - ["Jane B Smith", "Jane Bob", "Smith"], - ["Jane-Bob Smith", "Jane Bob", "Smith"], - ["Jane Smith", "Jane Bob", "Smith"], - ["Jane Smith Anderson", "Jane", "Smith-Anderson"], - ["Jane Smith", "Jane", "Smith-Anderson"], - ["Jane Anderson", "Jane", "Smith-Anderson"], - ["Jane Bob Smith", "Jane Bob", "Smith-Anderson"], - ["Bob Smith Anderson", "Jane", "Smith Anderson"], - ["Jane Smith", "Jane", "Smith Anderson"], - ["Jane Anderson", "Jane", "Smith Anderson"], - ["Jane Anderson Smith", "Jane", "Smith Anderson"], + ("Jim Stevens", ["Jane"], "Smith", ValidationScore.NO_MATCH), + ["Jane Smith Anderson", ["Jane"], "Smith", ValidationScore.FULL_MATCH], + ["Bob Smith Anderson", ["Jane"], "Smith", ValidationScore.PARTIAL_MATCH], + ["Bob Smith Anderson", ["Jane", "Bob"], "Smith", ValidationScore.FULL_MATCH], + ["Jane B Smith Anderson", ["Jane"], "Smith", ValidationScore.FULL_MATCH], + ["Jane Bob Anderson", ["Jane"], "Smith", ValidationScore.PARTIAL_MATCH], + [ + "Jane Bob Smith", + ["Jane Bob"], + "Smith Anderson", + ValidationScore.PARTIAL_MATCH, + ], + ["Jane Smith", ["Jane Bob"], "Smith", ValidationScore.PARTIAL_MATCH], + ["Jane B Smith", ["Jane Bob"], "Smith", ValidationScore.PARTIAL_MATCH], + ["Jane-Bob Smith", ["Jane Bob"], "Smith", ValidationScore.PARTIAL_MATCH], + ["Jane Smith", ["Jane Bob"], "Smith", ValidationScore.PARTIAL_MATCH], + [ + "Jane Smith Anderson", + ["Jane"], + "Smith-Anderson", + ValidationScore.PARTIAL_MATCH, + ], + ["Jane Smith", ["Jane"], "Smith-Anderson", ValidationScore.PARTIAL_MATCH], + ["Jane Anderson", ["Jane"], "Smith-Anderson", ValidationScore.PARTIAL_MATCH], + [ + "Jane Bob Smith", + ["Jane Bob"], + "Smith-Anderson", + ValidationScore.PARTIAL_MATCH, + ], + [ + "Bob Smith Anderson", + ["Jane"], + "Smith Anderson", + ValidationScore.PARTIAL_MATCH, + ], + ["Jane Smith", ["Jane"], "Smith Anderson", ValidationScore.PARTIAL_MATCH], + ["Jane Anderson", ["Jane"], "Smith Anderson", ValidationScore.PARTIAL_MATCH], + [ + "Jane Anderson Smith", + ["Jane"], + "Smith Anderson", + ValidationScore.PARTIAL_MATCH, + ], ], ) def test_validate_patient_name_return_false( - file_patient_name, first_name_from_pds, family_name_from_pds + file_patient_name, first_name_from_pds, family_name_from_pds, result ): actual = validate_patient_name( file_patient_name, first_name_from_pds, family_name_from_pds ) - assert actual is False + assert actual.score == result @pytest.mark.parametrize( - ["file_patient_name", "first_name_from_pds", "family_name_from_pds"], + ["file_patient_name", "first_name_from_pds", "family_name_from_pds", "expected"], [ - ["Jane Smith", "Jane", "Smith"], - ["Jane Bob Smith Anderson", "Jane", "Smith Anderson"], - ["Jane Smith Anderson", "Jane", "Smith Anderson"], - ["Jane B Smith Anderson", "Jane", "Smith Anderson"], - ["Jane Smith-Anderson", "Jane", "Smith-Anderson"], - ["Jane Bob Smith Anderson", "Jane Bob", "Smith Anderson"], - ["Jane Bob Smith", "Jane Bob", "Smith"], + ( + "Jane Smith", + ["Jane"], + "Smith", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith", + ), + ), + ( + "Jane Bob Smith Anderson", + ["Jane"], + "Smith Anderson", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith Anderson", + ), + ), + ( + "Jane Smith Anderson", + ["Jane"], + "Smith Anderson", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith Anderson", + ), + ), + ( + "Jane B Smith Anderson", + ["Jane"], + "Smith Anderson", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith Anderson", + ), + ), + ( + "Jane Smith-Anderson", + ["Jane"], + "Smith-Anderson", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith-Anderson", + ), + ), + ( + "Jane Bob Smith Anderson", + ["Jane Bob"], + "Smith Anderson", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane Bob"], + family_name_match="Smith Anderson", + ), + ), + ( + "Jane Bob Smith", + ["Jane Bob"], + "Smith", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane Bob"], + family_name_match="Smith", + ), + ), ], ) def test_validate_patient_name_return_true( - file_patient_name, first_name_from_pds, family_name_from_pds + file_patient_name, first_name_from_pds, family_name_from_pds, expected ): actual = validate_patient_name( file_patient_name, first_name_from_pds, family_name_from_pds ) - assert actual is True - - -@pytest.fixture -def mock_get_ssm_parameter(mocker): - return mocker.patch.object(SSMService, "get_ssm_parameter") + assert actual == expected -@pytest.fixture -def mock_pds_call(mocker): - yield mocker.patch("services.mock_pds_service.MockPdsApiService.pds_request") +@pytest.mark.parametrize( + ["file_patient_name", "score"], + [ + ("Jane Smith", ValidationScore.FULL_MATCH), + ("Smith Jane", ValidationScore.FULL_MATCH), + ("Jane Bob Smith Moody", ValidationScore.FULL_MATCH), + ("Jane Smith Moody", ValidationScore.FULL_MATCH), + ("Jane B Smith Moody", ValidationScore.FULL_MATCH), + ("Jane Smith-Moody", ValidationScore.FULL_MATCH), + ("Jane Bob Smith Moody", ValidationScore.FULL_MATCH), + ("Jane Bob Smith", ValidationScore.FULL_MATCH), + ("Bob Smith", ValidationScore.PARTIAL_MATCH), + ("Bob Jane", ValidationScore.FULL_MATCH), + ("Alastor Moody", ValidationScore.NO_MATCH), + ("Jones Bob", ValidationScore.MIXED_FULL_MATCH), + ], +) +def test_calculate_validation_score(file_patient_name, score): + name_1 = build_test_name(start="1990-01-01", end=None, given=["Jane"]) + name_2 = build_test_name(start="1995-01-01", end=None, given=["Jane"], family="Bob") + name_3 = build_test_name(use="temp", start=None, end=None, given=["Jones"]) + test_patient = build_test_patient_with_names([name_1, name_2, name_3]) -@pytest.fixture -def mock_fetch_available_document_references_by_type(mocker): - yield mocker.patch.object( - DocumentService, "fetch_available_document_references_by_type" + actual_result, historical = calculate_validation_score( + file_patient_name, test_patient ) + + assert actual_result == score diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index d44ff154c..3fa353372 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -4,9 +4,9 @@ import pydantic import requests -from botocore.exceptions import ClientError from enums.pds_ssm_parameters import SSMParameter from enums.supported_document_types import SupportedDocumentTypes +from enums.validation_score import ValidationResult, ValidationScore from models.nhs_document_reference import NHSDocumentReference from models.pds_models import Patient from requests import HTTPError @@ -18,11 +18,7 @@ PatientRecordAlreadyExistException, PdsTooManyRequestsException, ) -from utils.unicode_utils import ( - REGEX_PATIENT_NAME_PATTERN, - name_ends_with, - name_starts_with, -) +from utils.unicode_utils import REGEX_PATIENT_NAME_PATTERN, name_contains_in from utils.utilities import get_pds_service logger = LoggingService(__name__) @@ -144,60 +140,87 @@ def check_for_file_names_agrees_with_each_other(file_name_list: list[str]): def validate_filename_with_patient_details( file_name_list: list[str], patient_details: Patient -): +) -> (ValidationScore, bool, bool): try: file_name_info = extract_info_from_filename(file_name_list[0]) file_patient_name = file_name_info["patient_name"] file_date_of_birth = file_name_info["date_of_birth"] - validate_patient_date_of_birth(file_date_of_birth, patient_details) - is_name_validation_based_on_historic_name = ( - validate_patient_name_using_full_name_history( - file_patient_name, patient_details - ) + name_validation_score, historical_match = calculate_validation_score( + file_patient_name, patient_details + ) + if name_validation_score == ValidationScore.NO_MATCH: + raise LGInvalidFilesException("Patient name does not match our records") + is_dob_valid = validate_patient_date_of_birth( + file_date_of_birth, patient_details ) - return is_name_validation_based_on_historic_name - except (ClientError, ValueError) as e: + if not is_dob_valid and name_validation_score == ValidationScore.PARTIAL_MATCH: + raise LGInvalidFilesException("Patient name does not match our records 1/3") + return name_validation_score, is_dob_valid, historical_match + + except (ValueError, KeyError) as e: logger.error(e) raise LGInvalidFilesException(e) -def validate_patient_name( - file_patient_name: str, first_name_in_pds: str, family_name_in_pds: str -): - logger.info("Verifying patient name against the record in PDS...") - - first_name_matches = name_starts_with(file_patient_name, first_name_in_pds) - family_name_matches = name_ends_with(file_patient_name, family_name_in_pds) - - if not (first_name_matches and family_name_matches): - return False - return True - - -def validate_patient_name_using_full_name_history( - file_patient_name: str, pds_patient_details: Patient -): - usual_family_name_in_pds, usual_first_name_in_pds = ( - pds_patient_details.get_current_family_name_and_given_name() - ) - - if validate_patient_name( - file_patient_name, usual_first_name_in_pds[0], usual_family_name_in_pds - ): - return False +def calculate_validation_score( + file_patient_name: str, patient_details: Patient +) -> (ValidationScore, bool): + matched_on_given_name = set() + matched_on_family_name = set() + historical_match = False + ordered_names = patient_details.get_names_by_start_date() + for index, name in enumerate(ordered_names): + first_name_in_pds = name.given + family_name_in_pds = name.family + result = validate_patient_name( + file_patient_name, first_name_in_pds, family_name_in_pds + ) + if result.score == ValidationScore.FULL_MATCH: + historical_match = index != 0 + return result.score, historical_match + elif result.score == ValidationScore.PARTIAL_MATCH: + historical_match = index != 0 + matched_on_given_name.update(result.given_name_match) + matched_on_family_name.update(result.family_name_match) logger.info( - "Failed to validate patient name using usual name, trying to validate using name history" + "Failed to find full match on patient name, trying to validate using name history" ) + if matched_on_given_name and matched_on_family_name: + return ValidationScore.MIXED_FULL_MATCH, historical_match + elif matched_on_given_name or matched_on_family_name: + return ValidationScore.PARTIAL_MATCH, historical_match + return ValidationScore.NO_MATCH, False + - for name in pds_patient_details.name: - historic_first_name_in_pds: str = name.given[0] - historic_family_name_in_pds = name.family - if validate_patient_name( - file_patient_name, historic_first_name_in_pds, historic_family_name_in_pds - ): - return True +def validate_patient_name( + file_patient_name: str, first_name_in_pds: list[str], family_name_in_pds: str +) -> ValidationResult: + logger.info("Verifying patient name against the record in PDS...") + given_name_matches = [ + first_name + for first_name in first_name_in_pds + if name_contains_in(file_patient_name, first_name) + ] + family_name_matches = name_contains_in(file_patient_name, family_name_in_pds) - raise LGInvalidFilesException("Patient name does not match our records") + if given_name_matches and family_name_matches: + return ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=given_name_matches, + family_name_match=family_name_in_pds, + ) + elif given_name_matches: + return ValidationResult( + score=ValidationScore.PARTIAL_MATCH, + given_name_match=given_name_matches, + ) + elif family_name_matches: + return ValidationResult( + score=ValidationScore.PARTIAL_MATCH, family_name_match=family_name_in_pds + ) + return ValidationResult( + score=ValidationScore.NO_MATCH, + ) def validate_patient_date_of_birth(file_date_of_birth, pds_patient_details): diff --git a/lambdas/utils/unicode_utils.py b/lambdas/utils/unicode_utils.py index eadc5a660..0634ad853 100644 --- a/lambdas/utils/unicode_utils.py +++ b/lambdas/utils/unicode_utils.py @@ -58,6 +58,13 @@ def name_ends_with(full_name: str, partial_name: str) -> bool: return folded_full_name.endswith(folded_partial_name) +def name_contains_in(full_name: str, partial_name: str) -> bool: + folded_full_name = convert_to_nfd_form(full_name).casefold() + folded_partial_name = convert_to_nfd_form(partial_name).casefold() + + return folded_partial_name in folded_full_name + + def convert_to_nfc_form(input_str: str) -> str: """ Convert a string to the NFC normalization form