diff --git a/lambdas/enums/feature_flags.py b/lambdas/enums/feature_flags.py index 1d6b6ae18..898e5c20c 100755 --- a/lambdas/enums/feature_flags.py +++ b/lambdas/enums/feature_flags.py @@ -7,3 +7,4 @@ class FeatureFlags(Enum): UPLOAD_LAMBDA_ENABLED = "uploadLambdaEnabled" UPLOAD_ARF_WORKFLOW_ENABLED = "uploadArfWorkflowEnabled" USE_SMARTCARD_AUTH = "useSmartcardAuth" + LLOYD_GEORGE_VALIDATION_STRICT_MODE = "lloydGeorgeValidationStrictMode" diff --git a/lambdas/enums/validation_score.py b/lambdas/enums/validation_score.py new file mode 100644 index 000000000..c20986b9a --- /dev/null +++ b/lambdas/enums/validation_score.py @@ -0,0 +1,17 @@ +from enum import Enum +from typing import Optional + +from pydantic import BaseModel + + +class ValidationScore(Enum): + NO_MATCH = 0 + PARTIAL_MATCH = 1 + MIXED_FULL_MATCH = 2 + FULL_MATCH = 3 + + +class ValidationResult(BaseModel): + given_name_match: list[str] = [] + family_name_match: Optional[str] = None + score: ValidationScore = ValidationScore.NO_MATCH diff --git a/lambdas/handlers/bulk_upload_handler.py b/lambdas/handlers/bulk_upload_handler.py index dc51de17c..874be63d9 100644 --- a/lambdas/handlers/bulk_upload_handler.py +++ b/lambdas/handlers/bulk_upload_handler.py @@ -1,4 +1,6 @@ +from enums.feature_flags import FeatureFlags from services.bulk_upload_service import BulkUploadService +from services.feature_flags_service import FeatureFlagService from utils.audit_logging_setup import LoggingService from utils.decorators.handle_lambda_exceptions import handle_lambda_exceptions from utils.decorators.override_error_check import override_error_check @@ -14,6 +16,16 @@ @handle_lambda_exceptions def lambda_handler(event, _context): logger.info("Received event. Starting bulk upload process") + feature_flag_service = FeatureFlagService() + validation_strict_mode_flag_object = feature_flag_service.get_feature_flags_by_flag( + FeatureFlags.LLOYD_GEORGE_VALIDATION_STRICT_MODE.value + ) + validation_strict_mode = validation_strict_mode_flag_object[ + FeatureFlags.LLOYD_GEORGE_VALIDATION_STRICT_MODE.value + ] + + if validation_strict_mode: + logger.info("Lloyd George validation strict mose is enabled") if "Records" not in event or len(event["Records"]) < 1: http_status_code = 400 @@ -25,7 +37,7 @@ def lambda_handler(event, _context): status_code=http_status_code, body=response_body, methods="GET" ).create_api_gateway_response() - bulk_upload_service = BulkUploadService() + bulk_upload_service = BulkUploadService(strict_mode=validation_strict_mode) try: bulk_upload_service.process_message_queue(event["Records"]) diff --git a/lambdas/models/pds_models.py b/lambdas/models/pds_models.py index 74e20f1e2..7ae13181c 100644 --- a/lambdas/models/pds_models.py +++ b/lambdas/models/pds_models.py @@ -116,26 +116,28 @@ def get_usual_name(self) -> Optional[Name]: if entry.use.lower() == "usual": return entry - def get_most_recent_name(self) -> Optional[Name]: - active_names = [name for name in self.name if name.is_currently_in_use()] - if not active_names: - return None - + def get_names_by_start_date(self) -> [Name]: sorted_by_start_date_desc = sorted( - active_names, key=lambda name: name.period.start, reverse=True + self.name, + key=lambda name: ( + (1, name.period.start, name.use.lower() == "usual") + if name.period and name.period.start is not None + else (0, 0, name.use.lower() == "usual") + ), + reverse=True, ) - return sorted_by_start_date_desc[0] + return sorted_by_start_date_desc def get_current_family_name_and_given_name(self) -> Tuple[str, list[str]]: - current_name = self.get_most_recent_name() or self.get_usual_name() - if not current_name: + ordered_names = self.get_names_by_start_date() + if not ordered_names: logger.warning( "The patient does not have a currently active name or a usual name." ) return "", [""] - given_name = current_name.given - family_name = current_name.family + given_name = ordered_names[0].given + family_name = ordered_names[0].family if not given_name or given_name == [""]: logger.warning("The given name of patient is empty.") diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index bc3c2a5e1..ab1261ed6 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -34,7 +34,8 @@ LGInvalidFilesException, allowed_to_ingest_ods_code, getting_patient_info_from_pds, - validate_filename_with_patient_details, + validate_filename_with_patient_details_lenient, + validate_filename_with_patient_details_strict, validate_lg_file_names, ) from utils.request_context import request_context @@ -48,11 +49,11 @@ class BulkUploadService: - def __init__(self): + def __init__(self, strict_mode): self.dynamo_repository = BulkUploadDynamoRepository() self.sqs_repository = BulkUploadSqsRepository() self.s3_repository = BulkUploadS3Repository() - + self.strict_mode = strict_mode self.pdf_content_type = "application/pdf" self.unhandled_messages = [] self.file_path_cache = {} @@ -133,13 +134,27 @@ def handle_sqs_message(self, message: dict): patient_ods_code = ( pds_patient_details.get_ods_code_or_inactive_status_for_gp() ) - is_name_validation_based_on_historic_name = ( - validate_filename_with_patient_details(file_names, pds_patient_details) - ) + if not self.strict_mode: + ( + name_validation_accepted_reason, + is_name_validation_based_on_historic_name, + ) = validate_filename_with_patient_details_lenient( + file_names, pds_patient_details + ) + accepted_reason = self.concatenate_acceptance_reason( + accepted_reason, name_validation_accepted_reason + ) + else: + is_name_validation_based_on_historic_name = ( + validate_filename_with_patient_details_strict( + file_names, pds_patient_details + ) + ) if is_name_validation_based_on_historic_name: accepted_reason = self.concatenate_acceptance_reason( accepted_reason, "Patient matched on historical name" ) + if not allowed_to_ingest_ods_code(patient_ods_code): raise LGInvalidFilesException("Patient not registered at your practice") patient_death_notification_status = ( diff --git a/lambdas/services/edge_presign_service.py b/lambdas/services/edge_presign_service.py index cab057619..28858a9a5 100644 --- a/lambdas/services/edge_presign_service.py +++ b/lambdas/services/edge_presign_service.py @@ -25,7 +25,7 @@ def use_presign(self, request_values: dict): domain_name: str = request_values["domain_name"] presign_string: str = f"{uri}?{querystring}" - encoded_presign_string: str = presign_string.encode("utf-8") + encoded_presign_string = presign_string.encode("utf-8") presign_credentials_hash: str = hashlib.md5(encoded_presign_string).hexdigest() self.attempt_presign_ingestion( @@ -70,7 +70,7 @@ def filter_request_values(request: dict) -> dict: uri: str = request["uri"] querystring: str = request["querystring"] headers: dict = request["headers"] - origin: str = request.get("origin", {}) + origin: dict = request.get("origin", {}) domain_name: str = origin["s3"]["domainName"] except KeyError as e: logger.error(f"Missing request component: {str(e)}") diff --git a/lambdas/services/mock_data/pds_patient_9000000003_H85686_gp.json b/lambdas/services/mock_data/pds_patient_9000000003_H85686_gp.json index e0f65ef4f..99abc0a28 100644 --- a/lambdas/services/mock_data/pds_patient_9000000003_H85686_gp.json +++ b/lambdas/services/mock_data/pds_patient_9000000003_H85686_gp.json @@ -38,7 +38,7 @@ "use": "usual", "period": { "start": "2020-01-01", - "end": "2025-12-31" + "end": "2035-12-31" }, "given": [ "Jane" @@ -55,8 +55,8 @@ "id": "1234", "use": "other", "period": { - "start": "2020-01-01", - "end": "2025-12-31" + "start": "2010-01-01", + "end": "2019-12-31" }, "given": [ "Jim" diff --git a/lambdas/tests/unit/handlers/conftest.py b/lambdas/tests/unit/handlers/conftest.py index d5fde1ef9..a9a82cfc4 100755 --- a/lambdas/tests/unit/handlers/conftest.py +++ b/lambdas/tests/unit/handlers/conftest.py @@ -111,3 +111,12 @@ def mock_upload_lambda_disabled(mocker): "uploadLambdaEnabled": False } yield mock_upload_lambda_feature_flag + + +@pytest.fixture +def mock_validation_strict_disabled(mocker): + mock_function = mocker.patch.object(FeatureFlagService, "get_feature_flags_by_flag") + mock_upload_lambda_feature_flag = mock_function.return_value = { + "lloydGeorgeValidationStrictMode": False + } + yield mock_upload_lambda_feature_flag diff --git a/lambdas/tests/unit/handlers/test_bulk_upload_handler.py b/lambdas/tests/unit/handlers/test_bulk_upload_handler.py index e0b51c408..9a6419600 100644 --- a/lambdas/tests/unit/handlers/test_bulk_upload_handler.py +++ b/lambdas/tests/unit/handlers/test_bulk_upload_handler.py @@ -15,7 +15,9 @@ def mock_service(mocker): yield mocker.patch.object(BulkUploadService, "process_message_queue") -def test_can_process_event_with_one_message(mock_service, context, set_env): +def test_can_process_event_with_one_message( + mock_service, context, set_env, mock_validation_strict_disabled +): expected = ApiGatewayResponse( 200, "Finished processing all 1 messages", "GET" ).create_api_gateway_response() @@ -24,7 +26,9 @@ def test_can_process_event_with_one_message(mock_service, context, set_env): assert expected == actual -def test_can_process_event_with_several_messages(mock_service, context, set_env): +def test_can_process_event_with_several_messages( + mock_service, context, set_env, mock_validation_strict_disabled +): expected = ApiGatewayResponse( 200, "Finished processing all 3 messages", "GET" ).create_api_gateway_response() @@ -34,7 +38,7 @@ def test_can_process_event_with_several_messages(mock_service, context, set_env) def test_receive_correct_response_when_service_returns_error( - mock_service, context, set_env + mock_service, context, set_env, mock_validation_strict_disabled ): expected = ApiGatewayResponse( 500, "Bulk upload failed with error: ", "GET" @@ -46,7 +50,7 @@ def test_receive_correct_response_when_service_returns_error( def test_receive_correct_response_when_no_records_in_event( - mock_service, context, set_env + mock_service, context, set_env, mock_validation_strict_disabled ): expected = ApiGatewayResponse( 400, diff --git a/lambdas/tests/unit/helpers/data/pds/test_cases_for_patient_name_matching.py b/lambdas/tests/unit/helpers/data/pds/test_cases_for_patient_name_matching.py index 0d8ee3817..559162f8e 100644 --- a/lambdas/tests/unit/helpers/data/pds/test_cases_for_patient_name_matching.py +++ b/lambdas/tests/unit/helpers/data/pds/test_cases_for_patient_name_matching.py @@ -18,15 +18,22 @@ class PdsNameMatchingTestCase(NamedTuple): "Jane Bob Smith Anderson", "Jane Smith Anderson", "Jane B Smith Anderson", + "Bob Smith Anderson", ], "reject": [ - "Bob Smith Anderson", "Jane Smith", "Jane Anderson", "Jane Anderson Smith", ], } +TEST_CASES_FOR_TWO_WORDS_FAMILY_NAME_STRICT = copy.deepcopy( + TEST_CASES_FOR_TWO_WORDS_FAMILY_NAME +) +TEST_CASES_FOR_TWO_WORDS_FAMILY_NAME_STRICT["reject"].append( + TEST_CASES_FOR_TWO_WORDS_FAMILY_NAME_STRICT["accept"].pop(3) +) + TEST_CASES_FOR_FAMILY_NAME_WITH_HYPHEN = { "pds_name": {"family": "Smith-Anderson", "given": ["Jane"]}, "accept": ["Jane Smith-Anderson"], diff --git a/lambdas/tests/unit/models/test_pds_models.py b/lambdas/tests/unit/models/test_pds_models.py index a200830b1..66254fe7c 100644 --- a/lambdas/tests/unit/models/test_pds_models.py +++ b/lambdas/tests/unit/models/test_pds_models.py @@ -253,6 +253,19 @@ def test_get_current_family_name_and_given_name_return_the_first_usual_name_if_a assert actual == expected_given_name +def test_get_current_family_name_and_given_name_return_the_first_most_recent_name(): + name_1 = build_test_name(use="temp", start="1990-01-01", end=None, given=["Jones"]) + name_2 = build_test_name(use="usual", start=None, end=None, given=["Jane"]) + name_3 = build_test_name(use="usual", start=None, end=None, given=["Bob"]) + + test_patient = build_test_patient_with_names([name_1, name_2, name_3]) + + expected_given_name = ["Jones"] + actual = test_patient.get_patient_details(test_patient.id).given_name + + assert actual == expected_given_name + + def test_get_current_family_name_and_given_name_logs_a_warning_if_no_current_name_or_usual_name_found( caplog, ): @@ -330,7 +343,7 @@ def test_name_is_currently_in_use_return_false_for_nickname_or_old_name(): @freeze_time("2024-01-01") -def test_get_most_recent_name_return_the_name_with_most_recent_start_date(): +def test_get_names_by_start_date_return_ordered_list_by_start_date(): name_1 = build_test_name(start="1990-01-01", end=None, given=["Jane"]) name_2 = build_test_name(start="2010-02-14", end=None, given=["Jones"]) name_3 = build_test_name(start="2000-03-25", end=None, given=["Bob"]) @@ -341,22 +354,31 @@ def test_get_most_recent_name_return_the_name_with_most_recent_start_date(): use="nickname", start="2023-01-01", end=None, given=["Janie"] ) future_name = build_test_name(start="2047-01-01", end=None, given=["Neo Jane"]) + name_no_date = build_test_name(start=None, end=None, given=["Jane"]) test_patient = build_test_patient_with_names( - [name_1, name_2, name_3, expired_name, nickname, future_name] + [name_1, name_2, name_3, expired_name, nickname, future_name, name_no_date] ) - expected = name_2 - actual = test_patient.get_most_recent_name() + expected = [ + future_name, + nickname, + expired_name, + name_2, + name_3, + name_1, + name_no_date, + ] + actual = test_patient.get_names_by_start_date() assert actual == expected -@freeze_time("2024-01-01") -def test_get_most_recent_name_return_none_if_no_active_name_found(): - test_patient = build_test_patient_with_names([]) - - assert test_patient.get_most_recent_name() is None +# @freeze_time("2024-01-01") +# def test_get_most_recent_name_return_none_if_no_active_name_found(): +# test_patient = build_test_patient_with_names([]) +# +# assert test_patient.get_names_by_start_date() is None def test_get_death_notification_status_return_the_death_notification_status(): diff --git a/lambdas/tests/unit/services/test_bulk_upload_service.py b/lambdas/tests/unit/services/test_bulk_upload_service.py index 3fe0d2425..0e1cc1478 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_service.py @@ -62,7 +62,7 @@ @pytest.fixture def repo_under_test(set_env, mocker): - service = BulkUploadService() + service = BulkUploadService(strict_mode=True) mocker.patch.object(service, "dynamo_repository") mocker.patch.object(service, "sqs_repository") mocker.patch.object(service, "s3_repository") @@ -120,9 +120,17 @@ def mock_pds_service_patient_restricted(mocker): @pytest.fixture -def mock_pds_validation(mocker): +def mock_pds_validation_lenient(mocker): yield mocker.patch( - "services.bulk_upload_service.validate_filename_with_patient_details" + "services.bulk_upload_service.validate_filename_with_patient_details_lenient", + return_value=("test string", True), + ) + + +@pytest.fixture +def mock_pds_validation_strict(mocker): + yield mocker.patch( + "services.bulk_upload_service.validate_filename_with_patient_details_strict", ) @@ -150,7 +158,7 @@ def build_resolved_file_names_cache( def test_lambda_handler_process_each_sqs_message_one_by_one( set_env, mock_handle_sqs_message ): - service = BulkUploadService() + service = BulkUploadService(True) service.process_message_queue(TEST_SQS_MESSAGES_AS_LIST) @@ -168,7 +176,7 @@ def test_lambda_handler_continue_process_next_message_after_handled_error( InvalidMessageException, None, ] - service = BulkUploadService() + service = BulkUploadService(True) service.process_message_queue(TEST_SQS_MESSAGES_AS_LIST) assert mock_handle_sqs_message.call_count == len(TEST_SQS_MESSAGES_AS_LIST) @@ -185,7 +193,7 @@ def test_lambda_handler_handle_pds_too_many_requests_exception( expected_handled_messages = TEST_SQS_10_MESSAGES_AS_LIST[0:6] expected_unhandled_message = TEST_SQS_10_MESSAGES_AS_LIST[6:] - service = BulkUploadService() + service = BulkUploadService(True) with pytest.raises(BulkUploadException): service.process_message_queue(TEST_SQS_10_MESSAGES_AS_LIST) @@ -205,7 +213,7 @@ def test_handle_sqs_message_happy_path( repo_under_test, mock_validate_files, mock_pds_service, - mock_pds_validation, + mock_pds_validation_strict, mock_ods_validation, ): TEST_STAGING_METADATA.retries = 0 @@ -237,7 +245,7 @@ def test_handle_sqs_message_happy_path_single_file( repo_under_test, mock_validate_files, mock_pds_service, - mock_pds_validation, + mock_pds_validation_strict, mock_ods_validation, ): TEST_STAGING_METADATA.retries = 0 @@ -334,7 +342,7 @@ def test_handle_sqs_message_happy_path_with_non_ascii_filenames( mock_validate_files, patient_name_on_s3, patient_name_in_metadata_file, - mock_pds_validation, + mock_pds_validation_strict, mock_pds_service, mock_ods_validation, ): @@ -360,7 +368,7 @@ def test_handle_sqs_message_calls_report_upload_failure_when_patient_record_alre mock_uuid, mock_validate_files, mock_pds_service, - mock_pds_validation, + mock_pds_validation_strict, ): TEST_STAGING_METADATA.retries = 0 @@ -395,7 +403,7 @@ def test_handle_sqs_message_calls_report_upload_failure_when_lg_file_name_invali mock_uuid, mock_validate_files, mock_pds_service, - mock_pds_validation, + mock_pds_validation_strict, ): TEST_STAGING_METADATA.retries = 0 mock_create_lg_records_and_copy_files = mocker.patch.object( @@ -433,7 +441,7 @@ def test_handle_sqs_message_report_failure_when_document_is_infected( mock_validate_files, mock_check_virus_result, mock_pds_service, - mock_pds_validation, + mock_pds_validation_strict, mock_ods_validation, ): TEST_STAGING_METADATA.retries = 0 @@ -471,7 +479,7 @@ def test_handle_sqs_message_report_failure_when_document_not_exist( mock_validate_files, mock_check_virus_result, mock_pds_service, - mock_pds_validation, + mock_pds_validation_strict, mock_ods_validation, ): TEST_STAGING_METADATA.retries = 0 @@ -501,7 +509,7 @@ def test_handle_sqs_message_calls_report_upload_successful_when_patient_is_forma mock_validate_files, mock_check_virus_result, mock_pds_service_patient_deceased_formal, - mock_pds_validation, + mock_pds_validation_strict, mock_ods_validation, ): mock_create_lg_records_and_copy_files = mocker.patch.object( @@ -510,7 +518,7 @@ def test_handle_sqs_message_calls_report_upload_successful_when_patient_is_forma mock_remove_ingested_file_from_source_bucket = ( repo_under_test.s3_repository.remove_ingested_file_from_source_bucket ) - mock_pds_validation.return_value = False + mock_pds_validation_strict.return_value = False mock_put_staging_metadata_back_to_queue = ( repo_under_test.sqs_repository.put_staging_metadata_back_to_queue ) @@ -538,13 +546,13 @@ def test_handle_sqs_message_calls_report_upload_successful_when_patient_is_infor mock_validate_files, mock_check_virus_result, mock_pds_service_patient_deceased_informal, - mock_pds_validation, + mock_pds_validation_strict, mock_ods_validation, ): mock_create_lg_records_and_copy_files = mocker.patch.object( BulkUploadService, "create_lg_records_and_copy_files" ) - mock_pds_validation.return_value = True + mock_pds_validation_strict.return_value = True mock_remove_ingested_file_from_source_bucket = ( repo_under_test.s3_repository.remove_ingested_file_from_source_bucket ) @@ -575,13 +583,13 @@ def test_handle_sqs_message_calls_report_upload_successful_when_patient_has_hist mock_validate_files, mock_check_virus_result, mock_pds_service_patient_restricted, - mock_pds_validation, + mock_pds_validation_strict, mock_ods_validation, ): mock_create_lg_records_and_copy_files = mocker.patch.object( BulkUploadService, "create_lg_records_and_copy_files" ) - mock_pds_validation.return_value = True + mock_pds_validation_strict.return_value = True mock_remove_ingested_file_from_source_bucket = ( repo_under_test.s3_repository.remove_ingested_file_from_source_bucket ) @@ -612,13 +620,13 @@ def test_handle_sqs_message_calls_report_upload_successful_when_patient_is_infor mock_validate_files, mock_check_virus_result, mock_pds_service_patient_deceased_informal, - mock_pds_validation, + mock_pds_validation_strict, mock_ods_validation, ): mock_create_lg_records_and_copy_files = mocker.patch.object( BulkUploadService, "create_lg_records_and_copy_files" ) - mock_pds_validation.return_value = False + mock_pds_validation_strict.return_value = False mock_remove_ingested_file_from_source_bucket = ( repo_under_test.s3_repository.remove_ingested_file_from_source_bucket ) @@ -649,7 +657,7 @@ def test_handle_sqs_message_put_staging_metadata_back_to_queue_when_virus_scan_r mock_validate_files, mock_check_virus_result, mock_pds_service, - mock_pds_validation, + mock_pds_validation_strict, mock_ods_validation, ): TEST_STAGING_METADATA.retries = 0 @@ -687,7 +695,7 @@ def test_handle_sqs_message_rollback_transaction_when_validation_pass_but_file_t mock_check_virus_result, mock_validate_files, mock_pds_service, - mock_pds_validation, + mock_pds_validation_strict, mock_ods_validation, ): TEST_STAGING_METADATA.retries = 0 @@ -890,7 +898,7 @@ def test_raise_client_error_from_ssm_with_pds_service( repo_under_test, mock_validate_files, mock_pds_service, - mock_pds_validation, + mock_pds_validation_strict, ): mock_client_error = ClientError( {"Error": {"Code": "500", "Message": "test error"}}, "testing" @@ -907,7 +915,7 @@ def test_mismatch_ods_with_pds_service( mock_uuid, mock_validate_files, mock_pds_service, - mock_pds_validation, + mock_pds_validation_strict, ): mock_ods_validation.return_value = False @@ -929,7 +937,7 @@ def test_create_lg_records_and_copy_files_client_error( mock_check_virus_result, mock_validate_files, mock_pds_service, - mock_pds_validation, + mock_pds_validation_strict, mock_ods_validation, ): TEST_STAGING_METADATA.retries = 0 @@ -965,7 +973,7 @@ def test_handle_sqs_message_happy_path_historical_name( repo_under_test, mock_validate_files, mock_pds_service, - mock_pds_validation, + mock_pds_validation_strict, mock_ods_validation, ): TEST_STAGING_METADATA.retries = 0 @@ -979,7 +987,7 @@ def test_handle_sqs_message_happy_path_historical_name( repo_under_test.s3_repository, "remove_ingested_file_from_source_bucket" ) mocker.patch.object(repo_under_test.s3_repository, "check_virus_result") - mock_pds_validation.return_value = True + mock_pds_validation_strict.return_value = True repo_under_test.handle_sqs_message(message=TEST_SQS_MESSAGE) diff --git a/lambdas/tests/unit/utils/test_lloyd_george_validator.py b/lambdas/tests/unit/utils/test_lloyd_george_validator.py index 9019ae3b0..239a746be 100644 --- a/lambdas/tests/unit/utils/test_lloyd_george_validator.py +++ b/lambdas/tests/unit/utils/test_lloyd_george_validator.py @@ -1,6 +1,7 @@ import pytest from botocore.exceptions import ClientError from enums.supported_document_types import SupportedDocumentTypes +from enums.validation_score import ValidationResult, ValidationScore from models.pds_models import Patient from requests import Response from services.base.ssm_service import SSMService @@ -14,10 +15,15 @@ from tests.unit.helpers.data.pds.pds_patient_response import ( PDS_PATIENT_WITH_MIDDLE_NAME, ) +from tests.unit.helpers.data.pds.test_cases_for_date_logic import ( + build_test_name, + build_test_patient_with_names, +) from tests.unit.helpers.data.pds.test_cases_for_patient_name_matching import ( TEST_CASES_FOR_FAMILY_NAME_WITH_HYPHEN, TEST_CASES_FOR_TWO_WORDS_FAMILY_NAME, TEST_CASES_FOR_TWO_WORDS_FAMILY_NAME_AND_GIVEN_NAME, + TEST_CASES_FOR_TWO_WORDS_FAMILY_NAME_STRICT, TEST_CASES_FOR_TWO_WORDS_GIVEN_NAME, load_test_cases, ) @@ -30,6 +36,7 @@ from utils.lloyd_george_validator import ( LGInvalidFilesException, allowed_to_ingest_ods_code, + calculate_validation_score, check_for_duplicate_files, check_for_file_names_agrees_with_each_other, check_for_number_of_files_match_expected, @@ -40,16 +47,34 @@ getting_patient_info_from_pds, parse_pds_response, validate_file_name, - validate_filename_with_patient_details, + validate_filename_with_patient_details_lenient, + validate_filename_with_patient_details_strict, validate_lg_file_names, validate_lg_file_type, validate_lg_files, validate_patient_date_of_birth, - validate_patient_name, + validate_patient_name_lenient, validate_patient_name_using_full_name_history, ) +@pytest.fixture +def mock_get_ssm_parameter(mocker): + return mocker.patch.object(SSMService, "get_ssm_parameter") + + +@pytest.fixture +def mock_pds_call(mocker): + yield mocker.patch("services.mock_pds_service.MockPdsApiService.pds_request") + + +@pytest.fixture +def mock_fetch_available_document_references_by_type(mocker): + yield mocker.patch.object( + DocumentService, "fetch_available_document_references_by_type" + ) + + def test_catching_error_when_file_type_not_pdf(): with pytest.raises(LGInvalidFilesException): file_type = "image/png" @@ -234,7 +259,10 @@ def test_validate_nhs_id_with_pds_service(mock_pds_patient): "2of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", ] with expect_not_to_raise(LGInvalidFilesException): - validate_filename_with_patient_details(lg_file_list, mock_pds_patient) + validate_filename_with_patient_details_lenient(lg_file_list, mock_pds_patient) + + with expect_not_to_raise(LGInvalidFilesException): + validate_filename_with_patient_details_strict(lg_file_list, mock_pds_patient) def test_mismatch_nhs_id(mocker): @@ -247,28 +275,55 @@ def test_mismatch_nhs_id(mocker): mocker.patch( "utils.lloyd_george_validator.check_for_number_of_files_match_expected" ) - mocker.patch("utils.lloyd_george_validator.validate_file_name") with pytest.raises(LGInvalidFilesException): validate_lg_file_names(lg_file_list, "9000000009") -def test_mismatch_name_with_pds_service(mock_pds_patient): +def test_mismatch_name_with_pds_service_strict(mock_pds_patient): lg_file_list = [ "1of2_Lloyd_George_Record_[Jake Plain Smith]_[9000000009]_[22-10-2010].pdf", "2of2_Lloyd_George_Record_[Jake Plain Smith]_[9000000009]_[22-10-2010].pdf", ] with pytest.raises(LGInvalidFilesException): - validate_filename_with_patient_details(lg_file_list, mock_pds_patient) + validate_filename_with_patient_details_strict(lg_file_list, mock_pds_patient) + + +def test_mismatch_name_with_pds_service_lenient(mock_pds_patient): + lg_file_list = [ + "1of2_Lloyd_George_Record_[Jake Plain Moody]_[9000000009]_[22-10-2010].pdf", + "2of2_Lloyd_George_Record_[Jake Plain Moody]_[9000000009]_[22-10-2010].pdf", + ] + + with pytest.raises(LGInvalidFilesException): + validate_filename_with_patient_details_lenient(lg_file_list, mock_pds_patient) -def test_validate_name_with_correct_name(mocker, mock_pds_patient): +def test_validate_name_with_correct_name_lenient(mocker, mock_pds_patient): lg_file_patient_name = "Jane Smith" mock_validate_name = mocker.patch( - "utils.lloyd_george_validator.validate_patient_name" + "utils.lloyd_george_validator.validate_patient_name_lenient" + ) + mock_validate_name.return_value = ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith", ) + actual_score, actual_is_name_validation_based_on_historic_name, result_message = ( + calculate_validation_score(lg_file_patient_name, mock_pds_patient) + ) + + assert mock_validate_name.call_count == 1 + assert actual_is_name_validation_based_on_historic_name is False + assert actual_score == ValidationScore.FULL_MATCH + +def test_validate_name_with_correct_name_strict(mocker, mock_pds_patient): + lg_file_patient_name = "Jane Smith" + mock_validate_name = mocker.patch( + "utils.lloyd_george_validator.validate_patient_name_strict" + ) with expect_not_to_raise(LGInvalidFilesException): validate_patient_name_using_full_name_history( lg_file_patient_name, mock_pds_patient @@ -280,7 +335,7 @@ def test_validate_name_with_file_missing_middle_name(mocker): lg_file_patient_name = "Jane Smith" patient = Patient.model_validate(PDS_PATIENT_WITH_MIDDLE_NAME) mock_validate_name = mocker.patch( - "utils.lloyd_george_validator.validate_patient_name" + "utils.lloyd_george_validator.validate_patient_name_strict" ) with expect_not_to_raise(LGInvalidFilesException): validate_patient_name_using_full_name_history(lg_file_patient_name, patient) @@ -288,13 +343,14 @@ def test_validate_name_with_file_missing_middle_name(mocker): assert mock_validate_name.call_count == 1 -def test_validate_name_with_additional_middle_name_in_file_mismatching_pds(mocker): +def test_validate_name_with_additional_middle_name_in_file_mismatching_pds_strict( + mocker, +): lg_file_patient_name = "Jane David Smith" mock_validate_name = mocker.patch( - "utils.lloyd_george_validator.validate_patient_name" + "utils.lloyd_george_validator.validate_patient_name_strict" ) patient = Patient.model_validate(PDS_PATIENT_WITH_MIDDLE_NAME) - with expect_not_to_raise(LGInvalidFilesException): actual_is_name_validation_based_on_historic_name = ( validate_patient_name_using_full_name_history(lg_file_patient_name, patient) @@ -303,30 +359,73 @@ def test_validate_name_with_additional_middle_name_in_file_mismatching_pds(mocke assert actual_is_name_validation_based_on_historic_name is False -def test_validate_name_with_additional_middle_name_in_file_but_none_in_pds( +def test_validate_name_with_additional_middle_name_in_file_mismatching_pds_lenient( + mocker, +): + lg_file_patient_name = "Jane David Smith" + mock_validate_name = mocker.patch( + "utils.lloyd_george_validator.validate_patient_name_lenient" + ) + patient = Patient.model_validate(PDS_PATIENT_WITH_MIDDLE_NAME) + mock_validate_name.return_value = ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith", + ) + actual_score, actual_is_name_validation_based_on_historic_name, result_message = ( + calculate_validation_score(lg_file_patient_name, patient) + ) + + assert mock_validate_name.call_count == 1 + assert actual_is_name_validation_based_on_historic_name is False + assert actual_score == ValidationScore.FULL_MATCH + + +def test_validate_name_with_additional_middle_name_in_file_but_none_in_pds_strict( mock_pds_patient, mocker ): lg_file_patient_name = "Jane David Smith" mock_validate_name = mocker.patch( - "utils.lloyd_george_validator.validate_patient_name" + "utils.lloyd_george_validator.validate_patient_name_strict" ) - with expect_not_to_raise(LGInvalidFilesException): - actual_is_name_validation_based_on_historic_name = ( - validate_patient_name_using_full_name_history( - lg_file_patient_name, mock_pds_patient - ) + + actual_is_name_validation_based_on_historic_name = ( + validate_patient_name_using_full_name_history( + lg_file_patient_name, mock_pds_patient ) + ) + + assert mock_validate_name.call_count == 1 + assert actual_is_name_validation_based_on_historic_name is False + + +def test_validate_name_with_additional_middle_name_in_file_but_none_in_pds( + mock_pds_patient, mocker +): + lg_file_patient_name = "Jane David Smith" + mock_validate_name = mocker.patch( + "utils.lloyd_george_validator.validate_patient_name_lenient" + ) + mock_validate_name.return_value = ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith", + ) + actual_score, actual_is_name_validation_based_on_historic_name, result_message = ( + calculate_validation_score(lg_file_patient_name, mock_pds_patient) + ) + assert mock_validate_name.call_count == 1 assert actual_is_name_validation_based_on_historic_name is False + assert actual_score == ValidationScore.FULL_MATCH -def test_validate_name_with_wrong_first_name(mocker, mock_pds_patient): +def test_validate_name_with_wrong_first_name_strict(mocker, mock_pds_patient): lg_file_patient_name = "John Smith" mock_validate_name = mocker.patch( - "utils.lloyd_george_validator.validate_patient_name" + "utils.lloyd_george_validator.validate_patient_name_strict" ) mock_validate_name.return_value = False - with pytest.raises(LGInvalidFilesException): validate_patient_name_using_full_name_history( lg_file_patient_name, mock_pds_patient @@ -334,10 +433,24 @@ def test_validate_name_with_wrong_first_name(mocker, mock_pds_patient): assert mock_validate_name.call_count == 3 -def test_validate_name_with_wrong_family_name(mocker, mock_pds_patient): - lg_file_patient_name = "Jane Johnson" +def test_validate_name_with_wrong_first_name_lenient(mock_pds_patient): + lg_file_patient_name = "John Smith" + + actual_response = validate_patient_name_lenient( + lg_file_patient_name, + mock_pds_patient.name[0].given, + mock_pds_patient.name[0].family, + ) + assert actual_response == ValidationResult( + score=ValidationScore.PARTIAL_MATCH, + family_name_match="Smith", + ) + + +def test_validate_name_with_wrong_family_name_strict(mocker, mock_pds_patient): + lg_file_patient_name = "John Johnson" mock_validate_name = mocker.patch( - "utils.lloyd_george_validator.validate_patient_name" + "utils.lloyd_george_validator.validate_patient_name_strict" ) mock_validate_name.return_value = False with pytest.raises(LGInvalidFilesException): @@ -347,10 +460,23 @@ def test_validate_name_with_wrong_family_name(mocker, mock_pds_patient): assert mock_validate_name.call_count == 3 -def test_validate_name_with_historical_name(mocker, mock_pds_patient): +def test_validate_name_with_wrong_family_name_lenient(mock_pds_patient): + lg_file_patient_name = "Jane Johnson" + actual_response = validate_patient_name_lenient( + lg_file_patient_name, + mock_pds_patient.name[0].given, + mock_pds_patient.name[0].family, + ) + assert actual_response == ValidationResult( + score=ValidationScore.PARTIAL_MATCH, + given_name_match=["Jane"], + ) + + +def test_validate_name_with_historical_name_strict(mocker, mock_pds_patient): lg_file_patient_name = "Jim Stevens" mock_validate_name = mocker.patch( - "utils.lloyd_george_validator.validate_patient_name" + "utils.lloyd_george_validator.validate_patient_name_strict" ) mock_validate_name.side_effect = [False, True] with expect_not_to_raise(LGInvalidFilesException): @@ -359,30 +485,67 @@ def test_validate_name_with_historical_name(mocker, mock_pds_patient): lg_file_patient_name, mock_pds_patient ) ) + assert mock_validate_name.call_count == 2 assert actual_is_name_validation_based_on_historic_name is True -def test_validate_name_without_given_name(mocker, mock_pds_patient): +def test_validate_name_with_historical_name_lenient(mocker, mock_pds_patient): + lg_file_patient_name = "Jim Stevens" + mock_validate_name = mocker.patch( + "utils.lloyd_george_validator.validate_patient_name_lenient" + ) + mock_validate_name.side_effect = [ + ValidationResult( + score=ValidationScore.NO_MATCH, + ), + ValidationResult( + score=ValidationScore.FULL_MATCH, + ), + ] + actual_score, actual_is_validate_on_historic, result_message = ( + calculate_validation_score(lg_file_patient_name, mock_pds_patient) + ) + assert actual_score == ValidationScore.FULL_MATCH + assert mock_validate_name.call_count == 2 + assert actual_is_validate_on_historic is True + + +def test_validate_name_without_given_name_strict(mocker, mock_pds_patient): lg_file_patient_name = "Jane Smith" mock_pds_patient.name[0].given = [""] mock_validate_name = mocker.patch( - "utils.lloyd_george_validator.validate_patient_name" + "utils.lloyd_george_validator.validate_patient_name_strict" ) with expect_not_to_raise(LGInvalidFilesException): actual_is_validate_on_historic = validate_patient_name_using_full_name_history( lg_file_patient_name, mock_pds_patient ) + assert actual_is_validate_on_historic is False assert mock_validate_name.call_count == 1 +def test_validate_name_without_given_name_lenient(mocker, mock_pds_patient): + lg_file_patient_name = "Jane Smith" + mock_pds_patient.name[0].given = [""] + mock_validate_name = mocker.patch( + "utils.lloyd_george_validator.validate_patient_name_lenient" + ) + actual_score, actual_is_validate_on_historic, result_message = ( + calculate_validation_score(lg_file_patient_name, mock_pds_patient) + ) + assert actual_score == ValidationScore.NO_MATCH + assert actual_is_validate_on_historic is False + assert mock_validate_name.call_count == 2 + + @pytest.mark.parametrize( ["patient_details", "patient_name_in_file_name", "should_accept_name"], - load_test_cases(TEST_CASES_FOR_TWO_WORDS_FAMILY_NAME), + load_test_cases(TEST_CASES_FOR_TWO_WORDS_FAMILY_NAME_STRICT), ) -def test_validate_patient_name_with_two_words_family_name( +def test_validate_patient_name_with_two_words_family_name_strict( patient_details: Patient, patient_name_in_file_name: str, should_accept_name: bool, @@ -402,11 +565,31 @@ def test_validate_patient_name_with_two_words_family_name( ) +@pytest.mark.parametrize( + ["patient_details", "patient_name_in_file_name", "should_accept_name"], + load_test_cases(TEST_CASES_FOR_TWO_WORDS_FAMILY_NAME), +) +def test_validate_patient_name_with_two_words_family_name_lenient( + patient_details: Patient, + patient_name_in_file_name: str, + should_accept_name: bool, +): + actual_score, actual_is_validate_on_historic, result_message = ( + calculate_validation_score(patient_name_in_file_name, patient_details) + ) + if should_accept_name: + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.FULL_MATCH + else: + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.PARTIAL_MATCH + + @pytest.mark.parametrize( ["patient_details", "patient_name_in_file_name", "should_accept_name"], load_test_cases(TEST_CASES_FOR_FAMILY_NAME_WITH_HYPHEN), ) -def test_validate_patient_name_with_family_name_with_hyphen( +def test_validate_patient_name_with_family_name_with_hyphen_strict( patient_details: Patient, patient_name_in_file_name: str, should_accept_name: bool, @@ -426,11 +609,31 @@ def test_validate_patient_name_with_family_name_with_hyphen( ) +@pytest.mark.parametrize( + ["patient_details", "patient_name_in_file_name", "should_accept_name"], + load_test_cases(TEST_CASES_FOR_FAMILY_NAME_WITH_HYPHEN), +) +def test_validate_patient_name_with_family_name_with_hyphen_lenient( + patient_details: Patient, + patient_name_in_file_name: str, + should_accept_name: bool, +): + actual_score, actual_is_validate_on_historic, result_message = ( + calculate_validation_score(patient_name_in_file_name, patient_details) + ) + if should_accept_name: + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.FULL_MATCH + else: + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.PARTIAL_MATCH + + @pytest.mark.parametrize( ["patient_details", "patient_name_in_file_name", "should_accept_name"], load_test_cases(TEST_CASES_FOR_TWO_WORDS_GIVEN_NAME), ) -def test_validate_patient_name_with_two_words_given_name( +def test_validate_patient_name_with_two_words_given_name_strict( patient_details: Patient, patient_name_in_file_name: str, should_accept_name: bool, @@ -447,11 +650,31 @@ def test_validate_patient_name_with_two_words_given_name( ) +@pytest.mark.parametrize( + ["patient_details", "patient_name_in_file_name", "should_accept_name"], + load_test_cases(TEST_CASES_FOR_TWO_WORDS_GIVEN_NAME), +) +def test_validate_patient_name_with_two_words_given_name_lenient( + patient_details: Patient, + patient_name_in_file_name: str, + should_accept_name: bool, +): + actual_score, actual_is_validate_on_historic, result_message = ( + calculate_validation_score(patient_name_in_file_name, patient_details) + ) + if should_accept_name: + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.FULL_MATCH + else: + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.PARTIAL_MATCH + + @pytest.mark.parametrize( ["patient_details", "patient_name_in_file_name", "should_accept_name"], load_test_cases(TEST_CASES_FOR_TWO_WORDS_FAMILY_NAME_AND_GIVEN_NAME), ) -def test_validate_patient_name_with_two_words_family_name_and_given_name( +def test_validate_patient_name_with_two_words_family_name_and_given_name_strict( patient_details: Patient, patient_name_in_file_name: str, should_accept_name: bool, @@ -468,6 +691,26 @@ def test_validate_patient_name_with_two_words_family_name_and_given_name( ) +@pytest.mark.parametrize( + ["patient_details", "patient_name_in_file_name", "should_accept_name"], + load_test_cases(TEST_CASES_FOR_TWO_WORDS_FAMILY_NAME_AND_GIVEN_NAME), +) +def test_validate_patient_name_with_two_words_family_name_and_given_name_lenient( + patient_details: Patient, + patient_name_in_file_name: str, + should_accept_name: bool, +): + actual_score, actual_is_validate_on_historic, result_message = ( + calculate_validation_score(patient_name_in_file_name, patient_details) + ) + if should_accept_name: + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.FULL_MATCH + else: + assert actual_is_validate_on_historic is False + assert actual_score == ValidationScore.PARTIAL_MATCH + + def test_missing_middle_name_names_with_pds_service(): lg_file_list = [ "1of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", @@ -476,17 +719,19 @@ def test_missing_middle_name_names_with_pds_service(): patient = Patient.model_validate(PDS_PATIENT_WITH_MIDDLE_NAME) with expect_not_to_raise(LGInvalidFilesException): - validate_filename_with_patient_details(lg_file_list, patient) + validate_filename_with_patient_details_lenient(lg_file_list, patient) + validate_filename_with_patient_details_strict(lg_file_list, patient) -def test_mismatch_dob_with_pds_service(mock_pds_patient): +def test_mismatch_dob_and_name_with_pds_service(mock_pds_patient): lg_file_list = [ - "1of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[14-01-2000].pdf", - "2of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[14-01-2000].pdf", + "1of2_Lloyd_George_Record_[Jake Plain Smith]_[9000000009]_[14-01-2000].pdf", + "2of2_Lloyd_George_Record_[Jake Plain Smith]_[9000000009]_[14-01-2000].pdf", ] with pytest.raises(LGInvalidFilesException): - validate_filename_with_patient_details(lg_file_list, mock_pds_patient) + validate_filename_with_patient_details_lenient(lg_file_list, mock_pds_patient) + validate_filename_with_patient_details_strict(lg_file_list, mock_pds_patient) def test_validate_date_of_birth_when_mismatch_dob_with_pds_service( @@ -494,21 +739,22 @@ def test_validate_date_of_birth_when_mismatch_dob_with_pds_service( ): file_date_of_birth = "14-01-2000" - with pytest.raises(LGInvalidFilesException): - validate_patient_date_of_birth(file_date_of_birth, mock_pds_patient) + actual = validate_patient_date_of_birth(file_date_of_birth, mock_pds_patient) + + assert actual is False def test_validate_date_of_birth_valid_with_pds_service(mock_pds_patient): file_date_of_birth = "22-10-2010" - with expect_not_to_raise(LGInvalidFilesException): - validate_patient_date_of_birth(file_date_of_birth, mock_pds_patient) + actual = validate_patient_date_of_birth(file_date_of_birth, mock_pds_patient) + assert actual is True def test_validate_date_of_birth_none_with_pds_service(mock_pds_patient): file_date_of_birth = "22-10-2010" mock_pds_patient.birth_date = None - with pytest.raises(LGInvalidFilesException): - validate_patient_date_of_birth(file_date_of_birth, mock_pds_patient) + actual = validate_patient_date_of_birth(file_date_of_birth, mock_pds_patient) + assert actual is False def test_patient_not_found_with_pds_service(mock_pds_call): @@ -733,70 +979,193 @@ def test_mismatch_nhs_in_validate_lg_file(mocker, mock_pds_patient): @pytest.mark.parametrize( - ["file_patient_name", "first_name_from_pds", "family_name_from_pds"], + ["file_patient_name", "first_name_from_pds", "family_name_from_pds", "result"], [ - ["Jim Stevens", "Jane", "Smith"], - ["Jane Smith Anderson", "Jane", "Smith"], - ["Bob Smith Anderson", "Jane", "Smith"], - ["Jane B Smith Anderson", "Jane", "Smith"], - ["Jane Bob Anderson", "Jane", "Smith"], - ["Jane Bob Smith", "Jane Bob", "Smith Anderson"], - ["Jane Smith", "Jane Bob", "Smith"], - ["Jane B Smith", "Jane Bob", "Smith"], - ["Jane-Bob Smith", "Jane Bob", "Smith"], - ["Jane Smith", "Jane Bob", "Smith"], - ["Jane Smith Anderson", "Jane", "Smith-Anderson"], - ["Jane Smith", "Jane", "Smith-Anderson"], - ["Jane Anderson", "Jane", "Smith-Anderson"], - ["Jane Bob Smith", "Jane Bob", "Smith-Anderson"], - ["Bob Smith Anderson", "Jane", "Smith Anderson"], - ["Jane Smith", "Jane", "Smith Anderson"], - ["Jane Anderson", "Jane", "Smith Anderson"], - ["Jane Anderson Smith", "Jane", "Smith Anderson"], + ("Jim Stevens", ["Jane"], "Smith", ValidationScore.NO_MATCH), + ["Jane Smith Anderson", ["Jane"], "Smith", ValidationScore.FULL_MATCH], + ["Bob Smith Anderson", ["Jane"], "Smith", ValidationScore.PARTIAL_MATCH], + ["Bob Smith Anderson", ["Jane", "Bob"], "Smith", ValidationScore.FULL_MATCH], + ["Jane B Smith Anderson", ["Jane"], "Smith", ValidationScore.FULL_MATCH], + ["Jane Bob Anderson", ["Jane"], "Smith", ValidationScore.PARTIAL_MATCH], + [ + "Jane Bob Smith", + ["Jane Bob"], + "Smith Anderson", + ValidationScore.PARTIAL_MATCH, + ], + ["Jane Smith", ["Jane Bob"], "Smith", ValidationScore.PARTIAL_MATCH], + ["Jane B Smith", ["Jane Bob"], "Smith", ValidationScore.PARTIAL_MATCH], + ["Jane-Bob Smith", ["Jane Bob"], "Smith", ValidationScore.PARTIAL_MATCH], + ["Jane Smith", ["Jane Bob"], "Smith", ValidationScore.PARTIAL_MATCH], + [ + "Jane Smith Anderson", + ["Jane"], + "Smith-Anderson", + ValidationScore.PARTIAL_MATCH, + ], + ["Jane Smith", ["Jane"], "Smith-Anderson", ValidationScore.PARTIAL_MATCH], + ["Jane Anderson", ["Jane"], "Smith-Anderson", ValidationScore.PARTIAL_MATCH], + [ + "Jane Bob Smith", + ["Jane Bob"], + "Smith-Anderson", + ValidationScore.PARTIAL_MATCH, + ], + [ + "Bob Smith Anderson", + ["Jane"], + "Smith Anderson", + ValidationScore.PARTIAL_MATCH, + ], + ["Jane Smith", ["Jane"], "Smith Anderson", ValidationScore.PARTIAL_MATCH], + ["Jane Anderson", ["Jane"], "Smith Anderson", ValidationScore.PARTIAL_MATCH], + [ + "Jane Anderson Smith", + ["Jane"], + "Smith Anderson", + ValidationScore.PARTIAL_MATCH, + ], ], ) def test_validate_patient_name_return_false( - file_patient_name, first_name_from_pds, family_name_from_pds + file_patient_name, first_name_from_pds, family_name_from_pds, result ): - actual = validate_patient_name( + actual = validate_patient_name_lenient( file_patient_name, first_name_from_pds, family_name_from_pds ) - assert actual is False + assert actual.score == result @pytest.mark.parametrize( - ["file_patient_name", "first_name_from_pds", "family_name_from_pds"], + ["file_patient_name", "first_name_from_pds", "family_name_from_pds", "expected"], [ - ["Jane Smith", "Jane", "Smith"], - ["Jane Bob Smith Anderson", "Jane", "Smith Anderson"], - ["Jane Smith Anderson", "Jane", "Smith Anderson"], - ["Jane B Smith Anderson", "Jane", "Smith Anderson"], - ["Jane Smith-Anderson", "Jane", "Smith-Anderson"], - ["Jane Bob Smith Anderson", "Jane Bob", "Smith Anderson"], - ["Jane Bob Smith", "Jane Bob", "Smith"], + ( + "Jane Smith", + ["Jane"], + "Smith", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith", + ), + ), + ( + "Jane Bob Smith Anderson", + ["Jane"], + "Smith", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith", + ), + ), + ( + "Jane Smith Anderson", + ["Jane"], + "Smith Anderson", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith Anderson", + ), + ), + ( + "Jane B Smith Anderson", + ["Jane"], + "Smith Anderson", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith Anderson", + ), + ), + ( + "Jane Smith-Anderson", + ["Jane"], + "Smith-Anderson", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane"], + family_name_match="Smith-Anderson", + ), + ), + ( + "Jane Bob Smith Anderson", + ["Jane Bob"], + "Smith Anderson", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane Bob"], + family_name_match="Smith Anderson", + ), + ), + ( + "Jane Bob Smith", + ["Jane Bob"], + "Smith", + ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=["Jane Bob"], + family_name_match="Smith", + ), + ), + ( + "Jane Smith", + ["Bob"], + "Smith", + ValidationResult( + score=ValidationScore.PARTIAL_MATCH, + family_name_match="Smith", + ), + ), + ( + "Jane Smith", + ["Bob"], + "Dylan", + ValidationResult( + score=ValidationScore.NO_MATCH, + ), + ), ], ) def test_validate_patient_name_return_true( - file_patient_name, first_name_from_pds, family_name_from_pds + file_patient_name, first_name_from_pds, family_name_from_pds, expected ): - actual = validate_patient_name( + actual = validate_patient_name_lenient( file_patient_name, first_name_from_pds, family_name_from_pds ) - assert actual is True - - -@pytest.fixture -def mock_get_ssm_parameter(mocker): - return mocker.patch.object(SSMService, "get_ssm_parameter") + assert actual == expected -@pytest.fixture -def mock_pds_call(mocker): - yield mocker.patch("services.mock_pds_service.MockPdsApiService.pds_request") +@pytest.mark.parametrize( + ["file_patient_name", "expected_score", "expected_historical_match"], + [ + ("Jane Smith", ValidationScore.FULL_MATCH, True), + ("Smith Jane", ValidationScore.FULL_MATCH, True), + ("Jane Bob Smith Moody", ValidationScore.FULL_MATCH, False), + ("Jane Smith Moody", ValidationScore.FULL_MATCH, True), + ("Jane B Smith Moody", ValidationScore.FULL_MATCH, True), + ("Jane Smith-Moody", ValidationScore.FULL_MATCH, True), + ("Jane Bob Smith Moody", ValidationScore.FULL_MATCH, False), + ("Jane Bob Smith", ValidationScore.FULL_MATCH, False), + ("Bob Smith", ValidationScore.MIXED_FULL_MATCH, True), + ("Bob Jane", ValidationScore.FULL_MATCH, False), + ("Alastor Moody", ValidationScore.NO_MATCH, False), + ("Jones Bob", ValidationScore.MIXED_FULL_MATCH, True), + ("Jones Jane", ValidationScore.MIXED_FULL_MATCH, True), + ], +) +def test_calculate_validation_score( + file_patient_name, expected_score, expected_historical_match +): + name_1 = build_test_name(start="1990-01-01", end=None, given=["Jane"]) + name_2 = build_test_name(start="1995-01-01", end=None, given=["Jane"], family="Bob") + name_3 = build_test_name(use="temp", start=None, end=None, given=["Jones"]) + test_patient = build_test_patient_with_names([name_1, name_2, name_3]) -@pytest.fixture -def mock_fetch_available_document_references_by_type(mocker): - yield mocker.patch.object( - DocumentService, "fetch_available_document_references_by_type" + actual_result, historical, _ = calculate_validation_score( + file_patient_name, test_patient ) + assert historical == expected_historical_match + assert actual_result == expected_score diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index d44ff154c..8a6412552 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -4,9 +4,9 @@ import pydantic import requests -from botocore.exceptions import ClientError from enums.pds_ssm_parameters import SSMParameter from enums.supported_document_types import SupportedDocumentTypes +from enums.validation_score import ValidationResult, ValidationScore from models.nhs_document_reference import NHSDocumentReference from models.pds_models import Patient from requests import HTTPError @@ -20,6 +20,7 @@ ) from utils.unicode_utils import ( REGEX_PATIENT_NAME_PATTERN, + name_contains_in, name_ends_with, name_starts_with, ) @@ -95,7 +96,7 @@ def validate_lg_files( files_name_list.append(doc.file_name) check_for_duplicate_files(files_name_list) - validate_filename_with_patient_details(files_name_list, pds_patient_details) + validate_filename_with_patient_details_strict(files_name_list, pds_patient_details) def validate_lg_file_names(file_name_list: list[str], nhs_number: str): @@ -142,26 +143,30 @@ def check_for_file_names_agrees_with_each_other(file_name_list: list[str]): raise LGInvalidFilesException("File names does not match with each other") -def validate_filename_with_patient_details( +def validate_filename_with_patient_details_strict( file_name_list: list[str], patient_details: Patient ): try: file_name_info = extract_info_from_filename(file_name_list[0]) file_patient_name = file_name_info["patient_name"] file_date_of_birth = file_name_info["date_of_birth"] - validate_patient_date_of_birth(file_date_of_birth, patient_details) + is_dob_valid = validate_patient_date_of_birth( + file_date_of_birth, patient_details + ) + if not is_dob_valid: + raise LGInvalidFilesException("Patient DoB does not match our records") is_name_validation_based_on_historic_name = ( validate_patient_name_using_full_name_history( file_patient_name, patient_details ) ) return is_name_validation_based_on_historic_name - except (ClientError, ValueError) as e: + except (ValueError, KeyError) as e: logger.error(e) raise LGInvalidFilesException(e) -def validate_patient_name( +def validate_patient_name_strict( file_patient_name: str, first_name_in_pds: str, family_name_in_pds: str ): logger.info("Verifying patient name against the record in PDS...") @@ -181,7 +186,7 @@ def validate_patient_name_using_full_name_history( pds_patient_details.get_current_family_name_and_given_name() ) - if validate_patient_name( + if validate_patient_name_strict( file_patient_name, usual_first_name_in_pds[0], usual_family_name_in_pds ): return False @@ -192,7 +197,7 @@ def validate_patient_name_using_full_name_history( for name in pds_patient_details.name: historic_first_name_in_pds: str = name.given[0] historic_family_name_in_pds = name.family - if validate_patient_name( + if validate_patient_name_strict( file_patient_name, historic_first_name_in_pds, historic_family_name_in_pds ): return True @@ -200,13 +205,117 @@ def validate_patient_name_using_full_name_history( raise LGInvalidFilesException("Patient name does not match our records") +def validate_filename_with_patient_details_lenient( + file_name_list: list[str], patient_details: Patient +) -> (str, bool): + try: + file_name_info = extract_info_from_filename(file_name_list[0]) + file_patient_name = file_name_info["patient_name"] + file_date_of_birth = file_name_info["date_of_birth"] + name_validation_score, historical_match, result_message = ( + calculate_validation_score(file_patient_name, patient_details) + ) + if name_validation_score == ValidationScore.NO_MATCH: + raise LGInvalidFilesException("Patient name does not match our records") + is_dob_valid = validate_patient_date_of_birth( + file_date_of_birth, patient_details + ) + if not is_dob_valid and name_validation_score == ValidationScore.PARTIAL_MATCH: + raise LGInvalidFilesException("Patient name does not match our records 1/3") + validation_messages = { + ValidationScore.PARTIAL_MATCH: { + True: f"Patient matched on partial match 2/3, {result_message}", + False: "", + }, + ValidationScore.MIXED_FULL_MATCH: { + True: f"Patient matched on mixed match 3/3, {result_message}", + False: f"Patient matched on mixed match 2/3, {result_message}", + }, + ValidationScore.FULL_MATCH: { + True: f"Patient matched on full match 3/3, {result_message}", + False: f"Patient matched on full match 2/3, {result_message}", + }, + } + acceptance_message = validation_messages[name_validation_score][is_dob_valid] + return acceptance_message, historical_match + + except (ValueError, KeyError) as e: + logger.error(e) + raise LGInvalidFilesException(e) + + +def calculate_validation_score( + file_patient_name: str, patient_details: Patient +) -> (ValidationScore, bool, str): + matched_on_given_name = set() + matched_on_family_name = set() + historical_match = False + ordered_names = patient_details.get_names_by_start_date() + for index, name in enumerate(ordered_names): + first_name_in_pds = name.given + family_name_in_pds = name.family + result = validate_patient_name_lenient( + file_patient_name, first_name_in_pds, family_name_in_pds + ) + if result.score == ValidationScore.FULL_MATCH: + result_message = f"matched on {len(matched_on_family_name)} family_name and {len(matched_on_given_name)} given name" + historical_match = index != 0 + return result.score, historical_match, result_message + elif result.score == ValidationScore.PARTIAL_MATCH: + historical_match = index != 0 + matched_on_given_name.update(result.given_name_match) + ( + matched_on_family_name.add(result.family_name_match) + if result.family_name_match + else None + ) + logger.info( + "Failed to find full match on patient name, trying to validate using name history" + ) + result_message = f"matched on {len(matched_on_family_name)} family_name and {len(matched_on_given_name)} given name" + if len(matched_on_given_name) + len(matched_on_family_name) > 1: + return ValidationScore.MIXED_FULL_MATCH, historical_match, result_message + elif matched_on_given_name or matched_on_family_name: + return ValidationScore.PARTIAL_MATCH, historical_match, result_message + return ValidationScore.NO_MATCH, False, result_message + + +def validate_patient_name_lenient( + file_patient_name: str, first_name_in_pds: list[str], family_name_in_pds: str +) -> ValidationResult: + logger.info("Verifying patient name against the record in PDS...") + given_name_matches = [ + first_name + for first_name in first_name_in_pds + if name_contains_in(file_patient_name, first_name) + ] + family_name_matches = name_contains_in(file_patient_name, family_name_in_pds) + + if given_name_matches and family_name_matches: + return ValidationResult( + score=ValidationScore.FULL_MATCH, + given_name_match=given_name_matches, + family_name_match=family_name_in_pds, + ) + elif given_name_matches: + return ValidationResult( + score=ValidationScore.PARTIAL_MATCH, + given_name_match=given_name_matches, + ) + elif family_name_matches: + return ValidationResult( + score=ValidationScore.PARTIAL_MATCH, family_name_match=family_name_in_pds + ) + return ValidationResult( + score=ValidationScore.NO_MATCH, + ) + + def validate_patient_date_of_birth(file_date_of_birth, pds_patient_details): date_of_birth = datetime.datetime.strptime(file_date_of_birth, "%d-%m-%Y").date() - if ( - not pds_patient_details.birth_date - or pds_patient_details.birth_date != date_of_birth - ): - raise LGInvalidFilesException("Patient DoB does not match our records") + if pds_patient_details.birth_date: + return pds_patient_details.birth_date == date_of_birth + return False def getting_patient_info_from_pds(nhs_number: str) -> Patient: diff --git a/lambdas/utils/unicode_utils.py b/lambdas/utils/unicode_utils.py index eadc5a660..0634ad853 100644 --- a/lambdas/utils/unicode_utils.py +++ b/lambdas/utils/unicode_utils.py @@ -58,6 +58,13 @@ def name_ends_with(full_name: str, partial_name: str) -> bool: return folded_full_name.endswith(folded_partial_name) +def name_contains_in(full_name: str, partial_name: str) -> bool: + folded_full_name = convert_to_nfd_form(full_name).casefold() + folded_partial_name = convert_to_nfd_form(partial_name).casefold() + + return folded_partial_name in folded_full_name + + def convert_to_nfc_form(input_str: str) -> str: """ Convert a string to the NFC normalization form