diff --git a/irrd/mirroring/nrtm4/nrtm4_client.py b/irrd/mirroring/nrtm4/nrtm4_client.py index fb23dede0..c77b0ab54 100644 --- a/irrd/mirroring/nrtm4/nrtm4_client.py +++ b/irrd/mirroring/nrtm4/nrtm4_client.py @@ -99,6 +99,7 @@ def _run_client(self) -> bool: version=unf.version, current_key=used_key, next_key=unf.next_signing_key, + previous_file_hashes=self._validate_aggregate_previous_file_hashes_from_unf(unf), ) if self.last_status != new_status: self.database_handler.record_nrtm4_client_status( @@ -194,7 +195,10 @@ def _deserialize_unf(self, unf_content: str) -> Tuple[bytes, str]: def _current_db_status(self) -> Tuple[bool, NRTM4ClientDatabaseStatus]: """Look up the current status of self.source in the database.""" - query = DatabaseStatusQuery().source(self.source) + query = DatabaseStatusQuery( + DatabaseStatusQuery.get_default_columns() + + [DatabaseStatusQuery.columns.nrtm4_client_previous_file_hashes] + ).source(self.source) result = self.database_handler.execute_query(query) try: status = next(result) @@ -203,9 +207,10 @@ def _current_db_status(self) -> Tuple[bool, NRTM4ClientDatabaseStatus]: version=status["nrtm4_client_version"], current_key=status["nrtm4_client_current_key"], next_key=status["nrtm4_client_next_key"], + previous_file_hashes=status["nrtm4_client_previous_file_hashes"], ) except StopIteration: - return False, NRTM4ClientDatabaseStatus(None, None, None, None) + return False, NRTM4ClientDatabaseStatus(None, None, None, None, None) def _find_next_version(self, unf: NRTM4UpdateNotificationFile, last_version: Optional[int] = None): """ @@ -249,6 +254,32 @@ def _find_next_version(self, unf: NRTM4UpdateNotificationFile, last_version: Opt return next_version + def _validate_aggregate_previous_file_hashes_from_unf( + self, unf: NRTM4UpdateNotificationFile + ) -> Dict[str, List[str]]: + """ + Check if the server hasn't been rewriting history, which is obviously not allowed. + Also produces the new value for "previous_file_hashes" + """ + current_files = {f"snapshot-{unf.snapshot.version}": [str(unf.snapshot.url), unf.snapshot.hash]} + for delta in unf.deltas: + current_files[f"delta-{delta.version}"] = [str(delta.url), delta.hash] + if not self.last_status.previous_file_hashes: + return current_files + + for current_file_reference, current_file_details in current_files.items(): + if current_file_reference in self.last_status.previous_file_hashes: + previous_file_details = self.last_status.previous_file_hashes[current_file_reference] + if current_file_details != previous_file_details: + raise NRTM4ClientError( + f"{self.source}: Reference {current_file_reference} has filename" + f" '{current_file_details[0]}' with hash '{current_file_details[1]}' in current" + f" Update Notification File, but had filename '{previous_file_details[0]}' with hash" + f" '{previous_file_details[1]}' in a previous Update Notification File. Server is" + " rewriting history." + ) + return current_files + def _load_snapshot(self, unf: NRTM4UpdateNotificationFile): """ Load a snapshot into the database. diff --git a/irrd/mirroring/nrtm4/tests/test_nrtm4_client.py b/irrd/mirroring/nrtm4/tests/test_nrtm4_client.py index 5561ab04a..81dbad03f 100644 --- a/irrd/mirroring/nrtm4/tests/test_nrtm4_client.py +++ b/irrd/mirroring/nrtm4/tests/test_nrtm4_client.py @@ -20,10 +20,20 @@ MOCK_SESSION_ID = "ca128382-78d9-41d1-8927-1ecef15275be" MOCK_SNAPSHOT_URL = "https://example.com/snapshot.2.json" +MOCK_SNAPSHOT_FILENAME = MOCK_SNAPSHOT_URL.split("/")[-1] MOCK_DELTA3_URL = "https://example.com/delta.3.json" +MOCK_DELTA3_FILENAME = MOCK_DELTA3_URL.split("/")[-1] MOCK_DELTA4_URL = "https://example.com/delta.4.json" +MOCK_DELTA4_FILENAME = MOCK_DELTA4_URL.split("/")[-1] MOCK_UNF_URL = "https://example.com/" + UPDATE_NOTIFICATION_FILENAME +VALID_PREVIOUS_FILE_HASHES = { + # URL is actually reused as the hash in our test data + "snapshot-3": [MOCK_SNAPSHOT_FILENAME, MOCK_SNAPSHOT_URL], + "delta-3": [MOCK_DELTA3_FILENAME, MOCK_DELTA3_URL], + "delta-4": [MOCK_DELTA4_FILENAME, MOCK_DELTA4_URL], +} + MOCK_UNF = { "nrtm_version": 4, "timestamp": "2022-01-01T15:00:00Z", @@ -34,18 +44,18 @@ "version": 4, "snapshot": { "version": 3, - "url": MOCK_SNAPSHOT_URL.split("/")[-1], + "url": MOCK_SNAPSHOT_FILENAME, "hash": MOCK_SNAPSHOT_URL, }, "deltas": [ { "version": 3, - "url": MOCK_DELTA3_URL.split("/")[-1], + "url": MOCK_DELTA3_FILENAME, "hash": MOCK_DELTA3_URL, }, { "version": 4, - "url": MOCK_DELTA4_URL.split("/")[-1], + "url": MOCK_DELTA4_FILENAME, "hash": MOCK_DELTA4_URL, }, ], @@ -136,6 +146,7 @@ def test_valid_from_snapshot(self, prepare_nrtm4_test, caplog): "nrtm4_client_version": None, "nrtm4_client_current_key": None, "nrtm4_client_next_key": None, + "nrtm4_client_previous_file_hashes": None, } ] ) @@ -156,6 +167,8 @@ def test_valid_from_delta(self, prepare_nrtm4_test, caplog): "nrtm4_client_version": 2, "nrtm4_client_current_key": None, "nrtm4_client_next_key": None, + # Also tests the scenario for previous hashes, which do match + "nrtm4_client_previous_file_hashes": {"delta-3": [MOCK_DELTA3_FILENAME, MOCK_DELTA3_URL]}, } ] ) @@ -202,6 +215,7 @@ def test_invalid_empty_delta(self, prepare_nrtm4_test, tmp_path, monkeypatch): "nrtm4_client_version": 2, "nrtm4_client_current_key": None, "nrtm4_client_next_key": None, + "nrtm4_client_previous_file_hashes": None, } ] ) @@ -231,6 +245,7 @@ def test_invalid_delta_key_error(self, prepare_nrtm4_test, tmp_path, monkeypatch "nrtm4_client_version": 2, "nrtm4_client_current_key": None, "nrtm4_client_next_key": None, + "nrtm4_client_previous_file_hashes": None, } ] ) @@ -250,6 +265,7 @@ def test_invalid_unf_version_too_low(self, prepare_nrtm4_test): "nrtm4_client_version": 6, "nrtm4_client_current_key": None, "nrtm4_client_next_key": None, + "nrtm4_client_previous_file_hashes": None, } ] ) @@ -269,6 +285,7 @@ def test_session_id_mismatch(self, prepare_nrtm4_test, caplog): "nrtm4_client_version": 2, "nrtm4_client_current_key": None, "nrtm4_client_next_key": None, + "nrtm4_client_previous_file_hashes": None, } ] ) @@ -289,6 +306,7 @@ def test_delta_gap(self, prepare_nrtm4_test, caplog): "nrtm4_client_version": 1, "nrtm4_client_current_key": None, "nrtm4_client_next_key": None, + "nrtm4_client_previous_file_hashes": None, } ] ) @@ -309,6 +327,7 @@ def test_force_reload(self, prepare_nrtm4_test, caplog): "nrtm4_client_version": 2, "nrtm4_client_current_key": None, "nrtm4_client_next_key": None, + "nrtm4_client_previous_file_hashes": None, } ] ) @@ -339,6 +358,7 @@ def test_valid_up_to_date(self, prepare_nrtm4_test, caplog): "nrtm4_client_version": 4, "nrtm4_client_current_key": None, "nrtm4_client_next_key": None, + "nrtm4_client_previous_file_hashes": None, } ] ) @@ -353,6 +373,7 @@ def test_valid_up_to_date(self, prepare_nrtm4_test, caplog): version=4, current_key=MOCK_UNF_PUBLIC_KEY, next_key=MOCK_UNF_PUBLIC_KEY_OTHER, + previous_file_hashes=VALID_PREVIOUS_FILE_HASHES, ), }, ), @@ -392,6 +413,47 @@ def test_invalid_signature_from_config(self, prepare_nrtm4_test, config_override NRTM4Client("TEST", mock_dh).run_client() assert "any known keys" in str(exc) + def test_invalid_hash_change_history_rewrite(self, prepare_nrtm4_test, config_override): + mock_dh = MockDatabaseHandler() + mock_dh.reset_mock() + mock_dh.query_responses[DatabaseStatusQuery] = iter( + [ + { + "force_reload": False, + "nrtm4_client_session_id": UUID(MOCK_SESSION_ID), + "nrtm4_client_version": 4, + "nrtm4_client_current_key": MOCK_UNF_PUBLIC_KEY, + "nrtm4_client_next_key": None, + "nrtm4_client_previous_file_hashes": { + "delta-3": [MOCK_DELTA3_FILENAME, "incorrect-hash"] + }, + } + ] + ) + with pytest.raises(NRTM4ClientError) as exc: + NRTM4Client("TEST", mock_dh).run_client() + assert "rewriting history" in str(exc) + + def test_invalid_filename_change_history_rewrite(self, prepare_nrtm4_test, config_override): + mock_dh = MockDatabaseHandler() + mock_dh.reset_mock() + mock_dh.query_responses[DatabaseStatusQuery] = iter( + [ + { + "force_reload": False, + "nrtm4_client_session_id": UUID(MOCK_SESSION_ID), + "nrtm4_client_version": 4, + "nrtm4_client_current_key": MOCK_UNF_PUBLIC_KEY, + "nrtm4_client_next_key": None, + # URL is used as hash in the mock data + "nrtm4_client_previous_file_hashes": {"delta-3": ["changed filename", MOCK_DELTA3_URL]}, + } + ] + ) + with pytest.raises(NRTM4ClientError) as exc: + NRTM4Client("TEST", mock_dh).run_client() + assert "rewriting history" in str(exc) + def test_invalid_current_db_key_with_valid_config_key(self, prepare_nrtm4_test, config_override): config_override( { @@ -416,6 +478,7 @@ def test_invalid_current_db_key_with_valid_config_key(self, prepare_nrtm4_test, # Does not match, but must be used "nrtm4_client_current_key": MOCK_UNF_PUBLIC_KEY_OTHER, "nrtm4_client_next_key": MOCK_UNF_PUBLIC_KEY_OTHER, + "nrtm4_client_previous_file_hashes": None, } ] ) @@ -446,6 +509,7 @@ def test_uses_current_db_key(self, prepare_nrtm4_test, config_override): "nrtm4_client_version": 4, "nrtm4_client_current_key": MOCK_UNF_PUBLIC_KEY, "nrtm4_client_next_key": None, + "nrtm4_client_previous_file_hashes": None, } ] ) @@ -475,6 +539,7 @@ def test_key_rotation(self, prepare_nrtm4_test, config_override, caplog): # Does not match, but must be used "nrtm4_client_current_key": MOCK_UNF_PUBLIC_KEY_OTHER, "nrtm4_client_next_key": MOCK_UNF_PUBLIC_KEY, + "nrtm4_client_previous_file_hashes": None, } ] ) @@ -489,6 +554,7 @@ def test_key_rotation(self, prepare_nrtm4_test, config_override, caplog): version=4, current_key=MOCK_UNF_PUBLIC_KEY, next_key=MOCK_UNF_PUBLIC_KEY_OTHER, + previous_file_hashes=VALID_PREVIOUS_FILE_HASHES, ), }, ), @@ -496,7 +562,12 @@ def test_key_rotation(self, prepare_nrtm4_test, config_override, caplog): assert "key rotated" in caplog.text def _assert_import_queries(self, mock_dh, expect_reload=True): - assert mock_dh.queries == [DatabaseStatusQuery().source("TEST")] + assert mock_dh.queries == [ + DatabaseStatusQuery( + DatabaseStatusQuery.get_default_columns() + + [DatabaseStatusQuery.columns.nrtm4_client_previous_file_hashes] + ).source("TEST") + ] expected = ( ( [ @@ -550,6 +621,7 @@ def _assert_import_queries(self, mock_dh, expect_reload=True): version=4, current_key=MOCK_UNF_PUBLIC_KEY, next_key=MOCK_UNF_PUBLIC_KEY_OTHER, + previous_file_hashes=VALID_PREVIOUS_FILE_HASHES, ), }, ), diff --git a/irrd/storage/alembic/versions/e1e649b5f8bb_add_nrtm_client_previous_file_hashes_field_to_.py b/irrd/storage/alembic/versions/e1e649b5f8bb_add_nrtm_client_previous_file_hashes_field_to_.py new file mode 100644 index 000000000..6cd2d2c43 --- /dev/null +++ b/irrd/storage/alembic/versions/e1e649b5f8bb_add_nrtm_client_previous_file_hashes_field_to_.py @@ -0,0 +1,29 @@ +"""Add nrtm_client_previous_file_hashes field to database_status + +Revision ID: e1e649b5f8bb +Revises: a635d2217a48 +Create Date: 2024-11-08 17:39:40.872329 + +""" +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "e1e649b5f8bb" +down_revision = "a635d2217a48" +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column( + "database_status", + sa.Column( + "nrtm4_client_previous_file_hashes", postgresql.JSONB(astext_type=sa.Text()), nullable=True + ), + ) + + +def downgrade(): + op.drop_column("database_status", "nrtm4_client_previous_file_hashes") diff --git a/irrd/storage/database_handler.py b/irrd/storage/database_handler.py index cfef2050a..3ae8bb83d 100644 --- a/irrd/storage/database_handler.py +++ b/irrd/storage/database_handler.py @@ -1198,6 +1198,7 @@ def finalise_transaction(self): nrtm4_client_version=status.version, nrtm4_client_current_key=status.current_key, nrtm4_client_next_key=status.next_key, + nrtm4_client_previous_file_hashes=status.previous_file_hashes, rpsl_data_updated=datetime.now(timezone.utc), ) ) diff --git a/irrd/storage/models.py b/irrd/storage/models.py index 10e22e637..ec7fc1032 100644 --- a/irrd/storage/models.py +++ b/irrd/storage/models.py @@ -56,6 +56,7 @@ class NRTM4ClientDatabaseStatus: version: Optional[int] current_key: Optional[str] next_key: Optional[str] + previous_file_hashes: Optional[Dict[str, List[str]]] @dataclasses.dataclass @@ -285,6 +286,7 @@ class RPSLDatabaseStatus(Base): # type: ignore nrtm4_client_version = sa.Column(sa.Integer) nrtm4_client_current_key = sa.Column(sa.Text) nrtm4_client_next_key = sa.Column(sa.Text) + nrtm4_client_previous_file_hashes = sa.Column(pg.JSONB, nullable=True) nrtm4_server_session_id = sa.Column(pg.UUID(as_uuid=True)) nrtm4_server_version = sa.Column(sa.Integer) diff --git a/irrd/storage/queries.py b/irrd/storage/queries.py index 334c2758c..54a42f8f4 100644 --- a/irrd/storage/queries.py +++ b/irrd/storage/queries.py @@ -546,40 +546,42 @@ class DatabaseStatusQuery(BaseDatabaseQuery): table = RPSLDatabaseStatus.__table__ columns = RPSLDatabaseStatus.__table__.c - def __init__(self): + def __init__(self, columns=None): self._sources_list: List[str] = [] - self.statement = sa.select( - [ - self.columns.pk, - self.columns.source, - self.columns.serial_oldest_seen, - self.columns.serial_newest_seen, - self.columns.serial_oldest_journal, - self.columns.serial_newest_journal, - self.columns.serial_last_export, - self.columns.serial_newest_mirror, - self.columns.nrtm4_client_session_id, - self.columns.nrtm4_client_version, - self.columns.nrtm4_client_current_key, - self.columns.nrtm4_client_next_key, - self.columns.nrtm4_server_session_id, - self.columns.nrtm4_server_version, - self.columns.nrtm4_server_last_update_notification_file_update, - self.columns.nrtm4_server_last_snapshot_version, - self.columns.nrtm4_server_last_snapshot_global_serial, - self.columns.nrtm4_server_last_snapshot_filename, - self.columns.nrtm4_server_last_snapshot_timestamp, - self.columns.nrtm4_server_last_snapshot_hash, - self.columns.nrtm4_server_previous_deltas, - self.columns.force_reload, - self.columns.synchronised_serials, - self.columns.last_error, - self.columns.last_error_timestamp, - self.columns.rpsl_data_updated, - self.columns.created, - self.columns.updated, - ] - ) + self.statement = sa.select(columns if columns else self.get_default_columns()) + + @classmethod + def get_default_columns(cls): + return [ + cls.columns.pk, + cls.columns.source, + cls.columns.serial_oldest_seen, + cls.columns.serial_newest_seen, + cls.columns.serial_oldest_journal, + cls.columns.serial_newest_journal, + cls.columns.serial_last_export, + cls.columns.serial_newest_mirror, + cls.columns.nrtm4_client_session_id, + cls.columns.nrtm4_client_version, + cls.columns.nrtm4_client_current_key, + cls.columns.nrtm4_client_next_key, + cls.columns.nrtm4_server_session_id, + cls.columns.nrtm4_server_version, + cls.columns.nrtm4_server_last_update_notification_file_update, + cls.columns.nrtm4_server_last_snapshot_version, + cls.columns.nrtm4_server_last_snapshot_global_serial, + cls.columns.nrtm4_server_last_snapshot_filename, + cls.columns.nrtm4_server_last_snapshot_timestamp, + cls.columns.nrtm4_server_last_snapshot_hash, + cls.columns.nrtm4_server_previous_deltas, + cls.columns.force_reload, + cls.columns.synchronised_serials, + cls.columns.last_error, + cls.columns.last_error_timestamp, + cls.columns.rpsl_data_updated, + cls.columns.created, + cls.columns.updated, + ] def source(self, source: str): """Filter on a source.""" diff --git a/irrd/storage/tests/test_database.py b/irrd/storage/tests/test_database.py index 552abe81d..224498081 100644 --- a/irrd/storage/tests/test_database.py +++ b/irrd/storage/tests/test_database.py @@ -285,7 +285,11 @@ def test_object_writing_and_status_checking(self, monkeypatch, irrd_db_mock_prel self.dh.record_mirror_error("TEST2", "error") nrtm4_client_status = NRTM4ClientDatabaseStatus( - session_id=uuid.uuid4(), version=20, current_key="current key", next_key="next key" + session_id=uuid.uuid4(), + version=20, + current_key="current key", + next_key="next key", + previous_file_hashes={"file": "hash"}, ) self.dh.record_nrtm4_client_status("TEST2", nrtm4_client_status) nrtm4_server_status = NRTM4ServerDatabaseStatus(