From 3a2ac7f144b556c9dc4f0f3530c8aacf8550bd40 Mon Sep 17 00:00:00 2001 From: Joachim Metz Date: Mon, 8 Jan 2024 06:16:17 +0100 Subject: [PATCH] Worked on schema extraction script --- ACKNOWLEDGEMENTS | 2 +- MANIFEST.in | 7 ++- esedbrc/data/known_databases.yaml | 4 ++ esedbrc/resources.py | 16 ++++++ esedbrc/yaml_definitions_file.py | 93 +++++++++++++++++++++++++++++++ test_data/known_databases.yaml | 4 ++ tests/yaml_definitions_file.py | 75 +++++++++++++++++++++++++ 7 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 esedbrc/data/known_databases.yaml create mode 100644 esedbrc/yaml_definitions_file.py create mode 100644 test_data/known_databases.yaml create mode 100644 tests/yaml_definitions_file.py diff --git a/ACKNOWLEDGEMENTS b/ACKNOWLEDGEMENTS index 609e9da..fcf6c2f 100644 --- a/ACKNOWLEDGEMENTS +++ b/ACKNOWLEDGEMENTS @@ -1,4 +1,4 @@ Acknowledgements: esedb-kb -Copyright (c) 2014-2022, Joachim Metz +Copyright (c) 2014-2024, Joachim Metz diff --git a/MANIFEST.in b/MANIFEST.in index e1d10ca..9c8d9fe 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,11 +2,14 @@ include ACKNOWLEDGEMENTS AUTHORS LICENSE README include dependencies.ini run_tests.py utils/__init__.py utils/dependencies.py include utils/check_dependencies.py include requirements.txt test_requirements.txt +recursive-include config * +recursive-include test_data * exclude .gitignore exclude *.pyc recursive-exclude esedbrc *.pyc -recursive-include test_data * -# The test scripts are not required in a binary distribution package they +recursive-include scripts *.py +recursive-exclude scripts *.pyc +# The test scripts are not required in a binary distribution package they # are considered source distribution files and excluded in find_package() # in setup.py. 
recursive-include tests *.py diff --git a/esedbrc/data/known_databases.yaml b/esedbrc/data/known_databases.yaml new file mode 100644 index 0000000..04a2ac3 --- /dev/null +++ b/esedbrc/data/known_databases.yaml @@ -0,0 +1,4 @@ +# esedb-kb database definitions +--- +artifact_definition: WindowsCortanaDatabase +database_identifier: windows_CortanaCoreDb.dat diff --git a/esedbrc/resources.py b/esedbrc/resources.py index 917cf57..e0d295a 100644 --- a/esedbrc/resources.py +++ b/esedbrc/resources.py @@ -4,6 +4,22 @@ import difflib +class DatabaseDefinition(object): + """Database definition. + + Attributes: + artifact_definition (str): name of the corresponding Digital Forensics + Artifact definition. + database_identifier (str): identifier of the database type. + """ + + def __init__(self): + """Initializes a database definition.""" + super(DatabaseDefinition, self).__init__() + self.artifact_definition = None + self.database_identifier = None + + class EseColumnDefinition(object): """ESE database column definition. diff --git a/esedbrc/yaml_definitions_file.py b/esedbrc/yaml_definitions_file.py new file mode 100644 index 0000000..4c822a5 --- /dev/null +++ b/esedbrc/yaml_definitions_file.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +"""YAML-based database definitions file.""" + +import yaml + +from esedbrc import resources + + +class YAMLDatabaseDefinitionsFile(object): + """YAML-based database definitions file. + + A YAML-based database definitions file contains one or more database + definitions. A database definition consists of: + + artifact_definition: WindowsCortanaDatabase + database_identifier: windows_CortanaCoreDb.dat + + Where: + * artifact_definition, name of the corresponding Digital Forensics Artifact + definition. + * database_identifier, identifier of the database type. 
+ """ + + _SUPPORTED_KEYS = frozenset([ + 'artifact_definition', + 'database_identifier']) + + def _ReadDatabaseDefinition(self, yaml_database_definition): + """Reads a database definition from a dictionary. + + Args: + yaml_database_definition (dict[str, object]): YAML database definition + values. + + Returns: + DatabaseDefinition: database definition. + + Raises: + RuntimeError: if the format of the database definition is not set + or incorrect. + """ + if not yaml_database_definition: + raise RuntimeError('Missing database definition values.') + + different_keys = set(yaml_database_definition) - self._SUPPORTED_KEYS + if different_keys: + different_keys = ', '.join(different_keys) + raise RuntimeError('Undefined keys: {0:s}'.format(different_keys)) + + artifact_definition = yaml_database_definition.get( + 'artifact_definition', None) + if not artifact_definition: + raise RuntimeError( + 'Invalid database definition missing format identifier.') + + database_identifier = yaml_database_definition.get( + 'database_identifier', None) + if not database_identifier: + raise RuntimeError( + 'Invalid database definition missing database identifier.') + + database_definition = resources.DatabaseDefinition() + database_definition.artifact_definition = artifact_definition + database_definition.database_identifier = database_identifier + + return database_definition + + def _ReadFromFileObject(self, file_object): + """Reads the database definitions from a file-like object. + + Args: + file_object (file): database definitions file-like object. + + Yields: + DatabaseDefinition: database definition. + """ + yaml_generator = yaml.safe_load_all(file_object) + + for yaml_database_definition in yaml_generator: + yield self._ReadDatabaseDefinition(yaml_database_definition) + + def ReadFromFile(self, path): + """Reads the database definitions from a YAML file. + + Args: + path (str): path to a database definitions file. + + Yields: + DatabaseDefinition: database definition. 
+ """ + with open(path, 'r', encoding='utf-8') as file_object: + for yaml_database_definition in self._ReadFromFileObject(file_object): + yield yaml_database_definition diff --git a/test_data/known_databases.yaml b/test_data/known_databases.yaml new file mode 100644 index 0000000..04a2ac3 --- /dev/null +++ b/test_data/known_databases.yaml @@ -0,0 +1,4 @@ +# esedb-kb database definitions +--- +artifact_definition: WindowsCortanaDatabase +database_identifier: windows_CortanaCoreDb.dat diff --git a/tests/yaml_definitions_file.py b/tests/yaml_definitions_file.py new file mode 100644 index 0000000..0599d03 --- /dev/null +++ b/tests/yaml_definitions_file.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +"""Tests for the YAML-based database definitions file.""" + +import unittest + +from esedbrc import yaml_definitions_file + +from tests import test_lib + + +class YAMLDatabaseDefinitionsFileTest(test_lib.BaseTestCase): + """Tests for the YAML-based database definitions file.""" + + # pylint: disable=protected-access + + _TEST_YAML = { + 'artifact_definition': 'MacOSNotesSQLiteDatabaseFile', + 'database_identifier': 'Notes.storedata'} + + def testReadDatabaseDefinition(self): + """Tests the _ReadDatabaseDefinition function.""" + test_definitions_file = yaml_definitions_file.YAMLDatabaseDefinitionsFile() + + definitions = test_definitions_file._ReadDatabaseDefinition(self._TEST_YAML) + + self.assertIsNotNone(definitions) + self.assertEqual( + definitions.artifact_definition, 'MacOSNotesSQLiteDatabaseFile') + self.assertEqual(definitions.database_identifier, 'Notes.storedata') + + with self.assertRaises(RuntimeError): + test_definitions_file._ReadDatabaseDefinition({}) + + with self.assertRaises(RuntimeError): + test_definitions_file._ReadDatabaseDefinition({ + 'artifact_definition': 'MacOSNotesSQLiteDatabaseFile'}) + + with self.assertRaises(RuntimeError): + test_definitions_file._ReadDatabaseDefinition({ + 'database_identifier': 'Notes.storedata'}) + + 
with self.assertRaises(RuntimeError): + test_definitions_file._ReadDatabaseDefinition({ + 'bogus': 'test'}) + + def testReadFromFileObject(self): + """Tests the _ReadFromFileObject function.""" + test_file_path = self._GetTestFilePath(['known_databases.yaml']) + self._SkipIfPathNotExists(test_file_path) + + test_definitions_file = yaml_definitions_file.YAMLDatabaseDefinitionsFile() + + with open(test_file_path, 'r', encoding='utf-8') as file_object: + definitions = list(test_definitions_file._ReadFromFileObject(file_object)) + + self.assertEqual(len(definitions), 1) + + def testReadFromFile(self): + """Tests the ReadFromFile function.""" + test_file_path = self._GetTestFilePath(['known_databases.yaml']) + self._SkipIfPathNotExists(test_file_path) + + test_definitions_file = yaml_definitions_file.YAMLDatabaseDefinitionsFile() + + definitions = list(test_definitions_file.ReadFromFile(test_file_path)) + + self.assertEqual(len(definitions), 1) + + self.assertEqual( + definitions[0].artifact_definition, 'WindowsCortanaDatabase') + + +if __name__ == '__main__': + unittest.main()