From c71bfd7811fbc6dad1aa7f676d30b36516a90546 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Thu, 16 Jan 2025 13:34:51 +0100 Subject: [PATCH] Fix ExFAT parsing with clusters larger than one sector (#31) --- dissect/fat/c_exfat.py | 2 +- dissect/fat/exfat.py | 30 ++++++++++++++++++------------ tests/conftest.py | 7 +++++++ tests/data/exfat4m.bin | 3 +++ tests/test_exfat.py | 33 +++++++++++++++++++++++++++++++++ 5 files changed, 62 insertions(+), 13 deletions(-) create mode 100644 tests/data/exfat4m.bin diff --git a/dissect/fat/c_exfat.py b/dissect/fat/c_exfat.py index 1cda56e..7a472ba 100644 --- a/dissect/fat/c_exfat.py +++ b/dissect/fat/c_exfat.py @@ -79,7 +79,7 @@ uint64 valid_data_length; // 0x08 allocated size of data in bytes only counts for files and is used for // pre-allocation zero if directory uint8 reserved_2[4]; // 0x10 no clue always zero - uint32 location; // 0x15 starting cluster of data + uint32 location; // 0x14 starting cluster of data uint64 data_length; // 0x18 actual size of data if directory always multiples of sector size }; diff --git a/dissect/fat/exfat.py b/dissect/fat/exfat.py index 34228db..e4aae57 100644 --- a/dissect/fat/exfat.py +++ b/dissect/fat/exfat.py @@ -86,34 +86,36 @@ def sector_to_cluster(self, sector): def runlist(self, starting_cluster, not_fragmented=True, size=None): """ - Creates a RunlistStream compatible runlist from exFAT FAT structures + Creates a RunlistStream compatible runlist from exFAT FAT structures, in sectors. 
Args: starting_cluster (int): First cluster of file, folder or location in question Returns: - runlist: [(sector_offset, run_length)]""" + runlist: [(sector_offset, run_length_in_sectors)]""" # If file is not fragmented clusters will not be present in the FAT cluster_chain = [starting_cluster] if not_fragmented else self.get_cluster_chain(starting_cluster) runlist = [] + sectors_per_cluster = 2**self.vbr.sectors_per_cluster_exp # TODO Graceful way to construct a runlist of non-fragmented streams spanning multiple sectors if size: - run = -(-size // self.cluster_size) + run = -(-size // self.cluster_size) * sectors_per_cluster runlist.append((self.cluster_to_sector(cluster_chain[0]), run)) else: # This is a somewhat convoluted, but short way to group successive # clusters together. # As the cluster numbers in the cluster_chain are strictly - # incrementing, a succesive range of clusters will have the same + # incrementing, a successive range of clusters will have the same # delta with respect to their position in the cluster_chain, which # is different from any other successive range, which is what is # used in the groupby(). 
+ # Example: [(3, 1), (4, 1), (6, 1)] becomes [(3, 2), (6, 1)] for _, cluster_group in groupby(enumerate(cluster_chain), lambda i: i[0] - i[1]): run = list(map(itemgetter(1), cluster_group)) start_cluster = run[0] - run_len = len(run) + run_len = len(run) * sectors_per_cluster runlist.append((self.cluster_to_sector(start_cluster), run_len)) return runlist @@ -194,14 +196,14 @@ def _parse_root_dir(self, fh): # Root dir is always present in FAT so we pass False to traverse the FAT table # thus root dir is per definition fragmented runlist = self.exfat.runlist(self.location, False) - size = 0 + size = 0 # in bytes # Calculate size of rootdir from runlist since rootdir has no size attribute for run in runlist: - size += run[1] * self.exfat.cluster_size + size += run[1] * self.exfat.sector_size self.size = size - self.root_dir = RunlistStream(fh, runlist, self.size, self.exfat.cluster_size) + self.root_dir = RunlistStream(fh, runlist, self.size, self.exfat.sector_size) self.dict = self._create_root_dir(self.root_dir) def _parse_subdir(self, entry): @@ -216,10 +218,10 @@ def _parse_subdir(self, entry): """ folder_location = entry.stream.location - folder_size = entry.stream.data_length + folder_size = entry.stream.data_length # in bytes folder_runlist = self.exfat.runlist(folder_location, not_fragmented=entry.stream.flags.not_fragmented) - fh = RunlistStream(self.exfat.filesystem, folder_runlist, folder_size, self.exfat.cluster_size) + fh = RunlistStream(self.exfat.filesystem, folder_runlist, folder_size, self.exfat.sector_size) return self._parse_file_entries(fh) @staticmethod @@ -276,13 +278,17 @@ def _parse_file_entries(self, fh): file_ = c_exfat.FILE(metadata=metadata, stream=stream, fn_entries=fn_entries) if file_.metadata.attributes.directory: - # A directory will have its own file entry as it's first element - # and a ordered dict of file entry contained in it which can be accessed by their corresponding keys + # A directory will have its own file entry as its 
first element and an ordered dict + # of file entries contained in it which can be accessed by their corresponding keys filename = self._construct_filename(file_.fn_entries) entries[filename] = (file_, self._parse_subdir(file_)) else: filename = self._construct_filename(file_.fn_entries) entries[filename] = (file_, None) + elif entry.entry_type == 0x00: + # We could break early on entry_type==0x00 (end-of-directory) + # since all following entries are expected to be 0x00 as well. + pass else: self._non_file_entries(entry) diff --git a/tests/conftest.py b/tests/conftest.py index 15b4fbc..ede85af 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,6 +14,13 @@ def exfat_simple(): yield f +@pytest.fixture +def exfat_4m(): + name = "data/exfat4m.bin" + with open(absolute_path(name), "rb") as f: + yield f + + @pytest.fixture def fat12(): name = "data/fat12.bin" diff --git a/tests/data/exfat4m.bin b/tests/data/exfat4m.bin new file mode 100644 index 0000000..6afd556 --- /dev/null +++ b/tests/data/exfat4m.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21350fa8b43f67b1d726dec1cdbd24505bffc8462db00d20017d5dd195557629 +size 4194304 diff --git a/tests/test_exfat.py b/tests/test_exfat.py index 6d7d426..cc65c9d 100644 --- a/tests/test_exfat.py +++ b/tests/test_exfat.py @@ -39,3 +39,36 @@ def test_exfat(exfat_simple): assert sysvol.metadata.attributes.directory == 1 assert sysvol.stream.flags.not_fragmented == 1 assert sysvol.stream.data_length == 512 + + +def test_exfat_4m(exfat_4m): + e = exfat.ExFAT(exfat_4m) + + assert e.volume_label == "" + assert e.cluster_count == 512 + assert e.sector_size == 512 + assert e.cluster_size == 4096 + assert e.fat_sector == 2048 + assert e.root_dir_cluster == 5 + assert e.root_dir_sector == 4120 + assert e.runlist(e.root_dir_cluster) == [(e.root_dir_sector, 8)] + + files = e.files + assert sorted(files.keys()) == ["/"] + + root = files["/"][0] + assert root.metadata.attributes.directory == 1 + assert
root.stream.flags.not_fragmented == 0 + assert root.stream.data_length == 4096 + + empty_file = files["/"][1]["file.txt"][0] + assert empty_file.metadata.attributes.directory == 0 + assert empty_file.stream.flags.not_fragmented == 0 + assert empty_file.stream.data_length == 0 + + subdir = files["/"][1]["subdir"][0] + assert subdir.metadata.attributes.directory == 1 + assert subdir.stream.flags.not_fragmented == 1 + assert subdir.stream.data_length == 4096 + + assert sorted(files["/"][1]["subdir"][1].keys()) == ["sub.txt"]