Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve support for searching indexes #36

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 156 additions & 0 deletions dissect/esedb/btree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from dissect.esedb.exceptions import KeyNotFoundError, NoNeighbourPageError
from dissect.esedb.page import Node, Page

if TYPE_CHECKING:
from dissect.esedb.esedb import EseDB

Check warning on line 9 in dissect/esedb/btree.py

View check run for this annotation

Codecov / codecov/patch

dissect/esedb/btree.py#L9

Added line #L9 was not covered by tests


class BTree:
    """A simple implementation for searching the ESE B+Trees.

    This is a stateful interactive class that moves an internal cursor to a position within the BTree.
    The cursor consists of the current page, the number of that page and the index of the current node
    within that page.

    Args:
        esedb: An instance of :class:`~dissect.esedb.esedb.EseDB`.
        root: The root page to open a BTree on, given either as a page number or as a page object.
    """

    def __init__(self, esedb: EseDB, root: int | Page):
        self.esedb = esedb

        # Accept either a page number or an already loaded page
        if isinstance(root, int):
            page_num = root
            root = esedb.page(page_num)
        else:
            page_num = root.num

        self.root = root

        # Cursor state, starts at the first node of the root page
        self._page = root
        self._page_num = page_num
        self._node_num = 0

    def reset(self) -> None:
        """Reset the internal state to the root of the BTree."""
        self._page = self.root
        self._page_num = self._page.num
        self._node_num = 0

    def node(self) -> Node:
        """Return the node the BTree is currently on."""
        return self._page.node(self._node_num)

    def next(self) -> Node:
        """Move the BTree to the next node and return it.

        Can move the BTree to the next page as a side effect.

        Raises:
            NoNeighbourPageError: If the cursor is on the last node of a page that has no next page.
        """
        if self._node_num + 1 > self._page.node_count - 1:
            # Already on the last node of this page, continue on the neighbouring page
            self.next_page()
        else:
            self._node_num += 1

        return self.node()

    def next_page(self) -> None:
        """Move the BTree to the first node of the next page in the tree.

        Raises:
            NoNeighbourPageError: If the current page has no next page.
        """
        if self._page.next_page:
            self._page = self.esedb.page(self._page.next_page)
            # Keep the cached page number in sync with the page we just moved to
            self._page_num = self._page.num
            self._node_num = 0
        else:
            raise NoNeighbourPageError(f"{self._page} has no next page")

    def prev(self) -> Node:
        """Move the BTree to the previous node and return it.

        Can move the BTree to the previous page as a side effect.

        Raises:
            NoNeighbourPageError: If the cursor is on the first node of a page that has no previous page.
        """
        if self._node_num - 1 < 0:
            # Already on the first node of this page, continue on the neighbouring page
            self.prev_page()
        else:
            self._node_num -= 1

        return self.node()

    def prev_page(self) -> None:
        """Move the BTree to the last node of the previous page in the tree.

        Raises:
            NoNeighbourPageError: If the current page has no previous page.
        """
        if self._page.previous_page:
            self._page = self.esedb.page(self._page.previous_page)
            # Keep the cached page number in sync with the page we just moved to
            self._page_num = self._page.num
            self._node_num = self._page.node_count - 1
        else:
            raise NoNeighbourPageError(f"{self._page} has no previous page")

    def search(self, key: bytes, exact: bool = True) -> Node:
        """Search the tree for the given key.

        The descent starts at the page the BTree is currently on, so call :meth:`reset` first to search
        from the root. Branch pages are followed through the child of the node selected by
        :func:`find_node` until a non-branch page is reached.

        Moves the BTree to the matching node, or, when there is no exact match, to the node that
        :func:`find_node` selects on the final page (the first node with a key greater than the requested
        key, or the last node of that page).

        Note that the cursor is moved even if :class:`KeyNotFoundError` is raised.

        Args:
            key: The key to search for.
            exact: Whether to only return successfully on an exact match.

        Raises:
            KeyNotFoundError: If an ``exact`` match was requested but not found.
        """
        page = self._page
        while True:
            node = find_node(page, key)

            if page.is_branch:
                # Descend into the child page referenced by the selected branch node
                page = self.esedb.page(node.child)
            else:
                self._page = page
                self._page_num = page.num
                self._node_num = node.num
                break

        if exact and key != node.key:
            raise KeyNotFoundError(f"Can't find key: {key}")

        return self.node()


def find_node(page: Page, key: bytes) -> Node:
    """Search a page for the node that ``key`` belongs to.

    Performs a binary search over the nodes of ``page``, comparing ``key`` against the key of each
    probed node. The last node of the page is never compared against; it acts as the fallback when
    ``key`` is greater than every probed key.

    Args:
        page: The page to search.
        key: The key to search.

    Returns:
        On an exact match: the matching node for non-branch pages, or the node following the match
        (clamped to the last node of the page) for branch pages. Without an exact match: the first
        node with a key greater than ``key``, or the last node of the page if there is none.
    """
    final_idx = page.node_count - 1

    low = 0
    high = final_idx

    while low < high:
        mid = (low + high) // 2
        node = page.node(mid)

        # It turns out that the way BTree keys are compared matches 1:1 with how Python compares bytes
        # First compare data, then length
        if key < node.key:
            high = mid
        elif key == node.key:
            if page.is_branch:
                # If there's an exact match on a key on a branch page, the actual leaf nodes are in the next branch
                # Page keys for branch pages appear to be non-inclusive upper bounds
                return page.node(min(mid + 1, final_idx))

            return node
        else:
            low = mid + 1

    # The search converged on a single candidate node
    return page.node(low)
32 changes: 32 additions & 0 deletions dissect/esedb/c_esedb.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import datetime
import struct
import uuid
Expand Down Expand Up @@ -425,6 +427,34 @@
DotNetGuid = 0x00040000, // index over GUID column according to .Net GUID sort order
ImmutableStructure = 0x00080000, // Do not write to the input structures during a JetCreateIndexN call.
};

flag IDBFLAG : uint16 {
Unique = 0x0001, // Duplicate keys not allowed
AllowAllNulls = 0x0002, // Make entries for NULL keys (all segments are null)
AllowFirstNull = 0x0004, // First index column NULL allowed in index
AllowSomeNulls = 0x0008, // Make entries for keys with some null segments
NoNullSeg = 0x0010, // Don't allow a NULL key segment
Primary = 0x0020, // Index is the primary index
LocaleSet = 0x0040, // Index locale information (locale name) is set (JET_bitIndexUnicode was specified).
Multivalued = 0x0080, // Has a multivalued segment
TemplateIndex = 0x0100, // Index of a template table
DerivedIndex = 0x0200, // Index derived from template table
// Note that this flag is persisted, but
// never used in an in-memory IDB, because
// we use the template index IDB instead.
LocalizedText = 0x0400, // Has a unicode text column? (code page is 1200)
SortNullsHigh = 0x0800, // NULL sorts after data
// Jan 2012: MSU is being removed. fidbUnicodeFixupOn should no longer be referenced.
UnicodeFixupOn_Deprecated = 0x1000, // Track entries with undefined Unicode codepoints
CrossProduct = 0x2000, // all combinations of multi-valued columns are indexed
DisallowTruncation = 0x4000, // fail update rather than allow key truncation
NestedTable = 0x8000, // combinations of multi-valued columns of same itagSequence are indexed
};

flag IDXFLAG : uint16 {
ExtendedColumns = 0x0001, // IDXSEGs are comprised of JET_COLUMNIDs, not FIDs
DotNetGuid = 0x0002, // GUIDs sort according to .Net rules
};
""" # noqa E501

c_esedb = cstruct().load(esedb_def)
Expand All @@ -443,6 +473,8 @@
TAGFLD_HEADER = c_esedb.TAGFLD_HEADER
CODEPAGE = c_esedb.CODEPAGE
COMPRESSION_SCHEME = c_esedb.COMPRESSION_SCHEME
IDBFLAG = c_esedb.IDBFLAG
IDXFLAG = c_esedb.IDXFLAG

CODEPAGE_MAP = {
CODEPAGE.UNICODE: "utf-16-le",
Expand Down
5 changes: 3 additions & 2 deletions dissect/esedb/compression.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import struct
from typing import Optional

from dissect.util.compression import lzxpress, sevenbit

Expand Down Expand Up @@ -29,7 +30,7 @@ def decompress(buf: bytes) -> bytes:
return buf


def decompress_size(buf: bytes) -> Optional[int]:
def decompress_size(buf: bytes) -> int | None:
"""Return the decompressed size of the given bytes according to the encoded compression scheme.

Args:
Expand Down
Loading
Loading