Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement Python types for XML SLEIGH files
Browse files Browse the repository at this point in the history
twizmwazin committed Dec 14, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent a245227 commit 9c8a4ac
Showing 8 changed files with 864 additions and 31 deletions.
72 changes: 41 additions & 31 deletions pypcode/__init__.py
Original file line number Diff line number Diff line change
@@ -31,14 +31,17 @@
)

from .pypcode_native import Context as _Context # pylint:disable=no-name-in-module

from .cspec import CompilerSpec
from .pspec import ProcessorSpec
from .ldefs import LanguageDefinitions, Language

__all__ = [
"Address",
"AddrSpace",
"Arch",
"ArchLanguage",
"BadDataError",
"CompilerSpec",
"Context",
"DecoderError",
"Disassembly",
@@ -52,6 +55,7 @@
"OpFormatUnary",
"PcodeOp",
"PcodePrettyPrinter",
"ProcessorSpec",
"TranslateFlags",
"Translation",
"UnimplError",
@@ -84,62 +88,63 @@ class ArchLanguage:
)

archdir: str
ldef: ET.Element
ldef: Language

def __init__(self, archdir: str, ldef: ET.Element):
def __init__(self, archdir: str, ldef: Language):
self.archdir = archdir
self.ldef = ldef
self._pspec: Optional[ET.Element] = None
self._cspecs: Optional[Dict[Tuple[str, str], ET.Element]] = None
self._pspec: Optional[ProcessorSpec] = None
self._cspecs: Optional[Dict[Tuple[str, str], CompilerSpec]] = None

@property
def pspec_path(self) -> str:
return os.path.join(self.archdir, self.processorspec)
return os.path.join(self.archdir, self.ldef.processorspec)

@property
def slafile_path(self) -> str:
return os.path.join(self.archdir, self.slafile)
return os.path.join(self.archdir, self.ldef.slafile)

@property
def description(self) -> str:
elem = self.ldef.find("description")
if elem is not None:
return elem.text or ""
return ""
return self.ldef.description or ""

def __getattr__(self, key):
if key in self.ldef.attrib:
return self.ldef.attrib[key]
raise AttributeError(key)
return getattr(self.ldef, key)

@property
def pspec(self) -> Optional[ET.Element]:
def pspec(self) -> Optional[ProcessorSpec]:
if self._pspec is None:
self._pspec = ET.parse(self.pspec_path).getroot()
try:
root = ET.parse(self.pspec_path).getroot()
self._pspec = ProcessorSpec.from_element(root)
except Exception:
return None
return self._pspec

@property
def cspecs(self) -> Mapping[Tuple[str, str], ET.Element]:
def cspecs(self) -> Mapping[Tuple[str, str], CompilerSpec]:
if self._cspecs is None:
self._cspecs = {}
for e in self.ldef.findall("compiler"):
path = os.path.join(self.archdir, e.attrib["spec"])
cspec = ET.parse(path).getroot()
self._cspecs[(e.attrib["id"], e.attrib["name"])] = cspec
for e in self.ldef.compilers:
path = os.path.join(self.archdir, e.spec)
root = ET.parse(path).getroot()
cspec = CompilerSpec.from_element(root)
self._cspecs[(e.id, e.name)] = cspec
return self._cspecs

def init_context_from_pspec(self, ctx: "Context") -> None:
if self.pspec is None:
return
cd = self.pspec.find("context_data")
if cd is None:

if self.pspec.context_data is None:
return
cs = cd.find("context_set")
if cs is None:

context_set = self.pspec.context_data.context_set
if context_set is None:
return
for e in cs:
assert e.tag == "set"
ctx.setVariableDefault(e.attrib["name"], int(e.attrib["val"]))

for name, value in context_set.values.items():
ctx.setVariableDefault(name, value)

@classmethod
def from_id(cls, langid: str) -> Optional["ArchLanguage"]:
@@ -169,7 +174,7 @@ class Arch:
archpath: str
archname: str
ldefpath: str
ldef: ET.ElementTree
ldef: LanguageDefinitions
languages: Sequence[ArchLanguage]

def __init__(self, name: str, ldefpath: str):
@@ -182,8 +187,13 @@ def __init__(self, name: str, ldefpath: str):
self.archpath = os.path.dirname(ldefpath)
self.archname = name
self.ldefpath = ldefpath
self.ldef = ET.parse(ldefpath)
self.languages = [ArchLanguage(self.archpath, e) for e in self.ldef.getroot()]

# Parse ldefs file into structured format
with open(ldefpath, "r") as f:
self.ldef = LanguageDefinitions.from_xml(f.read())

# Create ArchLanguage objects from structured data
self.languages = [ArchLanguage(self.archpath, lang) for lang in self.ldef.languages]

@classmethod
def enumerate(cls) -> Generator["Arch", None, None]:
137 changes: 137 additions & 0 deletions pypcode/cspec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Dict, List, Optional
from xml.etree import ElementTree as ET


@dataclass
class DataOrganization:
    """Integer sizes/alignments and packing flags read from a ``data_organization`` element.

    Every integer setting is ``None`` when the XML does not provide it.
    """

    absolute_max_alignment: Optional[int] = None
    machine_alignment: Optional[int] = None
    default_alignment: Optional[int] = None
    default_pointer_alignment: Optional[int] = None
    wchar_size: Optional[int] = None
    short_size: Optional[int] = None
    integer_size: Optional[int] = None
    long_size: Optional[int] = None
    long_long_size: Optional[int] = None
    float_size: Optional[int] = None
    double_size: Optional[int] = None
    long_double_size: Optional[int] = None
    size_alignment_map: Dict[int, int] = field(default_factory=dict)
    bitfield_packing_uses_ms: bool = False

    @classmethod
    def from_element(cls, element: ET.Element) -> DataOrganization:
        """Build a ``DataOrganization`` from its XML element.

        :param element: The element to read, or ``None`` for an all-defaults instance.
        :return: The populated instance.
        :raises KeyError: If a ``size_alignment_map`` entry lacks ``size`` or ``alignment``.
        :raises ValueError: If a value that must be an integer cannot be parsed by ``int``.
        """
        if element is None:
            return cls()

        def read_int(name: str) -> Optional[int]:
            # A setting may be given directly as an attribute of the element...
            raw = element.attrib.get(name)
            if raw is not None:
                return int(raw)
            # ...or as a same-named child element carrying a "value" attribute.
            child = element.find(name)
            if child is None or "value" not in child.attrib:
                return None
            return int(child.attrib["value"])

        map_elem = element.find("size_alignment_map")
        if map_elem is None:
            size_to_alignment = {}
        else:
            size_to_alignment = {
                int(entry.attrib["size"]): int(entry.attrib["alignment"])
                for entry in map_elem.findall("entry")
            }

        ms_packing = False
        packing = element.find("bitfield_packing")
        if packing is not None:
            convention = packing.find("use_MS_convention")
            if convention is not None:
                # Only a case-insensitive "true" enables the flag.
                ms_packing = convention.attrib.get("value", "false").lower() == "true"

        int_settings = {
            name: read_int(name)
            for name in (
                "absolute_max_alignment",
                "machine_alignment",
                "default_alignment",
                "default_pointer_alignment",
                "wchar_size",
                "short_size",
                "integer_size",
                "long_size",
                "long_long_size",
                "float_size",
                "double_size",
                "long_double_size",
            )
        }

        return cls(
            size_alignment_map=size_to_alignment,
            bitfield_packing_uses_ms=ms_packing,
            **int_settings,
        )


@dataclass
class GlobalScope:
    """Summary of a ``global`` element: whether it covers ``ram`` and which registers it names."""

    ram_present: bool = False
    registers: List[str] = field(default_factory=list)

    @classmethod
    def from_element(cls, element: ET.Element) -> GlobalScope:
        """Build a ``GlobalScope`` from its XML element.

        :param element: The element to read, or ``None`` for an all-defaults instance.
        :return: The populated instance.
        :raises KeyError: If a ``register`` child has no ``name`` attribute.
        """
        if element is None:
            return cls()

        # True as soon as one <range> child has space="ram" (exact match).
        has_ram = False
        for range_elem in element.findall("range"):
            if range_elem.attrib.get("space", "") == "ram":
                has_ram = True
                break

        register_names = []
        for reg_elem in element.findall("register"):
            register_names.append(reg_elem.attrib["name"])

        return cls(ram_present=has_ram, registers=register_names)


@dataclass
class CompilerSpec:
    """Structured view of a compiler specification (cspec) XML document.

    Holds the parsed ``data_organization`` and ``global`` sections plus the
    stack pointer register and the return address location. The return
    address is described either by ``returnaddress_register`` or by the
    ``returnaddress_space`` / ``returnaddress_offset`` / ``returnaddress_size``
    triple; whichever form is absent from the XML stays ``None``.
    """

    data_organization: DataOrganization = field(default_factory=DataOrganization)
    global_scope: GlobalScope = field(default_factory=GlobalScope)
    stackpointer_register: Optional[str] = None
    returnaddress_register: Optional[str] = None
    returnaddress_space: Optional[str] = None
    returnaddress_offset: Optional[int] = None
    returnaddress_size: Optional[int] = None

    @classmethod
    def from_xml(cls, xml_string: str) -> CompilerSpec:
        """Parse ``xml_string`` and build a ``CompilerSpec`` from its root element.

        :param xml_string: The cspec document as text.
        :return: The populated instance.
        :raises xml.etree.ElementTree.ParseError: If the text is not well-formed XML.
        """
        root = ET.fromstring(xml_string)
        return cls.from_element(root)

    @classmethod
    def from_element(cls, element: ET.Element) -> CompilerSpec:
        """Build a ``CompilerSpec`` from the root element of a cspec document.

        :param element: Root element whose children are inspected.
        :return: The populated instance; missing sections fall back to defaults.
        :raises KeyError: If ``stackpointer`` has no ``register`` attribute or a
            return-address ``varnode`` lacks ``space``, ``offset`` or ``size``.
        :raises ValueError: If a varnode ``offset`` or ``size`` is not an integer.
        """
        data_org_elem = element.find("data_organization")
        data_org = DataOrganization.from_element(data_org_elem) if data_org_elem is not None else DataOrganization()
        global_elem = element.find("global")
        global_scope = GlobalScope.from_element(global_elem) if global_elem is not None else GlobalScope()

        sp_elem = element.find("stackpointer")
        stackpointer = sp_elem.attrib["register"] if sp_elem is not None else None

        ret_elem = element.find("returnaddress")
        retaddr_reg = retaddr_space = None
        retaddr_offset = retaddr_size = None
        if ret_elem is not None:
            if "register" in ret_elem.attrib:
                retaddr_reg = ret_elem.attrib["register"]
            else:
                var_elem = ret_elem.find("varnode")
                if var_elem is not None:
                    retaddr_space = var_elem.attrib["space"]
                    retaddr_offset = int(var_elem.attrib["offset"])
                    retaddr_size = int(var_elem.attrib["size"])
                else:
                    # cspec files commonly give the register as a child tag,
                    # <returnaddress><register name="..."/></returnaddress>,
                    # which the attribute check above does not see.
                    reg_elem = ret_elem.find("register")
                    if reg_elem is not None:
                        # .get() so a nameless <register/> keeps yielding None
                        # instead of becoming a new failure mode.
                        retaddr_reg = reg_elem.attrib.get("name")

        return cls(
            data_organization=data_org,
            global_scope=global_scope,
            stackpointer_register=stackpointer,
            returnaddress_register=retaddr_reg,
            returnaddress_space=retaddr_space,
            returnaddress_offset=retaddr_offset,
            returnaddress_size=retaddr_size,
        )
78 changes: 78 additions & 0 deletions pypcode/ldefs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import List, Optional
from xml.etree import ElementTree as ET


@dataclass
class ExternalName:
    """A ``tool`` / ``name`` pair read from an ``external_name`` element."""

    tool: str
    name: str

    @classmethod
    def from_element(cls, element: ET.Element) -> ExternalName:
        """Build an ``ExternalName`` from the element's ``tool`` and ``name`` attributes.

        :raises KeyError: If either attribute is missing.
        """
        attrs = element.attrib
        tool = attrs["tool"]
        name = attrs["name"]
        return cls(tool=tool, name=name)


@dataclass
class Compiler:
    """A ``compiler`` entry: display name, spec file reference and identifier."""

    name: str
    spec: str
    id: str

    @classmethod
    def from_element(cls, element: ET.Element) -> Compiler:
        """Build a ``Compiler`` from the element's ``name``, ``spec`` and ``id`` attributes.

        :raises KeyError: If any of the three attributes is missing.
        """
        attrs = element.attrib
        name = attrs["name"]
        spec = attrs["spec"]
        ident = attrs["id"]
        return cls(name=name, spec=spec, id=ident)


@dataclass
class Language:
    """One ``language`` entry of a language-definitions (ldefs) document.

    The first eight fields come from required XML attributes; the remaining
    fields are optional attributes or child elements.
    """

    processor: str
    endian: str
    size: int
    variant: str
    version: str
    slafile: str
    processorspec: str
    id: str
    description: Optional[str] = None
    manualindexfile: Optional[str] = None
    instructionEndian: Optional[str] = None
    compilers: List[Compiler] = field(default_factory=list)
    external_names: List[ExternalName] = field(default_factory=list)

    @classmethod
    def from_element(cls, element: ET.Element) -> Language:
        """Build a ``Language`` from a ``language`` XML element.

        :param element: The element to read.
        :return: The populated instance. ``description`` is always a string:
            it is ``""`` both when the ``description`` child is missing and
            when it is present but empty.
        :raises KeyError: If a required attribute is missing.
        :raises ValueError: If ``size`` is not an integer.
        """
        # findtext() yields "" for an empty <description/> and the default for
        # a missing one, so both cases produce the same value instead of a
        # None/"" split.
        description = element.findtext("description", default="")

        return cls(
            processor=element.attrib["processor"],
            endian=element.attrib["endian"],
            size=int(element.attrib["size"]),
            variant=element.attrib["variant"],
            version=element.attrib["version"],
            slafile=element.attrib["slafile"],
            processorspec=element.attrib["processorspec"],
            id=element.attrib["id"],
            description=description,
            manualindexfile=element.attrib.get("manualindexfile"),
            instructionEndian=element.attrib.get("instructionEndian"),
            compilers=[Compiler.from_element(e) for e in element.findall("compiler")],
            external_names=[ExternalName.from_element(e) for e in element.findall("external_name")],
        )


@dataclass
class LanguageDefinitions:
    """The list of ``Language`` entries found in a language-definitions document."""

    languages: List[Language] = field(default_factory=list)

    @classmethod
    def from_xml(cls, xml_string: str) -> LanguageDefinitions:
        """Parse ``xml_string`` and build the definitions from its root element.

        :raises xml.etree.ElementTree.ParseError: If the text is not well-formed XML.
        """
        return cls.from_element(ET.fromstring(xml_string))

    @classmethod
    def from_element(cls, element: ET.Element) -> LanguageDefinitions:
        """Collect every direct ``language`` child of ``element``, in document order."""
        parsed = []
        for lang_elem in element.findall("language"):
            parsed.append(Language.from_element(lang_elem))
        return cls(languages=parsed)
Loading

0 comments on commit 9c8a4ac

Please sign in to comment.