Skip to content

Commit

Permalink
Inline lxmlh
Browse files Browse the repository at this point in the history
  • Loading branch information
cmutel committed Aug 9, 2024
1 parent a8202f3 commit 73d230d
Show file tree
Hide file tree
Showing 19 changed files with 735 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [DEV]

- Include `pycasreg` in source repo to avoid bumping version dominoes
- Include `lxmlh` in source repo to avoid bumping version dominoes

## [3.6.2] - 2024-06-21

Expand Down
3 changes: 2 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
include pyecospold/schemas/*/*.xsd
include pyecospold/schemas/v1/*.xsd
include pyecospold/schemas/v2/*.xsd
23 changes: 23 additions & 0 deletions data/lxmlh/sample.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?xml version='1.0' encoding='UTF-8'?>

<shiporder
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="schema.xsd"
orderid="889923">
<orderperson>John Smith</orderperson>
<shipto>
<name>Ola Nordmann</name>
<address>Langgt 23</address>
<city>4000 Stavanger</city>
<country>Norway</country>
</shipto>
<item>
<note>Item1</note>
<note>Item1.1</note>
</item>
<item>
<note>Item2</note>
</item>
<discount>1</discount>
<discount>2</discount>
</shiporder>
3 changes: 3 additions & 0 deletions data/lxmlh/sample.xml.invalid
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<?xml version='1.0' encoding='UTF-8'?>

<shiporder></shiporder>
23 changes: 23 additions & 0 deletions data/lxmlh/sample_defaults.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?xml version='1.0' encoding='UTF-8'?>

<shiporder
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="schema.xsd"
orderid="889923" orderstatus="wip" ordertime="889923">
<orderperson>John Smith</orderperson>
<shipto>
<name>Ola Nordmann</name>
<address>Langgt 23</address>
<city>4000 Stavanger</city>
<country>Norway</country>
</shipto>
<item>
<note>Item1</note>
<note>Item1.1</note>
</item>
<item>
<note>Item2</note>
</item>
<discount>1</discount>
<discount>2</discount>
</shiporder>
33 changes: 33 additions & 0 deletions data/lxmlh/schema.xsd
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?xml version="1.0" encoding="UTF-8" ?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">

<xs:element name="shiporder">
<xs:complexType>
<xs:sequence>
<xs:element name="orderperson" type="xs:string"/>
<xs:element name="shipto">
<xs:complexType>
<xs:sequence>
<xs:element name="name" type="xs:string"/>
<xs:element name="address" type="xs:string"/>
<xs:element name="city" type="xs:string"/>
<xs:element name="country" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="item" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="note" maxOccurs="unbounded" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="discount" maxOccurs="unbounded" type="xs:string"/>
</xs:sequence>
<xs:attribute name="orderid" type="xs:string" use="required"/>
<xs:attribute name="orderstatus" type="xs:string" use="optional"/>
<xs:attribute name="ordertime" type="xs:string" use="optional"/>
</xs:complexType>
</xs:element>

</xs:schema>
1 change: 0 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

MOCK_MODULES = [
'lxml',
'lxmlh',
'numpy',
]

Expand Down
6 changes: 3 additions & 3 deletions pyecospold/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
from typing import List, Tuple, Union

from lxml import etree
from lxmlh import (

from .config import Defaults
from .lxmlh import (
parse_directory,
parse_file,
parse_zip_file,
Expand All @@ -16,8 +18,6 @@
validate_file,
validate_zip_file,
)

from .config import Defaults
from .model_v1 import AdministrativeInformation as AdministrativeInformationV1
from .model_v1 import Allocation
from .model_v1 import DataEntryBy as DataEntryByV1
Expand Down
3 changes: 1 addition & 2 deletions pyecospold/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from typing import Callable, Optional

from lxmlh import create_attribute, create_attribute_list, create_element_text

from .config import Defaults
from .lxmlh import create_attribute, create_attribute_list, create_element_text


def create_attribute_v1(
Expand Down
40 changes: 40 additions & 0 deletions pyecospold/lxmlh/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from .config import TIMESTAMP_FORMAT, TYPE_DEFAULTS, TYPE_FUNC_MAP
from .helpers import (
create_attribute,
create_attribute_list,
create_element_text,
fill_in_defaults,
get_element,
get_element_list,
get_inner_text_list,
)
from .parsers import (
parse_directory,
parse_file,
parse_zip_file,
save_file,
validate_directory,
validate_file,
validate_zip_file,
)

# Public API of the package. Every name listed here must actually be bound in
# this module: ``from pyecospold.lxmlh import *`` looks each one up and raises
# AttributeError for any that is missing. ``__version__`` was listed without
# ever being defined here, so it has been dropped.
__all__ = (
    "fill_in_defaults",
    "create_attribute",
    "create_element_text",
    "create_attribute_list",
    "get_element",
    "get_element_list",
    "get_inner_text_list",
    "parse_directory",
    "parse_file",
    "parse_zip_file",
    "save_file",
    "TIMESTAMP_FORMAT",
    "TYPE_DEFAULTS",
    "TYPE_FUNC_MAP",
    "validate_directory",
    "validate_file",
    "validate_zip_file",
)
18 changes: 18 additions & 0 deletions pyecospold/lxmlh/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from datetime import datetime
from typing import Any, Callable, Dict

import numpy as np

# strptime pattern for timestamps, e.g. "2024-08-09T13:45:00".
TIMESTAMP_FORMAT: str = "%Y-%m-%dT%H:%M:%S"


def _parse_bool(string: str) -> bool:
    # Only the word "true", in any letter case, maps to True.
    return string.lower() == "true"


def _parse_timestamp(string: str) -> datetime:
    # TIMESTAMP_FORMAT is resolved when the converter runs, not when it is
    # defined.
    return datetime.strptime(string, TIMESTAMP_FORMAT)


# String-to-value converters for types that need custom parsing.
TYPE_FUNC_MAP: Dict[type, Callable[[str], Any]] = {
    bool: _parse_bool,
    datetime: _parse_timestamp,
}

# Placeholder value per type, stored in the form the matching converter
# accepts (hence the string "false" for bool).
TYPE_DEFAULTS: Dict[type, Any] = {
    # nan_to_num turns NaN into a numeric zero.
    int: np.nan_to_num(np.nan),
    float: np.nan,
    bool: "false",
    str: "",
}
159 changes: 159 additions & 0 deletions pyecospold/lxmlh/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import re
from typing import Any, Callable, Dict, List, Optional

from lxml import etree

from .config import TYPE_DEFAULTS, TYPE_FUNC_MAP


def set_attribute(
    element: etree.ElementBase,
    key: str,
    value: str,
    schema_file: str,
    validator: Optional[Callable],
) -> None:
    """Write attribute ``key`` on ``element`` and validate the document.

    If ``validator`` is given, ``value`` is passed through it first and the
    object it returns is what gets stored. The attribute is always written as
    ``str(...)`` of that value. Afterwards the XSD at ``schema_file`` is
    loaded and the tree containing ``element`` is checked against it.

    Raises DocumentInvalid when the modified tree does not satisfy the
    schema; the attribute has already been written at that point.
    """
    stored = value if validator is None else validator(value)
    element.set(key, str(stored))
    etree.XMLSchema(file=schema_file).assertValid(element.getroottree())


def set_attribute_list(
    element: etree.ElementBase, key: str, values: List[Any], schema_file: str
) -> None:
    """Replace the ``key`` children of ``element`` with one child per value.

    Every element currently found for ``key`` is removed from ``element``.
    Then, for each entry of ``values``, a new sub-element is created in the
    default namespace of ``element`` (empty when it has none) with
    ``str(value)`` as its text. Finally the tree is validated against the XSD
    at ``schema_file``.

    Raises DocumentInvalid when the resulting tree does not satisfy the
    schema; the children have already been replaced at that point.
    """
    for stale in get_element_list(element, key):
        element.remove(stale)
    default_ns = element.nsmap.get(None, "")
    tag = f"{{{default_ns}}}{key}"
    created = []
    for value in values:
        child = etree.SubElement(element, tag)
        created.append(child)
        child.text = str(value)
    element.extend(created)
    etree.XMLSchema(file=schema_file).assertValid(element.getroottree())


def set_element_text(
    parent: etree.ElementBase, element: str, value: str, schema_file: str
) -> None:
    """Set the text of child ``element`` of ``parent`` and validate the tree.

    The text is stored as ``str(value)``. The XSD at ``schema_file`` is then
    loaded and the tree containing ``parent`` is checked against it.

    Raises DocumentInvalid when the modified tree does not satisfy the
    schema; the text has already been written at that point.
    """
    text = str(value)
    target = get_element(parent, element)
    target.text = text
    etree.XMLSchema(file=schema_file).assertValid(parent.getroottree())


def get_element(parent: etree.ElementBase, element: str) -> etree.ElementBase:
    """Look up ``element`` under ``parent`` with ``find``.

    The namespace map of ``parent`` is used for the lookup, and the result of
    ``find`` is returned unchanged.
    """
    namespaces = parent.nsmap
    return parent.find(element, namespaces=namespaces)


def get_element_list(
    parent: etree.ElementBase, element: str
) -> List[etree.ElementBase]:
    """Look up all matches for ``element`` under ``parent`` with ``findall``.

    The namespace map of ``parent`` is used for the lookup, and the result of
    ``findall`` is returned unchanged.
    """
    namespaces = parent.nsmap
    return parent.findall(element, namespaces=namespaces)


def get_element_text(
    parent: etree.ElementBase, element: str, element_type: type = str
) -> Any:
    """Return the text of child ``element`` of ``parent`` as ``element_type``.

    The raw text is converted with the entry of TYPE_FUNC_MAP for
    ``element_type`` when there is one, otherwise by calling ``element_type``
    itself. When the element is missing, or exists but has no text,
    ``TYPE_DEFAULTS[str]`` is converted instead.
    """
    text = getattr(get_element(parent, element), "text", None)
    if text is None:
        # Covers a missing element (lookup returned None) as well as an
        # element whose ``text`` is None. Previously the latter passed None
        # to the converter, yielding the string "None" for ``str``.
        text = str(TYPE_DEFAULTS[str])
    convert = TYPE_FUNC_MAP.get(element_type, element_type)
    return convert(text)


def get_inner_text_list(parent: etree.ElementBase, element: str):
    """Return the cleaned-up text of every ``element`` match under ``parent``.

    Each text is passed through ``str()``, so a ``text`` of None contributes
    the string "None". Runs of two or more spaces are removed entirely, then
    every newline is replaced by a single space.
    """
    texts = []
    for node in get_element_list(parent, element):
        collapsed = re.sub("[ ]{2,}", "", str(node.text))
        texts.append(collapsed.replace("\n", " "))
    return texts


def get_attribute(
    parent: etree.ElementBase, attribute: str, attr_type: type = str
) -> Any:
    """Return attribute ``attribute`` of ``parent`` converted to ``attr_type``.

    When the attribute is absent, the TYPE_DEFAULTS entry for ``attr_type``
    (None if there is no entry) is converted instead. Conversion uses the
    TYPE_FUNC_MAP entry for ``attr_type`` when present, otherwise
    ``attr_type`` itself is called.
    """
    convert = TYPE_FUNC_MAP.get(attr_type, attr_type)
    fallback = TYPE_DEFAULTS.get(attr_type, None)
    raw = parent.get(attribute, fallback)
    return convert(raw)


def get_attribute_list(
    parent: etree.ElementBase, attribute: str, attr_type: type = str
) -> List[Any]:
    """Return the converted text of every ``attribute`` match under ``parent``.

    For each matching element, runs of two or more spaces are removed from
    its text and each run of newlines becomes a single space. The result is
    converted with the TYPE_FUNC_MAP entry for ``attr_type`` when present,
    otherwise by calling ``attr_type``. No matches gives an empty list.
    """
    converted = []
    for node in get_element_list(parent, attribute):
        convert = TYPE_FUNC_MAP.get(attr_type, attr_type)
        unpadded = re.sub("[ ]{2,}", "", node.text)
        converted.append(convert(re.sub("[\n]{1,}", " ", unpadded)))
    return converted


def create_attribute(
    name: str,
    attr_type: type,
    schema_file: str,
    validator: Optional[Callable] = None,
) -> property:
    """Build a property that reads and writes XML attribute ``name``.

    Reading delegates to ``get_attribute`` with ``attr_type``; writing
    delegates to ``set_attribute`` with ``schema_file`` and ``validator``.
    """

    # No docstrings on the accessors: property() would adopt the getter's.
    def _read(self):
        return get_attribute(self, name, attr_type)

    def _write(self, value):
        return set_attribute(self, name, value, schema_file, validator)

    return property(fget=_read, fset=_write)


def create_element_text(name: str, element_type: type, schema_file: str) -> property:
    """Build a property that reads and writes the text of child ``name``.

    Reading delegates to ``get_element_text`` with ``element_type``; writing
    delegates to ``set_element_text`` with ``schema_file``.
    """

    # No docstrings on the accessors: property() would adopt the getter's.
    def _read(self):
        return get_element_text(self, name, element_type)

    def _write(self, value):
        return set_element_text(self, name, value, schema_file)

    return property(fget=_read, fset=_write)


def create_attribute_list(name: str, attr_type: type, schema_file: str) -> property:
    """Build a property that reads and writes the list of ``name`` children.

    Reading delegates to ``get_attribute_list`` with ``attr_type``; writing
    delegates to ``set_attribute_list`` with ``schema_file``.
    """

    # No docstrings on the accessors: property() would adopt the getter's.
    def _read(self):
        return get_attribute_list(self, name, attr_type)

    def _write(self, values):
        return set_attribute_list(self, name, values, schema_file)

    return property(fget=_read, fset=_write)


def fill_in_defaults(
    node: etree.ElementBase,
    static_defaults: Dict[str, Dict[str, str]],
    dynamic_defaults: Dict[str, Dict[str, Callable[[etree.ElementBase], str]]],
) -> None:
    """Fill in default values across the whole tree that contains ``node``.

    Both mappings are keyed by the class name of a tree node and map to
    ``{property name: default}``. For each node, static defaults are applied
    before dynamic ones. A property is only overwritten when its current
    value equals one of the TYPE_DEFAULTS values; a property that cannot be
    read as an attribute counts as ``TYPE_DEFAULTS[str]``. String defaults
    are assigned as-is; any other default is called with the node and its
    result is assigned.
    """
    tree = node.getroottree()
    for child in tree.iter():
        class_name = child.__class__.__name__
        for defaults in (static_defaults, dynamic_defaults):
            for key, value in defaults.get(class_name, {}).items():
                current = getattr(child, key, TYPE_DEFAULTS[str])
                if current not in TYPE_DEFAULTS.values():
                    # Already holds a real value; leave it untouched.
                    continue
                setattr(child, key, value if isinstance(value, str) else value(child))
Loading

0 comments on commit 73d230d

Please sign in to comment.