Skip to content

Commit

Permalink
Factor out functions for parsers for built-in collections (#101)
Browse files Browse the repository at this point in the history
 Creates public functions for the four built-in collection types handled by _get_parser: list, set, tuple, and dict.
jdidion authored Feb 26, 2024
1 parent cdfbc53 commit 0aa7346
Showing 2 changed files with 199 additions and 109 deletions.
269 changes: 160 additions & 109 deletions fgpyo/util/inspect.py
Original file line number Diff line number Diff line change
@@ -70,6 +70,162 @@ def split_at_given_level(
NoneType = type(None)


def list_parser(
    cls: Type, type_: TypeAlias, parsers: Optional[Dict[type, Callable[[str], Any]]] = None
) -> partial:
    """
    Builds a parser that turns a comma-delimited string into a list of parsed elements.

    The empty string parses to an empty list. Any other string is split on "," with
    `split_at_given_level`, and each piece is converted with the parser resolved for the list's
    single subtype.

    Args:
        cls: the type of the class object this is being parsed for (forwarded to `_get_parser`)
        type_: the parameterized list type of the attribute to be parsed (e.g. `List[int]`)
        parsers: an optional mapping from type to the function to use for parsing that type
            (allows for parsing of more complex types)

    Returns:
        a `functools.partial` wrapping a one-argument function from string to list
    """
    element_types = typing.get_args(type_)
    assert len(element_types) == 1, "Lists are allowed only one subtype per PEP specification!"
    element_parser = _get_parser(cls, element_types[0], parsers)

    def parse_list(s: str) -> list:
        # An empty string denotes an empty list rather than a list with one empty element.
        if s == "":
            return []
        return [element_parser(piece) for piece in split_at_given_level(s, split_delim=",")]

    return functools.partial(parse_list)


def set_parser(
    cls: Type, type_: TypeAlias, parsers: Optional[Dict[type, Callable[[str], Any]]] = None
) -> partial:
    """
    Builds a parser that turns a stringified set (e.g. "{1,2,3}") into a set of parsed elements.

    The literal "{}" parses to an empty set. For any other string the first and last characters
    are dropped, the remainder is split on "," with `split_at_given_level`, duplicate pieces are
    collapsed, and each remaining piece is converted with the parser resolved for the set's
    single subtype.

    Args:
        cls: the type of the class object this is being parsed for (forwarded to `_get_parser`)
        type_: the parameterized set type of the attribute to be parsed (e.g. `Set[int]`)
        parsers: an optional mapping from type to the function to use for parsing that type
            (allows for parsing of more complex types)

    Returns:
        a `functools.partial` wrapping a one-argument function from string to set
    """
    member_types = typing.get_args(type_)
    assert len(member_types) == 1, "Sets are allowed only one subtype per PEP specification!"
    member_parser = _get_parser(cls, member_types[0], parsers)

    def parse_set(s: str) -> set:
        if s == "{}":
            return set()
        # De-duplicate the raw pieces first so each distinct string is parsed only once.
        unique_pieces = set(split_at_given_level(s[1:-1], split_delim=","))
        return {member_parser(piece) for piece in unique_pieces}

    return functools.partial(parse_set)


def tuple_parser(
    cls: Type, type_: TypeAlias, parsers: Optional[Dict[type, Callable[[str], Any]]] = None
) -> partial:
    """
    Returns a function that parses a stringified tuple into a `Tuple` of the correct type.

    One parser is resolved per subtype of `type_`; the i-th comma-separated value inside the
    parentheses is converted with the i-th parser.

    Args:
        cls: the type of the class object this is being parsed for (used to get default val for
            parsers)
        type_: the type of the attribute to be parsed
        parsers: an optional mapping from type to the function to use for parsing that type (allows
            for parsing of more complex types)

    Returns:
        a `functools.partial` wrapping a one-argument function from string to tuple
    """
    subtype_parsers = [_get_parser(cls, subtype, parsers) for subtype in typing.get_args(type_)]

    def tuple_parse(tuple_string: str) -> Tuple[Any, ...]:
        """
        Parses a stringified tuple value such as "(1,a)" (can do so recursively).

        Note (carried over from the original implementation): this is expected to fail on
        tuples containing strings containing unpaired '{', or '}' characters.

        Raises:
            AssertionError: if the string is not wrapped in parentheses (including when empty)
        """
        # startswith/endswith (rather than indexing) so that an empty string fails the format
        # assertion instead of raising an unrelated IndexError.
        assert tuple_string.startswith("("), "Tuple val improperly formatted"
        assert tuple_string.endswith(")"), "Tuple val improperly formatted"
        inner = tuple_string[1:-1]
        if len(inner) == 0:
            return ()
        val_strings = split_at_given_level(inner, split_delim=",")
        # NOTE: zip stops at the shorter input, so surplus values (or surplus subtypes) are
        # silently ignored rather than reported.
        return tuple(parser(val_str) for parser, val_str in zip(subtype_parsers, val_strings))

    return functools.partial(tuple_parse)


def dict_parser(
    cls: Type, type_: TypeAlias, parsers: Optional[Dict[type, Callable[[str], Any]]] = None
) -> partial:
    """
    Returns a function that parses a stringified dict into a `Dict` of the correct type.

    The expected string form is wrapped in braces, with entries separated by "," and each key
    separated from its value by ";" (e.g. "{123;a,456;b}").

    Args:
        cls: the type of the class object this is being parsed for (used to get default val for
            parsers)
        type_: the type of the attribute to be parsed
        parsers: an optional mapping from type to the function to use for parsing that type (allows
            for parsing of more complex types)

    Returns:
        a `functools.partial` wrapping a one-argument function from string to dict
    """
    subtypes = typing.get_args(type_)
    assert len(subtypes) == 2, "Dict object must have exactly 2 subtypes per PEP specification!"
    key_parser = _get_parser(cls, subtypes[0], parsers)
    val_parser = _get_parser(cls, subtypes[1], parsers)

    def dict_parse(dict_string: str) -> Dict[Any, Any]:
        """
        Parses a dictionary value (can do so recursively)

        Raises:
            AssertionError: if the string is not wrapped in braces (including when empty), or
                an entry does not split into an even number of ";"-separated parts
            ValueError: if the same parsed key appears more than once
        """
        # startswith/endswith (rather than indexing) so that an empty string fails the format
        # assertion instead of raising an unrelated IndexError.
        assert dict_string.startswith("{"), "Dict val improperly formatted"
        assert dict_string.endswith("}"), "Dict val improperly formatted"
        inner = dict_string[1:-1]
        if len(inner) == 0:
            return {}

        out_dict: Dict[Any, Any] = {}
        for outer_split in split_at_given_level(inner, split_delim=","):
            inner_splits = split_at_given_level(outer_split, split_delim=";")
            assert (
                len(inner_splits) % 2 == 0
            ), "Inner splits of dict didn't have matched key val pairs"
            # Consume the parts pairwise: even positions are keys, odd positions are values.
            for i in range(0, len(inner_splits), 2):
                key = key_parser(inner_splits[i])
                if key in out_dict:
                    raise ValueError(f"Duplicate key found in dict: {key}")
                out_dict[key] = val_parser(inner_splits[i + 1])
        return out_dict

    return functools.partial(dict_parse)


def _get_parser(
cls: Type, type_: TypeAlias, parsers: Optional[Dict[type, Callable[[str], Any]]] = None
) -> partial:
@@ -110,118 +266,13 @@ def get_parser() -> partial:
elif type_ == dict:
raise ValueError("Unable to parse dict (try typing.Mapping[type])")
elif typing.get_origin(type_) == list:
subtypes = typing.get_args(type_)

assert (
len(subtypes) == 1
), "Lists are allowed only one subtype per PEP specification!"
subtype_parser = _get_parser(
cls,
subtypes[0],
parsers,
)
return functools.partial(
lambda s: list(
[]
if s == ""
else [
subtype_parser(item)
for item in list(split_at_given_level(s, split_delim=","))
]
)
)
return list_parser(cls, type_, parsers)
elif typing.get_origin(type_) == set:
subtypes = typing.get_args(type_)
assert (
len(subtypes) == 1
), "Sets are allowed only one subtype per PEP specification!"
subtype_parser = _get_parser(
cls,
subtypes[0],
parsers,
)
return functools.partial(
lambda s: set(
set({})
if s == "{}"
else [
subtype_parser(item)
for item in set(split_at_given_level(s[1:-1], split_delim=","))
]
)
)
return set_parser(cls, type_, parsers)
elif typing.get_origin(type_) == tuple:
subtype_parsers = [
_get_parser(
cls,
subtype,
parsers,
)
for subtype in typing.get_args(type_)
]

def tuple_parse(tuple_string: str) -> Tuple[Any, ...]:
"""
Parses a dictionary value (can do so recursively)
Note that this tool will fail on tuples containing strings containing
unpaired '{', or '}' characters
"""
assert tuple_string[0] == "(", "Tuple val improperly formatted"
assert tuple_string[-1] == ")", "Tuple val improperly formatted"
tuple_string = tuple_string[1:-1]
if len(tuple_string) == 0:
return ()
else:
val_strings = split_at_given_level(tuple_string, split_delim=",")
return tuple(
parser(val_str)
for parser, val_str in zip(subtype_parsers, val_strings)
)

return functools.partial(tuple_parse)

return tuple_parser(cls, type_, parsers)
elif typing.get_origin(type_) == dict:
subtypes = typing.get_args(type_)
assert (
len(subtypes) == 2
), "Dict object must have exactly 2 subtypes per PEP specification!"
(key_parser, val_parser) = (
_get_parser(
cls,
subtypes[0],
parsers,
),
_get_parser(
cls,
subtypes[1],
parsers,
),
)

def dict_parse(dict_string: str) -> Dict[Any, Any]:
"""
Parses a dictionary value (can do so recursively)
"""
assert dict_string[0] == "{", "Dict val improperly formatted"
assert dict_string[-1] == "}", "Dict val improprly formatted"
dict_string = dict_string[1:-1]
if len(dict_string) == 0:
return {}
else:
outer_splits = split_at_given_level(dict_string, split_delim=",")
out_dict = {}
for outer_split in outer_splits:
inner_splits = split_at_given_level(outer_split, split_delim=";")
assert (
len(inner_splits) % 2 == 0
), "Inner splits of dict didn't have matched key val pairs"
for i in range(0, len(inner_splits), 2):
out_dict[key_parser(inner_splits[i])] = val_parser(
inner_splits[i + 1]
)
return out_dict

return functools.partial(dict_parse)
return dict_parser(cls, type_, parsers)
elif isinstance(type_, type) and issubclass(type_, Enum):
return types.make_enum_parser(type_)
elif types.is_constructible_from_str(type_):
39 changes: 39 additions & 0 deletions fgpyo/util/tests/test_inspect.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
from typing import Dict
from typing import List
from typing import Optional
from typing import Set
from typing import Tuple

import attr
import pytest

from fgpyo.util.inspect import attr_from
from fgpyo.util.inspect import attribute_has_default
from fgpyo.util.inspect import attribute_is_optional
from fgpyo.util.inspect import dict_parser
from fgpyo.util.inspect import list_parser
from fgpyo.util.inspect import set_parser
from fgpyo.util.inspect import tuple_parser


@attr.s(auto_attribs=True, frozen=True)
@@ -66,3 +74,34 @@ def test_attr_from_custom_type_without_parser_fails() -> None:
kwargs={"foo": ""},
parsers={},
)


def test_list_parser() -> None:
    """The empty string yields an empty list; comma-separated ints are parsed element-wise."""
    parse = list_parser(Foo, List[int], {})
    for text, expected in (("", []), ("1,2,3", [1, 2, 3])):
        assert parse(text) == expected


def test_set_parser() -> None:
    """"{}" yields an empty set; repeated elements in the input collapse to one member."""
    parse = set_parser(Foo, Set[int], {})
    cases = (
        ("{}", set()),
        ("{1,2,3}", {1, 2, 3}),
        ("{1,1,2,3}", {1, 2, 3}),
    )
    for text, expected in cases:
        assert parse(text) == expected


def test_tuple_parser() -> None:
    """"()" yields an empty tuple; each position is parsed with its own subtype."""
    parse = tuple_parser(Foo, Tuple[int, str], {})
    for text, expected in (("()", ()), ("(1,a)", (1, "a"))):
        assert parse(text) == expected


def test_dict_parser() -> None:
    """"{}" yields an empty dict; a "key;value" entry is parsed with the key and value types."""
    parse = dict_parser(Foo, Dict[int, str], {})
    for text, expected in (("{}", {}), ("{123;a}", {123: "a"})):
        assert parse(text) == expected


def test_dict_parser_with_duplicate_keys() -> None:
    """A key that appears twice in the input must be rejected with a ValueError."""
    parse = dict_parser(Foo, Dict[int, str], {})
    duplicated = "{123;a,123;b}"
    with pytest.raises(ValueError):
        parse(duplicated)

0 comments on commit 0aa7346

Please sign in to comment.