Factor out functions for parsers for built-in collections (#101)

Creates public functions for the four built-in collection types handled by _get_parser: list, set, tuple, and dict.
fulcrumgenomics · Feb 26, 2024 · 0aa7346 · 0aa7346
1 parent cdfbc53
commit 0aa7346
Showing 2 changed files with 199 additions and 109 deletions.
diff --git a/fgpyo/util/inspect.py b/fgpyo/util/inspect.py
@@ -70,6 +70,162 @@ def split_at_given_level(
 NoneType = type(None)
 
 
+def list_parser(
+    cls: Type, type_: TypeAlias, parsers: Optional[Dict[type, Callable[[str], Any]]] = None
+) -> partial:
+    """
+    Builds a parser that converts a comma-delimited string into a `List` of parsed elements.
+
+    The empty string parses to an empty list; any other string is split on `,` with
+    `split_at_given_level` and every piece is converted with the parser for the element type.
+
+    Args:
+        cls: the type of the class object this is being parsed for (forwarded to `_get_parser`)
+        type_: the parameterized list type of the attribute, e.g. `List[int]`
+        parsers: an optional mapping from type to the function to use for parsing that type
+
+    Returns:
+        a `functools.partial` wrapping the function that parses a stringified list
+    """
+    element_types = typing.get_args(type_)
+    assert len(element_types) == 1, "Lists are allowed only one subtype per PEP specification!"
+    element_parser = _get_parser(cls, element_types[0], parsers)
+
+    def parse_list(text: str) -> typing.List[Any]:
+        if text == "":
+            return []
+        return [element_parser(piece) for piece in split_at_given_level(text, split_delim=",")]
+
+    return functools.partial(parse_list)
+
+
+def set_parser(
+    cls: Type, type_: TypeAlias, parsers: Optional[Dict[type, Callable[[str], Any]]] = None
+) -> partial:
+    """
+    Builds a parser that converts a brace-wrapped, comma-delimited string into a `Set`.
+
+    The literal string "{}" parses to an empty set. For any other input the first and last
+    characters are dropped, the remainder is split on `,` with `split_at_given_level`, and each
+    distinct piece is converted with the parser for the set's element type.
+
+    Args:
+        cls: the type of the class object this is being parsed for (forwarded to `_get_parser`)
+        type_: the parameterized set type of the attribute, e.g. `Set[int]`
+        parsers: an optional mapping from type to the function to use for parsing that type
+
+    Returns:
+        a `functools.partial` wrapping the function that parses a stringified set
+    """
+    element_types = typing.get_args(type_)
+    assert len(element_types) == 1, "Sets are allowed only one subtype per PEP specification!"
+    element_parser = _get_parser(cls, element_types[0], parsers)
+
+    def parse_set(text: str) -> typing.Set[Any]:
+        if text == "{}":
+            return set()
+        # Collapse repeated raw pieces before handing each one to the element parser.
+        distinct_pieces = set(split_at_given_level(text[1:-1], split_delim=","))
+        return {element_parser(piece) for piece in distinct_pieces}
+
+    return functools.partial(parse_set)
+
+
+def tuple_parser(
+    cls: Type, type_: TypeAlias, parsers: Optional[Dict[type, Callable[[str], Any]]] = None
+) -> partial:
+    """
+    Returns a function that parses a stringified tuple into a `Tuple` of the correct type.
+
+    Args:
+        cls: the type of the class object this is being parsed for (forwarded to `_get_parser`)
+        type_: the parameterized tuple type of the attribute, e.g. `Tuple[int, str]`
+        parsers: an optional mapping from type to the function to use for parsing that type
+
+    Returns:
+        a `functools.partial` wrapping the function that parses a stringified tuple
+    """
+    subtype_parsers = [_get_parser(cls, subtype, parsers) for subtype in typing.get_args(type_)]
+
+    def tuple_parse(tuple_string: str) -> Tuple[Any, ...]:
+        """
+        Parses a tuple value (can do so recursively), raising `ValueError` when the number of
+        values differs from the number of tuple subtypes.
+        Note that this tool will fail on tuples containing strings containing
+        unpaired '{', or '}' characters
+        """
+        assert tuple_string[0] == "(", "Tuple val improperly formatted"
+        assert tuple_string[-1] == ")", "Tuple val improperly formatted"
+        tuple_string = tuple_string[1:-1]
+        if len(tuple_string) == 0:
+            return ()
+        val_strings = split_at_given_level(tuple_string, split_delim=",")
+        # `zip` stops at the shorter input, so a count mismatch must be rejected explicitly
+        # rather than silently dropping values or returning a tuple that is too short.
+        if len(val_strings) != len(subtype_parsers):
+            raise ValueError(
+                f"Expected {len(subtype_parsers)} tuple values but found {len(val_strings)}"
+            )
+        return tuple(parser(val_str) for parser, val_str in zip(subtype_parsers, val_strings))
+
+    return functools.partial(tuple_parse)
+
+
+def dict_parser(
+    cls: Type, type_: TypeAlias, parsers: Optional[Dict[type, Callable[[str], Any]]] = None
+) -> partial:
+    """
+    Returns a function that parses a stringified dict into a `Dict` of the correct type.
+
+    The expected format is `{key;value,key;value}`: the content between the braces is split on
+    `,` and each resulting piece is split on `;`, both using `split_at_given_level`.
+
+    Args:
+        cls: the type of the class object this is being parsed for (forwarded to `_get_parser`)
+        type_: the parameterized dict type of the attribute, e.g. `Dict[int, str]`
+        parsers: an optional mapping from type to the function to use for parsing that type
+
+    Returns:
+        a `functools.partial` wrapping the function that parses a stringified dict
+    """
+    subtypes = typing.get_args(type_)
+    assert len(subtypes) == 2, "Dict object must have exactly 2 subtypes per PEP specification!"
+    key_parser = _get_parser(cls, subtypes[0], parsers)
+    val_parser = _get_parser(cls, subtypes[1], parsers)
+
+    def dict_parse(dict_string: str) -> Dict[Any, Any]:
+        """
+        Parses a dictionary value (can do so recursively)
+
+        Raises:
+            ValueError: if the same parsed key occurs more than once.
+            AssertionError: if the string is not wrapped in braces or an entry does not split
+                into matched key/value pairs.
+        """
+        assert dict_string[0] == "{", "Dict val improperly formatted"
+        assert dict_string[-1] == "}", "Dict val improperly formatted"
+        dict_string = dict_string[1:-1]
+        if len(dict_string) == 0:
+            return {}
+
+        out_dict: Dict[Any, Any] = {}
+        for outer_split in split_at_given_level(dict_string, split_delim=","):
+            inner_splits = split_at_given_level(outer_split, split_delim=";")
+            # Keys and values alternate, so an odd number of pieces means an unmatched key.
+            assert (
+                len(inner_splits) % 2 == 0
+            ), "Inner splits of dict didn't have matched key val pairs"
+            for i in range(0, len(inner_splits), 2):
+                key = key_parser(inner_splits[i])
+                # Reject duplicates instead of letting a later entry overwrite an earlier one.
+                if key in out_dict:
+                    raise ValueError(f"Duplicate key found in dict: {key}")
+                out_dict[key] = val_parser(inner_splits[i + 1])
+        return out_dict
+
+    return functools.partial(dict_parse)
+
+
 def _get_parser(
     cls: Type, type_: TypeAlias, parsers: Optional[Dict[type, Callable[[str], Any]]] = None
 ) -> partial:
@@ -110,118 +266,13 @@ def get_parser() -> partial:
             elif type_ == dict:
                 raise ValueError("Unable to parse dict (try typing.Mapping[type])")
             elif typing.get_origin(type_) == list:
-                subtypes = typing.get_args(type_)
-
-                assert (
-                    len(subtypes) == 1
-                ), "Lists are allowed only one subtype per PEP specification!"
-                subtype_parser = _get_parser(
-                    cls,
-                    subtypes[0],
-                    parsers,
-                )
-                return functools.partial(
-                    lambda s: list(
-                        []
-                        if s == ""
-                        else [
-                            subtype_parser(item)
-                            for item in list(split_at_given_level(s, split_delim=","))
-                        ]
-                    )
-                )
+                return list_parser(cls, type_, parsers)
             elif typing.get_origin(type_) == set:
-                subtypes = typing.get_args(type_)
-                assert (
-                    len(subtypes) == 1
-                ), "Sets are allowed only one subtype per PEP specification!"
-                subtype_parser = _get_parser(
-                    cls,
-                    subtypes[0],
-                    parsers,
-                )
-                return functools.partial(
-                    lambda s: set(
-                        set({})
-                        if s == "{}"
-                        else [
-                            subtype_parser(item)
-                            for item in set(split_at_given_level(s[1:-1], split_delim=","))
-                        ]
-                    )
-                )
+                return set_parser(cls, type_, parsers)
             elif typing.get_origin(type_) == tuple:
-                subtype_parsers = [
-                    _get_parser(
-                        cls,
-                        subtype,
-                        parsers,
-                    )
-                    for subtype in typing.get_args(type_)
-                ]
-
-                def tuple_parse(tuple_string: str) -> Tuple[Any, ...]:
-                    """
-                    Parses a dictionary value (can do so recursively)
-                    Note that this tool will fail on tuples containing strings containing
-                    unpaired '{', or '}' characters
-                    """
-                    assert tuple_string[0] == "(", "Tuple val improperly formatted"
-                    assert tuple_string[-1] == ")", "Tuple val improperly formatted"
-                    tuple_string = tuple_string[1:-1]
-                    if len(tuple_string) == 0:
-                        return ()
-                    else:
-                        val_strings = split_at_given_level(tuple_string, split_delim=",")
-                        return tuple(
-                            parser(val_str)
-                            for parser, val_str in zip(subtype_parsers, val_strings)
-                        )
-
-                return functools.partial(tuple_parse)
-
+                return tuple_parser(cls, type_, parsers)
             elif typing.get_origin(type_) == dict:
-                subtypes = typing.get_args(type_)
-                assert (
-                    len(subtypes) == 2
-                ), "Dict object must have exactly 2 subtypes per PEP specification!"
-                (key_parser, val_parser) = (
-                    _get_parser(
-                        cls,
-                        subtypes[0],
-                        parsers,
-                    ),
-                    _get_parser(
-                        cls,
-                        subtypes[1],
-                        parsers,
-                    ),
-                )
-
-                def dict_parse(dict_string: str) -> Dict[Any, Any]:
-                    """
-                    Parses a dictionary value (can do so recursively)
-                    """
-                    assert dict_string[0] == "{", "Dict val improperly formatted"
-                    assert dict_string[-1] == "}", "Dict val improprly formatted"
-                    dict_string = dict_string[1:-1]
-                    if len(dict_string) == 0:
-                        return {}
-                    else:
-                        outer_splits = split_at_given_level(dict_string, split_delim=",")
-                        out_dict = {}
-                        for outer_split in outer_splits:
-                            inner_splits = split_at_given_level(outer_split, split_delim=";")
-                            assert (
-                                len(inner_splits) % 2 == 0
-                            ), "Inner splits of dict didn't have matched key val pairs"
-                            for i in range(0, len(inner_splits), 2):
-                                out_dict[key_parser(inner_splits[i])] = val_parser(
-                                    inner_splits[i + 1]
-                                )
-                        return out_dict
-
-                return functools.partial(dict_parse)
+                return dict_parser(cls, type_, parsers)
             elif isinstance(type_, type) and issubclass(type_, Enum):
                 return types.make_enum_parser(type_)
             elif types.is_constructible_from_str(type_):

diff --git a/fgpyo/util/tests/test_inspect.py b/fgpyo/util/tests/test_inspect.py
@@ -1,11 +1,19 @@
+from typing import Dict
+from typing import List
 from typing import Optional
+from typing import Set
+from typing import Tuple
 
 import attr
 import pytest
 
 from fgpyo.util.inspect import attr_from
 from fgpyo.util.inspect import attribute_has_default
 from fgpyo.util.inspect import attribute_is_optional
+from fgpyo.util.inspect import dict_parser
+from fgpyo.util.inspect import list_parser
+from fgpyo.util.inspect import set_parser
+from fgpyo.util.inspect import tuple_parser
 
 
 @attr.s(auto_attribs=True, frozen=True)
@@ -66,3 +74,34 @@ def test_attr_from_custom_type_without_parser_fails() -> None:
             kwargs={"foo": ""},
             parsers={},
         )
+
+
+def test_list_parser() -> None:
+    parse_int_list = list_parser(Foo, List[int], {})
+    assert parse_int_list("") == []  # the empty string is the empty list
+    assert parse_int_list("1,2,3") == [1, 2, 3]
+
+
+def test_set_parser() -> None:
+    parse_int_set = set_parser(Foo, Set[int], {})
+    assert parse_int_set("{}") == set()
+    for text in ("{1,2,3}", "{1,1,2,3}"):  # the repeated element collapses to one
+        assert parse_int_set(text) == {1, 2, 3}
+
+
+def test_tuple_parser() -> None:
+    parse_pair = tuple_parser(Foo, Tuple[int, str], {})
+    assert parse_pair("()") == ()  # empty parentheses give the empty tuple
+    assert parse_pair("(1,a)") == (1, "a")  # each position uses its own subtype parser
+
+
+def test_dict_parser() -> None:
+    parse_int_to_str = dict_parser(Foo, Dict[int, str], {})
+    assert parse_int_to_str("{}") == {}  # empty braces give the empty dict
+    assert parse_int_to_str("{123;a}") == {123: "a"}  # key and value are separated by ';'
+
+
+def test_dict_parser_with_duplicate_keys() -> None:
+    parse_int_to_str = dict_parser(Foo, Dict[int, str], {})
+    with pytest.raises(ValueError):  # key 123 appears in two entries
+        parse_int_to_str("{123;a,123;b}")